alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

jforum example source code file (SafeHtml.java)

This example jforum source code file (SafeHtml.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - jforum tags/keywords

forumexception, hashset, html, iterator, lexer, lexer, set, string, string, stringbuffer, stringbuffer, tag, tag, util, vector

The jforum SafeHtml.java source code

/*
 * Copyright (c) JForum Team
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, 
 * with or without modification, are permitted provided 
 * that the following conditions are met:
 * 
 * 1) Redistributions of source code must retain the above 
 * copyright notice, this list of conditions and the 
 * following  disclaimer.
 * 2)  Redistributions in binary form must reproduce the 
 * above copyright notice, this list of conditions and 
 * the following disclaimer in the documentation and/or 
 * other materials provided with the distribution.
 * 3) Neither the name of "Rafael Steil" nor 
 * the names of its contributors may be used to endorse 
 * or promote products derived from this software without 
 * specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT 
 * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, 
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
 * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 
 * IN CONTRACT, STRICT LIABILITY, OR TORT 
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
 * 
 * This file creation date: 27/09/2004 23:59:10
 * The JForum Project
 * http://www.jforum.net
 */
package net.jforum.util;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.Vector;

import net.jforum.exceptions.ForumException;
import net.jforum.util.preferences.ConfigKeys;
import net.jforum.util.preferences.SystemGlobals;
import net.jforum.view.forum.common.ViewCommon;

import org.htmlparser.Attribute;
import org.htmlparser.Node;
import org.htmlparser.Tag;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.nodes.TextNode;

/**
 * Process text with html and remove possible malicious tags and attributes.
 * Work based on tips from Amit Klein and the following documents:
 * <ul>
 * <li>http://ha.ckers.org/xss.html</li>
 * <li>http://quickwired.com/kallahar/smallprojects/php_xss_filter_function.php</li>
 * </ul>
 * @author Rafael Steil
 * @version $Id: SafeHtml.java,v 1.25 2007/09/19 14:08:57 rafaelsteil Exp $
 */
public class SafeHtml 
{
	private static Set welcomeTags;
	private static Set welcomeAttributes;
	private static Set allowedProtocols;
	
	static {
		welcomeTags = new HashSet();
		welcomeAttributes = new HashSet();
		allowedProtocols = new HashSet();
		
		splitAndTrim(ConfigKeys.HTML_TAGS_WELCOME, welcomeTags);
		splitAndTrim(ConfigKeys.HTML_ATTRIBUTES_WELCOME, welcomeAttributes);
		splitAndTrim(ConfigKeys.HTML_LINKS_ALLOW_PROTOCOLS, allowedProtocols);
	}
	
	private static void splitAndTrim(String s, Set data)
	{
		String s1 = SystemGlobals.getValue(s);
		
		if (s1 == null) {
			return;
		}
		
		String[] tags = s1.toUpperCase().split(",");

		for (int i = 0; i < tags.length; i++) {
			data.add(tags[i].trim());
		}
	}
	
	/**
	 * Given an input, analyze each HTML tag and remove unsecure attributes from them. 
	 * @param contents The content to verify
	 * @return the content, secure. 
	 */
	public String ensureAllAttributesAreSafe(String contents) 
	{
		StringBuffer sb = new StringBuffer(contents.length());
		
		try {
			Lexer lexer = new Lexer(contents);
			Node node;
			
			while ((node = lexer.nextNode()) != null) {
				if (node instanceof Tag) {
					Tag tag = (Tag)node;
					
					this.checkAndValidateAttributes(tag, false);
					
					sb.append(tag.toHtml());
				}
				else {
					sb.append(node.toHtml());
				}
			}
		}
		catch (Exception e) {
			throw new ForumException("Problems while parsing HTML: " + e, e);
		}
		
		return sb.toString();
	}
	
	/**
	 * Given an input, makes it safe for HTML displaying. 
	 * Removes any not allowed HTML tag or attribute, as well
	 * unwanted Javascript statements inside the tags. 
	 * @param contents the input to analyze
	 * @return the modified and safe string
	 */
	public String makeSafe(String contents)
	{
		if (contents == null || contents.length() == 0) {
			return contents;
		}
		
		StringBuffer sb = new StringBuffer(contents.length());
		
		try {
			Lexer lexer = new Lexer(contents);
			Node node;
			
			while ((node = lexer.nextNode()) != null) {
				boolean isTextNode = node instanceof TextNode;
				
				if (isTextNode) {
					// Text nodes are raw data, so we just
					// strip off all possible html content
					String text = node.toHtml();
					
					if (text.indexOf('>') > -1 || text.indexOf('<') > -1) {
						StringBuffer tmp = new StringBuffer(text);
						
						ViewCommon.replaceAll(tmp, "<", "<");
						ViewCommon.replaceAll(tmp, ">", ">");
						ViewCommon.replaceAll(tmp, "\"", """);
						
						node.setText(tmp.toString());
					}
				}
				
				if (isTextNode || (node instanceof Tag && this.isTagWelcome(node))) {
					sb.append(node.toHtml());
				}
				else {
					StringBuffer tmp = new StringBuffer(node.toHtml());
					
					ViewCommon.replaceAll(tmp, "<", "<");
					ViewCommon.replaceAll(tmp, ">", ">");
					
					sb.append(tmp.toString());
				}
			}
		}
		catch (Exception e) {
			throw new ForumException("Error while parsing HTML: " + e, e);
		}
		
		return sb.toString();
	}
	
	/**
	 * Returns true if a given tag is allowed. 
	 * Also, it checks and removes any unwanted attribute the tag may contain. 
	 * @param node The tag node to analyze
	 * @return true if it is a valid tag. 
	 */
	private boolean isTagWelcome(Node node)
	{
		Tag tag = (Tag)node;

		if (!welcomeTags.contains(tag.getTagName())) {
			return false;
		}
		
		this.checkAndValidateAttributes(tag, true);
		
		return true;
	}
	
	/**
	 * Given a tag, check its attributes, removing those unwanted or not secure 
	 * @param tag The tag to analyze
	 * @param checkIfAttributeIsWelcome true if the attribute name should be matched
	 * against the list of welcome attributes, set in the main configuration file. 
	 */
	private void checkAndValidateAttributes(Tag tag, boolean checkIfAttributeIsWelcome)
	{
		Vector newAttributes = new Vector();
		
		for (Iterator iter = tag.getAttributesEx().iterator(); iter.hasNext(); ) {
			Attribute a = (Attribute)iter.next();

			String name = a.getName();
			
			if (name == null) {
				newAttributes.add(a);
			}
			else {
				name = name.toUpperCase();
				
				if (a.getValue() == null) {
					newAttributes.add(a);
					continue;
				}
				
				String value = a.getValue().toLowerCase();
				
				if (checkIfAttributeIsWelcome && !this.isAttributeWelcome(name)) {
					continue;
				}
				
				if (!this.isAttributeSafe(name, value)) {
					continue;
				}
					
				if (a.getValue().indexOf("&#") > -1) {
					a.setValue(a.getValue().replaceAll("&#", "&#"));
				}
				
				newAttributes.add(a);
			}
		}
		
		tag.setAttributesEx(newAttributes);
	}
	
	/**
	 * Check if the given attribute name is in the list of allowed attributes
	 * @param name the attribute name
	 * @return true if it is an allowed attribute name
	 */
	private boolean isAttributeWelcome(String name)
	{
		return welcomeAttributes.contains(name);
	}

	/**
	 * Check if the attribute is safe, checking either its name and value. 
	 * @param name the attribute name
	 * @param value the attribute value
	 * @return true if it is a safe attribute
	 */
	private boolean isAttributeSafe(String name, String value)
	{
		if (name.length() >= 2 && name.charAt(0) == 'O' && name.charAt(1) == 'N') {
			return false;
		}
		
		if (value.indexOf('\n') > -1 || value.indexOf('\r') > -1 || value.indexOf('\0') > -1) {
			return false;
		}
			
		if (("HREF".equals(name) || "SRC".equals(name))) {
			if (!this.isHrefValid(value)) {
				return false;
			}
		}
		else if ("STYLE".equals(name)) {
			// It is much more a try to not allow constructions
			// like style="background-color: url(javascript:xxxx)" than anything else
			if (value.indexOf('(') > -1) {
				return false;
			}
		}
		
		return true;
	}
	
	/**
	 * Checks if a given address is valid
	 * @param href The address to check
	 * @return true if it is valid
	 */
	private boolean isHrefValid(String href) 
	{
		if (SystemGlobals.getBoolValue(ConfigKeys.HTML_LINKS_ALLOW_RELATIVE)
			&& href.length() > 0 
			&& href.charAt(0) == '/') {
			return true;
		}
		
		for (Iterator iter = allowedProtocols.iterator(); iter.hasNext(); ) {
			String protocol = iter.next().toString().toLowerCase();
			
			if (href.startsWith(protocol)) {
				return true;
			}
		}
		
		return false;
	}
}

Other jforum examples (source code examples)

Here is a short list of links related to this jforum SafeHtml.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.