Java example - DecodeInterceptor.java

What this is

This file is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.
The source code

/*   
 *  Copyright 1999-2004 The Apache Software Foundation.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.apache.tomcat.modules.mappers;

import java.io.IOException;

import org.apache.tomcat.core.BaseInterceptor;
import org.apache.tomcat.core.Context;
import org.apache.tomcat.core.ContextManager;
import org.apache.tomcat.core.Request;
import org.apache.tomcat.core.Response;
import org.apache.tomcat.core.ServerSession;
import org.apache.tomcat.core.TomcatException;
import org.apache.tomcat.util.buf.ByteChunk;
import org.apache.tomcat.util.buf.MessageBytes;
import org.apache.tomcat.util.http.ContentType;
import org.apache.tomcat.util.http.MimeHeaders;

/**
 * Default actions after receiving the request: get charset, unescape,
 * pre-process.  This interceptor can optionally normalize the request
 * and check for certain unsafe escapes.  Both of these options
 * are on by default.
 * 
 */
public class DecodeInterceptor extends  BaseInterceptor  {
    // Server-wide fallback encoding. postReadRequest() applies it only when
    // neither the Content-Type header nor the request URI supplied a charset.
    // null means "no default".
    private String defaultEncoding=null;
    // debug, default will be false, null, null
    // NOTE(review): the comment above is kept from the original; it seems to
    // say the current defaults are meant for debugging only - confirm.
    // When true, beforeBody() stores the response encoding in the session and
    // getInfo() falls back to that stored value.
    private boolean useSessionEncoding=true; 
    // Name of the request attribute that getInfo() reads the charset from.
    private String charsetAttribute="charset";
    // Marker searched for in the request URI by postReadRequest();
    // setCharsetAttribute() rebuilds it as ";" + charsetAttribute + "=".
    private String charsetURIAttribute=";charset=";

    // Note ids - all of them are assigned in engineInit()
    // "req.encoding" request note; getInfo() answers queries for this id.
    private int encodingInfoNote;
    // req.decoded - Is set after the request is decoded. The value is the
    // module that provided the decoding ( test for not null only )
    private int decodedNote;
    // "req.encodingSource" request note; postReadRequest() sets it to
    // "Content-Type" or "Request-Attribute".
    private int encodingSourceNote;
    // "session.encoding" session note; holds the charset saved by beforeBody().
    private int sessionEncodingNote;

    // Normalize the request path ( "//", "/./", "/../" ) in postReadRequest().
    private boolean normalize=true;
    // Reject URIs containing the escapes checked by isSafeURI() with a 403.
    private boolean safe=true;
    // Copy the URI into req.unparsedURI() before it is %-decoded.
    private boolean saveOriginal=false;
    /** Creates an interceptor with the default settings declared above. */
    public DecodeInterceptor() {
    }

    /* -------------------- Config  -------------------- */

    /**
     * Sets the server-wide default encoding. It is used by
     * {@code postReadRequest} only when no charset could be taken from the
     * Content-Type header or from the request URI.
     * UTF-8 is recommended (if you are willing to break the standard spec,
     * which requires 8859-1).
     *
     * @param s the encoding name, or {@code null} for no default
     */
    public void setDefaultEncoding(String s) {
        this.defaultEncoding = s;
    }

    /**
     * Enables or disables per-session encoding tracking: when enabled,
     * {@code beforeBody} stores the response encoding in the session and
     * {@code getInfo} consults the stored value.
     *
     * @param b {@code true} to track the encoding per session
     */
    public void setUseSessionEncoding(boolean b) {
        this.useSessionEncoding = b;
    }

    /**
     * Sets the name of the charset attribute. The name is used as the
     * request attribute key read by {@code getInfo} and, in the form
     * {@code ";name="}, as the marker {@code postReadRequest} looks for in
     * the request URI.
     *
     * Both consumers guard their lookup with a null check, so passing
     * {@code null} disables both lookups. Previously a null name produced
     * the literal marker {@code ";null="} instead of disabling the URI
     * lookup.
     *
     * @param s the attribute name, or {@code null} to disable the feature
     */
    public void setCharsetAttribute(String s) {
        charsetAttribute = s;
        if (s == null) {
            // Keep the derived marker in sync: null means "do not look".
            charsetURIAttribute = null;
        } else {
            charsetURIAttribute = ";" + s + "=";
        }
    }

    /**
     * Controls URL normalization. The decode interceptor can normalize
     * urls, per RFC 1630; {@code postReadRequest} only does so while this
     * flag is set.
     *
     * @param b {@code true} to normalize request paths
     */
    public void setNormalize(boolean b) {
        this.normalize = b;
    }

    /**
     * Controls whether the original uri is saved before decoding.
     * Default is false, for consistency among servers.
     *
     * @param b {@code true} to keep a copy of the uri before it is decoded
     */
    public void setSaveOriginal(boolean b) {
        this.saveOriginal = b;
    }

    /**
     * Controls rejection of unsafe urls. Unsafe urls are those that
     * contain any of the following escapes:
     * <pre>
     *   %25 = '%'
     *   %2E = '.'
     *   %2F = '/'
     *   %5C = '\'
     * </pre>
     * These are rejected because they interfere with URL pattern matching
     * with regard to security issues.
     *
     * @param b {@code true} to reject urls containing those escapes
     */
    public void setSafe(boolean b) {
        this.safe = b;
    }
    
    /* -------------------- Initialization -------------------- */
    
    /**
     * Looks up the ids of the request and session notes this module uses
     * and caches them in the corresponding fields.
     *
     * @param cm the context manager that hands out note ids
     * @throws TomcatException declared by the hook signature
     */
    public void engineInit(ContextManager cm) throws TomcatException {
        // The lookups are kept in their original order on purpose.
        encodingInfoNote =
            cm.getNoteId(ContextManager.REQUEST_NOTE, "req.encoding");
        encodingSourceNote =
            cm.getNoteId(ContextManager.REQUEST_NOTE, "req.encodingSource");
        sessionEncodingNote =
            cm.getNoteId(ContextManager.SESSION_NOTE, "session.encoding");
        decodedNote =
            cm.getNoteId(ContextManager.REQUEST_NOTE, "req.decoded");
    }
    /* -------------------- Request mapping -------------------- */


    /**
     * Normalizes the request path held in {@code pathMB}, dispatching on
     * the representation the MessageBytes currently holds. Based on
     * Apache's path normalization code.
     *
     * Byte content is normalized in place and the cached string value is
     * reset if anything changed. Char and String content go through the
     * String normalizer, and the result is stored back only when it differs
     * from the input.
     *
     * @param pathMB the request path; modified in place
     */
    private void normalizePath(MessageBytes pathMB) {
        if (debug > 0) {
            log("Normalize " + pathMB.toString());
        }

        final int type = pathMB.getType();
        if (type == MessageBytes.T_BYTES) {
            if (normalize(pathMB.getByteChunk())) {
                pathMB.resetStringValue();
            }
        } else if (type == MessageBytes.T_CHARS
                   || type == MessageBytes.T_STR) {
            // The two textual representations were handled by identical,
            // duplicated branches; they now share one.
            String orig = pathMB.toString();
            String normalized = normalize(orig);
            // Compare by value rather than by reference, so the check does
            // not depend on normalize(String) returning the same instance
            // when nothing changed.
            if (!orig.equals(normalized)) {
                pathMB.resetStringValue();
                pathMB.setString(normalized);
            }
        }
    }

    /**
     * Normalizes a path stored in a ByteChunk, working directly on the
     * chunk's byte array between getStart() and getEnd(). The passes are,
     * in order: "//", "/./", a trailing "/.", "/../" and a trailing "/..".
     * Whenever a pass shortens the path the chunk's end is updated with
     * setEnd().
     *
     * NOTE(review): the loop headers and bodies of the "//", "/./" and
     * "/../" passes are truncated in this copy of the file ( text between
     * '<' and '>' appears to have been lost ), so the method does not
     * compile as shown - restore those loops from the upstream source.
     *
     * @param bc the chunk holding the path; modified in place
     * @return true if the chunk was modified
     */
    private boolean normalize(  ByteChunk bc ) {
	int start=bc.getStart();
	int end=bc.getEnd();
	byte buff[]=bc.getBytes();
	int i=0;
	int j=0;
	boolean modified=false;
	String orig=null;
	// the original text is only needed for the debug messages
	if( debug>0 ) orig=new String( buff, start, end-start);
	
	// remove //
	// NOTE(review): loop truncated in this copy, see method comment
	for( i=start, j=start; i 0 ) {
		log( "Eliminate // " + orig + " " + start + " " + end );
	    }
	}
	
	// remove /./
	// NOTE(review): loop truncated in this copy, see method comment
	for( i=start, j=start; i 0 ) {
		log( "Eliminate /./ " + orig);
	    }
	}
	
	// remove  /. at the end
	// j remembers the current end so a change can be detected below
	j=end;
	if( end==start+1 && buff[start]== '.' ) {
	    // the whole path is the single byte "."
	    end--;
	    buff[start] = '/';
	} else if( end >= start+1 && buff[ end-1 ] == '.' &&
		 buff[end-2]=='/' ) {
	    // drop only the '.', the '/' before it is kept
	    end=end-1;
	}
	if( end!=j ) {
	    bc.setEnd( end );
	    modified=true;
	    if( debug > 0 ) {
		log( "Eliminate ending /. " + orig);
	    }
	}

	// remove /../
	// NOTE(review): loop truncated in this copy, see method comment. The
	// visible part walks j back to the previous '/'.
	for( i=start, j=start; istart && buff[j]!='/' ) {
		    j--;
		}
	    } else {
		buff[j++]=buff[i];
	    }
	}
	if( i!=j ) {
	    // copies the last two bytes after the compacted part; presumably
	    // the loop above stops before them - TODO confirm
	    buff[j++]=buff[end-2];
	    buff[j++]=buff[end-1];
	    end=j;
	    bc.setEnd( end );
	    modified=true;
	    if( debug > 0 ) {
		log( "Eliminate /../ " + orig);
	    }
	}


	// remove trailing xx/..
	j=end;
	if( end>=start + 3 &&
	    buff[end-1]=='.' &&
	    buff[end-2]=='.' &&
	    buff[end-3]=='/' ) {
	    end-=3;
	    if(end < start) {
		end = start;
	    }
	    // walk back to the nearest '/' at or before the new end
	    while( end>start &&  buff[end]!='/' )
		end--; 
	}
	if( end!=j ) {
	    bc.setEnd( end );
	    modified=true;
	    if( debug > 0 ) {
		log( "Eliminate ending /.. " + orig);
	    }
	}
	return modified;
    }

    /**
     * Normalizes a path given as a String. The passes are the same as in
     * the ByteChunk variant ( "//", "/./", trailing "/.", "/../", trailing
     * "/.." ) but they run on a char[] copy of the input.
     *
     * NOTE(review): the loop headers and bodies of the "//", "/./" and
     * "/../" passes are truncated in this copy of the file ( text between
     * '<' and '>' appears to have been lost ), so the method does not
     * compile as shown - restore those loops from the upstream source.
     *
     * @param str the path to normalize
     * @return str itself ( same instance ) when nothing was changed,
     *         otherwise a new String built from the first 'end' chars
     *         of the working copy
     */
    private String normalize(  String str ) {
	int start=0;
	int end=str.length();
	char buff[]=str.toCharArray();
	int i=0;
	int j=0;
	boolean modified=false;
	String orig=str;
	
	// remove //
	// NOTE(review): loop truncated in this copy, see method comment
	for( i=start, j=start; i 0 ) {
		log( "Eliminate // " + orig + " " + start + " " + end );
	    }
	}
	
	// remove /./
	// NOTE(review): loop truncated in this copy, see method comment
	for( i=start, j=start; i 0 ) {
		log( "Eliminate /./ " + orig);
	    }
	}
	
	// remove  /. at the end
	// j remembers the current end so a change can be detected below
	j=end;
	if( end==start+1 && buff[start]== '.' )
	    // the whole path is "." - the result becomes empty
	    end--;
	else if( end > start+1 && buff[ end-1 ] == '.' &&
		 buff[end-2]=='/' ) {
	    // drop only the '.', the '/' before it is kept
	    end=end-1;
	}
	if( end!=j ) {
	    modified=true;
	    if( debug > 0 ) {
		log( "Eliminate ending /. " + orig);
	    }
	}

	// remove /../
	// NOTE(review): loop truncated in this copy, see method comment. The
	// visible part walks j back to the previous '/'.
	for( i=start, j=start; istart && buff[j]!='/' ) {
		    j--;
		}
	    } else {
		buff[j++]=buff[i];
	    }
	}
	if( i!=j ) {
	    // copies the last two chars after the compacted part; presumably
	    // the loop above stops before them - TODO confirm
	    buff[j++]=buff[end-2];
	    buff[j++]=buff[end-1];
	    end=j;
	    modified=true;
	    if( debug > 0 ) {
		log( "Eliminate /../ " + orig);
	    }
	}


	// remove trailing xx/..
	j=end;
	if( end>=start + 3 &&
	    buff[end-1]=='.' &&
	    buff[end-2]=='.' &&
	    buff[end-3]=='/' ) {
	    // NOTE(review): the ByteChunk variant subtracts 3 here, this one
	    // subtracts 4 - confirm which of the two is intended
	    end-=4;
	    if(end < start) {
		end = start;
	    }
	    // walk back to the nearest '/' at or before the new end
	    while( end>start &&  buff[end]!='/' )
		end--; 
	}
	if( end!=j ) {
	    modified=true;
	    if( debug > 0 ) {
		log( "Eliminate ending /.. " +orig);
	    }
	}
	// returning the same instance signals "unchanged" to the caller
	if( modified )
	    return new String( buff, 0, end );
	else
	    return str;
    }

    /**
     * Checks the still-encoded request path for escapes that must not be
     * decoded: %25 ('%'), %2E ('.'), %2F ('/') and %5C ('\'), matched
     * case-insensitively, plus a literal NUL character.
     *
     * @param pathMB the request path, not yet %-decoded
     * @return false if any forbidden escape or a NUL is present
     */
    private boolean isSafeURI(MessageBytes pathMB) {
        final int firstPercent = pathMB.indexOf('%');
        if (firstPercent >= 0) {
            // Escapes can only occur at or after the first '%'.
            final String[] forbidden = { "%25", "%2E", "%2F", "%5C" };
            for (int k = 0; k < forbidden.length; k++) {
                if (pathMB.indexOfIgnoreCase(forbidden[k], firstPercent) >= 0) {
                    return false;
                }
            }
        }
        // A raw NUL is never acceptable, with or without escapes.
        return pathMB.indexOf('\0') < 0;
    }
    
    /**
     * Hook run after the request has been read. In order, it:
     * <ol>
     *  <li>rejects unsafe URIs (when {@code safe} is set),</li>
     *  <li>normalizes the path (when {@code normalize} is set and the path
     *      contains "//" or "/."),</li>
     *  <li>determines the character encoding from the Content-Type, then a
     *      ";charset=" style URI attribute (which is stripped from the URI),
     *      then the configured default,</li>
     *  <li>%-decodes the path unless the request is already marked as
     *      decoded.</li>
     * </ol>
     *
     * @param req the request being processed; its URI is modified in place
     * @return 0 on success, 403 for an unsafe URI, 404 when the decoded
     *         path contains a NUL character, 400 when decoding fails
     * @throws RuntimeException if the request URI is null
     */
    public int postReadRequest(Request req) {
        final MessageBytes pathMB = req.requestURI();

        if (pathMB.isNull()) {
            throw new RuntimeException("ASSERT: null path in request URI");
        }

        // If path is unsafe, return forbidden
        if (safe && !isSafeURI(pathMB)) {
            req.setAttribute("javax.servlet.error.message", "Unsafe URL");
            return 403;
        }

        if (normalize) {
            // Only pay for normalization when the path can need it.
            final boolean suspicious =
                pathMB.indexOf("//") >= 0 || pathMB.indexOf("/.") >= 0;
            if (suspicious) {
                normalizePath(pathMB);
                if (debug > 0) {
                    log("Normalized url " + pathMB);
                }
            }
        }

        // ---- character encoding ----
        String charEncoding = null;
        final MimeHeaders headers = req.getMimeHeaders();

        // First choice: the charset parameter of the Content-Type
        final MessageBytes contentType = req.contentType();
        if (contentType != null) {
            // XXX use message bytes, optimize !!!
            final String contentTypeString = contentType.toString();
            charEncoding =
                ContentType.getCharsetFromContentType(contentTypeString);
            if (debug > 0) {
                log("Got encoding from content-type " +
                    charEncoding + " " + contentTypeString);
            }
            req.setNote(encodingSourceNote, "Content-Type");
        }

        if (debug > 99) {
            dumpHeaders(headers);
        }

        // Second choice: ;charset= attribute in the URI ( similar with
        // sessionId ). That's perfect for multibyte chars in URLs.
        if (charEncoding == null && charsetURIAttribute != null) {
            final int idxCharset =
                req.requestURI().indexOf(charsetURIAttribute);
            if (idxCharset >= 0) {
                final String uri = req.requestURI().toString();
                final int nextAtt = uri.indexOf(';', idxCharset + 1);
                final int valueStart =
                    idxCharset + charsetURIAttribute.length();

                if (nextAtt > 0) {
                    // Another ";attribute" follows: keep it in the URI.
                    final String trailing = uri.substring(nextAtt);
                    charEncoding = uri.substring(valueStart, nextAtt);
                    req.requestURI().setString(
                        uri.substring(0, idxCharset) + trailing);
                } else {
                    // The charset attribute is the last thing in the URI.
                    charEncoding = uri.substring(valueStart);
                    req.requestURI().setString(
                        uri.substring(0, idxCharset));
                }
                req.setNote(encodingSourceNote, "Request-Attribute");

                if (debug > 0) {
                    log("ReqAtt= " + charEncoding + " " +
                        req.requestURI());
                }
            }
        }

        // Last choice: the global default
        if (charEncoding == null) {
            if (debug > 0) {
                log("Default encoding " + defaultEncoding);
            }
            if (defaultEncoding != null) {
                charEncoding = defaultEncoding;
            }
        }

        if (charEncoding != null) {
            req.setCharEncoding(charEncoding);
        }

        // ---- decoding ----
        // Another module may already have decoded the request.
        final Object decodedBy = req.getNote(decodedNote);
        if (decodedBy != null) {
            if (debug > 5) {
                log("Already decoded " + decodedBy);
            }
            return 0;
        }

        // Save the original for the facade
        if (saveOriginal) {
            try {
                req.unparsedURI().duplicate(pathMB);
            } catch (IOException ex) {
                // If it happens, do default processing
                log("Error copying request ", ex);
            }
        }

        final boolean hasEscapes =
            pathMB.indexOf('%') >= 0 || pathMB.indexOf('+') >= 0;
        if (!hasEscapes) {
            // Nothing to decode; the decoded note is left unset.
            return 0;
        }

        try {
            if (debug > 1) {
                log("Before " + pathMB.toString());
            }
            req.getURLDecoder().convert(pathMB, false);
            pathMB.resetStringValue();
            if (debug > 1) {
                log("After " + pathMB.toString());
            }
            if (pathMB.indexOf('\0') >= 0) {
                return 404; // XXX should be 400
            }
            // Mark the request as decoded by this module.
            req.setNote(decodedNote, this);
        } catch (IOException ex) {
            log("Error decoding request ", ex);
            return 400;
        }

        return 0;
    }

    /**
     * Hook - before the response body is sent, remember the response
     * encoding in the session ( if there is one ), so that it can be used
     * as a guess for the next request. All browsers I know will use the
     * same encoding in the next request.
     *
     * This is not part of the spec. It is controlled by
     * {@code useSessionEncoding}, which is initialized to {@code true} in
     * this class.
     *
     * @param req the current request; its existing session is used, none
     *            is requested to be created ( getSession( false ) )
     * @param res the response whose character encoding is saved
     * @return always DECLINED
     */
    public int beforeBody(Request req, Response res) {
        if (!useSessionEncoding) {
            return DECLINED;
        }

        final ServerSession sess = req.getSession(false);
        if (sess == null) {
            return DECLINED;
        }

        final String charset = res.getCharacterEncoding(false);
        if (charset == null) {
            return DECLINED;
        }

        sess.setNote(sessionEncodingNote, charset);
        if (debug > 0) {
            log("Setting per session encoding " + charset);
        }
        return DECLINED;
    }

    
    /**
     * Answers late queries for the request encoding ( the
     * {@code encodingInfoNote} id ); every other info id yields null.
     *
     * The charset is looked up in this order: the request attribute named
     * by {@code charsetAttribute}, the encoding saved in the session
     * ( when {@code useSessionEncoding} is set ), and finally the context
     * property "charset".
     *
     * @param ctx  the context, used for the per-context default
     * @param req  the request being processed
     * @param info the id of the requested information
     * @param k    not used by this implementation
     * @return the charset name, or null if unknown or if {@code info} is
     *         not the encoding note id
     */
    public Object getInfo(Context ctx, Request req, int info, String k) {
        // Only the encoding info is handled here ( this is called later )
        if (info != encodingInfoNote) {
            return null;
        }

        // Second attempt to guess the encoding, the request is processed
        String charset = null;

        // Use request attributes
        if (charsetAttribute != null) {
            charset = (String) req.getAttribute(charsetAttribute);
            if (debug > 0 && charset != null) {
                log("Charset from attribute " + charsetAttribute + " "
                    + charset);
            }
        }

        // Use session attributes
        if (charset == null && useSessionEncoding) {
            final ServerSession sess = req.getSession(false);
            if (sess != null) {
                charset = (String) sess.getNote(sessionEncodingNote);
                if (debug > 0 && charset != null) {
                    log("Charset from session " + charset);
                }
            }
        }

        if (charset != null) {
            return charset;
        }

        // Use per context default
        final String contextDefault = ctx.getProperty("charset");
        if (debug > 0 && contextDefault != null) {
            log("Default per context " + contextDefault);
        }
        return contextDefault;
    }

    /**
     * Counterpart of {@code getInfo}; this module stores nothing, so every
     * call is declined.
     *
     * @return always DECLINED
     */
    public int setInfo(Context ctx, Request req, int info, String k, Object v) {
        return DECLINED;
    }

    private void dumpHeaders( MimeHeaders mh ) {
	for( int i=0; i
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.
What this is

Other links

The source code

new blog posts

... this post is sponsored by my books ...
#1 New Release!	FP Best Seller