alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Glassfish example source code file (TokenizerImpl.java)

This example Glassfish source code file (TokenizerImpl.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Glassfish tags/keywords

arraylist, arraylist, delim, delim, illegalescapesequenceexception, malformedunicodesequenceexception, quote_char, quote_char, string, string, text, tokenizerexception, tokenizerimpl, tokenizerinternal, unterminatedliteralstringexception, util

The Glassfish TokenizerImpl.java source code

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright (c) 1997-2010 Oracle and/or its affiliates. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common Development
 * and Distribution License("CDDL") (collectively, the "License").  You
 * may not use this file except in compliance with the License.  You can
 * obtain a copy of the License at
 * https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html
 * or packager/legal/LICENSE.txt.  See the License for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing the software, include this License Header Notice in each
 * file and include the License file at packager/legal/LICENSE.txt.
 *
 * GPL Classpath Exception:
 * Oracle designates this particular file as subject to the "Classpath"
 * exception as provided by Oracle in the GPL Version 2 section of the License
 * file that accompanied this code.
 *
 * Modifications:
 * If applicable, add the following below the License Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyright [year] [name of copyright owner]"
 *
 * Contributor(s):
 * If you wish your version of this file to be governed by only the CDDL or
 * only the GPL Version 2, indicate your decision by adding "[Contributor]
 * elects to include this software in this distribution under the [CDDL or GPL
 * Version 2] license."  If you don't indicate a single choice of license, a
 * recipient has the option to distribute your version of this file under
 * either the CDDL, the GPL Version 2 or to extend the choice of license to
 * its licensees as provided above.  However, if you add GPL Version 2 code
 * and therefore, elected the GPL Version 2 license, then the option applies
 * only if the new code is made subject to such option by the copyright
 * holder.
 */

package com.sun.enterprise.admin.util;

import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Arrays;


/**
	Signals an escape sequence the tokenizer does not accept.  Within this
	file it is raised by TokenizerInternal.getEscapedChar() when the character
	following the escape character is neither caller-declared as escapable
	nor one of the built-in escapes.
 */
class IllegalEscapeSequenceException extends TokenizerException
{
	/**
		@param msg	detail message, passed through to TokenizerException
	 */
	public IllegalEscapeSequenceException( String msg )
	{
		super( msg );
	}
}

/**
	Signals that the input ended while a quoted literal string was still open.
	Within this file it is raised at the end of TokenizerInternal.parseTokens().
 */
class UnterminatedLiteralStringException extends TokenizerException
{
	/**
		@param msg	detail message, passed through to TokenizerException
	 */
	public UnterminatedLiteralStringException( String msg )
	{
		super( msg );
	}
}

/**
	Signals a unicode escape whose four following characters are not all
	hexadecimal digits.  Within this file it is raised by
	TokenizerInternal.decodeUnicodeSequence().  Being a subclass of
	IllegalEscapeSequenceException, it is also caught by handlers for that type.
 */
class MalformedUnicodeSequenceException extends IllegalEscapeSequenceException
{
	/**
		@param msg	detail message, passed through to the superclass
	 */
	public MalformedUnicodeSequenceException( String msg )
	{
		super( msg );
	}
}

/**
	Tokenizer that does all of its work in the constructor: the input is
	scanned by a TokenizerInternal, the scanned list is optionally collapsed
	so that runs of delimiters count as one, and the result is converted to
	a String array that getTokens() hands back.
 */
public final class TokenizerImpl implements Tokenizer
{
	private static final char	QUOTE_CHAR	= '\"';
	
	final String[]		mTokens;
	
	/**
		Tokenize 'input', treating consecutive delimiters as a single delimiter.
		Equivalent to the five-argument constructor with
		multipleDelimsCountAsOne == true.
		
		@param input			the text to tokenize
		@param delimiters		characters that separate tokens
		@param escapeChar		character introducing an escape sequence
		@param escapableChars	additional characters that may follow escapeChar
		@throws TokenizerException	if scanning the input fails
	 */
	public TokenizerImpl(
		String		input,
		String		delimiters,
		char		escapeChar,
		String		escapableChars)
		throws TokenizerException
	{
		this( input, delimiters, true, escapeChar, escapableChars );
	}
	
	/**
		Tokenize 'input'.
		
		@param input			the text to tokenize
		@param delimiters		characters that separate tokens
		@param multipleDelimsCountAsOne	if true, a run of adjacent delimiters
								is collapsed to one before interpretation
		@param escapeChar		character introducing an escape sequence
		@param escapableChars	additional characters that may follow escapeChar
		@throws TokenizerException	if scanning the input fails
	 */
	public TokenizerImpl(
		String		input,
		String		delimiters,
		boolean		multipleDelimsCountAsOne,
		char		escapeChar,
		String		escapableChars)
		throws TokenizerException
	{
		final TokenizerInternal	scanner	=
			new TokenizerInternal( input, delimiters, escapeChar, escapableChars );
		
		final ArrayList	scanned	= scanner.parseTokens();
		final ArrayList	toInterpret	=
			multipleDelimsCountAsOne ? removeMultipleDelims( scanned ) : scanned;
		
		mTokens	= interpretTokenList( toInterpret );
	}
	
	/**
		Copy 'list', dropping every delimiter that immediately follows another
		delimiter.  Any element that is not a String is treated as a delimiter.
		
		@param list		scanned elements (Strings and delimiter markers)
		@return a new list; 'list' itself is not modified
	 */
	final static ArrayList removeMultipleDelims( ArrayList list )
	{
		final ArrayList	collapsed	= new ArrayList();
		final int		count		= list.size();
		
		boolean	prevWasDelim	= false;
		for ( int i = 0; i < count; ++i )
		{
			final Object	item	= list.get( i );
			final boolean	isDelim	= ! ( item instanceof String );
			
			// skip only the second and later delimiters of a run
			if ( isDelim && prevWasDelim )
			{
				continue;
			}
			
			collapsed.add( item );
			prevWasDelim	= isDelim;
		}
		
		return collapsed;
	}
	
	/**
		Turn the scanned list of strings and delimiter markers into the final
		token array.  A delimiter at the start of the list, at the end of the
		list, or directly after another delimiter implies an empty-string
		token at that position.  Examples (with "." as the delimiter):
		
		""	=> {}
		"."	=> { "", "" }
		"..."	=> { "", "", "", "" }
		"x."	=> { "x", "" }
		".x"	=> { "", "x" }
		"y.x"	=> { "y", "x" }
		
		@param list		scanned elements (Strings and delimiter markers)
		@return the tokens, in order
	 */
	static String[] interpretTokenList( ArrayList list )
	{
		final ArrayList	strings	= new ArrayList();
		final int		count	= list.size();
		
		// starting out "after a delimiter" makes a leading delimiter yield ""
		boolean	prevWasDelim	= true;
		
		for ( int i = 0; i < count; ++i )
		{
			final Object	item	= list.get( i );
			
			if ( item instanceof String )
			{
				strings.add( item );
				prevWasDelim	= false;
				continue;
			}
			
			// a delimiter with nothing before it stands for an empty token
			if ( prevWasDelim )
			{
				strings.add( "" );
			}
			prevWasDelim	= true;
		}
		
		// a trailing delimiter implies an empty string after it
		if ( prevWasDelim && ! list.isEmpty() )
		{
			strings.add( "" );
		}
		
		final String[]	result	= new String[ strings.size() ];
		return (String[])strings.toArray( result );
	}
	
	/**
		@return the tokens computed by the constructor (the internal array
				itself, not a copy)
	 */
	public String[] getTokens()
	{
		return mTokens;
	}
}



/**
	Character-level scanner used by TokenizerImpl.  It walks the input once
	and produces a list whose elements are either String tokens or the DELIM
	marker object.  Escape sequences and quoted literal strings are resolved
	here; deciding what leading, trailing or repeated delimiters mean is left
	to the caller.
 */
final class TokenizerInternal
{
	final String			mInput;
	final String			mDelimiters;
	final char				mEscapeChar;
	final String			mEscapableChars;
	final StringCharacterIterator	mIter;
	
	private static final char	QUOTE_CHAR	= '\"';
	
	// a distinct object used to denote a delimiter
	private static class Delim
	{
		private Delim()	{}
		public static Delim	getInstance()	{ return( new Delim() ); }
		public String	toString() { return( "<DELIM>" ); }
	}
	final static Delim	DELIM	= Delim.getInstance();
	
	/**
		@param input			the text to scan
		@param delimiters		characters that separate tokens
		@param escapeChar		character introducing an escape sequence
		@param escapableChars	characters that, in addition to escapeChar itself,
								may follow escapeChar and are then taken literally
	 */
		public
	TokenizerInternal(
		String		input,
		String		delimiters,
		char		escapeChar,
		String		escapableChars)
	{
		mInput			= input;
		mDelimiters		= delimiters;
		mEscapeChar		= escapeChar;
		mEscapableChars	= escapableChars;
		mIter		= new StringCharacterIterator( input );
	}
	
	/**
		@return true if 'theChar' is one of the built-in escape letters
				n, r, t or the double-quote character.  Note that 'u', which
				getEscapedChar() also accepts, is not included here.
	 */
		static boolean
	isSpecialEscapeChar( char theChar )
	{
		return( theChar == 'n' || theChar == 'r' || theChar == 't' ||theChar == QUOTE_CHAR );
	}
	
	/**
		@return true if 'theChar' is in the caller-supplied escapable set or
				is the escape character itself
	 */
		boolean
	isCallerProvidedEscapableChar( char theChar )
	{
		return( mEscapableChars.indexOf( theChar ) >= 0 || theChar == mEscapeChar );
	}
	
	/**
		@return true if 'theChar' is caller-escapable or a built-in escape letter
	 */
		boolean
	isEscapableChar( char theChar )
	{
		return( isCallerProvidedEscapableChar( theChar ) || isSpecialEscapeChar( theChar ) );
	}
	
	/**
		@return true if 'theChar' occurs in 'delims' or is the iterator's
				end-of-text sentinel
	 */
		boolean
	isDelim( String delims, char theChar )
	{
		// DONE is a static constant; refer to it through the class, not the instance
		return( delims.indexOf( theChar ) >= 0 || theChar == StringCharacterIterator.DONE );
	}
	
	/**
		@return true if 'theChar' is an ASCII decimal digit
	 */
		static boolean
	isDigit( char theChar )
	{
		return( (theChar >= '0' && theChar <= '9') );
	}
	

	/**
		@return true if 'theChar' is one of 0-9, a-f or A-F
	 */
		static boolean
	isHexDigit( char theChar )
	{
		return( isDigit( theChar ) || (theChar >= 'a' && theChar <= 'f') || isUpper( theChar ) );
	}
	
	/**
		Despite the general-sounding name, this only recognizes the
		upper-case hexadecimal letters.
		
		@return true if 'c' is in the range 'A'..'F'
	 */
		static boolean
	isUpper( char c )
	{
		return( (c >= 'A' && c <= 'F') );
	}
	
	/**
		@return true while the iterator has not reached the end of the input
	 */
		boolean
	hasMoreChars()
	{
		return( mIter.current() != StringCharacterIterator.DONE );
	}

	/**
		Return the character at the current position and advance past it.
		At the end of the input this returns the iterator's DONE sentinel.
	 */
		char
	nextChar()
	{
		final char	theChar	= mIter.current();
		mIter.next();
		
		return( theChar );
	}
	
	/**
		Consume the next four characters and interpret them as a big-endian
		hexadecimal number giving the resulting character.
		
		@return the decoded character
		@throws MalformedUnicodeSequenceException if any of the four characters
				(including running off the end of the input) is not a hex digit
	 */
		char
	decodeUnicodeSequence()
		throws MalformedUnicodeSequenceException
	{
		int		value	= 0;
		
		try
		{
			for( int i = 0; i < 4; ++i )
			{
				value	= (value << 4 ) | hexValue( nextChar() );
			}
		}
		catch( IllegalArgumentException e )
		{
			// hexValue() is the only thing in the loop that throws
			throw new MalformedUnicodeSequenceException(
				"expected four hexadecimal digits after '" + mEscapeChar + "u'" );
		}
		
		return( (char)value );
	}
	
	/**
		Convert a single hexadecimal digit to its numeric value.
		
		@param c	one of 0-9, a-f or A-F
		@return the value 0..15
		@throws IllegalArgumentException if 'c' is not a hexadecimal digit
	 */
		static int
	hexValue( char c )
	{
		if ( isDigit( c ) )
		{
			return( (int)c - (int)'0' );
		}
		
		// letters start at ten: 'A'/'a' => 10 ... 'F'/'f' => 15
		if ( isUpper( c ) )
		{
			return( (int)c - (int)'A' + 10 );
		}
		if ( c >= 'a' && c <= 'f' )
		{
			return( (int)c - (int)'a' + 10 );
		}
		
		throw new IllegalArgumentException( "not a hexadecimal digit: '" + c + "'" );
	}
	
	/**
		Translate the character that followed the escape character into the
		character it stands for.  Caller-provided escapable characters (and
		the escape character itself) map to themselves; otherwise n, r, t,
		the double quote and u (followed by four hex digits) are recognized.
		
		@param inputChar	the character immediately after the escape character
		@return the character to append to the current token
		@throws MalformedUnicodeSequenceException if a 'u' escape is not
				followed by four hex digits
		@throws IllegalEscapeSequenceException if 'inputChar' is not escapable
	 */
		char
	getEscapedChar( final char inputChar )
		throws MalformedUnicodeSequenceException,IllegalEscapeSequenceException
	{
		char	outChar	= 0;
		
		if ( isCallerProvidedEscapableChar( inputChar ) )
		{
			outChar	= inputChar;
		}
		else
		{
			switch( inputChar )
			{
				default:	throw new IllegalEscapeSequenceException( "" + inputChar );
				case 'n':	outChar	= '\n';		break;
				case 'r':	outChar	= '\r';		break;
				case 't':	outChar	= '\t';		break;
				case QUOTE_CHAR:	outChar	= QUOTE_CHAR;	break;
				case 'u':	outChar	= decodeUnicodeSequence();	break;
			}
		}
		
		return( outChar );
	}
	

	
	/**
		Scan the whole input and return its elements in order.
		
		@return a list containing String tokens and DELIM markers
		@throws UnterminatedLiteralStringException if the input ends inside a
				quoted literal string
		@throws MalformedUnicodeSequenceException if a unicode escape is malformed
		@throws IllegalEscapeSequenceException if an escape sequence is not recognized
	 */
		ArrayList
	parseTokens(  )
		throws UnterminatedLiteralStringException,
			MalformedUnicodeSequenceException, IllegalEscapeSequenceException
	{
		final StringBuffer	tok	= new StringBuffer();
		final ArrayList		tokens	= new ArrayList();
		boolean				insideStringLiteral	= false;
		
		/**
			Escape sequences are always processed regardless of whether we're inside a
			quoted string or not.  A quote string really only alters whether delimiters
			are treated as literal characters, or not.
		 */
		while ( hasMoreChars()  )
		{
			final char	theChar	= nextChar();
			
			if ( theChar == mEscapeChar )
			{
				// if the escape char was the last character, nextChar() yields the
				// DONE sentinel, which getEscapedChar() rejects unless the caller
				// listed it as escapable
				tok.append( getEscapedChar( nextChar() ) );
			}
			else if ( theChar == Tokenizer.LITERAL_STRING_DELIM )
			{
				// special cases of "", """", """""", etc require forcing an empty string out
				// these case have no delimiter or regular characters to cause a string to
				// be emitted
				if ( insideStringLiteral && tok.length() == 0 && tokens.size() == 0)
				{
					tokens.add( "" );
				}
				
				insideStringLiteral	= ! insideStringLiteral;
			}
			else if ( insideStringLiteral )
			{
				// inside quotes everything, delimiters included, is literal text
				tok.append( theChar );
			}
			else if ( isDelim( mDelimiters, theChar ) )
			{
				// we've hit a delimiter...if characters have accumulated, spit them out
				// then spit out the delimiter token.
				if ( tok.length() != 0 )
				{
					tokens.add( tok.toString() );
					tok.setLength( 0 );
				}
				tokens.add( DELIM );
			}
			else
			{
				tok.append( theChar );
			}
		}
		
		// flush whatever accumulated after the last delimiter
		if ( tok.length() != 0 )
		{
			tokens.add( tok.toString() );
		}
		
		if ( insideStringLiteral )
		{
			throw new UnterminatedLiteralStringException( tok.toString() );
		}
		
		return( tokens );
	}
}

Other Glassfish examples (source code examples)

Here is a short list of links related to this Glassfish TokenizerImpl.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.