alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (Scanner.java)

This example Java source code file (Scanner.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Learn more about this Java project at its project page.

Java - Java tags/keywords

bol, dot, eol, includeentry, invalidcharacter, ioexception, none, plus, scannerdata, star, string, stringbuffer, token, util, vector

The Scanner.java Java example source code

/*
 * Copyright (c) 1999, 2001, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 * COMPONENT_NAME: idl.parser
 *
 * ORIGINS: 27
 *
 * Licensed Materials - Property of IBM
 * 5639-D57 (C) COPYRIGHT International Business Machines Corp. 1997, 1999
 * RMI-IIOP v1.0
 *
 */

package com.sun.tools.corba.se.idl;

// NOTES:
// -F46082.51<daz> Remove -stateful feature.
// -D56351<daz> Update computation of RepositoryIDs to CORBA 2.3 (see spec.).
// -D59166<daz> Add escaped-id. info. to identifiers.
// -F60858.1<daz> Add support for -corba option, levels 2.2 and 2.3: accept 2.3
//   keywords as ids.; accept ids. that match keywords in letter, but not in case.
// -D62023<daz> Add support for -corba option, level 2.4: see keyword checking.

import java.io.EOFException;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;

import java.util.Enumeration;
import java.util.Stack;
import java.util.StringTokenizer;
import java.util.Vector;

/**
 *
 **/
class Scanner
{
  // <f46082.51> -Remove stateful feature.
  //Scanner (IncludeEntry file, String[] keywords, boolean vbose, boolean scanStateful, boolean emitAllIncludes) throws IOException
  // <f60858.1>
  //Scanner (IncludeEntry file, String[] keywords, boolean vbose, boolean emitAllIncludes) throws IOException
  /**
   * Creates a scanner and positions it on the first character of the
   * given file.
   *
   * @param file            entry describing the file to scan; its name, minus
   *                        the first and last character, is used as the path
   * @param keywords        keywords that get sorted into the plain, wildcard
   *                        and open-ended keyword lists
   * @param vbose           when true, include-file progress messages are printed
   * @param emitAllIncludes stored as the emitAll flag consulted when include
   *                        files are entered and left
   * @param cLevel          CORBA level handed to the keyword check
   * @param debug           when true, every token returned by getToken is printed
   * @throws IOException if the file cannot be read
   **/
  Scanner (IncludeEntry file, String[] keywords, boolean vbose,
      boolean emitAllIncludes, float cLevel, boolean debug) throws IOException
  {
    // Load the source first; this also reads the first character.
    this.readFile (file);

    // <f46082.51> The stateful option no longer exists.
    this.verbose    = vbose;
    this.emitAll    = emitAllIncludes;
    this.corbaLevel = cLevel;
    this.debug      = debug;

    this.sortKeywords (keywords);
  } // ctor

  /**
   * Distributes the given keywords over the three keyword lists: keywords
   * with a wildcard at either end go to openEndedKeywords, keywords with a
   * wildcard elsewhere go to wildcardKeywords, and all others go to keywords.
   *
   * @param keywords the keywords to classify
   **/
  void sortKeywords (String[] keywords)
  {
    for (int index = 0; index < keywords.length; ++index)
    {
      final String keyword = keywords[index];
      if (wildcardAtEitherEnd (keyword))
      {
        openEndedKeywords.addElement (keyword);
      }
      else if (wildcardsInside (keyword))
      {
        wildcardKeywords.addElement (keyword);
      }
      else
      {
        // The parameter shadows the field, hence the explicit "this".
        this.keywords.addElement (keyword);
      }
    }
  } // sortKeywords

  /**
   * Tells whether a keyword starts or ends with one of the wildcard
   * characters '*', '+' or '.'.
   *
   * @param string the keyword to inspect
   * @return true if the first or the last character is a wildcard
   **/
  private boolean wildcardAtEitherEnd (String string)
  {
    final String[] wildcards = { "*", "+", "." };
    for (int i = 0; i < wildcards.length; ++i)
    {
      if (string.startsWith (wildcards[i]) || string.endsWith (wildcards[i]))
        return true;
    }
    return false;
  } // wildcardAtEitherEnd

  /**
   * Tells whether a keyword contains one of the wildcard characters
   * '*', '+' or '.' at any position other than the first.
   *
   * @param string the keyword to inspect
   * @return true if the first occurrence of some wildcard character lies
   *         after index 0
   **/
  private boolean wildcardsInside (String string)
  {
    final String[] wildcards = { "*", "+", "." };
    for (int i = 0; i < wildcards.length; ++i)
    {
      if (string.indexOf (wildcards[i]) > 0)
        return true;
    }
    return false;
  } // wildcardsInside

  /**
   * Reads the file described by the given entry.  The path is the entry's
   * name with its first and last character removed (presumably the
   * enclosing quotes or angle brackets - confirm against IncludeEntry).
   *
   * @param file the entry to read
   * @throws IOException if the file cannot be read
   **/
  void readFile (IncludeEntry file) throws IOException
  {
    final String delimitedName = file.name ();
    final int lastIndex = delimitedName.length () - 1;
    readFile (file, delimitedName.substring (1, lastIndex));
  } // readFile

  /**
   * Loads the named file into the current scanner data, appends the
   * platform line separator, and reads the first character.
   *
   * @param file     the entry recorded as the current file entry
   * @param filename path of the file to load
   * @throws IOException if the file cannot be opened or read
   **/
  void readFile (IncludeEntry file, String filename) throws IOException
  {
    data.fileEntry = file;
    data.filename = filename;

    // <f49747.1> The file is read as characters rather than bytes.
    File idlFile = new File (data.filename);
    // File.length () counts bytes; it is used as the capacity of the
    // character buffer, so at most that many characters are read.
    int capacity = (int)idlFile.length ();
    char[] contents = new char [capacity];
    int count = 0;

    FileReader fileReader = new FileReader (idlFile);
    try
    {
      // A single read () may deliver fewer characters than requested, so
      // keep reading until the buffer is full or the stream ends.
      while (count < capacity)
      {
        int charsRead = fileReader.read (contents, count, capacity - count);
        if (charsRead < 0)
          break;
        count += charsRead;
      }
    }
    finally
    {
      // Release the file handle even when reading fails.
      fileReader.close ();
    }

    // <d41679> Terminate the text with a line separator.  It is placed
    // directly after the characters actually read, so no NUL padding
    // remains when fewer characters than bytes were decoded.
    final String EOL = System.getProperty ("line.separator");
    data.fileBytes = new char [count + EOL.length ()];
    System.arraycopy (contents, 0, data.fileBytes, 0, count);
    for (int i = 0; i < EOL.length (); i++)
      data.fileBytes[count + i] = EOL.charAt (i);

    readChar ();
  } // readFile

  /**
   * Scans and returns the next token of the current input.
   *
   * White space, comments and backslash line continuations produce no
   * token and are skipped.  The text of the last comment read during this
   * call is attached to the returned token as its comment.  When readChar
   * signals the end of the input with an EOFException, a Token.EOF token
   * is returned.
   *
   * @return the next token; never null
   * @throws IOException if reading fails; an InvalidCharacter is thrown for
   *         a character that cannot start any token
   **/
  Token getToken () throws IOException
  {
    //System.out.println ("Scanner.getToken char = |" + data.ch + "| (ASCII " + (int)data.ch + ").");

    // The token could be null if the next set of characters
    // is not a token:  white space, comments, ignored preprocessor
    // commands.
    Token token = null;
    String commentText = new String ("");

    while (token == null)
      try
      {
        // Remember where this token starts.
        data.oldIndex = data.fileIndex;
        data.oldLine  = data.line;
        if (data.ch <= ' ') {
          skipWhiteSpace ();
          continue;
        }

        // Special case for wchar and wstring literals.
        // The values are preceded by an L.
        //
        // Bug fix 4382578:  Can't compile a wchar literal.
        //
        // REVISIT.  This maps wchar/wstring literals to
        // our char/string literal types.  Eventually, we
        // need to write stronger checking to be spec
        // compliant in negative cases such as leaving the
        // L off of a wide string or putting it on a string.
        if (data.ch == 'L') {
            // Check to see if the next character is an
            // apostrophe.
            readChar();
            // Note:  This doesn't allow for space between
            // the L and the apostrophe or quote.
            if (data.ch == '\'') {
                // It was a wchar literal.  Get the value
                // and return the token.
                token = getCharacterToken(true);
                readChar();
                continue;
            } else
            if (data.ch == '"') {
                // It was a wstring literal.
                //
                // getUntil assumes we've already passed the
                // first quote.
                readChar ();
                token = new Token (Token.StringLiteral, getUntil ('"'), true);
                readChar ();
                continue;
            } else {
                // It must not have been a wchar literal.
                // Push the input back into the buffer, and
                // fall to the next if case.
                unread(data.ch);
                unread('L');
                readChar();
            }
        }

        if ((data.ch >= 'a' && data.ch <= 'z') ||
            (data.ch >= 'A' && data.ch <= 'Z') ||
            // <f46082.40> Escaped identifier; see data member comments.
            //(data.ch == '_' && underscoreOK)   || <daz>
            (data.ch == '_')   ||
            Character.isLetter (data.ch)) {
            token = getString ();
        } else
        if ((data.ch >= '0' && data.ch <= '9') || data.ch == '.') {
            token = getNumber ();
        } else {
          // Punctuation and operators.  Two-character operators read one
          // character ahead and push it back when it does not belong to
          // the operator.
          switch (data.ch)
          {
            case ';':
              token = new Token (Token.Semicolon);
              break;
            case '{':
              token = new Token (Token.LeftBrace);
              break;
            case '}':
              token = new Token (Token.RightBrace);
              break;
            case ':':
              readChar ();
              if (data.ch == ':')
                token = new Token (Token.DoubleColon);
              else
              {
                unread (data.ch);
                token = new Token (Token.Colon);
              }
              break;
            case ',':
              token = new Token (Token.Comma);
              break;
            case '=':
              readChar ();
              if (data.ch == '=')
                token = new Token (Token.DoubleEqual);
              else
              {
                unread (data.ch);
                token = new Token (Token.Equal);
              }
              break;
            case '+':
              token = new Token (Token.Plus);
              break;
            case '-':
              token = new Token (Token.Minus);
              break;
            case '(':
              token = new Token (Token.LeftParen);
              break;
            case ')':
              token = new Token (Token.RightParen);
              break;
            case '<':
              readChar ();
              if (data.ch == '<')
                token = new Token (Token.ShiftLeft);
              else if (data.ch == '=')
                token = new Token (Token.LessEqual);
              else
              {
                unread (data.ch);
                token = new Token (Token.LessThan);
              }
              break;
            case '>':
              readChar ();
              if (data.ch == '>')
                token = new Token (Token.ShiftRight);
              else if (data.ch == '=')
                token = new Token (Token.GreaterEqual);
              else
              {
                unread (data.ch);
                token = new Token (Token.GreaterThan);
              }
              break;
            case '[':
              token = new Token (Token.LeftBracket);
              break;
            case ']':
              token = new Token (Token.RightBracket);
              break;
            case '\'':
              token = getCharacterToken(false);
              break;
            case '"':
              readChar ();
              token = new Token (Token.StringLiteral, getUntil ('"', false, false, false));
              break;
            case '\\':
              readChar ();
              // If this is at the end of a line, then it is the
              // line continuation character - treat it as white space
              if (data.ch == '\n' || data.ch == '\r')
                token = null;
              else
                token = new Token (Token.Backslash);
              break;
            case '|':
              readChar ();
              if (data.ch == '|')
                token = new Token (Token.DoubleBar);
              else
              {
                unread (data.ch);
                token = new Token (Token.Bar);
              }
              break;
            case '^':
              token = new Token (Token.Carat);
              break;
            case '&':
              readChar ();
              if (data.ch == '&')
                token = new Token (Token.DoubleAmpersand);
              else
              {
                unread (data.ch);
                token = new Token (Token.Ampersand);
              }
              break;
            case '*':
              token = new Token (Token.Star);
              break;
            case '/':
              readChar ();
              // <21jul1997daz>  Extract comments rather than skipping them.
              // Preserve only the comment immediately preceding the next token.
              if (data.ch == '/')
                //skipLineComment ();
                commentText = getLineComment();
              else if (data.ch == '*')
                //skipBlockComment ();
                commentText = getBlockComment();
              else
              {
                unread (data.ch);
                token = new Token (Token.Slash);
              }
              break;
            case '%':
              token = new Token (Token.Percent);
              break;
            case '~':
              token = new Token (Token.Tilde);
              break;

            // The period token is recognized in getNumber.
            // The period is only valid in a floating point number.
            //case '.':
            //  token = new Token (Token.Period);
            //  break;

            case '#':
              token = getDirective ();
              break;
            case '!':
              readChar ();
              if (data.ch == '=')
                token = new Token (Token.NotEqual);
              else
              {
                unread (data.ch);
                token = new Token (Token.Exclamation);
              }
              break;
            case '?':
              try
              {
                token = replaceTrigraph ();
                break;
              }
              // Deliberately empty: when replaceTrigraph rejects the
              // sequence, control falls through to the default case, which
              // reports the character as invalid.
              catch (InvalidCharacter e) {}
            default:
              throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
          }
          // Move past the last character handled by the switch; after an
          // unread this re-reads the character that was pushed back.
          readChar ();
        }
      }
      catch (EOFException e)
      {
        // End of input: readChar throws EOFException once the outermost
        // input is exhausted.
        token = new Token (Token.EOF);
      }

    // Transfer comment to parser via token.  <daz>21jul1997
    token.comment = new Comment( commentText );

    //System.out.println ("Scanner.getToken returning token.type = " + token.type);
    //if (token.type == Token.Identifier || token.type == Token.MacroIdentifier || (token.type >= Token.BooleanLiteral && token.type <= Token.StringLiteral))
    //  System.out.println ("Scanner.getToken returns token.name = " + token.name);

    if (debug)
        System.out.println( "Token: " + token ) ;

    return token;
  } // getToken

  /**
   * Makes the given string the current input.  The present scanner state
   * is pushed on the data stack and is restored by readChar once the
   * string has been consumed.  The new state is flagged as macro data, so
   * newlines in the string do not change the line count.
   *
   * @param string the text to scan next
   **/
  void scanString (String string)
  {
    // Save the state being interrupted and continue on a copy of it.
    dataStack.push (data);
    data = new ScannerData (data);

    data.fileIndex = 0;
    data.oldIndex  = 0;
    // <f49747.1> The buffer holds characters, not bytes.
    data.fileBytes = string.toCharArray ();
    data.macrodata = true;

    try
    {
      readChar ();
    }
    catch (IOException ignored)
    {
      // Ignored on purpose, as before.  readChar only throws its
      // EOFException when the data stack is empty, and a state was
      // pushed above.
    }
  } // scanString

  /**
   * Suspends the current input and starts scanning an included file.  The
   * current state is pushed on the data stack; if the file cannot be
   * read, that state is restored before the exception is rethrown.
   *
   * @param file            the entry for the included file
   * @param filename        path of the included file
   * @param includeIsImport recorded in the new scanner data; together with
   *                        the emitAll flag it decides whether SymtabEntry
   *                        is told that an include is being entered
   * @throws IOException if the included file cannot be read
   **/
  void scanIncludedFile (IncludeEntry file, String filename, boolean includeIsImport) throws IOException
  {
    final ScannerData includer = data;
    dataStack.push (includer);

    data = new ScannerData ();
    // Indent one blank further than the including file.
    data.indent = includer.indent + ' ';
    data.includeIsImport = includeIsImport;

    try
    {
      readFile (file, filename);

      final boolean notifySymtab = !emitAll && includeIsImport;
      if (notifySymtab)
      {
        SymtabEntry.enteringInclude ();
      }

      // <d56351> As of CORBA 2.3, include files define a new scope for
      // Repository ID prefixes; the previous one is restored when the
      // include file has been parsed (see readChar).
      Parser.enteringInclude ();

      if (verbose)
      {
        System.out.println (data.indent + Util.getMessage ("Compile.parsing", filename));
      }
    }
    catch (IOException e)
    {
      // Undo the push so the scanner continues with the including file.
      data = (ScannerData)dataStack.pop ();
      throw e;
    }
  } // scanIncludedFile

  /**
   * Steps the read position back by one character.  The argument is only
   * used to undo the line count when a newline is pushed back outside of
   * macro data; the current character itself is left unchanged.
   *
   * @param ch the character being pushed back
   **/
  private void unread (char ch)
  {
    final boolean undoLineCount = (ch == '\n') && !data.macrodata;
    if (undoLineCount)
    {
      data.line--;
    }
    data.fileIndex--;
  } // unread

  /**
   * Reads the next character into the current scanner data.
   *
   * While characters remain, the next one is taken (masked to its low
   * eight bits) and the line count is advanced for a newline outside of
   * macro data.  When the current input is exhausted, the suspended input
   * on top of the data stack is resumed without reading a character; with
   * an empty stack the end of all input has been reached.
   *
   * @throws IOException an EOFException when no input is left at all
   **/
  void readChar () throws IOException
  {
    // Normal case: characters remain in the current buffer.
    if (data.fileIndex < data.fileBytes.length)
    {
      data.ch = (char)(data.fileBytes[data.fileIndex++] & 0x00ff);
      if (data.ch == '\n' && !data.macrodata)
      {
        ++data.line;
      }
      return;
    }

    // The current input is used up and nothing is suspended.
    if (dataStack.empty ())
    {
      throw new EOFException ();
    }

    // IBM.11666 / <d56351> / <d59469>: only a real include file, not macro
    // text, ends an include scope.
    if (!data.macrodata)
    {
      if (!emitAll && data.includeIsImport)
      {
        SymtabEntry.exitingInclude ();
      }
      Parser.exitingInclude ();
    }

    if (verbose && !data.macrodata)
    {
      System.out.println (data.indent + Util.getMessage ("Compile.parseDone", data.filename));
    }

    // Resume the suspended input.
    data = (ScannerData)dataStack.pop ();
  } // readChar

  /**
   * Starting at a quote, reads a string with possible
   * unicode or octal values until an end quote.  Doesn't
   * handle line feeds or comments.
   *
   * A backslash followed by 'u' is read as a four digit hexadecimal
   * escape, a backslash followed by an octal digit as an octal escape;
   * any other backslash sequence is copied unchanged, backslash included.
   *
   * @return the characters between the quotes, with the recognized
   *         escapes replaced by the characters they denote
   * @throws IOException if reading fails
   */
  private String getWString() throws IOException
  {
      readChar();
      StringBuffer result = new StringBuffer();

      while (data.ch != '"') {
          if (data.ch == '\\') {
              // Could be a \ooo octal or
              // unicode hex
              readChar();
              if (data.ch == 'u') {
                  // Unicode hex.  No diagnostic output here: the scanner
                  // must not write to standard output while scanning.
                  result.append((char)getNDigitHexNumber(4));
                  // getNDigitHexNumber reads the next
                  // character, so loop without reading another
                  continue;
              } else
              if (data.ch >= '0' && data.ch <= '7') {
                  // Octal
                  result.append((char)get3DigitOctalNumber());
                  // get3DigitOctalNumber reads the next
                  // character, so loop without reading another
                  continue;
              } else {
                  // Wasn't either, so just append the
                  // slash and current character.
                  result.append('\\');
                  result.append(data.ch);
              }
          } else {
              // Just append the character
              result.append(data.ch);
          }

          // Advance to the next character
          readChar();
      }

      return result.toString();
  }

  /**
   * Scans a character literal; the current character is the opening
   * apostrophe.
   *
   * The name of the returned token holds two parts: first the character
   * itself, then its representation in the source.  These are typically
   * the same, but they CAN be different, for example "O\117".
   *
   * @param isWide passed on to the token; true for a literal that was
   *               prefixed with L
   * @return a Token.CharacterLiteral token
   * @throws IOException if reading fails or the escape sequence is invalid
   **/
  private Token getCharacterToken(boolean isWide) throws IOException
  {
    readChar ();

    // Plain character: value and representation are the same.
    if (data.ch != '\\')
    {
      final String plainName = "" + data.ch + data.ch;
      final Token plain = new Token (Token.CharacterLiteral, plainName, isWide );
      readChar ();
      return plain;
    }

    // Escape sequence: look at the character after the backslash.
    readChar ();

    if (data.ch == 'x' || data.ch == 'u')
    {
      // \xhh has two hexadecimal digits, \\uhhhh has four.
      final char kind = data.ch;
      final int digitCount = (kind == 'x') ? 2 : 4;
      final int value = getNDigitHexNumber (digitCount);
      final String hexName = ((char)value) + "\\" + kind + Integer.toString (value, 16);
      return new Token (Token.CharacterLiteral, hexName, isWide );
    }

    if (data.ch >= '0' && data.ch <= '7')
    {
      final int value = get3DigitOctalNumber ();
      final String octalName = ((char)value) + "\\" + Integer.toString (value, 8);
      return new Token (Token.CharacterLiteral, octalName, isWide );
    }

    return singleCharEscapeSequence (isWide);
  } // getCharacterToken

  /**
   * Builds the character literal token for a single-character escape
   * sequence; the current character is the one after the backslash.  The
   * token name is the escaped character followed by its source form.
   *
   * @param isWide passed on to the token
   * @return a Token.CharacterLiteral token for the escape
   * @throws IOException if reading fails; an InvalidCharacter is thrown
   *         when the character is not one of n t v b r f a \ ? ' "
   **/
  private Token singleCharEscapeSequence (boolean isWide) throws IOException
  {
    final String name;
    switch (data.ch)
    {
      case 'n':   // newline
        name = "\n\\n";
        break;
      case 't':   // horizontal tab
        name = "\t\\t";
        break;
      case 'v':   // vertical tab
        name = "\013\\v";
        break;
      case 'b':   // backspace
        name = "\b\\b";
        break;
      case 'r':   // carriage return
        name = "\r\\r";
        break;
      case 'f':   // form feed
        name = "\f\\f";
        break;
      case 'a':   // alert
        name = "\007\\a";
        break;
      case '\\':  // backslash
        name = "\\\\\\";
        break;
      case '?':   // question mark
        name = "?\\?";
        break;
      case '\'':  // single quote
        name = "'\\'";
        break;
      case '"':   // double quote
        name = "\"\\\"";
        break;
      default:
        throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
    }

    final Token token = new Token (Token.CharacterLiteral, name, isWide);
    readChar ();
    return token;
  } // singleCharEscapeSequence

  /**
   * Scans an identifier or keyword starting at the current character.
   *
   * @return a keyword token if the text is a keyword and is not escaped;
   *         otherwise a Token.MacroIdentifier when a left paren follows
   *         immediately, or else a Token.Identifier
   * @throws IOException if reading fails; an InvalidCharacter is thrown
   *         for a second leading underscore when escaped ids are allowed
   **/
  private Token getString () throws IOException
  {
    final StringBuffer text = new StringBuffer ();
    boolean isEscaped = false;                     // <d59166>
    final boolean[] keywordCollision = { false };  // <d62023>

    // <f46082.40> An escaped id. begins with '_', which is followed by a
    // normal identifier.  Disallow prefixes of '_' having length > 1.
    if (data.ch == '_')
    {
      text.append (data.ch);
      readChar ();
      isEscaped = escapedOK;
      if (isEscaped && data.ch == '_')
      {
        throw new InvalidCharacter (data.filename, currentLine (),
            currentLineNumber (), currentLinePosition (), data.ch);
      }
    }

    // Collect characters for as long as they can be part of an identifier.
    while (data.ch == '_' || Character.isLetterOrDigit (data.ch))
    {
      text.append (data.ch);
      readChar ();
    }

    String string = text.toString ();

    // <f46082.40> The keyword check is skipped for an escaped identifier.
    if (!isEscaped)
    {
      final Token keyword = Token.makeKeywordToken (string, corbaLevel, escapedOK,
          keywordCollision);
      if (keyword != null)
      {
        return keyword;
      }
    }

    // At this point the string is an identifier.  If it is also a Java
    // keyword, an underscore is prepended so that it doesn't generate a
    // compiler error.
    string = getIdentifier (string);

    // A left paren that follows immediately could mean a macro definition.
    if (data.ch == '(')
    {
      readChar ();
      return new Token (Token.MacroIdentifier, string, isEscaped,
          keywordCollision[0], false);
    }
    return new Token (Token.Identifier, string, isEscaped,
        keywordCollision[0], false);
  }

  // Wildcard values: the kind of wildcard most recently read from a keyword
  // pattern by the keyword matchers.  Star, Plus and Dot stand for the
  // pattern characters '*', '+' and '.'; None means no wildcard is pending.
  // In the matchers the literal that follows a Star may be found at any
  // index (>= 0), after a Plus at an index > 0, and after a Dot at index 1.
  static final int Star = 0, Plus = 1, Dot = 2, None = 3;

  /**
   * Tells whether the string matches one of the closed wildcard keywords,
   * that is, keywords with wildcards anywhere but at the beginning or end.
   *
   * A keyword is split into literal parts and the wildcards '*', '+' and
   * '.'.  The string must start with the first literal.  Each later
   * literal is searched for in what is left of the string: after '*' it
   * may be found at any index, after '+' at an index greater than 0, and
   * after '.' at index 1 exactly.  The keyword matches when every literal
   * is found and nothing of the string is left over.
   *
   * Every keyword is tried against the complete string with fresh
   * matching state, so a failed or partial match on one keyword cannot
   * affect the keywords tried after it.
   *
   * @param string the identifier to test
   * @return true if some closed wildcard keyword matches the whole string
   **/
  private boolean matchesClosedWildKeyword (String string)
  {
    Enumeration e = wildcardKeywords.elements ();
    while (e.hasMoreElements ())
    {
      // Per-keyword state; it must not leak into the next keyword.
      int             wildcard  = None;
      boolean         found     = true;
      String          tmpString = string;
      StringTokenizer tokens    = new StringTokenizer ((String)e.nextElement (), "*+.", true);

      if (!tokens.hasMoreTokens ())
        continue;

      // The first token of a closed keyword is its leading literal.
      String token = tokens.nextToken ();
      if (!tmpString.startsWith (token))
        continue;
      tmpString = tmpString.substring (token.length ());

      while (tokens.hasMoreTokens () && found)
      {
        token = tokens.nextToken ();
        if (token.equals ("*"))
          wildcard = Star;
        else if (token.equals ("+"))
          wildcard = Plus;
        else if (token.equals ("."))
          wildcard = Dot;
        else if (wildcard != None)
        {
          // A literal following a wildcard: where it may be found depends
          // on the wildcard.
          int index = tmpString.indexOf (token);
          boolean positionOk =
              (wildcard == Star && index >= 0) ||
              (wildcard == Plus && index > 0)  ||
              (wildcard == Dot  && index == 1);
          if (positionOk)
            tmpString = tmpString.substring (index + token.length ());
          else
            found = false;
        }
      }

      // The keyword matches only if the whole string has been consumed.
      if (found && tmpString.equals (""))
        return true;
    }
    return false;
  } // matchesClosedWildKeyword

  /**
   * Matches the string against the open-ended keywords, that is, keywords
   * with a wildcard ('*', '+' or '.') at the beginning or end.
   *
   * The keywords are tried in order.  For the first one that matches, the
   * result is "_" followed by the result of matching again on the string
   * in which the matched literals were replaced by blanks and the leading
   * and trailing blanks were trimmed.  The result therefore holds one
   * underscore per nested match.
   *
   * @param string the identifier to test
   * @return the underscores to prepend to the identifier; "" when no
   *         open-ended keyword matches
   **/
  private String matchesOpenWildcard (String string)
  {
    Enumeration e = openEndedKeywords.elements ();
    String prepend = "";
    while (e.hasMoreElements ())
    {
      // Matching state, reset for every keyword.
      int             wildcard  = None;
      boolean         found     = true;
      String          tmpString = string;
      StringTokenizer tokens    = new StringTokenizer ((String)e.nextElement (), "*+.", true);
      while (tokens.hasMoreTokens () && found)
      {
        String token = tokens.nextToken ();
        if (token.equals ("*"))
          wildcard = Star;
        else if (token.equals ("+"))
          wildcard = Plus;
        else if (token.equals ("."))
          wildcard = Dot;
        // A literal after a wildcard: its last occurrence is looked up and,
        // if its position suits the wildcard, replaced by blanks.
        else if (wildcard == Star)
        {
          wildcard = None;
          int index = tmpString.lastIndexOf (token);
          if (index >= 0)
            tmpString = blankOutMatch (tmpString, index, token.length ());
          else
            found = false;
        }
        else if (wildcard == Plus)
        {
          wildcard = None;
          int index = tmpString.lastIndexOf (token);
          if (index > 0)
            tmpString = blankOutMatch (tmpString, index, token.length ());
          else
            found = false;
        }
        else if (wildcard == Dot)
        {
          wildcard = None;
          int index = tmpString.lastIndexOf (token);
          if (index == 1)
            tmpString = blankOutMatch (tmpString, 1, token.length ());
          else
            found = false;
        }
        // A literal without a preceding wildcard must be a prefix.
        else if (wildcard == None)
          if (tmpString.startsWith (token))
            tmpString = blankOutMatch (tmpString, 0, token.length ());
          else
            found = false;
      }

      // Make sure that, if the last character of the keyword is a
      // wildcard, that the string matches what the wildcard
      // requires.
      if (found)
      {
        // The checks look at the position of the last blank in tmpString.
        if (wildcard == Star)
          ;
        // Trailing '+': the string must not end with a blank.
        else if (wildcard == Plus && tmpString.lastIndexOf (' ') != tmpString.length () - 1)
          ;
        // Trailing '.': the last blank must be the next-to-last character.
        else if (wildcard == Dot && tmpString.lastIndexOf (' ') == tmpString.length () - 2)
          ;
        // No trailing wildcard: the string must end with a blank.
        else if (wildcard == None && tmpString.lastIndexOf (' ') == tmpString.length () - 1)
          ;
        else
          found = false;
      }
      // If found, then prepend an underscore.  But also try matching
      // again after leading and trailing blanks are removed from
      // tmpString.  This isn't quite right, but it solves a problem
      // which surfaced in the Java mapping.  For example:
      // openEndedKeywords = {"+Helper", "+Holder", "+Package"};
      // string            = fooHelperPackage.
      // Given the mechanics of the Java mapping, _fooHelperPackage
      // COULD have a conflict, so for each occurrence of a keyword,
      // an underscore is added, so this would cause two underscores:
      // __fooHelperPackage.  To accomplish this, the first time thru
      // tmpString is "fooHelper       " at this point, strip off the
      // trailing blanks and try matching "fooHelper".  This also
      // matches, so two underscores are prepended.
      if (found)
      {
        prepend = prepend + "_" + matchesOpenWildcard (tmpString.trim ());
        break;
      }
    }
    return prepend;
  } // matchesOpenWildcard

  /**
   * Returns a copy of the string in which the given range of characters
   * is replaced by blanks; the length of the string does not change.
   *
   * @param string the string to copy
   * @param start  index of the first character to blank out
   * @param length number of characters to blank out
   * @return the string with the range replaced by blanks
   **/
  private String blankOutMatch (String string, int start, int length)
  {
    final char[] blanks = new char [length];
    java.util.Arrays.fill (blanks, ' ');

    final String head = string.substring (0, start);
    final String tail = string.substring (start + length);
    return head + new String (blanks) + tail;
  } // blankOutMatch

  /**
   * Prepends underscores to an identifier that matches a keyword.
   *
   * @param string the identifier as scanned
   * @return the identifier with one underscore prepended if it equals a
   *         plain keyword or matches a closed wildcard keyword; otherwise
   *         the identifier preceded by whatever matchesOpenWildcard
   *         returns, which is nothing if no open-ended keyword matches
   **/
  private String getIdentifier (String string)
  {
    // Exact match with a non-wildcard keyword.
    if (keywords.contains (string))
      return '_' + string;

    // Keywords with wildcards anywhere but at the beginning or end.
    if (matchesClosedWildKeyword (string))
      return "_" + string;

    // Keywords with * + or . at the beginning or end need special
    // handling because the string could match more than one of them; one
    // underscore is prepended for each matched keyword.
    return matchesOpenWildcard (string) + string;
  } // getIdentifier

  /**
   * Scans a preprocessor directive; the current character is the '#'.
   * The ASCII letters that follow form the directive name, and the first
   * character after the name is pushed back.
   *
   * @return the token for the directive if its name is listed in
   *         Token.Directives; otherwise a Token.Unknown token carrying the
   *         name
   * @throws IOException if reading fails
   **/
  private Token getDirective () throws IOException
  {
    readChar ();

    final StringBuffer letters = new StringBuffer ();
    while ((data.ch >= 'a' && data.ch <= 'z') || (data.ch >= 'A' && data.ch <= 'Z'))
    {
      letters.append (data.ch);
      readChar ();
    }
    unread (data.ch);

    final String name = letters.toString ();
    for (int index = 0; index < Token.Directives.length; ++index)
    {
      if (name.equals (Token.Directives[index]))
        return new Token (Token.FirstDirective + index);
    }

    // If it got this far, it is an unknown preprocessor directive.
    return new Token (Token.Unknown, name);
  } // getDirective

  /**
   * Scans a numeric literal (or a lone period); the current character is
   * a digit or '.'.  The work is handed to the scanner that fits the
   * first character.
   *
   * @return the token produced by the selected scanner
   * @throws IOException if reading fails or the literal is malformed
   **/
  private Token getNumber () throws IOException
  {
    switch (data.ch)
    {
      case '.':
        return getFractionNoInteger ();
      case '0':
        return isItHex ();
      default:
        // the only other possibilities are 1..9
        return getInteger ();
    }
  } // getNumber

  /**
   * Handles a '.' that is not preceded by digits.
   *
   * @return a floating point literal if a digit follows the period,
   *         otherwise a Token.Period token
   * @throws IOException if reading fails
   **/
  private Token getFractionNoInteger () throws IOException
  {
    readChar ();

    final boolean digitFollows = data.ch >= '0' && data.ch <= '9';
    if (!digitFollows)
      return new Token (Token.Period);

    return getFraction (".");
  } // getFractionNoInteger

  /**
   * Scans the digits after the decimal point and, if an 'e' or 'E'
   * follows, the exponent.
   *
   * @param string the part of the literal scanned so far, up to and
   *               including the decimal point
   * @return a Token.FloatingPointLiteral token; an exponent marker is
   *         always stored as upper case 'E'
   * @throws IOException if reading fails or the exponent is malformed
   **/
  private Token getFraction (String string) throws IOException
  {
    final StringBuffer literal = new StringBuffer (string);
    while (data.ch >= '0' && data.ch <= '9')
    {
      literal.append (data.ch);
      readChar ();
    }

    final boolean hasExponent = data.ch == 'e' || data.ch == 'E';
    if (hasExponent)
      return getExponent (literal.append ('E').toString ());

    return new Token (Token.FloatingPointLiteral, literal.toString ());
  } // getFraction

  /**
   * Scans the exponent of a floating point literal; the current character
   * is the 'e' or 'E'.  The exponent is an optional sign followed by at
   * least one digit.
   *
   * @param string the literal scanned so far, ending in 'E'
   * @return a Token.FloatingPointLiteral token holding the whole literal
   * @throws IOException if reading fails; an InvalidCharacter is thrown
   *         when no digit follows the 'E' or the sign
   **/
  private Token getExponent (String string) throws IOException
  {
    readChar ();

    StringBuffer literal = new StringBuffer (string);
    if (data.ch == '+' || data.ch == '-')
    {
      literal.append (data.ch);
      readChar ();
    }

    // At least one digit is required, with or without a sign.  Checking
    // this after the sign as well rejects input such as "1e+", which
    // would otherwise yield the malformed literal "1E+".
    if (data.ch < '0' || data.ch > '9')
      throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);

    while (data.ch >= '0' && data.ch <= '9')
    {
      literal.append (data.ch);
      readChar ();
    }
    return new Token (Token.FloatingPointLiteral, literal.toString ());
  } // getExponent

  /**
   * Scans a literal that starts with '0'; the character after the zero
   * decides what kind of literal it is.
   *
   * @return a floating point literal for "0." or "0e", a hexadecimal
   *         literal for "0x", an octal literal when an octal digit
   *         follows, and otherwise the integer literal "0"
   * @throws IOException if reading fails; an InvalidCharacter is thrown
   *         when the zero is followed by 8 or 9
   **/
  private Token isItHex () throws IOException
  {
    readChar ();
    switch (data.ch)
    {
      case '.':
        readChar ();
        return getFraction ("0.");

      case 'x':
      case 'X':
        return getHexNumber ("0x");

      case '8':
      case '9':
        // Not octal digits.
        throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);

      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
        return getOctalNumber ();

      case 'e':
      case 'E':
        return getExponent ("0E");

      default:
        return new Token (Token.IntegerLiteral, "0");
    }
  } // isItHex

  /**
   * Scans an octal literal.  On entry the current character is the
   * first digit after the leading '0'; the returned text keeps that
   * leading "0".
   *
   * @return a Token.IntegerLiteral holding the octal text.
   * @throws InvalidCharacter if an '8' or '9' appears among the digits.
   * @throws IOException if reading the input fails.
   **/
  private Token getOctalNumber () throws IOException
  {
    String octal = "0" + data.ch;
    for (readChar (); data.ch >= '0' && data.ch <= '9'; readChar ())
    {
      // Within '0'..'9', anything above '7' is not an octal digit.
      if (data.ch > '7')
        throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
      octal += data.ch;
    }
    return new Token (Token.IntegerLiteral, octal);
  } // getOctalNumber

  /**
   * Scans the digits of a hexadecimal literal.  On entry the current
   * character is the 'x'/'X' marker, which is skipped.
   *
   * @param string the prefix to which the hex digits are appended.
   * @return a Token.IntegerLiteral holding the prefix and the digits.
   * @throws InvalidCharacter if the marker is not followed by a hex digit.
   * @throws IOException if reading the input fails.
   **/
  private Token getHexNumber (String string) throws IOException
  {
    readChar ();
    boolean hexDigit = (data.ch >= '0' && data.ch <= '9')
        || (data.ch >= 'a' && data.ch <= 'f')
        || (data.ch >= 'A' && data.ch <= 'F');
    if (!hexDigit)
      throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);

    // The first digit is known to be valid, so collect it before re-testing.
    do
    {
      string += data.ch;
      readChar ();
      hexDigit = (data.ch >= '0' && data.ch <= '9')
          || (data.ch >= 'a' && data.ch <= 'f')
          || (data.ch >= 'A' && data.ch <= 'F');
    } while (hexDigit);
    return new Token (Token.IntegerLiteral, string);
  } // getHexNumber

  /**
   * Reads between one and n hexadecimal digits, starting with the
   * character after the current one, and returns their value.
   *
   * @param n the maximum number of digits to consume.
   * @return the value of the digits, or 0 if Integer.parseInt rejects
   *         them.
   * @throws InvalidCharacter if the first character read is not a hex digit.
   * @throws IOException if reading the input fails.
   **/
  private int getNDigitHexNumber (int n) throws IOException
  {
    readChar ();
    if (!isHexChar (data.ch))
      throw new InvalidCharacter (data.filename, currentLine (),
          currentLineNumber (), currentLinePosition (), data.ch);

    String digits = "" + data.ch;
    readChar ();
    int taken = 1;
    while (taken < n && isHexChar (data.ch))
    {
      digits += data.ch;
      readChar ();
      ++taken;
    }

    try
    {
      return Integer.parseInt (digits, 16);
    }
    catch (NumberFormatException ignored)
    {
      // Unparsable digits are deliberately reported as 0.
      return 0;
    }
  } // getNDigitHexNumber

  /**
   * Tests whether a character is a hexadecimal digit.
   *
   * @param hex the character to test.
   * @return true if hex is in '0'..'9', 'a'..'f' or 'A'..'F'.
   **/
  private boolean isHexChar ( char hex )
  {
    // Test the argument itself rather than the scanner's current
    // character, so the answer depends only on what the caller passed.
    return ((hex >= '0') && (hex <= '9')) ||
        ((hex >= 'a') && (hex <= 'f')) ||
        ((hex >= 'A') && (hex <= 'F'));
  }

  /**
   * Reads an octal number of one to three digits, beginning with the
   * current character, and returns its value.  A three-digit number
   * whose first digit is above '3' (that is, above octal 377) is
   * rejected.
   *
   * @return the value of the octal digits.
   * @throws InvalidCharacter if a three-digit number exceeds 377, or if
   *         the collected text cannot be parsed as octal.
   * @throws IOException if reading the input fails.
   **/
  private int get3DigitOctalNumber () throws IOException
  {
    final char leading = data.ch;
    String octal = String.valueOf (leading);
    readChar ();

    boolean secondIsOctal = data.ch >= '0' && data.ch <= '7';
    if (secondIsOctal)
    {
      octal += data.ch;
      readChar ();
      boolean thirdIsOctal = data.ch >= '0' && data.ch <= '7';
      if (thirdIsOctal)
      {
        octal += data.ch;
        // This is a 3-digit number bigger than 377
        if (leading > '3')
          throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), leading);
        readChar ();
      }
    }

    try
    {
      return Integer.parseInt (octal, 8);
    }
    catch (NumberFormatException e)
    {
      throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), octal.charAt (0));
    }
  } // get3DigitOctalNumber

  /**
   * Scans a decimal literal that begins with the current character.
   * The whole run of digits is collected first; a following '.' or
   * 'e'/'E' then turns the literal into a floating-point literal.
   *
   * @return a Token.IntegerLiteral, or the floating-point token built by
   *         getFraction or getExponent.
   * @throws IOException if reading the input fails.
   **/
  private Token getInteger () throws IOException
  {
    String string = "" + data.ch;
    readChar ();
    // Gather the complete integer part before deciding what follows, so
    // that an exponent is recognised after any number of digits ("12e5"
    // as well as "1e5").
    while (data.ch >= '0' && data.ch <= '9')
    {
      string = string + data.ch;
      readChar ();
    }
    if (data.ch == '.')
    {
      readChar ();
      return getFraction (string + '.');
    }
    if (data.ch == 'e' || data.ch == 'E')
      return getExponent (string + 'E');
    return new Token (Token.IntegerLiteral, string);
  } // getInteger

  /**
   * Replaces a trigraph with the character it stands for and continues
   * scanning.  Reads the next two characters: the first must be '?' and
   * the second selects the replacement, which is stored in data.ch
   * before getToken is called.  Presumably the caller has already seen
   * the first '?' of the trigraph.
   *
   * @return the token scanned after the replacement.
   * @throws InvalidCharacter if the characters do not form a trigraph;
   *         the characters read are pushed back with unread first.
   * @throws IOException if reading the input fails.
   **/
  private Token replaceTrigraph () throws IOException
  {
    readChar ();
    if (data.ch != '?')
    {
      unread ('?');
      throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
    }

    readChar ();
    switch (data.ch)
    {
      case '=':
        data.ch = '#';
        break;
      case '/':
        data.ch = '\\';
        break;
      case '\'':
        data.ch = '^';
        break;
      case '(':
        data.ch = '[';
        break;
      case ')':
        data.ch = ']';
        break;
      case '!':
        data.ch = '|';
        break;
      case '<':
        data.ch = '{';
        break;
      case '>':
        data.ch = '}';
        break;
      case '-':
        data.ch = '~';
        break;
      default:
        unread (data.ch);
        unread ('?');
        throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
    }
    return getToken ();
  } // replaceTrigraph

  /**
   * Advances until the current character is greater than ' ', skipping
   * the space character and every character that sorts below it.
   *
   * @throws IOException if reading the input fails.
   **/
  void skipWhiteSpace () throws IOException
  {
    while (true)
    {
      if (data.ch > ' ')
        return;
      readChar ();
    }
  } // skipWhiteSpace

  /**
   * Skips the remainder of a block comment.  The current character is
   * skipped first (presumably the '*' of the opening delimiter); on
   * return the current character is the '/' of the closing delimiter.
   *
   * @throws EOFException if the input ends before the comment closes;
   *         ParseException.unclosedComment is called before rethrowing.
   * @throws IOException if reading the input fails.
   **/
  private void skipBlockComment () throws IOException
  {
    try
    {
      readChar ();
      do
      {
        // Find the next '*', then look at the character after it.
        while (data.ch != '*')
          readChar ();
        readChar ();
      } while (data.ch != '/');
    }
    catch (EOFException eof)
    {
      ParseException.unclosedComment (data.filename);
      throw eof;
    }
  } // skipBlockComment

  /**
   * Advances to the end of the current line.  On return the current
   * character is the newline itself; it is not consumed.
   *
   * @throws IOException if reading the input fails.
   **/
  void skipLineComment () throws IOException
  {
    while (true)
    {
      if (data.ch == '\n')
        return;
      readChar ();
    }
  } // skipLineComment

  // The following two routines added to extract comments rather
  // than ignore them.

  /**
   * Extracts a line comment from the input buffer.  The result starts
   * with "/" followed by every character from the current one up to,
   * but not including, the newline; carriage returns are dropped.
   * Presumably the current character on entry is the second '/' of the
   * comment opener.
   *
   * @return the text of the comment.
   * @throws IOException if reading the input fails.
   **/
  private String getLineComment () throws IOException
  {
    StringBuffer comment = new StringBuffer ("/");
    for (; data.ch != '\n'; readChar ())
      if (data.ch != '\r')
        comment.append (data.ch);
    return comment.toString ();
  } // getLineComment

  /**
   * Extracts a block comment from the input buffer.  The result starts
   * with the opening delimiter and contains every character read up to
   * and including the '/' of the closing delimiter, which is the
   * current character on return.
   *
   * @return the text of the comment, delimiters included.
   * @throws EOFException if the input ends before the comment closes;
   *         ParseException.unclosedComment is called before rethrowing.
   * @throws IOException if reading the input fails.
   **/
  private String getBlockComment () throws IOException
  {
    StringBuffer text = new StringBuffer ("/*");
    try
    {
      readChar ();
      text.append (data.ch);
      do
      {
        // Copy up to the next '*', then copy the character after it.
        while (data.ch != '*')
        {
          readChar ();
          text.append (data.ch);
        }
        readChar ();
        text.append (data.ch);
      } while (data.ch != '/');
    }
    catch (EOFException eof)
    {
      ParseException.unclosedComment (data.filename);
      throw eof;
    }
    return text.toString ();
  } // getBlockComment

  /**
   * Discards input until the current character equals c, ignoring any
   * occurrence of c inside comments, then returns the next token.
   *
   * @param c the character to stop at.
   * @return the token produced by getToken once c has been reached.
   * @throws IOException if reading the input fails.
   **/
  Token skipUntil (char c) throws IOException
  {
    while (data.ch != c)
    {
      if (data.ch == '/')
      {
        readChar ();
        if (data.ch == '/')
        {
          skipLineComment ();
          // skipLineComment stops on the terminating newline.  If the
          // newline is what is being looked for, the search is over.
          if (c == '\n') break;
        }
        else if (data.ch == '*')
        {
          skipBlockComment ();
          // skipBlockComment stops on the closing '/'.  Step past it so
          // that it is not examined again as the possible start of
          // another comment (as in "*//" or "*/*").
          readChar ();
        }
      }
      else
        readChar ();
    }
    return getToken ();
  } // skipUntil

  // getUntil is used for macro definitions and to get quoted
  // strings, so characters within "("...")" and '"'...'"' are
  // ignored.  Ie getUntil ',' on (,,,,),X will return (,,,,)

  /**
   * Collects input up to, but not including, the character c, with
   * quote, character-literal and comment handling all enabled.
   *
   * @param c the character to stop at.
   * @return the collected text.
   * @throws IOException if reading the input fails.
   **/
  String getUntil (char c) throws IOException
  {
      final boolean enabled = true;
      return getUntil (c, enabled, enabled, enabled);
  }

  /**
   * Collects input up to, but not including, the character c.  The
   * three flags are passed unchanged to appendToString, which does the
   * actual reading.
   *
   * @param c the character to stop at.
   * @param allowQuote passed to appendToString.
   * @param allowCharLit passed to appendToString.
   * @param allowComment passed to appendToString.
   * @return the collected text; empty if the current character is already c.
   * @throws IOException if reading the input fails.
   **/
  String getUntil (char c, boolean allowQuote, boolean allowCharLit, boolean allowComment) throws IOException
  {
    String collected = "";
    for (;;)
    {
      if (data.ch == c)
        return collected;
      collected = appendToString (collected, allowQuote, allowCharLit, allowComment);
    }
  } // getUntil

  /**
   * Collects input up to, but not including, the first occurrence of
   * either c1 or c2.  Quote, character-literal and comment handling are
   * all disabled.
   *
   * @param c1 the first character to stop at.
   * @param c2 the second character to stop at.
   * @return the collected text.
   * @throws IOException if reading the input fails.
   **/
  String getUntil (char c1, char c2) throws IOException
  {
    String collected = "";
    while (!(data.ch == c1 || data.ch == c2))
      collected = appendToString (collected, false, false, false);
    return collected;
  } // getUntil

  /**
   * Consumes the next unit of input and returns string extended with
   * whatever that unit contributes.  A unit is a comment (contributes
   * nothing), a backslash sequence, a delimited run, or a single
   * character.
   *
   * NOTE(review): the flag names look crossed relative to what they
   * gate -- allowCharLit guards '"' and allowQuote guards '\'' --
   * confirm against the callers before relying on the names.
   *
   * @param string the text collected so far.
   * @param allowQuote when true, a '\'' opens a run that extends to the
   *        next '\''; together with allowCharLit, a '(' opens a run
   *        that extends to the next ')'.
   * @param allowCharLit when true, a '"' opens a run that extends to
   *        the next '"'.
   * @param allowComment when true, line and block comments are skipped
   *        instead of being collected.
   * @return string with the newly consumed text appended.
   * @throws IOException if reading the input fails.
   **/
  private String appendToString (String string, boolean allowQuote, boolean allowCharLit, boolean allowComment) throws IOException
  {
    // Ignore any comments if they are allowed
    if (allowComment && data.ch == '/')
    {
      readChar ();
      if (data.ch == '/')
        skipLineComment ();
      else if (data.ch == '*')
      {
        skipBlockComment ();
        // skipBlockComment stops on the comment's closing '/'.  Step
        // past it; otherwise the next call would see that '/' and
        // append it to the result as though it were ordinary text.
        readChar ();
      }
      else
        string = string + '/';
    }
    // Handle line continuation character
    else if (data.ch == '\\')
    {
      readChar ();
      if (data.ch == '\n')
        readChar ();
      else if (data.ch == '\r')
      {
        readChar ();
        if (data.ch == '\n')
          readChar ();
      }
      else
      {
        // Not a continuation: keep the backslash and the character
        // that follows it.
        string = string + '\\' + data.ch;
        readChar ();
      }
    }
    // characters within "("...")" and '"'...'"' are ignored.
    // Ie getUntil ',' on (,,,,),X will return (,,,,)
    else
    {
      if (allowCharLit && data.ch == '"')
      {
        readChar ();
        string = string + '"';
        while (data.ch != '"')
          string = appendToString (string, true, false, allowComment);
      }
      else if (allowQuote && allowCharLit && data.ch == '(')
      {
        readChar ();
        string = string + '(';
        while (data.ch != ')')
          string = appendToString (string, false, false, allowComment);
      }
      else if (allowQuote && data.ch == '\'')
      {
        readChar ();
        string = string + "'";
        while (data.ch != '\'')
          string = appendToString (string, false, true, allowComment);
      }
      // Append the current character: either an ordinary character or
      // the closing delimiter of the run collected above.
      string = string + data.ch;
      readChar ();
    }
    return string;
  } // appendToString

  /**
   * Collects the rest of the logical line, stopping with the newline as
   * the current character.  A backslash followed by a line break
   * ("\n", "\r" or "\r\n") continues the line and contributes nothing;
   * a backslash followed by any other character contributes only that
   * character.
   *
   * @return the collected text, without the terminating newline.
   * @throws IOException if reading the input fails.
   **/
  String getStringToEOL () throws IOException
  {
    String line = new String ();
    while (data.ch != '\n')
    {
      if (data.ch != '\\')
      {
        // Ordinary character.
        line += data.ch;
        readChar ();
        continue;
      }

      // Backslash: look at what follows it.
      readChar ();
      switch (data.ch)
      {
        case '\n':
          readChar ();
          break;
        case '\r':
          readChar ();
          if (data.ch == '\n')
            readChar ();
          break;
        default:
          line += data.ch;
          readChar ();
          break;
      }
    }
    return line;
  } // getStringToEOL

  /**
   * Returns the filename held in the current scanner data.
   *
   * @return the value of data.filename.
   **/
  String filename ()
  {
    return this.data.filename;
  } // filename

  /**
   * Returns the include entry held in the current scanner data.
   *
   * @return the value of data.fileEntry.
   **/
  IncludeEntry fileEntry ()
  {
    return this.data.fileEntry;
  } // fileEntry

  /**
   * Returns the line number held in the current scanner data.
   *
   * @return the value of data.line.
   **/
  int currentLineNumber ()
  {
    return this.data.line;
  } // currentLineNumber

  /**
   * Returns the line number recorded for the previous token.
   *
   * @return the value of data.oldLine.
   **/
  int lastTokenLineNumber ()
  {
    return this.data.oldLine;
  } // lastTokenLineNumber

  // Index into data.fileBytes of the first character of the line most
  // recently located by currentLine ().  currentLinePosition () and
  // lastTokenLinePosition () subtract it from a file index, so it is
  // only meaningful after currentLine () has run.
  private int BOL; // Beginning Of Line

  /**
   * Returns the text of the line containing position data.fileIndex - 1
   * of data.fileBytes, without its line terminator.  As a side effect
   * BOL is set to the index of the first character of that line.
   *
   * Running off either end of the array is handled by catching
   * ArrayIndexOutOfBoundsException rather than by explicit bounds
   * checks.
   *
   * @return the line's text, or "" when the line is empty.
   **/
  String currentLine ()
  {
    BOL = data.fileIndex - 1;
    try
    {
      // If the current position is at the end of the line,
      // set BOL to before the end of the line so the whole
      // line is returned.
      if (data.fileBytes[BOL - 1] == '\r' && data.fileBytes[BOL] == '\n')
        BOL -= 2;
      else if (data.fileBytes[BOL] == '\n')
        --BOL;
      // Walk backwards to the newline that ends the previous line.
      while (data.fileBytes[BOL] != '\n')
        --BOL;
    }
    catch (ArrayIndexOutOfBoundsException e)
    {
      // An index fell outside the array: treat the line as starting at
      // the beginning of the file (-1 becomes 0 after the increment).
      BOL = -1;
    }
    ++BOL; // Go to the first character AFTER the newline
    int EOL = data.fileIndex - 1;
    try
    {
      // Walk forwards to the first '\n' or '\r' at or after the position.
      while (data.fileBytes[EOL] != '\n' && data.fileBytes[EOL] != '\r')
        ++EOL;
    }
    catch (ArrayIndexOutOfBoundsException e)
    {
      // No terminator found (or the start index was out of range): the
      // line runs to the end of the file.
      EOL = data.fileBytes.length;
    }
    if (BOL < EOL)
      return new String (data.fileBytes, BOL, EOL - BOL);
    else
      return "";
  } // currentLine

  /**
   * Returns the text of the line containing the previous token.  It
   * temporarily sets data.fileIndex to data.oldIndex, calls
   * currentLine (), and then restores data.fileIndex.  BOL is left
   * describing the returned line.
   *
   * @return the text of the line at data.oldIndex.
   **/
  String lastTokenLine ()
  {
    final int resumeIndex = data.fileIndex;
    data.fileIndex = data.oldIndex;
    final String tokenLine = currentLine ();
    data.fileIndex = resumeIndex;
    return tokenLine;
  } // lastTokenLine

  /**
   * Returns the offset of data.fileIndex from BOL.  BOL is whatever
   * currentLine () last set it to, so currentLine () must have been
   * called for the line in question first.
   *
   * @return data.fileIndex minus BOL.
   **/
  int currentLinePosition ()
  {
    final int index = data.fileIndex;
    return index - BOL;
  } // currentLinePosition

  /**
   * Returns the offset of data.oldIndex from BOL.  BOL is whatever
   * currentLine () last set it to; presumably lastTokenLine () is meant
   * to be called first so that BOL refers to the previous token's line.
   *
   * @return data.oldIndex minus BOL.
   **/
  int lastTokenLinePosition ()
  {
    final int index = data.oldIndex;
    return index - BOL;
  } // lastTokenLinePosition

  // The scanner data is moved to a separate class so that all of the
  // data can easily be pushed and popped to a stack.

  // The data must be stackable for macros and #included files.  When
  // a macro is encountered:  the current stack data is reserved on
  // the stack; the stack is loaded with the macro info; processing
  // proceeds with this data.  The same is true for #included files.

  // It may seem that the entire Scanner should be put on a stack in
  // the Parser since all the scanner data is stackable.  But that
  // would mean instantiating a new scanner.  The scanner must
  // continue from where it left off; when certain things cross file
  // boundaries, they must be handled by the scanner, not the parser,
  // things like:  block comments, quoted strings, tokens.
  // The scanner state currently in use: the input text, the cursor into
  // it, the current character and the line counters.
  private ScannerData data              = new ScannerData ();
  // Presumably holds the ScannerData instances set aside while a macro
  // or #included file is being scanned -- TODO confirm against the
  // code that pushes and pops it.
  private Stack       dataStack         = new Stack ();
  // Keyword tables.  NOTE(review): how the three differ is not visible
  // in this part of the file; see the code that fills and searches them.
  private Vector      keywords          = new Vector ();
  private Vector      openEndedKeywords = new Vector ();
  private Vector      wildcardKeywords  = new Vector ();
  private boolean     verbose;
  // <f46082.40> Identifiers starting with '_' are considered "Escaped",
  // except when scanned during preprocessing.  Class Preprocessor is
  // responsible for modifying the escapedOK flag accordingly.  Since
  // leading underscores are now legal when scanning identifiers as well
  // as macro identifiers, underscoreOK is obsolete.
  //
  //        boolean     underscoreOK      = false;
          boolean     escapedOK         = true;
  // <f46082.51> Remove -stateful feature.
  //        boolean     stateful;
  private boolean     emitAll;
  private float       corbaLevel;
  private boolean     debug ;
} // class Scanner

// This is a dumb class, really just a struct.  It contains all of the
// scanner class's data in one place so that that data can be easily
// pushed and popped to a stack.

/**
 * Holder for all of the scanner's per-input state.  It has no behaviour
 * of its own; the fields are read and written directly by Scanner.
 **/
class ScannerData
{
  String       indent          = "";
  IncludeEntry fileEntry       = null;
  String       filename        = "";

  // The text being scanned.  <f49747.1> This was originally a byte[]
  // (OMG IDL is specified to be ISO Latin-1, whose high-order byte is
  // always 0x0); it was converted to char[] to employ Reader classes,
  // which have Character encoding features. <ajb>
  char[]       fileBytes       = null;
  // Cursor into fileBytes.
  int          fileIndex       = 0;
  // Value of the cursor recorded for the previous token.
  int          oldIndex        = 0;
  // The current character.
  char         ch;
  // Current line number, starting at 1.
  int          line            = 1;
  // Line number recorded for the previous token.
  int          oldLine         = 1;
  boolean      macrodata       = false;
  boolean      includeIsImport = false;

  /**
   * Creates scanner data holding the default values declared above.
   **/
  public ScannerData ()
  {
  } // ctor

  /**
   * Creates a field-by-field copy of another instance.  The copy is
   * shallow: fileBytes refers to the same array as the original.
   *
   * @param that the instance to copy.
   **/
  public ScannerData (ScannerData that)
  {
    this.indent          = that.indent;
    this.fileEntry       = that.fileEntry;
    this.filename        = that.filename;
    this.fileBytes       = that.fileBytes;
    this.fileIndex       = that.fileIndex;
    this.oldIndex        = that.oldIndex;
    this.ch              = that.ch;
    this.line            = that.line;
    this.oldLine         = that.oldLine;
    this.macrodata       = that.macrodata;
    this.includeIsImport = that.includeIsImport;
  } // copy ctor
} // class ScannerData

Other Java examples (source code examples)

Here is a short list of links related to this Java Scanner.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.