alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (Scanner.java)

This example Java source code file (Scanner.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Learn more about this Java project at its project page.

Java - Java tags/keywords

comment, compilererror, doubleval, eof, identifiertoken, intval, ioexception, scannerinputreader, string, util

The Scanner.java Java example source code

/*
 * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.tools.java;

import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;

/**
 * A Scanner for Java tokens. Errors are reported
 * to the environment object.<p>
 *
 * The scanner keeps track of the current token,
 * the value of the current token (if any), and the start
 * position of the current token.<p>
 *
 * The scan() method advances the scanner to the next
 * token in the input.<p>
 *
 * The match() method is used to quickly match opening
 * brackets (ie: '(', '{', or '[') with their closing
 * counter part. This is useful during error recovery.<p>
 *
 * An position consists of: ((linenr << WHEREOFFSETBITS) | offset)
 * this means that both the line number and the exact offset into
 * the file are encoded in each position value.<p>
 *
 * The compiler treats either "\n", "\r" or "\r\n" as the
 * end of a line.<p>
 *
 * WARNING: The contents of this source file are not part of any
 * supported API.  Code that depends on them does so at its own risk:
 * they are subject to change or removal without notice.
 *
 * @author      Arthur van Hoff
 */

public
class Scanner implements Constants {
    /**
     * The increment for each character.
     */
    public static final long OFFSETINC = 1;

    /**
     * The increment for each line.
     */
    public static final long LINEINC = 1L << WHEREOFFSETBITS;

    /**
     * End of input
     */
    public static final int EOF = -1;

    /**
     * Where errors are reported
     */
    public Environment env;

    /**
     * Input reader
     */
    protected ScannerInputReader in;

    /**
     * If true, present all comments as tokens.
     * Contents are not saved, but positions are recorded accurately,
     * so the comment can be recovered from the text.
     * Line terminations are also returned as comment tokens,
     * and may be distinguished by their start and end positions,
     * which are equal (meaning, these tokens contain no chars).
     */
   public boolean scanComments = false;

    /**
     * Current token
     */
    public int token;

    /**
     * The position of the current token
     */
    public long pos;

    /**
     * The position of the previous token
     */
    public long prevPos;

    /**
     * The current character
     */
    protected int ch;

    /*
     * Token values.
     */
    public char charValue;
    public int intValue;
    public long longValue;
    public float floatValue;
    public double doubleValue;
    public String stringValue;
    public Identifier idValue;
    public int radix;   // Radix, when reading int or long

    /*
     * A doc comment preceding the most recent token
     */
    public String docComment;

    /*
     * A growable character buffer.
     */
    private int count;
    private char buffer[] = new char[1024];
    private void growBuffer() {
        char newBuffer[] = new char[buffer.length * 2];
        System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
        buffer = newBuffer;
    }

    // The following two methods have been hand-inlined in
    // scanDocComment.  If you make changes here, you should
    // check to see if scanDocComment also needs modification.
    private void putc(int ch) {
        if (count == buffer.length) {
            growBuffer();
        }
        buffer[count++] = (char)ch;
    }

    private String bufferString() {
        return new String(buffer, 0, count);
    }

    /**
     * Create a scanner to scan an input stream.
     */
    public Scanner(Environment env, InputStream in) throws IOException {
        this.env = env;
        useInputStream(in);
    }

    /**
     * Setup input from the given input stream,
     * and scan the first token from it.
     */
    protected void useInputStream(InputStream in) throws IOException {
        try {
            this.in = new ScannerInputReader(env, in);
        } catch (Exception e) {
            env.setCharacterEncoding(null);
            this.in = new ScannerInputReader(env, in);
        }

        ch = this.in.read();
        prevPos = this.in.pos;

        scan();
    }

    /**
     * Create a scanner to scan an input stream.
     */
    protected Scanner(Environment env) {
        this.env = env;
        // Expect the subclass to call useInputStream at the right time.
    }

    /**
     * Define a keyword.
     */
    private static void defineKeyword(int val) {
        Identifier.lookup(opNames[val]).setType(val);
    }

    /**
     * Initialized keyword and token Hashtables
     */
    static {
        // Statement keywords
        defineKeyword(FOR);
        defineKeyword(IF);
        defineKeyword(ELSE);
        defineKeyword(WHILE);
        defineKeyword(DO);
        defineKeyword(SWITCH);
        defineKeyword(CASE);
        defineKeyword(DEFAULT);
        defineKeyword(BREAK);
        defineKeyword(CONTINUE);
        defineKeyword(RETURN);
        defineKeyword(TRY);
        defineKeyword(CATCH);
        defineKeyword(FINALLY);
        defineKeyword(THROW);

        // Type defineKeywords
        defineKeyword(BYTE);
        defineKeyword(CHAR);
        defineKeyword(SHORT);
        defineKeyword(INT);
        defineKeyword(LONG);
        defineKeyword(FLOAT);
        defineKeyword(DOUBLE);
        defineKeyword(VOID);
        defineKeyword(BOOLEAN);

        // Expression keywords
        defineKeyword(INSTANCEOF);
        defineKeyword(TRUE);
        defineKeyword(FALSE);
        defineKeyword(NEW);
        defineKeyword(THIS);
        defineKeyword(SUPER);
        defineKeyword(NULL);

        // Declaration keywords
        defineKeyword(IMPORT);
        defineKeyword(CLASS);
        defineKeyword(EXTENDS);
        defineKeyword(IMPLEMENTS);
        defineKeyword(INTERFACE);
        defineKeyword(PACKAGE);
        defineKeyword(THROWS);

        // Modifier keywords
        defineKeyword(PRIVATE);
        defineKeyword(PUBLIC);
        defineKeyword(PROTECTED);
        defineKeyword(STATIC);
        defineKeyword(TRANSIENT);
        defineKeyword(SYNCHRONIZED);
        defineKeyword(NATIVE);
        defineKeyword(ABSTRACT);
        defineKeyword(VOLATILE);
        defineKeyword(FINAL);
        defineKeyword(STRICTFP);

        // reserved keywords
        defineKeyword(CONST);
        defineKeyword(GOTO);
    }

    /**
     * Scan a comment. This method should be
     * called once the initial /, * and the next
     * character have been read.
     */
    private void skipComment() throws IOException {
        while (true) {
            switch (ch) {
              case EOF:
                env.error(pos, "eof.in.comment");
                return;

              case '*':
                if ((ch = in.read()) == '/')  {
                    ch = in.read();
                    return;
                }
                break;

              default:
                ch = in.read();
                break;
            }
        }
    }

    /**
     * Scan a doc comment. This method should be called
     * once the initial /, * and * have been read. It gathers
     * the content of the comment (witout leading spaces and '*'s)
     * in the string buffer.
     */
    private String scanDocComment() throws IOException {
        // Note: this method has been hand-optimized to yield
        // better performance.  This was done after it was noted
        // that javadoc spent a great deal of its time here.
        // This should also help the performance of the compiler
        // as well -- it scans the doc comments to find
        // @deprecated tags.
        //
        // The logic of the method has been completely rewritten
        // to avoid the use of flags that need to be looked at
        // for every character read.  Members that are accessed
        // more than once have been stored in local variables.
        // The methods putc() and bufferString() have been
        // inlined by hand.  Extra cases have been added to
        // switch statements to trick the compiler into generating
        // a tableswitch instead of a lookupswitch.
        //
        // This implementation aims to preserve the previous
        // behavior of this method.

        int c;

        // Put `in' in a local variable.
        final ScannerInputReader in = this.in;

        // We maintain the buffer locally rather than calling putc().
        char[] buffer = this.buffer;
        int count = 0;

        // We are called pointing at the second star of the doc
        // comment:
        //
        // Input: /** the rest of the comment ... */
        //          ^
        //
        // We rely on this in the code below.

        // Consume any number of stars.
        while ((c = in.read()) == '*')
            ;

        // Is the comment of the form /**/, /***/, /****/, etc.?
        if (c == '/') {
            // Set ch and return
            ch = in.read();
            return "";
        }

        // Skip a newline on the first line of the comment.
        if (c == '\n') {
            c = in.read();
        }

    outerLoop:
        // The outerLoop processes the doc comment, looping once
        // for each line.  For each line, it first strips off
        // whitespace, then it consumes any stars, then it
        // puts the rest of the line into our buffer.
        while (true) {

            // The wsLoop consumes whitespace from the beginning
            // of each line.
        wsLoop:
            while (true) {
                switch (c) {
                case ' ':
                case '\t':
                    // We could check for other forms of whitespace
                    // as well, but this is left as is for minimum
                    // disturbance of functionality.
                    //
                    // Just skip whitespace.
                    c = in.read();
                    break;

                // We have added extra cases here to trick the
                // compiler into using a tableswitch instead of
                // a lookupswitch.  They can be removed without
                // a change in meaning.
                case 10: case 11: case 12: case 13: case 14: case 15:
                case 16: case 17: case 18: case 19: case 20: case 21:
                case 22: case 23: case 24: case 25: case 26: case 27:
                case 28: case 29: case 30: case 31:
                default:
                    // We've seen something that isn't whitespace,
                    // jump out.
                    break wsLoop;
                }
            } // end wsLoop.

            // Are there stars here?  If so, consume them all
            // and check for the end of comment.
            if (c == '*') {
                // Skip all of the stars...
                do {
                    c = in.read();
                } while (c == '*');

                // ...then check for the closing slash.
                if (c == '/') {
                    // We're done with the doc comment.
                    // Set ch and break out.
                    ch = in.read();
                    break outerLoop;
                }
            }

            // The textLoop processes the rest of the characters
            // on the line, adding them to our buffer.
        textLoop:
            while (true) {
                switch (c) {
                case EOF:
                    // We've seen a premature EOF.  Break out
                    // of the loop.
                    env.error(pos, "eof.in.comment");
                    ch = EOF;
                    break outerLoop;

                case '*':
                    // Is this just a star?  Or is this the
                    // end of a comment?
                    c = in.read();
                    if (c == '/') {
                        // This is the end of the comment,
                        // set ch and return our buffer.
                        ch = in.read();
                        break outerLoop;
                    }
                    // This is just an ordinary star.  Add it to
                    // the buffer.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = '*';
                    break;

                case '\n':
                    // We've seen a newline.  Add it to our
                    // buffer and break out of this loop,
                    // starting fresh on a new line.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = '\n';
                    c = in.read();
                    break textLoop;

                // Again, the extra cases here are a trick
                // to get the compiler to generate a tableswitch.
                case 0: case 1: case 2: case 3: case 4: case 5:
                case 6: case 7: case 8: case 11: case 12: case 13:
                case 14: case 15: case 16: case 17: case 18: case 19:
                case 20: case 21: case 22: case 23: case 24: case 25:
                case 26: case 27: case 28: case 29: case 30: case 31:
                case 32: case 33: case 34: case 35: case 36: case 37:
                case 38: case 39: case 40:
                default:
                    // Add the character to our buffer.
                    if (count == buffer.length) {
                        growBuffer();
                        buffer = this.buffer;
                    }
                    buffer[count++] = (char)c;
                    c = in.read();
                    break;
                }
            } // end textLoop
        } // end outerLoop

        // We have scanned our doc comment.  It is stored in
        // buffer.  The previous implementation of scanDocComment
        // stripped off all trailing spaces and stars from the comment.
        // We will do this as well, so as to cause a minimum of
        // disturbance.  Is this what we want?
        if (count > 0) {
            int i = count - 1;
        trailLoop:
            while (i > -1) {
                switch (buffer[i]) {
                case ' ':
                case '\t':
                case '*':
                    i--;
                    break;
                // And again, the extra cases here are a trick
                // to get the compiler to generate a tableswitch.
                case 0: case 1: case 2: case 3: case 4: case 5:
                case 6: case 7: case 8: case 10: case 11: case 12:
                case 13: case 14: case 15: case 16: case 17: case 18:
                case 19: case 20: case 21: case 22: case 23: case 24:
                case 25: case 26: case 27: case 28: case 29: case 30:
                case 31: case 33: case 34: case 35: case 36: case 37:
                case 38: case 39: case 40:
                default:
                    break trailLoop;
                }
            }
            count = i + 1;

            // Return the text of the doc comment.
            return new String(buffer, 0, count);
        } else {
            return "";
        }
    }

    /**
     * Scan a number. The first digit of the number should be the current
     * character.  We may be scanning hex, decimal, or octal at this point
     */
    private void scanNumber() throws IOException {
        boolean seenNonOctal = false;
        boolean overflow = false;
        boolean seenDigit = false; // used to detect invalid hex number 0xL
        radix = (ch == '0' ? 8 : 10);
        long value = ch - '0';
        count = 0;
        putc(ch);               // save character in buffer
    numberLoop:
        for (;;) {
            switch (ch = in.read()) {
              case '.':
                if (radix == 16)
                    break numberLoop; // an illegal character
                scanReal();
                return;

              case '8': case '9':
                // We can't yet throw an error if reading an octal.  We might
                // discover we're really reading a real.
                seenNonOctal = true;
              case '0': case '1': case '2': case '3':
              case '4': case '5': case '6': case '7':
                seenDigit = true;
                putc(ch);
                if (radix == 10) {
                    overflow = overflow || (value * 10)/10 != value;
                    value = (value * 10) + (ch - '0');
                    overflow = overflow || (value - 1 < -1);
                } else if (radix == 8) {
                    overflow = overflow || (value >>> 61) != 0;
                    value = (value << 3) + (ch - '0');
                } else {
                    overflow = overflow || (value >>> 60) != 0;
                    value = (value << 4) + (ch - '0');
                }
                break;

              case 'd': case 'D': case 'e': case 'E': case 'f': case 'F':
                if (radix != 16) {
                    scanReal();
                    return;
                }
                // fall through
              case 'a': case 'A': case 'b': case 'B': case 'c': case 'C':
                seenDigit = true;
                putc(ch);
                if (radix != 16)
                    break numberLoop; // an illegal character
                overflow = overflow || (value >>> 60) != 0;
                value = (value << 4) + 10 +
                         Character.toLowerCase((char)ch) - 'a';
                break;

              case 'l': case 'L':
                ch = in.read(); // skip over 'l'
                longValue = value;
                token = LONGVAL;
                break numberLoop;

              case 'x': case 'X':
                // if the first character is a '0' and this is the second
                // letter, then read in a hexadecimal number.  Otherwise, error.
                if (count == 1 && radix == 8) {
                    radix = 16;
                    seenDigit = false;
                    break;
                } else {
                    // we'll get an illegal character error
                    break numberLoop;
                }

              default:
                intValue = (int)value;
                token = INTVAL;
                break numberLoop;
            }
        } // while true

        // We have just finished reading the number.  The next thing better
        // not be a letter or digit.
        // Note:  There will be deprecation warnings against these uses
        // of Character.isJavaLetterOrDigit and Character.isJavaLetter.
        // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs.
        if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
            env.error(in.pos, "invalid.number");
            do { ch = in.read(); }
            while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
            intValue = 0;
            token = INTVAL;
        } else if (radix == 8 && seenNonOctal) {
            // A bogus octal literal.
            intValue = 0;
            token = INTVAL;
            env.error(pos, "invalid.octal.number");
        } else if (radix == 16 && seenDigit == false) {
            // A hex literal with no digits, 0xL, for example.
            intValue = 0;
            token = INTVAL;
            env.error(pos, "invalid.hex.number");
        } else {
            if (token == INTVAL) {
                // Check for overflow.  Note that base 10 literals
                // have different rules than base 8 and 16.
                overflow = overflow ||
                    (value & 0xFFFFFFFF00000000L) != 0 ||
                    (radix == 10 && value > 2147483648L);

                if (overflow) {
                    intValue = 0;

                    // Give a specific error message which tells
                    // the user the range.
                    switch (radix) {
                    case 8:
                        env.error(pos, "overflow.int.oct");
                        break;
                    case 10:
                        env.error(pos, "overflow.int.dec");
                        break;
                    case 16:
                        env.error(pos, "overflow.int.hex");
                        break;
                    default:
                        throw new CompilerError("invalid radix");
                    }
                }
            } else {
                if (overflow) {
                    longValue = 0;

                    // Give a specific error message which tells
                    // the user the range.
                    switch (radix) {
                    case 8:
                        env.error(pos, "overflow.long.oct");
                        break;
                    case 10:
                        env.error(pos, "overflow.long.dec");
                        break;
                    case 16:
                        env.error(pos, "overflow.long.hex");
                        break;
                    default:
                        throw new CompilerError("invalid radix");
                    }
                }
            }
        }
    }

    /**
     * Scan a float.  We are either looking at the decimal, or we have already
     * seen it and put it into the buffer.  We haven't seen an exponent.
     * Scan a float.  Should be called with the current character is either
     * the 'e', 'E' or '.'
     */
    private void scanReal() throws IOException {
        boolean seenExponent = false;
        boolean isSingleFloat = false;
        char lastChar;
        if (ch == '.') {
            putc(ch);
            ch = in.read();
        }

    numberLoop:
        for ( ; ; ch = in.read()) {
            switch (ch) {
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    putc(ch);
                    break;

                case 'e': case 'E':
                    if (seenExponent)
                        break numberLoop; // we'll get a format error
                    putc(ch);
                    seenExponent = true;
                    break;

                case '+': case '-':
                    lastChar = buffer[count - 1];
                    if (lastChar != 'e' && lastChar != 'E')
                        break numberLoop; // this isn't an error, though!
                    putc(ch);
                    break;

                case 'f': case 'F':
                    ch = in.read(); // skip over 'f'
                    isSingleFloat = true;
                    break numberLoop;

                case 'd': case 'D':
                    ch = in.read(); // skip over 'd'
                    // fall through
                default:
                    break numberLoop;
            } // sswitch
        } // loop

        // we have just finished reading the number.  The next thing better
        // not be a letter or digit.
        if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
            env.error(in.pos, "invalid.number");
            do { ch = in.read(); }
            while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
            doubleValue = 0;
            token = DOUBLEVAL;
        } else {
            token = isSingleFloat ? FLOATVAL : DOUBLEVAL;
            try {
                lastChar = buffer[count - 1];
                if (lastChar == 'e' || lastChar == 'E'
                       || lastChar == '+' || lastChar == '-') {
                    env.error(in.pos -1, "float.format");
                } else if (isSingleFloat) {
                    String string = bufferString();
                    floatValue = Float.valueOf(string).floatValue();
                    if (Float.isInfinite(floatValue)) {
                        env.error(pos, "overflow.float");
                    } else if (floatValue == 0 && !looksLikeZero(string)) {
                        env.error(pos, "underflow.float");
                    }
                } else {
                    String string = bufferString();
                    doubleValue = Double.valueOf(string).doubleValue();
                    if (Double.isInfinite(doubleValue)) {
                        env.error(pos, "overflow.double");
                    } else if (doubleValue == 0 && !looksLikeZero(string)) {
                        env.error(pos, "underflow.double");
                    }
                }
            } catch (NumberFormatException ee) {
                env.error(pos, "float.format");
                doubleValue = 0;
                floatValue = 0;
            }
        }
        return;
    }

    // We have a token that parses as a number.  Is this token possibly zero?
    // i.e. does it have a non-zero value in the mantissa?
    private static boolean looksLikeZero(String token) {
        int length = token.length();
        for (int i = 0; i < length; i++) {
            switch (token.charAt(i)) {
                case 0: case '.':
                    continue;
                case '1': case '2': case '3': case '4': case '5':
                case '6': case '7': case '8': case '9':
                    return false;
                case 'e': case 'E': case 'f': case 'F':
                    return true;
            }
        }
        return true;
    }

    /**
     * Scan an escape character.
     * @return the character or -1 if it escaped an
     * end-of-line.
     */
    private int scanEscapeChar() throws IOException {
        long p = in.pos;

        switch (ch = in.read()) {
          case '0': case '1': case '2': case '3':
          case '4': case '5': case '6': case '7': {
            int n = ch - '0';
            for (int i = 2 ; i > 0 ; i--) {
                switch (ch = in.read()) {
                  case '0': case '1': case '2': case '3':
                  case '4': case '5': case '6': case '7':
                    n = (n << 3) + ch - '0';
                    break;

                  default:
                    if (n > 0xFF) {
                        env.error(p, "invalid.escape.char");
                    }
                    return n;
                }
            }
            ch = in.read();
            if (n > 0xFF) {
                env.error(p, "invalid.escape.char");
            }
            return n;
          }

          case 'r':  ch = in.read(); return '\r';
          case 'n':  ch = in.read(); return '\n';
          case 'f':  ch = in.read(); return '\f';
          case 'b':  ch = in.read(); return '\b';
          case 't':  ch = in.read(); return '\t';
          case '\\': ch = in.read(); return '\\';
          case '\"': ch = in.read(); return '\"';
          case '\'': ch = in.read(); return '\'';
        }

        env.error(p, "invalid.escape.char");
        ch = in.read();
        return -1;
    }

    /**
     * Scan a string. The current character
     * should be the opening " of the string.
     */
    private void scanString() throws IOException {
        token = STRINGVAL;
        count = 0;
        ch = in.read();

        // Scan a String
        while (true) {
            switch (ch) {
              case EOF:
                env.error(pos, "eof.in.string");
                stringValue = bufferString();
                return;

              case '\r':
              case '\n':
                ch = in.read();
                env.error(pos, "newline.in.string");
                stringValue = bufferString();
                return;

              case '"':
                ch = in.read();
                stringValue = bufferString();
                return;

              case '\\': {
                int c = scanEscapeChar();
                if (c >= 0) {
                    putc((char)c);
                }
                break;
              }

              default:
                putc(ch);
                ch = in.read();
                break;
            }
        }
    }

    /**
     * Scan a character. The current character should be
     * the opening ' of the character constant.
     */
    private void scanCharacter() throws IOException {
        token = CHARVAL;

        switch (ch = in.read()) {
          case '\\':
            int c = scanEscapeChar();
            charValue = (char)((c >= 0) ? c : 0);
            break;

        case '\'':
            // There are two standard problems this case deals with.  One
            // is the malformed single quote constant (i.e. the programmer
            // uses ''' instead of '\'') and the other is the empty
            // character constant (i.e. '').  Just consume any number of
            // single quotes and emit an error message.
            charValue = 0;
            env.error(pos, "invalid.char.constant");
            ch = in.read();
            while (ch == '\'') {
                ch = in.read();
            }
            return;

          case '\r':
          case '\n':
            charValue = 0;
            env.error(pos, "invalid.char.constant");
            return;

          default:
            charValue = (char)ch;
            ch = in.read();
            break;
        }

        if (ch == '\'') {
            ch = in.read();
        } else {
            env.error(pos, "invalid.char.constant");
            while (true) {
                switch (ch) {
                  case '\'':
                    ch = in.read();
                    return;
                  case ';':
                  case '\n':
                  case EOF:
                    return;
                  default:
                    ch = in.read();
                }
            }
        }
    }

    /**
     * Scan an Identifier. The current character should
     * be the first character of the identifier.
     */
    private void scanIdentifier() throws IOException {
        count = 0;

        while (true) {
            putc(ch);
            switch (ch = in.read()) {
              case 'a': case 'b': case 'c': case 'd': case 'e':
              case 'f': case 'g': case 'h': case 'i': case 'j':
              case 'k': case 'l': case 'm': case 'n': case 'o':
              case 'p': case 'q': case 'r': case 's': case 't':
              case 'u': case 'v': case 'w': case 'x': case 'y':
              case 'z':
              case 'A': case 'B': case 'C': case 'D': case 'E':
              case 'F': case 'G': case 'H': case 'I': case 'J':
              case 'K': case 'L': case 'M': case 'N': case 'O':
              case 'P': case 'Q': case 'R': case 'S': case 'T':
              case 'U': case 'V': case 'W': case 'X': case 'Y':
              case 'Z':
              case '0': case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
              case '$': case '_':
                break;

              default:
                if (!Character.isJavaLetterOrDigit((char)ch)) {
                    idValue = Identifier.lookup(bufferString());
                    token = idValue.getType();
                    return;
                }
            }
        }
    }

    /**
     * The ending position of the current token
     */
    // Note: This should be part of the pos itself.
    public long getEndPos() {
        return in.pos;
    }

    /**
     * If the current token is IDENT, return the identifier occurrence.
     * It will be freshly allocated.
     */
    public IdentifierToken getIdToken() {
        return (token != IDENT) ? null : new IdentifierToken(pos, idValue);
    }

    /**
     * Scan the next token.
     * @return the position of the previous token.
     */
   public long scan() throws IOException {
       return xscan();
   }

    protected long xscan() throws IOException {
        final ScannerInputReader in = this.in;
        long retPos = pos;
        prevPos = in.pos;
        docComment = null;
        while (true) {
            pos = in.pos;

            switch (ch) {
              case EOF:
                token = EOF;
                return retPos;

              case '\n':
                if (scanComments) {
                    ch = ' ';
                    // Avoid this path the next time around.
                    // Do not just call in.read; we want to present
                    // a null token (and also avoid read-ahead).
                    token = COMMENT;
                    return retPos;
                }
              case ' ':
              case '\t':
              case '\f':
                ch = in.read();
                break;

              case '/':
                switch (ch = in.read()) {
                  case '/':
                    // Parse a // comment
                    while (((ch = in.read()) != EOF) && (ch != '\n'));
                    if (scanComments) {
                        token = COMMENT;
                        return retPos;
                    }
                    break;

                  case '*':
                    ch = in.read();
                    if (ch == '*') {
                        docComment = scanDocComment();
                    } else {
                        skipComment();
                    }
                    if (scanComments) {
                        return retPos;
                    }
                    break;

                  case '=':
                    ch = in.read();
                    token = ASGDIV;
                    return retPos;

                  default:
                    token = DIV;
                    return retPos;
                }
                break;

              case '"':
                scanString();
                return retPos;

              case '\'':
                scanCharacter();
                return retPos;

              case '0': case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
                scanNumber();
                return retPos;

              case '.':
                switch (ch = in.read()) {
                  case '0': case '1': case '2': case '3': case '4':
                  case '5': case '6': case '7': case '8': case '9':
                    count = 0;
                    putc('.');
                    scanReal();
                    break;
                  default:
                    token = FIELD;
                }
                return retPos;

              case '{':
                ch = in.read();
                token = LBRACE;
                return retPos;

              case '}':
                ch = in.read();
                token = RBRACE;
                return retPos;

              case '(':
                ch = in.read();
                token = LPAREN;
                return retPos;

              case ')':
                ch = in.read();
                token = RPAREN;
                return retPos;

              case '[':
                ch = in.read();
                token = LSQBRACKET;
                return retPos;

              case ']':
                ch = in.read();
                token = RSQBRACKET;
                return retPos;

              case ',':
                ch = in.read();
                token = COMMA;
                return retPos;

              case ';':
                ch = in.read();
                token = SEMICOLON;
                return retPos;

              case '?':
                ch = in.read();
                token = QUESTIONMARK;
                return retPos;

              case '~':
                ch = in.read();
                token = BITNOT;
                return retPos;

              case ':':
                ch = in.read();
                token = COLON;
                return retPos;

              case '-':
                switch (ch = in.read()) {
                  case '-':
                    ch = in.read();
                    token = DEC;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = ASGSUB;
                    return retPos;
                }
                token = SUB;
                return retPos;

              case '+':
                switch (ch = in.read()) {
                  case '+':
                    ch = in.read();
                    token = INC;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = ASGADD;
                    return retPos;
                }
                token = ADD;
                return retPos;

              case '<':
                switch (ch = in.read()) {
                  case '<':
                    if ((ch = in.read()) == '=') {
                        ch = in.read();
                        token = ASGLSHIFT;
                        return retPos;
                    }
                    token = LSHIFT;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = LE;
                    return retPos;
                }
                token = LT;
                return retPos;

              case '>':
                switch (ch = in.read()) {
                  case '>':
                    switch (ch = in.read()) {
                      case '=':
                        ch = in.read();
                        token = ASGRSHIFT;
                        return retPos;

                      case '>':
                        if ((ch = in.read()) == '=') {
                            ch = in.read();
                            token = ASGURSHIFT;
                            return retPos;
                        }
                        token = URSHIFT;
                        return retPos;
                    }
                    token = RSHIFT;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = GE;
                    return retPos;
                }
                token = GT;
                return retPos;

              case '|':
                switch (ch = in.read()) {
                  case '|':
                    ch = in.read();
                    token = OR;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = ASGBITOR;
                    return retPos;
                }
                token = BITOR;
                return retPos;

              case '&':
                switch (ch = in.read()) {
                  case '&':
                    ch = in.read();
                    token = AND;
                    return retPos;

                  case '=':
                    ch = in.read();
                    token = ASGBITAND;
                    return retPos;
                }
                token = BITAND;
                return retPos;

              case '=':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = EQ;
                    return retPos;
                }
                token = ASSIGN;
                return retPos;

              case '%':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = ASGREM;
                    return retPos;
                }
                token = REM;
                return retPos;

              case '^':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = ASGBITXOR;
                    return retPos;
                }
                token = BITXOR;
                return retPos;

              case '!':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = NE;
                    return retPos;
                }
                token = NOT;
                return retPos;

              case '*':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = ASGMUL;
                    return retPos;
                }
                token = MUL;
                return retPos;

              case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
              case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
              case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
              case 's': case 't': case 'u': case 'v': case 'w': case 'x':
              case 'y': case 'z':
              case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
              case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
              case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
              case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
              case 'Y': case 'Z':
              case '$': case '_':
                scanIdentifier();
                return retPos;

              case '\u001a':
                // Our one concession to DOS.
                if ((ch = in.read()) == EOF) {
                    token = EOF;
                    return retPos;
                }
                env.error(pos, "funny.char");
                ch = in.read();
                break;


              default:
                if (Character.isJavaLetter((char)ch)) {
                    scanIdentifier();
                    return retPos;
                }
                env.error(pos, "funny.char");
                ch = in.read();
                break;
            }
        }
    }

    /**
     * Scan to a matching '}', ']' or ')'. The current token must be
     * a '{', '[' or '(';
     */
    public void match(int open, int close) throws IOException {
        int depth = 1;

        while (true) {
            scan();
            if (token == open) {
                depth++;
            } else if (token == close) {
                if (--depth == 0) {
                    return;
                }
            } else if (token == EOF) {
                env.error(pos, "unbalanced.paren");
                return;
            }
        }
    }
}

Other Java examples (source code examples)

Here is a short list of links related to this Java Scanner.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.