|
Java example source code file (Parser.java)
The Parser.java Java example source code

/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
*/ package jdk.nashorn.internal.runtime.regexp.joni; import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnOff; import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup; import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase; import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; import jdk.nashorn.internal.runtime.regexp.joni.ast.AnyCharNode; import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode.CCStateArg; import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE; import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE; import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType; import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; class Parser extends Lexer { protected final Regex regex; protected Node root; protected int returnCode; // return code used by parser methods (they itself return parsed nodes) // this approach will not affect recursive calls protected Parser(ScanEnvironment env, char[] chars, int p, int end) { super(env, chars, p, end); regex = env.reg; } // 
onig_parse_make_tree protected final Node parse() { root = parseRegexp(); regex.numMem = env.numMem; return root; } private boolean codeExistCheck(int code, boolean ignoreEscaped) { mark(); boolean inEsc = false; while (left()) { if (ignoreEscaped && inEsc) { inEsc = false; } else { fetch(); if (c == code) { restore(); return true; } if (c == syntax.metaCharTable.esc) inEsc = true; } } restore(); return false; } private CClassNode parseCharClass() { fetchTokenInCC(); final boolean neg; if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) { neg = true; fetchTokenInCC(); } else { neg = false; } if (token.type == TokenType.CC_CLOSE) { if (!codeExistCheck(']', true)) { throw new SyntaxException(ERR_EMPTY_CHAR_CLASS); } env.ccEscWarn("]"); token.type = TokenType.CHAR; /* allow []...] */ } CClassNode cc = new CClassNode(); CClassNode prevCC = null; CClassNode workCC = null; CCStateArg arg = new CCStateArg(); boolean andStart = false; arg.state = CCSTATE.START; while (token.type != TokenType.CC_CLOSE) { boolean fetched = false; switch (token.type) { case CHAR: if (token.getC() > 0xff) { arg.inType = CCVALTYPE.CODE_POINT; } else { arg.inType = CCVALTYPE.SB; // sb_char: } arg.v = token.getC(); arg.vIsRaw = false; parseCharClassValEntry2(cc, arg); // goto val_entry2 break; case RAW_BYTE: arg.v = token.getC(); arg.inType = CCVALTYPE.SB; // raw_single: arg.vIsRaw = true; parseCharClassValEntry2(cc, arg); // goto val_entry2 break; case CODE_POINT: arg.v = token.getCode(); arg.vIsRaw = true; parseCharClassValEntry(cc, arg); // val_entry:, val_entry2 break; case CHAR_TYPE: cc.addCType(token.getPropCType(), token.getPropNot(), env, this); cc.nextStateClass(arg, env); // next_class: break; case CC_RANGE: if (arg.state == CCSTATE.VALUE) { fetchTokenInCC(); fetched = true; if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */ parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry; break; } else if (token.type == TokenType.CC_AND) { 
env.ccEscWarn("-"); parseCharClassRangeEndVal(cc, arg); // goto range_end_val break; } arg.state = CCSTATE.RANGE; } else if (arg.state == CCSTATE.START) { arg.v = token.getC(); /* [-xa] is allowed */ arg.vIsRaw = false; fetchTokenInCC(); fetched = true; if (token.type == TokenType.CC_RANGE || andStart) env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */ parseCharClassValEntry(cc, arg); // goto val_entry break; } else if (arg.state == CCSTATE.RANGE) { env.ccEscWarn("-"); parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */ break; } else { /* CCS_COMPLETE */ fetchTokenInCC(); fetched = true; if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */ parseCharClassRangeEndVal(cc, arg); // goto range_end_val break; } else if (token.type == TokenType.CC_AND) { env.ccEscWarn("-"); parseCharClassRangeEndVal(cc, arg); // goto range_end_val break; } if (syntax.allowDoubleRangeOpInCC()) { env.ccEscWarn("-"); arg.inType = CCVALTYPE.SB; arg.v = '-'; arg.vIsRaw = false; parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */ break; } throw new SyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS); } break; case CC_CC_OPEN: /* [ */ CClassNode acc = parseCharClass(); cc.or(acc); break; case CC_AND: /* && */ if (arg.state == CCSTATE.VALUE) { arg.v = 0; // ??? safe v ? arg.vIsRaw = false; cc.nextStateValue(arg, env); } /* initialize local variables */ andStart = true; arg.state = CCSTATE.START; if (prevCC != null) { prevCC.and(cc); } else { prevCC = cc; if (workCC == null) workCC = new CClassNode(); cc = workCC; } cc.clear(); break; case EOT: throw new SyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS); default: throw new InternalException(ERR_PARSER_BUG); } // switch if (!fetched) fetchTokenInCC(); } // while if (arg.state == CCSTATE.VALUE) { arg.v = 0; // ??? safe v ? 
arg.vIsRaw = false; cc.nextStateValue(arg, env); } if (prevCC != null) { prevCC.and(cc); cc = prevCC; } if (neg) { cc.setNot(); } else { cc.clearNot(); } if (cc.isNot() && syntax.notNewlineInNegativeCC()) { if (!cc.isEmpty()) { final int NEW_LINE = 0x0a; if (EncodingHelper.isNewLine(NEW_LINE)) { cc.bs.set(NEW_LINE); } } } return cc; } private void parseCharClassSbChar(CClassNode cc, CCStateArg arg) { arg.inType = CCVALTYPE.SB; arg.v = token.getC(); arg.vIsRaw = false; parseCharClassValEntry2(cc, arg); // goto val_entry2 } private void parseCharClassRangeEndVal(CClassNode cc, CCStateArg arg) { arg.v = '-'; arg.vIsRaw = false; parseCharClassValEntry(cc, arg); // goto val_entry } private void parseCharClassValEntry(CClassNode cc, CCStateArg arg) { arg.inType = arg.v <= 0xff ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT; parseCharClassValEntry2(cc, arg); // val_entry2: } private void parseCharClassValEntry2(CClassNode cc, CCStateArg arg) { cc.nextStateValue(arg, env); } private Node parseEnclose(TokenType term) { Node node = null; if (!left()) { throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); } int option = env.option; if (peekIs('?') && syntax.op2QMarkGroupEffect()) { inc(); if (!left()) { throw new SyntaxException(ERR_END_PATTERN_IN_GROUP); } fetch(); switch(c) { case ':': /* (?:...) grouping only */ fetchToken(); // group: node = parseSubExp(term); returnCode = 1; /* group */ return node; case '=': node = new AnchorNode(AnchorType.PREC_READ); break; case '!': /* preceding read */ node = new AnchorNode(AnchorType.PREC_READ_NOT); break; case '>': /* (?>...) stop backtrack */ node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose break; case '\'': break; case '<': /* look behind (?<=...), (? |
... this post is sponsored by my books ...
#1 New Release!
FP Best Seller
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.