|
Lucene example source code file (QueryParser.jj)
The Lucene QueryParser.jj source code/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* Surround query language parser */ /* Query language operators: OR, AND, NOT, W, N, (, ), ^, *, ?, " and comma */ options { STATIC=false; JAVA_UNICODE_ESCAPE=true; USER_CHAR_STREAM=true; } PARSER_BEGIN(QueryParser) package org.apache.lucene.queryParser.surround.parser; import java.util.ArrayList; import java.util.List; import java.io.StringReader; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.queryParser.surround.query.SrndQuery; import org.apache.lucene.queryParser.surround.query.FieldsQuery; import org.apache.lucene.queryParser.surround.query.OrQuery; import org.apache.lucene.queryParser.surround.query.AndQuery; import org.apache.lucene.queryParser.surround.query.NotQuery; import org.apache.lucene.queryParser.surround.query.DistanceQuery; import org.apache.lucene.queryParser.surround.query.SrndTermQuery; import org.apache.lucene.queryParser.surround.query.SrndPrefixQuery; import org.apache.lucene.queryParser.surround.query.SrndTruncQuery; /** * This class is generated by JavaCC. The only method that clients should need * to call is <a href="#parse">parse()</a>. 
*/
public class QueryParser {
  /** Threshold used by {@link #allowedSuffix(String)}: term length minus one must reach it. */
  final int minimumPrefixLength = 3;
  /** Threshold used by {@link #allowedTruncation(String)}: minimum count of non-wildcard characters. */
  final int minimumCharsInTrunc = 3;
  /* NOTE(review): the two messages below are not referenced inside this class body;
   * presumably the grammar actions use them -- confirm. */
  final String truncationErrorMessage = "Too unrestrictive truncation: ";
  final String boostErrorMessage = "Cannot handle boost value: ";

  /* CHECKME: These should be the same as for the tokenizer. How? */
  final char truncator = '*';
  final char anyChar = '?';
  final char quote = '\"';
  final char fieldOperator = ':';
  final char comma = ','; /* prefix list separator */
  final char carat = '^'; /* weight operator */

  /**
   * Parses a query string with a freshly created parser.
   *
   * @param query the query text
   * @return the query produced by {@link #parse2(String)}
   * @throws ParseException as thrown by {@link #parse2(String)}
   */
  static public SrndQuery parse(String query) throws ParseException {
    QueryParser parser = new QueryParser();
    return parser.parse2(query);
  }

  /** Creates a parser over an empty input; {@link #parse2(String)} re-initialises it per query. */
  public QueryParser() {
    this(new FastCharStream(new StringReader("")));
  }

  /**
   * Re-initialises this parser on {@code query} and runs the top-level production.
   *
   * @param query the query text
   * @return the result of {@code TopSrndQuery()}
   * @throws ParseException on a grammar error, or when the token manager raises a
   *         {@code TokenMgrError}; in the latter case the error is attached as the cause
   */
  public SrndQuery parse2(String query) throws ParseException {
    ReInit(new FastCharStream(new StringReader(query)));
    try {
      return TopSrndQuery();
    } catch (TokenMgrError tme) {
      /* Keep the lexical error as the cause so its stack trace is not lost. */
      ParseException pe = new ParseException(tme.getMessage());
      pe.initCause(tme);
      throw pe;
    }
  }

  /**
   * Wraps {@code q} in a {@code FieldsQuery} for the given field names,
   * using {@link #fieldOperator} as the separator character.
   */
  protected SrndQuery getFieldsQuery(
      SrndQuery q, ArrayList<String> fieldNames) {
    /* FIXME: check acceptable subquery: at least one subquery should not be
     * a fields query.
     */
    return new FieldsQuery(q, fieldNames, fieldOperator);
  }

  /** Builds an {@code OrQuery} over {@code queries}; the operator text is taken from {@code orToken.image}. */
  protected SrndQuery getOrQuery(List<SrndQuery> queries, boolean infix, Token orToken) {
    return new OrQuery(queries, infix, orToken.image);
  }

  /** Builds an {@code AndQuery} over {@code queries}; the operator text is taken from {@code andToken.image}. */
  protected SrndQuery getAndQuery(List<SrndQuery> queries, boolean infix, Token andToken) {
    return new AndQuery( queries, infix, andToken.image);
  }

  /** Builds a {@code NotQuery} over {@code queries}; the operator text is taken from {@code notToken.image}. */
  protected SrndQuery getNotQuery(List<SrndQuery> queries, Token notToken) {
    return new NotQuery( queries, notToken.image);
  }

  /**
   * Extracts the distance from a distance operator image.
   *
   * @param distanceOp operator text such as {@code W}, {@code 2W}, {@code 3N}
   * @return 1 for a single-character operator, otherwise the integer formed by
   *         every character except the last
   */
  protected static int getOpDistance(String distanceOp) {
    /* W, 2W, 3W etc -> 1, 2 3, etc. Same for N, 2N ... */
    return distanceOp.length() == 1
      ? 1
      : Integer.parseInt( distanceOp.substring( 0, distanceOp.length() - 1));
  }

  /**
   * Rejects a distance query whose subqueries are not allowed.
   *
   * @param distq the distance query to check
   * @param opName operator text included in the error message
   * @throws ParseException when {@code distq.distanceSubQueryNotAllowed()} returns a non-null message
   */
  protected static void checkDistanceSubQueries(DistanceQuery distq, String opName)
  throws ParseException {
    String m = distq.distanceSubQueryNotAllowed();
    if (m != null) {
      throw new ParseException("Operator " + opName + ": " + m);
    }
  }

  /**
   * Builds a {@code DistanceQuery} whose distance comes from {@link #getOpDistance(String)}
   * applied to {@code dToken.image}, then validates it with
   * {@link #checkDistanceSubQueries(DistanceQuery, String)}.
   *
   * @throws ParseException when the subquery check fails
   */
  protected SrndQuery getDistanceQuery(
        List<SrndQuery> queries,
        boolean infix,
        Token dToken,
        boolean ordered) throws ParseException {
    DistanceQuery dq = new DistanceQuery(queries,
                                        infix,
                                        getOpDistance(dToken.image),
                                        dToken.image,
                                        ordered);
    checkDistanceSubQueries(dq, dToken.image);
    return dq;
  }

  /** Builds a {@code SrndTermQuery} for {@code term}. */
  protected SrndQuery getTermQuery(
        String term, boolean quoted) {
    return new SrndTermQuery(term, quoted);
  }

  /**
   * Returns whether the length of {@code suffixed} minus one (presumably the
   * trailing truncator) is at least {@link #minimumPrefixLength}.
   */
  protected boolean allowedSuffix(String suffixed) {
    return (suffixed.length() - 1) >= minimumPrefixLength;
  }

  /** Builds a {@code SrndPrefixQuery} for {@code prefix}, passing {@link #truncator}. */
  protected SrndQuery getPrefixQuery(
      String prefix, boolean quoted) {
    return new SrndPrefixQuery(prefix, quoted, truncator);
  }

  /**
   * Returns whether {@code truncated} contains at least {@link #minimumCharsInTrunc}
   * characters that are neither {@link #truncator} nor {@link #anyChar}.
   */
  protected boolean allowedTruncation(String truncated) {
    /* At least 3 normal characters needed. */
    int nrNormalChars = 0;
    for (int i = 0; i < truncated.length(); i++) {
      char c = truncated.charAt(i);
      if ((c != truncator) && (c != anyChar)) {
        nrNormalChars++;
      }
    }
    return nrNormalChars >= minimumCharsInTrunc;
  }

  /** Builds a {@code SrndTruncQuery} for {@code truncated}, passing {@link #truncator} and {@link #anyChar}. */
  protected SrndQuery getTruncQuery(String truncated) {
    return new SrndTruncQuery(truncated, truncator, anyChar);
  }
}

PARSER_END(QueryParser)

/* ***************** */
/* Token Definitions */
/* ***************** */

<*> TOKEN : {
  <#_NUM_CHAR:   ["0"-"9"] >
| <#_TERM_CHAR: /* everything except whitespace and operators */
    ( ~[ " ", "\t", "\n", "\r", ",", "?", "*", "(", ")", ":", "^", "\""] ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" ) >
| <#_STAR: "*" > /* term truncation */
| <#_ONE_CHAR: "?" > /* precisely one character in a term */
/* 2..99 prefix for distance operators */
| <#_DISTOP_NUM: ((["2"-"9"](["0"-"9"])?)
| ("1" ["0"-"9"]))>
}

<DEFAULT> SKIP : {
  < <_WHITESPACE>>
}

/* Operator tokens (in increasing order of precedence): */
<DEFAULT> TOKEN : {
  <OR: "OR" | "or">
| <AND: "AND" | "and">
| <NOT: "NOT" | "not">
| <W: (<_DISTOP_NUM>)? ("W"|"w")>
| <N: (<_DISTOP_NUM>)? ("N"|"n")>
/* These are excluded in _TERM_CHAR: */
| <LPAREN: "(">
| <RPAREN: ")">
| <COMMA: ",">
| <COLON: ":">
| <CARAT: "^"> : Boost
/* Literal non empty term between double quotes,
 * escape quoted quote or backslash by backslash.
 * Optionally truncated.
 */
| <TRUNCQUOTED: "\"" (~["\""])+ "\"" <_STAR>>
| <QUOTED: "\"" ( (~["\"", "\\"]) | ("\\" ["\\", "\""]))+ "\"">
| <SUFFIXTERM: (<_TERM_CHAR>)+ <_STAR>>
| <TRUNCTERM: (<_TERM_CHAR>)+
              (<_STAR> | <_ONE_CHAR> )+ /* at least one * or ? */
              (<_TERM_CHAR> | <_STAR> | <_ONE_CHAR> )* >
| <TERM: (<_TERM_CHAR>)+>
}

/* Entered after CARAT; a NUMBER switches the lexer back to DEFAULT. */
<Boost> TOKEN : {
  <NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )?> : DEFAULT
}

/* Top-level production: a FieldsQuery followed by end of input. */
SrndQuery TopSrndQuery() : {
  SrndQuery q;
}{
  q = FieldsQuery()
  <EOF>
  {return q;}
}

/* Optional field prefixes followed by an OrQuery; the result is wrapped by
 * getFieldsQuery only when at least one field name was given. */
SrndQuery FieldsQuery() : {
  SrndQuery q;
  ArrayList<String> fieldNames;
}{
  fieldNames = OptionalFields()
  q = OrQuery()
  {return (fieldNames == null) ? q : getFieldsQuery(q, fieldNames);}
}

/* Collects zero or more "TERM COLON" prefixes.
 * Returns null (not an empty list) when no prefix is present. */
ArrayList<String> OptionalFields() : {
  Token fieldName;
  ArrayList<String> fieldNames = null;
}{
  ( LOOKAHEAD(2) /* to the colon */
    fieldName = <TERM> <COLON>
    {
      /* the list is created lazily on the first field name */
      if (fieldNames == null) {
        fieldNames = new ArrayList<String>();
      }
      fieldNames.add(fieldName.image);
    }
  )*
  {return fieldNames;}
}

/* One or more AndQuery operands joined by infix OR. */
SrndQuery OrQuery() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries = null;
  Token oprt = null;
}{
  q = AndQuery()
  ( oprt = <OR>
    { /* keep only last used operator */
      if (queries == null) {
        queries = new ArrayList<SrndQuery>();
        queries.add(q);
      }
    }
    q = AndQuery()
    { queries.add(q); }
  )*
  {return (queries == null) ?
q : getOrQuery(queries, true /* infix */, oprt);}
}

/* One or more NotQuery operands joined by infix AND.
 * A single operand is returned as is, without an AndQuery wrapper. */
SrndQuery AndQuery() : {
  SrndQuery operand;
  ArrayList<SrndQuery> operands = null;
  Token lastAnd = null;
}{
  operand = NotQuery()
  ( lastAnd = <AND>
    { /* keep only last used operator */
      if (operands == null) {
        /* first AND seen: start the list with the left-hand operand */
        operands = new ArrayList<SrndQuery>();
        operands.add(operand);
      }
    }
    operand = NotQuery()
    { operands.add(operand); }
  )*
  {return (operands == null) ? operand : getAndQuery(operands, true /* infix */, lastAnd);}
}

/* One or more NQuery operands joined by infix NOT.
 * A single operand is returned as is, without a NotQuery wrapper. */
SrndQuery NotQuery() : {
  SrndQuery operand;
  ArrayList<SrndQuery> operands = null;
  Token lastNot = null;
}{
  operand = NQuery()
  ( lastNot = <NOT>
    { /* keep only last used operator */
      if (operands == null) {
        /* first NOT seen: start the list with the left-hand operand */
        operands = new ArrayList<SrndQuery>();
        operands.add(operand);
      }
    }
    operand = NQuery()
    { operands.add(operand); }
  )*
  {return (operands == null) ? operand : getNotQuery(operands, lastNot);}
}

/* WQuery operands joined by the N distance operator (built as not ordered).
 * Each operator application folds the result so far with the next operand. */
SrndQuery NQuery() : {
  SrndQuery result;
  ArrayList<SrndQuery> pair;
  Token nToken;
}{
  result = WQuery()
  ( nToken = <N>
    {
      pair = new ArrayList<SrndQuery>();
      pair.add(result); /* left associative */
    }
    result = WQuery()
    {
      pair.add(result);
      result = getDistanceQuery(pair, true /* infix */, nToken, false /* not ordered */);
    }
  )*
  {return result;}
}

/* PrimaryQuery operands joined by the W distance operator (built as ordered).
 * Each operator application folds the result so far with the next operand. */
SrndQuery WQuery() : {
  SrndQuery result;
  ArrayList<SrndQuery> pair;
  Token wToken;
}{
  result = PrimaryQuery()
  ( wToken = <W>
    {
      pair = new ArrayList<SrndQuery>();
      pair.add(result); /* left associative */
    }
    result = PrimaryQuery()
    {
      pair.add(result);
      result = getDistanceQuery(pair, true /* infix */, wToken, true /* ordered */);
    }
  )*
  {return result;}
}

SrndQuery PrimaryQuery() : { /* bracketed weighted query or weighted term */
  SrndQuery q;
}{
  ( <LPAREN> q = FieldsQuery()
Other Lucene examples (source code examples)Here is a short list of links related to this Lucene QueryParser.jj source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.