|
Lucene example source code file (ComplexPhraseQueryParser.java)
The Lucene ComplexPhraseQueryParser.java source codepackage org.apache.lucene.queryParser.complexPhrase; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNotQuery; import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.Version; /** * QueryParser which permits complex phrase query syntax eg "(john jon * jonathan~) peters*". * <p> * Performs potentially multiple passes over Query text to parse any nested * logic in PhraseQueries. - First pass takes any PhraseQuery content between * quotes and stores for subsequent pass. All other query content is parsed as * normal - Second pass parses any stored PhraseQuery content, checking all * embedded clauses are referring to the same field and therefore can be * rewritten as Span queries. All PhraseQuery clauses are expressed as * ComplexPhraseQuery objects * </p> * <p> * This could arguably be done in one pass using a new QueryParser but here I am * working within the constraints of the existing parser as a base class. This * currently simply feeds all phrase content through an analyzer to select * phrase terms - any "special" syntax such as * ~ * etc are not given special * status * </p> * */ public class ComplexPhraseQueryParser extends QueryParser { private ArrayList<ComplexPhraseQuery> complexPhrases = null; private boolean isPass2ResolvingPhrases; private ComplexPhraseQuery currentPhraseQuery = null; public ComplexPhraseQueryParser(Version matchVersion, String f, Analyzer a) { super(matchVersion, f, a); } @Override protected Query getFieldQuery(String field, String queryText, int slop) { ComplexPhraseQuery cpq = new ComplexPhraseQuery(field, queryText, slop); complexPhrases.add(cpq); // add to list of phrases to be parsed once // we // are through with this pass return cpq; } @Override public Query parse(String query) throws ParseException { if (isPass2ResolvingPhrases) { MultiTermQuery.RewriteMethod oldMethod = getMultiTermRewriteMethod(); try { // Temporarily force BooleanQuery rewrite so that Parser will // generate visible // collection of terms which we can convert into SpanQueries. // ConstantScoreRewrite mode produces an // opaque ConstantScoreQuery object which cannot be interrogated for // terms in the same way a BooleanQuery can. // QueryParser is not guaranteed threadsafe anyway so this temporary // state change should not // present an issue setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); return super.parse(query); } finally { setMultiTermRewriteMethod(oldMethod); } } // First pass - parse the top-level query recording any PhraseQuerys // which will need to be resolved complexPhrases = new ArrayList<ComplexPhraseQuery>(); Query q = super.parse(query); // Perform second pass, using this QueryParser to parse any nested // PhraseQueries with different // set of syntax restrictions (i.e. all fields must be same) isPass2ResolvingPhrases = true; try { for (Iterator<ComplexPhraseQuery> iterator = complexPhrases.iterator(); iterator.hasNext();) { currentPhraseQuery = iterator.next(); // in each phrase, now parse the contents between quotes as a // separate parse operation currentPhraseQuery.parsePhraseElements(this); } } finally { isPass2ResolvingPhrases = false; } return q; } // There is No "getTermQuery throws ParseException" method to override so // unfortunately need // to throw a runtime exception here if a term for another field is embedded // in phrase query @Override protected Query newTermQuery(Term term) { if (isPass2ResolvingPhrases) { try { checkPhraseClauseIsForSameField(term.field()); } catch (ParseException pe) { throw new RuntimeException("Error parsing complex phrase", pe); } } return super.newTermQuery(term); } // Helper method used to report on any clauses that appear in query syntax private void checkPhraseClauseIsForSameField(String field) throws ParseException { if (!field.equals(currentPhraseQuery.field)) { throw new ParseException("Cannot have clause for field \"" + field + "\" nested in phrase " + " for field \"" + currentPhraseQuery.field + "\""); } } @Override protected Query getWildcardQuery(String field, String termStr) throws ParseException { if (isPass2ResolvingPhrases) { checkPhraseClauseIsForSameField(field); } return super.getWildcardQuery(field, termStr); } @Override protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException { if (isPass2ResolvingPhrases) { checkPhraseClauseIsForSameField(field); } return super.getRangeQuery(field, part1, part2, inclusive); } @Override protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) { if (isPass2ResolvingPhrases) { // Must use old-style RangeQuery in order to produce a BooleanQuery // that can be turned into SpanOr clause TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, inclusive, inclusive, getRangeCollator()); rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); return rangeQuery; } return super.newRangeQuery(field, part1, part2, inclusive); } @Override protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { if (isPass2ResolvingPhrases) { checkPhraseClauseIsForSameField(field); } return super.getFuzzyQuery(field, termStr, minSimilarity); } /* * Used to handle the query content in between quotes and produced Span-based * interpretations of the clauses. */ static class ComplexPhraseQuery extends Query { String field; String phrasedQueryStringContents; int slopFactor; private Query contents; public ComplexPhraseQuery(String field, String phrasedQueryStringContents, int slopFactor) { super(); this.field = field; this.phrasedQueryStringContents = phrasedQueryStringContents; this.slopFactor = slopFactor; } // Called by ComplexPhraseQueryParser for each phrase after the main // parse // thread is through protected void parsePhraseElements(QueryParser qp) throws ParseException { // TODO ensure that field-sensitivity is preserved ie the query // string below is parsed as // field+":("+phrasedQueryStringContents+")" // but this will need code in rewrite to unwrap the first layer of // boolean query contents = qp.parse(phrasedQueryStringContents); } @Override public Query rewrite(IndexReader reader) throws IOException { // ArrayList spanClauses = new ArrayList(); if (contents instanceof TermQuery) { return contents; } // Build a sequence of Span clauses arranged in a SpanNear - child // clauses can be complex // Booleans e.g. nots and ors etc int numNegatives = 0; if (!(contents instanceof BooleanQuery)) { throw new IllegalArgumentException("Unknown query type \"" + contents.getClass().getName() + "\" found in phrase query string \"" + phrasedQueryStringContents + "\""); } BooleanQuery bq = (BooleanQuery) contents; BooleanClause[] bclauses = bq.getClauses(); SpanQuery[] allSpanClauses = new SpanQuery[bclauses.length]; // For all clauses e.g. one* two~ for (int i = 0; i < bclauses.length; i++) { // HashSet bclauseterms=new HashSet(); Query qc = bclauses[i].getQuery(); // Rewrite this clause e.g one* becomes (one OR onerous) qc = qc.rewrite(reader); if (bclauses[i].getOccur().equals(BooleanClause.Occur.MUST_NOT)) { numNegatives++; } if (qc instanceof BooleanQuery) { ArrayList<SpanQuery> sc = new ArrayList Other Lucene examples (source code examples)Here is a short list of links related to this Lucene ComplexPhraseQueryParser.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.