|
Lucene example source code file (TestPayloadSpans.java)
The Lucene TestPayloadSpans.java source codepackage org.apache.lucene.search.spans; /** * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.Collection; import java.util.HashSet; import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.LowerCaseTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Payload; import org.apache.lucene.index.Term; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Similarity; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.payloads.PayloadHelper; import org.apache.lucene.search.payloads.PayloadSpanUtil; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.LuceneTestCase; public class TestPayloadSpans extends LuceneTestCase { private IndexSearcher searcher; private Similarity similarity = new DefaultSimilarity(); protected IndexReader indexReader; private IndexReader closeIndexReader; private Directory directory; @Override public void setUp() throws Exception { super.setUp(); PayloadHelper helper = new PayloadHelper(); searcher = helper.setUp(random, similarity, 1000); indexReader = searcher.getIndexReader(); } public void testSpanTermQuery() throws Exception { SpanTermQuery stq; Spans spans; stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy")); spans = stq.getSpans(indexReader); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 100, 1, 1, 1); stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy")); spans = stq.getSpans(indexReader); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 100, 0, 0, 0); } public void testSpanFirst() throws IOException { SpanQuery match; SpanFirstQuery sfq; match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); sfq = new SpanFirstQuery(match, 2); Spans spans = sfq.getSpans(indexReader); checkSpans(spans, 109, 1, 1, 1); //Test more complicated subclause SpanQuery[] clauses = new SpanQuery[2]; clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred")); match = new SpanNearQuery(clauses, 0, true); sfq = new SpanFirstQuery(match, 2); checkSpans(sfq.getSpans(indexReader), 100, 2, 1, 1); match = new SpanNearQuery(clauses, 0, false); sfq = new SpanFirstQuery(match, 2); checkSpans(sfq.getSpans(indexReader), 100, 2, 1, 1); } public void testSpanNot() throws Exception { SpanQuery[] clauses = new SpanQuery[2]; clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three")); SpanQuery spq = new SpanNearQuery(clauses, 5, true); SpanNotQuery snq = new SpanNotQuery(spq, new SpanTermQuery(new Term(PayloadHelper.FIELD, "two"))); Directory directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); Document doc = new Document(); doc.add(newField(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); IndexReader reader = writer.getReader(); writer.close(); checkSpans(snq.getSpans(reader), 1,new int[]{2}); reader.close(); directory.close(); } public void testNestedSpans() throws Exception { SpanTermQuery stq; Spans spans; IndexSearcher searcher = getSearcher(); stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark")); spans = stq.getSpans(searcher.getIndexReader()); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 0, null); SpanQuery[] clauses = new SpanQuery[3]; clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")); clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false); spans = spanNearQuery.getSpans(searcher.getIndexReader()); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 2, new int[]{3,3}); clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")); spanNearQuery = new SpanNearQuery(clauses, 6, true); spans = spanNearQuery.getSpans(searcher.getIndexReader()); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 1, new int[]{3}); clauses = new SpanQuery[2]; clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); spanNearQuery = new SpanNearQuery(clauses, 6, true); // xx within 6 of rr SpanQuery[] clauses2 = new SpanQuery[2]; clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")); clauses2[1] = spanNearQuery; SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses2, 6, false); // yy within 6 of xx within 6 of rr spans = nestedSpanNearQuery.getSpans(searcher.getIndexReader()); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 2, new int[]{3,3}); searcher.close(); closeIndexReader.close(); directory.close(); } public void testFirstClauseWithoutPayload() throws Exception { Spans spans; IndexSearcher searcher = getSearcher(); SpanQuery[] clauses = new SpanQuery[3]; clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nopayload")); clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "qq")); clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ss")); SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 6, true); SpanQuery[] clauses2 = new SpanQuery[2]; clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "pp")); clauses2[1] = spanNearQuery; SpanNearQuery snq = new SpanNearQuery(clauses2, 6, false); SpanQuery[] clauses3 = new SpanQuery[2]; clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np")); clauses3[1] = snq; SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); spans = nestedSpanNearQuery.getSpans(searcher.getIndexReader()); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 1, new int[]{3}); searcher.close(); closeIndexReader.close(); directory.close(); } public void testHeavilyNestedSpanQuery() throws Exception { Spans spans; IndexSearcher searcher = getSearcher(); SpanQuery[] clauses = new SpanQuery[3]; clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "two")); clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three")); SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 5, true); clauses = new SpanQuery[3]; clauses[0] = spanNearQuery; clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "five")); clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "six")); SpanNearQuery spanNearQuery2 = new SpanNearQuery(clauses, 6, true); SpanQuery[] clauses2 = new SpanQuery[2]; clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "eleven")); clauses2[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ten")); SpanNearQuery spanNearQuery3 = new SpanNearQuery(clauses2, 2, false); SpanQuery[] clauses3 = new SpanQuery[3]; clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nine")); clauses3[1] = spanNearQuery2; clauses3[2] = spanNearQuery3; SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); spans = nestedSpanNearQuery.getSpans(searcher.getIndexReader()); assertTrue("spans is null and it shouldn't be", spans != null); checkSpans(spans, 2, new int[]{8, 8}); searcher.close(); closeIndexReader.close(); directory.close(); } public void testShrinkToAfterShortestMatch() throws CorruptIndexException, LockObtainFailedException, IOException { Directory directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer())); Document doc = new Document(); doc.add(new Field("content", new StringReader("a b c d e f g h i j a k"))); writer.addDocument(doc); IndexReader reader = writer.getReader(); IndexSearcher is = newSearcher(reader); writer.close(); SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); SpanQuery[] sqs = { stq1, stq2 }; SpanNearQuery snq = new SpanNearQuery(sqs, 1, true); Spans spans = snq.getSpans(is.getIndexReader()); TopDocs topDocs = is.search(snq, 1); Set<String> payloadSet = new HashSet Other Lucene examples (source code examples)Here is a short list of links related to this Lucene TestPayloadSpans.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.