alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (TestSpans.java)

This example Lucene source code file (TestSpans.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Lucene tags/keywords

document, does, does, exception, exception, io, ioexception, override, spannearquery, spanorquery, spanquery, spanquery, spans, string, string

The Lucene TestSpans.java source code

package org.apache.lucene.search.spans;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;

public class TestSpans extends LuceneTestCase {
  // Searcher over the index built in setUp(); closed in tearDown().
  private IndexSearcher searcher;
  // Reader obtained from the RandomIndexWriter in setUp(); closed in tearDown().
  private IndexReader reader;
  // Directory that holds the test index; created in setUp(), closed in tearDown().
  private Directory directory;
  
  // Name of the indexed field written by setUp() and queried by makeSpanTermQuery().
  public static final String field = "field";

  /**
   * Builds the shared test index: one document per entry of {@code docFields},
   * added in array order, then opens a reader and a searcher over it.
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    IndexWriterConfig config =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
            .setMergePolicy(newLogMergePolicy());
    RandomIndexWriter indexWriter = new RandomIndexWriter(random, directory, config);
    for (String text : docFields) {
      Document document = new Document();
      document.add(newField(field, text, Field.Store.YES, Field.Index.ANALYZED));
      indexWriter.addDocument(document);
    }
    reader = indexWriter.getReader();
    indexWriter.close();
    searcher = newSearcher(reader);
  }
  
  /**
   * Releases the searcher, reader and directory created in {@link #setUp()}.
   *
   * Each close is attempted even if an earlier one throws, and
   * {@code super.tearDown()} always runs, so one failing close cannot leave
   * the remaining resources open or skip the base-class cleanup.
   */
  @Override
  public void tearDown() throws Exception {
    try {
      // Null guards cover the case where setUp() did not get as far as
      // assigning the field.
      if (searcher != null) {
        searcher.close();
      }
    } finally {
      try {
        if (reader != null) {
          reader.close();
        }
      } finally {
        try {
          if (directory != null) {
            directory.close();
          }
        } finally {
          super.tearDown();
        }
      }
    }
  }
  
  // Field text of every document in the test index. setUp() adds them in this
  // order, and the tests below refer to documents by their position in this
  // array (e.g. doc 11 for the t1/t2/t3 document).
  private String[] docFields = {
    "w1 w2 w3 w4 w5",      // doc 0
    "w1 w3 w2 w3",         // doc 1
    "w1 xx w2 yy w3",      // doc 2
    "w1 w3 xx w2 yy w3",   // doc 3
    "u2 u2 u1",            // doc 4
    "u2 xx u2 u1",         // doc 5
    "u2 u2 xx u1",         // doc 6
    "u2 xx u2 yy u1",      // doc 7
    "u2 xx u1 u2",         // doc 8
    "u2 u1 xx u2",         // doc 9
    "u1 u2 xx u2",         // doc 10
    "t1 t2 t1 t3 t2 t3"    // doc 11
  };

  /** Creates a {@link SpanTermQuery} matching {@code text} in the shared test field. */
  public SpanTermQuery makeSpanTermQuery(String text) {
    Term term = new Term(field, text);
    return new SpanTermQuery(term);
  }
  
  /**
   * Delegates to {@code CheckHits.checkHits}, passing the test's random source,
   * the shared field name and the searcher built in setUp().
   *
   * @param query   the query to run
   * @param results the doc ids the query is expected to hit
   */
  private void checkHits(Query query, int[] results) throws IOException {
    CheckHits.checkHits(random, query, field, searcher, results);
  }
  
  /**
   * Builds an in-order {@link SpanNearQuery} from the three clauses with the
   * given slop and verifies its hits against {@code expectedDocs} via checkHits.
   */
  private void orderedSlopTest3SQ(
        SpanQuery q1,
        SpanQuery q2,
        SpanQuery q3,
        int slop,
        int[] expectedDocs) throws IOException {
    SpanQuery[] clauses = {q1, q2, q3};
    SpanNearQuery nearQuery = new SpanNearQuery(clauses, slop, true);
    checkHits(nearQuery, expectedDocs);
  }
  
  /** Ordered near query over the terms w1, w2, w3 with the given slop. */
  public void orderedSlopTest3(int slop, int[] expectedDocs) throws IOException {
    SpanQuery first = makeSpanTermQuery("w1");
    SpanQuery second = makeSpanTermQuery("w2");
    SpanQuery third = makeSpanTermQuery("w3");
    orderedSlopTest3SQ(first, second, third, slop, expectedDocs);
  }
  
  /** Ordered near query over w1, w3, w3 (last two clauses equal) with the given slop. */
  public void orderedSlopTest3Equal(int slop, int[] expectedDocs) throws IOException {
    SpanQuery first = makeSpanTermQuery("w1");
    SpanQuery second = makeSpanTermQuery("w3");
    SpanQuery third = makeSpanTermQuery("w3");
    orderedSlopTest3SQ(first, second, third, slop, expectedDocs);
  }
  
  /** Ordered near query over u2, u2, u1 (first two clauses equal) with the given slop. */
  public void orderedSlopTest1Equal(int slop, int[] expectedDocs) throws IOException {
    SpanQuery first = makeSpanTermQuery("u2");
    SpanQuery second = makeSpanTermQuery("u2");
    SpanQuery third = makeSpanTermQuery("u1");
    orderedSlopTest3SQ(first, second, third, slop, expectedDocs);
  }
  
  /** Ordered w1,w2,w3 with slop 0 is expected to hit doc 0 only. */
  public void testSpanNearOrdered01() throws Exception {
    final int[] expected = {0};
    orderedSlopTest3(0, expected);
  }

  /** Ordered w1,w2,w3 with slop 1 is expected to hit docs 0 and 1. */
  public void testSpanNearOrdered02() throws Exception {
    final int[] expected = {0, 1};
    orderedSlopTest3(1, expected);
  }

  /** Ordered w1,w2,w3 with slop 2 is expected to hit docs 0, 1 and 2. */
  public void testSpanNearOrdered03() throws Exception {
    final int[] expected = {0, 1, 2};
    orderedSlopTest3(2, expected);
  }

  /** Ordered w1,w2,w3 with slop 3 is expected to hit docs 0 through 3. */
  public void testSpanNearOrdered04() throws Exception {
    final int[] expected = {0, 1, 2, 3};
    orderedSlopTest3(3, expected);
  }

  /** Ordered w1,w2,w3 with slop 4 is expected to hit the same docs as slop 3: 0 through 3. */
  public void testSpanNearOrdered05() throws Exception {
    final int[] expected = {0, 1, 2, 3};
    orderedSlopTest3(4, expected);
  }
  
  /** Ordered w1,w3,w3 with slop 0 is expected to hit no documents. */
  public void testSpanNearOrderedEqual01() throws Exception {
    final int[] expected = {};
    orderedSlopTest3Equal(0, expected);
  }

  /** Ordered w1,w3,w3 with slop 1 is expected to hit doc 1 only. */
  public void testSpanNearOrderedEqual02() throws Exception {
    final int[] expected = {1};
    orderedSlopTest3Equal(1, expected);
  }

  /** Ordered w1,w3,w3 with slop 2 is expected to hit doc 1 only. */
  public void testSpanNearOrderedEqual03() throws Exception {
    final int[] expected = {1};
    orderedSlopTest3Equal(2, expected);
  }

  /** Ordered w1,w3,w3 with slop 3 is expected to hit docs 1 and 3. */
  public void testSpanNearOrderedEqual04() throws Exception {
    final int[] expected = {1, 3};
    orderedSlopTest3Equal(3, expected);
  }
  
  /** Ordered u2,u2,u1 with slop 0 is expected to hit doc 4 only. */
  public void testSpanNearOrderedEqual11() throws Exception {
    final int[] expected = {4};
    orderedSlopTest1Equal(0, expected);
  }
  
  /**
   * Ordered u2,u2,u1 with slop 0 is expected to hit doc 4 only.
   *
   * NOTE(review): slop and expected docs are identical to
   * testSpanNearOrderedEqual11 -- confirm whether a different slop was intended.
   */
  public void testSpanNearOrderedEqual12() throws Exception {
    final int[] expected = {4};
    orderedSlopTest1Equal(0, expected);
  }
  
  /** Ordered u2,u2,u1 with slop 1 is expected to hit docs 4, 5 and 6. */
  public void testSpanNearOrderedEqual13() throws Exception {
    final int[] expected = {4, 5, 6};
    orderedSlopTest1Equal(1, expected);
  }
  
  /** Ordered u2,u2,u1 with slop 2 is expected to hit docs 4 through 7. */
  public void testSpanNearOrderedEqual14() throws Exception {
    final int[] expected = {4, 5, 6, 7};
    orderedSlopTest1Equal(2, expected);
  }

  /** Ordered u2,u2,u1 with slop 3 is expected to hit the same docs as slop 2: 4 through 7. */
  public void testSpanNearOrderedEqual15() throws Exception {
    final int[] expected = {4, 5, 6, 7};
    orderedSlopTest1Equal(3, expected);
  }

  /**
   * Ordered t1,t2,t3 with slop 1: expects exactly two spans, both in doc 11
   * ("t1 t2 t1 t3 t2 t3"), with start/end 0/4 and 2/6, i.e. overlapping.
   */
  public void testSpanNearOrderedOverlap() throws Exception {
    SpanQuery[] clauses = {
      makeSpanTermQuery("t1"),
      makeSpanTermQuery("t2"),
      makeSpanTermQuery("t3")
    };
    SpanNearQuery nearQuery = new SpanNearQuery(clauses, 1, true);
    Spans matches = nearQuery.getSpans(searcher.getIndexReader());

    // First span.
    boolean hasFirst = matches.next();
    assertTrue("first range", hasFirst);
    assertEquals("first doc", 11, matches.doc());
    assertEquals("first start", 0, matches.start());
    assertEquals("first end", 4, matches.end());

    // Second span, overlapping the first.
    boolean hasSecond = matches.next();
    assertTrue("second range", hasSecond);
    assertEquals("second doc", 11, matches.doc());
    assertEquals("second start", 2, matches.start());
    assertEquals("second end", 6, matches.end());

    // Nothing further.
    boolean hasThird = matches.next();
    assertFalse("third range", hasThird);
  }


  /**
   * Walks the spans of two unordered near queries and checks every
   * (doc, start, end) triple in iteration order:
   * first u1/u2 with slop 0, then (u1/u2 slop 0) near u2 with slop 1.
   */
  public void testSpanNearUnOrdered() throws Exception {

    //See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test
    SpanQuery[] pairClauses = {
      makeSpanTermQuery("u1"),
      makeSpanTermQuery("u2")
    };
    SpanNearQuery snq = new SpanNearQuery(pairClauses, 0, false);
    Spans spans = snq.getSpans(searcher.getIndexReader());

    // Each row is {doc, start, end}, in the order the spans must be returned.
    int[][] expectedPairSpans = {
      {4, 1, 3},
      {5, 2, 4},
      {8, 2, 4},
      {9, 0, 2},
      {10, 0, 2}
    };
    for (int[] row : expectedPairSpans) {
      assertTrue("Does not have next and it should", spans.next());
      assertEquals("doc", row[0], spans.doc());
      assertEquals("start", row[1], spans.start());
      assertEquals("end", row[2], spans.end());
    }
    // The message is built before next() is called, so it reports the doc of
    // the last span that was returned.
    String exhaustedMessage = "Has next and it shouldn't: " + spans.doc();
    assertFalse(exhaustedMessage, spans.next());

    SpanQuery[] innerClauses = {
      makeSpanTermQuery("u1"),
      makeSpanTermQuery("u2")
    };
    SpanNearQuery u1u2 = new SpanNearQuery(innerClauses, 0, false);
    SpanQuery[] nestedClauses = {
      u1u2,
      makeSpanTermQuery("u2")
    };
    snq = new SpanNearQuery(nestedClauses, 1, false);
    spans = snq.getSpans(searcher.getIndexReader());

    // Each row is {doc, start, end}. Unordered spans can be subsets of one
    // another, e.g. doc 4 yields both 0..3 and 1..3.
    int[][] expectedNestedSpans = {
      {4, 0, 3},
      {4, 1, 3},
      {5, 0, 4},
      {5, 2, 4},
      {8, 0, 4},
      {8, 2, 4},
      {9, 0, 2},
      {9, 0, 4},
      {10, 0, 2}
    };
    for (int[] row : expectedNestedSpans) {
      assertTrue("Does not have next and it should", spans.next());
      assertEquals("doc", row[0], spans.doc());
      assertEquals("start", row[1], spans.start());
      assertEquals("end", row[2], spans.end());
    }

    assertFalse("Has next and it shouldn't", spans.next());
  }



  /**
   * Returns the spans of a {@link SpanOrQuery} over one span term query per
   * entry of {@code terms}, evaluated against the shared searcher's reader.
   */
  private Spans orSpans(String[] terms) throws Exception {
    SpanQuery[] clauses = new SpanQuery[terms.length];
    int slot = 0;
    for (String term : terms) {
      clauses[slot++] = makeSpanTermQuery(term);
    }
    SpanOrQuery orQuery = new SpanOrQuery(clauses);
    return orQuery.getSpans(searcher.getIndexReader());
  }

  /**
   * Advances {@code spans} by one and asserts that the new position is the
   * given doc with the given start and end.
   */
  private void tstNextSpans(Spans spans, int doc, int start, int end)
  throws Exception {
    boolean advanced = spans.next();
    assertTrue("next", advanced);

    int actualDoc = spans.doc();
    assertEquals("doc", doc, actualDoc);

    int actualStart = spans.start();
    assertEquals("start", start, actualStart);

    int actualEnd = spans.end();
    assertEquals("end", end, actualEnd);
  }

  /**
   * A SpanOrQuery with no clauses must yield no spans, and two such empty
   * queries must compare equal.
   */
  public void testSpanOrEmpty() throws Exception {
    String[] noTerms = new String[0];
    Spans emptySpans = orSpans(noTerms);
    assertFalse("empty next", emptySpans.next());

    SpanOrQuery left = new SpanOrQuery( new SpanQuery[0] );
    SpanOrQuery right = new SpanOrQuery( new SpanQuery[0] );
    boolean same = left.equals(right);
    assertTrue("empty should equal", same);
  }

  /** A single-clause or-query on w5 must yield exactly one span: doc 0, 4..5. */
  public void testSpanOrSingle() throws Exception {
    String[] terms = {"w5"};
    Spans spans = orSpans(terms);
    tstNextSpans(spans, 0, 4, 5);
    boolean hasMore = spans.next();
    assertFalse("final next", hasMore);
  }
  
  /**
   * LUCENE-1583: skipTo() with a target at or before the current doc must
   * still advance the spans.
   *
   * The results of next() and skipTo() are asserted so that doc() is only
   * read on a positioned Spans, and so that a failure to advance is reported
   * at the call that failed rather than as a confusing doc mismatch.
   */
  public void testSpanOrMovesForward() throws Exception {
    Spans spans = orSpans(new String[] {"w1", "xx"});

    assertTrue("initial next", spans.next());
    int doc = spans.doc();
    assertEquals(0, doc);
    
    assertTrue("skipTo current doc", spans.skipTo(0));
    doc = spans.doc();
    
    // LUCENE-1583:
    // according to Spans, a skipTo to the same doc or less
    // should still call next() on the underlying Spans
    assertEquals(1, doc);

  }
  
  /** Or-query over w5 and yy: checks all four spans in order, then exhaustion. */
  public void testSpanOrDouble() throws Exception {
    Spans spans = orSpans(new String[] {"w5", "yy"});
    // Each row is {doc, start, end}.
    int[][] expected = {
      {0, 4, 5},
      {2, 3, 4},
      {3, 4, 5},
      {7, 3, 4}
    };
    for (int[] row : expected) {
      tstNextSpans(spans, row[0], row[1], row[2]);
    }
    assertFalse("final next", spans.next());
  }

  /**
   * Or-query over w5 and yy: an initial skipTo(3) must land on doc 3 (4..5),
   * followed by doc 7 (3..4) and then exhaustion.
   */
  public void testSpanOrDoubleSkip() throws Exception {
    String[] terms = {"w5", "yy"};
    Spans spans = orSpans(terms);

    boolean skipped = spans.skipTo(3);
    assertTrue("initial skipTo", skipped);
    assertEquals("doc", 3, spans.doc());
    assertEquals("start", 4, spans.start());
    assertEquals("end", 5, spans.end());

    tstNextSpans(spans, 7, 3, 4);

    boolean hasMore = spans.next();
    assertFalse("final next", hasMore);
  }

  /**
   * Or-query over w5, yy and a term that occurs in no document: expects the
   * same four spans that testSpanOrDouble checks for w5 and yy alone.
   */
  public void testSpanOrUnused() throws Exception {
    String[] terms = {"w5", "unusedTerm", "yy"};
    Spans spans = orSpans(terms);
    // Each row is {doc, start, end}.
    int[][] expected = {
      {0, 4, 5},
      {2, 3, 4},
      {3, 4, 5},
      {7, 3, 4}
    };
    for (int[] row : expected) {
      tstNextSpans(spans, row[0], row[1], row[2]);
    }
    boolean hasMore = spans.next();
    assertFalse("final next", hasMore);
  }

  /**
   * Or-query over t1, t2, t3: doc 11 must yield one single-position span for
   * each of its six positions, in position order.
   */
  public void testSpanOrTripleSameDoc() throws Exception {
    Spans spans = orSpans(new String[] {"t1", "t2", "t3"});
    for (int position = 0; position < 6; position++) {
      tstNextSpans(spans, 11, position, position + 1);
    }
    assertFalse("final next", spans.next());
  }

  /**
   * Scores an ordered t1,t2 near query (slop 1) with a Similarity whose
   * sloppyFreq() always returns 0: the matching doc (11) must still be
   * returned by the scorer, with a score of exactly zero.
   */
  public void testSpanScorerZeroSloppyFreq() throws Exception {
    boolean ordered = true;
    int slop = 1;

    // Similarity that gives every span match a frequency contribution of zero.
    final Similarity sim = new DefaultSimilarity() {
      @Override
      public float sloppyFreq(int distance) {
        return 0.0f;
      }
    };

    // Override getSimilarity so this query uses the zero-frequency Similarity.
    SpanNearQuery snq = new SpanNearQuery(
                              new SpanQuery[] {
                                makeSpanTermQuery("t1"),
                                makeSpanTermQuery("t2") },
                              slop,
                              ordered) {
      @Override
      public Similarity getSimilarity(Searcher s) {
        return sim;
      }
      };

    Scorer spanScorer = searcher.createNormalizedWeight(snq).scorer(searcher.getIndexReader(), true, false);

    assertTrue("first doc", spanScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    // Expected value first, actual second, so a failure reads "expected 11 but was ...".
    assertEquals("first doc number", 11, spanScorer.docID());
    float score = spanScorer.score();
    assertTrue("first doc score should be zero, " + score, score == 0.0f);
    assertTrue("no second doc", spanScorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
  }

  // LUCENE-1404
  /**
   * Adds one document to {@code writer} with a stored, non-analyzed "id"
   * field and a stored, analyzed "text" field.
   */
  private void addDoc(IndexWriter writer, String id, String text) throws IOException {
    final Document document = new Document();
    document.add(newField("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
    document.add(newField("text", text, Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(document);
  }

  // LUCENE-1404
  /**
   * Returns the total hit count reported by a top-10 search for a TermQuery
   * on {@code word} in the "text" field.
   */
  private int hitCount(Searcher searcher, String word) throws Throwable {
    Term term = new Term("text", word);
    Query query = new TermQuery(term);
    return searcher.search(query, 10).totalHits;
  }

  // LUCENE-1404
  /** Creates a span term query for {@code value} in the "text" field. */
  private SpanQuery createSpan(String value) {
    Term term = new Term("text", value);
    return new SpanTermQuery(term);
  }
  
  // LUCENE-1404
  /**
   * Wraps {@code clauses} in a {@link SpanNearQuery}.
   *
   * @param slop    passed through as the near query's slop
   * @param ordered passed through as the near query's in-order flag
   * @param clauses the sub-queries to combine
   */
  private SpanQuery createSpan(int slop, boolean ordered, SpanQuery[] clauses) {
    return new SpanNearQuery(clauses, slop, ordered);
  }

  // LUCENE-1404
  /**
   * Creates a near query over two span term queries on the "text" field,
   * {@code term1} first, with the given slop and in-order flag.
   */
  private SpanQuery createSpan(int slop, boolean ordered, String term1, String term2) {
    SpanQuery[] clauses = {createSpan(term1), createSpan(term2)};
    return createSpan(slop, ordered, clauses);
  }

  // LUCENE-1404
  /**
   * Regression test for LUCENE-1404: searching an ordered near query whose
   * first clause is itself an unordered near query must complete and return
   * the single matching document.
   *
   * All resources are released in finally blocks so that a failing assertion
   * or indexing call does not leave the writer, reader, searcher or directory
   * open.
   */
  public void testNPESpanQuery() throws Throwable {
    final Directory dir = newDirectory();
    try {
      final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
          TEST_VERSION_CURRENT, new MockAnalyzer(random)));
      try {
        // Add documents
        addDoc(writer, "1", "the big dogs went running to the market");
        addDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly");
      } finally {
        // Commit
        writer.close();
      }

      // Get searcher
      final IndexReader reader = IndexReader.open(dir, true);
      try {
        final IndexSearcher searcher = newSearcher(reader);
        try {
          // Control (make sure docs indexed)
          assertEquals(2, hitCount(searcher, "the"));
          assertEquals(1, hitCount(searcher, "cat"));
          assertEquals(1, hitCount(searcher, "dogs"));
          assertEquals(0, hitCount(searcher, "rabbit"));

          // NOTE(review): judging by the test name, this nested query used to
          // fail with an NPE before the LUCENE-1404 fix; it must now return
          // exactly one hit.
          assertEquals(1,
                       searcher.search(createSpan(0, true,
                                                  new SpanQuery[] {createSpan(4, false, "chased", "cat"),
                                                                   createSpan("ate")}), 10).totalHits);
        } finally {
          searcher.close();
        }
      } finally {
        reader.close();
      }
    } finally {
      dir.close();
    }
  }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene TestSpans.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.