alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (TestTermRangeQuery.java)

This example Lucene source code file (TestTermRangeQuery.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Lucene tags/keywords

a, a, b, b, c, c, d, exception, indexsearcher, indexsearcher, io, ioexception, query, string, termrangequery, text, util

The Lucene TestTermRangeQuery.java source code

package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.io.Reader;
import java.util.Locale;
import java.util.Set;
import java.util.HashSet;
import java.util.Arrays;
import java.text.Collator;


public class TestTermRangeQuery extends LuceneTestCase {

  private int docCount = 0;
  private Directory dir;
  
  /**
   * Runs the base-class set-up and then obtains a fresh {@link Directory}
   * from {@code newDirectory()} for the test to index into.
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    this.dir = newDirectory();
  }
  
  /**
   * Closes the per-test directory first and then delegates to the
   * base-class tear-down.
   */
  @Override
  public void tearDown() throws Exception {
    this.dir.close();
    super.tearDown();
  }

  /**
   * Checks an exclusive range query ("A", "C") on the "content" field
   * against three index states: {A,B,C,D}, {A,B,D}, and {A,B,D} with "C"
   * appended. In every state exactly one document is expected to match.
   */
  public void testExclusive() throws Exception {
    final Query exclusiveRange = new TermRangeQuery("content", "A", "C", false, false);

    // State 1: freshly created index with A, B, C, D.
    initializeIndex(new String[] {"A", "B", "C", "D"});
    IndexSearcher searcher = new IndexSearcher(dir, true);
    int matches = searcher.search(exclusiveRange, null, 1000).scoreDocs.length;
    assertEquals("A,B,C,D, only B in range", 1, matches);
    searcher.close();

    // State 2: index recreated without C.
    initializeIndex(new String[] {"A", "B", "D"});
    searcher = new IndexSearcher(dir, true);
    matches = searcher.search(exclusiveRange, null, 1000).scoreDocs.length;
    assertEquals("A,B,D, only B in range", 1, matches);
    searcher.close();

    // State 3: C appended to the existing index.
    addDoc("C");
    searcher = new IndexSearcher(dir, true);
    matches = searcher.search(exclusiveRange, null, 1000).scoreDocs.length;
    assertEquals("C added, still only B in range", 1, matches);
    searcher.close();
  }
  
  /**
   * Checks an inclusive range query ["A", "C"] on the "content" field
   * against three index states: {A,B,C,D} (3 hits expected), {A,B,D}
   * (2 hits expected), and {A,B,D} with "C" appended (3 hits expected).
   */
  public void testInclusive() throws Exception {
    final Query inclusiveRange = new TermRangeQuery("content", "A", "C", true, true);

    // State 1: freshly created index with A, B, C, D.
    initializeIndex(new String[]{"A", "B", "C", "D"});
    IndexSearcher searcher = new IndexSearcher(dir, true);
    int matches = searcher.search(inclusiveRange, null, 1000).scoreDocs.length;
    assertEquals("A,B,C,D - A,B,C in range", 3, matches);
    searcher.close();

    // State 2: index recreated without C.
    initializeIndex(new String[]{"A", "B", "D"});
    searcher = new IndexSearcher(dir, true);
    matches = searcher.search(inclusiveRange, null, 1000).scoreDocs.length;
    assertEquals("A,B,D - A and B in range", 2, matches);
    searcher.close();

    // State 3: C appended to the existing index.
    addDoc("C");
    searcher = new IndexSearcher(dir, true);
    matches = searcher.search(inclusiveRange, null, 1000).scoreDocs.length;
    assertEquals("C added - A, B, C in range", 3, matches);
    searcher.close();
  }

  /**
   * This test arguably does not belong here: it exercises the top-terms
   * scoring boolean rewrite (applied in {@code checkBooleanTerms}) on a
   * {@link TermRangeQuery}. It first checks that an inclusive range
   * ["B", "J"] rewrites to exactly the terms B..J, and then that, with
   * {@link BooleanQuery}'s max clause count lowered to 3, only the lower
   * end of the range (B, C, D) is kept.
   *
   * <p>The global max clause count is always restored, and the searcher is
   * always closed, even when one of the checks fails.
   */
  public void testTopTermsRewrite() throws Exception {
    initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"});

    IndexSearcher searcher = new IndexSearcher(dir, true);
    try {
      TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true);
      checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J");

      // BooleanQuery's max clause count is static state shared by all tests,
      // so remember it and put it back no matter what happens.
      final int savedClauseCount = BooleanQuery.getMaxClauseCount();
      try {
        BooleanQuery.setMaxClauseCount(3);
        checkBooleanTerms(searcher, query, "B", "C", "D");
      } finally {
        BooleanQuery.setMaxClauseCount(savedClauseCount);
      }
    } finally {
      // Close the searcher even if an assertion above failed.
      searcher.close();
    }
  }
  
  /**
   * Rewrites {@code query} with a
   * {@code MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50)} and asserts
   * that the result is a {@link BooleanQuery} whose clauses are
   * {@link TermQuery}s covering exactly the given terms, each exactly once.
   *
   * <p>Note: this sets the rewrite method on the passed-in query, so the
   * query is modified by the call.
   *
   * @param searcher searcher used to rewrite the query
   * @param query    range query to rewrite; its rewrite method is replaced
   * @param terms    the term texts the rewritten query must contain
   * @throws IOException if rewriting the query fails
   */
  private void checkBooleanTerms(Searcher searcher, TermRangeQuery query, String... terms) throws IOException {
    query.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
    final BooleanQuery bq = (BooleanQuery) searcher.rewrite(query);
    // Parameterized HashSet (was a raw type) so the assignment is type-checked.
    final Set<String> allowedTerms = new HashSet<String>(Arrays.asList(terms));
    assertEquals(allowedTerms.size(), bq.clauses().size());
    for (BooleanClause c : bq.clauses()) {
      assertTrue(c.getQuery() instanceof TermQuery);
      final TermQuery tq = (TermQuery) c.getQuery();
      final String term = tq.getTerm().text();
      assertTrue("invalid term: "+ term, allowedTerms.contains(term));
      allowedTerms.remove(term); // remove to fail on double terms
    }
    // Every expected term must have been seen.
    assertEquals(0, allowedTerms.size());
  }

  /**
   * Exercises {@code equals()} and {@code hashCode()} of
   * {@link TermRangeQuery}: identical queries must be equal with equal hash
   * codes, while a difference in boost, field, lower term, upper term,
   * inclusiveness or collator must make two queries unequal.
   */
  public void testEqualsHashcode() {
    // Two queries built identically, both with boost 1.0.
    final Query baseline = new TermRangeQuery("content", "A", "C", true, true);
    baseline.setBoost(1.0f);
    final Query twin = new TermRangeQuery("content", "A", "C", true, true);
    twin.setBoost(1.0f);

    assertEquals("query equals itself is true", baseline, baseline);
    assertEquals("equivalent queries are equal", baseline, twin);
    assertEquals("hashcode must return same value when equals is true", baseline.hashCode(), twin.hashCode());

    // Differing boost.
    twin.setBoost(2.0f);
    assertFalse("Different boost queries are not equal", baseline.equals(twin));

    // Differing field, lower term and upper term.
    final Query otherField = new TermRangeQuery("notcontent", "A", "C", true, true);
    assertFalse("Different fields are not equal", baseline.equals(otherField));

    final Query otherLower = new TermRangeQuery("content", "X", "C", true, true);
    assertFalse("Different lower terms are not equal", baseline.equals(otherLower));

    final Query otherUpper = new TermRangeQuery("content", "A", "Z", true, true);
    assertFalse("Different upper terms are not equal", baseline.equals(otherUpper));

    // Open-ended lower bound.
    final Query openLowerA = new TermRangeQuery("content", null, "C", true, true);
    final Query openLowerB = new TermRangeQuery("content", null, "C", true, true);
    assertEquals("equivalent queries with null lowerterms are equal()", openLowerA, openLowerB);
    assertEquals("hashcode must return same value when equals is true", openLowerA.hashCode(), openLowerB.hashCode());

    // Open-ended upper bound.
    final Query openUpperA = new TermRangeQuery("content", "C", null, true, true);
    final Query openUpperB = new TermRangeQuery("content", "C", null, true, true);
    assertEquals("equivalent queries with null upperterms are equal()", openUpperA, openUpperB);
    assertEquals("hashcode returns same value", openUpperA.hashCode(), openUpperB.hashCode());

    // Null lower bound versus null upper bound.
    final Query nullLower = new TermRangeQuery("content", null, "C", true, true);
    final Query nullUpper = new TermRangeQuery("content", "C", null, true, true);
    assertFalse("queries with different upper and lower terms are not equal", nullLower.equals(nullUpper));

    // Exclusive versus inclusive bounds.
    final Query exclusive = new TermRangeQuery("content", "A", "C", false, false);
    final Query inclusive = new TermRangeQuery("content", "A", "C", true, true);
    assertFalse("queries with different inclusive are not equal", exclusive.equals(inclusive));

    // Same bounds, one with a collator and one without.
    final Query plain = new TermRangeQuery("content", "A", "C", false, false);
    final Query collated = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance());
    assertFalse("a query with a collator is not equal to one without", plain.equals(collated));
  }

  /**
   * Same scenario as the plain exclusive range test, but the range query
   * ("A", "C") is built with an English {@link Collator}. Exactly one hit
   * is expected in each of the three index states.
   */
  public void testExclusiveCollating() throws Exception {
    final Query collatedRange =
        new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH));

    // State 1: freshly created index with A, B, C, D.
    initializeIndex(new String[] {"A", "B", "C", "D"});
    IndexSearcher searcher = new IndexSearcher(dir, true);
    int matches = searcher.search(collatedRange, null, 1000).scoreDocs.length;
    assertEquals("A,B,C,D, only B in range", 1, matches);
    searcher.close();

    // State 2: index recreated without C.
    initializeIndex(new String[] {"A", "B", "D"});
    searcher = new IndexSearcher(dir, true);
    matches = searcher.search(collatedRange, null, 1000).scoreDocs.length;
    assertEquals("A,B,D, only B in range", 1, matches);
    searcher.close();

    // State 3: C appended to the existing index.
    addDoc("C");
    searcher = new IndexSearcher(dir, true);
    matches = searcher.search(collatedRange, null, 1000).scoreDocs.length;
    assertEquals("C added, still only B in range", 1, matches);
    searcher.close();
  }

  /**
   * Same scenario as the plain inclusive range test, but the range query
   * ["A", "C"] is built with an English {@link Collator}. Expected hit
   * counts for the three index states are 3, 2 and 3.
   */
  public void testInclusiveCollating() throws Exception {
    final Query collatedRange =
        new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH));

    // State 1: freshly created index with A, B, C, D.
    initializeIndex(new String[]{"A", "B", "C", "D"});
    IndexSearcher searcher = new IndexSearcher(dir, true);
    int matches = searcher.search(collatedRange, null, 1000).scoreDocs.length;
    assertEquals("A,B,C,D - A,B,C in range", 3, matches);
    searcher.close();

    // State 2: index recreated without C.
    initializeIndex(new String[]{"A", "B", "D"});
    searcher = new IndexSearcher(dir, true);
    matches = searcher.search(collatedRange, null, 1000).scoreDocs.length;
    assertEquals("A,B,D - A and B in range", 2, matches);
    searcher.close();

    // State 3: C appended to the existing index.
    addDoc("C");
    searcher = new IndexSearcher(dir, true);
    matches = searcher.search(collatedRange, null, 1000).scoreDocs.length;
    assertEquals("C added - A, B, C in range", 3, matches);
    searcher.close();
  }

  /**
   * Checks collated range matching with Farsi text: a single indexed term
   * must be excluded from the range [U+062F, U+0698] and included in the
   * range [U+0633, U+0638] when an Arabic-locale collator is used.
   */
  public void testFarsi() throws Exception {
    // Java 1.4.2 and 1.5.0 offer no Farsi locale collation in
    // RuleBasedCollator; the Arabic locale seems to order the Farsi
    // characters properly, so it stands in here.
    final Collator arabicCollator = Collator.getInstance(new Locale("ar"));

    // In Unicode order U+0633 would fall inside [ U+062F - U+0698 ], but
    // Farsi orders U+0698 before U+0633. The single indexed term below must
    // therefore NOT be returned by a TermRangeQuery using a Farsi collator
    // (or an Arabic one where Farsi is unsupported).
    Query range = new TermRangeQuery("content", "\u062F", "\u0698", true, true, arabicCollator);
    initializeIndex(new String[]{ "\u0633\u0627\u0628"});
    final IndexSearcher searcher = new IndexSearcher(dir, true);
    int matches = searcher.search(range, null, 1000).scoreDocs.length;
    assertEquals("The index Term should not be included.", 0, matches);

    // Second range, searched with the same searcher.
    range = new TermRangeQuery("content", "\u0633", "\u0638",true, true, arabicCollator);
    matches = searcher.search(range, null, 1000).scoreDocs.length;
    assertEquals("The index Term should be included.", 1, matches);
    searcher.close();
  }
  
  /**
   * Checks collated range matching with a Danish collator: the exclusive
   * range between the first and third word must contain the second word,
   * while the exclusive range between the second and third must be empty.
   */
  public void testDanish() throws Exception {
    final Collator danishCollator = Collator.getInstance(new Locale("da", "dk"));
    // Danish collation sorts these words in the order listed (example taken
    // from TestSort.testInternationalSort() ).
    final String[] danishWords = { "H\u00D8T", "H\u00C5T", "MAND" };
    Query range = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, danishCollator);

    // Unicode order would not place the second word between the first and
    // the third, but Danish collation does.
    initializeIndex(danishWords);
    final IndexSearcher searcher = new IndexSearcher(dir, true);
    int matches = searcher.search(range, null, 1000).scoreDocs.length;
    assertEquals("The index Term should be included.", 1, matches);

    // Second range, searched with the same searcher.
    range = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, danishCollator);
    matches = searcher.search(range, null, 1000).scoreDocs.length;
    assertEquals("The index Term should not be included.", 0, matches);
    searcher.close();
  }

  /**
   * Analyzer whose token streams are {@link SingleCharTokenizer}s. It keeps
   * one tokenizer as the "previous token stream" and resets it onto each new
   * reader when a reusable stream is requested.
   */
  private static class SingleCharAnalyzer extends Analyzer {

    /**
     * Tokenizer that produces exactly one token per reader: it reads at most
     * one character and, if one was read, uses it as the term text.
     */
    private static class SingleCharTokenizer extends Tokenizer {
      private final char[] buffer = new char[1];
      private boolean done = false;
      private final CharTermAttribute termAtt;

      public SingleCharTokenizer(Reader r) {
        super(r);
        termAtt = addAttribute(CharTermAttribute.class);
      }

      /**
       * Emits the single token on the first call after construction or
       * reset, and reports exhaustion on every later call.
       *
       * @return {@code true} on the first call, {@code false} afterwards
       */
      @Override
      public boolean incrementToken() throws IOException {
        if (done) {
          return false;
        }
        final int charsRead = input.read(buffer);
        clearAttributes();
        done = true;
        if (charsRead == 1) {
          termAtt.copyBuffer(buffer, 0, 1);
        }
        // A token is reported even when no character was read; the term is
        // then whatever clearAttributes() left behind (presumably empty --
        // this looks like how the tests index an empty-string term; confirm).
        return true;
      }

      /** Re-targets the tokenizer at a new reader and re-arms it. */
      @Override
      public final void reset(Reader reader) throws IOException {
        super.reset(reader);
        done = false;
      }
    }

    /**
     * Returns the previously stored tokenizer reset onto {@code reader}, or
     * creates and stores a new one when none has been stored yet.
     */
    @Override
    public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
      final Tokenizer cached = (Tokenizer) getPreviousTokenStream();
      if (cached != null) {
        cached.reset(reader);
        return cached;
      }
      final Tokenizer fresh = new SingleCharTokenizer(reader);
      setPreviousTokenStream(fresh);
      return fresh;
    }

    /** Always returns a brand-new tokenizer over {@code reader}. */
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new SingleCharTokenizer(reader);
    }
  }

  /**
   * Recreates the index from {@code values} using a whitespace
   * {@link MockAnalyzer}; delegates to the two-argument overload.
   *
   * @param values one "content" value per document to index
   * @throws IOException if writing the index fails
   */
  private void initializeIndex(String[] values) throws IOException {
    final Analyzer whitespaceAnalyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
    initializeIndex(values, whitespaceAnalyzer);
  }

  /**
   * Recreates the index in {@code dir} (open mode {@code CREATE}) and adds
   * one document per entry of {@code values}, analyzed with
   * {@code analyzer}.
   *
   * <p>The writer is closed in a {@code finally} block so it is not left
   * open on the directory when adding a document throws.
   *
   * @param values   one "content" value per document to index
   * @param analyzer analyzer handed to the index writer configuration
   * @throws IOException if opening, writing to or closing the index fails
   */
  private void initializeIndex(String[] values, Analyzer analyzer) throws IOException {
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE));
    try {
      for (String value : values) {
        insertDoc(writer, value);
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Appends a single document with the given content to the existing index
   * (open mode {@code APPEND}), analyzed with a whitespace
   * {@link MockAnalyzer}.
   *
   * <p>Open question carried over from the original code: should a new
   * analyzer (and writer) really be created for every added document?
   *
   * <p>The writer is closed in a {@code finally} block so it is not left
   * open on the directory when adding the document throws.
   *
   * @param content value for the document's "content" field
   * @throws IOException if opening, writing to or closing the index fails
   */
  private void addDoc(String content) throws IOException {
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND));
    try {
      insertDoc(writer, content);
    } finally {
      writer.close();
    }
  }

  /**
   * Adds one document to {@code writer}: a stored, non-analyzed "id" field
   * built from the running {@code docCount}, and a non-stored, analyzed
   * "content" field. {@code docCount} is incremented after the document has
   * been added.
   *
   * @param writer  writer to add the document to
   * @param content value for the "content" field
   * @throws IOException if adding the document fails
   */
  private void insertDoc(IndexWriter writer, String content) throws IOException {
    final Document document = new Document();
    document.add(newField("id", "id" + docCount, Field.Store.YES, Field.Index.NOT_ANALYZED));
    document.add(newField("content", content, Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(document);

    // Only count documents that were actually handed to the writer.
    docCount += 1;
  }

  /**
   * LUCENE-38 (http://issues.apache.org/jira/browse/LUCENE-38): an exclusive
   * range with a {@code null} lower term and upper term "C", run over an
   * index built with {@code SingleCharAnalyzer} that includes an
   * empty-string value. The empty-string document is expected to be counted
   * in the range together with A and B (3 hits) in all three index states.
   */
  public void testExclusiveLowerNull() throws Exception {
    Analyzer analyzer = new SingleCharAnalyzer();
    Query query = new TermRangeQuery("content", null, "C",
                                 false, false);

    // State 1: freshly created index with A, B, <empty string>, C, D.
    initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
    IndexSearcher searcher = new IndexSearcher(dir, true);
    int numHits = searcher.search(query, null, 1000).totalHits;
    assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 3, numHits);
    searcher.close();

    // State 2: index recreated without C.
    initializeIndex(new String[] {"A", "B", "", "D"}, analyzer);
    searcher = new IndexSearcher(dir, true);
    numHits = searcher.search(query, null, 1000).totalHits;
    assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 3, numHits);
    searcher.close();

    // State 3: C appended; the upper bound is exclusive, so the count stays at 3.
    addDoc("C");
    searcher = new IndexSearcher(dir, true);
    numHits = searcher.search(query, null, 1000).totalHits;
    assertEquals("C added, still A, B & <empty string> are in range", 3, numHits);
    searcher.close();
  }

  /**
   * LUCENE-38 (http://issues.apache.org/jira/browse/LUCENE-38): an inclusive
   * range with a {@code null} lower term and upper term "C", run over an
   * index built with {@code SingleCharAnalyzer} that includes an
   * empty-string value. The empty-string document is expected to be counted
   * in the range: 4 hits when C is indexed, 3 hits when it is not.
   */
  public void testInclusiveLowerNull() throws Exception {
    Analyzer analyzer = new SingleCharAnalyzer();
    Query query = new TermRangeQuery("content", null, "C", true, true);

    // State 1: freshly created index with A, B, <empty string>, C, D.
    initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
    IndexSearcher searcher = new IndexSearcher(dir, true);
    int numHits = searcher.search(query, null, 1000).totalHits;
    assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 4, numHits);
    searcher.close();

    // State 2: index recreated without C.
    initializeIndex(new String[]{"A", "B", "", "D"}, analyzer);
    searcher = new IndexSearcher(dir, true);
    numHits = searcher.search(query, null, 1000).totalHits;
    assertEquals("A,B,<empty string>,D - A, B and <empty string> in range", 3, numHits);
    searcher.close();

    // State 3: C appended; the upper bound is inclusive, so it now matches too.
    addDoc("C");
    searcher = new IndexSearcher(dir, true);
    numHits = searcher.search(query, null, 1000).totalHits;
    assertEquals("C added => A,B,<empty string>,C in range", 4, numHits);
    searcher.close();
  }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene TestTermRangeQuery.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.