alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (TestTermRangeFilter.java)

This example Lucene source code file (TestTermRangeFilter.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Lucene tags/keywords

f, f, indexreader, indexsearcher, io, query, string, string, t, t, term, termquery, termrangefilter, termrangefilter, test, text, util

The Lucene TestTermRangeFilter.java source code

package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.text.Collator;
import java.util.Locale;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.junit.Test;

/**
 * A basic 'positive' Unit test class for the TermRangeFilter class.
 * 
 * <p>
 * NOTE: at the moment, this class only tests for 'positive' results; it does
 * not verify the results to ensure there are no 'false positives', nor does it
 * adequately test 'negative' results. It also does not test that garbage in
 * results in an Exception.
 */
public class TestTermRangeFilter extends BaseTestRangeFilter {
  
  /**
   * Verifies the number of hits a {@link TermRangeFilter} lets through for
   * ranges over the "id" field, using the reader exposed as
   * {@code signedIndexReader}. Bounds are built from {@code minId},
   * {@code maxId} and their midpoint via {@code pad(...)}; the cases cover
   * ranges bounded on both ends, ranges with a {@code null} (open) bound and
   * single-term ranges, each with the inclusive/exclusive flags shown.
   *
   * <p>
   * The base query is expected to match every document (see the "find all"
   * assertion), so each count reflects the filter alone.
   *
   * @throws IOException if searching or closing the searcher fails
   */
  @Test
  public void testRangeFilterId() throws IOException {

    IndexReader reader = signedIndexReader;
    IndexSearcher search = newSearcher(reader);

    // Close the searcher even when an assertion fails, so a failing case does
    // not additionally leak it.
    try {
      // True midpoint of [minId, maxId]; (maxId - minId) / 2 alone is only the
      // midpoint when minId happens to be 0.
      int medId = minId + (maxId - minId) / 2;

      String minIP = pad(minId);
      String maxIP = pad(maxId);
      String medIP = pad(medId);

      int numDocs = reader.numDocs();

      // expected value first, actual second, so a failure message reads right
      assertEquals("num of docs", 1 + maxId - minId, numDocs);

      ScoreDoc[] result;
      Query q = new TermQuery(new Term("body", "body"));

      // test id, bounded on both ends

      result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, T),
          numDocs).scoreDocs;
      assertEquals("find all", numDocs, result.length);

      result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, F),
          numDocs).scoreDocs;
      assertEquals("all but last", numDocs - 1, result.length);

      result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, T),
          numDocs).scoreDocs;
      assertEquals("all but first", numDocs - 1, result.length);

      result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, F),
          numDocs).scoreDocs;
      assertEquals("all but ends", numDocs - 2, result.length);

      result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, T),
          numDocs).scoreDocs;
      assertEquals("med and up", 1 + maxId - medId, result.length);

      result = search.search(q, new TermRangeFilter("id", minIP, medIP, T, T),
          numDocs).scoreDocs;
      assertEquals("up to med", 1 + medId - minId, result.length);

      // unbounded id (a null bound leaves that end of the range open)

      result = search.search(q, new TermRangeFilter("id", minIP, null, T, F),
          numDocs).scoreDocs;
      assertEquals("min and up", numDocs, result.length);

      result = search.search(q, new TermRangeFilter("id", null, maxIP, F, T),
          numDocs).scoreDocs;
      assertEquals("max and down", numDocs, result.length);

      result = search.search(q, new TermRangeFilter("id", minIP, null, F, F),
          numDocs).scoreDocs;
      assertEquals("not min, but up", numDocs - 1, result.length);

      result = search.search(q, new TermRangeFilter("id", null, maxIP, F, F),
          numDocs).scoreDocs;
      assertEquals("not max, but down", numDocs - 1, result.length);

      result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, F),
          numDocs).scoreDocs;
      assertEquals("med and up, not max", maxId - medId, result.length);

      result = search.search(q, new TermRangeFilter("id", minIP, medIP, F, T),
          numDocs).scoreDocs;
      assertEquals("not min, up to med", medId - minId, result.length);

      // very small sets

      result = search.search(q, new TermRangeFilter("id", minIP, minIP, F, F),
          numDocs).scoreDocs;
      assertEquals("min,min,F,F", 0, result.length);
      result = search.search(q, new TermRangeFilter("id", medIP, medIP, F, F),
          numDocs).scoreDocs;
      assertEquals("med,med,F,F", 0, result.length);
      result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, F, F),
          numDocs).scoreDocs;
      assertEquals("max,max,F,F", 0, result.length);

      result = search.search(q, new TermRangeFilter("id", minIP, minIP, T, T),
          numDocs).scoreDocs;
      assertEquals("min,min,T,T", 1, result.length);
      result = search.search(q, new TermRangeFilter("id", null, minIP, F, T),
          numDocs).scoreDocs;
      assertEquals("nul,min,F,T", 1, result.length);

      result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, T, T),
          numDocs).scoreDocs;
      assertEquals("max,max,T,T", 1, result.length);
      result = search.search(q, new TermRangeFilter("id", maxIP, null, T, F),
          numDocs).scoreDocs;
      // label matches the flags actually passed (T, F)
      assertEquals("max,nul,T,F", 1, result.length);

      result = search.search(q, new TermRangeFilter("id", medIP, medIP, T, T),
          numDocs).scoreDocs;
      assertEquals("med,med,T,T", 1, result.length);
    } finally {
      search.close();
    }
  }
  
  /**
   * Collator-based counterpart of the "id" range checks: verifies the total
   * hit count a {@link TermRangeFilter} built with an English
   * {@link Collator} lets through for ranges over the "id" field, using the
   * reader exposed as {@code signedIndexReader}. Covers ranges bounded on
   * both ends, ranges with a {@code null} (open) bound and single-term
   * ranges, each with the inclusive/exclusive flags shown.
   *
   * <p>
   * Only {@code totalHits} is inspected, so the fixed result-size argument of
   * 1000 does not affect the counts being asserted.
   *
   * @throws IOException if searching or closing the searcher fails
   */
  @Test
  public void testRangeFilterIdCollating() throws IOException {

    IndexReader reader = signedIndexReader;
    IndexSearcher search = newSearcher(reader);

    // Close the searcher even when an assertion fails, so a failing case does
    // not additionally leak it.
    try {
      Collator c = Collator.getInstance(Locale.ENGLISH);

      // True midpoint of [minId, maxId]; (maxId - minId) / 2 alone is only the
      // midpoint when minId happens to be 0.
      int medId = minId + (maxId - minId) / 2;

      String minIP = pad(minId);
      String maxIP = pad(maxId);
      String medIP = pad(medId);

      int numDocs = reader.numDocs();

      // expected value first, actual second, so a failure message reads right
      assertEquals("num of docs", 1 + maxId - minId, numDocs);

      Query q = new TermQuery(new Term("body", "body"));

      // test id, bounded on both ends
      int numHits = search.search(q,
          new TermRangeFilter("id", minIP, maxIP, T, T, c), 1000).totalHits;
      assertEquals("find all", numDocs, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", minIP, maxIP, T, F, c), 1000).totalHits;
      assertEquals("all but last", numDocs - 1, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits;
      assertEquals("all but first", numDocs - 1, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits;
      assertEquals("all but ends", numDocs - 2, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits;
      assertEquals("med and up", 1 + maxId - medId, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits;
      assertEquals("up to med", 1 + medId - minId, numHits);

      // unbounded id (a null bound leaves that end of the range open)

      numHits = search.search(q,
          new TermRangeFilter("id", minIP, null, T, F, c), 1000).totalHits;
      assertEquals("min and up", numDocs, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", null, maxIP, F, T, c), 1000).totalHits;
      assertEquals("max and down", numDocs, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", minIP, null, F, F, c), 1000).totalHits;
      assertEquals("not min, but up", numDocs - 1, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", null, maxIP, F, F, c), 1000).totalHits;
      assertEquals("not max, but down", numDocs - 1, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", medIP, maxIP, T, F, c), 1000).totalHits;
      assertEquals("med and up, not max", maxId - medId, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits;
      assertEquals("not min, up to med", medId - minId, numHits);

      // very small sets

      numHits = search.search(q,
          new TermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits;
      assertEquals("min,min,F,F", 0, numHits);
      numHits = search.search(q,
          new TermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits;
      assertEquals("med,med,F,F", 0, numHits);
      numHits = search.search(q,
          new TermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits;
      assertEquals("max,max,F,F", 0, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits;
      assertEquals("min,min,T,T", 1, numHits);
      numHits = search.search(q,
          new TermRangeFilter("id", null, minIP, F, T, c), 1000).totalHits;
      assertEquals("nul,min,F,T", 1, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits;
      assertEquals("max,max,T,T", 1, numHits);
      numHits = search.search(q,
          new TermRangeFilter("id", maxIP, null, T, F, c), 1000).totalHits;
      // label matches the flags actually passed (T, F)
      assertEquals("max,nul,T,F", 1, numHits);

      numHits = search.search(q,
          new TermRangeFilter("id", medIP, medIP, T, T, c), 1000).totalHits;
      assertEquals("med,med,T,T", 1, numHits);
    } finally {
      search.close();
    }
  }
  
  /**
   * Verifies the number of hits a {@link TermRangeFilter} lets through for
   * ranges over the "rand" field, using the reader exposed as
   * {@code signedIndexReader}. Bounds are the padded {@code minR} and
   * {@code maxR} values recorded on {@code signedIndexDir}; the cases cover
   * ranges bounded on both ends, ranges with a {@code null} (open) bound and
   * single-term ranges, each with the inclusive/exclusive flags shown.
   *
   * @throws IOException if searching or closing the searcher fails
   */
  @Test
  public void testRangeFilterRand() throws IOException {

    IndexReader reader = signedIndexReader;
    IndexSearcher search = newSearcher(reader);

    // Close the searcher even when an assertion fails, so a failing case does
    // not additionally leak it.
    try {
      String minRP = pad(signedIndexDir.minR);
      String maxRP = pad(signedIndexDir.maxR);

      int numDocs = reader.numDocs();

      // expected value first, actual second, so a failure message reads right
      assertEquals("num of docs", 1 + maxId - minId, numDocs);

      ScoreDoc[] result;
      Query q = new TermQuery(new Term("body", "body"));

      // test extremes, bounded on both ends

      result = search.search(q,
          new TermRangeFilter("rand", minRP, maxRP, T, T), numDocs).scoreDocs;
      assertEquals("find all", numDocs, result.length);

      result = search.search(q,
          new TermRangeFilter("rand", minRP, maxRP, T, F), numDocs).scoreDocs;
      assertEquals("all but biggest", numDocs - 1, result.length);

      result = search.search(q,
          new TermRangeFilter("rand", minRP, maxRP, F, T), numDocs).scoreDocs;
      assertEquals("all but smallest", numDocs - 1, result.length);

      result = search.search(q,
          new TermRangeFilter("rand", minRP, maxRP, F, F), numDocs).scoreDocs;
      assertEquals("all but extremes", numDocs - 2, result.length);

      // unbounded (a null bound leaves that end of the range open)

      result = search.search(q,
          new TermRangeFilter("rand", minRP, null, T, F), numDocs).scoreDocs;
      assertEquals("smallest and up", numDocs, result.length);

      result = search.search(q,
          new TermRangeFilter("rand", null, maxRP, F, T), numDocs).scoreDocs;
      assertEquals("biggest and down", numDocs, result.length);

      result = search.search(q,
          new TermRangeFilter("rand", minRP, null, F, F), numDocs).scoreDocs;
      assertEquals("not smallest, but up", numDocs - 1, result.length);

      result = search.search(q,
          new TermRangeFilter("rand", null, maxRP, F, F), numDocs).scoreDocs;
      assertEquals("not biggest, but down", numDocs - 1, result.length);

      // very small sets

      result = search.search(q,
          new TermRangeFilter("rand", minRP, minRP, F, F), numDocs).scoreDocs;
      assertEquals("min,min,F,F", 0, result.length);
      result = search.search(q,
          new TermRangeFilter("rand", maxRP, maxRP, F, F), numDocs).scoreDocs;
      assertEquals("max,max,F,F", 0, result.length);

      result = search.search(q,
          new TermRangeFilter("rand", minRP, minRP, T, T), numDocs).scoreDocs;
      assertEquals("min,min,T,T", 1, result.length);
      result = search.search(q,
          new TermRangeFilter("rand", null, minRP, F, T), numDocs).scoreDocs;
      assertEquals("nul,min,F,T", 1, result.length);

      result = search.search(q,
          new TermRangeFilter("rand", maxRP, maxRP, T, T), numDocs).scoreDocs;
      assertEquals("max,max,T,T", 1, result.length);
      result = search.search(q,
          new TermRangeFilter("rand", maxRP, null, T, F), numDocs).scoreDocs;
      // label matches the flags actually passed (T, F)
      assertEquals("max,nul,T,F", 1, result.length);
    } finally {
      search.close();
    }
  }
  
  /**
   * Collator-based counterpart of the "rand" range checks: verifies the total
   * hit count a {@link TermRangeFilter} built with an English
   * {@link Collator} lets through for ranges over the "rand" field. Bounds
   * are the padded {@code minR} and {@code maxR} values recorded on
   * {@code unsignedIndexDir}; the cases cover ranges bounded on both ends,
   * ranges with a {@code null} (open) bound and single-term ranges.
   *
   * <p>
   * Only {@code totalHits} is inspected, so the fixed result-size argument of
   * 1000 does not affect the counts being asserted.
   *
   * @throws IOException if searching or closing the searcher fails
   */
  @Test
  public void testRangeFilterRandCollating() throws IOException {

    // using the unsigned index because collation seems to ignore hyphens
    IndexReader reader = unsignedIndexReader;
    IndexSearcher search = newSearcher(reader);

    // Close the searcher even when an assertion fails, so a failing case does
    // not additionally leak it.
    try {
      Collator c = Collator.getInstance(Locale.ENGLISH);

      String minRP = pad(unsignedIndexDir.minR);
      String maxRP = pad(unsignedIndexDir.maxR);

      int numDocs = reader.numDocs();

      // expected value first, actual second, so a failure message reads right
      assertEquals("num of docs", 1 + maxId - minId, numDocs);

      Query q = new TermQuery(new Term("body", "body"));

      // test extremes, bounded on both ends

      int numHits = search.search(q,
          new TermRangeFilter("rand", minRP, maxRP, T, T, c), 1000).totalHits;
      assertEquals("find all", numDocs, numHits);

      numHits = search.search(q,
          new TermRangeFilter("rand", minRP, maxRP, T, F, c), 1000).totalHits;
      assertEquals("all but biggest", numDocs - 1, numHits);

      numHits = search.search(q,
          new TermRangeFilter("rand", minRP, maxRP, F, T, c), 1000).totalHits;
      assertEquals("all but smallest", numDocs - 1, numHits);

      numHits = search.search(q,
          new TermRangeFilter("rand", minRP, maxRP, F, F, c), 1000).totalHits;
      assertEquals("all but extremes", numDocs - 2, numHits);

      // unbounded (a null bound leaves that end of the range open)

      numHits = search.search(q,
          new TermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits;
      assertEquals("smallest and up", numDocs, numHits);

      numHits = search.search(q,
          new TermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits;
      assertEquals("biggest and down", numDocs, numHits);

      numHits = search.search(q,
          new TermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits;
      assertEquals("not smallest, but up", numDocs - 1, numHits);

      numHits = search.search(q,
          new TermRangeFilter("rand", null, maxRP, F, F, c), 1000).totalHits;
      assertEquals("not biggest, but down", numDocs - 1, numHits);

      // very small sets

      numHits = search.search(q,
          new TermRangeFilter("rand", minRP, minRP, F, F, c), 1000).totalHits;
      assertEquals("min,min,F,F", 0, numHits);
      numHits = search.search(q,
          new TermRangeFilter("rand", maxRP, maxRP, F, F, c), 1000).totalHits;
      assertEquals("max,max,F,F", 0, numHits);

      numHits = search.search(q,
          new TermRangeFilter("rand", minRP, minRP, T, T, c), 1000).totalHits;
      assertEquals("min,min,T,T", 1, numHits);
      numHits = search.search(q,
          new TermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits;
      assertEquals("nul,min,F,T", 1, numHits);

      numHits = search.search(q,
          new TermRangeFilter("rand", maxRP, maxRP, T, T, c), 1000).totalHits;
      assertEquals("max,max,T,T", 1, numHits);
      numHits = search.search(q,
          new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits;
      // label matches the flags actually passed (T, F)
      assertEquals("max,nul,T,F", 1, numHits);
    } finally {
      search.close();
    }
  }
  
  /**
   * Checks that a {@link TermRangeFilter} given a {@link Collator} applies
   * the collator's ordering rather than Unicode code-point order. A single
   * document whose "content" term starts with U+0633 is indexed, then two
   * ranges are filtered with an Arabic-locale collator: one that contains the
   * term only in Unicode order (expected 0 hits) and one that contains it
   * under the collator (expected 1 hit).
   *
   * <p>
   * The directory, reader and searcher are released in {@code finally}
   * blocks, so a failed assertion does not leave them open.
   *
   * @throws Exception if indexing, searching or closing a resource fails
   */
  @Test
  public void testFarsi() throws Exception {

    /* build an index */
    Directory farsiIndex = newDirectory();
    try {
      RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex);
      IndexReader reader;
      try {
        Document doc = new Document();
        doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES,
            Field.Index.NOT_ANALYZED));
        doc.add(newField("body", "body", Field.Store.YES,
            Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);

        reader = writer.getReader();
      } finally {
        writer.close();
      }

      try {
        IndexSearcher search = newSearcher(reader);
        try {
          Query q = new TermQuery(new Term("body", "body"));

          // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available
          // in RuleBasedCollator. However, the Arabic Locale seems to order
          // the Farsi characters properly.
          Collator collator = Collator.getInstance(new Locale("ar"));

          // Unicode order would include U+0633 in [ U+062F - U+0698 ], but
          // Farsi orders the U+0698 character before the U+0633 character, so
          // the single index Term below should NOT be returned by a
          // TermRangeFilter with a Farsi Collator (or an Arabic one for the
          // case when Farsi is not supported).
          int numHits = search.search(q, new TermRangeFilter("content",
              "\u062F", "\u0698", T, T, collator), 1000).totalHits;
          assertEquals("The index Term should not be included.", 0, numHits);

          numHits = search.search(q, new TermRangeFilter("content", "\u0633",
              "\u0638", T, T, collator), 1000).totalHits;
          assertEquals("The index Term should be included.", 1, numHits);
        } finally {
          search.close();
        }
      } finally {
        reader.close();
      }
    } finally {
      farsiIndex.close();
    }
  }
  
  /**
   * Checks that a {@link TermRangeFilter} given a Danish {@link Collator}
   * applies Danish ordering rather than Unicode code-point order. Three
   * documents are indexed with the "content" terms listed in {@code words};
   * two exclusive ranges ending at "MAND" are then filtered: one starting at
   * "H\u00D8T" (expected to contain "H\u00C5T", 1 hit) and one starting at
   * "H\u00C5T" (expected to be empty, 0 hits).
   *
   * <p>
   * The directory, reader and searcher are released in {@code finally}
   * blocks, so a failed assertion does not leave them open.
   *
   * @throws Exception if indexing, searching or closing a resource fails
   */
  @Test
  public void testDanish() throws Exception {

    /* build an index */
    Directory danishIndex = newDirectory();
    try {
      RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex);
      IndexReader reader;
      try {
        // Danish collation orders the words below in the given order
        // (example taken from TestSort.testInternationalSort() ).
        String[] words = {"H\u00D8T", "H\u00C5T", "MAND"};
        for (int docnum = 0; docnum < words.length; ++docnum) {
          Document doc = new Document();
          doc.add(newField("content", words[docnum], Field.Store.YES,
              Field.Index.NOT_ANALYZED));
          doc.add(newField("body", "body", Field.Store.YES,
              Field.Index.NOT_ANALYZED));
          writer.addDocument(doc);
        }
        reader = writer.getReader();
      } finally {
        writer.close();
      }

      try {
        IndexSearcher search = newSearcher(reader);
        try {
          Query q = new TermQuery(new Term("body", "body"));

          Collator collator = Collator.getInstance(new Locale("da", "dk"));

          // Unicode order would not include "H\u00C5T" in
          // [ "H\u00D8T", "MAND" ], but Danish collation does.
          int numHits = search.search(q, new TermRangeFilter("content",
              "H\u00D8T", "MAND", F, F, collator), 1000).totalHits;
          assertEquals("The index Term should be included.", 1, numHits);

          numHits = search.search(q, new TermRangeFilter("content",
              "H\u00C5T", "MAND", F, F, collator), 1000).totalHits;
          assertEquals("The index Term should not be included.", 0, numHits);
        } finally {
          search.close();
        }
      } finally {
        reader.close();
      }
    } finally {
      danishIndex.close();
    }
  }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene TestTermRangeFilter.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.