
Lucene example source code file (QueryUtils.java)

This example Lucene source code file (QueryUtils.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Lucene tags/keywords

indexreader, indexsearcher, io, ioexception, multisearcher, override, query, runtimeexception, scorer, searcher, util, weight

The Lucene QueryUtils.java source code

package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Random;

import junit.framework.Assert;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util._TestUtil;

import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT;




public class QueryUtils {

  /** Check the types of things query objects should be able to do. */
  public static void check(Query q) {
    checkHashEquals(q);
  }

  /** check very basic hashCode and equals */
  public static void checkHashEquals(Query q) {
    Query q2 = (Query)q.clone();
    checkEqual(q,q2);

    Query q3 = (Query)q.clone();
    q3.setBoost(7.21792348f);
    checkUnequal(q,q3);

    // test that a class check is done so that no exception is thrown
    // in the implementation of equals()
    Query whacky = new Query() {
      @Override
      public String toString(String field) {
        return "My Whacky Query";
      }
    };
    whacky.setBoost(q.getBoost());
    checkUnequal(q, whacky);
    
    // null test
    Assert.assertFalse(q.equals(null));
  }

  public static void checkEqual(Query q1, Query q2) {
    Assert.assertEquals(q1, q2);
    Assert.assertEquals(q1.hashCode(), q2.hashCode());
  }

  public static void checkUnequal(Query q1, Query q2) {
    Assert.assertTrue(!q1.equals(q2));
    Assert.assertTrue(!q2.equals(q1));

    // possible this test can fail on a hash collision... if that
    // happens, please change test to use a different example.
    Assert.assertTrue(q1.hashCode() != q2.hashCode());
  }
  
  /** deep check that explanations of a query 'score' correctly */
  public static void checkExplanations (final Query q, final Searcher s) throws IOException {
    CheckHits.checkExplanations(q, null, s, true);
  }
  
  /** 
   * Various query sanity checks on a searcher; some checks are only done
   * when the searcher is an instance of IndexSearcher.
   *
   * @see #check(Query)
   * @see #checkFirstSkipTo
   * @see #checkSkipTo
   * @see #checkExplanations
   * @see #checkSerialization
   * @see #checkEqual
   */
  public static void check(Random random, Query q1, Searcher s) {
    check(random, q1, s, true);
  }
  private static void check(Random random, Query q1, Searcher s, boolean wrap) {
    try {
      check(q1);
      if (s!=null) {
        if (s instanceof IndexSearcher) {
          IndexSearcher is = (IndexSearcher)s;
          checkFirstSkipTo(q1,is);
          checkSkipTo(q1,is);
          if (wrap) {
            check(random, q1, wrapUnderlyingReader(random, is, -1), false);
            check(random, q1, wrapUnderlyingReader(random, is,  0), false);
            check(random, q1, wrapUnderlyingReader(random, is, +1), false);
          }
        }
        if (wrap) {
          check(random,q1, wrapSearcher(random, s, -1), false);
          check(random,q1, wrapSearcher(random, s,  0), false);
          check(random,q1, wrapSearcher(random, s, +1), false);
        }
        checkExplanations(q1,s);
        checkSerialization(q1,s);
        
        Query q2 = (Query)q1.clone();
        checkEqual(s.rewrite(q1),
                   s.rewrite(q2));
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Given an IndexSearcher, returns a new IndexSearcher whose IndexReader 
   * is a MultiReader containing the Reader of the original IndexSearcher, 
   * as well as several "empty" IndexReaders -- some of which will have 
   * deleted documents in them.  This new IndexSearcher should 
   * behave exactly the same as the original IndexSearcher.
   * @param s the searcher to wrap
   * @param edge if negative, s will be the first sub; if 0, s will be in the middle; if positive, s will be the last sub
   */
  public static IndexSearcher wrapUnderlyingReader(Random random, final IndexSearcher s, final int edge) 
    throws IOException {

    IndexReader r = s.getIndexReader();

    // we can't put deleted docs before the nested reader, because
    // it will throw off the docIds
    IndexReader[] readers = new IndexReader[] {
      edge < 0 ? r : IndexReader.open(makeEmptyIndex(random, 0), true),
      IndexReader.open(makeEmptyIndex(random, 0), true),
      new MultiReader(new IndexReader[] {
        IndexReader.open(makeEmptyIndex(random, edge < 0 ? 4 : 0), true),
        IndexReader.open(makeEmptyIndex(random, 0), true),
        0 == edge ? r : IndexReader.open(makeEmptyIndex(random, 0), true)
      }),
      IndexReader.open(makeEmptyIndex(random, 0 < edge ? 0 : 7), true),
      IndexReader.open(makeEmptyIndex(random, 0), true),
      new MultiReader(new IndexReader[] {
        IndexReader.open(makeEmptyIndex(random, 0 < edge ? 0 : 5), true),
        IndexReader.open(makeEmptyIndex(random, 0), true),
        0 < edge ? r : IndexReader.open(makeEmptyIndex(random, 0), true)
      })
    };
    IndexSearcher out = new IndexSearcher(new MultiReader(readers));
    out.setSimilarity(s.getSimilarity());
    return out;
  }
  /**
   * Given a Searcher, returns a new MultiSearcher wrapping the
   * original Searcher, as well as several "empty" IndexSearchers --
   * some of which will have deleted documents in them.  This new
   * MultiSearcher should behave exactly the same as the original Searcher.
   * @param s the Searcher to wrap
   * @param edge if negative, s will be the first sub; if 0, s will be in the middle; if positive, s will be the last sub
   */
  public static MultiSearcher wrapSearcher(Random random, final Searcher s, final int edge) 
    throws IOException {

    // we can't put deleted docs before the nested reader, because
    // it will throw off the docIds
    Searcher[] searchers = new Searcher[] {
      edge < 0 ? s : new IndexSearcher(makeEmptyIndex(random, 0), true),
      new MultiSearcher(new Searcher[] {
        new IndexSearcher(makeEmptyIndex(random, edge < 0 ? 65 : 0), true),
        new IndexSearcher(makeEmptyIndex(random, 0), true),
        0 == edge ? s : new IndexSearcher(makeEmptyIndex(random, 0), true)
      }),
      new IndexSearcher(makeEmptyIndex(random, 0 < edge ? 0 : 3), true),
      new IndexSearcher(makeEmptyIndex(random, 0), true),
      new MultiSearcher(new Searcher[] {
        new IndexSearcher(makeEmptyIndex(random, 0 < edge ? 0 : 5), true),
        new IndexSearcher(makeEmptyIndex(random, 0), true),
        0 < edge ? s : new IndexSearcher(makeEmptyIndex(random, 0), true)
      })
    };
    MultiSearcher out = new MultiSearcher(searchers);
    out.setSimilarity(s.getSimilarity());
    return out;
  }

  private static Directory makeEmptyIndex(Random random, final int numDeletedDocs) 
    throws IOException {
    Directory d = new MockDirectoryWrapper(random, new RAMDirectory());
    IndexWriter w = new IndexWriter(d, new IndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)));

    for (int i = 0; i < numDeletedDocs; i++) {
      w.addDocument(new Document());
    }
    w.commit();
    w.deleteDocuments(new MatchAllDocsQuery());
    _TestUtil.keepFullyDeletedSegments(w);
    w.commit();

    if (0 < numDeletedDocs)
      Assert.assertTrue("writer has no deletions", w.hasDeletions());

    Assert.assertEquals("writer is missing some deleted docs",
                        numDeletedDocs, w.maxDoc());
    Assert.assertEquals("writer has non-deleted docs",
                        0, w.numDocs());
    w.close();
    IndexReader r = IndexReader.open(d, true);
    Assert.assertEquals("reader has wrong number of deleted docs",
                        numDeletedDocs, r.numDeletedDocs());
    r.close();
    return d;
  }
  

  /** check that the query weight is serializable. 
   * @throws IOException if the serialization check fails.
   */
  private static void checkSerialization(Query q, Searcher s) throws IOException {
    Weight w = s.createNormalizedWeight(q);
    try {
      ByteArrayOutputStream bos = new ByteArrayOutputStream();
      ObjectOutputStream oos = new ObjectOutputStream(bos);
      oos.writeObject(w);
      oos.close();
      ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray()));
      ois.readObject();
      ois.close();
      
      //skip equals() test for now - most weights don't override equals() and we won't add this just for the tests.
      //TestCase.assertEquals("writeObject(w) != w.  ("+w+")",w2,w);   
      
    } catch (Exception e) {
      IOException e2 = new IOException("Serialization failed for "+w);
      e2.initCause(e);
      throw e2;
    }
  }


  /** alternate scorer skipTo(),skipTo(),next(),next(),skipTo(),skipTo(), etc.
   * and ensure a hit collector receives the same docs and scores
   */
  public static void checkSkipTo(final Query q, final IndexSearcher s) throws IOException {
    //System.out.println("Checking "+q);
    
    if (s.createNormalizedWeight(q).scoresDocsOutOfOrder()) return;  // in this case order of skipTo() might differ from that of next().

    final int skip_op = 0;
    final int next_op = 1;
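    // each row below is a sequence of operations the collector cycles
    // through, mixing advance() ("skip") and nextDoc() ("next") calls
    // on a freshly created scorer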
    final int orders [][] = {
        {next_op},
        {skip_op},
        {skip_op, next_op},
        {next_op, skip_op},
        {skip_op, skip_op, next_op, next_op},
        {next_op, next_op, skip_op, skip_op},
        {skip_op, skip_op, skip_op, next_op, next_op},
    };
    for (int k = 0; k < orders.length; k++) {

        final int order[] = orders[k];
        // System.out.print("Order:");for (int i = 0; i < order.length; i++)
        // System.out.print(order[i]==skip_op ? " skip()":" next()");
        // System.out.println();
        final int opidx[] = { 0 };
        final int lastDoc[] = {-1};

        // FUTURE: ensure scorer.doc()==-1

        final float maxDiff = 1e-5f;
        final IndexReader lastReader[] = {null};

        s.search(q, new Collector() {
          private Scorer sc;
          private IndexReader reader;
          private Scorer scorer;

          @Override
          public void setScorer(Scorer scorer) throws IOException {
            this.sc = scorer;
          }

          @Override
          public void collect(int doc) throws IOException {
            float score = sc.score();
            lastDoc[0] = doc;
            try {
              if (scorer == null) {
                Weight w = s.createNormalizedWeight(q);
                scorer = w.scorer(reader, true, false);
              }
              
              int op = order[(opidx[0]++) % order.length];
              // System.out.println(op==skip_op ?
              // "skip("+(sdoc[0]+1)+")":"next()");
              boolean more = op == skip_op ? scorer.advance(scorer.docID() + 1) != DocIdSetIterator.NO_MORE_DOCS
                  : scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
              int scorerDoc = scorer.docID();
              float scorerScore = scorer.score();
              float scorerScore2 = scorer.score();
              float scoreDiff = Math.abs(score - scorerScore);
              float scorerDiff = Math.abs(scorerScore2 - scorerScore);
              if (!more || doc != scorerDoc || scoreDiff > maxDiff
                  || scorerDiff > maxDiff) {
                StringBuilder sbord = new StringBuilder();
                for (int i = 0; i < order.length; i++)
                  sbord.append(order[i] == skip_op ? " skip()" : " next()");
                throw new RuntimeException("ERROR matching docs:" + "\n\t"
                    + (doc != scorerDoc ? "--> " : "") + "doc=" + doc + ", scorerDoc=" + scorerDoc
                    + "\n\t" + (!more ? "--> " : "") + "tscorer.more=" + more
                    + "\n\t" + (scoreDiff > maxDiff ? "--> " : "")
                    + "scorerScore=" + scorerScore + " scoreDiff=" + scoreDiff
                    + " maxDiff=" + maxDiff + "\n\t"
                    + (scorerDiff > maxDiff ? "--> " : "") + "scorerScore2="
                    + scorerScore2 + " scorerDiff=" + scorerDiff
                    + "\n\thitCollector.doc=" + doc + " score=" + score
                    + "\n\t Scorer=" + scorer + "\n\t Query=" + q + "  "
                    + q.getClass().getName() + "\n\t Searcher=" + s
                    + "\n\t Order=" + sbord + "\n\t Op="
                    + (op == skip_op ? " skip()" : " next()"));
              }
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
          }

          @Override
          public void setNextReader(IndexReader reader, int docBase) throws IOException {
            // confirm that skipping beyond the last doc, on the
            // previous reader, hits NO_MORE_DOCS
            if (lastReader[0] != null) {
              final IndexReader previousReader = lastReader[0];
              Weight w = new IndexSearcher(previousReader).createNormalizedWeight(q);
              Scorer scorer = w.scorer(previousReader, true, false);
              if (scorer != null) {
                boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
                Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
              }
            }
            this.reader = lastReader[0] = reader;
            this.scorer = null;
            lastDoc[0] = -1;
          }

          @Override
          public boolean acceptsDocsOutOfOrder() {
            return true;
          }
        });

        if (lastReader[0] != null) {
          // confirm that skipping beyond the last doc, on the
          // previous reader, hits NO_MORE_DOCS
          final IndexReader previousReader = lastReader[0];
          Weight w = new IndexSearcher(previousReader).createNormalizedWeight(q);
          Scorer scorer = w.scorer(previousReader, true, false);
          if (scorer != null) {
            boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
            Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
          }
        }
      }
  }
    
  // check that first skip on just created scorers always goes to the right doc
  private static void checkFirstSkipTo(final Query q, final IndexSearcher s) throws IOException {
    //System.out.println("checkFirstSkipTo: "+q);
    final float maxDiff = 1e-3f;
    final int lastDoc[] = {-1};
    final IndexReader lastReader[] = {null};

    s.search(q,new Collector() {
      private Scorer scorer;
      private IndexReader reader;
      @Override
      public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
      }
      @Override
      public void collect(int doc) throws IOException {
        //System.out.println("doc="+doc);
        float score = scorer.score();
        try {
          
          for (int i=lastDoc[0]+1; i<=doc; i++) {
            Weight w = s.createNormalizedWeight(q);
            Scorer scorer = w.scorer(reader, true, false);
            Assert.assertTrue("query collected "+doc+" but skipTo("+i+") says no more docs!",scorer.advance(i) != DocIdSetIterator.NO_MORE_DOCS);
            Assert.assertEquals("query collected "+doc+" but skipTo("+i+") got to "+scorer.docID(),doc,scorer.docID());
            float skipToScore = scorer.score();
            Assert.assertEquals("unstable skipTo("+i+") score!",skipToScore,scorer.score(),maxDiff); 
            Assert.assertEquals("query assigned doc "+doc+" a score of <"+score+"> but skipTo("+i+") has <"+skipToScore+">!",score,skipToScore,maxDiff);
          }
          lastDoc[0] = doc;
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }

      @Override
      public void setNextReader(IndexReader reader, int docBase) throws IOException {
        // confirm that skipping beyond the last doc, on the
        // previous reader, hits NO_MORE_DOCS
        if (lastReader[0] != null) {
          final IndexReader previousReader = lastReader[0];
          Weight w = new IndexSearcher(previousReader).createNormalizedWeight(q);
          Scorer scorer = w.scorer(previousReader, true, false);

          if (scorer != null) {
            boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
            Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
          }
        }

        this.reader = lastReader[0] = reader;
        lastDoc[0] = -1;
      }
      @Override
      public boolean acceptsDocsOutOfOrder() {
        return false;
      }
    });

    if (lastReader[0] != null) {
      // confirm that skipping beyond the last doc, on the
      // previous reader, hits NO_MORE_DOCS
      final IndexReader previousReader = lastReader[0];
      Weight w = new IndexSearcher(previousReader).createNormalizedWeight(q);
      Scorer scorer = w.scorer(previousReader, true, false);
      if (scorer != null) {
        boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
        Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
      }
    }
  }
}
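
As a usage illustration (not part of the original file), here is a minimal sketch of how a Lucene 3.x test might drive QueryUtils against a small in-memory index. The test class name, field names, and document contents are made up for this example; the QueryUtils calls are the ones defined in the listing above.

package org.apache.lucene.search;

import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;

// Hypothetical test class, shown only to demonstrate QueryUtils usage.
public class TermQuerySanityTest extends LuceneTestCase {

  public void testTermQuerySanity() throws Exception {
    Random random = new Random();

    // build a tiny in-memory index with a single document
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document doc = new Document();
    doc.add(new Field("body", "hello world", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    Query q = new TermQuery(new Term("body", "hello"));

    // basic hashCode/equals checks only
    QueryUtils.check(q);

    // full sanity suite: skipTo, explanations, serialization, and
    // wrapped reader/searcher checks against the searcher
    QueryUtils.check(random, q, searcher);

    searcher.close();
    reader.close();
    dir.close();
  }
}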
