home | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (TestMultiSearcher.java)

This example Lucene source code file (TestMultiSearcher.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Lucene tags/keywords

directory, directory, document, document, indexsearcher, indexsearcher, indexwriter, io, ioexception, multisearcher, override, searcher, searcher, string, term, util

The Lucene TestMultiSearcher.java source code

package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SetBasedFieldSelector;

import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;

/**
 * Tests {@link MultiSearcher} class.
 */
public class TestMultiSearcher extends LuceneTestCase
{

	/**
	 * Builds the concrete {@link MultiSearcher} exercised by the tests in this
	 * class, wrapping the supplied sub-searchers.
	 *
	 * @param searchers the searchers to combine
	 * @return a new MultiSearcher constructed over {@code searchers}
	 * @throws IOException if constructing the MultiSearcher fails
	 */
	protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers) throws IOException {
		final MultiSearcher instance = new MultiSearcher(searchers);
		return instance;
	}

    /**
     * Exercises a MultiSearcher whose first sub-searcher sits on an index that
     * is empty or nearly empty, while the second sits on a populated index.
     * <ul>
     *   <li>Scenario 1: index B is empty, index A holds three documents; the
     *       query {@code handle:1} must return 3 hits, all loadable.</li>
     *   <li>Scenario 2: one document is appended to B; the same query must
     *       return 4 hits, and {@code subSearcher()} must report which
     *       sub-searcher each hit came from.</li>
     *   <li>Scenario 3: that document is deleted from B again and B is
     *       optimized; the query must return 3 hits, all loadable.</li>
     * </ul>
     *
     * @throws Exception if indexing, parsing or searching fails
     */
    public void testEmptyIndex() throws Exception {
        // creating two directories for indices
        Directory indexStoreA = newDirectory();
        Directory indexStoreB = newDirectory();

        // three documents that all carry handle "1", so one query matches them all
        Document lDoc = newHandleDocument("Once upon a time.....", "doc1");
        Document lDoc2 = newHandleDocument("in a galaxy far far away.....", "doc2");
        Document lDoc3 = newHandleDocument("a bizarre bug manifested itself....", "doc3");

        // creating an index writer for the first index
        IndexWriter writerA = new IndexWriter(indexStoreA, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
        // creating an index writer for the second index, but writing nothing
        IndexWriter writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));

        //--------------------------------------------------------------------
        // scenario 1
        //--------------------------------------------------------------------

        // writing the documents to the first index
        writerA.addDocument(lDoc);
        writerA.addDocument(lDoc2);
        writerA.addDocument(lDoc3);
        writerA.optimize();
        writerA.close();

        // closing the second index
        writerB.close();

        // creating the query
        QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fulltext", new StandardAnalyzer(TEST_VERSION_CURRENT));
        Query query = parser.parse("handle:1");

        // building the searchables
        Searcher[] searchers = new Searcher[2];
        // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
        searchers[0] = new IndexSearcher(indexStoreB, true);
        searchers[1] = new IndexSearcher(indexStoreA, true);
        // creating the multiSearcher
        Searcher mSearcher = getMultiSearcherInstance(searchers);
        // performing the search
        ScoreDoc[] hits = mSearcher.search(query, null, 1000).scoreDocs;

        assertEquals(3, hits.length);

        // every hit must be loadable through the multiSearcher
        loadAllHits(mSearcher, hits);
        mSearcher.close();


        //--------------------------------------------------------------------
        // scenario 2
        //--------------------------------------------------------------------

        // adding one document to the empty index
        writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(
            TEST_VERSION_CURRENT,
                new StandardAnalyzer(TEST_VERSION_CURRENT))
                .setOpenMode(OpenMode.APPEND));
        writerB.addDocument(lDoc);
        writerB.optimize();
        writerB.close();

        // building the searchables
        Searcher[] searchers2 = new Searcher[2];
        // VITAL STEP: the searcher for the (formerly empty) index B still goes first
        searchers2[0] = new IndexSearcher(indexStoreB, true);
        searchers2[1] = new IndexSearcher(indexStoreA, true);
        // creating the multiSearcher
        MultiSearcher mSearcher2 = getMultiSearcherInstance(searchers2);
        // performing the same search
        ScoreDoc[] hits2 = mSearcher2.search(query, null, 1000).scoreDocs;

        assertEquals(4, hits2.length);

        // no exception should happen while loading the hit documents
        loadAllHits(mSearcher2, hits2);

        // test the subSearcher() method:
        Query subSearcherQuery = parser.parse("id:doc1");
        hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs;
        assertEquals(2, hits2.length);
        assertEquals(0, mSearcher2.subSearcher(hits2[0].doc));   // hit from searchers2[0]
        assertEquals(1, mSearcher2.subSearcher(hits2[1].doc));   // hit from searchers2[1]
        subSearcherQuery = parser.parse("id:doc2");
        hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs;
        assertEquals(1, hits2.length);
        assertEquals(1, mSearcher2.subSearcher(hits2[0].doc));   // hit from searchers2[1]
        mSearcher2.close();

        //--------------------------------------------------------------------
        // scenario 3
        //--------------------------------------------------------------------

        // deleting the document just added, this will cause a different exception to take place
        Term term = new Term("id", "doc1");
        IndexReader readerB = IndexReader.open(indexStoreB, false);
        readerB.deleteDocuments(term);
        readerB.close();

        // optimizing the index with the writer; the config comes from the test
        // framework factory, like every other writer created in this test
        writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(
            TEST_VERSION_CURRENT,
                new StandardAnalyzer(TEST_VERSION_CURRENT))
                .setOpenMode(OpenMode.APPEND));
        writerB.optimize();
        writerB.close();

        // building the searchables
        Searcher[] searchers3 = new Searcher[2];

        searchers3[0] = new IndexSearcher(indexStoreB, true);
        searchers3[1] = new IndexSearcher(indexStoreA, true);
        // creating the multiSearcher
        Searcher mSearcher3 = getMultiSearcherInstance(searchers3);
        // performing the same search
        ScoreDoc[] hits3 = mSearcher3.search(query, null, 1000).scoreDocs;

        assertEquals(3, hits3.length);

        // every hit must be loadable through the multiSearcher
        loadAllHits(mSearcher3, hits3);
        mSearcher3.close();
        indexStoreA.close();
        indexStoreB.close();
    }

    /**
     * Creates a stored document with an analyzed "fulltext" field, a
     * non-analyzed "id" field and a non-analyzed "handle" field fixed to "1".
     */
    private Document newHandleDocument(String fulltext, String id) {
        Document doc = new Document();
        doc.add(newField("fulltext", fulltext, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(newField("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(newField("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /** Loads the stored document of every hit through the given searcher. */
    private void loadAllHits(Searcher searcher, ScoreDoc[] hits) throws IOException {
        for (int i = 0; i < hits.length; i++) {
            searcher.doc(hits[i].doc);
        }
    }
    
    /**
     * Builds a stored, non-analyzed test document.
     *
     * @param contents1 value of the first "contents" field
     * @param contents2 value of an additional "contents" field, or
     *                  {@code null} to leave it out
     * @return a document with "contents", "other" and, when requested, a
     *         second "contents" field, added in that order
     */
    private Document createDocument(String contents1, String contents2) {
        final Document doc = new Document();
        doc.add(newField("contents", contents1, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(newField("other", "other contents", Field.Store.YES, Field.Index.NOT_ANALYZED));

        // without a second value the document is already complete
        if (contents2 == null) {
            return doc;
        }

        doc.add(newField("contents", contents2, Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }
    
    /**
     * Writes {@code nDocs} documents ("doc0", "doc1", ...) into the given
     * directory using a KeywordAnalyzer.
     *
     * @param random    source of randomness handed to the writer-config factory
     * @param directory index location
     * @param nDocs     number of documents to add
     * @param create    {@code true} to open with {@code OpenMode.CREATE},
     *                  {@code false} to open with {@code OpenMode.APPEND}
     * @param contents2 optional second "contents" value passed on to
     *                  {@link #createDocument(String, String)}; may be {@code null}
     * @throws IOException if opening, writing to or closing the index fails
     */
    private void initIndex(Random random, Directory directory, int nDocs, boolean create, String contents2) throws IOException {
        final OpenMode mode;
        if (create) {
            mode = OpenMode.CREATE;
        } else {
            mode = OpenMode.APPEND;
        }

        // If the constructor throws there is no writer to close, so it can
        // safely sit outside the try block.
        final IndexWriter writer = new IndexWriter(directory,
            LuceneTestCase.newIndexWriterConfig(random, TEST_VERSION_CURRENT, new KeywordAnalyzer())
                .setOpenMode(mode));
        try {
            int docNumber = 0;
            while (docNumber < nDocs) {
                writer.addDocument(createDocument("doc" + docNumber, contents2));
                docNumber++;
            }
        } finally {
            writer.close();
        }
    }

  /**
   * Verifies that {@code MultiSearcher.doc(int, FieldSelector)} honours the
   * field selector: only the selected field is loaded for a hit, whichever
   * sub-index the hit comes from.
   *
   * @throws Exception if indexing or searching fails
   */
  public void testFieldSelector() throws Exception {
    Directory ramDirectory1, ramDirectory2;
    IndexSearcher indexSearcher1, indexSearcher2;

    ramDirectory1 = newDirectory();
    ramDirectory2 = newDirectory();
    Query query = new TermQuery(new Term("contents", "doc0"));

    // Now put the documents in a different index
    initIndex(random, ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
    initIndex(random, ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...

    indexSearcher1 = new IndexSearcher(ramDirectory1, true);
    indexSearcher2 = new IndexSearcher(ramDirectory2, true);

    MultiSearcher searcher = getMultiSearcherInstance(new Searcher[]{indexSearcher1, indexSearcher2});
    assertTrue("searcher is null and it shouldn't be", searcher != null);
    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue(hits.length + " does not equal: " + 2, hits.length == 2);
    Document document = searcher.doc(hits[0].doc);
    assertTrue("document is null and it shouldn't be", document != null);
    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2);
    //Should be one document from each directory
    //they both have two fields, contents and other

    // load only "other": "contents" must be absent from the returned document
    Set<String> ftl = new HashSet<String>();
    ftl.add("other");
    SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections.<String>emptySet());
    document = searcher.doc(hits[0].doc, fs);
    assertTrue("document is null and it shouldn't be", document != null);
    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
    String value = document.get("contents");
    assertTrue("value is not null and it should be", value == null);
    value = document.get("other");
    assertTrue("value is null and it shouldn't be", value != null);

    // now load only "contents" for the second hit: "other" must be absent
    ftl.clear();
    ftl.add("contents");
    fs = new SetBasedFieldSelector(ftl, Collections.<String>emptySet());
    document = searcher.doc(hits[1].doc, fs);
    value = document.get("contents");
    assertTrue("value is null and it shouldn't be", value != null);
    value = document.get("other");
    assertTrue("value is not null and it should be", value == null);

    // close the multi-searcher before the directories it reads from, using the
    // same teardown order as the other tests in this class
    searcher.close();
    indexSearcher1.close();
    indexSearcher2.close();
    ramDirectory1.close();
    ramDirectory2.close();
  }

  /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
 public void testNormalization1() throws IOException {
     testNormalization(1, "Using 1 document per index:");
 }
  */
    
    /**
     * Runs the score-normalization check with ten documents per index.
     *
     * @throws IOException if indexing or searching fails
     */
    public void testNormalization10() throws IOException {
        final int docsPerIndex = 10;
        testNormalization(docsPerIndex, "Using 10 documents per index:");
    }
    
    /**
     * Checks that scores produced by a MultiSearcher over two indices match
     * the scores produced by a single IndexSearcher over one index holding
     * the same documents.
     *
     * @param nDocs   number of documents written per batch
     * @param message label attached to every assertion
     * @throws IOException if indexing or searching fails
     */
    private void testNormalization(int nDocs, String message) throws IOException {
        final Query docZeroQuery = new TermQuery(new Term("contents", "doc0"));

        // Phase 1: both batches of documents go into the same index.
        Directory combinedDir = newDirectory();
        initIndex(random, combinedDir, nDocs, true, null);  // documents with a single token "doc0", "doc1", etc...
        initIndex(random, combinedDir, nDocs, false, "x");  // appended documents with two tokens, "docN" and "x"

        IndexSearcher combinedSearcher = new IndexSearcher(combinedDir, true);
        combinedSearcher.setDefaultFieldSortScoring(true, true);

        ScoreDoc[] combinedHits = combinedSearcher.search(docZeroQuery, null, 1000).scoreDocs;
        assertEquals(message, 2, combinedHits.length);

        // Reference scores that the multi-searcher results are compared against.
        final float[] scores = { combinedHits[0].score, combinedHits[1].score };
        assertTrue(message, scores[0] > scores[1]);

        combinedSearcher.close();
        combinedDir.close();

        // Phase 2: the same two batches, each in an index of its own.
        Directory firstDir = newDirectory();
        Directory secondDir = newDirectory();
        initIndex(random, firstDir, nDocs, true, null);   // single-token documents
        initIndex(random, secondDir, nDocs, true, "x");   // two-token documents

        IndexSearcher firstSearcher = new IndexSearcher(firstDir, true);
        firstSearcher.setDefaultFieldSortScoring(true, true);
        IndexSearcher secondSearcher = new IndexSearcher(secondDir, true);
        secondSearcher.setDefaultFieldSortScoring(true, true);

        Searcher multi = getMultiSearcherInstance(new Searcher[] { firstSearcher, secondSearcher });

        ScoreDoc[] multiHits = multi.search(docZeroQuery, null, 1000).scoreDocs;
        assertEquals(message, 2, multiHits.length);

        // The scores should be the same (within reason); the first hit is
        // expected to come from firstDir and the second from secondDir.
        for (int rank = 0; rank < scores.length; rank++) {
            assertEquals(message, scores[rank], multiHits[rank].score, 1e-6);
        }

        // Adding a Sort.RELEVANCE object should not change anything
        ScoreDoc[] sortedHits = multi.search(docZeroQuery, null, 1000, Sort.RELEVANCE).scoreDocs;
        assertEquals(message, 2, sortedHits.length);
        for (int rank = 0; rank < scores.length; rank++) {
            assertEquals(message, scores[rank], sortedHits[rank].score, 1e-6);
        }

        multi.close();

        firstDir.close();
        secondDir.close();
    }
    
    /**
     * Verifies that a custom similarity set on a MultiSearcher is actually
     * used for scoring (LUCENE-789): with the same similarity installed, the
     * MultiSearcher must report the same max score as the wrapped IndexSearcher.
     *
     * @throws IOException if indexing or searching fails
     */
    public void testCustomSimilarity () throws IOException {
        final Query docZeroQuery = new TermQuery(new Term("contents", "doc0"));

        Directory indexDir = newDirectory();
        initIndex(random, indexDir, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
        IndexSearcher single = new IndexSearcher(indexDir, true);
        MultiSearcher multi = getMultiSearcherInstance(new Searcher[]{single});

        // Every scoring factor is overridden with a fixed value.
        Similarity fixedSimilarity = new DefaultSimilarity() {
            @Override
            public float tf(float freq) { return 1.0f; }
            @Override
            public float idf(int docFreq, int numDocs) { return 100.0f; }
            @Override
            public float coord(int overlap, int maxOverlap) { return 1.0f; }
            @Override
            public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
            @Override
            public float sloppyFreq(int distance) { return 1.0f; }
            @Override
            public float computeNorm(String fieldName, FieldInvertState state) { return state.getBoost(); }
        };

        single.setSimilarity(fixedSimilarity);
        multi.setSimilarity(fixedSimilarity);

        // Score as seen by the plain IndexSearcher
        TopDocs singleTop = single.search(docZeroQuery, null, 1);
        float score1 = singleTop.getMaxScore();

        // Score as seen through the MultiSearcher
        TopDocs multiTop = multi.search(docZeroQuery, null, 1);
        float scoreN = multiTop.getMaxScore();

        // Both searchers use the same similarity, so the scores must agree.
        assertEquals("MultiSearcher score must be equal to single searcher score!", score1, scoreN, 1e-6);
        multi.close();
        single.close();
        indexDir.close();
    }
    
    /**
     * Checks that {@code MultiSearcher.docFreq} adds up the document
     * frequencies of its sub-searchers: 10 documents in one index plus 5 in
     * the other, all containing "x", must give 15.
     *
     * @throws IOException if indexing or searching fails
     */
    public void testDocFreq() throws IOException{
      final Directory largeDir = newDirectory();
      final Directory smallDir = newDirectory();

      initIndex(random, largeDir, 10, true, "x"); // ten documents, each with tokens "docN" and "x"
      initIndex(random, smallDir, 5, true, "x");  // five documents, each with tokens "docN" and "x"
      final IndexSearcher largeSearcher = new IndexSearcher(largeDir, true);
      final IndexSearcher smallSearcher = new IndexSearcher(smallDir, true);

      final Searcher[] parts = {largeSearcher, smallSearcher};
      final MultiSearcher combined = getMultiSearcherInstance(parts);
      final Term xTerm = new Term("contents","x");
      final int combinedDocFreq = combined.docFreq(xTerm);
      assertEquals(15, combinedDocFreq);

      combined.close();
      largeSearcher.close();
      smallSearcher.close();
      largeDir.close();
      smallDir.close();
    }
    
    /**
     * Checks that {@code MultiSearcher.createDocFrequencyMap} reports, for
     * each requested term, the document frequency summed over both
     * sub-indices.
     *
     * @throws IOException if indexing or searching fails
     */
    public void testCreateDocFrequencyMap() throws IOException{
      Directory dir1 = newDirectory();
      Directory dir2 = newDirectory();
      Term template = new Term("contents") ;
      String[] contents  = {"a", "b", "c"};
      // parameterized constructor rather than the raw HashSet, so the
      // assignment is type-checked
      HashSet<Term> termsSet = new HashSet<Term>();
      for (int i = 0; i < contents.length; i++) {
        // batch i adds i+10 documents to dir1 and i+5 to dir2, each carrying
        // contents[i]; only the first batch creates the index, later ones append
        initIndex(random, dir1, i+10, i==0, contents[i]); 
        initIndex(random, dir2, i+5, i==0, contents[i]);
        termsSet.add(template.createTerm(contents[i]));
      }
      IndexSearcher searcher1 = new IndexSearcher(dir1, true);
      IndexSearcher searcher2 = new IndexSearcher(dir2, true);
      MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2});
      Map<Term,Integer> docFrequencyMap = multiSearcher.createDocFrequencyMap(termsSet);
      assertEquals(3, docFrequencyMap.size());
      for (int i = 0; i < contents.length; i++) {
        // (i+10) documents in dir1 plus (i+5) in dir2 = 2*i + 15
        assertEquals(Integer.valueOf((i*2) +15), docFrequencyMap.get(template.createTerm(contents[i])));
      }
      multiSearcher.close();
      searcher1.close();
      searcher2.close();
      dir1.close();
      dir2.close();
    }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene TestMultiSearcher.java source code file:



my book on functional programming

 

new blog posts

 

Copyright 1998-2019 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.