alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (TestLazyProxSkipping.java)

This example Lucene source code file (TestLazyProxSkipping.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Lucene tags/keywords

directory, document, document, indexinput, indexwriter, io, ioexception, ioexception, override, override, phrasequery, seekcountingdirectory, seekscountingstream, string, string

The Lucene TestLazyProxSkipping.java source code

package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;

/**
 * Tests lazy skipping on the proximity file.
 *
 */
public class TestLazyProxSkipping extends LuceneTestCase {
    private Searcher searcher;
    // Incremented by SeeksCountingStream.seek(); reset by performTest() right before searching.
    private int seeksCounter = 0;

    private final String field = "tokens";
    private final String term1 = "xx";
    private final String term2 = "yy";
    private final String term3 = "zz";

    /**
     * Directory wrapper that decorates every opened {@code .prx} input with a
     * {@link SeeksCountingStream}; all other files are returned unchanged.
     */
    private class SeekCountingDirectory extends MockDirectoryWrapper {
      public SeekCountingDirectory(Directory delegate) {
        super(random, delegate);
      }

      @Override
      public IndexInput openInput(String name) throws IOException {
        IndexInput ii = super.openInput(name);
        if (name.endsWith(".prx")) {
          // decorate the prox stream with a wrapper that counts the number of calls to seek()
          ii = new SeeksCountingStream(ii);
        }
        return ii;
      }

    }

    /**
     * Builds a 500-document, single-segment index on a seek-counting directory and
     * stores a searcher over it in {@link #searcher}.
     *
     * <p>Every {@code (500 / numHits)}-th document contains "term1 term2" and therefore
     * matches the phrase query built by {@link #search()}; the number of such documents
     * equals {@code numHits} only when {@code numHits} divides 500 evenly. Of the
     * remaining documents, every 15th contains only term1 and all others contain
     * "term3 term2".
     *
     * @param numHits the intended number of documents matching the phrase query; must be
     *                between 1 and 500
     * @throws IOException if writing or opening the index fails
     */
    private void createIndex(int numHits) throws IOException {
        int numDocs = 500;

        Directory directory = new SeekCountingDirectory(new RAMDirectory());
        // note: test explicitly disables payloads
        IndexWriter writer = new IndexWriter(
            directory,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).
                setMaxBufferedDocs(10).
                setMergePolicy(newLogMergePolicy(false))
        );
        for (int i = 0; i < numDocs; i++) {
            Document doc = new Document();
            String content;
            if (i % (numDocs / numHits) == 0) {
                // add a document that matches the query "term1 term2"
                content = this.term1 + " " + this.term2;
            } else if (i % 15 == 0) {
                // add a document that only contains term1
                content = this.term1 + " " + this.term1;
            } else {
                // add a document that contains term2 but not term1
                content = this.term3 + " " + this.term2;
            }

            doc.add(newField(this.field, content, Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        }

        // make sure the index has only a single segment
        writer.optimize();
        writer.close();

        // NOTE(review): neither this reader nor the directory is closed anywhere in this
        // class (the callers only close the searcher) - confirm whether that is intended.
        SegmentReader reader = SegmentReader.getOnlySegmentReader(directory);

        this.searcher = newSearcher(reader);
    }

    /**
     * Runs the phrase query "term1 term2" against {@link #searcher}.
     *
     * @return the score docs of up to 1000 top hits
     * @throws IOException if the search fails
     */
    private ScoreDoc[] search() throws IOException {
        // create PhraseQuery "term1 term2" and search
        PhraseQuery pq = new PhraseQuery();
        pq.add(new Term(this.field, this.term1));
        pq.add(new Term(this.field, this.term2));
        return this.searcher.search(pq, null, 1000).scoreDocs;
    }

    /**
     * Creates an index with {@code numHits} matching documents, searches it, and checks
     * both the hit count and the number of seeks recorded on the prox stream.
     *
     * @param numHits the expected number of hits
     * @throws IOException if indexing or searching fails
     */
    private void performTest(int numHits) throws IOException {
        createIndex(numHits);
        // only seeks caused by the search itself should be counted
        this.seeksCounter = 0;
        ScoreDoc[] hits = search();
        // verify that the right number of docs was found
        assertEquals(numHits, hits.length);

        // check that seek() was called at all, but not more often than numHits + 1 times
        assertTrue("expected at least one seek, seeksCounter=" + this.seeksCounter,
            this.seeksCounter > 0);
        assertTrue("seeksCounter=" + this.seeksCounter + " numHits=" + numHits,
            this.seeksCounter <= numHits + 1);
    }

    public void testLazySkipping() throws IOException {
        // test whether only the minimum amount of seeks()
        // are performed
        performTest(5);
        searcher.close();
        performTest(10);
        searcher.close();
    }

    /**
     * Indexes ten identical documents "a b" and checks that a single TermPositions
     * enumeration can be re-seeked from term "b" to term "a" and still reports the
     * correct documents and positions.
     */
    public void testSeek() throws IOException {
        final int numDocs = 10;
        Directory directory = newDirectory();
        IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
        for (int i = 0; i < numDocs; i++) {
            Document doc = new Document();
            doc.add(newField(this.field, "a b", Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        }

        writer.close();
        IndexReader reader = IndexReader.open(directory, true);
        try {
            TermPositions tp = reader.termPositions();
            try {
                // "b" is the second token of every document, "a" the first
                assertTermPositions(tp, "b", numDocs, 1);
                assertTermPositions(tp, "a", numDocs, 0);
            } finally {
                tp.close();
            }
        } finally {
            reader.close();
            directory.close();
        }
    }

    /**
     * Seeks {@code tp} to the term {@code text} in {@link #field} and verifies that it
     * enumerates documents {@code 0 .. numDocs-1} in order, each reporting
     * {@code expectedPosition} as its first position.
     *
     * @param tp               the enumeration to seek and walk
     * @param text             the term text to seek to
     * @param numDocs          the number of documents expected to contain the term
     * @param expectedPosition the expected first position of the term in every document
     * @throws IOException if reading from the index fails
     */
    private void assertTermPositions(TermPositions tp, String text, int numDocs, int expectedPosition)
            throws IOException {
        tp.seek(new Term(this.field, text));
        for (int i = 0; i < numDocs; i++) {
            assertTrue("enumeration exhausted before doc " + i + " for term " + text, tp.next());
            assertEquals("doc for term " + text, i, tp.doc());
            assertEquals("position of term " + text + " in doc " + i,
                expectedPosition, tp.nextPosition());
        }
    }


    /**
     * IndexInput decorator that forwards every call to the wrapped input and increments
     * the enclosing test's {@code seeksCounter} on each call to {@link #seek(long)}.
     */
    class SeeksCountingStream extends IndexInput {
          private final IndexInput input;


          SeeksCountingStream(IndexInput input) {
              this.input = input;
          }

          @Override
          public byte readByte() throws IOException {
              return this.input.readByte();
          }

          @Override
          public void readBytes(byte[] b, int offset, int len) throws IOException {
              this.input.readBytes(b, offset, len);
          }

          @Override
          public void close() throws IOException {
              this.input.close();
          }

          @Override
          public long getFilePointer() {
              return this.input.getFilePointer();
          }

          @Override
          public void seek(long pos) throws IOException {
              // count the seek before delegating
              TestLazyProxSkipping.this.seeksCounter++;
              this.input.seek(pos);
          }

          @Override
          public long length() {
              return this.input.length();
          }

          @Override
          public Object clone() {
              // wrap a clone of the delegate so that seeks on the clone are counted as well
              return new SeeksCountingStream((IndexInput) this.input.clone());
          }

    }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene TestLazyProxSkipping.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.