alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (TestPayloadProcessorProvider.java)

This example Lucene source code file (TestPayloadProcessorProvider.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Lucene tags/keywords

directory, directory, dirpayloadprocessor, indexwriter, io, ioexception, ioexception, mockanalyzer, num_docs, num_docs, override, payloadtokenstream, perdirpayloadprocessor, term, term, util

The Lucene TestPayloadProcessorProvider.java source code

package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;

public class TestPayloadProcessorProvider extends LuceneTestCase {

  private static final class PerDirPayloadProcessor extends PayloadProcessorProvider {

    private Map<Directory, DirPayloadProcessor> processors;

    public PerDirPayloadProcessor(Map<Directory, DirPayloadProcessor> processors) {
      this.processors = processors;
    }

    @Override
    public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
      return processors.get(dir);
    }

  }

  private static final class PerTermPayloadProcessor extends DirPayloadProcessor {

    @Override
    public PayloadProcessor getProcessor(Term term) throws IOException {
      // don't process payloads of terms other than "p:p1"
      if (!term.field().equals("p") || !term.text().equals("p1")) {
        return null;
      }
      
      // All other terms are processed the same way
      return new DeletePayloadProcessor();
    }
    
  }
  
  /** deletes the incoming payload */
  private static final class DeletePayloadProcessor extends PayloadProcessor {

    @Override
    public int payloadLength() throws IOException {
      return 0;
    }

    @Override
    public byte[] processPayload(byte[] payload, int start, int length) throws IOException {
      return payload;
    }

  }

  private static final class PayloadTokenStream extends TokenStream {

    private final PayloadAttribute payload = addAttribute(PayloadAttribute.class);
    private final CharTermAttribute term = addAttribute(CharTermAttribute.class);

    private boolean called = false;
    private String t;

    public PayloadTokenStream(String t) {
      this.t = t;
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (called) {
        return false;
      }

      called = true;
      byte[] p = new byte[] { 1 };
      payload.setPayload(new Payload(p));
      term.append(t);
      return true;
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      called = false;
      term.setEmpty();
    }
  }

  private static final int NUM_DOCS = 10;

  private IndexWriterConfig getConfig(Random random) {
    return newIndexWriterConfig(random, TEST_VERSION_CURRENT, new WhitespaceAnalyzer(
        TEST_VERSION_CURRENT));
  }

  private void populateDirs(Random random, Directory[] dirs, boolean multipleCommits)
      throws IOException {
    for (int i = 0; i < dirs.length; i++) {
      dirs[i] = newDirectory();
      populateDocs(random, dirs[i], multipleCommits);
      verifyPayloadExists(dirs[i], new Term("p", "p1"), NUM_DOCS);
      verifyPayloadExists(dirs[i], new Term("p", "p2"), NUM_DOCS);
    }
  }

  private void populateDocs(Random random, Directory dir, boolean multipleCommits)
      throws IOException {
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).
            setMergePolicy(newLogMergePolicy(10))
    );
    TokenStream payloadTS1 = new PayloadTokenStream("p1");
    TokenStream payloadTS2 = new PayloadTokenStream("p2");
    for (int i = 0; i < NUM_DOCS; i++) {
      Document doc = new Document();
      doc.add(newField("id", "doc" + i, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
      doc.add(newField("content", "doc content " + i, Store.NO, Index.ANALYZED));
      doc.add(new Field("p", payloadTS1));
      doc.add(new Field("p", payloadTS2));
      writer.addDocument(doc);
      if (multipleCommits && (i % 4 == 0)) {
        writer.commit();
      }
    }
    writer.close();
  }

  private void verifyPayloadExists(Directory dir, Term term, int numExpected)
      throws IOException {
    IndexReader reader = IndexReader.open(dir);
    try {
      int numPayloads = 0;
      TermPositions tp = reader.termPositions(term);
      while (tp.next()) {
        tp.nextPosition();
        if (tp.isPayloadAvailable()) {
          assertEquals(1, tp.getPayloadLength());
          byte[] p = new byte[tp.getPayloadLength()];
          tp.getPayload(p, 0);
          assertEquals(1, p[0]);
          ++numPayloads;
        }
      }
      assertEquals(numExpected, numPayloads);
    } finally {
      reader.close();
    }
  }

  private void doTest(Random random, boolean addToEmptyIndex,
      int numExpectedPayloads, boolean multipleCommits) throws IOException {
    Directory[] dirs = new Directory[2];
    populateDirs(random, dirs, multipleCommits);

    Directory dir = newDirectory();
    if (!addToEmptyIndex) {
      populateDocs(random, dir, multipleCommits);
      verifyPayloadExists(dir, new Term("p", "p1"), NUM_DOCS);
      verifyPayloadExists(dir, new Term("p", "p2"), NUM_DOCS);
    }

    // Add two source dirs. By not adding the dest dir, we ensure its payloads
    // won't get processed.
    Map<Directory, DirPayloadProcessor> processors = new HashMap();
    for (Directory d : dirs) {
      processors.put(d, new PerTermPayloadProcessor());
    }
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
    writer.setPayloadProcessorProvider(new PerDirPayloadProcessor(processors));

    IndexReader[] readers = new IndexReader[dirs.length];
    for (int i = 0; i < readers.length; i++) {
      readers[i] = IndexReader.open(dirs[i]);
    }
    try {
      writer.addIndexes(readers);
    } finally {
      for (IndexReader r : readers) {
        r.close();
      }
    }
    writer.close();
    verifyPayloadExists(dir, new Term("p", "p1"), numExpectedPayloads);
    // the second term should always have all payloads
    numExpectedPayloads = NUM_DOCS * dirs.length
        + (addToEmptyIndex ? 0 : NUM_DOCS);
    verifyPayloadExists(dir, new Term("p", "p2"), numExpectedPayloads);
    for (Directory d : dirs)
      d.close();
    dir.close();
  }

  @Test
  public void testAddIndexes() throws Exception {
    // addIndexes - single commit in each
    doTest(random, true, 0, false);

    // addIndexes - multiple commits in each
    doTest(random, true, 0, true);
  }

  @Test
  public void testAddIndexesIntoExisting() throws Exception {
    // addIndexes - single commit in each
    doTest(random, false, NUM_DOCS, false);

    // addIndexes - multiple commits in each
    doTest(random, false, NUM_DOCS, true);
  }

  @Test
  public void testRegularMerges() throws Exception {
    Directory dir = newDirectory();
    populateDocs(random, dir, true);
    verifyPayloadExists(dir, new Term("p", "p1"), NUM_DOCS);
    verifyPayloadExists(dir, new Term("p", "p2"), NUM_DOCS);

    // Add two source dirs. By not adding the dest dir, we ensure its payloads
    // won't get processed.
    Map<Directory, DirPayloadProcessor> processors = new HashMap();
    processors.put(dir, new PerTermPayloadProcessor());
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
    writer.setPayloadProcessorProvider(new PerDirPayloadProcessor(processors));
    writer.optimize();
    writer.close();

    verifyPayloadExists(dir, new Term("p", "p1"), 0);
    verifyPayloadExists(dir, new Term("p", "p2"), NUM_DOCS);
    dir.close();
  }

}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene TestPayloadProcessorProvider.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.