alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (LineDocSourceTest.java)

This example Lucene source code file (LineDocSourceTest.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Lucene tags/keywords

bufferedwriter, bufferedwriter, class, exception, exception, file, file, fileoutputstream, io, object, outputstream, outputstreamwriter, properties, string, string, util

The Lucene LineDocSourceTest.java source code

package org.apache.lucene.benchmark.byTask.feeds;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.Properties;

import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.LineDocSource.HeaderLineParser;
import org.apache.lucene.benchmark.byTask.feeds.LineDocSource.LineParser;
import org.apache.lucene.benchmark.byTask.tasks.AddDocTask;
import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask;
import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask;
import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

/**
 * Tests the functionality of {@link LineDocSource}.
 * <p>
 * Every test writes a small line file into the work directory, indexes it
 * through a benchmark {@link TaskSequence} configured to read from
 * {@link LineDocSource}, and then verifies the index with a term query.
 */
public class LineDocSourceTest extends BenchmarkTestCase {

  private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();

  /**
   * Writes the test document (and optionally a header line) to <code>out</code>
   * as utf-8 text, closing the stream when done.
   * <p>
   * The writer is closed in a <code>finally</code> clause so that the
   * underlying stream is released even if writing fails.
   *
   * @param out         destination stream; closed by this method
   * @param addHeader   whether to write the fields header line first
   * @param otherFields extra fields to add to header and doc line, or <code>null</code>
   * @throws IOException if writing or closing fails
   */
  private void writeDocsAndClose(OutputStream out, boolean addHeader, Properties otherFields) throws IOException {
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8"));
    try {
      writeDocsToFile(writer, addHeader, otherFields);
    } finally {
      writer.close();
    }
  }

  /**
   * Creates a bzip2-compressed line file holding a single document.
   *
   * @param file      the file to (over)write
   * @param addHeader whether the file should start with a fields header line
   */
  private void createBZ2LineFile(File file, boolean addHeader) throws Exception {
    OutputStream fileOut = new FileOutputStream(file);
    OutputStream bz2Out = null;
    try {
      bz2Out = csFactory.createCompressorOutputStream("bzip2", fileOut);
    } finally {
      if (bz2Out == null) {
        // wrapping failed - nobody else owns the raw file stream, so release it here
        fileOut.close();
      }
    }
    writeDocsAndClose(bz2Out, addHeader, null);
  }

  /**
   * Writes an optional header line followed by exactly one document line.
   * <p>
   * The header consists of the header indicator, the title, date and body field
   * names and then the keys of <code>otherFields</code>. The document line holds
   * the values "title", "date" and {@link DocMaker#BODY_FIELD} (so the body text
   * equals the body field name), followed by the values of <code>otherFields</code>.
   * All elements are separated by {@link WriteLineDocTask#SEP}.
   *
   * @param writer      destination; not closed by this method
   * @param addHeader   whether to write the header line
   * @param otherFields additional field names/values, or <code>null</code> for none
   * @throws IOException if writing fails
   */
  private void writeDocsToFile(BufferedWriter writer, boolean addHeader, Properties otherFields) throws IOException {
    if (addHeader) {
      writer.write(WriteLineDocTask.FIELDS_HEADER_INDICATOR);
      writer.write(WriteLineDocTask.SEP);
      writer.write(DocMaker.TITLE_FIELD);
      writer.write(WriteLineDocTask.SEP);
      writer.write(DocMaker.DATE_FIELD);
      writer.write(WriteLineDocTask.SEP);
      writer.write(DocMaker.BODY_FIELD);
      if (otherFields != null) {
        // additional field names in the header
        for (Object fieldName : otherFields.keySet()) {
          writer.write(WriteLineDocTask.SEP);
          writer.write(fieldName.toString());
        }
      }
      writer.newLine();
    }
    StringBuilder doc = new StringBuilder();
    doc.append("title").append(WriteLineDocTask.SEP)
       .append("date").append(WriteLineDocTask.SEP)
       .append(DocMaker.BODY_FIELD);
    if (otherFields != null) {
      // additional field values in the doc line
      for (Object fieldValue : otherFields.values()) {
        doc.append(WriteLineDocTask.SEP).append(fieldValue.toString());
      }
    }
    writer.write(doc.toString());
    writer.newLine();
  }

  /**
   * Creates an uncompressed line file holding a single document.
   *
   * @param file      the file to (over)write
   * @param addHeader whether the file should start with a fields header line
   */
  private void createRegularLineFile(File file, boolean addHeader) throws Exception {
    writeDocsAndClose(new FileOutputStream(file), addHeader, null);
  }

  /**
   * Creates an uncompressed line file, always with a header line, whose single
   * document carries additional fields. Each extra field is written with a
   * value equal to its own name.
   *
   * @param file        the file to (over)write
   * @param extraFields names of the additional fields
   */
  private void createRegularLineFileWithMoreFields(File file, String...extraFields) throws Exception {
    Properties p = new Properties();
    for (String f : extraFields) {
      p.setProperty(f, f); // value == name, which is what the stored-field check relies on
    }
    writeDocsAndClose(new FileOutputStream(file), true, p);
  }
  
  /**
   * Runs the index-and-search check three times, adding the document
   * 1, 2 and 4 times respectively.
   *
   * @param file            the line file to index
   * @param lineParserClass line parser to configure, or <code>null</code> to leave it unset
   * @param storedField     stored field to verify, or <code>null</code> for the body field
   */
  private void doIndexAndSearchTest(File file, Class<? extends LineParser> lineParserClass, String storedField) throws Exception {
    doIndexAndSearchTestWithRepeats(file, lineParserClass, 1, storedField); // no extra repetitions
    doIndexAndSearchTestWithRepeats(file, lineParserClass, 2, storedField); // 1 extra repetition
    doIndexAndSearchTestWithRepeats(file, lineParserClass, 4, storedField); // 3 extra repetitions
  }
  
  /**
   * Indexes <code>file</code> into a RAMDirectory using <code>numAdds</code>
   * {@link AddDocTask}s, then asserts that searching for body:body yields
   * exactly <code>numAdds</code> hits and that the stored value of
   * <code>storedField</code> in doc 0 equals the field's name.
   *
   * @param file            the line file to index
   * @param lineParserClass line parser to configure, or <code>null</code> to leave it unset
   * @param numAdds         number of add-document tasks to run
   * @param storedField     stored field to verify, or <code>null</code> for the body field
   */
  private void doIndexAndSearchTestWithRepeats(File file, 
      Class<? extends LineParser> lineParserClass, int numAdds, String storedField) throws Exception {

    Properties props = new Properties();
    
    // LineDocSource specific settings.
    props.setProperty("docs.file", file.getAbsolutePath());
    if (lineParserClass != null) {
      props.setProperty("line.parser", lineParserClass.getName());
    }
    
    // Indexing configuration.
    props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
    props.setProperty("content.source", LineDocSource.class.getName());
    props.setProperty("directory", "RAMDirectory");
    props.setProperty("doc.stored", "true");
    props.setProperty("doc.index.props", "true");
    
    // Create PerfRunData
    Config config = new Config(props);
    PerfRunData runData = new PerfRunData(config);

    // The sequence name is shared by all tests in this class, not only the bzip2 ones.
    TaskSequence tasks = new TaskSequence(runData, "testBzip2", null, false);
    tasks.addTask(new CreateIndexTask(runData));
    for (int i = 0; i < numAdds; i++) {
      tasks.addTask(new AddDocTask(runData));
    }
    tasks.addTask(new CloseIndexTask(runData));
    tasks.doLogic();
    
    IndexSearcher searcher = new IndexSearcher(runData.getDirectory(), true);
    try {
      TopDocs td = searcher.search(new TermQuery(new Term("body", "body")), 10);
      assertEquals(numAdds, td.totalHits);
      assertNotNull(td.scoreDocs[0]);
      
      if (storedField == null) {
        storedField = DocMaker.BODY_FIELD; // added to all docs and satisfies field-name == value
      }
      assertEquals("Wrong field value", storedField, searcher.doc(0).get(storedField));
    } finally {
      // close even when an assertion above fails, so the searcher is not leaked
      searcher.close();
    }
  }
  
  /** Tests LineDocSource with a bzip2 input stream that starts with a header line. */
  public void testBZip2() throws Exception {
    File file = new File(getWorkDir(), "one-line.bz2");
    createBZ2LineFile(file, true);
    doIndexAndSearchTest(file, null, null);
  }

  /** Tests LineDocSource with a bzip2 input stream that has no header line. */
  public void testBZip2NoHeaderLine() throws Exception {
    File file = new File(getWorkDir(), "one-line.bz2");
    createBZ2LineFile(file, false);
    doIndexAndSearchTest(file, null, null);
  }
  
  /** Tests LineDocSource with an uncompressed file that starts with a header line. */
  public void testRegularFile() throws Exception {
    File file = new File(getWorkDir(), "one-line");
    createRegularLineFile(file, true);
    doIndexAndSearchTest(file, null, null);
  }

  /**
   * Tests LineDocSource with an uncompressed file with a header line, explicitly
   * configuring {@link HeaderLineParser} as the line parser.
   */
  public void testRegularFileSpecialHeader() throws Exception {
    File file = new File(getWorkDir(), "one-line");
    createRegularLineFile(file, true);
    doIndexAndSearchTest(file, HeaderLineParser.class, null);
  }

  /** Tests LineDocSource with an uncompressed file that has no header line. */
  public void testRegularFileNoHeaderLine() throws Exception {
    File file = new File(getWorkDir(), "one-line");
    createRegularLineFile(file, false);
    doIndexAndSearchTest(file, null, null);
  }

  /** Verifies that indexing each of several malformed single-line files raises an exception. */
  public void testInvalidFormat() throws Exception {
    String[] testCases = new String[] {
      "", // empty line
      "title", // just title
      "title" + WriteLineDocTask.SEP, // title + SEP
      "title" + WriteLineDocTask.SEP + "body", // title + SEP + body
      // note that title + SEP + body + SEP is a valid line, which results in an
      // empty body
    };
    
    for (String testCase : testCases) {
      File file = new File(getWorkDir(), "one-line");
      BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "utf-8"));
      try {
        writer.write(testCase);
        writer.newLine();
      } finally {
        writer.close();
      }
      try {
        doIndexAndSearchTest(file, null, null);
        // NOTE(review): relies on fail() raising an Error rather than an Exception,
        // so that it is not swallowed by the catch below - confirm for the test base class.
        fail("Some exception should have been thrown for: [" + testCase + "]");
      } catch (Exception expected) {
        // expected.
      }
    }
  }
  
  /** Doc Name is not part of the default header */
  public void testWithDocsName()  throws Exception {
    File file = new File(getWorkDir(), "one-line");
    createRegularLineFileWithMoreFields(file, DocMaker.NAME_FIELD);
    doIndexAndSearchTest(file, null, DocMaker.NAME_FIELD);
  }

  /** Use fields names that are not defined in Docmaker and so will go to Properties */
  public void testWithProperties()  throws Exception {
    File file = new File(getWorkDir(), "one-line");
    String specialField = "mySpecialField";
    createRegularLineFileWithMoreFields(file, specialField);
    doIndexAndSearchTest(file, null, specialField);
  }
  
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene LineDocSourceTest.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.