alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (TestQualityRun.java)

This example Lucene source code file (TestQualityRun.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Lucene tags/keywords

bufferedreader, description, description, exception, inputstream, io, line, line, narrative, narrative, qualitystats, topic, topic, trectopicsreader, utf-8

The Lucene TestQualityRun.java source code

package org.apache.lucene.benchmark.quality;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.quality.trec.TrecJudge;
import org.apache.lucene.benchmark.quality.trec.TrecTopicsReader;
import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;

/**
 * Test that quality run does its job.
 * <p>
 * NOTE: if the default scoring or StandardAnalyzer is changed, then
 * this test will not work correctly, as it does not dynamically
 * generate its test trec topics/qrels!
 */
public class TestQualityRun extends BenchmarkTestCase {
  
  @Override
  public void setUp() throws Exception {
    super.setUp();
    copyToWorkDir("reuters.578.lines.txt.bz2");
  }

  public void testTrecQuality() throws Exception {
    // first create the partial reuters index
    createReutersIndex();
    
    int maxResults = 1000;
    String docNameField = "doctitle"; // orig docID is in the linedoc format title 
    
    PrintWriter logger = VERBOSE ? new PrintWriter(System.out,true) : null;
   
    // prepare topics
    InputStream topics = getClass().getResourceAsStream("trecTopics.txt");
    TrecTopicsReader qReader = new TrecTopicsReader();
    QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new InputStreamReader(topics, "UTF-8")));
    
    // prepare judge
    InputStream qrels = getClass().getResourceAsStream("trecQRels.txt");
    Judge judge = new TrecJudge(new BufferedReader(new InputStreamReader(qrels, "UTF-8")));
    
    // validate topics & judgments match each other
    judge.validateData(qqs, logger);
    
    Directory dir = newFSDirectory(new File(getWorkDir(),"index"));
    IndexSearcher searcher = new IndexSearcher(dir, true);

    QualityQueryParser qqParser = new SimpleQQParser("title","body");
    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
    
    SubmissionReport submitLog = VERBOSE ? new SubmissionReport(logger, "TestRun") : null;
    qrun.setMaxResults(maxResults);
    QualityStats stats[] = qrun.execute(judge, submitLog, logger);
    
    // --------- verify by the way judgments were altered for this test:
    // for some queries, depending on m = qnum % 8
    // m==0: avg_precision and recall are hurt, by marking fake docs as relevant
    // m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
    // m==2: all precision, precision_at_n and recall are hurt.
    // m>=3: these queries remain perfect
    for (int i = 0; i < stats.length; i++) {
      QualityStats s = stats[i];
      switch (i%8) {

      case 0:
        assertTrue("avg-p should be hurt: "+s.getAvp(), 1.0 > s.getAvp());
        assertTrue("recall should be hurt: "+s.getRecall(), 1.0 > s.getRecall());
        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
          assertEquals("p_at_"+j+" should be perfect: "+s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-2);
        }
        break;
      
      case 1:
        assertTrue("avg-p should be hurt", 1.0 > s.getAvp());
        assertEquals("recall should be perfect: "+s.getRecall(), 1.0, s.getRecall(), 1E-2);
        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
          assertTrue("p_at_"+j+" should be hurt: "+s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
        }
        break;

      case 2:
        assertTrue("avg-p should be hurt: "+s.getAvp(), 1.0 > s.getAvp());
        assertTrue("recall should be hurt: "+s.getRecall(), 1.0 > s.getRecall());
        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
          assertTrue("p_at_"+j+" should be hurt: "+s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
        }
        break;

      default: {
        assertEquals("avg-p should be perfect: "+s.getAvp(), 1.0, s.getAvp(), 1E-2);
        assertEquals("recall should be perfect: "+s.getRecall(), 1.0, s.getRecall(), 1E-2);
        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
          assertEquals("p_at_"+j+" should be perfect: "+s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-2);
        }
      }
      
      }
    }
    
    QualityStats avg = QualityStats.average(stats);
    if (logger!=null) {
      avg.log("Average statistis:",1,logger,"  ");
    }
    
    assertTrue("mean avg-p should be hurt: "+avg.getAvp(), 1.0 > avg.getAvp());
    assertTrue("avg recall should be hurt: "+avg.getRecall(), 1.0 > avg.getRecall());
    for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
      assertTrue("avg p_at_"+j+" should be hurt: "+avg.getPrecisionAt(j), 1.0 > avg.getPrecisionAt(j));
    }
    
    searcher.close();
    dir.close();
  }
  
  public void testTrecTopicsReader() throws Exception {    
    // prepare topics
    InputStream topicsFile = getClass().getResourceAsStream("trecTopics.txt");
    TrecTopicsReader qReader = new TrecTopicsReader();
    QualityQuery qqs[] = qReader.readQueries(
        new BufferedReader(new InputStreamReader(topicsFile, "UTF-8")));
    
    assertEquals(20, qqs.length);
    
    QualityQuery qq = qqs[0];
    assertEquals("statement months  total 1987", qq.getValue("title"));
    assertEquals("Topic 0 Description Line 1 Topic 0 Description Line 2", 
        qq.getValue("description"));
    assertEquals("Topic 0 Narrative Line 1 Topic 0 Narrative Line 2", 
        qq.getValue("narrative"));
    
    qq = qqs[1];
    assertEquals("agreed 15  against five", qq.getValue("title"));
    assertEquals("Topic 1 Description Line 1 Topic 1 Description Line 2", 
        qq.getValue("description"));
    assertEquals("Topic 1 Narrative Line 1 Topic 1 Narrative Line 2", 
        qq.getValue("narrative"));
    
    qq = qqs[19];
    assertEquals("20 while  common week", qq.getValue("title"));
    assertEquals("Topic 19 Description Line 1 Topic 19 Description Line 2", 
        qq.getValue("description"));
    assertEquals("Topic 19 Narrative Line 1 Topic 19 Narrative Line 2", 
        qq.getValue("narrative"));
  }

  // use benchmark logic to create the mini Reuters index
  private void createReutersIndex() throws Exception {
    // 1. alg definition
    String algLines[] = {
        "# ----- properties ",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "analyzer=org.apache.lucene.analysis.standard.ClassicAnalyzer",
        "docs.file=" + getWorkDirResourcePath("reuters.578.lines.txt.bz2"),
        "content.source.log.step=2500",
        "doc.term.vector=false",
        "content.source.forever=false",
        "directory=FSDirectory",
        "doc.stored=true",
        "doc.tokenized=true",
        "# ----- alg ",
        "ResetSystemErase",
        "CreateIndex",
        "{ AddDoc } : *",
        "CloseIndex",
    };
    
    // 2. execute the algorithm  (required in every "logic" test)
    execBenchmark(algLines);
  }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene TestQualityRun.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.