alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (QualityBenchmark.java)

This example Lucene source code file (QualityBenchmark.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Lucene tags/keywords

docnameextractor, io, printwriter, qualitybenchmark, qualityquery, qualityqueryparser, qualityqueryparser, qualitystats, qualitystats, query, searcher, string, string, topdocs, topdocs

The Lucene QualityBenchmark.java source code

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.quality;

import java.io.IOException;
import java.io.PrintWriter;

import org.apache.lucene.benchmark.quality.utils.DocNameExtractor;
import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocs;

/**
 * Main entry point for running a quality benchmark.
 * <p>
 * There are two main configurations for running a quality benchmark: <ul>
 * <li>Against existing judgements.
 * <li>For submission (e.g. for a contest).
 * </ul>
 * The first configuration requires a non null
 * {@link org.apache.lucene.benchmark.quality.Judge Judge}.
 * The second configuration requires a non null
 * {@link org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionReport}.
 * Both may be supplied to the same run, in which case each query is both
 * judged and reported.
 */
public class QualityBenchmark {

  /** Quality Queries that this quality benchmark would execute. */
  protected QualityQuery[] qualityQueries;

  /** Parser for turning QualityQueries into Lucene Queries. */
  protected QualityQueryParser qqParser;

  /** Index to be searched. */
  protected Searcher searcher;

  /** index field to extract doc name for each search result; used for judging the results. */
  protected String docNameField;

  /** maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging. */
  private int maxQueries = Integer.MAX_VALUE;

  /** maximal number of results to collect for each query. Default: 1000. */
  private int maxResults = 1000;

  /**
   * Create a QualityBenchmark.
   * @param qqs quality queries to run.
   * @param qqParser parser for turning QualityQueries into Lucene Queries.
   * @param searcher index to be searched.
   * @param docNameField name of field containing the document name.
   *        This allows to extract the doc name for search results,
   *        and is important for judging the results.
   */
  public QualityBenchmark(QualityQuery[] qqs, QualityQueryParser qqParser,
      Searcher searcher, String docNameField) {
    this.qualityQueries = qqs;
    this.qqParser = qqParser;
    this.searcher = searcher;
    this.docNameField = docNameField;
  }

  /**
   * Run the quality benchmark.
   * <p>
   * At most {@link #getMaxQueries()} queries are run, in the order they were
   * given to the constructor, each collecting at most {@link #getMaxResults()}
   * results. A non-positive query limit runs no queries and yields an empty array.
   *
   * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query.
   *        If null, no judgements would be made. Usually null for a submission run.
   * @param submitRep submission report is created if non null.
   * @param qualityLog If not null, quality run data would be printed for each query.
   * @return QualityStats of each quality query that was executed, indexed like the
   *         quality queries. Entries are only filled in when <code>judge</code> is
   *         non null; without a judge every entry is null.
   * @throws Exception if quality benchmark failed to run.
   */
  public QualityStats[] execute(Judge judge, SubmissionReport submitRep,
                                PrintWriter qualityLog) throws Exception {
    // Clamp at zero: setMaxQueries() does not validate its argument, and a
    // negative limit would otherwise fail with NegativeArraySizeException.
    int nQueries = Math.max(0, Math.min(maxQueries, qualityQueries.length));
    QualityStats[] stats = new QualityStats[nQueries];
    for (int i = 0; i < nQueries; i++) {
      QualityQuery qq = qualityQueries[i];
      // generate query
      Query q = qqParser.parse(qq);
      // search with this query; only the search call itself is timed
      long t1 = System.currentTimeMillis();
      TopDocs td = searcher.search(q, null, maxResults);
      long searchTime = System.currentTimeMillis() - t1;
      // most likely we either submit or judge, but check both
      if (judge != null) {
        stats[i] = analyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
      }
      if (submitRep != null) {
        submitRep.report(qq, td, docNameField, searcher);
      }
    }
    if (submitRep != null) {
      submitRep.flush();
    }
    return stats;
  }

  /*
   * Analyze/judge results for a single quality query; optionally log them.
   *
   * Each result's doc name is extracted from the index, judged for relevance,
   * and recorded in the returned stats under its 1-based rank together with
   * the time (in milliseconds) spent extracting that doc name.
   */
  private QualityStats analyzeQueryResults(QualityQuery qq, Query q, TopDocs td, Judge judge,
      PrintWriter logger, long searchTime) throws IOException {
    QualityStats stts = new QualityStats(judge.maxRecall(qq), searchTime);
    ScoreDoc[] sd = td.scoreDocs;
    // For the first doc name we measure also the construction of the
    // doc name extractor, just in case.
    long t1 = System.currentTimeMillis();
    DocNameExtractor xt = new DocNameExtractor(docNameField);
    for (int i = 0; i < sd.length; i++) {
      String docName = xt.docName(searcher, sd[i].doc);
      long docNameExtractTime = System.currentTimeMillis() - t1;
      boolean isRelevant = judge.isRelevant(docName, qq);
      stts.addResult(i + 1, isRelevant, docNameExtractTime);
      // Restart the clock only after judging and recording this result, so the
      // next extraction time does not also include this result's judging time.
      t1 = System.currentTimeMillis();
    }
    if (logger != null) {
      logger.println(qq.getQueryID()+"  -  "+q);
      stts.log(qq.getQueryID()+" Stats:",1,logger,"  ");
    }
    return stts;
  }

  /**
   * @return the maximum number of quality queries to run. Useful at debugging.
   */
  public int getMaxQueries() {
    return maxQueries;
  }

  /**
   * Set the maximum number of quality queries to run. Useful at debugging.
   * @param maxQueries upper bound on the number of queries executed by
   *        {@link #execute(Judge, SubmissionReport, PrintWriter)};
   *        a non-positive value makes it run no queries.
   */
  public void setMaxQueries(int maxQueries) {
    this.maxQueries = maxQueries;
  }

  /**
   * @return the maximum number of results to collect for each quality query.
   */
  public int getMaxResults() {
    return maxResults;
  }

  /**
   * set the maximum number of results to collect for each quality query.
   * @param maxResults number of top results requested from the searcher for each query.
   */
  public void setMaxResults(int maxResults) {
    this.maxResults = maxResults;
  }

}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene QualityBenchmark.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.