alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (LookupBenchmarkTest.java)

This example Lucene source code file (LookupBenchmarkTest.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Lucene tags/keywords

arraylist, benchmarkresult, benchmarkresult, bufferedreader, class, exception, exception, integer, io, list, list, lookup, lookup, net, network, string, termfreq, threading, threads, util

The Lucene LookupBenchmarkTest.java source code

package org.apache.lucene.search.suggest;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Random;
import java.util.concurrent.Callable;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.fst.FSTLookup;
import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
import org.apache.lucene.search.suggest.tst.TSTLookup;

import org.junit.BeforeClass;
import org.junit.Ignore;

/**
 * Benchmarks tests for implementations of {@link Lookup} interface.
 */
@Ignore("COMMENT ME TO RUN BENCHMARKS!")
public class LookupBenchmarkTest extends LuceneTestCase {
  @SuppressWarnings("unchecked")
  private final List<Class benchmarkClasses = Arrays.asList(
      JaspellLookup.class, 
      TSTLookup.class,
      FSTLookup.class);

  private final static int rounds = 15;
  private final static int warmup = 5;

  private final int num = 7;
  private final boolean onlyMorePopular = true;

  private final static Random random = new Random(0xdeadbeef);

  /**
   * Input term/weight pairs.
   */
  private static TermFreq [] dictionaryInput;

  /**
   * Benchmark term/weight pairs (randomized order).
   */
  private static List<TermFreq> benchmarkInput;

  /**
   * Loads terms and frequencies from Wikipedia (cached).
   */
  @BeforeClass
  public static void setup() throws Exception {
    List<TermFreq> input = readTop50KWiki();
    Collections.shuffle(input, random);
    LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreq [input.size()]);
    Collections.shuffle(input, random);
    LookupBenchmarkTest.benchmarkInput = input;
  }

  static final Charset UTF_8 = Charset.forName("UTF-8");

  /**
   * Collect the multilingual input for benchmarks/ tests.
   */
  public static List<TermFreq> readTop50KWiki() throws Exception {
    List<TermFreq> input = new ArrayList();
    URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8");
    assert resource != null : "Resource missing: Top50KWiki.utf8";

    String line = null;
    BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), UTF_8));
    while ((line = br.readLine()) != null) {
      int tab = line.indexOf('|');
      assertTrue("No | separator?: " + line, tab >= 0);
      float weight = Float.parseFloat(line.substring(tab + 1));
      String key = line.substring(0, tab);
      input.add(new TermFreq(key, weight));
    }
    br.close();
    return input;
  }

  /**
   * Test construction time.
   */
  public void testConstructionTime() throws Exception {
    System.err.println("-- construction time");
    for (final Class<? extends Lookup> cls : benchmarkClasses) {
      BenchmarkResult result = measure(new Callable<Integer>() {
        public Integer call() throws Exception {
          final Lookup lookup = buildLookup(cls, dictionaryInput);          
          return lookup.hashCode();
        }
      });

      System.err.println(
          String.format(Locale.ENGLISH, "%-15s input: %d, time[ms]: %s",
              cls.getSimpleName(),
              dictionaryInput.length,
              result.average.toString()));
    }
  }

  /**
   * Test memory required for the storage.
   */
  public void testStorageNeeds() throws Exception {
    System.err.println("-- RAM consumption");
    final RamUsageEstimator rue = new RamUsageEstimator();
    for (Class<? extends Lookup> cls : benchmarkClasses) {
      Lookup lookup = buildLookup(cls, dictionaryInput);
      System.err.println(
          String.format(Locale.ENGLISH, "%-15s size[B]:%,13d",
              lookup.getClass().getSimpleName(), 
              rue.estimateRamUsage(lookup)));
    }
  }

  /**
   * Create {@link Lookup} instance and populate it. 
   */
  private Lookup buildLookup(Class<? extends Lookup> cls, TermFreq[] input) throws Exception {
    Lookup lookup = cls.newInstance();
    lookup.build(new TermFreqArrayIterator(input));
    return lookup;
  }

  /**
   * Test performance of lookup on full hits.
   */
  public void testPerformanceOnFullHits() throws Exception {
    final int minPrefixLen = 100;
    final int maxPrefixLen = 200;
    runPerformanceTest(minPrefixLen, maxPrefixLen, num, onlyMorePopular);
  }

  /**
   * Test performance of lookup on longer term prefixes (6-9 letters or shorter).
   */
  public void testPerformanceOnPrefixes6_9() throws Exception {
    final int minPrefixLen = 6;
    final int maxPrefixLen = 9;
    runPerformanceTest(minPrefixLen, maxPrefixLen, num, onlyMorePopular);
  }

  /**
   * Test performance of lookup on short term prefixes (2-4 letters or shorter).
   */
  public void testPerformanceOnPrefixes2_4() throws Exception {
    final int minPrefixLen = 2;
    final int maxPrefixLen = 4;
    runPerformanceTest(minPrefixLen, maxPrefixLen, num, onlyMorePopular);
  }

  /**
   * Run the actual benchmark. 
   */
  public void runPerformanceTest(final int minPrefixLen, final int maxPrefixLen, 
      final int num, final boolean onlyMorePopular) throws Exception {
    System.err.println(String.format(Locale.ENGLISH,
        "-- prefixes: %d-%d, num: %d, onlyMorePopular: %s",
        minPrefixLen, maxPrefixLen, num, onlyMorePopular));

    for (Class<? extends Lookup> cls : benchmarkClasses) {
      final Lookup lookup = buildLookup(cls, dictionaryInput);

      final List<String> input = new ArrayList(benchmarkInput.size());
      for (TermFreq tf : benchmarkInput) {
        input.add(tf.term.substring(0, Math.min(tf.term.length(), 
              minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1))));
      }

      BenchmarkResult result = measure(new Callable<Integer>() {
        public Integer call() throws Exception {
          int v = 0;
          for (String term : input) {
            v += lookup.lookup(term, onlyMorePopular, num).size();
          }
          return v;
        }
      });

      System.err.println(
          String.format(Locale.ENGLISH, "%-15s queries: %d, time[ms]: %s, ~qps: %.0f",
              lookup.getClass().getSimpleName(),
              input.size(),
              result.average.toString(),
              input.size() / result.average.avg));
    }
  }

  /**
   * Do the measurements.
   */
  private BenchmarkResult measure(Callable<Integer> callable) {
    final double NANOS_PER_MS = 1000000;

    try {
      List<Double> times = new ArrayList();
      for (int i = 0; i < warmup + rounds; i++) {
          final long start = System.nanoTime();
          guard = callable.call().intValue();
          times.add((System.nanoTime() - start) / NANOS_PER_MS);
      }
      return new BenchmarkResult(times, warmup, rounds);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  /** Guard against opts. */
  @SuppressWarnings("unused")
  private static volatile int guard;

  private static class BenchmarkResult {
    /** Average time per round (ms). */
    public final Average average;

    public BenchmarkResult(List<Double> times, int warmup, int rounds) {
      this.average = Average.from(times.subList(warmup, times.size()));
    }
  }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene LookupBenchmarkTest.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.