alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (TestData.java)

This example Lucene source code file (TestData.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Lucene tags/keywords

exception, freemem, id, id, io, ldcounter, ldcounter, linkedhashmap, reccnt, recsec, runcnt, string, string, testdata, testdata, text, util

The Lucene TestData.java source code

package org.apache.lucene.benchmark.stats;
/**
 * Copyright 2005 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


import java.io.File;
import java.text.NumberFormat;
import java.util.ArrayList;

import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Vector;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.Constants;
import org.apache.lucene.store.Directory;


/**
 * This class holds together all parameters related to a test. Single test is
 * performed several times, and all results are averaged.
 *
 */
public class TestData
{
    public static int[] MAX_BUFFERED_DOCS_COUNTS = new int[]{10, 20, 50, 100, 200, 500};
    public static int[] MERGEFACTOR_COUNTS = new int[]{10, 20, 50, 100, 200, 500};

    /**
     * ID of this test data.
     */
    private String id;
    /**
     * Heap size.
     */
    private long heap;
    /**
     * List of results for each test run with these parameters.
     */
    private Vector<TestRunData> runData = new Vector();
    private int maxBufferedDocs, mergeFactor;
    /**
     * Directory containing source files.
     */
    private File source;
    /**
     * Lucene Directory implementation for creating an index.
     */
    private Directory directory;
    /**
     * Analyzer to use when adding documents.
     */
    private Analyzer analyzer;
    /**
     * If true, use compound file format.
     */
    private boolean compound;
    /**
     * If true, optimize index when finished adding documents.
     */
    private boolean optimize;
    /**
     * Data for search benchmarks.
     */
    private QueryData[] queries;

    public TestData()
    {
        heap = Runtime.getRuntime().maxMemory();
    }

    private static class DCounter
    {
        double total;
        int count, recordCount;
    }

    private static class LCounter
    {
        long total;
        int count;
    }

    private static class LDCounter
    {
      double Dtotal;
      int Dcount, DrecordCount;
      long Ltotal0;
      int Lcount0;
      long Ltotal1;
      int Lcount1;
    }

    /**
     * Get a textual summary of the benchmark results, average from all test runs.
     */
    static final String ID =      "# testData id     ";
    static final String OP =      "operation      ";
    static final String RUNCNT =  "     runCnt";
    static final String RECCNT =  "     recCnt";
    static final String RECSEC =  "          rec/s";
    static final String FREEMEM = "       avgFreeMem";
    static final String TOTMEM =  "      avgTotalMem";
    static final String COLS[] = {
        ID,
        OP,
        RUNCNT,
        RECCNT,
        RECSEC,
        FREEMEM,
        TOTMEM
    };
    public String showRunData(String prefix)
    {
        if (runData.size() == 0)
        {
            return "# [NO RUN DATA]";
        }
        HashMap<String,LDCounter> resByTask = new HashMap(); 
        StringBuilder sb = new StringBuilder();
        String lineSep = System.getProperty("line.separator");
        sb.append("warm = Warm Index Reader").append(lineSep).append("srch = Search Index").append(lineSep).append("trav = Traverse Hits list, optionally retrieving document").append(lineSep).append(lineSep);
        for (int i = 0; i < COLS.length; i++) {
          sb.append(COLS[i]);
        }
        sb.append("\n");
        LinkedHashMap<String,TestData.LCounter[]> mapMem = new LinkedHashMap();
        LinkedHashMap<String,DCounter> mapSpeed = new LinkedHashMap();
        for (int i = 0; i < runData.size(); i++)
        {
            TestRunData trd = runData.get(i);
            for (final String label : trd.getLabels()) 
            {
                MemUsage mem = trd.getMemUsage(label);
                if (mem != null)
                {
                    TestData.LCounter[] tm = mapMem.get(label);
                    if (tm == null)
                    {
                        tm = new TestData.LCounter[2];
                        tm[0] = new TestData.LCounter();
                        tm[1] = new TestData.LCounter();
                        mapMem.put(label, tm);
                    }
                    tm[0].total += mem.avgFree;
                    tm[0].count++;
                    tm[1].total += mem.avgTotal;
                    tm[1].count++;
                }
                TimeData td = trd.getTotals(label);
                if (td != null)
                {
                    TestData.DCounter dc = mapSpeed.get(label);
                    if (dc == null)
                    {
                        dc = new TestData.DCounter();
                        mapSpeed.put(label, dc);
                    }
                    dc.count++;
                    //dc.total += td.getRate();
                    dc.total += (td.count>0 && td.elapsed<=0 ? 1 : td.elapsed); // assume at least 1ms for any countable op
                    dc.recordCount += td.count;
                }
            }
        }
        LinkedHashMap<String,String> res = new LinkedHashMap();
        Iterator<String> it = mapSpeed.keySet().iterator();
        while (it.hasNext())
        {
            String label = it.next();
            TestData.DCounter dc = mapSpeed.get(label);
            res.put(label, 
                format(dc.count, RUNCNT) + 
                format(dc.recordCount / dc.count, RECCNT) +
                format(1,(float) (dc.recordCount * 1000.0 / (dc.total>0 ? dc.total : 1.0)), RECSEC)
                //format((float) (dc.total / (double) dc.count), RECSEC)
                );
            
            // also sum by task
            String task = label.substring(label.lastIndexOf("-")+1);
            LDCounter ldc = resByTask.get(task);
            if (ldc==null) {
              ldc = new LDCounter();
              resByTask.put(task,ldc);
            }
            ldc.Dcount += dc.count;
            ldc.DrecordCount += dc.recordCount;
            ldc.Dtotal += (dc.count>0 && dc.total<=0 ? 1 : dc.total); // assume at least 1ms for any countable op 
        }
        it = mapMem.keySet().iterator();
        while (it.hasNext())
        {
            String label = it.next();
            TestData.LCounter[] lc =  mapMem.get(label);
            String speed = res.get(label);
            boolean makeSpeed = false;
            if (speed == null)
            {
                makeSpeed = true;
                speed =  
                  format(lc[0].count, RUNCNT) + 
                  format(0, RECCNT) + 
                  format(0,(float)0.0, RECSEC);
            }
            res.put(label, speed + 
                format(0, lc[0].total / lc[0].count, FREEMEM) + 
                format(0, lc[1].total / lc[1].count, TOTMEM));
            
            // also sum by task
            String task = label.substring(label.lastIndexOf("-")+1);
            LDCounter ldc = resByTask.get(task);
            if (ldc==null) {
              ldc = new LDCounter();
              resByTask.put(task,ldc);
              makeSpeed = true;
            }
            if (makeSpeed) {
              ldc.Dcount += lc[0].count;
            }
            ldc.Lcount0 += lc[0].count;
            ldc.Lcount1 += lc[1].count;
            ldc.Ltotal0 += lc[0].total;
            ldc.Ltotal1 += lc[1].total;
        }
        it = res.keySet().iterator();
        while (it.hasNext())
        {
            String label = it.next();
            sb.append(format(prefix, ID));
            sb.append(format(label, OP));
            sb.append(res.get(label)).append("\n");
        }
        // show results by task (srch, optimize, etc.) 
        sb.append("\n");
        for (int i = 0; i < COLS.length; i++) {
          sb.append(COLS[i]);
        }
        sb.append("\n");
        it = resByTask.keySet().iterator();
        while (it.hasNext())
        {
            String task = it.next();
            LDCounter ldc = resByTask.get(task);
            sb.append(format("    ", ID));
            sb.append(format(task, OP));
            sb.append(format(ldc.Dcount, RUNCNT)); 
            sb.append(format(ldc.DrecordCount / ldc.Dcount, RECCNT));
            sb.append(format(1,(float) (ldc.DrecordCount * 1000.0 / (ldc.Dtotal>0 ? ldc.Dtotal : 1.0)), RECSEC));
            sb.append(format(0, ldc.Ltotal0 / ldc.Lcount0, FREEMEM)); 
            sb.append(format(0, ldc.Ltotal1 / ldc.Lcount1, TOTMEM));
            sb.append("\n");
        }
        return sb.toString();
    }

    private static NumberFormat numFormat [] = { NumberFormat.getInstance(), NumberFormat.getInstance()};
    private static final String padd = "                                  ";
    static {
      numFormat[0].setMaximumFractionDigits(0);
      numFormat[0].setMinimumFractionDigits(0);
      numFormat[1].setMaximumFractionDigits(1);
      numFormat[1].setMinimumFractionDigits(1);
    }

    // pad number from left
    // numFracDigits must be 0 or 1.
    static String format(int numFracDigits, float f, String col) {
      String res = padd + numFormat[numFracDigits].format(f);
      return res.substring(res.length() - col.length());
    }

    // pad number from left
    static String format(int n, String col) {
      String res = padd + n;
      return res.substring(res.length() - col.length());
    }

    // pad string from right
    static String format(String s, String col) {
      return (s + padd).substring(0,col.length());
    }

    /**
     * Prepare a list of benchmark data, using all possible combinations of
     * benchmark parameters.
     *
     * @param sources   list of directories containing different source document
     *                  collections
     * @param analyzers of analyzers to use.
     */
    public static TestData[] getAll(File[] sources, Analyzer[] analyzers)
    {
        List<TestData> res = new ArrayList(50);
        TestData ref = new TestData();
        for (int q = 0; q < analyzers.length; q++)
        {
            for (int m = 0; m < sources.length; m++)
            {
                for (int i = 0; i < MAX_BUFFERED_DOCS_COUNTS.length; i++)
                {
                    for (int k = 0; k < MERGEFACTOR_COUNTS.length; k++)
                    {
                        for (int n = 0; n < Constants.BOOLEANS.length; n++)
                        {
                            for (int p = 0; p < Constants.BOOLEANS.length; p++)
                            {
                                ref.id = "td-" + q + m + i + k + n + p;
                                ref.source = sources[m];
                                ref.analyzer = analyzers[q];
                                ref.maxBufferedDocs = MAX_BUFFERED_DOCS_COUNTS[i];
                                ref.mergeFactor = MERGEFACTOR_COUNTS[k];
                                ref.compound = Constants.BOOLEANS[n].booleanValue();
                                ref.optimize = Constants.BOOLEANS[p].booleanValue();
                                try
                                {
                                    res.add((TestData)ref.clone());
                                }
                                catch (Exception e)
                                {
                                    e.printStackTrace();
                                }
                            }
                        }
                    }
                }
            }
        }
        return res.toArray(new TestData[0]);
    }

    /**
     * Similar to {@link #getAll(java.io.File[], org.apache.lucene.analysis.Analyzer[])} but only uses
     * maxBufferedDocs of 10 and 100 and same for mergeFactor, thus reducing the number of permutations significantly.
     * It also only uses compound file and optimize is always true.
     *
     * @param sources
     * @param analyzers
     * @return An Array of {@link TestData}
     */
    public static TestData[] getTestDataMinMaxMergeAndMaxBuffered(File[] sources, Analyzer[] analyzers)
    {
        List<TestData> res = new ArrayList(50);
        TestData ref = new TestData();
        for (int q = 0; q < analyzers.length; q++)
        {
            for (int m = 0; m < sources.length; m++)
            {
                ref.id = "td-" + q + m + "_" + 10 + "_" + 10;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = 10;
                ref.mergeFactor = 10;//MERGEFACTOR_COUNTS[k];
                ref.compound = true;
                ref.optimize = true;
                try
                {
                    res.add((TestData)ref.clone());
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
                ref.id = "td-" + q + m  + "_" + 10 + "_" + 100;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = 10;
                ref.mergeFactor = 100;//MERGEFACTOR_COUNTS[k];
                ref.compound = true;
                ref.optimize = true;
                try
                {
                    res.add((TestData)ref.clone());
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
                ref.id = "td-" + q + m + "_" + 100 + "_" + 10;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = 100;
                ref.mergeFactor = 10;//MERGEFACTOR_COUNTS[k];
                ref.compound = true;
                ref.optimize = true;
                try
                {
                    res.add((TestData)ref.clone());
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
                ref.id = "td-" + q + m + "_" + 100 + "_" + 100;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = 100;
                ref.mergeFactor = 100;//MERGEFACTOR_COUNTS[k];
                ref.compound = true;
                ref.optimize = true;
                try
                {
                    res.add((TestData)ref.clone());
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
            }
        }
        return res.toArray(new TestData[0]);
    }

    @Override
    protected Object clone()
    {
        TestData cl = new TestData();
        cl.id = id;
        cl.compound = compound;
        cl.heap = heap;
        cl.mergeFactor = mergeFactor;
        cl.maxBufferedDocs = maxBufferedDocs;
        cl.optimize = optimize;
        cl.source = source;
        cl.directory = directory;
        cl.analyzer = analyzer;
        // don't clone runData
        return cl;
    }

    @Override
    public String toString()
    {
        StringBuilder res = new StringBuilder();
        res.append("#-- ID: ").append(id).append(", ").append(new Date()).append(", heap=").append(heap).append(" --\n");
        res.append("# source=").append(source).append(", directory=").append(directory).append("\n");
        res.append("# maxBufferedDocs=").append(maxBufferedDocs).append(", mergeFactor=").append(mergeFactor);
        res.append(", compound=").append(compound).append(", optimize=").append(optimize).append("\n");
        if (queries != null)
        {
            res.append(QueryData.getLabels()).append("\n");
            for (int i = 0; i < queries.length; i++)
            {
                res.append("# ").append(queries[i].toString()).append("\n");
            }
        }
        return res.toString();
    }

    public Analyzer getAnalyzer()
    {
        return analyzer;
    }

    public void setAnalyzer(Analyzer analyzer)
    {
        this.analyzer = analyzer;
    }

    public boolean isCompound()
    {
        return compound;
    }

    public void setCompound(boolean compound)
    {
        this.compound = compound;
    }

    public Directory getDirectory()
    {
        return directory;
    }

    public void setDirectory(Directory directory)
    {
        this.directory = directory;
    }

    public long getHeap()
    {
        return heap;
    }

    public void setHeap(long heap)
    {
        this.heap = heap;
    }

    public String getId()
    {
        return id;
    }

    public void setId(String id)
    {
        this.id = id;
    }

    public int getMaxBufferedDocs()
    {
        return maxBufferedDocs;
    }

    public void setMaxBufferedDocs(int maxBufferedDocs)
    {
        this.maxBufferedDocs = maxBufferedDocs;
    }

    public int getMergeFactor()
    {
        return mergeFactor;
    }

    public void setMergeFactor(int mergeFactor)
    {
        this.mergeFactor = mergeFactor;
    }

    public boolean isOptimize()
    {
        return optimize;
    }

    public void setOptimize(boolean optimize)
    {
        this.optimize = optimize;
    }

    public QueryData[] getQueries()
    {
        return queries;
    }

    public void setQueries(QueryData[] queries)
    {
        this.queries = queries;
    }

    public Vector<TestRunData> getRunData()
    {
        return runData;
    }

    public void setRunData(Vector<TestRunData> runData)
    {
        this.runData = runData;
    }

    public File getSource()
    {
        return source;
    }

    public void setSource(File source)
    {
        this.source = source;
    }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene TestData.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.