alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (InstantiatedTerm.java)

This example Lucene source code file (InstantiatedTerm.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Lucene tags/keywords

comparator, comparator, instantiatedterm, instantiatedtermdocumentinformation, instantiatedtermdocumentinformation, io, object, override, serializable, string, string, term, term, util

The Lucene InstantiatedTerm.java source code

package org.apache.lucene.store.instantiated;

/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Serializable;
import java.util.Comparator;

import org.apache.lucene.index.Term;

/**
 * A term in the inverted index, coupled to the documents it occurs in.
 *
 * @see org.apache.lucene.index.Term
 */
public class InstantiatedTerm
    implements Serializable {

  private static final long serialVersionUID = 1l;

  public static final Comparator<InstantiatedTerm> comparator = new Comparator() {
    public int compare(InstantiatedTerm instantiatedTerm, InstantiatedTerm instantiatedTerm1) {
      return instantiatedTerm.getTerm().compareTo(instantiatedTerm1.getTerm());
    }
  };

  public static final Comparator<Object> termComparator = new Comparator() {
    public int compare(Object o, Object o1) {
      return ((InstantiatedTerm)o).getTerm().compareTo((Term)o1);
    }
  };
  
  private Term term;

  /**
   * index of term in InstantiatedIndex
   * @see org.apache.lucene.store.instantiated.InstantiatedIndex#getOrderedTerms() */
  private int termIndex;

  /**
   * @return Term associated with this entry of the index object graph
   */
  public Term getTerm() {
    return term;
  }

  InstantiatedTerm(String field, String text) {
    this.term = new Term(field, text);
  }

//  this could speed up TermDocs.skipTo even more
//  private Map</** document number*/Integer, /** index in associatedDocuments */Integer> associatedDocumentIndexByDocumentNumber = new HashMap();
//
//  public Map</** document number*/Integer, /** index in associatedDocuments */Integer> getAssociatedDocumentIndexByDocumentNumber() {
//    return associatedDocumentIndexByDocumentNumber;
//  }

  /** Ordered by document number */
  private InstantiatedTermDocumentInformation[] associatedDocuments;

  /**
   * Meta data per document in which this term is occurring.
   * Ordered by document number.
   *
   * @return Meta data per document in which this term is occurring.
   */
  public InstantiatedTermDocumentInformation[] getAssociatedDocuments() {
    return associatedDocuments;
  }


  /**
   * Meta data per document in which this term is occurring.
   * Ordered by document number.
   *
   * @param associatedDocuments meta data per document in which this term is occurring, ordered by document number
   */
  void setAssociatedDocuments(InstantiatedTermDocumentInformation[] associatedDocuments) {
    this.associatedDocuments = associatedDocuments;
  }

  /**
   * Finds index to the first beyond the current whose document number is
   * greater than or equal to <i>target, -1 if there is no such element.
   *
   * @param target the document number to match
   * @return -1 if there is no such element
   */
  public int seekCeilingDocumentInformationIndex(int target) {
    return seekCeilingDocumentInformationIndex(target, 0, getAssociatedDocuments().length);
  }

  /**
   * Finds index to the first beyond the current whose document number is
   * greater than or equal to <i>target, -1 if there is no such element.
   *
   * @param target the document number to match
   * @param startOffset associated documents index start offset
   * @return -1 if there is no such element
   */
  public int seekCeilingDocumentInformationIndex(int target, int startOffset) {
    return seekCeilingDocumentInformationIndex(target, startOffset, getAssociatedDocuments().length);
  }

  /**
   * Finds index to the first beyond the current whose document number is
   * greater than or equal to <i>target, -1 if there is no such element.
   *
   * @param target the document number to match
   * @param startOffset associated documents index start offset
   * @param endPosition associated documents index end position
   * @return -1 if there is no such element
   */
  public int seekCeilingDocumentInformationIndex(int target, int startOffset, int endPosition) {

    int pos = binarySearchAssociatedDocuments(target, startOffset, endPosition - startOffset);

//    int pos = Arrays.binarySearch(getAssociatedDocuments(), target, InstantiatedTermDocumentInformation.doumentNumberIntegerComparator);

    if (pos < 0) {
      pos = -1 - pos;
    }
    if (getAssociatedDocuments().length <= pos) {
      return -1;
    } else {
      return pos;
    }
  }

  public int binarySearchAssociatedDocuments(int target) {
    return binarySearchAssociatedDocuments(target, 0);
  }

  public int binarySearchAssociatedDocuments(int target, int offset) {
    return binarySearchAssociatedDocuments(target, offset, associatedDocuments.length - offset);
  }

  /**
   * @param target value to search for in the array
   * @param offset index of the first valid value in the array
   * @param length number of valid values in the array
   * @return index of an occurrence of key in array, or -(insertionIndex + 1) if key is not contained in array (<i>insertionIndex is then the index at which key could be inserted).
   */
  public int binarySearchAssociatedDocuments(int target, int offset, int length) {

    // implementation originally from http://ochafik.free.fr/blog/?p=106

    if (length == 0) {
      return -1 - offset;
    }
    int min = offset, max = offset + length - 1;
    int minVal = getAssociatedDocuments()[min].getDocument().getDocumentNumber();
    int maxVal = getAssociatedDocuments()[max].getDocument().getDocumentNumber();


    int nPreviousSteps = 0;

    for (; ;) {

      // be careful not to compute key - minVal, for there might be an integer overflow.
      if (target <= minVal) return target == minVal ? min : -1 - min;
      if (target >= maxVal) return target == maxVal ? max : -2 - max;

      assert min != max;

      int pivot;
      // A typical binarySearch algorithm uses pivot = (min + max) / 2.
      // The pivot we use here tries to be smarter and to choose a pivot close to the expectable location of the key.
      // This reduces dramatically the number of steps needed to get to the key.
      // However, it does not work well with a logarithmic distribution of values, for instance.
      // When the key is not found quickly the smart way, we switch to the standard pivot.
      if (nPreviousSteps > 2) {
        pivot = (min + max) >> 1;
        // stop increasing nPreviousSteps from now on
      } else {
        // NOTE: We cannot do the following operations in int precision, because there might be overflows.
        //       long operations are slower than float operations with the hardware this was tested on (intel core duo 2, JVM 1.6.0).
        //       Overall, using float proved to be the safest and fastest approach.
        pivot = min + (int) ((target - (float) minVal) / (maxVal - (float) minVal) * (max - min));
        nPreviousSteps++;
      }

      int pivotVal = getAssociatedDocuments()[pivot].getDocument().getDocumentNumber();

      // NOTE: do not store key - pivotVal because of overflows
      if (target > pivotVal) {
        min = pivot + 1;
        max--;
      } else if (target == pivotVal) {
        return pivot;
      } else {
        min++;
        max = pivot - 1;
      }
      maxVal = getAssociatedDocuments()[max].getDocument().getDocumentNumber();
      minVal = getAssociatedDocuments()[min].getDocument().getDocumentNumber();
    }
  }


  /**
   * Navigates to the view of this occurrences of this term in a specific document. 
   *
   * This method is only used by InstantiatedIndex(IndexReader) and
   * should not be optimized for less CPU at the cost of more RAM.
   *
   * @param documentNumber the n:th document in the index
   * @return view of this term from specified document
   */
  public InstantiatedTermDocumentInformation getAssociatedDocument(int documentNumber) {
    int pos = binarySearchAssociatedDocuments(documentNumber);
    return pos < 0 ? null : getAssociatedDocuments()[pos];
  }

  public final String field() {
    return term.field();
  }

  public final String text() {
    return term.text();
  }

  @Override
  public String toString() {
    return term.toString();
  }


  public int getTermIndex() {
    return termIndex;
  }

  public void setTermIndex(int termIndex) {
    this.termIndex = termIndex;
  }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene InstantiatedTerm.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.