alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (InstantiatedIndexReader.java)

This example Lucene source code file (InstantiatedIndexReader.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Lucene tags/keywords

instantiateddocument, instantiateddocument, instantiatedindex, instantiatedtermdocs, instantiatedtermenum, io, ioexception, ioexception, list, normupdate, normupdate, override, override, string, termfreqvector, util

The Lucene InstantiatedIndexReader.java source code

package org.apache.lucene.store.instantiated;

/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitVector;

/**
 * An InstantiatedIndexReader is not a snapshot in time, it is completely in
 * sync with the latest commit to the store!
 * <p>
 * Consider using InstantiatedIndex as if it was immutable.
 */
public class InstantiatedIndexReader extends IndexReader {

  private final InstantiatedIndex index;

  public InstantiatedIndexReader(InstantiatedIndex index) {
    super();
    this.index = index;
    readerFinishedListeners = Collections.synchronizedSet(new HashSet<ReaderFinishedListener>());
  }

  /**
   * @return always true.
   */
  @Override
  public boolean isOptimized() {
    return true;
  }

  /**
   * An InstantiatedIndexReader is not a snapshot in time, it is completely in
   * sync with the latest commit to the store!
   * 
   * @return output from {@link InstantiatedIndex#getVersion()} in associated instantiated index.
   */
  @Override
  public long getVersion() {
    return index.getVersion();
  }

  @Override
  public Directory directory() {
    throw new UnsupportedOperationException();
  }

  /**
   * An InstantiatedIndexReader is always current!
   * 
   * Check whether this IndexReader is still using the current (i.e., most
   * recently committed) version of the index. If a writer has committed any
   * changes to the index since this reader was opened, this will return
   * <code>false, in which case you must open a new IndexReader in
   * order to see the changes. See the description of the <a
   * href="IndexWriter.html#autoCommit"><code>autoCommit flag
   * which controls when the {@link IndexWriter} actually commits changes to the
   * index.
   * 
   * @return always true
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   * @throws UnsupportedOperationException unless overridden in subclass
   */
  @Override
  public boolean isCurrent() throws IOException {
    return true;
  }

  public InstantiatedIndex getIndex() {
    return index;
  }

  private BitVector uncommittedDeletedDocuments;

  private Map<String,List uncommittedNormsByFieldNameAndDocumentNumber = null;

  private class NormUpdate {
    private int doc;
    private byte value;

    public NormUpdate(int doc, byte value) {
      this.doc = doc;
      this.value = value;
    }
  }

  @Override
  public int numDocs() {
    // todo i suppose this value could be cached, but array#length and bitvector#count is fast.
    int numDocs = getIndex().getDocumentsByNumber().length;
    if (uncommittedDeletedDocuments != null) {
      numDocs -= uncommittedDeletedDocuments.count();
    }
    if (index.getDeletedDocuments() != null) {
      numDocs -= index.getDeletedDocuments().count();
    }
    return numDocs;
  }

  @Override
  public int maxDoc() {
    return getIndex().getDocumentsByNumber().length;
  }

  @Override
  public boolean hasDeletions() {
    return index.getDeletedDocuments() != null || uncommittedDeletedDocuments != null;
  }


  @Override
  public boolean isDeleted(int n) {
    return (index.getDeletedDocuments() != null && index.getDeletedDocuments().get(n))
        || (uncommittedDeletedDocuments != null && uncommittedDeletedDocuments.get(n));
  }


  @Override
  protected void doDelete(int docNum) throws IOException {

    // dont delete if already deleted
    if ((index.getDeletedDocuments() != null && index.getDeletedDocuments().get(docNum))
        || (uncommittedDeletedDocuments != null && uncommittedDeletedDocuments.get(docNum))) {
      return;
    }

    if (uncommittedDeletedDocuments == null) {
      uncommittedDeletedDocuments = new BitVector(maxDoc());
    }

    uncommittedDeletedDocuments.set(docNum);
  }

  @Override
  protected void doUndeleteAll() throws IOException {
    // todo: read/write lock
    uncommittedDeletedDocuments = null;
    // todo: read/write unlock
  }

  @Override
  protected void doCommit(Map<String,String> commitUserData) throws IOException {
    // todo: read/write lock

    // 1. update norms
    if (uncommittedNormsByFieldNameAndDocumentNumber != null) {
      for (Map.Entry<String,List e : uncommittedNormsByFieldNameAndDocumentNumber.entrySet()) {
        byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(e.getKey());
        for (NormUpdate normUpdate : e.getValue()) {
          norms[normUpdate.doc] = normUpdate.value;
        }
      }
      uncommittedNormsByFieldNameAndDocumentNumber = null;
    }

    // 2. remove deleted documents
    if (uncommittedDeletedDocuments != null) {
      if (index.getDeletedDocuments() == null) {
        index.setDeletedDocuments(uncommittedDeletedDocuments);
      } else {
        for (int d = 0; d< uncommittedDeletedDocuments.size(); d++) {
          if (uncommittedDeletedDocuments.get(d)) {
            index.getDeletedDocuments().set(d);
          }
        }
      }
      uncommittedDeletedDocuments = null;
    }

    // todo unlock read/writelock
  }

  @Override
  protected void doClose() throws IOException {
    // ignored
    // todo perhaps release all associated instances?
  }

  @Override
  public Collection<String> getFieldNames(FieldOption fieldOption) {
    Set<String> fieldSet = new HashSet();
    for (FieldSetting fi : index.getFieldSettings().values()) {
      if (fieldOption == IndexReader.FieldOption.ALL) {
        fieldSet.add(fi.fieldName);
      } else if (!fi.indexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
        fieldSet.add(fi.fieldName);
      } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
        fieldSet.add(fi.fieldName);
      } else if (fi.indexed && fieldOption == IndexReader.FieldOption.INDEXED) {
        fieldSet.add(fi.fieldName);
      } else if (fi.indexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
        fieldSet.add(fi.fieldName);
      } else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false
          && fieldOption == IndexReader.FieldOption.TERMVECTOR) {
        fieldSet.add(fi.fieldName);
      } else if (fi.indexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
        fieldSet.add(fi.fieldName);
      } else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false
          && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
        fieldSet.add(fi.fieldName);
      } else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false
          && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
        fieldSet.add(fi.fieldName);
      } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector)
          && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
        fieldSet.add(fi.fieldName);
      } 
    }
    return fieldSet;
  }

  /**
   * Return the {@link org.apache.lucene.document.Document} at the <code>nth
   * position.
     <p>
   * <b>Warning!
   * The resulting document is the actual stored document instance
   * and not a deserialized clone as retuned by an IndexReader
   * over a {@link org.apache.lucene.store.Directory}.
   * I.e., if you need to touch the document, clone it first!
   * <p>
   * This can also be seen as a feature for live changes of stored values,
   * but be careful! Adding a field with an name unknown to the index
   * or to a field with previously no stored values will make
   * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
   * out of sync, causing problems for instance when merging the
   * instantiated index to another index.
     <p>
   * This implementation ignores the field selector! All stored fields are always returned!
   * <p>
   *
   * @param n document number
   * @param fieldSelector ignored
   * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   * 
   * @see org.apache.lucene.document.Fieldable
   * @see org.apache.lucene.document.FieldSelector
   * @see org.apache.lucene.document.SetBasedFieldSelector
   * @see org.apache.lucene.document.LoadFirstFieldSelector
   */
  @Override
  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
    return document(n);
  }

  /**
   * Returns the stored fields of the <code>nth
   * <code>Document in this index.
   * <p>
   * <b>Warning!
   * The resulting document is the actual stored document instance
   * and not a deserialized clone as retuned by an IndexReader
   * over a {@link org.apache.lucene.store.Directory}.
   * I.e., if you need to touch the document, clone it first!
   * <p>
   * This can also be seen as a feature for live changes of stored values,
   * but be careful! Adding a field with an name unknown to the index
   * or to a field with previously no stored values will make
   * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
   * out of sync, causing problems for instance when merging the
   * instantiated index to another index.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */

  @Override
  public Document document(int n) throws IOException {
    return isDeleted(n) ? null : getIndex().getDocumentsByNumber()[n].getDocument();
  }

  /**
   * never ever touch these values. it is the true values, unless norms have
   * been touched.
   */
  @Override
  public byte[] norms(String field) throws IOException {
    byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
    if (norms == null) {
      return new byte[0]; // todo a static final zero length attribute?
    }
    if (uncommittedNormsByFieldNameAndDocumentNumber != null) {
      norms = norms.clone();
      List<NormUpdate> updated = uncommittedNormsByFieldNameAndDocumentNumber.get(field);
      if (updated != null) {
        for (NormUpdate normUpdate : updated) {
          norms[normUpdate.doc] = normUpdate.value;
        }
      }
    }
    return norms;
  }

  @Override
  public void norms(String field, byte[] bytes, int offset) throws IOException {
    byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
    if (norms == null) {
      return;
    }
    System.arraycopy(norms, 0, bytes, offset, norms.length);
  }

  @Override
  protected void doSetNorm(int doc, String field, byte value) throws IOException {
    if (uncommittedNormsByFieldNameAndDocumentNumber == null) {
      uncommittedNormsByFieldNameAndDocumentNumber = new HashMap<String,List(getIndex().getNormsByFieldNameAndDocumentNumber().size());
    }
    List<NormUpdate> list = uncommittedNormsByFieldNameAndDocumentNumber.get(field);
    if (list == null) {
      list = new LinkedList<NormUpdate>();
      uncommittedNormsByFieldNameAndDocumentNumber.put(field, list);
    }
    list.add(new NormUpdate(doc, value));
  }

  @Override
  public int docFreq(Term t) throws IOException {
    InstantiatedTerm term = getIndex().findTerm(t);
    if (term == null) {
      return 0;
    } else {
      return term.getAssociatedDocuments().length;
    }
  }

  @Override
  public TermEnum terms() throws IOException {
    return new InstantiatedTermEnum(this);
  }

  @Override
  public TermEnum terms(Term t) throws IOException {
    InstantiatedTerm it = getIndex().findTerm(t);
    if (it != null) {
      return new InstantiatedTermEnum(this, it.getTermIndex());
    } else {
      int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator);
      if (startPos < 0) {
        startPos = -1 - startPos;
      }
      return new InstantiatedTermEnum(this, startPos);
    }
  }

  @Override
  public TermDocs termDocs() throws IOException {
    return new InstantiatedTermDocs(this);
  }


  @Override
  public TermDocs termDocs(Term term) throws IOException {
    if (term == null) {
      return new InstantiatedAllTermDocs(this);
    } else {
      InstantiatedTermDocs termDocs = new InstantiatedTermDocs(this);
      termDocs.seek(term);
      return termDocs;
    }
  }

  @Override
  public TermPositions termPositions() throws IOException {
    return new InstantiatedTermPositions(this);
  }

  @Override
  public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
    InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
    if (doc.getVectorSpace() == null) {
      return null;
    }
    TermFreqVector[] ret = new TermFreqVector[doc.getVectorSpace().size()];
    Iterator<String> it = doc.getVectorSpace().keySet().iterator();
    for (int i = 0; i < ret.length; i++) {
      ret[i] = new InstantiatedTermPositionVector(getIndex().getDocumentsByNumber()[docNumber], it.next());
    }
    return ret;
  }

  @Override
  public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
    InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
    if (doc.getVectorSpace() == null || doc.getVectorSpace().get(field) == null) {
      return null;
    } else {
      return new InstantiatedTermPositionVector(doc, field);
    }
  }

  @Override
  public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
    InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
    if (doc.getVectorSpace() != null && doc.getVectorSpace().get(field) == null) {
      List<InstantiatedTermDocumentInformation> tv = doc.getVectorSpace().get(field);
      mapper.setExpectations(field, tv.size(), true, true);
      for (InstantiatedTermDocumentInformation tdi : tv) {
        mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
      }
    }
  }

  @Override
  public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
    InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
    for (Map.Entry<String, List e : doc.getVectorSpace().entrySet()) {
      mapper.setExpectations(e.getKey(), e.getValue().size(), true, true);
      for (InstantiatedTermDocumentInformation tdi : e.getValue()) {
        mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
      }
    }
  }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene InstantiatedIndexReader.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.