|
Lucene example source code file (InstantiatedIndexReader.java)
This example Lucene source code file (InstantiatedIndexReader.java) is included in the DevDaily.com
"Java Source Code
Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.
The Lucene InstantiatedIndexReader.java source code
package org.apache.lucene.store.instantiated;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitVector;
/**
* An InstantiatedIndexReader is not a snapshot in time, it is completely in
* sync with the latest commit to the store!
* <p>
* Consider using InstantiatedIndex as if it was immutable.
*/
public class InstantiatedIndexReader extends IndexReader {
private final InstantiatedIndex index;
public InstantiatedIndexReader(InstantiatedIndex index) {
super();
this.index = index;
readerFinishedListeners = Collections.synchronizedSet(new HashSet<ReaderFinishedListener>());
}
/**
* Reports this reader as optimized; the answer is hard-coded and does not
* inspect the backing index.
*
* @return always true.
*/
@Override
public boolean isOptimized() {
return true;
}
/**
 * Returns the version of the backing index.
 * <p>
 * An InstantiatedIndexReader is not a snapshot in time: the value is read from
 * the index on every call, so it follows the latest commit to the store.
 *
 * @return output from {@link InstantiatedIndex#getVersion()} in associated instantiated index.
 */
@Override
public long getVersion() {
  final long currentVersion = index.getVersion();
  return currentVersion;
}
/**
* Not supported by this reader.
*
* @return never returns normally
* @throws UnsupportedOperationException always
*/
@Override
public Directory directory() {
throw new UnsupportedOperationException();
}
/**
* An InstantiatedIndexReader is always current!
* <p>
* The general contract of this method is to check whether the reader is still
* using the current (i.e., most recently committed) version of the index, and
* to return <code>false</code> when a writer has committed changes since the
* reader was opened. This implementation performs no such check and answers
* <code>true</code> unconditionally.
*
* @return always true
* @throws IOException declared by the signature; this implementation does not throw it
*/
@Override
public boolean isCurrent() throws IOException {
return true;
}
/**
 * Gives access to the index this reader was constructed with.
 *
 * @return the backing instantiated index (the same instance, not a copy)
 */
public InstantiatedIndex getIndex() {
  return this.index;
}
private BitVector uncommittedDeletedDocuments;
private Map<String,List uncommittedNormsByFieldNameAndDocumentNumber = null;
private class NormUpdate {
private int doc;
private byte value;
public NormUpdate(int doc, byte value) {
this.doc = doc;
this.value = value;
}
}
/**
 * Counts the documents that are not deleted.
 * <p>
 * The result is the length of the document array minus the number of bits set
 * in the uncommitted deletions and in the committed deletions of the index.
 * Nothing is cached; the count is recomputed on every call.
 *
 * @return number of documents minus pending and committed deletions
 */
@Override
public int numDocs() {
  final int total = getIndex().getDocumentsByNumber().length;
  int removed = 0;
  if (uncommittedDeletedDocuments != null) {
    removed += uncommittedDeletedDocuments.count();
  }
  if (index.getDeletedDocuments() != null) {
    removed += index.getDeletedDocuments().count();
  }
  return total - removed;
}
/**
 * Returns the size of the document array of the backing index, deleted
 * documents included.
 *
 * @return length of the array returned by {@code getDocumentsByNumber()}
 */
@Override
public int maxDoc() {
  final InstantiatedDocument[] documents = getIndex().getDocumentsByNumber();
  return documents.length;
}
/**
 * Tells whether a deletion bit set exists, either committed in the index or
 * pending in this reader. Only the presence of a bit set is checked, not
 * whether any bit in it is actually set.
 *
 * @return true if the index or this reader holds a deletions bit set
 */
@Override
public boolean hasDeletions() {
  if (index.getDeletedDocuments() != null) {
    return true;
  }
  return uncommittedDeletedDocuments != null;
}
/**
 * Checks whether a document is marked deleted, looking first at the committed
 * deletions of the index and then at the deletions pending in this reader.
 *
 * @param n document number
 * @return true if the document is flagged in either bit set
 */
@Override
public boolean isDeleted(int n) {
  if (index.getDeletedDocuments() != null && index.getDeletedDocuments().get(n)) {
    return true;
  }
  if (uncommittedDeletedDocuments == null) {
    return false;
  }
  return uncommittedDeletedDocuments.get(n);
}
/**
 * Marks a document as deleted in this reader's uncommitted state. The index
 * itself is not modified until {@code doCommit} runs.
 *
 * @param docNum number of the document to delete
 * @throws IOException declared by the signature; not thrown by the code in this method
 */
@Override
protected void doDelete(int docNum) throws IOException {
  // Nothing to do when the deletion is already committed.
  if (index.getDeletedDocuments() != null && index.getDeletedDocuments().get(docNum)) {
    return;
  }
  if (uncommittedDeletedDocuments != null) {
    // Nothing to do when the deletion is already pending.
    if (uncommittedDeletedDocuments.get(docNum)) {
      return;
    }
  } else {
    // First pending deletion: allocate one bit per document currently in the index.
    uncommittedDeletedDocuments = new BitVector(maxDoc());
  }
  uncommittedDeletedDocuments.set(docNum);
}
/**
 * Discards every deletion that is pending in this reader.
 * <p>
 * NOTE(review): only the uncommitted bit set is dropped; deletions already
 * stored in the index are left untouched by this method. Confirm that this is
 * the intended meaning of "undelete all" here.
 *
 * @throws IOException declared by the signature; not thrown by the code in this method
 */
@Override
protected void doUndeleteAll() throws IOException {
  // todo: guard with a read/write lock
  this.uncommittedDeletedDocuments = null;
}
@Override
protected void doCommit(Map<String,String> commitUserData) throws IOException {
// todo: read/write lock
// 1. update norms
if (uncommittedNormsByFieldNameAndDocumentNumber != null) {
for (Map.Entry<String,List e : uncommittedNormsByFieldNameAndDocumentNumber.entrySet()) {
byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(e.getKey());
for (NormUpdate normUpdate : e.getValue()) {
norms[normUpdate.doc] = normUpdate.value;
}
}
uncommittedNormsByFieldNameAndDocumentNumber = null;
}
// 2. remove deleted documents
if (uncommittedDeletedDocuments != null) {
if (index.getDeletedDocuments() == null) {
index.setDeletedDocuments(uncommittedDeletedDocuments);
} else {
for (int d = 0; d< uncommittedDeletedDocuments.size(); d++) {
if (uncommittedDeletedDocuments.get(d)) {
index.getDeletedDocuments().set(d);
}
}
}
uncommittedDeletedDocuments = null;
}
// todo unlock read/writelock
}
/**
* Does nothing; this reader has no close-time work in its current form.
*
* @throws IOException declared by the signature; this implementation does not throw it
*/
@Override
protected void doClose() throws IOException {
// intentionally empty
// todo perhaps release all associated instances?
}
/**
 * Collects the names of all fields whose settings match the requested option.
 * <p>
 * Matching rules, per field setting:
 * <ul>
 * <li>ALL: every field</li>
 * <li>UNINDEXED / INDEXED: by the indexed flag</li>
 * <li>STORES_PAYLOADS: by the storePayloads flag</li>
 * <li>INDEXED_NO_TERMVECTOR / INDEXED_WITH_TERMVECTOR: indexed, without / with term vector</li>
 * <li>TERMVECTOR: term vector stored, but neither positions nor offsets</li>
 * <li>TERMVECTOR_WITH_POSITION: positions but not offsets</li>
 * <li>TERMVECTOR_WITH_OFFSET: offsets but not positions</li>
 * <li>TERMVECTOR_WITH_POSITION_OFFSET: both positions and offsets</li>
 * </ul>
 *
 * @param fieldOption the kind of fields to list
 * @return a new, modifiable set of matching field names; empty when nothing matches
 */
@Override
public Collection<String> getFieldNames(FieldOption fieldOption) {
  final Set<String> fieldSet = new HashSet<String>();
  for (FieldSetting fi : index.getFieldSettings().values()) {
    // Every branch of the former if/else-if chain added the same name, so the
    // chain is equivalent to a single disjunction of its conditions.
    final boolean selected =
        fieldOption == IndexReader.FieldOption.ALL
        || (!fi.indexed && fieldOption == IndexReader.FieldOption.UNINDEXED)
        || (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS)
        || (fi.indexed && fieldOption == IndexReader.FieldOption.INDEXED)
        || (fi.indexed && !fi.storeTermVector
            && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
        || (fi.storeTermVector && !fi.storePositionWithTermVector && !fi.storeOffsetWithTermVector
            && fieldOption == IndexReader.FieldOption.TERMVECTOR)
        || (fi.indexed && fi.storeTermVector
            && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)
        || (fi.storePositionWithTermVector && !fi.storeOffsetWithTermVector
            && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)
        || (fi.storeOffsetWithTermVector && !fi.storePositionWithTermVector
            && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)
        || (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector
            && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET);
    if (selected) {
      fieldSet.add(fi.fieldName);
    }
  }
  return fieldSet;
}
/**
 * Return the {@link org.apache.lucene.document.Document} at the <code>n</code>th
 * position.
 * <p>
 * <b>Warning!</b>
 * The resulting document is the actual stored document instance
 * and not a deserialized clone as returned by an IndexReader
 * over a {@link org.apache.lucene.store.Directory}.
 * I.e., if you need to touch the document, clone it first!
 * <p>
 * This can also be seen as a feature for live changes of stored values,
 * but be careful! Adding a field with a name unknown to the index
 * or to a field with previously no stored values will make
 * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
 * out of sync, causing problems for instance when merging the
 * instantiated index to another index.
 * <p>
 * This implementation ignores the field selector! All stored fields are always
 * returned, because the call is forwarded to {@link #document(int)}.
 *
 * @param n document number
 * @param fieldSelector ignored
 * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
 * @throws CorruptIndexException declared by the signature
 * @throws IOException declared by the signature
 *
 * @see org.apache.lucene.document.Fieldable
 * @see org.apache.lucene.document.FieldSelector
 * @see org.apache.lucene.document.SetBasedFieldSelector
 * @see org.apache.lucene.document.LoadFirstFieldSelector
 */
@Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  final Document stored = document(n);
  return stored;
}
/**
 * Returns the stored fields of the <code>n</code>th
 * <code>Document</code> in this index, or null when that document is deleted.
 * <p>
 * <b>Warning!</b>
 * The resulting document is the actual stored document instance
 * and not a deserialized clone as returned by an IndexReader
 * over a {@link org.apache.lucene.store.Directory}.
 * I.e., if you need to touch the document, clone it first!
 * <p>
 * This can also be seen as a feature for live changes of stored values,
 * but be careful! Adding a field with a name unknown to the index
 * or to a field with previously no stored values will make
 * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
 * out of sync, causing problems for instance when merging the
 * instantiated index to another index.
 *
 * @param n document number
 * @return the live document instance, or null if {@link #isDeleted(int)} reports it deleted
 * @throws IOException declared by the signature
 */
@Override
public Document document(int n) throws IOException {
  if (isDeleted(n)) {
    return null;
  }
  return getIndex().getDocumentsByNumber()[n].getDocument();
}
/**
 * Returns the norms of a field, one byte per document.
 * <p>
 * Never ever touch the returned values: while this reader has no pending norm
 * updates the array is the very array held by the index. Once any norm update
 * is pending, a copy is returned instead, with the updates for this field
 * applied on top.
 *
 * @param field field name
 * @return the norms, or a zero-length array when the index has none for the field
 * @throws IOException declared by the signature
 */
@Override
public byte[] norms(String field) throws IOException {
  final byte[] committed = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
  if (committed == null) {
    return new byte[0]; // todo a static final zero length attribute?
  }
  if (uncommittedNormsByFieldNameAndDocumentNumber == null) {
    return committed;
  }
  // Copy first so that pending values never leak into the index before a commit.
  final byte[] merged = committed.clone();
  final List<NormUpdate> pending = uncommittedNormsByFieldNameAndDocumentNumber.get(field);
  if (pending != null) {
    for (NormUpdate update : pending) {
      merged[update.doc] = update.value;
    }
  }
  return merged;
}
/**
 * Copies the norms of a field into a caller-supplied array.
 * <p>
 * Norm updates pending in this reader are applied on top of the copied values,
 * so the result agrees with {@link #norms(String)}. The arrays held by the
 * index are not modified.
 *
 * @param field field name
 * @param bytes destination array
 * @param offset position in {@code bytes} that receives the norm of document 0
 * @throws IOException declared by the signature
 */
@Override
public void norms(String field, byte[] bytes, int offset) throws IOException {
  byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
  if (norms == null) {
    // No norms for this field: leave the destination untouched.
    return;
  }
  System.arraycopy(norms, 0, bytes, offset, norms.length);
  // Overlay uncommitted updates; without this, callers of this overload would
  // see stale values after setNorm while callers of norms(String) would not.
  if (uncommittedNormsByFieldNameAndDocumentNumber != null) {
    List<NormUpdate> updated = uncommittedNormsByFieldNameAndDocumentNumber.get(field);
    if (updated != null) {
      for (NormUpdate normUpdate : updated) {
        bytes[offset + normUpdate.doc] = normUpdate.value;
      }
    }
  }
}
@Override
protected void doSetNorm(int doc, String field, byte value) throws IOException {
if (uncommittedNormsByFieldNameAndDocumentNumber == null) {
uncommittedNormsByFieldNameAndDocumentNumber = new HashMap<String,List(getIndex().getNormsByFieldNameAndDocumentNumber().size());
}
List<NormUpdate> list = uncommittedNormsByFieldNameAndDocumentNumber.get(field);
if (list == null) {
list = new LinkedList<NormUpdate>();
uncommittedNormsByFieldNameAndDocumentNumber.put(field, list);
}
list.add(new NormUpdate(doc, value));
}
/**
 * Returns the number of documents associated with a term in the index.
 *
 * @param t the term to look up
 * @return length of the term's associated-documents array, or 0 when the index does not know the term
 * @throws IOException declared by the signature
 */
@Override
public int docFreq(Term t) throws IOException {
  final InstantiatedTerm found = getIndex().findTerm(t);
  return found == null ? 0 : found.getAssociatedDocuments().length;
}
/**
 * Creates a term enumeration over this reader, with no start position given.
 *
 * @return a new {@code InstantiatedTermEnum} bound to this reader
 * @throws IOException declared by the signature
 */
@Override
public TermEnum terms() throws IOException {
  final TermEnum allTerms = new InstantiatedTermEnum(this);
  return allTerms;
}
/**
 * Creates a term enumeration starting at the given term, or, when the index
 * does not contain it, at the position where it would be inserted in the
 * ordered term array.
 *
 * @param t the term to start from
 * @return a new {@code InstantiatedTermEnum} created with the computed start index
 * @throws IOException declared by the signature
 */
@Override
public TermEnum terms(Term t) throws IOException {
  final InstantiatedTerm exact = getIndex().findTerm(t);
  if (exact != null) {
    return new InstantiatedTermEnum(this, exact.getTermIndex());
  }
  int position = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator);
  if (position < 0) {
    // A negative result encodes the insertion point as -(insertionPoint) - 1.
    position = -(position + 1);
  }
  return new InstantiatedTermEnum(this, position);
}
/**
 * Creates a term-documents enumerator that has not been positioned on a term.
 *
 * @return a new {@code InstantiatedTermDocs} bound to this reader
 * @throws IOException declared by the signature
 */
@Override
public TermDocs termDocs() throws IOException {
  final TermDocs unpositioned = new InstantiatedTermDocs(this);
  return unpositioned;
}
/**
 * Creates a term-documents enumerator for a term.
 *
 * @param term the term to seek to; null selects the {@code InstantiatedAllTermDocs} enumerator instead
 * @return an enumerator already positioned with {@code seek(term)}, or the all-documents variant for null
 * @throws IOException declared by the signature
 */
@Override
public TermDocs termDocs(Term term) throws IOException {
  if (term != null) {
    final InstantiatedTermDocs positioned = new InstantiatedTermDocs(this);
    positioned.seek(term);
    return positioned;
  }
  return new InstantiatedAllTermDocs(this);
}
/**
 * Creates a term-positions enumerator that has not been positioned on a term.
 *
 * @return a new {@code InstantiatedTermPositions} bound to this reader
 * @throws IOException declared by the signature
 */
@Override
public TermPositions termPositions() throws IOException {
  final TermPositions unpositioned = new InstantiatedTermPositions(this);
  return unpositioned;
}
/**
 * Returns one term vector per field in the vector space of a document.
 *
 * @param docNumber document number
 * @return an array with one {@code InstantiatedTermPositionVector} per field, in the
 *         iteration order of the vector space's key set; null when the document has no vector space
 * @throws IOException declared by the signature
 */
@Override
public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
  InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
  if (doc.getVectorSpace() == null) {
    return null;
  }
  TermFreqVector[] ret = new TermFreqVector[doc.getVectorSpace().size()];
  int i = 0;
  // The document is looked up once above instead of once per field.
  for (String field : doc.getVectorSpace().keySet()) {
    ret[i++] = new InstantiatedTermPositionVector(doc, field);
  }
  return ret;
}
/**
 * Returns the term vector of one field of a document.
 *
 * @param docNumber document number
 * @param field field name
 * @return a new {@code InstantiatedTermPositionVector}, or null when the document has no
 *         vector space or no entry for the field
 * @throws IOException declared by the signature
 */
@Override
public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
  final InstantiatedDocument document = getIndex().getDocumentsByNumber()[docNumber];
  final boolean hasVector =
      document.getVectorSpace() != null && document.getVectorSpace().get(field) != null;
  return hasVector ? new InstantiatedTermPositionVector(document, field) : null;
}
/**
 * Feeds the term vector of one field of a document to a mapper.
 * <p>
 * The mapper is left untouched when the document has no vector space or no
 * entry for the field. The previous condition was inverted: it entered the body
 * only when the entry was missing, which dereferenced null, and it skipped
 * every field that did have a vector.
 *
 * @param docNumber document number
 * @param field field name
 * @param mapper receives {@code setExpectations} once, then one {@code map} call per term
 * @throws IOException declared by the signature
 */
@Override
public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
  InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
  if (doc.getVectorSpace() == null) {
    return;
  }
  List<InstantiatedTermDocumentInformation> tv = doc.getVectorSpace().get(field);
  if (tv == null) {
    return;
  }
  mapper.setExpectations(field, tv.size(), true, true);
  for (InstantiatedTermDocumentInformation tdi : tv) {
    // The frequency passed to the mapper is the number of recorded positions.
    mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
  }
}
@Override
public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
for (Map.Entry<String, List e : doc.getVectorSpace().entrySet()) {
mapper.setExpectations(e.getKey(), e.getValue().size(), true, true);
for (InstantiatedTermDocumentInformation tdi : e.getValue()) {
mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
}
}
}
}
Other Lucene examples (source code examples)
Here is a short list of links related to this Lucene InstantiatedIndexReader.java source code file:
|