|
What this is
This file is included in the DevDaily.com
"Java Source Code
Warehouse" project. The intent of this project is to help you "Learn
Java by Example" TM.
Other links
The source code
package org.apache.lucene.index;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
/** An IndexReader which reads multiple indexes, appending their content.
*
* @version $Id: MultiReader.java,v 1.7 2004/05/17 12:56:47 goller Exp $
*/
public class MultiReader extends IndexReader {
private IndexReader[] subReaders;
private int[] starts; // 1st docno for each segment
private Hashtable normsCache = new Hashtable();
private int maxDoc = 0;
private int numDocs = -1;
private boolean hasDeletions = false;
/**
* Construct a MultiReader aggregating the named set of (sub)readers.
* Directory locking for delete, undeleteAll, and setNorm operations is
* left to the subreaders.
* Note that all subreaders are closed if this Multireader is closed.
* @param subReaders set of (sub)readers
* @throws IOException
*/
public MultiReader(IndexReader[] subReaders) throws IOException {
super(subReaders.length == 0 ? null : subReaders[0].directory());
initialize(subReaders);
}
/** Construct reading the named set of readers. */
MultiReader(Directory directory, SegmentInfos sis, boolean closeDirectory, IndexReader[] subReaders)
throws IOException {
super(directory, sis, closeDirectory);
initialize(subReaders);
}
private void initialize(IndexReader[] subReaders) throws IOException{
this.subReaders = subReaders;
starts = new int[subReaders.length + 1]; // build starts array
for (int i = 0; i < subReaders.length; i++) {
starts[i] = maxDoc;
maxDoc += subReaders[i].maxDoc(); // compute maxDocs
if (subReaders[i].hasDeletions())
hasDeletions = true;
}
starts[subReaders.length] = maxDoc;
}
/** Return an array of term frequency vectors for the specified document.
* The array contains a vector for each vectorized field in the document.
* Each vector vector contains term numbers and frequencies for all terms
* in a given vectorized field.
* If no such fields existed, the method returns null.
*/
public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
int i = readerIndex(n); // find segment num
return subReaders[i].getTermFreqVectors(n - starts[i]); // dispatch to segment
}
public TermFreqVector getTermFreqVector(int n, String field)
throws IOException {
int i = readerIndex(n); // find segment num
return subReaders[i].getTermFreqVector(n - starts[i], field);
}
public synchronized int numDocs() {
if (numDocs == -1) { // check cache
int n = 0; // cache miss--recompute
for (int i = 0; i < subReaders.length; i++)
n += subReaders[i].numDocs(); // sum from readers
numDocs = n;
}
return numDocs;
}
public int maxDoc() {
return maxDoc;
}
public Document document(int n) throws IOException {
int i = readerIndex(n); // find segment num
return subReaders[i].document(n - starts[i]); // dispatch to segment reader
}
public boolean isDeleted(int n) {
int i = readerIndex(n); // find segment num
return subReaders[i].isDeleted(n - starts[i]); // dispatch to segment reader
}
public boolean hasDeletions() { return hasDeletions; }
protected void doDelete(int n) throws IOException {
numDocs = -1; // invalidate cache
int i = readerIndex(n); // find segment num
subReaders[i].delete(n - starts[i]); // dispatch to segment reader
hasDeletions = true;
}
protected void doUndeleteAll() throws IOException {
for (int i = 0; i < subReaders.length; i++)
subReaders[i].undeleteAll();
hasDeletions = false;
}
private int readerIndex(int n) { // find reader for doc n:
int lo = 0; // search starts array
int hi = subReaders.length - 1; // for first element less
while (hi >= lo) {
int mid = (lo + hi) >> 1;
int midValue = starts[mid];
if (n < midValue)
hi = mid - 1;
else if (n > midValue)
lo = mid + 1;
else { // found a match
while (mid+1 < subReaders.length && starts[mid+1] == midValue) {
mid++; // scan to last match
}
return mid;
}
}
return hi;
}
public synchronized byte[] norms(String field) throws IOException {
byte[] bytes = (byte[])normsCache.get(field);
if (bytes != null)
return bytes; // cache hit
bytes = new byte[maxDoc()];
for (int i = 0; i < subReaders.length; i++)
subReaders[i].norms(field, bytes, starts[i]);
normsCache.put(field, bytes); // update cache
return bytes;
}
public synchronized void norms(String field, byte[] result, int offset)
throws IOException {
byte[] bytes = (byte[])normsCache.get(field);
if (bytes != null) // cache hit
System.arraycopy(bytes, 0, result, offset, maxDoc());
for (int i = 0; i < subReaders.length; i++) // read from segments
subReaders[i].norms(field, result, offset + starts[i]);
}
protected void doSetNorm(int n, String field, byte value)
throws IOException {
normsCache.remove(field); // clear cache
int i = readerIndex(n); // find segment num
subReaders[i].setNorm(n-starts[i], field, value); // dispatch
}
public TermEnum terms() throws IOException {
return new MultiTermEnum(subReaders, starts, null);
}
public TermEnum terms(Term term) throws IOException {
return new MultiTermEnum(subReaders, starts, term);
}
public int docFreq(Term t) throws IOException {
int total = 0; // sum freqs in segments
for (int i = 0; i < subReaders.length; i++)
total += subReaders[i].docFreq(t);
return total;
}
public TermDocs termDocs() throws IOException {
return new MultiTermDocs(subReaders, starts);
}
public TermPositions termPositions() throws IOException {
return new MultiTermPositions(subReaders, starts);
}
protected void doCommit() throws IOException {
for (int i = 0; i < subReaders.length; i++)
subReaders[i].commit();
}
protected synchronized void doClose() throws IOException {
for (int i = 0; i < subReaders.length; i++)
subReaders[i].close();
}
/**
* @see IndexReader#getFieldNames()
*/
public Collection getFieldNames() throws IOException {
// maintain a unique set of field names
Set fieldSet = new HashSet();
for (int i = 0; i < subReaders.length; i++) {
IndexReader reader = subReaders[i];
Collection names = reader.getFieldNames();
// iterate through the field names and add them to the set
for (Iterator iterator = names.iterator(); iterator.hasNext();) {
String s = (String) iterator.next();
fieldSet.add(s);
}
}
return fieldSet;
}
/**
* @see IndexReader#getFieldNames(boolean)
*/
public Collection getFieldNames(boolean indexed) throws IOException {
// maintain a unique set of field names
Set fieldSet = new HashSet();
for (int i = 0; i < subReaders.length; i++) {
IndexReader reader = subReaders[i];
Collection names = reader.getFieldNames(indexed);
fieldSet.addAll(names);
}
return fieldSet;
}
public Collection getIndexedFieldNames(boolean storedTermVector) {
// maintain a unique set of field names
Set fieldSet = new HashSet();
for (int i = 0; i < subReaders.length; i++) {
IndexReader reader = subReaders[i];
Collection names = reader.getIndexedFieldNames(storedTermVector);
fieldSet.addAll(names);
}
return fieldSet;
}
}
class MultiTermEnum extends TermEnum {
private SegmentMergeQueue queue;
private Term term;
private int docFreq;
public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
throws IOException {
queue = new SegmentMergeQueue(readers.length);
for (int i = 0; i < readers.length; i++) {
IndexReader reader = readers[i];
TermEnum termEnum;
if (t != null) {
termEnum = reader.terms(t);
} else
termEnum = reader.terms();
SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
if (t == null ? smi.next() : termEnum.term() != null)
queue.put(smi); // initialize queue
else
smi.close();
}
if (t != null && queue.size() > 0) {
next();
}
}
public boolean next() throws IOException {
SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
if (top == null) {
term = null;
return false;
}
term = top.term;
docFreq = 0;
while (top != null && term.compareTo(top.term) == 0) {
queue.pop();
docFreq += top.termEnum.docFreq(); // increment freq
if (top.next())
queue.put(top); // restore queue
else
top.close(); // done with a segment
top = (SegmentMergeInfo)queue.top();
}
return true;
}
public Term term() {
return term;
}
public int docFreq() {
return docFreq;
}
public void close() throws IOException {
queue.close();
}
}
class MultiTermDocs implements TermDocs {
protected IndexReader[] readers;
protected int[] starts;
protected Term term;
protected int base = 0;
protected int pointer = 0;
private TermDocs[] readerTermDocs;
protected TermDocs current; // == readerTermDocs[pointer]
public MultiTermDocs(IndexReader[] r, int[] s) {
readers = r;
starts = s;
readerTermDocs = new TermDocs[r.length];
}
public int doc() {
return base + current.doc();
}
public int freq() {
return current.freq();
}
public void seek(Term term) {
this.term = term;
this.base = 0;
this.pointer = 0;
this.current = null;
}
public void seek(TermEnum termEnum) throws IOException {
seek(termEnum.term());
}
public boolean next() throws IOException {
if (current != null && current.next()) {
return true;
} else if (pointer < readers.length) {
base = starts[pointer];
current = termDocs(pointer++);
return next();
} else
return false;
}
/** Optimized implementation. */
public int read(final int[] docs, final int[] freqs) throws IOException {
while (true) {
while (current == null) {
if (pointer < readers.length) { // try next segment
base = starts[pointer];
current = termDocs(pointer++);
} else {
return 0;
}
}
int end = current.read(docs, freqs);
if (end == 0) { // none left in segment
current = null;
} else { // got some
final int b = base; // adjust doc numbers
for (int i = 0; i < end; i++)
docs[i] += b;
return end;
}
}
}
/** As yet unoptimized implementation. */
public boolean skipTo(int target) throws IOException {
do {
if (!next())
return false;
} while (target > doc());
return true;
}
private TermDocs termDocs(int i) throws IOException {
if (term == null)
return null;
TermDocs result = readerTermDocs[i];
if (result == null)
result = readerTermDocs[i] = termDocs(readers[i]);
result.seek(term);
return result;
}
protected TermDocs termDocs(IndexReader reader)
throws IOException {
return reader.termDocs();
}
public void close() throws IOException {
for (int i = 0; i < readerTermDocs.length; i++) {
if (readerTermDocs[i] != null)
readerTermDocs[i].close();
}
}
}
class MultiTermPositions extends MultiTermDocs implements TermPositions {
public MultiTermPositions(IndexReader[] r, int[] s) {
super(r,s);
}
protected TermDocs termDocs(IndexReader reader) throws IOException {
return (TermDocs)reader.termPositions();
}
public int nextPosition() throws IOException {
return ((TermPositions)current).nextPosition();
}
}
|