What this is

This file is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

The source code

package org.apache.lucene.index;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;

/** An IndexReader which reads multiple indexes, appending their content.
 *
 * @version $Id: MultiReader.java,v 1.7 2004/05/17 12:56:47 goller Exp $
 */
public class MultiReader extends IndexReader {
  private IndexReader[] subReaders;
  private int[] starts;                           // 1st docno for each segment
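  // norms for each field, cached after the first call to norms(String)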
  private Hashtable normsCache = new Hashtable();
  private int maxDoc = 0;
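  // -1 means the cached doc count is stale; numDocs() recomputes it lazily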
  private int numDocs = -1;
  private boolean hasDeletions = false;

  /**
   * Construct a MultiReader aggregating the named set of (sub)readers.
   * Directory locking for delete, undeleteAll, and setNorm operations is
   * left to the subreaders.
   *
   * Note that all subreaders are closed if this MultiReader is closed.
   *
   * @param subReaders set of (sub)readers
   * @throws IOException
   */
  public MultiReader(IndexReader[] subReaders) throws IOException {
    super(subReaders.length == 0 ? null : subReaders[0].directory());
    initialize(subReaders);
  }

  /** Construct reading the named set of readers. */
  MultiReader(Directory directory, SegmentInfos sis, boolean closeDirectory,
              IndexReader[] subReaders) throws IOException {
    super(directory, sis, closeDirectory);
    initialize(subReaders);
  }

  private void initialize(IndexReader[] subReaders) throws IOException {
    this.subReaders = subReaders;
    starts = new int[subReaders.length + 1];      // build starts array
    for (int i = 0; i < subReaders.length; i++) {
      starts[i] = maxDoc;
      maxDoc += subReaders[i].maxDoc();           // compute maxDocs
      if (subReaders[i].hasDeletions())
        hasDeletions = true;
    }
    starts[subReaders.length] = maxDoc;
  }

  /** Return an array of term frequency vectors for the specified document.
   *  The array contains a vector for each vectorized field in the document.
   *  Each vector contains term numbers and frequencies for all terms
   *  in a given vectorized field.
   *  If no such fields existed, the method returns null.
   */
  public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
    int i = readerIndex(n);                       // find segment num
    return subReaders[i].getTermFreqVectors(n - starts[i]); // dispatch to segment
  }

  public TermFreqVector getTermFreqVector(int n, String field)
      throws IOException {
    int i = readerIndex(n);                       // find segment num
    return subReaders[i].getTermFreqVector(n - starts[i], field);
  }

  public synchronized int numDocs() {
    if (numDocs == -1) {                          // check cache
      int n = 0;                                  // cache miss--recompute
      for (int i = 0; i < subReaders.length; i++)
        n += subReaders[i].numDocs();             // sum from readers
      numDocs = n;
    }
    return numDocs;
  }

  public int maxDoc() {
    return maxDoc;
  }

  public Document document(int n) throws IOException {
    int i = readerIndex(n);                       // find segment num
    return subReaders[i].document(n - starts[i]); // dispatch to segment reader
  }

  public boolean isDeleted(int n) {
    int i = readerIndex(n);                       // find segment num
    return subReaders[i].isDeleted(n - starts[i]); // dispatch to segment reader
  }

  public boolean hasDeletions() {
    return hasDeletions;
  }

  protected void doDelete(int n) throws IOException {
    numDocs = -1;                                 // invalidate cache
    int i = readerIndex(n);                       // find segment num
    subReaders[i].delete(n - starts[i]);          // dispatch to segment reader
    hasDeletions = true;
  }

  protected void doUndeleteAll() throws IOException {
    for (int i = 0; i < subReaders.length; i++)
      subReaders[i].undeleteAll();
    hasDeletions = false;
  }

  private int readerIndex(int n) {                // find reader for doc n:
    int lo = 0;                                   // search starts array
    int hi = subReaders.length - 1;               // for first element less

    while (hi >= lo) {
      int mid = (lo + hi) >> 1;
      int midValue = starts[mid];
      if (n < midValue)
        hi = mid - 1;
      else if (n > midValue)
        lo = mid + 1;
      else {                                      // found a match
        while (mid + 1 < subReaders.length && starts[mid + 1] == midValue) {
          mid++;                                  // scan to last match
        }
        return mid;
      }
    }
    return hi;
  }

  public synchronized byte[] norms(String field) throws IOException {
    byte[] bytes = (byte[]) normsCache.get(field);
    if (bytes != null)
      return bytes;                               // cache hit

    bytes = new byte[maxDoc()];
    for (int i = 0; i < subReaders.length; i++)
      subReaders[i].norms(field, bytes, starts[i]);

    normsCache.put(field, bytes);                 // update cache
    return bytes;
  }

  public synchronized void norms(String field, byte[] result, int offset)
      throws IOException {
    byte[] bytes = (byte[]) normsCache.get(field);
    if (bytes != null)                            // cache hit
      System.arraycopy(bytes, 0, result, offset, maxDoc());

    for (int i = 0; i < subReaders.length; i++)   // read from segments
      subReaders[i].norms(field, result, offset + starts[i]);
  }

  protected void doSetNorm(int n, String field, byte value)
      throws IOException {
    normsCache.remove(field);                     // clear cache
    int i = readerIndex(n);                       // find segment num
    subReaders[i].setNorm(n - starts[i], field, value); // dispatch
  }

  public TermEnum terms() throws IOException {
    return new MultiTermEnum(subReaders, starts, null);
  }

  public TermEnum terms(Term term) throws IOException {
    return new MultiTermEnum(subReaders, starts, term);
  }

  public int docFreq(Term t) throws IOException {
    int total = 0;                                // sum freqs in segments
    for (int i = 0; i < subReaders.length; i++)
      total += subReaders[i].docFreq(t);
    return total;
  }

  public TermDocs termDocs() throws IOException {
    return new MultiTermDocs(subReaders, starts);
  }

  public TermPositions termPositions() throws IOException {
    return new MultiTermPositions(subReaders, starts);
  }

  protected void doCommit() throws IOException {
    for (int i = 0; i < subReaders.length; i++)
      subReaders[i].commit();
  }

  protected synchronized void doClose() throws IOException {
    for (int i = 0; i < subReaders.length; i++)
      subReaders[i].close();
  }

  /**
   * @see IndexReader#getFieldNames()
   */
  public Collection getFieldNames() throws IOException {
    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < subReaders.length; i++) {
      IndexReader reader = subReaders[i];
      Collection names = reader.getFieldNames();
      // iterate through the field names and add them to the set
      for (Iterator iterator = names.iterator(); iterator.hasNext();) {
        String s = (String) iterator.next();
        fieldSet.add(s);
      }
    }
    return fieldSet;
  }

  /**
   * @see IndexReader#getFieldNames(boolean)
   */
  public Collection getFieldNames(boolean indexed) throws IOException {
    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < subReaders.length; i++) {
      IndexReader reader = subReaders[i];
      Collection names = reader.getFieldNames(indexed);
      fieldSet.addAll(names);
    }
    return fieldSet;
  }

  public Collection getIndexedFieldNames(boolean storedTermVector) {
    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < subReaders.length; i++) {
      IndexReader reader = subReaders[i];
      Collection names = reader.getIndexedFieldNames(storedTermVector);
      fieldSet.addAll(names);
    }
    return fieldSet;
  }
}

class MultiTermEnum extends TermEnum {
  private SegmentMergeQueue queue;

  private Term term;
  private int docFreq;

  public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
      throws IOException {
    queue = new SegmentMergeQueue(readers.length);
    for (int i = 0; i < readers.length; i++) {
      IndexReader reader = readers[i];
      TermEnum termEnum;

      if (t != null) {
        termEnum = reader.terms(t);
      } else
        termEnum = reader.terms();

      SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
      if (t == null ? smi.next() : termEnum.term() != null)
        queue.put(smi);                           // initialize queue
      else
        smi.close();
    }

    if (t != null && queue.size() > 0) {
      next();
    }
  }

  public boolean next() throws IOException {
    SegmentMergeInfo top = (SegmentMergeInfo) queue.top();
    if (top == null) {
      term = null;
      return false;
    }

    term = top.term;
    docFreq = 0;

    while (top != null && term.compareTo(top.term) == 0) {
      queue.pop();
      docFreq += top.termEnum.docFreq();          // increment freq
      if (top.next())
        queue.put(top);                           // restore queue
      else
        top.close();                              // done with a segment
      top = (SegmentMergeInfo) queue.top();
    }
    return true;
  }

  public Term term() {
    return term;
  }

  public int docFreq() {
    return docFreq;
  }

  public void close() throws IOException {
    queue.close();
  }
}

class MultiTermDocs implements TermDocs {
  protected IndexReader[] readers;
  protected int[] starts;
  protected Term term;

  protected int base = 0;
  protected int pointer = 0;

  private TermDocs[] readerTermDocs;
  protected TermDocs current;                     // == readerTermDocs[pointer]

  public MultiTermDocs(IndexReader[] r, int[] s) {
    readers = r;
    starts = s;
    readerTermDocs = new TermDocs[r.length];
  }

  public int doc() {
    return base + current.doc();
  }

  public int freq() {
    return current.freq();
  }

  public void seek(Term term) {
    this.term = term;
    this.base = 0;
    this.pointer = 0;
    this.current = null;
  }

  public void seek(TermEnum termEnum) throws IOException {
    seek(termEnum.term());
  }

  public boolean next() throws IOException {
    if (current != null && current.next()) {
      return true;
    } else if (pointer < readers.length) {
      base = starts[pointer];
      current = termDocs(pointer++);
      return next();
    } else
      return false;
  }

  /** Optimized implementation. */
  public int read(final int[] docs, final int[] freqs) throws IOException {
    while (true) {
      while (current == null) {
        if (pointer < readers.length) {           // try next segment
          base = starts[pointer];
          current = termDocs(pointer++);
        } else {
          return 0;
        }
      }
      int end = current.read(docs, freqs);
      if (end == 0) {                             // none left in segment
        current = null;
      } else {                                    // got some
        final int b = base;                       // adjust doc numbers
        for (int i = 0; i < end; i++)
          docs[i] += b;
        return end;
      }
    }
  }

  /** As yet unoptimized implementation. */
  public boolean skipTo(int target) throws IOException {
    do {
      if (!next())
        return false;
    } while (target > doc());
    return true;
  }

  private TermDocs termDocs(int i) throws IOException {
    if (term == null)
      return null;
    TermDocs result = readerTermDocs[i];
    if (result == null)
      result = readerTermDocs[i] = termDocs(readers[i]);
    result.seek(term);
    return result;
  }

  protected TermDocs termDocs(IndexReader reader) throws IOException {
    return reader.termDocs();
  }

  public void close() throws IOException {
    for (int i = 0; i < readerTermDocs.length; i++) {
      if (readerTermDocs[i] != null)
        readerTermDocs[i].close();
    }
  }
}

class MultiTermPositions extends MultiTermDocs implements TermPositions {
  public MultiTermPositions(IndexReader[] r, int[] s) {
    super(r, s);
  }

  protected TermDocs termDocs(IndexReader reader) throws IOException {
    return (TermDocs) reader.termPositions();
  }

  public int nextPosition() throws IOException {
    return ((TermPositions) current).nextPosition();
  }
}
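
A note on usage: the sketch below shows how a reader of this vintage might aggregate two existing indexes with MultiReader. It is a minimal example, not part of the file above; the index paths are hypothetical, and it assumes the Lucene 1.x API used in this source (IndexReader.open(String) and the IndexReader[] constructor).

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;

public class MultiReaderExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical paths; any two existing Lucene indexes would do.
    IndexReader a = IndexReader.open("/tmp/index-a");
    IndexReader b = IndexReader.open("/tmp/index-b");

    // Aggregate both indexes behind a single IndexReader.
    IndexReader multi = new MultiReader(new IndexReader[] { a, b });

    System.out.println("total docs: " + multi.numDocs());

    // Per the javadoc above, closing the MultiReader closes all subreaders.
    multi.close();
  }
}

Because the starts array offsets each subreader's documents, the aggregated reader presents one contiguous doc-number space to callers.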
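
The starts array and readerIndex carry most of the design: subreader i keeps its own zero-based doc numbers, the composite reader offsets them by starts[i] (the running total of maxDoc() values), and readerIndex maps a composite doc number back to a segment by binary search. Here is a standalone sketch of that mapping that assumes nothing from Lucene; the class and variable names are mine, not the library's.

import java.util.Arrays;

// Illustration of the starts[]/readerIndex idea: starts[i] is the first
// global doc number of segment i, and a global doc number is mapped back
// to a segment by binary search over that array.
public class DocOffsets {
  public static void main(String[] args) {
    int[] maxDocs = { 10, 5, 20 };               // sizes of three subreaders
    int[] starts = new int[maxDocs.length + 1];
    for (int i = 0; i < maxDocs.length; i++)
      starts[i + 1] = starts[i] + maxDocs[i];    // starts = {0, 10, 15, 35}

    int globalDoc = 12;
    // Arrays.binarySearch returns -(insertionPoint) - 1 on a miss, so the
    // containing segment is insertionPoint - 1; on an exact hit the global
    // doc is the first document of the segment at that index.
    int pos = Arrays.binarySearch(starts, globalDoc);
    int segment = pos >= 0 ? pos : -pos - 2;
    int localDoc = globalDoc - starts[segment];

    System.out.println("global doc " + globalDoc + " -> segment " + segment
        + ", local doc " + localDoc);            // segment 1, local doc 2
  }
}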
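
Similarly, MultiTermEnum walks all segments' term lists in sorted order by keeping one cursor per segment in a priority queue (SegmentMergeQueue), popping every cursor that sits on the smallest term and summing their docFreq values. The same k-way merge can be sketched with plain sorted lists and java.util.PriorityQueue; all names below are illustrative, not Lucene's.

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

// Illustrative k-way merge in the style of MultiTermEnum: each source is a
// sorted iterator; the queue always exposes the globally smallest element,
// and equal elements from different sources are coalesced.
public class KWayMerge {
  static class Cursor {
    final Iterator<String> it;
    String current;
    Cursor(Iterator<String> it) { this.it = it; advance(); }
    boolean advance() {
      current = it.hasNext() ? it.next() : null;
      return current != null;
    }
  }

  public static void main(String[] args) {
    List<List<String>> segments = Arrays.asList(
        Arrays.asList("apple", "banana", "cherry"),
        Arrays.asList("banana", "date"),
        Arrays.asList("apple", "date", "fig"));

    PriorityQueue<Cursor> queue =
        new PriorityQueue<>((a, b) -> a.current.compareTo(b.current));
    for (List<String> seg : segments) {
      Cursor c = new Cursor(seg.iterator());
      if (c.current != null) queue.add(c);       // initialize queue
    }

    while (!queue.isEmpty()) {
      String term = queue.peek().current;
      int count = 0;
      // Pop every cursor positioned on the same term, as MultiTermEnum does.
      while (!queue.isEmpty() && queue.peek().current.equals(term)) {
        Cursor c = queue.poll();
        count++;
        if (c.advance()) queue.add(c);           // restore queue
      }
      System.out.println(term + " appears in " + count + " segment(s)");
    }
  }
}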
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.