|
Lucene example source code file (CachingCollector.java)
The Lucene CachingCollector.java source code

package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * Caches all docs, and optionally also scores, coming from
 * a search, and is then able to replay them to another
 * collector. You specify the max RAM this class may use.
 * Once the collection is done, call {@link #isCached}. If
 * this returns true, you can use {@link #replay} against a
 * new collector. If it returns false, this means too much
 * RAM was required and you must instead re-run the original
 * search.
 *
 * <p>NOTE: this class consumes 4 bytes (or 8 bytes, if
 * scoring is cached) per collected document. If the result
 * set is large this can easily be a very substantial amount
 * of RAM!
 *
 * <p>NOTE: this class caches at least 128 documents
 * before checking RAM limits.
* * <p>See the Lucene contrib/grouping module for more * details including a full code example.</p> * * @lucene.experimental */ public abstract class CachingCollector extends Collector { // Max out at 512K arrays private static final int MAX_ARRAY_SIZE = 512 * 1024; private static final int INITIAL_ARRAY_SIZE = 128; private final static int[] EMPTY_INT_ARRAY = new int[0]; private static class SegStart { public final IndexReader reader; public final int base; public final int end; public SegStart(IndexReader reader, int base, int end) { this.reader = reader; this.base = base; this.end = end; } } private static final class CachedScorer extends Scorer { // NOTE: these members are package-private b/c that way accessing them from // the outer class does not incur access check by the JVM. The same // situation would be if they were defined in the outer class as private // members. int doc; float score; private CachedScorer() { super((Weight) null); } @Override public final float score() { return score; } @Override public final int advance(int target) { throw new UnsupportedOperationException(); } @Override public final int docID() { return doc; } @Override public final float freq() { throw new UnsupportedOperationException(); } @Override public final int nextDoc() { throw new UnsupportedOperationException(); } } // A CachingCollector which caches scores private static final class ScoreCachingCollector extends CachingCollector { private final CachedScorer cachedScorer; private final List<float[]> cachedScores; private Scorer scorer; private float[] curScores; ScoreCachingCollector(Collector other, double maxRAMMB) { super(other, maxRAMMB, true); cachedScorer = new CachedScorer(); cachedScores = new ArrayList<float[]>(); curScores = new float[128]; cachedScores.add(curScores); } ScoreCachingCollector(Collector other, int maxDocsToCache) { super(other, maxDocsToCache); cachedScorer = new CachedScorer(); cachedScores = new ArrayList<float[]>(); curScores = new 
float[INITIAL_ARRAY_SIZE]; cachedScores.add(curScores); } @Override public void collect(int doc) throws IOException { if (curDocs == null) { // Cache was too large cachedScorer.score = scorer.score(); cachedScorer.doc = doc; other.collect(doc); return; } // Allocate a bigger array or abort caching if (upto == curDocs.length) { base += upto; // Compute next array length - don't allocate too big arrays int nextLength = 8*curDocs.length; if (nextLength > MAX_ARRAY_SIZE) { nextLength = MAX_ARRAY_SIZE; } if (base + nextLength > maxDocsToCache) { // try to allocate a smaller array nextLength = maxDocsToCache - base; if (nextLength <= 0) { // Too many docs to collect -- clear cache curDocs = null; curScores = null; cachedSegs.clear(); cachedDocs.clear(); cachedScores.clear(); cachedScorer.score = scorer.score(); cachedScorer.doc = doc; other.collect(doc); return; } } curDocs = new int[nextLength]; cachedDocs.add(curDocs); curScores = new float[nextLength]; cachedScores.add(curScores); upto = 0; } curDocs[upto] = doc; cachedScorer.score = curScores[upto] = scorer.score(); upto++; cachedScorer.doc = doc; other.collect(doc); } @Override public void replay(Collector other) throws IOException { replayInit(other); int curUpto = 0; int curBase = 0; int chunkUpto = 0; curDocs = EMPTY_INT_ARRAY; for (SegStart seg : cachedSegs) { other.setNextReader(seg.reader, seg.base); other.setScorer(cachedScorer); while (curBase + curUpto < seg.end) { if (curUpto == curDocs.length) { curBase += curDocs.length; curDocs = cachedDocs.get(chunkUpto); curScores = cachedScores.get(chunkUpto); chunkUpto++; curUpto = 0; } cachedScorer.score = curScores[curUpto]; cachedScorer.doc = curDocs[curUpto]; other.collect(curDocs[curUpto++]); } } } @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; other.setScorer(cachedScorer); } @Override public String toString() { if (isCached()) { return "CachingCollector (" + (base+upto) + " docs & scores cached)"; } else { return 
"CachingCollector (cache was cleared)"; } } } // A CachingCollector which does not cache scores private static final class NoScoreCachingCollector extends CachingCollector { NoScoreCachingCollector(Collector other, double maxRAMMB) { super(other, maxRAMMB, false); } NoScoreCachingCollector(Collector other, int maxDocsToCache) { super(other, maxDocsToCache); } @Override public void collect(int doc) throws IOException { if (curDocs == null) { // Cache was too large other.collect(doc); return; } // Allocate a bigger array or abort caching if (upto == curDocs.length) { base += upto; // Compute next array length - don't allocate too big arrays int nextLength = 8*curDocs.length; if (nextLength > MAX_ARRAY_SIZE) { nextLength = MAX_ARRAY_SIZE; } if (base + nextLength > maxDocsToCache) { // try to allocate a smaller array nextLength = maxDocsToCache - base; if (nextLength <= 0) { // Too many docs to collect -- clear cache curDocs = null; cachedSegs.clear(); cachedDocs.clear(); other.collect(doc); return; } } curDocs = new int[nextLength]; cachedDocs.add(curDocs); upto = 0; } curDocs[upto] = doc; upto++; other.collect(doc); } @Override public void replay(Collector other) throws IOException { replayInit(other); int curUpto = 0; int curbase = 0; int chunkUpto = 0; curDocs = EMPTY_INT_ARRAY; for (SegStart seg : cachedSegs) { other.setNextReader(seg.reader, seg.base); while (curbase + curUpto < seg.end) { if (curUpto == curDocs.length) { curbase += curDocs.length; curDocs = cachedDocs.get(chunkUpto); chunkUpto++; curUpto = 0; } other.collect(curDocs[curUpto++]); } } } @Override public void setScorer(Scorer scorer) throws IOException { other.setScorer(scorer); } @Override public String toString() { if (isCached()) { return "CachingCollector (" + (base+upto) + " docs cached)"; } else { return "CachingCollector (cache was cleared)"; } } } // TODO: would be nice if a collector defined a // needsScores() method so we can specialize / do checks // up front. 
  // This is only relevant for the ScoreCaching
  // version -- if the wrapped Collector does not need
  // scores, it can avoid cachedScorer entirely.
  protected final Collector other;
  protected final int maxDocsToCache;
  protected final List<SegStart> cachedSegs = new ArrayList

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene CachingCollector.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.