/*
 * Lucene example source code file (FieldCacheSanityChecker.java)
 *
 * This example Lucene source code file (FieldCacheSanityChecker.java) is included in the
 * DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help
 * you "Learn Java by Example" TM.
 */
package org.apache.lucene.util;
/**
* Copyright 2009 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.CacheEntry;
/**
* Provides methods for sanity checking that entries in the FieldCache
* are not wasteful or inconsistent.
* <p>
* Lucene 2.9 Introduced numerous enhancements into how the FieldCache
* is used by the low levels of Lucene searching (for Sorting and
* ValueSourceQueries) to improve both the speed for Sorting, as well
* as reopening of IndexReaders. But these changes have shifted the
* usage of FieldCache from "top level" IndexReaders (frequently a
* MultiReader or DirectoryReader) down to the leaf level SegmentReaders.
* As a result, existing applications that directly access the FieldCache
* may find RAM usage increase significantly when upgrading to 2.9 or
* Later. This class provides an API for these applications (or their
* Unit tests) to check at run time if the FieldCache contains "insane"
* usages of the FieldCache.
* </p>
* @lucene.experimental
* @see FieldCache
* @see FieldCacheSanityChecker.Insanity
* @see FieldCacheSanityChecker.InsanityType
*/
public final class FieldCacheSanityChecker {

// Optional estimator; when non-null, check() calls estimateSize() on every
// CacheEntry it inspects so callers can also see RAM usage. Set via
// setRamUsageEstimator(); null means "skip size estimation".
private RamUsageEstimator ramCalc = null;

// Default construction is sufficient; all state is configured via setters.
public FieldCacheSanityChecker() {
/* NOOP */
}
/**
 * If set, will be used to estimate size for all CacheEntry objects
 * dealt with. Passing {@code null} disables size estimation.
 */
public void setRamUsageEstimator(RamUsageEstimator r) {
  this.ramCalc = r;
}
/**
 * Quick and dirty convenience method that checks every entry currently
 * held by the given FieldCache.
 * @see #check
 */
public static Insanity[] checkSanity(FieldCache cache) {
  final CacheEntry[] entries = cache.getCacheEntries();
  return checkSanity(entries);
}
/**
 * Quick and dirty convenience method that instantiates an instance with
 * "good defaults" and uses it to test the CacheEntrys
 * @see #check
 */
public static Insanity[] checkSanity(CacheEntry... cacheEntries) {
  final FieldCacheSanityChecker checker = new FieldCacheSanityChecker();
  // default estimator does not check for interned strings
  checker.setRamUsageEstimator(new RamUsageEstimator(false));
  return checker.check(cacheEntries);
}
/**
* Tests a CacheEntry[] for indication of "insane" cache usage.
* <p>
* <B>NOTE:FieldCache CreationPlaceholder objects are ignored.
* (:TODO: is this a bad idea? are we masking a real problem?)
* </p>
*/
public Insanity[] check(CacheEntry... cacheEntries) {
if (null == cacheEntries || 0 == cacheEntries.length)
return new Insanity[0];
if (null != ramCalc) {
for (int i = 0; i < cacheEntries.length; i++) {
cacheEntries[i].estimateSize(ramCalc);
}
}
// the indirect mapping lets MapOfSet dedup identical valIds for us
//
// maps the (valId) identityhashCode of cache values to
// sets of CacheEntry instances
final MapOfSets<Integer, CacheEntry> valIdToItems = new MapOfSets(new HashMap>(17));
// maps ReaderField keys to Sets of ValueIds
final MapOfSets<ReaderField, Integer> readerFieldToValIds = new MapOfSets(new HashMap>(17));
//
// any keys that we know result in more then one valId
final Set<ReaderField> valMismatchKeys = new HashSet();
// iterate over all the cacheEntries to get the mappings we'll need
for (int i = 0; i < cacheEntries.length; i++) {
final CacheEntry item = cacheEntries[i];
final Object val = item.getValue();
if (val instanceof FieldCache.CreationPlaceholder)
continue;
final ReaderField rf = new ReaderField(item.getReaderKey(),
item.getFieldName());
final Integer valId = Integer.valueOf(System.identityHashCode(val));
// indirect mapping, so the MapOfSet will dedup identical valIds for us
valIdToItems.put(valId, item);
if (1 < readerFieldToValIds.put(rf, valId)) {
valMismatchKeys.add(rf);
}
}
final List<Insanity> insanity = new ArrayList(valMismatchKeys.size() * 3);
insanity.addAll(checkValueMismatch(valIdToItems,
readerFieldToValIds,
valMismatchKeys));
insanity.addAll(checkSubreaders(valIdToItems,
readerFieldToValIds));
return insanity.toArray(new Insanity[insanity.size()]);
}
/**
* Internal helper method used by check that iterates over
* valMismatchKeys and generates a Collection of Insanity
* instances accordingly. The MapOfSets are used to populate
* the Insanity objects.
* @see InsanityType#VALUEMISMATCH
*/
private Collection<Insanity> checkValueMismatch(MapOfSets valIdToItems,
MapOfSets<ReaderField, Integer> readerFieldToValIds,
Set<ReaderField> valMismatchKeys) {
final List<Insanity> insanity = new ArrayList(valMismatchKeys.size() * 3);
if (! valMismatchKeys.isEmpty() ) {
// we have multiple values for some ReaderFields
final Map<ReaderField, Set rfMap = readerFieldToValIds.getMap();
final Map<Integer, Set valMap = valIdToItems.getMap();
for (final ReaderField rf : valMismatchKeys) {
final List<CacheEntry> badEntries = new ArrayList(valMismatchKeys.size() * 2);
for(final Integer value: rfMap.get(rf)) {
for (final CacheEntry cacheEntry : valMap.get(value)) {
badEntries.add(cacheEntry);
}
}
CacheEntry[] badness = new CacheEntry[badEntries.size()];
badness = badEntries.toArray(badness);
insanity.add(new Insanity(InsanityType.VALUEMISMATCH,
"Multiple distinct value objects for " +
rf.toString(), badness));
}
}
return insanity;
}
/**
* Internal helper method used by check that iterates over
* the keys of readerFieldToValIds and generates a Collection
* of Insanity instances whenever two (or more) ReaderField instances are
* found that have an ancestry relationships.
*
* @see InsanityType#SUBREADER
*/
private Collection<Insanity> checkSubreaders( MapOfSets valIdToItems,
MapOfSets<ReaderField, Integer> readerFieldToValIds) {
final List<Insanity> insanity = new ArrayList(23);
Map<ReaderField, Set badChildren = new HashMap>(17);
MapOfSets<ReaderField, ReaderField> badKids = new MapOfSets(badChildren); // wrapper
Map<Integer, Set viToItemSets = valIdToItems.getMap();
Map<ReaderField, Set rfToValIdSets = readerFieldToValIds.getMap();
Set<ReaderField> seen = new HashSet(17);
Set<ReaderField> readerFields = rfToValIdSets.keySet();
for (final ReaderField rf : readerFields) {
if (seen.contains(rf)) continue;
List<Object> kids = getAllDecendentReaderKeys(rf.readerKey);
for (Object kidKey : kids) {
ReaderField kid = new ReaderField(kidKey, rf.fieldName);
if (badChildren.containsKey(kid)) {
// we've already process this kid as RF and found other problems
// track those problems as our own
badKids.put(rf, kid);
badKids.putAll(rf, badChildren.get(kid));
badChildren.remove(kid);
} else if (rfToValIdSets.containsKey(kid)) {
// we have cache entries for the kid
badKids.put(rf, kid);
}
seen.add(kid);
}
seen.add(rf);
}
// every mapping in badKids represents an Insanity
for (final ReaderField parent : badChildren.keySet()) {
Set<ReaderField> kids = badChildren.get(parent);
List<CacheEntry> badEntries = new ArrayList(kids.size() * 2);
// put parent entr(ies) in first
{
for (final Integer value : rfToValIdSets.get(parent)) {
badEntries.addAll(viToItemSets.get(value));
}
}
// now the entries for the descendants
for (final ReaderField kid : kids) {
for (final Integer value : rfToValIdSets.get(kid)) {
badEntries.addAll(viToItemSets.get(value));
}
}
CacheEntry[] badness = new CacheEntry[badEntries.size()];
badness = badEntries.toArray(badness);
insanity.add(new Insanity(InsanityType.SUBREADER,
"Found caches for decendents of " +
parent.toString(),
badness));
}
return insanity;
}
/**
* Checks if the seed is an IndexReader, and if so will walk
* the hierarchy of subReaders building up a list of the objects
* returned by obj.getFieldCacheKey()
*/
private List<Object> getAllDecendentReaderKeys(Object seed) {
List<Object> all = new ArrayList