alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (CachingWrapperFilter.java)

This example Lucene source code file (CachingWrapperFilter.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Lucene tags/keywords

cachingwrapperfilter, cachingwrapperfilter, deletesmode, docidset, filter, filtercache, indexreader, io, ioexception, object, object, override, override, t, t, util

The Lucene CachingWrapperFilter.java source code

package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Serializable;
import java.io.IOException;
import java.util.Map;
import java.util.WeakHashMap;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.OpenBitSetDISI;

/**
 * Wraps another filter's result and caches it.  The purpose is to allow
 * filters to simply filter, and then wrap with this class
 * to add caching.
 *
 * <p>NOTE: if you wrap this filter as a query (eg,
 * using ConstantScoreQuery), you'll likely want to enforce
 * deletions (using either {@link DeletesMode#RECACHE} or
 * {@link DeletesMode#DYNAMIC}).
 */
public class CachingWrapperFilter extends Filter {
  Filter filter;

  /**
   * Expert: Specifies how new deletions against a reopened
   * reader should be handled.
   *
   * <p>The default is IGNORE, which means the cache entry
   * will be re-used for a given segment, even when that
   * segment has been reopened due to changes in deletions.
   * This is a big performance gain, especially with
   * near-real-timer readers, since you don't hit a cache
   * miss on every reopened reader for prior segments.</p>
   *
   * <p>However, in some cases this can cause invalid query
   * results, allowing deleted documents to be returned.
   * This only happens if the main query does not rule out
   * deleted documents on its own, such as a toplevel
   * ConstantScoreQuery.  To fix this, use RECACHE to
   * re-create the cached filter (at a higher per-reopen
   * cost, but at faster subsequent search performance), or
   * use DYNAMIC to dynamically intersect deleted docs (fast
   * reopen time but some hit to search performance).</p>
   */
  public static enum DeletesMode {IGNORE, RECACHE, DYNAMIC};

  protected final FilterCache<DocIdSet> cache;

  static abstract class FilterCache<T> implements Serializable {

    /**
     * A transient Filter cache (package private because of test)
     */
    // NOTE: not final so that we can dynamically re-init
    // after de-serialize
    transient Map<Object,T> cache;

    private final DeletesMode deletesMode;

    public FilterCache(DeletesMode deletesMode) {
      this.deletesMode = deletesMode;
    }

    public synchronized T get(IndexReader reader, Object coreKey, Object delCoreKey) throws IOException {
      T value;

      if (cache == null) {
        cache = new WeakHashMap<Object,T>();
      }

      if (deletesMode == DeletesMode.IGNORE) {
        // key on core
        value = cache.get(coreKey);
      } else if (deletesMode == DeletesMode.RECACHE) {
        // key on deletes, if any, else core
        value = cache.get(delCoreKey);
      } else {
        assert deletesMode == DeletesMode.DYNAMIC;

        // first try for exact match
        value = cache.get(delCoreKey);

        if (value == null) {
          // now for core match, but dynamically AND NOT
          // deletions
          value = cache.get(coreKey);
          if (value != null && reader.hasDeletions()) {
            value = mergeDeletes(reader, value);
          }
        }
      }

      return value;
    }

    protected abstract T mergeDeletes(IndexReader reader, T value);

    public synchronized void put(Object coreKey, Object delCoreKey, T value) {
      if (deletesMode == DeletesMode.IGNORE) {
        cache.put(coreKey, value);
      } else if (deletesMode == DeletesMode.RECACHE) {
        cache.put(delCoreKey, value);
      } else {
        cache.put(coreKey, value);
        cache.put(delCoreKey, value);
      }
    }
  }

  /**
   * New deletes are ignored by default, which gives higher
   * cache hit rate on reopened readers.  Most of the time
   * this is safe, because the filter will be AND'd with a
   * Query that fully enforces deletions.  If instead you
   * need this filter to always enforce deletions, pass
   * either {@link DeletesMode#RECACHE} or {@link
   * DeletesMode#DYNAMIC}.
   * @param filter Filter to cache results of
   */
  public CachingWrapperFilter(Filter filter) {
    this(filter, DeletesMode.IGNORE);
  }

  /**
   * Expert: by default, the cached filter will be shared
   * across reopened segments that only had changes to their
   * deletions.  
   *
   * @param filter Filter to cache results of
   * @param deletesMode See {@link DeletesMode}
   */
  public CachingWrapperFilter(Filter filter, DeletesMode deletesMode) {
    this.filter = filter;
    cache = new FilterCache<DocIdSet>(deletesMode) {
      @Override
      public DocIdSet mergeDeletes(final IndexReader r, final DocIdSet docIdSet) {
        return new FilteredDocIdSet(docIdSet) {
          @Override
          protected boolean match(int docID) {
            return !r.isDeleted(docID);
          }
        };
      }
    };
  }

  /** Provide the DocIdSet to be cached, using the DocIdSet provided
   *  by the wrapped Filter.
   *  <p>This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable}
   *  returns <code>true, else it copies the {@link DocIdSetIterator} into
   *  an {@link OpenBitSetDISI}.
   */
  protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) throws IOException {
    if (docIdSet == null) {
      // this is better than returning null, as the nonnull result can be cached
      return DocIdSet.EMPTY_DOCIDSET;
    } else if (docIdSet.isCacheable()) {
      return docIdSet;
    } else {
      final DocIdSetIterator it = docIdSet.iterator();
      // null is allowed to be returned by iterator(),
      // in this case we wrap with the empty set,
      // which is cacheable.
      return (it == null) ? DocIdSet.EMPTY_DOCIDSET : new OpenBitSetDISI(it, reader.maxDoc());
    }
  }

  // for testing
  int hitCount, missCount;

  @Override
  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {

    final Object coreKey = reader.getCoreCacheKey();
    final Object delCoreKey = reader.hasDeletions() ? reader.getDeletesCacheKey() : coreKey;

    DocIdSet docIdSet = cache.get(reader, coreKey, delCoreKey);
    if (docIdSet != null) {
      hitCount++;
      return docIdSet;
    }

    missCount++;

    // cache miss
    docIdSet = docIdSetToCache(filter.getDocIdSet(reader), reader);

    if (docIdSet != null) {
      cache.put(coreKey, delCoreKey, docIdSet);
    }
    
    return docIdSet;
  }

  @Override
  public String toString() {
    return "CachingWrapperFilter("+filter+")";
  }

  @Override
  public boolean equals(Object o) {
    if (!(o instanceof CachingWrapperFilter)) return false;
    return this.filter.equals(((CachingWrapperFilter)o).filter);
  }

  @Override
  public int hashCode() {
    return filter.hashCode() ^ 0x1117BF25;  
  }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene CachingWrapperFilter.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.