alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (SnapshotDeletionPolicy.java)

This example Lucene source code file (SnapshotDeletionPolicy.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Lucene tags/keywords

entry, illegalstateexception, indexcommit, indexcommit, io, ioexception, map, override, override, set, snapshotdeletionpolicy, snapshotinfo, snapshotinfo, string, string, util

The Lucene SnapshotDeletionPolicy.java source code

package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import java.io.IOException;

import org.apache.lucene.store.Directory;

/**
 * An {@link IndexDeletionPolicy} that wraps around any other
 * {@link IndexDeletionPolicy} and adds the ability to hold and later release
 * snapshots of an index. While a snapshot is held, the {@link IndexWriter} will
 * not remove any files associated with it even if the index is otherwise being
 * actively, arbitrarily changed. Because we wrap another arbitrary
 * {@link IndexDeletionPolicy}, this gives you the freedom to continue using
 * whatever {@link IndexDeletionPolicy} you would normally want to use with your
 * index.
 * 
 * <p>
 * This class maintains all snapshots in-memory, and so the information is not
 * persisted and not protected against system failures. If persistency is
 * important, you can use {@link PersistentSnapshotDeletionPolicy} (or your own
 * extension) and when creating a new instance of this deletion policy, pass the
 * persistent snapshots information to
 * {@link #SnapshotDeletionPolicy(IndexDeletionPolicy, Map)}.
 * 
 * @lucene.experimental
 */
public class SnapshotDeletionPolicy implements IndexDeletionPolicy {

  /** Holds a Snapshot's information. */
  private static class SnapshotInfo {
    String id;
    String segmentsFileName;
    IndexCommit commit;
    
    public SnapshotInfo(String id, String segmentsFileName, IndexCommit commit) {
      this.id = id;
      this.segmentsFileName = segmentsFileName;
      this.commit = commit;
    }
    
    @Override
    public String toString() {
      return id + " : " + segmentsFileName;
    }
  }
  
  protected class SnapshotCommitPoint extends IndexCommit {
    protected IndexCommit cp;

    protected SnapshotCommitPoint(IndexCommit cp) {
      this.cp = cp;
    }

    @Override
    public String toString() {
      return "SnapshotDeletionPolicy.SnapshotCommitPoint(" + cp + ")";
    }

    /**
     * Returns true if this segment can be deleted. The default implementation
     * returns false if this segment is currently held as snapshot.
     */
    protected boolean shouldDelete(String segmentsFileName) {
      return !segmentsFileToIDs.containsKey(segmentsFileName);
    }

    @Override
    public void delete() {
      synchronized (SnapshotDeletionPolicy.this) {
        // Suppress the delete request if this commit point is
        // currently snapshotted.
        if (shouldDelete(getSegmentsFileName())) {
          cp.delete();
        }
      }
    }

    @Override
    public Directory getDirectory() {
      return cp.getDirectory();
    }

    @Override
    public Collection<String> getFileNames() throws IOException {
      return cp.getFileNames();
    }

    @Override
    public long getGeneration() {
      return cp.getGeneration();
    }

    @Override
    public String getSegmentsFileName() {
      return cp.getSegmentsFileName();
    }

    @Override
    public Map<String, String> getUserData() throws IOException {
      return cp.getUserData();
    }

    @Override
    public long getVersion() {
      return cp.getVersion();
    }

    @Override
    public boolean isDeleted() {
      return cp.isDeleted();
    }

    @Override
    public boolean isOptimized() {
      return cp.isOptimized();
    }
  }

  /** Snapshots info */
  private Map<String, SnapshotInfo> idToSnapshot = new HashMap();

  // multiple IDs could point to the same commit point (segments file name)
  private Map<String, Set segmentsFileToIDs = new HashMap>();

  private IndexDeletionPolicy primary;
  protected IndexCommit lastCommit;

  public SnapshotDeletionPolicy(IndexDeletionPolicy primary) {
    this.primary = primary;
  }

  /**
   * {@link SnapshotDeletionPolicy} wraps another {@link IndexDeletionPolicy} to
   * enable flexible snapshotting.
   * 
   * @param primary
   *          the {@link IndexDeletionPolicy} that is used on non-snapshotted
   *          commits. Snapshotted commits, are not deleted until explicitly
   *          released via {@link #release(String)}
   * @param snapshotsInfo
   *          A mapping of snapshot ID to the segments filename that is being
   *          snapshotted. The expected input would be the output of
   *          {@link #getSnapshots()}. A null value signals that there are no
   *          initial snapshots to maintain.
   */
  public SnapshotDeletionPolicy(IndexDeletionPolicy primary,
      Map<String, String> snapshotsInfo) {
    this(primary);

    if (snapshotsInfo != null) {
      // Add the ID->segmentIDs here - the actual IndexCommits will be
      // reconciled on the call to onInit()
      for (Entry<String, String> e : snapshotsInfo.entrySet()) {
        registerSnapshotInfo(e.getKey(), e.getValue(), null);
      }
    }
  }

  /**
   * Checks if the given id is already used by another snapshot, and throws
   * {@link IllegalStateException} if it is.
   */
  protected void checkSnapshotted(String id) {
    if (isSnapshotted(id)) {
      throw new IllegalStateException("Snapshot ID " + id
          + " is already used - must be unique");
    }
  }

  /** Registers the given snapshot information. */
  protected void registerSnapshotInfo(String id, String segment, IndexCommit commit) {
    idToSnapshot.put(id, new SnapshotInfo(id, segment, commit));
    Set<String> ids = segmentsFileToIDs.get(segment);
    if (ids == null) {
      ids = new HashSet<String>();
      segmentsFileToIDs.put(segment, ids);
    }
    ids.add(id);
  }

  protected List<IndexCommit> wrapCommits(List commits) {
    List<IndexCommit> wrappedCommits = new ArrayList(commits.size());
    for (IndexCommit ic : commits) {
      wrappedCommits.add(new SnapshotCommitPoint(ic));
    }
    return wrappedCommits;
  }

  /**
   * Get a snapshotted IndexCommit by ID. The IndexCommit can then be used to
   * open an IndexReader on a specific commit point, or rollback the index by
   * opening an IndexWriter with the IndexCommit specified in its
   * {@link IndexWriterConfig}.
   * 
   * @param id
   *          a unique identifier of the commit that was snapshotted.
   * @throws IllegalStateException
   *           if no snapshot exists by the specified ID.
   * @return The {@link IndexCommit} for this particular snapshot.
   */
  public synchronized IndexCommit getSnapshot(String id) {
    SnapshotInfo snapshotInfo = idToSnapshot.get(id);
    if (snapshotInfo == null) {
      throw new IllegalStateException("No snapshot exists by ID: " + id);
    }
    return snapshotInfo.commit;
  }

  /**
   * Get all the snapshots in a map of snapshot IDs to the segments they
   * 'cover.' This can be passed to
   * {@link #SnapshotDeletionPolicy(IndexDeletionPolicy, Map)} in order to
   * initialize snapshots at construction.
   */
  public synchronized Map<String, String> getSnapshots() {
    Map<String, String> snapshots = new HashMap();
    for (Entry<String, SnapshotInfo> e : idToSnapshot.entrySet()) {
      snapshots.put(e.getKey(), e.getValue().segmentsFileName);
    }
    return snapshots;
  }

  /**
   * Returns true if the given ID is already used by a snapshot. You can call
   * this method before {@link #snapshot(String)} if you are not sure whether
   * the ID is already used or not.
   */
  public boolean isSnapshotted(String id) {
    return idToSnapshot.containsKey(id);
  }

  public synchronized void onCommit(List<? extends IndexCommit> commits)
      throws IOException {
    primary.onCommit(wrapCommits(commits));
    lastCommit = commits.get(commits.size() - 1);
  }

  public synchronized void onInit(List<? extends IndexCommit> commits)
      throws IOException {
    primary.onInit(wrapCommits(commits));
    lastCommit = commits.get(commits.size() - 1);

    /*
     * Assign snapshotted IndexCommits to their correct snapshot IDs as
     * specified in the constructor.
     */
    for (IndexCommit commit : commits) {
      Set<String> ids = segmentsFileToIDs.get(commit.getSegmentsFileName());
      if (ids != null) {
        for (String id : ids) {
          idToSnapshot.get(id).commit = commit;
        }
      }
    }

    /*
     * Second, see if there are any instances where a snapshot ID was specified
     * in the constructor but an IndexCommit doesn't exist. In this case, the ID
     * should be removed.
     * 
     * Note: This code is protective for extreme cases where IDs point to
     * non-existent segments. As the constructor should have received its
     * information via a call to getSnapshots(), the data should be well-formed.
     */
    // Find lost snapshots
    ArrayList<String> idsToRemove = null;
    for (Entry<String, SnapshotInfo> e : idToSnapshot.entrySet()) {
      if (e.getValue().commit == null) {
        if (idsToRemove == null) {
          idsToRemove = new ArrayList<String>();
        }
        idsToRemove.add(e.getKey());
      }
    }
    // Finally, remove those 'lost' snapshots.
    if (idsToRemove != null) {
      for (String id : idsToRemove) {
        SnapshotInfo info = idToSnapshot.remove(id);
        segmentsFileToIDs.remove(info.segmentsFileName);
      }
    }
  }

  /**
   * Release a snapshotted commit by ID.
   * 
   * @param id
   *          a unique identifier of the commit that is un-snapshotted.
   * @throws IllegalStateException
   *           if no snapshot exists by this ID.
   */
  public synchronized void release(String id) throws IOException {
    SnapshotInfo info = idToSnapshot.remove(id);
    if (info == null) {
      throw new IllegalStateException("Snapshot doesn't exist: " + id);
    }
    Set<String> ids = segmentsFileToIDs.get(info.segmentsFileName);
    if (ids != null) {
      ids.remove(id);
      if (ids.size() == 0) {
        segmentsFileToIDs.remove(info.segmentsFileName);
      }
    }
  }

  /**
   * Snapshots the last commit. Once a commit is 'snapshotted,' it is protected
   * from deletion (as long as this {@link IndexDeletionPolicy} is used). The
   * commit can be removed by calling {@link #release(String)} using the same ID
   * parameter followed by a call to {@link IndexWriter#deleteUnusedFiles()}.
   * <p>
   * <b>NOTE: ID must be unique in the system. If the same ID is used twice,
   * an {@link IllegalStateException} is thrown.
   * <p>
   * <b>NOTE: while the snapshot is held, the files it references will not
   * be deleted, which will consume additional disk space in your index. If you
   * take a snapshot at a particularly bad time (say just before you call
   * optimize()) then in the worst case this could consume an extra 1X of your
   * total index size, until you release the snapshot.
   * 
   * @param id
   *          a unique identifier of the commit that is being snapshotted.
   * @throws IllegalStateException
   *           if either there is no 'last commit' to snapshot, or if the
   *           parameter 'ID' refers to an already snapshotted commit.
   * @return the {@link IndexCommit} that was snapshotted.
   */
  public synchronized IndexCommit snapshot(String id) throws IOException {
    if (lastCommit == null) {
      // no commit exists. Really shouldn't happen, but might be if SDP is
      // accessed before onInit or onCommit were called.
      throw new IllegalStateException("No index commit to snapshot");
    }

    // Can't use the same snapshot ID twice...
    checkSnapshotted(id);

    registerSnapshotInfo(id, lastCommit.getSegmentsFileName(), lastCommit);
    return lastCommit;
  }

}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene SnapshotDeletionPolicy.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.