alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (IndexFileNames.java)

This example Lucene source code file (IndexFileNames.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Lucene tags/keywords

field_infos_extension, field_infos_extension, fields_extension, fields_extension, fields_index_extension, freq_extension, prox_extension, regex, string, string, terms_extension, terms_extension, terms_index_extension, vectors_fields_extension, vectors_index_extension

The Lucene IndexFileNames.java source code

package org.apache.lucene.index;

import java.util.regex.Pattern;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * This class contains useful constants representing filenames and extensions
 * used by Lucene. It also provides convenience methods for querying whether a
 * file name matches an extension ({@link #matchesExtension(String, String)
 * matchesExtension}) and for generating file names from a segment name,
 * generation and extension (
 * {@link #fileNameFromGeneration(String, String, long) fileNameFromGeneration},
 * {@link #segmentFileName(String, String) segmentFileName}).
 * 
 * @lucene.internal
 */
public final class IndexFileNames {

  /** Name of the index segment file */
  public static final String SEGMENTS = "segments";

  /** Name of the generation reference file */
  public static final String SEGMENTS_GEN = "segments.gen";

  /** Name of the index deletable file (only used in
   * pre-lockless indices) */
  public static final String DELETABLE = "deletable";

  /** Extension of norms file */
  public static final String NORMS_EXTENSION = "nrm";

  /** Extension of freq postings file */
  public static final String FREQ_EXTENSION = "frq";

  /** Extension of prox postings file */
  public static final String PROX_EXTENSION = "prx";

  /** Extension of terms file */
  public static final String TERMS_EXTENSION = "tis";

  /** Extension of terms index file */
  public static final String TERMS_INDEX_EXTENSION = "tii";

  /** Extension of stored fields index file */
  public static final String FIELDS_INDEX_EXTENSION = "fdx";

  /** Extension of stored fields file */
  public static final String FIELDS_EXTENSION = "fdt";

  /** Extension of vectors fields file */
  public static final String VECTORS_FIELDS_EXTENSION = "tvf";

  /** Extension of vectors documents file */
  public static final String VECTORS_DOCUMENTS_EXTENSION = "tvd";

  /** Extension of vectors index file */
  public static final String VECTORS_INDEX_EXTENSION = "tvx";

  /** Extension of compound file */
  public static final String COMPOUND_FILE_EXTENSION = "cfs";

  /** Extension of compound file for doc store files */
  public static final String COMPOUND_FILE_STORE_EXTENSION = "cfx";

  /** Extension of deletes */
  public static final String DELETES_EXTENSION = "del";

  /** Extension of field infos */
  public static final String FIELD_INFOS_EXTENSION = "fnm";

  /** Extension of plain norms */
  public static final String PLAIN_NORMS_EXTENSION = "f";

  /** Extension of separate norms */
  public static final String SEPARATE_NORMS_EXTENSION = "s";

  /** Extension of gen file */
  public static final String GEN_EXTENSION = "gen";

  /**
   * Pre-compiled form of {@code SEPARATE_NORMS_EXTENSION + "[0-9]+"}, kept as a
   * constant so {@link #isSeparateNormsFile(String)} does not recompile the
   * same regular expression on every call.
   */
  private static final Pattern SEPARATE_NORMS_PATTERN =
      Pattern.compile(SEPARATE_NORMS_EXTENSION + "[0-9]+");

  /**
   * This array contains all filename extensions used by
   * Lucene's index files, with two exceptions, namely the
   * extension made up from {@code .f} + a number and
   * from {@code .s} + a number.  Also note that
   * Lucene's {@code segments_N} files do not have any
   * filename extension.
   */
  public static final String[] INDEX_EXTENSIONS = new String[] {
    COMPOUND_FILE_EXTENSION,
    FIELD_INFOS_EXTENSION,
    FIELDS_INDEX_EXTENSION,
    FIELDS_EXTENSION,
    TERMS_INDEX_EXTENSION,
    TERMS_EXTENSION,
    FREQ_EXTENSION,
    PROX_EXTENSION,
    DELETES_EXTENSION,
    VECTORS_INDEX_EXTENSION,
    VECTORS_DOCUMENTS_EXTENSION,
    VECTORS_FIELDS_EXTENSION,
    GEN_EXTENSION,
    NORMS_EXTENSION,
    COMPOUND_FILE_STORE_EXTENSION,
  };

  /** File extensions that are added to a compound file
   * (same as {@link #INDEX_EXTENSIONS}, minus "cfs", "cfx", "del" and "gen"). */
  public static final String[] INDEX_EXTENSIONS_IN_COMPOUND_FILE = new String[] {
    FIELD_INFOS_EXTENSION,
    FIELDS_INDEX_EXTENSION,
    FIELDS_EXTENSION,
    TERMS_INDEX_EXTENSION,
    TERMS_EXTENSION,
    FREQ_EXTENSION,
    PROX_EXTENSION,
    VECTORS_INDEX_EXTENSION,
    VECTORS_DOCUMENTS_EXTENSION,
    VECTORS_FIELDS_EXTENSION,
    NORMS_EXTENSION
  };

  /**
   * Extensions of the term vector files (index, fields, documents) and of the
   * stored fields files (index, data). {@link #isDocStoreFile(String)} treats a
   * file carrying one of these extensions as a doc store file.
   */
  public static final String[] STORE_INDEX_EXTENSIONS = new String[] {
    VECTORS_INDEX_EXTENSION,
    VECTORS_FIELDS_EXTENSION,
    VECTORS_DOCUMENTS_EXTENSION,
    FIELDS_INDEX_EXTENSION,
    FIELDS_EXTENSION
  };

  /**
   * Extensions of the field infos, freq postings, prox postings, terms, terms
   * index and norms files, i.e. the entries of
   * {@link #INDEX_EXTENSIONS_IN_COMPOUND_FILE} that are not listed in
   * {@link #STORE_INDEX_EXTENSIONS}.
   */
  public static final String[] NON_STORE_INDEX_EXTENSIONS = new String[] {
    FIELD_INFOS_EXTENSION,
    FREQ_EXTENSION,
    PROX_EXTENSION,
    TERMS_EXTENSION,
    TERMS_INDEX_EXTENSION,
    NORMS_EXTENSION
  };

  /** File extensions of old-style index files */
  public static final String[] COMPOUND_EXTENSIONS = new String[] {
    FIELD_INFOS_EXTENSION,
    FREQ_EXTENSION,
    PROX_EXTENSION,
    FIELDS_INDEX_EXTENSION,
    FIELDS_EXTENSION,
    TERMS_INDEX_EXTENSION,
    TERMS_EXTENSION
  };

  /** File extensions for term vector support */
  public static final String[] VECTOR_EXTENSIONS = new String[] {
    VECTORS_INDEX_EXTENSION,
    VECTORS_DOCUMENTS_EXTENSION,
    VECTORS_FIELDS_EXTENSION
  };

  /**
   * Computes the full file name from base, extension and generation.
   * <ul>
   * <li>If {@code gen} equals {@code SegmentInfo.NO}, the file name is
   * {@code null}.</li>
   * <li>If {@code gen} equals {@code SegmentInfo.WITHOUT_GEN}, the file name is
   * {@code <base>.<ext>}, as built by {@link #segmentFileName(String, String)}.</li>
   * <li>Otherwise the file name is {@code <base>_<gen>.<ext>}, where
   * {@code <gen>} is the generation written in radix
   * {@link Character#MAX_RADIX}.</li>
   * </ul>
   * <b>NOTE:</b> {@code .<ext>} is added to the name only if {@code ext} is
   * not an empty string.
   *
   * @param base main part of the file name
   * @param ext extension of the filename, without a leading '.'
   * @param gen generation
   * @return the computed file name, or {@code null} if {@code gen} equals
   *         {@code SegmentInfo.NO}
   */
  public static final String fileNameFromGeneration(String base, String ext, long gen) {
    if (gen == SegmentInfo.NO) {
      return null;
    } else if (gen == SegmentInfo.WITHOUT_GEN) {
      return segmentFileName(base, ext);
    } else {
      // The '6' part in the length is: 1 for '.', 1 for '_' and 4 as an estimate
      // of the gen length as a string (hopefully an upper limit, so the builder
      // won't have to expand in the middle).
      StringBuilder res = new StringBuilder(base.length() + 6 + ext.length())
          .append(base).append('_').append(Long.toString(gen, Character.MAX_RADIX));
      if (ext.length() > 0) {
        res.append('.').append(ext);
      }
      return res.toString();
    }
  }

  /**
   * Returns true if the provided filename is one of the doc store files, i.e.
   * it ends with {@code '.'} followed by {@link #COMPOUND_FILE_STORE_EXTENSION}
   * or by one of the extensions in {@link #STORE_INDEX_EXTENSIONS}.
   * <p>
   * The {@code '.'} separator is required, so that an extension-less name whose
   * trailing characters merely spell an extension (for example a
   * {@code segments_N} file whose radix-36 generation is {@code fdt}) is not
   * mistaken for a doc store file.
   *
   * @param fileName the file name to test
   * @return true if {@code fileName} carries a doc store extension
   */
  public static final boolean isDocStoreFile(String fileName) {
    if (matchesExtension(fileName, COMPOUND_FILE_STORE_EXTENSION)) {
      return true;
    }
    for (String ext : STORE_INDEX_EXTENSIONS) {
      if (matchesExtension(fileName, ext)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns the file name that matches the given segment name and extension.
   * This method takes care to return the full file name in the form
   * {@code <segmentName>.<ext>}, therefore you don't need to prefix the
   * extension with a '.'.<br>
   * <b>NOTE:</b> {@code .<ext>} is added to the result file name only if
   * {@code ext} is not empty.
   *
   * @param segmentName name of the segment
   * @param ext extension of the filename, without a leading '.'
   * @return {@code segmentName} itself if {@code ext} is empty, otherwise
   *         {@code segmentName + "." + ext}
   */
  public static final String segmentFileName(String segmentName, String ext) {
    if (ext.length() > 0) {
      return new StringBuilder(segmentName.length() + 1 + ext.length()).append(
          segmentName).append('.').append(ext).toString();
    } else {
      return segmentName;
    }
  }

  /**
   * Returns true if the given filename ends with the given extension. One
   * should provide a <i>pure</i> extension, without '.'.
   *
   * @param filename the file name to test
   * @param ext the extension, without a leading '.'
   * @return true if {@code filename} ends with {@code "." + ext}
   */
  public static final boolean matchesExtension(String filename, String ext) {
    // It doesn't make a difference whether we allocate a StringBuilder ourself
    // or not, since there's only 1 '+' operator.
    return filename.endsWith("." + ext);
  }

  /**
   * Strips the segment name out of the given file name. If you used
   * {@link #segmentFileName} or {@link #fileNameFromGeneration} to create your
   * files, then this method simply removes whatever comes before the first '.',
   * or the second '_' (excluding both), in case of deleted docs.
   * <p>
   * More precisely: the cut point is the first '_' found at index 1 or later;
   * if there is none, it is the first '.'. The delimiter itself is kept in the
   * result.
   *
   * @param filename the file name to strip
   * @return the filename with the segment name removed, or the given filename
   *         unchanged if it contains neither a '_' past its first character
   *         nor a '.'.
   */
  public static final String stripSegmentName(String filename) {
    // If it is a .del file, there's an '_' after the first character
    int idx = filename.indexOf('_', 1);
    if (idx == -1) {
      // If it's not, strip everything that's before the '.'
      idx = filename.indexOf('.');
    }
    if (idx != -1) {
      filename = filename.substring(idx);
    }
    return filename;
  }

  /**
   * Returns true if the given filename ends with the separate norms file
   * pattern: {@code SEPARATE_NORMS_EXTENSION + "[0-9]+"}. The pattern is applied
   * to the text following the last '.' in the name.
   *
   * @param filename the file name to test
   * @return true if the extension is {@link #SEPARATE_NORMS_EXTENSION} followed
   *         by one or more digits; false otherwise, including when the name
   *         contains no '.'
   */
  public static boolean isSeparateNormsFile(String filename) {
    int idx = filename.lastIndexOf('.');
    if (idx == -1) {
      return false;
    }
    String ext = filename.substring(idx + 1);
    return SEPARATE_NORMS_PATTERN.matcher(ext).matches();
  }

}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene IndexFileNames.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.