alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (ReverseStringFilter.java)

This example Lucene source code file (ReverseStringFilter.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Lucene tags/keywords

deprecated, deprecated, information_separator_marker, io, ioexception, nomarker, nomarker, pua_ec00_marker, reversestringfilter, reversestringfilter, string, string, tokenfilter, tokenstream, version

The Lucene ReverseStringFilter.java source code

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis.reverse;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

import java.io.IOException;

/**
 * Reverse token string, for example "country" => "yrtnuoc".
 * <p>
 * If <code>marker</code> is supplied, then tokens will be also prepended by
 * that character. For example, with a marker of &#92;u0001, "country" =>
 * "&#92;u0001yrtnuoc". This is useful when implementing efficient leading
 * wildcards search.
 * </p>
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating ReverseStringFilter, or when using any of
 * its static methods:
 * <ul>
 *   <li> As of 3.1, supplementary characters are handled correctly</li>
 * </ul>
 * </p>
 */
public final class ReverseStringFilter extends TokenFilter {

  /** Term attribute of this stream; its char buffer is reversed in place by {@link #incrementToken()}. */
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  /** Character prepended to every reversed token, or {@link #NOMARKER} when tokens are not marked. */
  private final char marker;
  /** Version handed to {@link #reverse(Version, char[], int, int)} to select the reversal algorithm. */
  private final Version matchVersion;
  /** Sentinel marker value (U+FFFF) meaning "do not mark reversed tokens". */
  private static final char NOMARKER = '\uFFFF';
  
  /**
   * Example marker character: U+0001 (START OF HEADING) 
   */
  public static final char START_OF_HEADING_MARKER = '\u0001';
  
  /**
   * Example marker character: U+001F (INFORMATION SEPARATOR ONE)
   */
  public static final char INFORMATION_SEPARATOR_MARKER = '\u001F';
  
  /**
   * Example marker character: U+EC00 (PRIVATE USE AREA: EC00) 
   */
  public static final char PUA_EC00_MARKER = '\uEC00';
  
  /**
   * Example marker character: U+200F (RIGHT-TO-LEFT MARK)
   */
  public static final char RTL_DIRECTION_MARKER = '\u200F';
  
  /**
   * Create a new ReverseStringFilter that reverses all tokens in the 
   * supplied {@link TokenStream}.
   * <p>
   * The reversed tokens will not be marked. 
   * </p>
   * 
   * @param in {@link TokenStream} to filter
   * @deprecated use {@link #ReverseStringFilter(Version, TokenStream)} 
   *    instead. This constructor will be removed in Lucene 4.0
   */
  @Deprecated
  public ReverseStringFilter(TokenStream in) {
    this(in, NOMARKER);
  }
  
  /**
   * Create a new ReverseStringFilter that reverses and marks all tokens in the
   * supplied {@link TokenStream}.
   * <p>
   * The reversed tokens will be prepended (marked) by the <code>marker</code>
   * character. This constructor uses {@link Version#LUCENE_30} as the match
   * version.
   * </p>
   * 
   * @param in {@link TokenStream} to filter
   * @param marker A character used to mark reversed tokens
   * @deprecated use {@link #ReverseStringFilter(Version, TokenStream, char)} 
   *    instead. This constructor will be removed in Lucene 4.0 
   */
  @Deprecated
  public ReverseStringFilter(TokenStream in, char marker) {
    this(Version.LUCENE_30, in, marker);
  }
  
  /**
   * Create a new ReverseStringFilter that reverses all tokens in the 
   * supplied {@link TokenStream}.
   * <p>
   * The reversed tokens will not be marked. 
   * </p>
   * 
   * @param matchVersion See <a href="#version">above</a>
   * @param in {@link TokenStream} to filter
   */
  public ReverseStringFilter(Version matchVersion, TokenStream in) {
    this(matchVersion, in, NOMARKER);
  }

  /**
   * Create a new ReverseStringFilter that reverses and marks all tokens in the
   * supplied {@link TokenStream}.
   * <p>
   * The reversed tokens will be prepended (marked) by the <code>marker</code>
   * character.
   * </p>
   * 
   * @param matchVersion See <a href="#version">above</a>
   * @param in {@link TokenStream} to filter
   * @param marker A character used to mark reversed tokens
   */
  public ReverseStringFilter(Version matchVersion, TokenStream in, char marker) {
    super(in);
    this.matchVersion = matchVersion;
    this.marker = marker;
  }

  /**
   * Advances the wrapped stream by one token and reverses that token's term
   * in place. When a marker is configured it is first appended to the term,
   * so that after the reversal it is the first character of the term.
   *
   * @return <code>true</code> if the input produced a token (which has now
   *         been reversed), <code>false</code> if the input had no more tokens
   * @throws IOException declared by the overridden method; this method itself
   *         only propagates what the wrapped stream throws
   */
  @Override
  public boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      int len = termAtt.length();
      if (marker != NOMARKER) {
        // Put the marker after the last term char; the reversal below moves
        // it to the front. resizeBuffer is expected to make the buffer hold
        // at least len chars -- see the CharTermAttribute documentation.
        len++;
        termAtt.resizeBuffer(len);
        termAtt.buffer()[len - 1] = marker;
      }
      reverse( matchVersion, termAtt.buffer(), 0, len );
      // The term may have grown by one char (the marker), so publish the new length.
      termAtt.setLength(len);
      return true;
    } else {
      return false;
    }
  }

  /**
   * Reverses the given input string, using {@link Version#LUCENE_30} as the
   * match version.
   * 
   * @param input the string to reverse
   * @return the given input string in reversed order
   * @deprecated use {@link #reverse(Version, String)} instead. This method 
   *    will be removed in Lucene 4.0
   */
  @Deprecated
  public static String reverse( final String input ){
    return reverse(Version.LUCENE_30, input);
  }
  
  /**
   * Reverses the given input string. The input is copied into a new char
   * array, that copy is reversed, and a new String is built from it.
   * 
   * @param matchVersion See <a href="#version">above</a>
   * @param input the string to reverse
   * @return the given input string in reversed order
   */
  public static String reverse( Version matchVersion, final String input ){
    final char[] charInput = input.toCharArray();
    reverse( matchVersion, charInput, 0, charInput.length );
    return new String( charInput );
  }
  
  /**
   * Reverses the given input buffer in-place
   * @param buffer the input char array to reverse
   * @deprecated use {@link #reverse(Version, char[])} instead. This 
   *    method will be removed in Lucene 4.0
   */
  @Deprecated
  public static void reverse( final char[] buffer ){
    reverse( buffer, 0, buffer.length );
  }
  
  /**
   * Reverses the given input buffer in-place
   * @param matchVersion See <a href="#version">above</a>
   * @param buffer the input char array to reverse
   */
  public static void reverse(Version matchVersion, final char[] buffer) {
    reverse(matchVersion, buffer, 0, buffer.length);
  }
  
  /**
   * Partially reverses the given input buffer in-place from offset 0
   * up to the given length.
   * @param buffer the input char array to reverse
   * @param len the number of chars, counted from offset 0, that should
   *        be reversed
   * @deprecated use {@link #reverse(Version, char[], int)} instead. This 
   *    method will be removed in Lucene 4.0
   */
  @Deprecated
  public static void reverse( final char[] buffer, final int len ){
    reverse( buffer, 0, len );
  }
  
  /**
   * Partially reverses the given input buffer in-place from offset 0
   * up to the given length.
   * @param matchVersion See <a href="#version">above</a>
   * @param buffer the input char array to reverse
   * @param len the number of chars, counted from offset 0, that should
   *        be reversed
   */
  public static void reverse(Version matchVersion, final char[] buffer,
      final int len) {
    reverse( matchVersion, buffer, 0, len );
  }
  
  /**
   * Partially reverses the given input buffer in-place from the given offset
   * up to the given length. Chars are swapped individually, so surrogate
   * pairs are not kept together by this variant.
   * @param buffer the input char array to reverse
   * @param start the offset from where to reverse the buffer
   * @param len the number of chars, counted from <code>start</code>, that
   *        should be reversed
   * @deprecated use {@link #reverse(Version, char[], int, int)} instead. This 
   *    method will be removed in Lucene 4.0
   */
  @Deprecated
  public static void reverse(char[] buffer, int start, int len ) {
    reverseUnicode3(buffer, start, len);
  }
  
  /**
   * Reverses <code>len</code> chars of <code>buffer</code> beginning at
   * <code>start</code> by swapping each char in the first half with its
   * mirror char in the second half. Every char is treated independently;
   * no surrogate-pair handling is done.
   *
   * @deprecated Remove this when support for 3.0 indexes is no longer needed.
   */
  @Deprecated
  private static void reverseUnicode3( char[] buffer, int start, int len ){
    if( len <= 1 ) return;
    // Only the first half needs visiting; for odd len the middle char stays put.
    int num = len>>1;
    for( int i = start; i < ( start + num ); i++ ){
      // start * 2 + len - i - 1 == (start + len - 1) - (i - start): the index
      // mirrored around the centre of the [start, start + len) range.
      char c = buffer[i];
      buffer[i] = buffer[start * 2 + len - i - 1];
      buffer[start * 2 + len - i - 1] = c;
    }
  }
  
  /**
   * Partially reverses the given input buffer in-place from the given offset
   * up to the given length.
   * <p>
   * If <code>matchVersion</code> is not on or after
   * {@link Version#LUCENE_31}, chars are swapped individually. Otherwise a
   * high/low surrogate pair found in the range is moved as a unit, so it is
   * still in high-then-low order after the reversal.
   * </p>
   * @param matchVersion See <a href="#version">above</a>
   * @param buffer the input char array to reverse
   * @param start the offset from where to reverse the buffer
   * @param len the number of chars, counted from <code>start</code>, that
   *        should be reversed
   */
  public static void reverse(Version matchVersion, final char[] buffer,
      final int start, final int len) {
    if (!matchVersion.onOrAfter(Version.LUCENE_31)) {
      reverseUnicode3(buffer, start, len);
      return;
    }
    /* modified version of Apache Harmony AbstractStringBuilder reverse0() */
    if (len < 2)
      return;
    // i walks forward from start while end walks backward from the last char.
    // frontHigh / endLow carry the original chars for the current front / end
    // positions, because the buffer slots they came from may already have
    // been overwritten by an earlier iteration.
    int end = (start + len) - 1;
    char frontHigh = buffer[start];
    char endLow = buffer[end];
    // Cleared for one iteration after half of a pair has been consumed on
    // that side, so the leftover half is not matched as a new pair.
    boolean allowFrontSur = true, allowEndSur = true;
    final int mid = start + (len >> 1);
    for (int i = start; i < mid; ++i, --end) {
      // Look one char inward on each side to detect a surrogate pair.
      final char frontLow = buffer[i + 1];
      final char endHigh = buffer[end - 1];
      final boolean surAtFront = allowFrontSur
          && Character.isSurrogatePair(frontHigh, frontLow);
      if (surAtFront && (len < 3)) {
        // nothing to do since surAtFront is allowed and 1 char left
        return;
      }
      final boolean surAtEnd = allowEndSur
          && Character.isSurrogatePair(endHigh, endLow);
      allowFrontSur = allowEndSur = true;
      if (surAtFront == surAtEnd) {
        if (surAtFront) {
          // both surrogates
          // Exchange the two pairs, each kept in high/low order; this uses
          // up two chars per side, hence the extra --end and ++i.
          buffer[end] = frontLow;
          buffer[--end] = frontHigh;
          buffer[i] = endHigh;
          buffer[++i] = endLow;
          frontHigh = buffer[i + 1];
          endLow = buffer[end - 1];
        } else {
          // neither surrogates
          // Plain swap; the inward neighbours become the next carried chars.
          buffer[end] = frontHigh;
          buffer[i] = endLow;
          frontHigh = frontLow;
          endLow = endHigh;
        }
      } else {
        if (surAtFront) {
          // surrogate only at the front
          // Write the pair's low half at the end now; frontHigh stays
          // pending and is written on the next step (or after the loop).
          buffer[end] = frontLow;
          buffer[i] = endLow;
          endLow = endHigh;
          allowFrontSur = false;
        } else {
          // surrogate only at the end
          // Write the pair's high half at the front now; endLow stays
          // pending and is written on the next step (or after the loop).
          buffer[end] = frontHigh;
          buffer[i] = endHigh;
          frontHigh = frontLow;
          allowEndSur = false;
        }
      }
    }
    if ((len & 0x01) == 1 && !(allowFrontSur && allowEndSur)) {
      // only if odd length
      // The last iteration split a pair, so the middle slot must receive the
      // pending half: endLow if the split was at the end, frontHigh otherwise.
      buffer[end] = allowFrontSur ? endLow : frontHigh;
    }
  }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene ReverseStringFilter.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.