alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (TestMultiAnalyzerWrapper.java)

This example Lucene source code file (TestMultiAnalyzerWrapper.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Lucene tags/keywords

io, multianalyzer, override, override, posincrementanalyzer, queryparserwrapper, queryparserwrapper, reader, standardtokenizer, string, testfilter, testfilter, testposincrementfilter, tokenstream, tokenstream

The Lucene TestMultiAnalyzerWrapper.java source code

package org.apache.lucene.queryParser.standard;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.util.LuceneTestCase;

/**
 * This test case is a copy of the core Lucene query parser test, it was adapted
 * to use new QueryParserWrapper instead of the old query parser.
 * 
 * Test QueryParser's ability to deal with Analyzers that return more than one
 * token per position or that return tokens with a position increment > 1.
 * 
 */
public class TestMultiAnalyzerWrapper extends LuceneTestCase {

  private static int multiToken = 0;

  public void testMultiAnalyzer() throws ParseException {

    QueryParserWrapper qp = new QueryParserWrapper("", new MultiAnalyzer());

    // trivial, no multiple tokens:
    assertEquals("foo", qp.parse("foo").toString());
    assertEquals("foo", qp.parse("\"foo\"").toString());
    assertEquals("foo foobar", qp.parse("foo foobar").toString());
    assertEquals("\"foo foobar\"", qp.parse("\"foo foobar\"").toString());
    assertEquals("\"foo foobar blah\"", qp.parse("\"foo foobar blah\"")
        .toString());

    // two tokens at the same position:
    assertEquals("(multi multi2) foo", qp.parse("multi foo").toString());
    assertEquals("foo (multi multi2)", qp.parse("foo multi").toString());
    assertEquals("(multi multi2) (multi multi2)", qp.parse("multi multi")
        .toString());
    assertEquals("+(foo (multi multi2)) +(bar (multi multi2))", qp.parse(
        "+(foo multi) +(bar multi)").toString());
    assertEquals("+(foo (multi multi2)) field:\"bar (multi multi2)\"", qp
        .parse("+(foo multi) field:\"bar multi\"").toString());

    // phrases:
    assertEquals("\"(multi multi2) foo\"", qp.parse("\"multi foo\"").toString());
    assertEquals("\"foo (multi multi2)\"", qp.parse("\"foo multi\"").toString());
    assertEquals("\"foo (multi multi2) foobar (multi multi2)\"", qp.parse(
        "\"foo multi foobar multi\"").toString());

    // fields:
    assertEquals("(field:multi field:multi2) field:foo", qp.parse(
        "field:multi field:foo").toString());
    assertEquals("field:\"(multi multi2) foo\"", qp
        .parse("field:\"multi foo\"").toString());

    // three tokens at one position:
    assertEquals("triplemulti multi3 multi2", qp.parse("triplemulti")
        .toString());
    assertEquals("foo (triplemulti multi3 multi2) foobar", qp.parse(
        "foo triplemulti foobar").toString());

    // phrase with non-default slop:
    assertEquals("\"(multi multi2) foo\"~10", qp.parse("\"multi foo\"~10")
        .toString());

    // phrase with non-default boost:
    assertEquals("\"(multi multi2) foo\"^2.0", qp.parse("\"multi foo\"^2")
        .toString());

    // phrase after changing default slop
    qp.setPhraseSlop(99);
    assertEquals("\"(multi multi2) foo\"~99 bar", qp.parse("\"multi foo\" bar")
        .toString());
    assertEquals("\"(multi multi2) foo\"~99 \"foo bar\"~2", qp.parse(
        "\"multi foo\" \"foo bar\"~2").toString());
    qp.setPhraseSlop(0);

    // non-default operator:
    qp.setDefaultOperator(QueryParserWrapper.AND_OPERATOR);
    assertEquals("+(multi multi2) +foo", qp.parse("multi foo").toString());

  }

  // public void testMultiAnalyzerWithSubclassOfQueryParser() throws
  // ParseException {
  // this test doesn't make sense when using the new QueryParser API
  // DumbQueryParser qp = new DumbQueryParser("", new MultiAnalyzer());
  // qp.setPhraseSlop(99); // modified default slop
  //
  // // direct call to (super's) getFieldQuery to demonstrate differnce
  // // between phrase and multiphrase with modified default slop
  // assertEquals("\"foo bar\"~99",
  // qp.getSuperFieldQuery("","foo bar").toString());
  // assertEquals("\"(multi multi2) bar\"~99",
  // qp.getSuperFieldQuery("","multi bar").toString());
  //
  //    
  // // ask sublcass to parse phrase with modified default slop
  // assertEquals("\"(multi multi2) foo\"~99 bar",
  // qp.parse("\"multi foo\" bar").toString());
  //    
  // }

  public void testPosIncrementAnalyzer() throws ParseException {
    QueryParserWrapper qp = new QueryParserWrapper("",
        new PosIncrementAnalyzer());
    assertEquals("quick brown", qp.parse("the quick brown").toString());
    assertEquals("\"quick brown\"", qp.parse("\"the quick brown\"").toString());
    assertEquals("quick brown fox", qp.parse("the quick brown fox").toString());
    assertEquals("\"quick brown fox\"", qp.parse("\"the quick brown fox\"")
        .toString());
  }

  /**
   * Expands "multi" to "multi" and "multi2", both at the same position, and
   * expands "triplemulti" to "triplemulti", "multi3", and "multi2".
   */
  private class MultiAnalyzer extends Analyzer {

    public MultiAnalyzer() {
    }

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
      result = new TestFilter(result);
      result = new LowerCaseFilter(TEST_VERSION_CURRENT, result);
      return result;
    }
  }

  private final class TestFilter extends TokenFilter {

    private String prevType;
    private int prevStartOffset;
    private int prevEndOffset;

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);

    public TestFilter(TokenStream in) {
      super(in);
    }

    @Override
    public final boolean incrementToken() throws java.io.IOException {
      if (multiToken > 0) {
        termAtt.setEmpty().append("multi" + (multiToken + 1));
        offsetAtt.setOffset(prevStartOffset, prevEndOffset);
        typeAtt.setType(prevType);
        posIncrAtt.setPositionIncrement(0);
        multiToken--;
        return true;
      } else {
        boolean next = input.incrementToken();
        if (next == false) {
          return false;
        }
        prevType = typeAtt.type();
        prevStartOffset = offsetAtt.startOffset();
        prevEndOffset = offsetAtt.endOffset();
        String text = termAtt.toString();
        if (text.equals("triplemulti")) {
          multiToken = 2;
          return true;
        } else if (text.equals("multi")) {
          multiToken = 1;
          return true;
        } else {
          return true;
        }
      }
    }

  }

  /**
   * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). Does not work
   * correctly for input other than "the quick brown ...".
   */
  private class PosIncrementAnalyzer extends Analyzer {

    public PosIncrementAnalyzer() {
    }

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
      result = new TestPosIncrementFilter(result);
      result = new LowerCaseFilter(TEST_VERSION_CURRENT, result);
      return result;
    }
  }

  private class TestPosIncrementFilter extends TokenFilter {

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);

    public TestPosIncrementFilter(TokenStream in) {
      super(in);
    }

    @Override
    public final boolean incrementToken() throws java.io.IOException {
      while (input.incrementToken()) {
        if (termAtt.toString().equals("the")) {
          // stopword, do nothing
        } else if (termAtt.toString().equals("quick")) {
          posIncrAtt.setPositionIncrement(2);
          return true;
        } else {
          posIncrAtt.setPositionIncrement(1);
          return true;
        }
      }
      return false;
    }

  }

}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene TestMultiAnalyzerWrapper.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.