alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (TestSpanQueryParser.java)

This example Lucene source code file (TestSpanQueryParser.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Lucene tags/keywords

exception, exception, querynodeexception, querynodeexception, querynodeprocessorpipeline, spanorquery, spanquery, spanquery, spansqueryconfighandler, spansquerytreebuilder, spantermquery, spantermquery, testspanqueryparser, uniquefieldattribute

The Lucene TestSpanQueryParser.java source code

package org.apache.lucene.queryParser.spans;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.queryParser.core.parser.SyntaxParser;
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorPipeline;
import org.apache.lucene.queryParser.standard.parser.StandardSyntaxParser;
import org.apache.lucene.queryParser.standard.processors.WildcardQueryNodeProcessor;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.LuceneTestCase;

/**
 * This test case demonstrates how the new query parser can be used.<br/>
 * <br/>
 * 
 * It tests queries like "term", "field:term", "term1 term2" and "term1 OR term2",
 * which are all already supported by the current syntax parser (
 * {@link StandardSyntaxParser}).<br/>
 * <br/>
 * 
 * The goal is to create a new query parser that supports only the pair
 * "field:term" or a list of pairs separated or not by an OR operator, and from
 * this query generate {@link SpanQuery} objects instead of the regular
 * {@link Query} objects. Basically, every pair will be converted to a
 * {@link SpanTermQuery} object and if there is more than one pair they will be
 * grouped by an {@link OrQueryNode}.<br/>
 * <br/>
 * 
 * Another functionality that will be added is the ability to convert every
 * field defined in the query to a unique specific field.<br/>
 * <br/>
 * 
 * The query generation is divided in three different steps: parsing (syntax),
 * processing (semantic) and building.<br/>
 * <br/>
 * 
 * The parsing phase, as already mentioned will be performed by the current
 * query parser: {@link StandardSyntaxParser}.<br/>
 * <br/>
 * 
 * The processing phase will be performed by a processor pipeline which is
 * composed of 2 processors: {@link SpansValidatorQueryNodeProcessor} and
 * {@link UniqueFieldQueryNodeProcessor}.
 * 
 * <pre>
 * 
 *   {@link SpansValidatorQueryNodeProcessor}: as it's going to use the current 
 *   query parser to parse the syntax, it will support more features than we want.
 *   This processor basically validates the query node tree generated by the parser
 *   and only lets through the elements we want; if any other element is found
 *   (wildcards, range queries, etc.), an exception is thrown.
 *   
 *   {@link UniqueFieldQueryNodeProcessor}: this processor will take care of reading
 *   what is the "unique field" from the configuration and convert every field defined
 *   in every pair to this "unique field". For that, a {@link SpansQueryConfigHandler} is
 *   used, which has the {@link UniqueFieldAttribute} defined in it.
 * </pre>
 * 
 * The building phase is performed by the {@link SpansQueryTreeBuilder}, which
 * basically contains a map that defines which builder will be used to generate
 * {@link SpanQuery} objects from {@link QueryNode} objects.<br/>
 * <br/>
 * 
 * @see SpansQueryConfigHandler
 * @see SpansQueryTreeBuilder
 * @see SpansValidatorQueryNodeProcessor
 * @see SpanOrQueryNodeBuilder
 * @see SpanTermQueryNodeBuilder
 * @see StandardSyntaxParser
 * @see UniqueFieldQueryNodeProcessor
 * @see UniqueFieldAttribute
 */
public class TestSpanQueryParser extends LuceneTestCase {

  /** Processor pipeline applied to the parsed query tree; rebuilt in {@link #setUp()}. */
  private QueryNodeProcessorPipeline spanProcessorPipeline;

  /** Configuration shared with the pipeline; carries the {@link UniqueFieldAttribute}. */
  private SpansQueryConfigHandler spanQueryConfigHandler;

  /** Builder that turns the processed query tree into a {@link SpanQuery}. */
  private SpansQueryTreeBuilder spansQueryTreeBuilder;

  /** Syntax parser used for the parsing phase of every test query. */
  private final SyntaxParser queryParser = new StandardSyntaxParser();

  public TestSpanQueryParser() {
    // empty constructor
  }

  /**
   * Creates a fresh pipeline, config handler and tree builder for each test and
   * wires the config handler and the processors into the pipeline.
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();

    this.spanProcessorPipeline = new QueryNodeProcessorPipeline();
    this.spanQueryConfigHandler = new SpansQueryConfigHandler();
    this.spansQueryTreeBuilder = new SpansQueryTreeBuilder();

    // set up the processor pipeline
    this.spanProcessorPipeline
        .setQueryConfigHandler(this.spanQueryConfigHandler);

    // NOTE(review): the wildcard processor is registered ahead of the
    // validator, presumably so wildcard terms are recognized before
    // validation - confirm against WildcardQueryNodeProcessor.
    this.spanProcessorPipeline.add(new WildcardQueryNodeProcessor());
    this.spanProcessorPipeline.add(new SpansValidatorQueryNodeProcessor());
    this.spanProcessorPipeline.add(new UniqueFieldQueryNodeProcessor());

  }

  /**
   * Builds a span query from {@code query} using the empty string as the
   * unique field.
   *
   * @param query the query text to parse
   * @return the span query built from the processed query tree
   * @throws QueryNodeException if parsing, processing or building fails
   */
  public SpanQuery getSpanQuery(CharSequence query) throws QueryNodeException {
    return getSpanQuery("", query);
  }

  /**
   * Runs the three phases (parse, process, build) on {@code query}.
   *
   * <p>The given {@code uniqueField} is stored in the shared
   * {@link UniqueFieldAttribute} of the config handler before the query is
   * processed.
   *
   * @param uniqueField the value to set as the unique field
   * @param query the query text to parse
   * @return the span query built from the processed query tree
   * @throws QueryNodeException if parsing, processing or building fails
   */
  public SpanQuery getSpanQuery(CharSequence uniqueField, CharSequence query)
      throws QueryNodeException {
    UniqueFieldAttribute uniqueFieldAtt = this.spanQueryConfigHandler
        .getAttribute(UniqueFieldAttribute.class);
    uniqueFieldAtt.setUniqueField(uniqueField);

    QueryNode queryTree = this.queryParser.parse(query, "defaultField");
    queryTree = this.spanProcessorPipeline.process(queryTree);

    return this.spansQueryTreeBuilder.build(queryTree);

  }

  /** A single term, with or without a field, must produce a {@link SpanTermQuery}. */
  public void testTermSpans() throws Exception {
    assertEquals("term", getSpanQuery("field:term").toString());
    assertEquals("term", getSpanQuery("term").toString());

    assertTrue(getSpanQuery("field:term") instanceof SpanTermQuery);
    assertTrue(getSpanQuery("term") instanceof SpanTermQuery);

  }

  /** Whatever field the query names, the resulting term must use the unique field. */
  public void testUniqueField() throws Exception {
    assertEquals("field:term", getSpanQuery("field", "term").toString());
    assertEquals("field:term", getSpanQuery("field", "field:term").toString());
    assertEquals("field:term",
        getSpanQuery("field", "anotherField:term").toString());

  }

  /** Two terms, with or without an explicit OR, must produce a {@link SpanOrQuery}. */
  public void testOrSpans() throws Exception {
    assertEquals("spanOr([term1, term2])",
        getSpanQuery("term1 term2").toString());
    assertEquals("spanOr([term1, term2])",
        getSpanQuery("term1 OR term2").toString());

    assertTrue(getSpanQuery("term1 term2") instanceof SpanOrQuery);
    // the explicit-OR form is checked too (the original repeated the implicit form)
    assertTrue(getSpanQuery("term1 OR term2") instanceof SpanOrQuery);

  }

  /** Every query form other than plain terms and OR lists must be rejected. */
  public void testQueryValidator() throws QueryNodeException {
    assertQueryRejected("term*", "wildcard");
    assertQueryRejected("[a TO z]", "range");
    // '~' is the fuzzy operator and '^' is the boost operator
    assertQueryRejected("a~0.5", "fuzzy");
    assertQueryRejected("a^0.5", "boost");
    assertQueryRejected("\"a b\"", "quoted");
    assertQueryRejected("(a b)", "parenthesized");
    assertQueryRejected("a AND b", "and");

  }

  /**
   * Asserts that building a span query from {@code query} throws a
   * {@link QueryNodeException}; fails the test otherwise.
   *
   * @param query the query text expected to be rejected
   * @param kind the name of the query kind, used in the failure message
   */
  private void assertQueryRejected(String query, String kind) {
    try {
      getSpanQuery(query);
    } catch (QueryNodeException expected) {
      // expected exception
      return;
    }

    fail("QueryNodeException was expected, " + kind
        + " queries should not be supported");
  }

}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene TestSpanQueryParser.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.