alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (FieldQuery.java)

This example Lucene source code file (FieldQuery.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Lucene tags/keywords

collection, fieldquery, map, phrasequery, phrasequery, query, query, queryphrasemap, queryphrasemap, set, string, term, term, termquery, util

The Lucene FieldQuery.java source code

package org.apache.lucene.search.vectorhighlight;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;

/**
 * FieldQuery breaks down query object into terms/phrases and keep
 * them in QueryPhraseMap structure.
 */
public class FieldQuery {

  final boolean fieldMatch;

  // fieldMatch==true,  Map<fieldName,QueryPhraseMap>
  // fieldMatch==false, Map<null,QueryPhraseMap>
  Map<String, QueryPhraseMap> rootMaps = new HashMap();

  // fieldMatch==true,  Map<fieldName,setOfTermsInQueries>
  // fieldMatch==false, Map<null,setOfTermsInQueries>
  Map<String, Set termSetMap = new HashMap>();

  int termOrPhraseNumber; // used for colored tag support

  FieldQuery( Query query, boolean phraseHighlight, boolean fieldMatch ){
    this.fieldMatch = fieldMatch;
    Set<Query> flatQueries = new HashSet();
    flatten( query, flatQueries );
    saveTerms( flatQueries );
    Collection<Query> expandQueries = expand( flatQueries );

    for( Query flatQuery : expandQueries ){
      QueryPhraseMap rootMap = getRootMap( flatQuery );
      rootMap.add( flatQuery );
      if( !phraseHighlight && flatQuery instanceof PhraseQuery ){
        PhraseQuery pq = (PhraseQuery)flatQuery;
        if( pq.getTerms().length > 1 ){
          for( Term term : pq.getTerms() )
            rootMap.addTerm( term, flatQuery.getBoost() );
        }
      }
    }
  }
  
  void flatten( Query sourceQuery, Collection<Query> flatQueries ){
    if( sourceQuery instanceof BooleanQuery ){
      BooleanQuery bq = (BooleanQuery)sourceQuery;
      for( BooleanClause clause : bq.getClauses() ){
        if( !clause.isProhibited() )
          flatten( clause.getQuery(), flatQueries );
      }
    }
    else if( sourceQuery instanceof DisjunctionMaxQuery ){
      DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
      for( Query query : dmq ){
        flatten( query, flatQueries );
      }
    }
    else if( sourceQuery instanceof TermQuery ){
      if( !flatQueries.contains( sourceQuery ) )
        flatQueries.add( sourceQuery );
    }
    else if( sourceQuery instanceof PhraseQuery ){
      if( !flatQueries.contains( sourceQuery ) ){
        PhraseQuery pq = (PhraseQuery)sourceQuery;
        if( pq.getTerms().length > 1 )
          flatQueries.add( pq );
        else if( pq.getTerms().length == 1 ){
          flatQueries.add( new TermQuery( pq.getTerms()[0] ) );
        }
      }
    }
    // else discard queries
  }
  
  /*
   * Create expandQueries from flatQueries.
   * 
   * expandQueries := flatQueries + overlapped phrase queries
   * 
   * ex1) flatQueries={a,b,c}
   *      => expandQueries={a,b,c}
   * ex2) flatQueries={a,"b c","c d"}
   *      => expandQueries={a,"b c","c d","b c d"}
   */
  Collection<Query> expand( Collection flatQueries ){
    Set<Query> expandQueries = new HashSet();
    for( Iterator<Query> i = flatQueries.iterator(); i.hasNext(); ){
      Query query = i.next();
      i.remove();
      expandQueries.add( query );
      if( !( query instanceof PhraseQuery ) ) continue;
      for( Iterator<Query> j = flatQueries.iterator(); j.hasNext(); ){
        Query qj = j.next();
        if( !( qj instanceof PhraseQuery ) ) continue;
        checkOverlap( expandQueries, (PhraseQuery)query, (PhraseQuery)qj );
      }
    }
    return expandQueries;
  }

  /*
   * Check if PhraseQuery A and B have overlapped part.
   * 
   * ex1) A="a b", B="b c" => overlap; expandQueries={"a b c"}
   * ex2) A="b c", B="a b" => overlap; expandQueries={"a b c"}
   * ex3) A="a b", B="c d" => no overlap; expandQueries={}
   */
  private void checkOverlap( Collection<Query> expandQueries, PhraseQuery a, PhraseQuery b ){
    if( a.getSlop() != b.getSlop() ) return;
    Term[] ats = a.getTerms();
    Term[] bts = b.getTerms();
    if( fieldMatch && !ats[0].field().equals( bts[0].field() ) ) return;
    checkOverlap( expandQueries, ats, bts, a.getSlop(), a.getBoost() );
    checkOverlap( expandQueries, bts, ats, b.getSlop(), b.getBoost() );
  }

  /*
   * Check if src and dest have overlapped part and if it is, create PhraseQueries and add expandQueries.
   * 
   * ex1) src="a b", dest="c d"       => no overlap
   * ex2) src="a b", dest="a b c"     => no overlap
   * ex3) src="a b", dest="b c"       => overlap; expandQueries={"a b c"}
   * ex4) src="a b c", dest="b c d"   => overlap; expandQueries={"a b c d"}
   * ex5) src="a b c", dest="b c"     => no overlap
   * ex6) src="a b c", dest="b"       => no overlap
   * ex7) src="a a a a", dest="a a a" => overlap;
   *                                     expandQueries={"a a a a a","a a a a a a"}
   * ex8) src="a b c d", dest="b c"   => no overlap
   */
  private void checkOverlap( Collection<Query> expandQueries, Term[] src, Term[] dest, int slop, float boost ){
    // beginning from 1 (not 0) is safe because that the PhraseQuery has multiple terms
    // is guaranteed in flatten() method (if PhraseQuery has only one term, flatten()
    // converts PhraseQuery to TermQuery)
    for( int i = 1; i < src.length; i++ ){
      boolean overlap = true;
      for( int j = i; j < src.length; j++ ){
        if( ( j - i ) < dest.length && !src[j].text().equals( dest[j-i].text() ) ){
          overlap = false;
          break;
        }
      }
      if( overlap && src.length - i < dest.length ){
        PhraseQuery pq = new PhraseQuery();
        for( Term srcTerm : src )
          pq.add( srcTerm );
        for( int k = src.length - i; k < dest.length; k++ ){
          pq.add( new Term( src[0].field(), dest[k].text() ) );
        }
        pq.setSlop( slop );
        pq.setBoost( boost );
        if(!expandQueries.contains( pq ) )
          expandQueries.add( pq );
      }
    }
  }
  
  QueryPhraseMap getRootMap( Query query ){
    String key = getKey( query );
    QueryPhraseMap map = rootMaps.get( key );
    if( map == null ){
      map = new QueryPhraseMap( this );
      rootMaps.put( key, map );
    }
    return map;
  }
  
  /*
   * Return 'key' string. 'key' is the field name of the Query.
   * If not fieldMatch, 'key' will be null.
   */
  private String getKey( Query query ){
    if( !fieldMatch ) return null;
    if( query instanceof TermQuery )
      return ((TermQuery)query).getTerm().field();
    else if ( query instanceof PhraseQuery ){
      PhraseQuery pq = (PhraseQuery)query;
      Term[] terms = pq.getTerms();
      return terms[0].field();
    }
    else
      throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
  }

  /*
   * Save the set of terms in the queries to termSetMap.
   * 
   * ex1) q=name:john
   *      - fieldMatch==true
   *          termSetMap=Map<"name",Set<"john">>
   *      - fieldMatch==false
   *          termSetMap=Map<null,Set<"john">>
   *          
   * ex2) q=name:john title:manager
   *      - fieldMatch==true
   *          termSetMap=Map<"name",Set<"john">,
   *                         "title",Set<"manager">>
   *      - fieldMatch==false
   *          termSetMap=Map<null,Set<"john","manager">>
   *          
   * ex3) q=name:"john lennon"
   *      - fieldMatch==true
   *          termSetMap=Map<"name",Set<"john","lennon">>
   *      - fieldMatch==false
   *          termSetMap=Map<null,Set<"john","lennon">>
   */
  void saveTerms( Collection<Query> flatQueries ){
    for( Query query : flatQueries ){
      Set<String> termSet = getTermSet( query );
      if( query instanceof TermQuery )
        termSet.add( ((TermQuery)query).getTerm().text() );
      else if( query instanceof PhraseQuery ){
        for( Term term : ((PhraseQuery)query).getTerms() )
          termSet.add( term.text() );
      }
      else
        throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
    }
  }
  
  private Set<String> getTermSet( Query query ){
    String key = getKey( query );
    Set<String> set = termSetMap.get( key );
    if( set == null ){
      set = new HashSet<String>();
      termSetMap.put( key, set );
    }
    return set;
  }
  
  Set<String> getTermSet( String field ){
    return termSetMap.get( fieldMatch ? field : null );
  }

  /**
   * 
   * @param fieldName
   * @param term
   * @return QueryPhraseMap
   */
  public QueryPhraseMap getFieldTermMap( String fieldName, String term ){
    QueryPhraseMap rootMap = getRootMap( fieldName );
    return rootMap == null ? null : rootMap.subMap.get( term );
  }

  /**
   * 
   * @param fieldName
   * @param phraseCandidate
   * @return QueryPhraseMap
   */
  public QueryPhraseMap searchPhrase( String fieldName, final List<TermInfo> phraseCandidate ){
    QueryPhraseMap root = getRootMap( fieldName );
    if( root == null ) return null;
    return root.searchPhrase( phraseCandidate );
  }
  
  private QueryPhraseMap getRootMap( String fieldName ){
    return rootMaps.get( fieldMatch ? fieldName : null );
  }
  
  int nextTermOrPhraseNumber(){
    return termOrPhraseNumber++;
  }
  
  public static class QueryPhraseMap {

    boolean terminal;
    int slop;   // valid if terminal == true and phraseHighlight == true
    float boost;  // valid if terminal == true
    int termOrPhraseNumber;   // valid if terminal == true
    FieldQuery fieldQuery;
    Map<String, QueryPhraseMap> subMap = new HashMap();
    
    public QueryPhraseMap( FieldQuery fieldQuery ){
      this.fieldQuery = fieldQuery;
    }

    void addTerm( Term term, float boost ){
      QueryPhraseMap map = getOrNewMap( subMap, term.text() );
      map.markTerminal( boost );
    }
    
    private QueryPhraseMap getOrNewMap( Map<String, QueryPhraseMap> subMap, String term ){
      QueryPhraseMap map = subMap.get( term );
      if( map == null ){
        map = new QueryPhraseMap( fieldQuery );
        subMap.put( term, map );
      }
      return map;
    }

    void add( Query query ){
      if( query instanceof TermQuery ){
        addTerm( ((TermQuery)query).getTerm(), query.getBoost() );
      }
      else if( query instanceof PhraseQuery ){
        PhraseQuery pq = (PhraseQuery)query;
        Term[] terms = pq.getTerms();
        Map<String, QueryPhraseMap> map = subMap;
        QueryPhraseMap qpm = null;
        for( Term term : terms ){
          qpm = getOrNewMap( map, term.text() );
          map = qpm.subMap;
        }
        qpm.markTerminal( pq.getSlop(), pq.getBoost() );
      }
      else
        throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
    }
    
    public QueryPhraseMap getTermMap( String term ){
      return subMap.get( term );
    }
    
    private void markTerminal( float boost ){
      markTerminal( 0, boost );
    }
    
    private void markTerminal( int slop, float boost ){
      this.terminal = true;
      this.slop = slop;
      this.boost = boost;
      this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
    }
    
    public boolean isTerminal(){
      return terminal;
    }
    
    public int getSlop(){
      return slop;
    }
    
    public float getBoost(){
      return boost;
    }
    
    public int getTermOrPhraseNumber(){
      return termOrPhraseNumber;
    }
    
    public QueryPhraseMap searchPhrase( final List<TermInfo> phraseCandidate ){
      QueryPhraseMap currMap = this;
      for( TermInfo ti : phraseCandidate ){
        currMap = currMap.subMap.get( ti.getText() );
        if( currMap == null ) return null;
      }
      return currMap.isValidTermOrPhrase( phraseCandidate ) ? currMap : null;
    }
    
    public boolean isValidTermOrPhrase( final List<TermInfo> phraseCandidate ){
      // check terminal
      if( !terminal ) return false;

      // if the candidate is a term, it is valid
      if( phraseCandidate.size() == 1 ) return true;

      // else check whether the candidate is valid phrase
      // compare position-gaps between terms to slop
      int pos = phraseCandidate.get( 0 ).getPosition();
      for( int i = 1; i < phraseCandidate.size(); i++ ){
        int nextPos = phraseCandidate.get( i ).getPosition();
        if( Math.abs( nextPos - pos - 1 ) > slop ) return false;
        pos = nextPos;
      }
      return true;
    }
  }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene FieldQuery.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.