alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (PositionBasedTermVectorMapper.java)

This example Lucene source code file (PositionBasedTermVectorMapper.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Lucene tags/keywords

arraylist, hashmap, integer, list, list, map, map, override, positionbasedtermvectormapper, positionbasedtermvectormapper, string, termvectoroffsetinfo, tvpositioninfo, tvpositioninfo, util

The Lucene PositionBasedTermVectorMapper.java source code

package org.apache.lucene.index;
/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * For each Field, store position by position information.  It ignores frequency information
 * <p/>
 * This is not thread-safe.
 */
public class PositionBasedTermVectorMapper extends TermVectorMapper{
  private Map<String, Map fieldToTerms;

  private String currentField;
  /**
   * A Map of Integer and TVPositionInfo
   */
  private Map<Integer,TVPositionInfo> currentPositions;
  private boolean storeOffsets;

  


  /**
   *
   *
   */
  public PositionBasedTermVectorMapper() {
    super(false, false);
  }

  public PositionBasedTermVectorMapper(boolean ignoringOffsets)
  {
    super(false, ignoringOffsets);
  }

  /**
   * Never ignores positions.  This mapper doesn't make much sense unless there are positions
   * @return false
   */
  @Override
  public boolean isIgnoringPositions() {
    return false;
  }

  /**
   * Callback for the TermVectorReader. 
   * @param term
   * @param frequency
   * @param offsets
   * @param positions
   */
  @Override
  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
    for (int i = 0; i < positions.length; i++) {
      Integer posVal = Integer.valueOf(positions[i]);
      TVPositionInfo pos = currentPositions.get(posVal);
      if (pos == null) {
        pos = new TVPositionInfo(positions[i], storeOffsets);
        currentPositions.put(posVal, pos);
      }
      pos.addTerm(term, offsets != null ? offsets[i] : null);
    }
  }

  /**
   * Callback mechanism used by the TermVectorReader
   * @param field  The field being read
   * @param numTerms The number of terms in the vector
   * @param storeOffsets Whether offsets are available
   * @param storePositions Whether positions are available
   */
  @Override
  public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
    if (storePositions == false)
    {
      throw new RuntimeException("You must store positions in order to use this Mapper");
    }
    if (storeOffsets == true)
    {
      //ignoring offsets
    }
    fieldToTerms = new HashMap<String,Map(numTerms);
    this.storeOffsets = storeOffsets;
    currentField = field;
    currentPositions = new HashMap<Integer,TVPositionInfo>();
    fieldToTerms.put(currentField, currentPositions);
  }

  /**
   * Get the mapping between fields and terms, sorted by the comparator
   *
   * @return A map between field names and a Map.  The sub-Map key is the position as the integer, the value is {@link org.apache.lucene.index.PositionBasedTermVectorMapper.TVPositionInfo}.
   */
  public Map<String,Map  getFieldToTerms() {
    return fieldToTerms;
  }

  /**
   * Container for a term at a position
   */
  public static class TVPositionInfo{
    private int position;

    private List<String> terms;

    private List<TermVectorOffsetInfo> offsets;


    public TVPositionInfo(int position, boolean storeOffsets) {
      this.position = position;
      terms = new ArrayList<String>();
      if (storeOffsets) {
        offsets = new ArrayList<TermVectorOffsetInfo>();
      }
    }

    void addTerm(String term, TermVectorOffsetInfo info)
    {
      terms.add(term);
      if (offsets != null) {
        offsets.add(info);
      }
    }

    /**
     *
     * @return The position of the term
     */
    public int getPosition() {
      return position;
    }

    /**
     * Note, there may be multiple terms at the same position
     * @return A List of Strings
     */
    public List<String> getTerms() {
      return terms;
    }

    /**
     * Parallel list (to {@link #getTerms()}) of TermVectorOffsetInfo objects.  There may be multiple entries since there may be multiple terms at a position
     * @return A List of TermVectorOffsetInfo objects, if offsets are stored.
     */
    public List<TermVectorOffsetInfo> getOffsets() {
      return offsets;
    }
  }


}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene PositionBasedTermVectorMapper.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.