alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Jazzy example source code file (SpellDictionaryASpell.java)

This example Jazzy source code file (SpellDictionaryASpell.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Jazzy tags/keywords

doublemeta, generictransformator, hashtable, io, ioexception, iterator, list, list, security, spelldictionaryaspell, string, string, util, vector, vector, word, word

The Jazzy SpellDictionaryASpell.java source code

/*
Jazzy - a Java library for Spell Checking
Copyright (C) 2001 Mindaugas Idzelis
Full text of license can be found in LICENSE.txt

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/
/* Created by bgalbs on Jan 30, 2003 at 11:45:25 PM */
package com.swabunga.spell.engine;

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.security.InvalidParameterException;
import java.util.*;

/**
 * Container for various methods that any <code>SpellDictionary</code> will use.
 * Based on the original Jazzy <a href="http://aspell.net/">aspell</a> port.
 * <p/>
 *
 *
 */
public abstract class SpellDictionaryASpell implements SpellDictionary {


  /** The reference to a Transformator, used to transform a word into its phonetic code. */
  protected Transformator tf;

  /**
   * Creates a dictionary whose phonetic transformation is read from a rules file.
   *
   * @param phonetic the phonetic rules file, or <code>null</code> to fall back to
   *                 {@link DoubleMeta}
   * @throws IOException if building the transformator fails
   */
  public SpellDictionaryASpell(File phonetic) throws IOException {
    if (phonetic == null) {
      tf = new DoubleMeta();
    } else {
      tf = new GenericTransformator(phonetic);
    }
  }

  /**
   * Creates a dictionary whose phonetic transformation is read from a rules file
   * using the given encoding.
   *
   * @param phonetic the phonetic rules file, or <code>null</code> to fall back to
   *                 {@link DoubleMeta}
   * @param encoding the encoding handed to {@link GenericTransformator}; ignored
   *                 when <code>phonetic</code> is <code>null</code>
   * @throws IOException if building the transformator fails
   */
  public SpellDictionaryASpell(File phonetic, String encoding) throws IOException {
    if (phonetic == null) {
      tf = new DoubleMeta();
    } else {
      tf = new GenericTransformator(phonetic, encoding);
    }
  }

  /**
   * Creates a dictionary whose phonetic transformation is read from a reader.
   *
   * @param phonetic a reader over the phonetic rules, or <code>null</code> to fall
   *                 back to {@link DoubleMeta}
   * @throws IOException if building the transformator fails
   */
  public SpellDictionaryASpell(Reader phonetic) throws IOException {
    if (phonetic == null) {
      tf = new DoubleMeta();
    } else {
      tf = new GenericTransformator(phonetic);
    }
  }


  /**
   * Returns a list of Word objects that are the suggestions to an
   * incorrect word.
   * <p>
   * Words sharing the phonetic code of <code>word</code> come first, followed by
   * words whose phonetic code matches a single-edit variation of <code>word</code>
   * (adjacent interchange, character change, character insertion, character
   * deletion). Each group is sorted with the <code>Word</code> comparator. If both
   * groups are empty, the closest words with the same phonetic code are returned.
   * <p>
   * The <code>word</code> is used exactly as passed in; it is never trimmed or
   * otherwise modified.
   *
   * @param word the misspelt word to find suggestions for
   * @param threshold not consulted by this implementation; candidates are filtered
   *                  against <code>Configuration.SPELL_THRESHOLD</code> instead
   * @return a List (backed by a Vector) of Word suggestions
   */
  public List getSuggestions(String word, int threshold) {

    String code = getCode(word);

    // add all words that have the same phonetics
    Hashtable exactCode = new Hashtable();
    exactCode.put(code, code);
    Vector phoneticList = getWordsFromCode(word, exactCode);

    // do some transformations to pick up more results
    Hashtable nearmisscodes = new Hashtable();
    addInterchangeCodes(word, nearmisscodes);

    char[] replacelist = tf.getReplaceList();
    addChangeCodes(word, replacelist, nearmisscodes);
    addInsertCodes(word, replacelist, nearmisscodes);
    addDeleteCodes(word, nearmisscodes);

    nearmisscodes.remove(code); //already accounted for in phoneticList

    Vector wordlist = getWordsFromCode(word, nearmisscodes);

    if (wordlist.size() == 0 && phoneticList.size() == 0) {
      addBestGuess(word, phoneticList);
    }

    // We sort a Vector at the end instead of maintaining a
    // continuously sorted TreeSet because every time you add a collection
    // to a TreeSet it has to be resorted. It's better to do this operation
    // once at the end.

    Collections.sort(phoneticList, new Word()); //always sort phonetic matches along the top
    Collections.sort(wordlist, new Word()); //the non-phonetic matches can be listed below

    phoneticList.addAll(wordlist);
    return phoneticList;
  }

  /** Stores the phonetic code of the given characters in <code>codes</code> (code maps to itself). */
  private void putCode(char[] chars, Hashtable codes) {
    String s = getCode(new String(chars));
    codes.put(s, s);
  }

  /** Adds the codes of every variation made by swapping two adjacent characters of the word. */
  private void addInterchangeCodes(String word, Hashtable codes) {
    char[] chars = word.toCharArray();
    for (int i = 0; i < chars.length - 1; i++) {
      char first = chars[i];
      char second = chars[i + 1];
      chars[i] = second;
      chars[i + 1] = first;
      putCode(chars, codes);
      // restore the original order before moving to the next pair
      chars[i] = first;
      chars[i + 1] = second;
    }
  }

  /** Adds the codes of every variation made by replacing one character with a replace-list character. */
  private void addChangeCodes(String word, char[] replacelist, Hashtable codes) {
    char[] chars = word.toCharArray();
    for (int i = 0; i < chars.length; i++) {
      char original = chars[i];
      for (int j = 0; j < replacelist.length; j++) {
        chars[i] = replacelist[j];
        putCode(chars, codes);
      }
      chars[i] = original;
    }
  }

  /** Adds the codes of every variation made by inserting one replace-list character at any position. */
  private void addInsertCodes(String word, char[] replacelist, Hashtable codes) {
    // One slot longer than the word; the free slot ("gap") starts at the end
    // and walks towards the front as characters are shifted right over it.
    char[] chars = new char[word.length() + 1];
    word.getChars(0, word.length(), chars, 0);
    for (int gap = word.length(); gap >= 0; gap--) {
      for (int j = 0; j < replacelist.length; j++) {
        chars[gap] = replacelist[j];
        putCode(chars, codes);
      }
      if (gap > 0) {
        chars[gap] = chars[gap - 1];
      }
    }
  }

  /**
   * Adds the codes of every variation made by deleting one character of the word.
   * Words shorter than two characters are skipped because their only deletion is
   * the empty string.
   */
  private void addDeleteCodes(String word, Hashtable codes) {
    int length = word.length();
    if (length < 2) {
      return;
    }
    // Start with the last character removed, then move the hole towards the
    // front by swapping the currently removed character back in.
    char[] shortened = new char[length - 1];
    word.getChars(0, length - 1, shortened, 0);
    char removed = word.charAt(length - 1);
    int hole = length - 1;
    while (true) {
      putCode(shortened, codes);
      if (hole == 0) {
        break;
      }
      char next = shortened[hole - 1];
      shortened[hole - 1] = removed;
      removed = next;
      --hole;
    }
  }

  /**
   * When we don't come up with any suggestions (probably because the threshold was too strict),
   * then pick the best guesses from those words that have the same phonetic code.
   * Only the words with the smallest edit distance to <code>word</code> are added,
   * in the order <code>getWords</code> returned them.
   *
   * @param word - the word we are trying to spell correct
   * @param wordList - the (initially empty) vector that will get the best guesses
   * @throws InvalidParameterException if <code>wordList</code> is not empty
   */
  private void addBestGuess(String word, Vector wordList) {
    if (wordList.size() != 0) {
      throw new InvalidParameterException("the wordList vector must be empty");
    }

    int bestScore = Integer.MAX_VALUE;
    List simwordlist = getWords(getCode(word));

    for (Iterator j = simwordlist.iterator(); j.hasNext();) {
      String similar = (String) j.next();
      int distance = EditDistance.getDistance(word, similar);
      if (distance < bestScore) {
        // a strictly better score invalidates everything collected so far
        bestScore = distance;
        wordList.clear();
      }
      if (distance == bestScore) {
        wordList.add(new Word(similar, distance));
      }
    }
  }

  /**
   * Collects the dictionary words stored under any of the given phonetic codes
   * whose edit distance to <code>word</code> is below the configured
   * <code>Configuration.SPELL_THRESHOLD</code>.
   *
   * @param word the word the candidates are measured against
   * @param codes the phonetic codes to look up (the keys are used)
   * @return a Vector of Word objects, one per accepted candidate
   */
  private Vector getWordsFromCode(String word, Hashtable codes) {
    Configuration config = Configuration.getConfiguration();
    Vector result = new Vector();
    final int configDistance = config.getInteger(Configuration.SPELL_THRESHOLD);

    for (Enumeration i = codes.keys(); i.hasMoreElements();) {
      String code = (String) i.nextElement();

      List simwordlist = getWords(code);
      for (Iterator iter = simwordlist.iterator(); iter.hasNext();) {
        String similar = (String) iter.next();
        int distance = EditDistance.getDistance(word, similar);
        if (distance < configDistance) {
          result.addElement(new Word(similar, distance));
        }
      }
    }
    return result;
  }

  /**
   * Returns the phonetic code representing the word.
   *
   * @param word the word to encode
   * @return the result of applying the transformator to <code>word</code>
   */
  public String getCode(String word) {
    return tf.transform(word);
  }

  /**
   * Returns a list of words that have the same phonetic code.
   *
   * @param phoneticCode the phonetic code to look up
   * @return the words stored under that code; callers in this class iterate it
   *         as a list of Strings
   */
  protected abstract List getWords(String phoneticCode);

  /**
   * Returns true if the word is correctly spelled against the current word list.
   * The word is looked up as given and, failing that, in its lowercase form.
   *
   * @param word the word to check
   * @return <code>true</code> if the word or its lowercase form is in the list of
   *         words sharing its phonetic code
   */
  public boolean isCorrect(String word) {
    List possible = getWords(getCode(word));
    //JMH should we always try the lowercase version. If I dont then capitalised
    //words are always returned as incorrect.
    return possible.contains(word) || possible.contains(word.toLowerCase());
  }
}

Other Jazzy examples (source code examples)

Here is a short list of links related to this Jazzy SpellDictionaryASpell.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.