|
Commons Codec example source code file (SoundexTest.java)
The Commons Codec SoundexTest.java source code
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// (FYI: Formatted and sorted with Eclipse)
package org.apache.commons.codec.language;

import junit.framework.Test;
import junit.framework.TestSuite;

import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.codec.StringEncoderAbstractTest;

/**
 * Tests {@link Soundex}.
 *
 * <p>
 * Every test method runs against a fresh {@link Soundex} instance that is
 * created in {@link #setUp()} and released in {@link #tearDown()}.
 * </p>
 *
 * @author Apache Software Foundation
 * @version $Id: SoundexTest.java 794525 2009-07-16 04:58:08Z bayard $
 */
public class SoundexTest extends StringEncoderAbstractTest {

    /**
     * Builds the JUnit 3 suite containing every test method of this class.
     *
     * @return the suite for this test case
     */
    public static Test suite() {
        return new TestSuite(SoundexTest.class);
    }

    /** Encoder under test; assigned in {@link #setUp()}, cleared in {@link #tearDown()}. */
    private Soundex encoder = null;

    /**
     * Creates a named test case.
     *
     * @param name
     *            the name handed to the superclass constructor
     */
    public SoundexTest(String name) {
        super(name);
    }

    /**
     * Asserts that every input string encodes to the same expected code.
     *
     * @param strings
     *            the inputs to encode
     * @param expectedEncoding
     *            the code each input must produce
     */
    void encodeAll(String[] strings, String expectedEncoding) {
        for (int i = 0; i < strings.length; i++) {
            assertEquals(expectedEncoding, this.getEncoder().encode(strings[i]));
        }
    }

    /**
     * @return the encoder currently under test.
     */
    public Soundex getEncoder() {
        return this.encoder;
    }

    /**
     * Supplies a new {@link Soundex} to the tests inherited from the
     * abstract base class.
     *
     * @return a new {@link Soundex} instance
     */
    protected StringEncoder makeEncoder() {
        return new Soundex();
    }

    /**
     * @param encoder
     *            The encoder to set.
     */
    public void setEncoder(Soundex encoder) {
        this.encoder = encoder;
    }

    /**
     * Runs the superclass set-up and installs a fresh {@link Soundex}.
     *
     * @throws Exception
     *             if the superclass set-up fails
     */
    public void setUp() throws Exception {
        super.setUp();
        this.setEncoder(new Soundex());
    }

    /**
     * Runs the superclass tear-down and drops the encoder reference.
     *
     * @throws Exception
     *             if the superclass tear-down fails
     */
    public void tearDown() throws Exception {
        super.tearDown();
        this.setEncoder(null);
    }

    /** A batch of surnames that must all encode to B650. */
    public void testB650() {
        this.encodeAll(new String[] {
                "BARHAM", "BARONE", "BARRON", "BERNA", "BIRNEY", "BIRNIE", "BOOROM", "BOREN",
                "BORN", "BOURN", "BOURNE", "BOWRON", "BRAIN", "BRAME", "BRANN", "BRAUN",
                "BREEN", "BRIEN", "BRIM", "BRIMM", "BRINN", "BRION", "BROOM", "BROOME",
                "BROWN", "BROWNE", "BRUEN", "BRUHN", "BRUIN", "BRUMM", "BRUN", "BRUNO",
                "BRYAN", "BURIAN", "BURN", "BURNEY", "BYRAM", "BYRNE", "BYRON", "BYRUM" },
                "B650");
    }

    /**
     * Checks {@link Soundex#difference(String, String)} on edge cases and on
     * published examples.
     *
     * @throws EncoderException
     *             if the encoder rejects an input
     */
    public void testDifference() throws EncoderException {
        // Edge cases
        assertEquals(0, this.getEncoder().difference(null, null));
        assertEquals(0, this.getEncoder().difference("", ""));
        assertEquals(0, this.getEncoder().difference(" ", " "));
        // Normal cases
        assertEquals(4, this.getEncoder().difference("Smith", "Smythe"));
        assertEquals(2, this.getEncoder().difference("Ann", "Andrew"));
        assertEquals(1, this.getEncoder().difference("Margaret", "Andrew"));
        assertEquals(0, this.getEncoder().difference("Janet", "Margaret"));
        // Examples from http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp
        assertEquals(4, this.getEncoder().difference("Green", "Greene"));
        assertEquals(0, this.getEncoder().difference("Blotchet-Halls", "Greene"));
        // Examples from http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
        assertEquals(4, this.getEncoder().difference("Smith", "Smythe"));
        assertEquals(4, this.getEncoder().difference("Smithers", "Smythers"));
        assertEquals(2, this.getEncoder().difference("Anothers", "Brothers"));
    }

    /** Encodes a handful of common English words. */
    public void testEncodeBasic() {
        assertEquals("T235", this.getEncoder().encode("testing"));
        assertEquals("T000", this.getEncoder().encode("The"));
        assertEquals("Q200", this.getEncoder().encode("quick"));
        assertEquals("B650", this.getEncoder().encode("brown"));
        assertEquals("F200", this.getEncoder().encode("fox"));
        assertEquals("J513", this.getEncoder().encode("jumped"));
        assertEquals("O160", this.getEncoder().encode("over"));
        assertEquals("T000", this.getEncoder().encode("the"));
        assertEquals("L200", this.getEncoder().encode("lazy"));
        assertEquals("D200", this.getEncoder().encode("dogs"));
    }

    /**
     * Examples from
     * http://www.bradandkathy.com/genealogy/overviewofsoundex.html
     */
    public void testEncodeBatch2() {
        assertEquals("A462", this.getEncoder().encode("Allricht"));
        assertEquals("E166", this.getEncoder().encode("Eberhard"));
        assertEquals("E521", this.getEncoder().encode("Engebrethson"));
        assertEquals("H512", this.getEncoder().encode("Heimbach"));
        assertEquals("H524", this.getEncoder().encode("Hanselmann"));
        assertEquals("H431", this.getEncoder().encode("Hildebrand"));
        assertEquals("K152", this.getEncoder().encode("Kavanagh"));
        assertEquals("L530", this.getEncoder().encode("Lind"));
        assertEquals("L222", this.getEncoder().encode("Lukaschowsky"));
        assertEquals("M235", this.getEncoder().encode("McDonnell"));
        assertEquals("M200", this.getEncoder().encode("McGee"));
        assertEquals("O155", this.getEncoder().encode("Opnian"));
        assertEquals("O155", this.getEncoder().encode("Oppenheimer"));
        assertEquals("R355", this.getEncoder().encode("Riedemanas"));
        assertEquals("Z300", this.getEncoder().encode("Zita"));
        assertEquals("Z325", this.getEncoder().encode("Zitzmeinn"));
    }

    /**
     * Examples from
     * http://www.archives.gov/research_room/genealogy/census/soundex.html
     */
    public void testEncodeBatch3() {
        assertEquals("W252", this.getEncoder().encode("Washington"));
        assertEquals("L000", this.getEncoder().encode("Lee"));
        assertEquals("G362", this.getEncoder().encode("Gutierrez"));
        assertEquals("P236", this.getEncoder().encode("Pfister"));
        assertEquals("J250", this.getEncoder().encode("Jackson"));
        assertEquals("T522", this.getEncoder().encode("Tymczak"));
        // For VanDeusen: D-250 (D, 2 for the S, 5 for the N, 0 added) is also
        // possible.
        assertEquals("V532", this.getEncoder().encode("VanDeusen"));
    }

    /**
     * Examples from: http://www.myatt.demon.co.uk/sxalg.htm
     */
    public void testEncodeBatch4() {
        assertEquals("H452", this.getEncoder().encode("HOLMES"));
        assertEquals("A355", this.getEncoder().encode("ADOMOMI"));
        assertEquals("V536", this.getEncoder().encode("VONDERLEHR"));
        assertEquals("B400", this.getEncoder().encode("BALL"));
        assertEquals("S000", this.getEncoder().encode("SHAW"));
        assertEquals("J250", this.getEncoder().encode("JACKSON"));
        assertEquals("S545", this.getEncoder().encode("SCANLON"));
        assertEquals("S532", this.getEncoder().encode("SAINTJOHN"));
    }

    /** A non-letter character inside a name must not change the code. */
    public void testBadCharacters() {
        assertEquals("H452", this.getEncoder().encode("HOL>MES"));
    }

    /** An apostrophe at any position must not change the code. */
    public void testEncodeIgnoreApostrophes() {
        this.encodeAll(new String[] {
                "OBrien", "'OBrien", "O'Brien", "OB'rien", "OBr'ien", "OBri'en", "OBrie'n", "OBrien'" },
                "O165");
    }

    /**
     * Test data from http://www.myatt.demon.co.uk/sxalg.htm
     */
    public void testEncodeIgnoreHyphens() {
        this.encodeAll(new String[] {
                "KINGSMITH", "-KINGSMITH", "K-INGSMITH", "KI-NGSMITH", "KIN-GSMITH", "KING-SMITH",
                "KINGS-MITH", "KINGSM-ITH", "KINGSMI-TH", "KINGSMIT-H", "KINGSMITH-" },
                "K525");
    }

    /** Leading and trailing whitespace must not change the code. */
    public void testEncodeIgnoreTrimmable() {
        assertEquals("W252", this.getEncoder().encode(" \t\n\r Washington \t\n\r "));
    }

    /**
     * Consonants from the same code group separated by W or H are treated as
     * one.
     */
    public void testHWRuleEx1() {
        // From
        // http://www.archives.gov/research_room/genealogy/census/soundex.html:
        // Ashcraft is coded A-261 (A, 2 for the S, C ignored, 6 for the R, 1
        // for the F). It is not coded A-226.
        assertEquals("A261", this.getEncoder().encode("Ashcraft"));
    }

    /**
     * Consonants from the same code group separated by W or H are treated as
     * one.
     *
     * Test data from http://www.myatt.demon.co.uk/sxalg.htm
     */
    public void testHWRuleEx2() {
        assertEquals("B312", this.getEncoder().encode("BOOTHDAVIS"));
        assertEquals("B312", this.getEncoder().encode("BOOTH-DAVIS"));
    }

    /**
     * Consonants from the same code group separated by W or H are treated as
     * one.
     */
    public void testHWRuleEx3() {
        assertEquals("S460", this.getEncoder().encode("Sgler"));
        assertEquals("S460", this.getEncoder().encode("Swhgler"));
        // Also S460:
        this.encodeAll(new String[] {
                "SAILOR", "SALYER", "SAYLOR", "SCHALLER", "SCHELLER", "SCHILLER", "SCHOOLER", "SCHULER",
                "SCHUYLER", "SEILER", "SEYLER", "SHOLAR", "SHULER", "SILAR", "SILER", "SILLER" },
                "S460");
    }

    /**
     * Re-applying an encoder's own maximum length must leave its output
     * unchanged.
     *
     * @throws Exception
     *             if the encoder fails unexpectedly
     */
    public void testMaxLength() throws Exception {
        Soundex soundex = new Soundex();
        soundex.setMaxLength(soundex.getMaxLength());
        // Assert on the instance that was just configured; the shared fixture
        // encoder never had setMaxLength called on it.
        assertEquals("S460", soundex.encode("SGLER".equals("") ? "" : "Sgler"));
    }

    /**
     * Setting a maximum length of 2 must still yield the full four-character
     * code.
     *
     * @throws Exception
     *             if the encoder fails unexpectedly
     */
    public void testMaxLengthLessThan3Fix() throws Exception {
        Soundex soundex = new Soundex();
        soundex.setMaxLength(2);
        assertEquals("S460", soundex.encode("SCHELLER"));
    }

    /**
     * Examples for MS SQLServer from
     * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
     */
    public void testMsSqlServer1() {
        assertEquals("S530", this.getEncoder().encode("Smith"));
        assertEquals("S530", this.getEncoder().encode("Smythe"));
    }

    /**
     * Examples for MS SQLServer from
     * http://support.microsoft.com/default.aspx?scid=http://support.microsoft.com:80/support/kb/articles/Q100/3/65.asp&NoWebContent=1
     */
    public void testMsSqlServer2() {
        this.encodeAll(new String[] {
                "Erickson", "Erickson", "Erikson", "Ericson", "Ericksen", "Ericsen" },
                "E625");
    }

    /**
     * Examples for MS SQLServer from
     * http://databases.about.com/library/weekly/aa042901a.htm
     */
    public void testMsSqlServer3() {
        assertEquals("A500", this.getEncoder().encode("Ann"));
        assertEquals("A536", this.getEncoder().encode("Andrew"));
        assertEquals("J530", this.getEncoder().encode("Janet"));
        assertEquals("M626", this.getEncoder().encode("Margaret"));
        assertEquals("S315", this.getEncoder().encode("Steven"));
        assertEquals("M240", this.getEncoder().encode("Michael"));
        assertEquals("R163", this.getEncoder().encode("Robert"));
        assertEquals("L600", this.getEncoder().encode("Laura"));
        assertEquals("A500", this.getEncoder().encode("Anne"));
    }

    /**
     * Fancy characters are not mapped by the default US mapping.
     *
     * http://issues.apache.org/bugzilla/show_bug.cgi?id=29080
     */
    public void testUsMappingOWithDiaeresis() {
        assertEquals("O000", this.getEncoder().encode("o"));
        // Non-ASCII characters are written as Unicode escapes so the test does
        // not depend on the encoding used to compile this file.
        if (Character.isLetter('\u00f6')) { // o with diaeresis
            try {
                // Expected value starts with the upper-case O with diaeresis.
                assertEquals("\u00d6000", this.getEncoder().encode("\u00f6"));
                fail("Expected IllegalArgumentException not thrown");
            } catch (IllegalArgumentException e) {
                // expected
            }
        } else {
            assertEquals("", this.getEncoder().encode("\u00f6"));
        }
    }

    /**
     * Fancy characters are not mapped by the default US mapping.
     *
     * http://issues.apache.org/bugzilla/show_bug.cgi?id=29080
     */
    public void testUsMappingEWithAcute() {
        assertEquals("E000", this.getEncoder().encode("e"));
        // Non-ASCII characters are written as Unicode escapes so the test does
        // not depend on the encoding used to compile this file.
        if (Character.isLetter('\u00e9')) { // e with acute
            try {
                // Expected value starts with the upper-case E with acute.
                assertEquals("\u00c9000", this.getEncoder().encode("\u00e9"));
                fail("Expected IllegalArgumentException not thrown");
            } catch (IllegalArgumentException e) {
                // expected
            }
        } else {
            assertEquals("", this.getEncoder().encode("\u00e9"));
        }
    }

    /**
     * https://issues.apache.org/jira/browse/CODEC-54
     * https://issues.apache.org/jira/browse/CODEC-56
     */
    public void testUsEnglishStatic() {
        assertEquals("W452", Soundex.US_ENGLISH.soundex("Williams"));
    }

    /**
     * https://issues.apache.org/jira/browse/CODEC-54
     * https://issues.apache.org/jira/browse/CODEC-56
     */
    public void testNewInstance() {
        assertEquals("W452", new Soundex().soundex("Williams"));
    }

    /** Constructs a {@link Soundex} from the US English mapping as a char array. */
    public void testNewInstance2() {
        assertEquals("W452", new Soundex(Soundex.US_ENGLISH_MAPPING_STRING.toCharArray()).soundex("Williams"));
    }

    /** Constructs a {@link Soundex} from the US English mapping as a String. */
    public void testNewInstance3() {
        assertEquals("W452", new Soundex(Soundex.US_ENGLISH_MAPPING_STRING).soundex("Williams"));
    }

    /** Checks how {@link SoundexUtils} treats null and empty arguments. */
    public void testSoundexUtilsNullBehaviour() {
        assertNull(SoundexUtils.clean(null));
        assertEquals("", SoundexUtils.clean(""));
        assertEquals(0, SoundexUtils.differenceEncoded(null, ""));
        assertEquals(0, SoundexUtils.differenceEncoded("", null));
    }

    /** Checks that {@link SoundexUtils} can be instantiated. */
    public void testSoundexUtilsConstructable() {
        new SoundexUtils();
    }
}
Other Commons Codec examples (source code examples)
Here is a short list of links related to this Commons Codec SoundexTest.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.