|
Lucene example source code file (TestCharArraySet.java)
The Lucene TestCharArraySet.java source codepackage org.apache.lucene.analysis; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.Iterator; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.Version; public class TestCharArraySet extends LuceneTestCase { static final String[] TEST_STOP_WORDS = { "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with" }; public void testRehash() throws Exception { CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 0, true); for(int i=0;i<TEST_STOP_WORDS.length;i++) cas.add(TEST_STOP_WORDS[i]); assertEquals(TEST_STOP_WORDS.length, cas.size()); for(int i=0;i<TEST_STOP_WORDS.length;i++) assertTrue(cas.contains(TEST_STOP_WORDS[i])); } public void testNonZeroOffset() { String[] words={"Hello","World","this","is","a","test"}; char[] findme="xthisy".toCharArray(); CharArraySet set=new CharArraySet(TEST_VERSION_CURRENT, 10,true); 
set.addAll(Arrays.asList(words)); assertTrue(set.contains(findme, 1, 4)); assertTrue(set.contains(new String(findme,1,4))); // test unmodifiable set = CharArraySet.unmodifiableSet(set); assertTrue(set.contains(findme, 1, 4)); assertTrue(set.contains(new String(findme,1,4))); } public void testObjectContains() { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true); Integer val = Integer.valueOf(1); set.add(val); assertTrue(set.contains(val)); assertTrue(set.contains(new Integer(1))); // another integer assertTrue(set.contains("1")); assertTrue(set.contains(new char[]{'1'})); // test unmodifiable set = CharArraySet.unmodifiableSet(set); assertTrue(set.contains(val)); assertTrue(set.contains(new Integer(1))); // another integer assertTrue(set.contains("1")); assertTrue(set.contains(new char[]{'1'})); } public void testClear(){ CharArraySet set=new CharArraySet(TEST_VERSION_CURRENT, 10,true); set.addAll(Arrays.asList(TEST_STOP_WORDS)); assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size()); set.clear(); assertEquals("not empty", 0, set.size()); for(int i=0;i<TEST_STOP_WORDS.length;i++) assertFalse(set.contains(TEST_STOP_WORDS[i])); set.addAll(Arrays.asList(TEST_STOP_WORDS)); assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size()); for(int i=0;i<TEST_STOP_WORDS.length;i++) assertTrue(set.contains(TEST_STOP_WORDS[i])); } public void testModifyOnUnmodifiable(){ CharArraySet set=new CharArraySet(TEST_VERSION_CURRENT, 10, true); set.addAll(Arrays.asList(TEST_STOP_WORDS)); final int size = set.size(); set = CharArraySet.unmodifiableSet(set); assertEquals("Set size changed due to unmodifiableSet call" , size, set.size()); String NOT_IN_SET = "SirGallahad"; assertFalse("Test String already exists in set", set.contains(NOT_IN_SET)); try{ set.add(NOT_IN_SET.toCharArray()); fail("Modified unmodifiable set"); }catch (UnsupportedOperationException e) { // expected assertFalse("Test String has been added to unmodifiable set", 
set.contains(NOT_IN_SET)); assertEquals("Size of unmodifiable set has changed", size, set.size()); } try{ set.add(NOT_IN_SET); fail("Modified unmodifiable set"); }catch (UnsupportedOperationException e) { // expected assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET)); assertEquals("Size of unmodifiable set has changed", size, set.size()); } try{ set.add(new StringBuilder(NOT_IN_SET)); fail("Modified unmodifiable set"); }catch (UnsupportedOperationException e) { // expected assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET)); assertEquals("Size of unmodifiable set has changed", size, set.size()); } try{ set.clear(); fail("Modified unmodifiable set"); }catch (UnsupportedOperationException e) { // expected assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET)); assertEquals("Size of unmodifiable set has changed", size, set.size()); } try{ set.add((Object) NOT_IN_SET); fail("Modified unmodifiable set"); }catch (UnsupportedOperationException e) { // expected assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET)); assertEquals("Size of unmodifiable set has changed", size, set.size()); } // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call // remove() on the iterator try{ set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true)); fail("Modified unmodifiable set"); }catch (UnsupportedOperationException e) { // expected assertEquals("Size of unmodifiable set has changed", size, set.size()); } try{ set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(NOT_IN_SET), true)); fail("Modified unmodifiable set"); }catch (UnsupportedOperationException e) { // expected assertEquals("Size of unmodifiable set has changed", size, set.size()); } try{ 
set.addAll(Arrays.asList(new String[]{NOT_IN_SET})); fail("Modified unmodifiable set"); }catch (UnsupportedOperationException e) { // expected assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET)); } for (int i = 0; i < TEST_STOP_WORDS.length; i++) { assertTrue(set.contains(TEST_STOP_WORDS[i])); } } public void testUnmodifiableSet(){ CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10,true); set.addAll(Arrays.asList(TEST_STOP_WORDS)); set.add(Integer.valueOf(1)); final int size = set.size(); set = CharArraySet.unmodifiableSet(set); assertEquals("Set size changed due to unmodifiableSet call" , size, set.size()); for (String stopword : TEST_STOP_WORDS) { assertTrue(set.contains(stopword)); } assertTrue(set.contains(Integer.valueOf(1))); assertTrue(set.contains("1")); assertTrue(set.contains(new char[]{'1'})); try{ CharArraySet.unmodifiableSet(null); fail("can not make null unmodifiable"); }catch (NullPointerException e) { // expected } } public void testSupplementaryChars() { String missing = "Term %s is missing in the set"; String falsePos = "Term %s is in the set but shouldn't"; // for reference see // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on String[] upperArr = new String[] {"Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"}; String[] lowerArr = new String[] {"abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"}; CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true); for (String upper : upperArr) { set.add(upper); } for (int i = 0; i < upperArr.length; i++) { assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i])); } set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false); for (String upper : upperArr) { set.add(upper); } for (int i = 0; i < upperArr.length; 
i++) { assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i])); } } public void testSingleHighSurrogate() { String missing = "Term %s is missing in the set"; String falsePos = "Term %s is in the set but shouldn't"; String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" }; String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" }; CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays .asList(TEST_STOP_WORDS), true); for (String upper : upperArr) { set.add(upper); } for (int i = 0; i < upperArr.length; i++) { assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i])); } set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false); for (String upper : upperArr) { set.add(upper); } for (int i = 0; i < upperArr.length; i++) { assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); assertFalse(String.format(falsePos, upperArr[i]), set .contains(lowerArr[i])); } } /** * @deprecated remove this test when lucene 3.0 "broken unicode 4" support is * no longer needed. 
*/ @Deprecated public void testSupplementaryCharsBWCompat() { String missing = "Term %s is missing in the set"; String falsePos = "Term %s is in the set but shouldn't"; // for reference see // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on String[] upperArr = new String[] {"Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"}; String[] lowerArr = new String[] {"abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"}; CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), true); for (String upper : upperArr) { set.add(upper); } for (int i = 0; i < upperArr.length; i++) { assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i])); } set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), false); for (String upper : upperArr) { set.add(upper); } for (int i = 0; i < upperArr.length; i++) { assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i])); } } /** * @deprecated remove this test when lucene 3.0 "broken unicode 4" support is * no longer needed. 
*/ @Deprecated public void testSingleHighSurrogateBWComapt() { String missing = "Term %s is missing in the set"; String falsePos = "Term %s is in the set but shouldn't"; String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" }; String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" }; CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays .asList(TEST_STOP_WORDS), true); for (String upper : upperArr) { set.add(upper); } for (int i = 0; i < upperArr.length; i++) { assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); if (i == lowerArr.length - 1) assertFalse(String.format(falsePos, lowerArr[i]), set .contains(lowerArr[i])); else assertTrue(String.format(missing, lowerArr[i]), set .contains(lowerArr[i])); } set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), false); for (String upper : upperArr) { set.add(upper); } for (int i = 0; i < upperArr.length; i++) { assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); assertFalse(String.format(falsePos, lowerArr[i]), set .contains(lowerArr[i])); } } @SuppressWarnings("deprecated") public void testCopyCharArraySetBWCompat() { CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true); CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false); List<String> stopwords = Arrays.asList(TEST_STOP_WORDS); List<String> stopwordsUpper = new ArrayList Other Lucene examples (source code examples)Here is a short list of links related to this Lucene TestCharArraySet.java source code file: |
... this post is sponsored by my books ...
#1 New Release!
FP Best Seller
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.