home | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lucene example source code file (TestCharArraySet.java)

This example Lucene source code file (TestCharArraySet.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Lucene tags/keywords

arraylist, chararrayset, chararrayset, list, list, modified, nullpointerexception, size, string, string, term, test, unsupportedoperationexception, unsupportedoperationexception, util

The Lucene TestCharArraySet.java source code

package org.apache.lucene.analysis;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Iterator;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;


public class TestCharArraySet extends LuceneTestCase {
  
  static final String[] TEST_STOP_WORDS = {
    "a", "an", "and", "are", "as", "at", "be", "but", "by",
    "for", "if", "in", "into", "is", "it",
    "no", "not", "of", "on", "or", "such",
    "that", "the", "their", "then", "there", "these",
    "they", "this", "to", "was", "will", "with"
  };
  
  
  public void testRehash() throws Exception {
    CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 0, true);
    for(int i=0;i<TEST_STOP_WORDS.length;i++)
      cas.add(TEST_STOP_WORDS[i]);
    assertEquals(TEST_STOP_WORDS.length, cas.size());
    for(int i=0;i<TEST_STOP_WORDS.length;i++)
      assertTrue(cas.contains(TEST_STOP_WORDS[i]));
  }

  public void testNonZeroOffset() {
    String[] words={"Hello","World","this","is","a","test"};
    char[] findme="xthisy".toCharArray();   
    CharArraySet set=new CharArraySet(TEST_VERSION_CURRENT, 10,true);
    set.addAll(Arrays.asList(words));
    assertTrue(set.contains(findme, 1, 4));
    assertTrue(set.contains(new String(findme,1,4)));
    
    // test unmodifiable
    set = CharArraySet.unmodifiableSet(set);
    assertTrue(set.contains(findme, 1, 4));
    assertTrue(set.contains(new String(findme,1,4)));
  }
  
  public void testObjectContains() {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    Integer val = Integer.valueOf(1);
    set.add(val);
    assertTrue(set.contains(val));
    assertTrue(set.contains(new Integer(1))); // another integer
    assertTrue(set.contains("1"));
    assertTrue(set.contains(new char[]{'1'}));
    // test unmodifiable
    set = CharArraySet.unmodifiableSet(set);
    assertTrue(set.contains(val));
    assertTrue(set.contains(new Integer(1))); // another integer
    assertTrue(set.contains("1"));
    assertTrue(set.contains(new char[]{'1'}));
  }
  
  public void testClear(){
    CharArraySet set=new CharArraySet(TEST_VERSION_CURRENT, 10,true);
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
    set.clear();
    assertEquals("not empty", 0, set.size());
    for(int i=0;i<TEST_STOP_WORDS.length;i++)
      assertFalse(set.contains(TEST_STOP_WORDS[i]));
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
    for(int i=0;i<TEST_STOP_WORDS.length;i++)
      assertTrue(set.contains(TEST_STOP_WORDS[i]));
  }
  
  public void testModifyOnUnmodifiable(){
    CharArraySet set=new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    final int size = set.size();
    set = CharArraySet.unmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
    String NOT_IN_SET = "SirGallahad";
    assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));
    
    try{
      set.add(NOT_IN_SET.toCharArray());  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    
    try{
      set.add(NOT_IN_SET);  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    
    try{
      set.add(new StringBuilder(NOT_IN_SET));  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    
    try{
      set.clear();  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    try{
      set.add((Object) NOT_IN_SET);  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    
    // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
    // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
    // remove() on the iterator
    try{
      set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true));  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    
    try{
      set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(NOT_IN_SET), true));  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    
    try{
      set.addAll(Arrays.asList(new String[]{NOT_IN_SET}));  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
    }
    
    for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
      assertTrue(set.contains(TEST_STOP_WORDS[i]));  
    }
  }
  
  public void testUnmodifiableSet(){
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10,true);
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    set.add(Integer.valueOf(1));
    final int size = set.size();
    set = CharArraySet.unmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
    for (String stopword : TEST_STOP_WORDS) {
      assertTrue(set.contains(stopword));
    }
    assertTrue(set.contains(Integer.valueOf(1)));
    assertTrue(set.contains("1"));
    assertTrue(set.contains(new char[]{'1'}));
    
    try{
      CharArraySet.unmodifiableSet(null);
      fail("can not make null unmodifiable");
    }catch (NullPointerException e) {
      // expected
    }
  }
  
  public void testSupplementaryChars() {
    String missing = "Term %s is missing in the set";
    String falsePos = "Term %s is in the set but shouldn't";
    // for reference see
    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
    String[] upperArr = new String[] {"Abc\ud801\udc1c",
        "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
    String[] lowerArr = new String[] {"abc\ud801\udc44",
        "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
    }
    set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
  }
  
  public void testSingleHighSurrogate() {
    String missing = "Term %s is missing in the set";
    String falsePos = "Term %s is in the set but shouldn't";
    String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
        "\uD800EfG", "\uD800\ud801\udc1cB" };

    String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
        "\uD800efg", "\uD800\ud801\udc44b" };
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays
        .asList(TEST_STOP_WORDS), true);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
    }
    set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS),
        false);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, upperArr[i]), set
          .contains(lowerArr[i]));
    }
  }
  
  /**
   * @deprecated remove this test when lucene 3.0 "broken unicode 4" support is
   *             no longer needed.
   */
  @Deprecated
  public void testSupplementaryCharsBWCompat() {
    String missing = "Term %s is missing in the set";
    String falsePos = "Term %s is in the set but shouldn't";
    // for reference see
    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
    String[] upperArr = new String[] {"Abc\ud801\udc1c",
        "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
    String[] lowerArr = new String[] {"abc\ud801\udc44",
        "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
    CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), true);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
    set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), false);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
  }

  /**
   * @deprecated remove this test when lucene 3.0 "broken unicode 4" support is
   *             no longer needed.
   */
  @Deprecated
  public void testSingleHighSurrogateBWComapt() {
    String missing = "Term %s is missing in the set";
    String falsePos = "Term %s is in the set but shouldn't";
    String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
        "\uD800EfG", "\uD800\ud801\udc1cB" };

    String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
        "\uD800efg", "\uD800\ud801\udc44b" };
    CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays
        .asList(TEST_STOP_WORDS), true);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      if (i == lowerArr.length - 1)
        assertFalse(String.format(falsePos, lowerArr[i]), set
            .contains(lowerArr[i]));
      else
        assertTrue(String.format(missing, lowerArr[i]), set
            .contains(lowerArr[i]));
    }
    set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS),
        false);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, lowerArr[i]), set
          .contains(lowerArr[i]));
    }
  }
  
  @SuppressWarnings("deprecated")
  public void testCopyCharArraySetBWCompat() {
    CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList();
    for (String string : stopwords) {
      stopwordsUpper.add(string.toUpperCase());
    }
    setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
    setIngoreCase.add(Integer.valueOf(1));
    setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
    setCaseSensitive.add(Integer.valueOf(1));

    // This should use the deprecated methods, because it checks a bw compatibility.
    CharArraySet copy = CharArraySet.copy(setIngoreCase);
    CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);

    assertEquals(setIngoreCase.size(), copy.size());
    assertEquals(setCaseSensitive.size(), copy.size());

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copyCaseSens.containsAll(stopwords));
    for (String string : stopwordsUpper) {
      assertFalse(copyCaseSens.contains(string));
    }
    // test adding terms to the copy
    List<String> newWords = new ArrayList();
    for (String string : stopwords) {
      newWords.add(string+"_1");
    }
    copy.addAll(newWords);
    
    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copy.containsAll(newWords));
    // new added terms are not in the source set
    for (String string : newWords) {
      assertFalse(setIngoreCase.contains(string));  
      assertFalse(setCaseSensitive.contains(string));  

    }
  }
  
  /**
   * Test the static #copy() function with a CharArraySet as a source
   */
  public void testCopyCharArraySet() {
    CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList();
    for (String string : stopwords) {
      stopwordsUpper.add(string.toUpperCase());
    }
    setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
    setIngoreCase.add(Integer.valueOf(1));
    setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
    setCaseSensitive.add(Integer.valueOf(1));

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase);
    CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

    assertEquals(setIngoreCase.size(), copy.size());
    assertEquals(setCaseSensitive.size(), copy.size());

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copyCaseSens.containsAll(stopwords));
    for (String string : stopwordsUpper) {
      assertFalse(copyCaseSens.contains(string));
    }
    // test adding terms to the copy
    List<String> newWords = new ArrayList();
    for (String string : stopwords) {
      newWords.add(string+"_1");
    }
    copy.addAll(newWords);
    
    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copy.containsAll(newWords));
    // new added terms are not in the source set
    for (String string : newWords) {
      assertFalse(setIngoreCase.contains(string));  
      assertFalse(setCaseSensitive.contains(string));  

    }
  }
  
  /**
   * Test the static #copy() function with a JDK {@link Set} as a source
   */
  public void testCopyJDKSet() {
    Set<String> set = new HashSet();

    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList();
    for (String string : stopwords) {
      stopwordsUpper.add(string.toUpperCase());
    }
    set.addAll(Arrays.asList(TEST_STOP_WORDS));

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set);

    assertEquals(set.size(), copy.size());
    assertEquals(set.size(), copy.size());

    assertTrue(copy.containsAll(stopwords));
    for (String string : stopwordsUpper) {
      assertFalse(copy.contains(string));
    }
    
    List<String> newWords = new ArrayList();
    for (String string : stopwords) {
      newWords.add(string+"_1");
    }
    copy.addAll(newWords);
    
    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(newWords));
    // new added terms are not in the source set
    for (String string : newWords) {
      assertFalse(set.contains(string));  
    }
  }
  
  /**
   * Tests a special case of {@link CharArraySet#copy(Version, Set)} where the
   * set to copy is the {@link CharArraySet#EMPTY_SET}
   */
  public void testCopyEmptySet() {
    assertSame(CharArraySet.EMPTY_SET, 
        CharArraySet.copy(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET));
  }

  /**
   * Smoketests the static empty set
   */
  public void testEmptySet() {
    assertEquals(0, CharArraySet.EMPTY_SET.size());
    
    assertTrue(CharArraySet.EMPTY_SET.isEmpty());
    for (String stopword : TEST_STOP_WORDS) {
      assertFalse(CharArraySet.EMPTY_SET.contains(stopword));
    }
    assertFalse(CharArraySet.EMPTY_SET.contains("foo"));
    assertFalse(CharArraySet.EMPTY_SET.contains((Object) "foo"));
    assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray()));
    assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray(),0,3));
  }
  
  /**
   * Test for NPE
   */
  public void testContainsWithNull() {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    try {
      set.contains((char[]) null, 0, 10);
      fail("null value must raise NPE");
    } catch (NullPointerException e) {}
    try {
      set.contains((CharSequence) null);
      fail("null value must raise NPE");
    } catch (NullPointerException e) {}
    try {
      set.contains((Object) null);
      fail("null value must raise NPE");
    } catch (NullPointerException e) {}
  }
  
  @Deprecated @SuppressWarnings("unchecked")
  public void testIterator() {
    HashSet<String> hset = new HashSet();
    hset.addAll(Arrays.asList(TEST_STOP_WORDS));

    assertTrue("in 3.0 version, iterator should be CharArraySetIterator",
      ((Iterator) CharArraySet.copy(Version.LUCENE_30, hset).iterator()) instanceof CharArraySet.CharArraySetIterator);

    CharArraySet set = CharArraySet.copy(TEST_VERSION_CURRENT, hset);
    assertFalse("in current version, iterator should not be CharArraySetIterator",
      ((Iterator) set.iterator()) instanceof CharArraySet.CharArraySetIterator);
    
    Iterator<String> it = set.stringIterator();
    assertTrue(it instanceof CharArraySet.CharArraySetIterator);
    while (it.hasNext()) {
      // as the set returns String instances, this must work:
      assertTrue(hset.contains(it.next()));
      try {
        it.remove();
        fail("remove() should not work on CharArraySetIterator");
      } catch (UnsupportedOperationException uoe) {
        // pass
      }
    }
  }
  
  public void testToString() {
    CharArraySet set = CharArraySet.copy(TEST_VERSION_CURRENT, Collections.singleton("test"));
    assertEquals("[test]", set.toString());
    set.add("test2");
    assertTrue(set.toString().contains(", "));
    
    set = CharArraySet.copy(Version.LUCENE_30, Collections.singleton("test"));
    assertEquals("[test]", set.toString());
    set.add("test2");
    assertTrue(set.toString().contains(", "));
  }
}

Other Lucene examples (source code examples)

Here is a short list of links related to this Lucene TestCharArraySet.java source code file:



my book on functional programming

 

new blog posts

 

Copyright 1998-2019 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.