alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (UnicodeEscaperTest.java)

This example Java source code file (UnicodeEscaperTest.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Learn more about this Java project at its project page.

Java - Java tags/keywords

bad_strings, illegalargumentexception, indexoutofboundsexception, largest_surrogate, nop_escaper, override, simple_escaper, smallest_surrogate, string, stringbuffer, test_string, trailing, unicodeescaper, world

The UnicodeEscaperTest.java Java example source code

/*
 * Copyright (C) 2008 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.escape;

import com.google.common.annotations.GwtCompatible;

import junit.framework.TestCase;

/**
 * Tests for {@link UnicodeEscaper}.
 *
 * @author David Beaumont
 */
@GwtCompatible
public class UnicodeEscaperTest extends TestCase {

  private static final String SMALLEST_SURROGATE =
      "" + Character.MIN_HIGH_SURROGATE + Character.MIN_LOW_SURROGATE;
  private static final String LARGEST_SURROGATE =
      "" + Character.MAX_HIGH_SURROGATE + Character.MAX_LOW_SURROGATE;

  private static final String TEST_STRING =
      "\0abyz\u0080\u0100\u0800\u1000ABYZ\uffff" +
      SMALLEST_SURROGATE + "0189" +  LARGEST_SURROGATE;

  // Escapes nothing
  private static final UnicodeEscaper NOP_ESCAPER = new UnicodeEscaper() {
    @Override
    protected char[] escape(int c) {
      return null;
    }
  };

  // Escapes everything except [a-zA-Z0-9]
  private static final UnicodeEscaper SIMPLE_ESCAPER = new UnicodeEscaper() {
    @Override
    protected char[] escape(int cp) {
      return ('a' <= cp && cp <= 'z') ||
             ('A' <= cp && cp <= 'Z') ||
             ('0' <= cp && cp <= '9') ? null :
          ("[" + String.valueOf(cp) + "]").toCharArray();
    }
  };

  public void testNopEscaper() {
    UnicodeEscaper e = NOP_ESCAPER;
    assertEquals(TEST_STRING, escapeAsString(e, TEST_STRING));
  }

  public void testSimpleEscaper() {
    UnicodeEscaper e = SIMPLE_ESCAPER;
    String expected =
        "[0]abyz[128][256][2048][4096]ABYZ[65535]" +
        "[" + Character.MIN_SUPPLEMENTARY_CODE_POINT + "]" +
        "0189[" + Character.MAX_CODE_POINT + "]";
    assertEquals(expected, escapeAsString(e, TEST_STRING));
  }

  public void testGrowBuffer() { // need to grow past an initial 1024 byte buffer
    StringBuffer input = new StringBuffer();
    StringBuffer expected = new StringBuffer();
    for (int i = 256; i < 1024; i++) {
      input.append((char) i);
      expected.append("[" + i + "]");
    }
    assertEquals(expected.toString(), SIMPLE_ESCAPER.escape(input.toString()));
  }

  public void testSurrogatePairs() {
    UnicodeEscaper e = SIMPLE_ESCAPER;

    // Build up a range of surrogate pair characters to test
    final int min = Character.MIN_SUPPLEMENTARY_CODE_POINT;
    final int max = Character.MAX_CODE_POINT;
    final int range = max - min;
    final int s1 = min + (1 * range) / 4;
    final int s2 = min + (2 * range) / 4;
    final int s3 = min + (3 * range) / 4;
    final char[] dst = new char[12];

    // Put surrogate pairs at odd indices so they can be split easily
    dst[0] = 'x';
    Character.toChars(min, dst, 1);
    Character.toChars(s1, dst, 3);
    Character.toChars(s2, dst, 5);
    Character.toChars(s3, dst, 7);
    Character.toChars(max, dst, 9);
    dst[11] = 'x';
    String test = new String(dst);

    // Get the expected result string
    String expected =
        "x[" + min + "][" + s1 + "][" + s2 + "][" + s3 + "][" + max + "]x";
    assertEquals(expected, escapeAsString(e, test));
  }

  public void testTrailingHighSurrogate() {
    String test = "abc" + Character.MIN_HIGH_SURROGATE;
    try {
      escapeAsString(NOP_ESCAPER, test);
      fail("Trailing high surrogate should cause exception");
    } catch (IllegalArgumentException expected) {
      // Pass
    }
    try {
      escapeAsString(SIMPLE_ESCAPER, test);
      fail("Trailing high surrogate should cause exception");
    } catch (IllegalArgumentException expected) {
      // Pass
    }
  }

  public void testNullInput() {
    UnicodeEscaper e = SIMPLE_ESCAPER;
    try {
      e.escape((String) null);
      fail("Null string should cause exception");
    } catch (NullPointerException expected) {
      // Pass
    }
  }

  public void testBadStrings() {
    UnicodeEscaper e = SIMPLE_ESCAPER;
    String[] BAD_STRINGS = {
        String.valueOf(Character.MIN_LOW_SURROGATE),
        Character.MIN_LOW_SURROGATE + "xyz",
        "abc" + Character.MIN_LOW_SURROGATE,
        "abc" + Character.MIN_LOW_SURROGATE + "xyz",
        String.valueOf(Character.MAX_LOW_SURROGATE),
        Character.MAX_LOW_SURROGATE + "xyz",
        "abc" + Character.MAX_LOW_SURROGATE,
        "abc" + Character.MAX_LOW_SURROGATE + "xyz",
    };
    for (String s : BAD_STRINGS) {
      try {
        escapeAsString(e, s);
        fail("Isolated low surrogate should cause exception [" + s + "]");
      } catch (IllegalArgumentException expected) {
        // Pass
      }
    }
  }

  public void testFalsePositivesForNextEscapedIndex() {
    UnicodeEscaper e = new UnicodeEscaper() {
      // Canonical escaper method that only escapes lower case ASCII letters.
      @Override
      protected char[] escape(int cp) {
        return ('a' <= cp && cp <= 'z') ?
            new char[] { Character.toUpperCase((char) cp) } : null;
      }
      // Inefficient implementation that defines all letters as escapable.
      @Override
      protected int nextEscapeIndex(CharSequence csq, int index, int end) {
        while (index < end && !Character.isLetter(csq.charAt(index))) {
          index++;
        }
        return index;
      }
    };
    assertEquals("\0HELLO \uD800\uDC00 WORLD!\n",
        e.escape("\0HeLLo \uD800\uDC00 WorlD!\n"));
  }

  public void testCodePointAt_IndexOutOfBoundsException() {
    try {
      UnicodeEscaper.codePointAt("Testing...", 4, 2);
      fail();
    } catch (IndexOutOfBoundsException expected) {
    }
  }

  private static String escapeAsString(Escaper e, String s) {
    return e.escape(s);
  }
}

Other Java examples (source code examples)

Here is a short list of links related to this Java UnicodeEscaperTest.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.