|
Java example source code file (ISCIITest.java)
The ISCIITest.java Java example source code/* * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. 
/*
 * @test
 * @bug 4328178
 * @summary Performs baseline and regression test on the ISCII91 charset
 */

import java.io.*;

/**
 * Baseline and regression test for the ISCII91 charset.
 *
 * <p>Every check decodes (and in places re-encodes) small byte sequences via
 * {@code new String(byte[], String)} / {@code String.getBytes(String)} and
 * throws an {@link Exception} on the first mismatch, so a normal return from
 * {@link #main(String[])} means all checks passed.
 */
public class ISCIITest {

    /** Charset name used for all decoding and for the ASCII round trip. */
    private static final String CHARSET_NAME = "ISCII91";

    /** Charset name used by {@link #checkRange} when encoding back to bytes. */
    private static final String ENCODE_CHARSET_NAME = "ISCII";

    /** Message carried by the exceptions that signal a failed check. */
    private static final String FAILURE_MESSAGE = "Failed ISCII91 Regression Test";

    /**
     * Verifies that every byte in {@code [start, end]} decodes to the char
     * with the same numeric value and encodes back to the same single byte.
     *
     * @param start first byte value to test (inclusive)
     * @param end last byte value to test (inclusive)
     * @param testName label included in diagnostics
     * @throws UnsupportedEncodingException if the charset is not available
     * @throws Exception if a byte does not map to itself in either direction
     */
    private static void mapEquiv(int start, int end, String testName)
            throws Exception {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;

            // An unsupported charset is not swallowed here: it propagates to
            // test(), which reports it and fails the run.
            String unicodeStr = new String(singleByte, CHARSET_NAME);
            if (unicodeStr.length() != 1 || unicodeStr.charAt(0) != i) {
                System.err.println("FAILED ISCII91 Regression test: "
                        + testName + ", input byte is " + i);
                throw new Exception(FAILURE_MESSAGE);
            }

            byte[] encoded = unicodeStr.getBytes(CHARSET_NAME);
            if (encoded.length != 1 || encoded[0] != singleByte[0]) {
                System.err.println("Encoding error " + testName
                        + ", input byte is " + i);
                throw new Exception(FAILURE_MESSAGE);
            }
        }
    }

    /**
     * Verifies that every byte in {@code [start, end]} decodes to the
     * replacement character U+FFFD.
     *
     * @param start first byte value to test (inclusive)
     * @param end last byte value to test (inclusive)
     * @param testName label included in diagnostics
     * @throws UnsupportedEncodingException if the charset is not available
     * @throws Exception if a byte decodes to anything other than U+FFFD
     */
    private static void checkUnmapped(int start, int end, String testName)
            throws Exception {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;

            String unicodeStr = new String(singleByte, CHARSET_NAME);
            if (unicodeStr.charAt(0) != '\uFFFD') {
                System.err.println("FAILED " + testName
                        + ": input byte is " + i);
                throw new Exception(FAILURE_MESSAGE);
            }
        }
    }

    /**
     * Verifies that the bytes {@code start..end} decode, one at a time, to
     * the corresponding entries of {@code expectChars}.
     *
     * @param start first byte value to test (inclusive)
     * @param end last byte value to test (inclusive)
     * @param expectChars expected char for each byte; entry {@code k}
     *        corresponds to byte {@code start + k}
     * @param testName label included in diagnostics
     * @throws UnsupportedEncodingException if the charset is not available
     * @throws Exception if a byte decodes to an unexpected char
     */
    private static void checkRange(int start, int end,
                                   char[] expectChars,
                                   String testName)
            throws Exception {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;

            String unicodeStr = new String(singleByte, CHARSET_NAME);
            if (unicodeStr.charAt(0) != expectChars[i - start]) {
                System.err.println("FAILED " + testName
                        + ": input byte is " + i);
                throw new Exception(FAILURE_MESSAGE);
            }

            // Smoke check only: the encoded bytes are not compared, the call
            // just has to complete without throwing.
            unicodeStr.getBytes(ENCODE_CHARSET_NAME);
        }
    }

    /**
     * Tests the ISCII91 Indic character encoding
     * as per IS 13194:1991 Bureau of Indian Standards.
     *
     * @throws Exception if any mapping check fails or the charset is not
     *         supported
     */
    private static void test() throws Exception {
        try {
            // ISCII91 is an 8-bit encoding which retains the ASCII
            // mappings in the lower half.
            mapEquiv(0, 0x7f, "7 bit ASCII range");

            // Checks a range of characters which are unmappable according
            // to the standards.
            checkUnmapped(0x81, 0x9f, "UNMAPPED");

            // Vowel Modifier chars can be used to modify the vowel
            // sound of the preceding consonant, vowel or matra character.
            char[] vowelModChars = {
                '\u0901', // Vowel modifier Chandrabindu
                '\u0902', // Vowel modifier Anuswar
                '\u0903'  // Vowel modifier Visarg
            };
            checkRange(0xa1, 0xa3, vowelModChars, "INDIC VOWEL MODIFIER CHARS");

            char[] expectChars = {
                '\u0905', // a4 -- Vowel A
                '\u0906', // a5 -- Vowel AA
                '\u0907', // a6 -- Vowel I
                '\u0908', // a7 -- Vowel II
                '\u0909', // a8 -- Vowel U
                '\u090a', // a9 -- Vowel UU
                '\u090b', // aa -- Vowel RI
                '\u090e', // ab -- Vowel E ( Southern Scripts )
                '\u090f', // ac -- Vowel EY
                '\u0910', // ad -- Vowel AI
                '\u090d', // ae -- Vowel AYE ( Devanagari Script )
                '\u0912', // af -- Vowel O ( Southern Scripts )
                '\u0913', // b0 -- Vowel OW
                '\u0914', // b1 -- Vowel AU
                '\u0911', // b2 -- Vowel AWE ( Devanagari Script )
            };
            checkRange(0xa4, 0xb2, expectChars, "INDIC VOWELS");

            char[] expectConsChars = {
                '\u0915', // b3 -- Consonant KA
                '\u0916', // b4 -- Consonant KHA
                '\u0917', // b5 -- Consonant GA
                '\u0918', // b6 -- Consonant GHA
                '\u0919', // b7 -- Consonant NGA
                '\u091a', // b8 -- Consonant CHA
                '\u091b', // b9 -- Consonant CHHA
                '\u091c', // ba -- Consonant JA
                '\u091d', // bb -- Consonant JHA
                '\u091e', // bc -- Consonant JNA
                '\u091f', // bd -- Consonant Hard TA
                '\u0920', // be -- Consonant Hard THA
                '\u0921', // bf -- Consonant Hard DA
                '\u0922', // c0 -- Consonant Hard DHA
                '\u0923', // c1 -- Consonant Hard NA
                '\u0924', // c2 -- Consonant Soft TA
                '\u0925', // c3 -- Consonant Soft THA
                '\u0926', // c4 -- Consonant Soft DA
                '\u0927', // c5 -- Consonant Soft DHA
                '\u0928', // c6 -- Consonant Soft NA
                '\u0929', // c7 -- Consonant NA ( Tamil )
                '\u092a', // c8 -- Consonant PA
                '\u092b', // c9 -- Consonant PHA
                '\u092c', // ca -- Consonant BA
                '\u092d', // cb -- Consonant BHA
                '\u092e', // cc -- Consonant MA
                '\u092f', // cd -- Consonant YA
                '\u095f', // ce -- Consonant JYA ( Bengali, Assamese & Oriya )
                '\u0930', // cf -- Consonant RA
                '\u0931', // d0 -- Consonant Hard RA ( Southern Scripts )
                '\u0932', // d1 -- Consonant LA
                '\u0933', // d2 -- Consonant Hard LA
                '\u0934', // d3 -- Consonant ZHA ( Tamil & Malayalam )
                '\u0935', // d4 -- Consonant VA
                '\u0936', // d5 -- Consonant SHA
                '\u0937', // d6 -- Consonant Hard SHA
                '\u0938', // d7 -- Consonant SA
                '\u0939', // d8 -- Consonant HA
            };
            checkRange(0xb3, 0xd8, expectConsChars, "INDIC CONSONANTS");

            char[] matraChars = {
                '\u093e', // da -- Vowel Sign AA
                '\u093f', // db -- Vowel Sign I
                '\u0940', // dc -- Vowel Sign II
                '\u0941', // dd -- Vowel Sign U
                '\u0942', // de -- Vowel Sign UU
                '\u0943', // df -- Vowel Sign RI
                '\u0946', // e0 -- Vowel Sign E ( Southern Scripts )
                '\u0947', // e1 -- Vowel Sign EY
                '\u0948', // e2 -- Vowel Sign AI
                '\u0945', // e3 -- Vowel Sign AYE ( Devanagari Script )
                '\u094a', // e4 -- Vowel Sign O ( Southern Scripts )
                '\u094b', // e5 -- Vowel Sign OW
                '\u094c', // e6 -- Vowel Sign AU
                '\u0949'  // e7 -- Vowel Sign AWE ( Devanagari Script )
            };
            // Matras or Vowel signs alter the implicit
            // vowel sound associated with an Indic consonant.
            checkRange(0xda, 0xe7, matraChars, "INDIC MATRAS");

            char[] loneContextModifierChars = {
                '\u094d', // e8 -- Vowel Omission Sign ( Halant )
                '\u093c', // e9 -- Diacritic Sign ( Nukta )
                '\u0964'  // ea -- Full Stop ( Viram, Northern Scripts )
            };
            checkRange(0xe8, 0xea, loneContextModifierChars,
                    "LONE INDIC CONTEXT CHARS");

            // Test Indic script numeral chars
            // (as opposed to international numerals)
            char[] expectNumeralChars = {
                '\u0966', // f1 -- Digit 0
                '\u0967', // f2 -- Digit 1
                '\u0968', // f3 -- Digit 2
                '\u0969', // f4 -- Digit 3
                '\u096a', // f5 -- Digit 4
                '\u096b', // f6 -- Digit 5
                '\u096c', // f7 -- Digit 6
                '\u096d', // f8 -- Digit 7
                '\u096e', // f9 -- Digit 8
                '\u096f'  // fa -- Digit 9
            };
            checkRange(0xf1, 0xfa, expectNumeralChars,
                    "NUMERAL/DIGIT CHARACTERS");

            // Expected result of each byte pair in codeExtensionBytes below,
            // in the same order.
            char[] expectNuktaSub = {
                '\u0950',
                '\u090c',
                '\u0961',
                '\u0960',
                '\u0962',
                '\u0963',
                '\u0944',
                '\u093d'
            };

            /*
             * ISCII uses a number of code extension techniques
             * to access a number of lesser used characters.
             * The Nukta character, which ordinarily signifies
             * a diacritic, is used in combination with existing
             * characters to escape them to a different character value.
             */
            byte[] codeExtensionBytes = {
                (byte) 0xa1, (byte) 0xe9, // Chandrabindu + Nukta
                                          // => DEVANAGARI OM SIGN
                (byte) 0xa6, (byte) 0xe9, // Vowel I + Nukta
                                          // => DEVANAGARI VOCALIC L
                (byte) 0xa7, (byte) 0xe9, // Vowel II + Nukta
                                          // => DEVANAGARI VOCALIC LL
                (byte) 0xaa, (byte) 0xe9, // Vowel RI + Nukta
                                          // => DEVANAGARI VOCALIC RR
                (byte) 0xdb, (byte) 0xe9, // Vowel sign I + Nukta
                                          // => DEVANAGARI VOWEL SIGN VOCALIC L
                (byte) 0xdc, (byte) 0xe9, // Vowel sign II + Nukta
                                          // => DEVANAGARI VOWEL SIGN VOCALIC LL
                (byte) 0xdf, (byte) 0xe9, // Vowel sign Vocalic R + Nukta
                                          // => DEVANAGARI VOWEL SIGN VOCALIC RR
                (byte) 0xea, (byte) 0xe9  // Full stop/Phrase separator + Nukta
                                          // => DEVANAGARI SIGN AVAGRAHA
            };

            byte[] bytePair = new byte[2];

            for (int i = 0; i < codeExtensionBytes.length / 2; i++) {
                bytePair[0] = codeExtensionBytes[2 * i];
                bytePair[1] = codeExtensionBytes[2 * i + 1];

                String unicodeStr = new String(bytePair, CHARSET_NAME);
                if (unicodeStr.charAt(0) != expectNuktaSub[i]) {
                    throw new Exception("Failed Nukta Sub");
                }
            }

            byte[] comboBytes = {
                (byte) 0xe8, (byte) 0xe8, // HALANT + HALANT
                (byte) 0xe8, (byte) 0xe9  // HALANT + NUKTA aka. Soft Halant
            };
            // Two expected chars per byte pair above, in the same order.
            char[] expectCombChars = {
                '\u094d', '\u200c',
                '\u094d', '\u200d'
            };

            for (int i = 0; i < comboBytes.length / 2; i++) {
                bytePair[0] = comboBytes[2 * i];
                bytePair[1] = comboBytes[2 * i + 1];

                String unicodeStr = new String(bytePair, CHARSET_NAME);
                // Fail when EITHER char is wrong. Joining the two comparisons
                // with && would only fail when both chars mismatch and so
                // would let a single wrong char through.
                if (unicodeStr.length() < 2
                        || unicodeStr.charAt(0) != expectCombChars[2 * i]
                        || unicodeStr.charAt(1) != expectCombChars[2 * i + 1]) {
                    throw new Exception(FAILURE_MESSAGE);
                }
            }
        } catch (UnsupportedEncodingException e) {
            System.err.println("ISCII91 encoding not supported");
            // Keep the original exception as the cause for diagnosis.
            throw new Exception(FAILURE_MESSAGE, e);
        }
    }

    /**
     * Test entry point.
     *
     * @param args ignored
     * @throws Exception if any ISCII91 check fails
     */
    public static void main(String[] args) throws Exception {
        test();
    }
}
// Other Java examples (source code examples)Here is a short list of links related to this Java ISCIITest.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.