|
Java example source code file (CharsetMapping.java)
The CharsetMapping.java Java example source code/* * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. 
// (this line closes the license-header comment that opens on the previous line of the page) */
// Original declaration: package sun.nio.cs;
// (kept as a comment so the listing compiles outside the JDK; restore it when
// placing this class back into the sun.nio.cs source tree)

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.*;
import java.security.*;

/**
 * In-memory byte-to-char and char-to-byte mapping tables for a charset,
 * loaded from a binary ".dat" stream by {@link #get(InputStream)}.
 *
 * <p>The stream starts with a 4-byte big-endian payload length. The payload is
 * a sequence of tables, each introduced by a 2-byte tag (one of the
 * {@code MAP_*} constants). All 16-bit values are big-endian.
 *
 * <p>NOTE(review): the c2b index table has to appear before the single-byte and
 * double-byte tables, because those tables fill {@code c2b} through
 * {@code c2bIndex} while they are being read.
 */
public class CharsetMapping {
    /** Returned by the decode methods when a byte sequence has no mapping. */
    public final static char UNMAPPABLE_DECODING = '\uFFFD';
    /** Returned by the encode methods when a character has no mapping. */
    public final static int  UNMAPPABLE_ENCODING = 0xFFFD;

    /** Entry value in {@code c2bIndex} meaning "no c2b segment for this high byte". */
    private static final int NO_C2B_SEGMENT = 0xFFFF;

    char[] b2cSB;                //singlebyte b->c
    char[] b2cDB1;               //doublebyte b->c /db1
    char[] b2cDB2;               //doublebyte b->c /db2

    int    b2Min, b2Max;         //min/max(start/end) value of 2nd byte
    int    b1MinDB1, b1MaxDB1;   //min/Max(start/end) value of 1st byte/db1
    int    b1MinDB2, b1MaxDB2;   //min/Max(start/end) value of 1st byte/db2
    int    dbSegSize;            //number of 2nd-byte slots per 1st byte

    char[] c2b;                  //c->b values, addressed via c2bIndex
    char[] c2bIndex;             //(c >> 8) -> start offset of the segment in c2b

    // Supplementary
    char[] b2cSupp;              //first half: sorted keys, second half: values
    char[] c2bSupp;              //first half: sorted keys, second half: values

    // Composite
    Entry[] b2cComp;             //in file order; searched by byte sequence
    Entry[] c2bComp;             //sorted by (cp, cp2)

    /**
     * Decodes a single byte.
     *
     * @param b index into the single-byte table
     * @return the mapped character as stored in the table
     */
    public char decodeSingle(int b) {
        return b2cSB[b];
    }

    /**
     * Decodes a two-byte sequence using the db1 table, then the db2 table.
     *
     * <p>Both byte ranges are inclusive: the tables are laid out with
     * {@code b2Max - b2Min + 1} slots per first byte, and the loader records
     * encodings whose second byte equals {@code b2Max}, so that value must be
     * decodable as well.
     *
     * @param b1 first byte
     * @param b2 second byte
     * @return the mapped character, or {@link #UNMAPPABLE_DECODING} when the
     *         pair is outside every loaded table
     */
    public char decodeDouble(int b1, int b2) {
        if (b2 < b2Min || b2 > b2Max) {
            return UNMAPPABLE_DECODING;
        }
        int column = b2 - b2Min;
        // The null checks keep a mapping without a db1/db2 table from throwing
        // when the (all zero) default ranges happen to match the input.
        if (b2cDB1 != null && b1 >= b1MinDB1 && b1 <= b1MaxDB1) {
            return b2cDB1[(b1 - b1MinDB1) * dbSegSize + column];
        }
        if (b2cDB2 != null && b1 >= b1MinDB2 && b1 <= b1MaxDB2) {
            return b2cDB2[(b1 - b1MinDB2) * dbSegSize + column];
        }
        return UNMAPPABLE_DECODING;
    }

    // for jis0213 all supplementary characters are in 0x2xxxx range,
    // so only the xxxx part is now stored, should actually store the
    // codepoint value instead.
    /**
     * Decodes a double-byte value that maps to a supplementary character.
     *
     * @param db the double-byte value to look up (truncated to 16 bits)
     * @param cc destination for the surrogate pair; needs room for two chars
     * @return {@code cc} filled with the code point {@code 0x20000 + stored value},
     *         or {@code null} when {@code db} is not in the supplementary table
     */
    public char[] decodeSurrogate(int db, char[] cc) {
        int end = b2cSupp.length / 2;
        int i = Arrays.binarySearch(b2cSupp, 0, end, (char)db);
        if (i >= 0) {
            Character.toChars(b2cSupp[end + i] + 0x20000, cc, 0);
            return cc;
        }
        return null;
    }

    /**
     * Decodes a byte sequence that maps to a composite (two-char) sequence.
     *
     * @param comp entry whose {@code bs} field holds the byte sequence
     * @param cc   destination; receives the base char and the second char
     * @return {@code cc}, or {@code null} when the byte sequence is not composite
     */
    public char[] decodeComposite(Entry comp, char[] cc) {
        int i = findBytes(b2cComp, comp);
        if (i >= 0) {
            cc[0] = (char)b2cComp[i].cp;
            cc[1] = (char)b2cComp[i].cp2;
            return cc;
        }
        return null;
    }

    /**
     * Encodes a BMP character.
     *
     * @param ch the character to encode
     * @return the stored byte value, or {@link #UNMAPPABLE_ENCODING}
     */
    public int encodeChar(char ch) {
        int index = c2bIndex[ch >> 8];
        if (index == NO_C2B_SEGMENT) {
            return UNMAPPABLE_ENCODING;
        }
        return c2b[index + (ch & 0xff)];
    }

    /**
     * Encodes a surrogate pair. Only code points in 0x20000..0x2FFFF are
     * looked up; the table is keyed by the low 16 bits of the code point.
     *
     * @param hi high surrogate
     * @param lo low surrogate
     * @return the stored byte value, or {@link #UNMAPPABLE_ENCODING}
     */
    public int encodeSurrogate(char hi, char lo) {
        int cp = Character.toCodePoint(hi, lo);
        if (cp < 0x20000 || cp >= 0x30000) {
            return UNMAPPABLE_ENCODING;
        }
        int end = c2bSupp.length / 2;
        int i = Arrays.binarySearch(c2bSupp, 0, end, (char)cp);
        if (i >= 0) {
            return c2bSupp[end + i];
        }
        return UNMAPPABLE_ENCODING;
    }

    /**
     * Tells whether {@code comp.cp} is the base character of any composite entry.
     *
     * @param comp entry whose {@code cp} field holds the candidate base
     * @return {@code true} when {@code cp} is in 0xe6..0x31f7 and is found in
     *         the composite table
     */
    public boolean isCompositeBase(Entry comp) {
        if (comp.cp <= 0x31f7 && comp.cp >= 0xe6) {
            return (findCP(c2bComp, comp) >= 0);
        }
        return false;
    }

    /**
     * Encodes a composite sequence.
     *
     * @param comp entry whose {@code cp} and {@code cp2} fields hold the sequence
     * @return the byte sequence of the matching entry, or {@link #UNMAPPABLE_ENCODING}
     */
    public int encodeComposite(Entry comp) {
        int i = findComp(c2bComp, comp);
        if (i >= 0) {
            return c2bComp[i].bs;
        }
        return UNMAPPABLE_ENCODING;
    }

    /**
     * Creates a CharsetMapping from a .dat binary stream. The load runs inside
     * {@code AccessController.doPrivileged}.
     *
     * @param is the stream to read; it is closed by the load
     * @return the loaded mapping, or {@code null} if an I/O error occurred
     * @throws RuntimeException if the data is truncated or carries an unknown tag
     */
    public static CharsetMapping get(final InputStream is) {
        return AccessController.doPrivileged(new PrivilegedAction<CharsetMapping>() {
            @Override
            public CharsetMapping run() {
                return new CharsetMapping().load(is);
            }
        });
    }

    /** One composite mapping; also used as the search key for the find methods. */
    public static class Entry {
        public int bs;   //byte sequence reps
        public int cp;   //Unicode codepoint
        public int cp2;  //CC of composite
    }

    /** Overflow-safe three-way int comparison (subtracting can wrap around). */
    private static int compareInts(int a, int b) {
        return (a < b) ? -1 : ((a == b) ? 0 : 1);
    }

    /** Orders entries by byte sequence. */
    static final Comparator<Entry> comparatorBytes =
        new Comparator<Entry>() {
            @Override
            public int compare(Entry m1, Entry m2) {
                return compareInts(m1.bs, m2.bs);
            }
        };

    /** Orders entries by base code point only. */
    static final Comparator<Entry> comparatorCP =
        new Comparator<Entry>() {
            @Override
            public int compare(Entry m1, Entry m2) {
                return compareInts(m1.cp, m2.cp);
            }
        };

    /** Orders entries by base code point, then by second code point. */
    static final Comparator<Entry> comparatorComp =
        new Comparator<Entry>() {
            @Override
            public int compare(Entry m1, Entry m2) {
                int v = compareInts(m1.cp, m2.cp);
                if (v == 0) {
                    v = compareInts(m1.cp2, m2.cp2);
                }
                return v;
            }
        };

    /** Binary search by byte sequence; {@code a} must be sorted accordingly. */
    static int findBytes(Entry[] a, Entry k) {
        return Arrays.binarySearch(a, 0, a.length, k, comparatorBytes);
    }

    /** Binary search by base code point. */
    static int findCP(Entry[] a, Entry k) {
        return Arrays.binarySearch(a, 0, a.length, k, comparatorCP);
    }

    /** Binary search by (cp, cp2). */
    static int findComp(Entry[] a, Entry k) {
        return Arrays.binarySearch(a, 0, a.length, k, comparatorComp);
    }

    /*****************************************************************************/
    // tags of different charset mapping tables
    private final static int MAP_SINGLEBYTE      = 0x1; // 0..256  : c
    private final static int MAP_DOUBLEBYTE1     = 0x2; // min..max: c
    private final static int MAP_DOUBLEBYTE2     = 0x3; // min..max: c [DB2]
    private final static int MAP_SUPPLEMENT      = 0x5; //           db,c
    private final static int MAP_SUPPLEMENT_C2B  = 0x6; //           c,db
    private final static int MAP_COMPOSITE       = 0x7; //           db,base,cc
    private final static int MAP_INDEXC2B        = 0x8; // index table of c->bb

    /**
     * Reads exactly {@code N} bytes into {@code bb}.
     *
     * @return {@code false} if the stream ended first
     */
    private static final boolean readNBytes(InputStream in, byte[] bb, int N)
        throws IOException
    {
        int off = 0;
        while (N > 0) {
            int n = in.read(bb, off, N);
            if (n == -1) {
                return false;
            }
            N = N - n;
            off += n;
        }
        return true;
    }

    /**
     * Reads the 4-byte big-endian payload length.
     *
     * @throws RuntimeException if the stream ends inside the header or the
     *         length is negative
     */
    private static int readLength(InputStream in) throws IOException {
        int len = 0;
        for (int i = 0; i < 4; i++) {
            int b = in.read();
            if (b < 0) {
                // read() signals end of stream with -1; masking it would
                // silently turn a truncated header into a bogus length.
                throw new RuntimeException("Corrupted data file");
            }
            len = (len << 8) | b;
        }
        if (len < 0) {
            throw new RuntimeException("Corrupted data file");
        }
        return len;
    }

    /** Closes {@code in}, ignoring failures; used only when the load already failed. */
    private static void closeQuietly(InputStream in) {
        try {
            in.close();
        } catch (IOException ignored) {
            // the original failure is the one worth reporting
        }
    }

    int off = 0;     // read position in bb while the tables are parsed
    byte[] bb;       // payload of the .dat stream; released once parsing is done

    /** Reads the next big-endian 16-bit value from {@code bb} and advances {@code off}. */
    private int readUnsignedShort() {
        int hi = bb[off++] & 0xff;
        int lo = bb[off++] & 0xff;
        return (hi << 8) | lo;
    }

    /** Reads a table: a 16-bit element count followed by that many 16-bit chars. */
    private char[] readCharArray() {
        // first 2 bytes are the number of "chars" stored in this table
        int size = readUnsignedShort();
        char[] cc = new char[size];
        for (int i = 0; i < size; i++) {
            cc[i] = (char)readUnsignedShort();
        }
        return cc;
    }

    /**
     * Records {@code c -> bytes} in the c2b table. Characters whose high byte
     * has no segment are skipped; encodeChar reports them as unmappable anyway.
     */
    private void recordEncoding(char c, int bytes) {
        int segment = c2bIndex[c >> 8];
        if (segment != NO_C2B_SEGMENT) {
            c2b[segment + (c & 0xff)] = (char)bytes;
        }
    }

    /** Reads the single-byte b2c table and fills in the matching c2b entries. */
    void readSINGLEBYTE() {
        char[] map = readCharArray();
        for (int i = 0; i < map.length; i++) {
            char c = map[i];
            if (c != UNMAPPABLE_DECODING) {
                recordEncoding(c, i);
            }
        }
        b2cSB = map;
    }

    /**
     * Reads the c2b index table and, unless {@code c2b} already exists,
     * allocates it large enough for the highest segment the index refers to,
     * pre-filled with {@link #UNMAPPABLE_ENCODING}.
     */
    void readINDEXC2B() {
        char[] map = readCharArray();
        if (c2b == null) {
            int lastSegment = -1;
            for (int i = 0; i < map.length; i++) {
                // A char never equals -1, so the "no segment" marker has to be
                // tested as 0xFFFF; otherwise it would be taken for an offset.
                if (map[i] != NO_C2B_SEGMENT && map[i] > lastSegment) {
                    lastSegment = map[i];
                }
            }
            if (lastSegment >= 0) {
                c2b = new char[lastSegment + 256];
                Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
            }
        }
        c2bIndex = map;
    }

    /**
     * Reads a double-byte b2c table and fills in the matching c2b entries.
     *
     * @param b1Min   first-byte value of row 0
     * @param b2Min   second-byte value of column 0
     * @param segSize number of columns per row
     * @return the table as read
     */
    char[] readDB(int b1Min, int b2Min, int segSize) {
        char[] map = readCharArray();
        for (int i = 0; i < map.length; i++) {
            char c = map[i];
            if (c != UNMAPPABLE_DECODING) {
                int b1 = i / segSize;
                int b2 = i % segSize;
                int b = (b1 + b1Min) * 256 + (b2 + b2Min);
                recordEncoding(c, b);
            }
        }
        return map;
    }

    /** Reads the second-byte range shared by both double-byte tables. */
    private void readSecondByteRange() {
        b2Min = readUnsignedShort();
        b2Max = readUnsignedShort();
        dbSegSize = b2Max - b2Min + 1;
    }

    /** Reads the db1 header (byte ranges) and table. */
    void readDOUBLEBYTE1() {
        b1MinDB1 = readUnsignedShort();
        b1MaxDB1 = readUnsignedShort();
        readSecondByteRange();
        b2cDB1 = readDB(b1MinDB1, b2Min, dbSegSize);
    }

    /** Reads the db2 header (byte ranges) and table. */
    void readDOUBLEBYTE2() {
        b1MinDB2 = readUnsignedShort();
        b1MaxDB2 = readUnsignedShort();
        readSecondByteRange();
        b2cDB2 = readDB(b1MinDB2, b2Min, dbSegSize);
    }

    /**
     * Reads the composite table, stored as (byte sequence, base, second char)
     * triples. The same Entry objects back both arrays; only {@code c2bComp}
     * is re-sorted.
     * NOTE(review): {@code b2cComp} is binary-searched by byte sequence, so the
     * data is presumably stored in that order already -- confirm against the
     * tool that generates the .dat file.
     */
    void readCOMPOSITE() {
        char[] map = readCharArray();
        int count = map.length / 3;
        b2cComp = new Entry[count];
        c2bComp = new Entry[count];
        for (int i = 0, j = 0; i < count; i++) {
            Entry m = new Entry();
            m.bs = map[j++];
            m.cp = map[j++];
            m.cp2 = map[j++];
            b2cComp[i] = m;
            c2bComp[i] = m;
        }
        Arrays.sort(c2bComp, 0, c2bComp.length, comparatorComp);
    }

    /**
     * Loads every table from {@code in} into this object and closes the stream,
     * whether or not loading succeeds.
     *
     * @param in the .dat stream
     * @return this object, or {@code null} if an IOException occurred (the
     *         stack trace is printed, as before)
     * @throws RuntimeException if the stream is truncated or holds an unknown tag
     */
    CharsetMapping load(InputStream in) {
        try {
            int len;
            boolean loaded = false;
            try {
                // The first 4 bytes are the size of the total data followed in
                // this .dat file.
                len = readLength(in);
                bb = new byte[len];
                off = 0;
                // Read in all bytes
                if (!readNBytes(in, bb, len)) {
                    throw new RuntimeException("Corrupted data file");
                }
                loaded = true;
            } finally {
                if (loaded) {
                    in.close();          // a failure here is still reported below
                } else {
                    closeQuietly(in);    // do not mask the exception in flight
                }
            }
            while (off < len) {
                int type = readUnsignedShort();
                switch (type) {
                case MAP_INDEXC2B:
                    readINDEXC2B();
                    break;
                case MAP_SINGLEBYTE:
                    readSINGLEBYTE();
                    break;
                case MAP_DOUBLEBYTE1:
                    readDOUBLEBYTE1();
                    break;
                case MAP_DOUBLEBYTE2:
                    readDOUBLEBYTE2();
                    break;
                case MAP_SUPPLEMENT:
                    b2cSupp = readCharArray();
                    break;
                case MAP_SUPPLEMENT_C2B:
                    c2bSupp = readCharArray();
                    break;
                case MAP_COMPOSITE:
                    readCOMPOSITE();
                    break;
                default:
                    throw new RuntimeException("Corrupted data file");
                }
            }
            bb = null;
            return this;
        } catch (IOException x) {
            x.printStackTrace();
            return null;
        }
    }
}
// Other Java examples (source code examples). Here is a short list of links related to this Java CharsetMapping.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.