|
Java example source code file (CMap.java)
// The CMap.java Java example source code
/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.font;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.util.Locale;
import java.nio.charset.*;

/*
 * A tt font has a CMAP table which is in turn made up of sub-tables which
 * describe the char to glyph mapping in (possibly) multiple ways.
 * CMAP subtables are described by 3 values.
 * 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)
 * 2. Encoding (eg 0=symbol, 1=unicode)
 * 3. TrueType subtable format (how the char->glyph mapping for the encoding
 * is stored in the subtable). See the TrueType spec. Format 4 is required
 * by MS in fonts for windows. It uses segmented mapping to delta values.
 * Most typically we see are (3,1,4) :
 * CMAP Platform ID=3 is what we use.
 * Encodings that are used in practice by JDK on Solaris are
 *  symbol (3,0)
 *  unicode (3,1)
 *  GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)
 * The format for almost all subtables is 4. However the solaris (3,5)
 * encodings are typically in format 2.
 */
abstract class CMap {

//     static char WingDings_b2c[] = {
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d,
//         0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd,
//         0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd,
//         0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,
//         0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d,
//         0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd,
//     };

//     static char Symbols_b2c[] = {
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d,
//         0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
//         0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
//         0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
//         0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd,
//         0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,
//         0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
//         0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
//         0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219,
//         0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229,
//         0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
//         0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5,
//         0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//     };

    /* Encoding IDs of the Microsoft platform (platform ID 3) cmap
     * subtables that need a translation array. The values double as
     * indices into converterMaps.
     */
    static final short ShiftJISEncoding = 2;
    static final short GBKEncoding      = 3;
    static final short Big5Encoding     = 4;
    static final short WansungEncoding  = 5;
    static final short JohabEncoding    = 6;
    static final short MSUnicodeSurrogateEncoding = 10;

    static final char noSuchChar = (char)0xfffd;
    static final int SHORTMASK = 0x0000ffff;
    /* NB: this is an int with all bits set, so "x & INTMASK" is a no-op
     * and does NOT zero-extend when the result is widened to long.
     * It is kept unchanged because it is visible to the rest of the
     * package; code in this class that needs a true unsigned 32 bit value
     * uses UINT32_MASK instead.
     */
    static final int INTMASK   = 0xffffffff;

    /* Zero-extends a 32 bit value read with getInt() into a long. */
    private static final long UINT32_MASK = 0xffffffffL;

    /* Cache of translation arrays, indexed by encoding ID (see above). */
    static final char[][] converterMaps = new char[7][];

    /*
     * Unicode->other encoding translation array. A pre-computed look up
     * which can be shared across all fonts using that encoding.
     * Using this saves running character converters repeatedly.
     */
    char[] xlat;

    /**
     * Selects one cmap subtable of the font and builds the matching CMap.
     *
     * All Microsoft platform (platform ID 3) subtables are located first
     * and one of them is chosen in a fixed preference order:
     * (3,10), (3,0), (3,1), (3,2), (3,3), (3,4), (3,5), (3,6).
     * If the font has no (3,*) subtable at all, the first subtable listed
     * in the cmap table is used.
     *
     * @param font the font whose "cmap" table is read
     * @return the CMap for the chosen subtable, or null if the font has
     *         (3,*) subtables but none with one of the encodings above
     * @throws RuntimeException if the chosen subtable has a format that
     *         createCMap does not implement
     */
    static CMap initialize(TrueTypeFont font) {

        CMap cmap = null;

        int offset, platformID, encodingID=-1;

        int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0,
            three6=0, three10=0;
        boolean threeStar = false;

        ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);
        /* NOTE(review): this value is never used below. The call is kept
         * because any side effects of getTableSize are not visible here.
         */
        int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);
        short numberSubTables = cmapBuffer.getShort(2);

        /* locate the offsets of all 3,*  (ie Microsoft platform) encodings */
        for (int i=0; i<numberSubTables; i++) {
            /* Each encoding record is 8 bytes and the records start after
             * the 4 byte table header.
             */
            cmapBuffer.position(i * 8 + 4);
            platformID = cmapBuffer.getShort();
            if (platformID == 3) {
                threeStar = true;
                encodingID = cmapBuffer.getShort();
                offset     = cmapBuffer.getInt();
                switch (encodingID) {
                case 0:  three0  = offset; break; // MS Symbol encoding
                case 1:  three1  = offset; break; // MS Unicode cmap
                case 2:  three2  = offset; break; // ShiftJIS cmap.
                case 3:  three3  = offset; break; // GBK cmap
                case 4:  three4  = offset; break; // Big 5 cmap
                case 5:  three5  = offset; break; // Wansung
                case 6:  three6  = offset; break; // Johab
                case 10: three10 = offset; break; // MS Unicode surrogates
                }
            }
        }

        /* This defines the preference order for cmap subtables */
        if (threeStar) {
            if (three10 != 0) {
                cmap = createCMap(cmapBuffer, three10, null);
            }
            else if (three0 != 0) {
                /* The special case treatment of these fonts leads to
                 * anomalies where a user can view "wingdings" and "wingdings2"
                 * and the latter shows all its code points in the unicode
                 * private use area at 0xF000->0XF0FF and the former shows
                 * a scattered subset of its glyphs that are known mappings to
                 * unicode code points.
                 * The primary purpose of these mappings was to facilitate
                 * display of symbol chars etc in composite fonts, however
                 * this is not needed as all these code points are covered
                 * by Lucida Sans Regular.
                 * Commenting this out reduces the role of these two files
                 * (assuming that they continue to be used in font.properties)
                 * to just one of contributing to the overall composite
                 * font metrics, and also AWT can still access the fonts.
                 * Clients which explicitly accessed these fonts as names
                 * "Symbol" and "Wingdings" (ie as physical fonts) and
                 * expected to see a scattering of these characters will
                 * see them now as missing. How much of a problem is this?
                 * Perhaps we could still support this mapping just for
                 * "Symbol.ttf" but I suspect some users would prefer it
                 * to be mapped in to the Latin range as that is how
                 * the "symbol" font is used in native apps.
                 */
//              String name = font.platName.toLowerCase(Locale.ENGLISH);
//              if (name.endsWith("symbol.ttf")) {
//                  cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c);
//              } else if (name.endsWith("wingding.ttf")) {
//                  cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c);
//              } else {
                    cmap = createCMap(cmapBuffer, three0, null);
//              }
            }
            else if (three1 != 0) {
                cmap = createCMap(cmapBuffer, three1, null);
            }
            else if (three2 != 0) {
                cmap = createCMap(cmapBuffer, three2,
                                  getConverterMap(ShiftJISEncoding));
            }
            else if (three3 != 0) {
                cmap = createCMap(cmapBuffer, three3,
                                  getConverterMap(GBKEncoding));
            }
            else if (three4 != 0) {
                /* GB2312 TrueType fonts on Solaris have wrong encoding ID for
                 * cmap table, these fonts have EncodingID 4 which is Big5
                 * encoding according the TrueType spec, but actually the
                 * fonts are using gb2312 encoding, have to use this
                 * workaround to make Solaris zh_CN locale work.  -sherman
                 */
                if (FontUtilities.isSolaris && font.platName != null &&
                    (font.platName.startsWith(
                     "/usr/openwin/lib/locale/zh_CN.EUC/X11/fonts/TrueType") ||
                     font.platName.startsWith(
                     "/usr/openwin/lib/locale/zh_CN/X11/fonts/TrueType") ||
                     font.platName.startsWith(
                     "/usr/openwin/lib/locale/zh/X11/fonts/TrueType"))) {
                    cmap = createCMap(cmapBuffer, three4,
                                       getConverterMap(GBKEncoding));
                }
                else {
                    cmap = createCMap(cmapBuffer, three4,
                                      getConverterMap(Big5Encoding));
                }
            }
            else if (three5 != 0) {
                cmap = createCMap(cmapBuffer, three5,
                                  getConverterMap(WansungEncoding));
            }
            else if (three6 != 0) {
                cmap = createCMap(cmapBuffer, three6,
                                  getConverterMap(JohabEncoding));
            }
        } else {
            /* No 3,* subtable was found. Just use whatever is the first
             * table listed. Not very useful but maybe better than
             * rejecting the font entirely?
             */
            cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);
        }
        return cmap;
    }

    /**
     * Builds the Unicode->encoding translation array for one of the
     * double byte encodings.
     *
     * Every two byte code in a per-encoding range is decoded with the
     * corresponding charset, the printable ASCII range (and, for ShiftJIS,
     * the half width katakana range) is added, and the resulting
     * encoding->Unicode table is then inverted.
     *
     * speed up the converting by setting the range for double
     * byte characters;
     *
     * @param encodingID one of the *Encoding constants of this class
     * @return a 65536 entry array indexed by Unicode char giving the code
     *         in the other encoding, or null if the encoding ID is not
     *         handled or the conversion failed with an exception
     */
    static char[] getConverter(short encodingID) {
        int dBegin = 0x8000;
        int dEnd   = 0xffff;
        String encoding;

        switch (encodingID) {
        case ShiftJISEncoding:
            dBegin = 0x8140;
            dEnd   = 0xfcfc;
            encoding = "SJIS";
            break;
        case GBKEncoding:
            dBegin = 0x8140;
            dEnd   = 0xfea0;
            encoding = "GBK";
            break;
        case Big5Encoding:
            dBegin = 0xa140;
            dEnd   = 0xfefe;
            encoding = "Big5";
            break;
        case WansungEncoding:
            dBegin = 0xa1a1;
            dEnd   = 0xfede;
            encoding = "EUC_KR";
            break;
        case JohabEncoding:
            dBegin = 0x8141;
            dEnd   = 0xfdfe;
            encoding = "Johab";
            break;
        default:
            return null;
        }

        try {
            char[] convertedChars = new char[65536];
            for (int i=0; i<65536; i++) {
                convertedChars[i] = noSuchChar;
            }

            /* Two input bytes and one output char per code in the range */
            byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];
            char[] outputChars = new char[(dEnd-dBegin+1)];

            int j = 0;
            int firstByte;
            if (encodingID == ShiftJISEncoding) {
                for (int i = dBegin; i <= dEnd; i++) {
                    firstByte = (i >> 8 & 0xff);
                    if (firstByte >= 0xa1 && firstByte <= 0xdf) {
                        //sjis halfwidth katakana
                        inputBytes[j++] = (byte)0xff;
                        inputBytes[j++] = (byte)0xff;
                    } else {
                        inputBytes[j++] = (byte)firstByte;
                        inputBytes[j++] = (byte)(i & 0xff);
                    }
                }
            } else {
                for (int i = dBegin; i <= dEnd; i++) {
                    inputBytes[j++] = (byte)(i>>8 & 0xff);
                    inputBytes[j++] = (byte)(i & 0xff);
                }
            }

            Charset.forName(encoding).newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
            .replaceWith("\u0000")
            .decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length),
                    CharBuffer.wrap(outputChars, 0, outputChars.length),
                    true);

            // ensure single byte ascii
            for (int i = 0x20; i <= 0x7e; i++) {
                convertedChars[i] = (char)i;
            }

            //sjis halfwidth katakana
            if (encodingID == ShiftJISEncoding) {
                for (int i = 0xa1; i <= 0xdf; i++) {
                    convertedChars[i] = (char)(i - 0xa1 + 0xff61);
                }
            }

            /* It would save heap space (approx 60Kbytes for each of these
             * converters) if stored only valid ranges (ie returned
             * outputChars directly. But this is tricky since want to
             * include the ASCII range too.
             */
//          System.err.println("oc.len="+outputChars.length);
//          System.err.println("cc.len="+convertedChars.length);
//          System.err.println("dbegin="+dBegin);
            System.arraycopy(outputChars, 0, convertedChars, dBegin,
                             outputChars.length);

            //return convertedChars;
            /* invert this map as now want it to map from Unicode
             * to other encoding.
             */
            char [] invertedChars = new char[65536];
            for (int i=0;i<65536;i++) {
                if (convertedChars[i] != noSuchChar) {
                    invertedChars[convertedChars[i]] = (char)i;
                }
            }
            return invertedChars;

        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Returns the cached translation array for an encoding, building it
     * with getConverter on first use.
     *
     * The returned array maps from Unicode to the other 2 byte encoding:
     * the index is a Unicode char and the indexed value is the code of
     * that character in eg SJIS. (getConverter inverts the decoded table
     * before returning it.)
     *
     * @param encodingID one of the *Encoding constants of this class;
     *        used directly as an index into converterMaps
     * @return the translation array, or null if getConverter returned null
     */
    static char[] getConverterMap(short encodingID) {
        if (converterMaps[encodingID] == null) {
           converterMaps[encodingID] = getConverter(encodingID);
        }
        return converterMaps[encodingID];
    }

    /**
     * Creates the CMap implementation matching the format of the subtable
     * found at the given offset.
     *
     * @param buffer buffer holding the whole cmap table
     * @param offset offset of the subtable within the buffer
     * @param xlat Unicode->encoding translation array, or null when the
     *        subtable is indexed directly by the requested char code
     * @return a new CMap for subtable formats 0, 2, 4, 6, 8, 10 and 12
     * @throws RuntimeException for any other subtable format
     */
    static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {
        /* First do a sanity check that this cmap subtable is contained
         * within the cmap table.
         */
        int subtableFormat = buffer.getChar(offset);
        long subtableLength;
        if (subtableFormat < 8) {
            subtableLength = buffer.getChar(offset+2);
        } else {
            /* The length is an unsigned 32 bit value. Masking with the int
             * INTMASK would sign-extend it, turning lengths >= 2^31
             * negative and hiding the overflow from the check below.
             */
            subtableLength = buffer.getInt(offset+4) & UINT32_MASK;
        }
        if (offset+subtableLength > buffer.capacity()) {
            if (FontUtilities.isLogging()) {
                FontUtilities.getLogger().warning("Cmap subtable overflows buffer.");
            }
        }
        switch (subtableFormat) {
        case 0:  return new CMapFormat0(buffer, offset);
        case 2:  return new CMapFormat2(buffer, offset, xlat);
        case 4:  return new CMapFormat4(buffer, offset, xlat);
        case 6:  return new CMapFormat6(buffer, offset, xlat);
        case 8:  return new CMapFormat8(buffer, offset, xlat);
        case 10: return new CMapFormat10(buffer, offset, xlat);
        case 12: return new CMapFormat12(buffer, offset, xlat);
        default: throw new RuntimeException("Cmap format unimplemented: " +
                                            (int)buffer.getChar(offset));
        }
    }

/*
    final char charVal(byte[] cmap, int index) {
        return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
    }

    final short shortVal(byte[] cmap, int index) {
        return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
    }
*/

    /**
     * Maps a character code to a glyph code.
     *
     * @param charCode the character code to look up
     * @return the glyph code, or 0 if this cmap has no mapping for it
     */
    abstract char getGlyph(int charCode);

    /* Format 4 Header is
     * ushort format (off=0)
     * ushort length (off=2)
     * ushort language (off=4)
     * ushort segCountX2 (off=6)
     * ushort searchRange (off=8)
     * ushort entrySelector (off=10)
     * ushort rangeShift (off=12)
     * ushort endCount[segCount] (off=14)
     * ushort reservedPad
     * ushort startCount[segCount]
     * short idDelta[segCount]
     * idRangeOFfset[segCount]
     * ushort glyphIdArray[]
     */
    static class CMapFormat4 extends CMap {
        int segCount;
        int entrySelector;
        int rangeShift;
        char[] endCount;
        char[] startCount;
        short[] idDelta;
        char[] idRangeOffset;
        char[] glyphIds;

        /**
         * Reads the format 4 subtable found at the given offset.
         *
         * @param bbuffer buffer holding the whole cmap table
         * @param offset offset of the subtable within the buffer
         * @param xlat Unicode->encoding translation array, or null
         */
        CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {

            this.xlat = xlat;

            /* The char view starts at the subtable, so positions in
             * "buffer" are counted in 2 byte units from the subtable start.
             */
            bbuffer.position(offset);
            CharBuffer buffer = bbuffer.asCharBuffer();
            buffer.get(); // skip, we already know format=4
            int subtableLength = buffer.get();
            /* Try to recover from some bad fonts which specify a subtable
             * length that would overflow the byte buffer holding the whole
             * cmap table. If this isn't a recoverable situation an exception
             * may be thrown which is caught higher up the call stack.
             * Whilst this may seem lenient, in practice, unless the "bad"
             * subtable we are using is the last one in the cmap table we
             * would have no way of knowing about this problem anyway.
             */
            if (offset+subtableLength > bbuffer.capacity()) {
                subtableLength = bbuffer.capacity() - offset;
            }
            buffer.get(); // skip language
            segCount = buffer.get()/2;
            buffer.get(); // skip searchRange, it is not used
            entrySelector = buffer.get();
            rangeShift    = buffer.get()/2;
            startCount = new char[segCount];
            endCount = new char[segCount];
            idDelta = new short[segCount];
            idRangeOffset = new char[segCount];

            for (int i=0; i<segCount; i++) {
                endCount[i] = buffer.get();
            }

            buffer.get(); // 2 bytes for reserved pad

            for (int i=0; i<segCount; i++) {
                startCount[i] = buffer.get();
            }

            for (int i=0; i<segCount; i++) {
                idDelta[i] = (short)buffer.get();
            }

            for (int i=0; i<segCount; i++) {
                char ctmp = buffer.get();
                /* stored halved: a count of 2 byte entries, not of bytes */
                idRangeOffset[i] = (char)((ctmp>>1)&0xffff);
            }
            /* Can calculate the number of glyph IDs by subtracting
             * "pos" from the length of the cmap
             */
            int pos = (segCount*8+16)/2;
            buffer.position(pos);
            int numGlyphIds = (subtableLength/2 - pos);
            glyphIds = new char[numGlyphIds];
            for (int i=0;i<numGlyphIds;i++) {
                glyphIds[i] = buffer.get();
            }
/*
            System.err.println("segcount="+segCount);
            System.err.println("entrySelector="+entrySelector);
            System.err.println("rangeShift="+rangeShift);
            for (int j=0;j<segCount;j++) {
              System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+
                                 " ec="+(int)(endCount[j]&0xffff)+
                                 " delta="+idDelta[j] +
                                 " ro="+(int)idRangeOffset[j]);
            }

            //System.err.println("numglyphs="+glyphIds.length);
            for (int i=0;i<numGlyphIds;i++) {
                  System.err.println("gid["+i+"]="+(int)glyphIds[i]);
            }
*/
        }

        /**
         * Looks up a char code in the segment arrays.
         *
         * Indexes that fall outside the arrays read from the font are not
         * checked here, so a malformed subtable surfaces as an unchecked
         * exception from this method.
         *
         * @param charCode the character code to look up
         * @return the glyph code, or 0 if no segment covers the char code
         */
        @Override
        char getGlyph(int charCode) {

            int index = 0;
            char glyphCode = 0;

            int controlGlyph = getControlCodeGlyph(charCode, true);
            if (controlGlyph >= 0) {
                return (char)controlGlyph;
            }

            /* presence of translation array indicates that this
             * cmap is in some other (non-unicode encoding).
             * In order to look-up a char->glyph mapping we need to
             * translate the unicode code point to the encoding of
             * the cmap.
             * REMIND: VALID CHARCODES??
             */
            if (xlat != null) {
                charCode = xlat[charCode];
            }

            /*
             * Citation from the TrueType (and OpenType) spec:
             *   The segments are sorted in order of increasing endCode
             *   values, and the segment values are specified in four parallel
             *   arrays. You search for the first endCode that is greater than
             *   or equal to the character code you want to map. If the
             *   corresponding startCode is less than or equal to the
             *   character code, then you use the corresponding idDelta and
             *   idRangeOffset to map the character code to a glyph index
             *   (otherwise, the missingGlyph is returned).
             */

            /*
             * CMAP format4 defines several fields for optimized search of
             * the segment list (entrySelector, searchRange, rangeShift).
             * However, benefits are negligible and some fonts have incorrect
             * data - so we use straightforward binary search (see bug 6247425)
             */
            int left = 0, right = startCount.length;
            index = startCount.length >> 1;
            while (left < right) {
                if (endCount[index] < charCode) {
                    left = index + 1;
                } else {
                    right = index;
                }
                index = (left + right) >> 1;
            }

            if (charCode >= startCount[index] && charCode <= endCount[index]) {
                int rangeOffset = idRangeOffset[index];

                if (rangeOffset == 0) {
                    glyphCode = (char)(charCode + idDelta[index]);
                } else {
                    /* Calculate an index into the glyphIds array */

/*
                    System.err.println("rangeoffset="+rangeOffset+
                                       " charCode=" + charCode +
                                       " scnt["+index+"]="+(int)startCount[index] +
                                       " segCnt="+segCount);
*/

                    int glyphIDIndex = rangeOffset - segCount + index
                        + (charCode - startCount[index]);
                    glyphCode = glyphIds[glyphIDIndex];
                    if (glyphCode != 0) {
                        glyphCode = (char)(glyphCode + idDelta[index]);
                    }
                }
            }
            //System.err.println("cc="+Integer.toHexString((int)charCode) + " gc="+(int)glyphCode);
            return glyphCode;
        }
    }

    // Format 0: Byte Encoding table
    static class CMapFormat0 extends CMap {
        byte [] cmap;

        /**
         * Reads the format 0 subtable found at the given offset.
         *
         * @param buffer buffer holding the whole cmap table
         * @param offset offset of the subtable within the buffer
         */
        CMapFormat0(ByteBuffer buffer, int offset) {

            /* skip 6 bytes of format, length, and version */
            int len = buffer.getChar(offset+2);
            cmap = new byte[len-6];
            buffer.position(offset+6);
            buffer.get(cmap);
        }

        /**
         * @param charCode the character code to look up
         * @return the invisible glyph ID for tab, line feed and carriage
         *         return, the byte stored for other char codes below 256,
         *         and 0 for char codes of 256 and above
         */
        @Override
        char getGlyph(int charCode) {
            if (charCode < 256) {
                if (charCode < 0x0010) {
                    switch (charCode) {
                    case 0x0009:
                    case 0x000a:
                    case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
                    }
                }
                return (char)(0xff & cmap[charCode]);
            } else {
                return 0;
            }
        }
    }

//     static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) {

//      CMap cmap = createCMap(buffer, offset, null);
//      if (cmap == null) {
//          return null;
//      } else {
//          return new CMapFormatSymbol(cmap, syms);
//      }
//     }

//     static class CMapFormatSymbol extends CMap {

//      CMap cmap;
//      static final int NUM_BUCKETS = 128;
//      Bucket[] buckets = new Bucket[NUM_BUCKETS];

//      class Bucket {
//          char unicode;
//          char glyph;
//          Bucket next;

//          Bucket(char u, char g) {
//              unicode = u;
//              glyph = g;
//          }
//      }

//      CMapFormatSymbol(CMap cmap, char[] syms) {

//          this.cmap = cmap;

//          for (int i=0;i<syms.length;i++) {
//              char unicode = syms[i];
//              if (unicode != noSuchChar) {
//                  char glyph = cmap.getGlyph(i + 0xf000);
//                  int hash = unicode % NUM_BUCKETS;
//                  Bucket bucket = new Bucket(unicode, glyph);
//                  if (buckets[hash] == null) {
//                      buckets[hash] = bucket;
//                  } else {
//                      Bucket b = buckets[hash];
//                      while (b.next != null) {
//                          b = b.next;
//                      }
//                      b.next = bucket;
//                  }
//              }
//          }
//      }

//      char getGlyph(int unicode) {
//          if (unicode >= 0x1000) {
//              return 0;
//          }
//          else if (unicode >=0xf000 && unicode < 0xf100) {
//              return cmap.getGlyph(unicode);
//          } else {
//              Bucket b = buckets[unicode % NUM_BUCKETS];
//              while (b != null) {
//                  if (b.unicode == unicode) {
//                      return b.glyph;
//                  } else {
//                      b = b.next;
//                  }
//              }
//              return 0;
//          }
//      }
//     }

    // Format 2: High-byte mapping through table
    static class CMapFormat2 extends CMap {

        char[] subHeaderKey = new char[256];
         /* Store subheaders in individual arrays
          * A SubHeader entry theoretically looks like {
          *   char firstCode;
          *   char entryCount;
          *   short idDelta;
          *   char idRangeOffset;
          * }
          */
        char[] firstCodeArray;
        char[] entryCountArray;
        short[] idDeltaArray;
        char[] idRangeOffSetArray;

        char[] glyphIndexArray;

        /**
         * Reads the format 2 subtable found at the given offset.
         *
         * @param buffer buffer holding the whole cmap table
         * @param offset offset of the subtable within the buffer
         * @param xlat Unicode->encoding translation array, or null
         */
        CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {

            this.xlat = xlat;

            int tableLen = buffer.getChar(offset+2);
            buffer.position(offset+6);
            CharBuffer cBuffer = buffer.asCharBuffer();
            char maxSubHeader = 0;
            for (int i=0;i<256;i++) {
                subHeaderKey[i] = cBuffer.get();
                if (subHeaderKey[i] > maxSubHeader) {
                    maxSubHeader = subHeaderKey[i];
                }
            }
            /* The value of the subHeaderKey is 8 * the subHeader index,
             * so the number of subHeaders can be obtained by dividing
             * this value by 8 and adding 1.
             */
            int numSubHeaders = (maxSubHeader >> 3) +1;
            firstCodeArray = new char[numSubHeaders];
            entryCountArray = new char[numSubHeaders];
            idDeltaArray  = new short[numSubHeaders];
            idRangeOffSetArray  = new char[numSubHeaders];
            for (int i=0; i<numSubHeaders; i++) {
                firstCodeArray[i] = cBuffer.get();
                entryCountArray[i] = cBuffer.get();
                idDeltaArray[i] = (short)cBuffer.get();
                idRangeOffSetArray[i] = cBuffer.get();
//              System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+
//                                 " ec="+(int)entryCountArray[i]+
//                                 " delta="+(int)idDeltaArray[i]+
//                                 " offset="+(int)idRangeOffSetArray[i]);
            }

            /* 518 = 6 header bytes + 256 two byte subHeaderKey entries */
            int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;
            glyphIndexArray = new char[glyphIndexArrSize];
            for (int i=0; i<glyphIndexArrSize;i++) {
                glyphIndexArray[i] = cBuffer.get();
            }
        }

        /**
         * Looks up a char code through the sub header selected by its
         * high byte.
         *
         * @param charCode the character code to look up
         * @return the glyph code, or 0 if the char code is not mapped
         */
        @Override
        char getGlyph(int charCode) {
            int controlGlyph = getControlCodeGlyph(charCode, true);
            if (controlGlyph >= 0) {
                return (char)controlGlyph;
            }

            if (xlat != null) {
                charCode = xlat[charCode];
            }

            char highByte = (char)(charCode >> 8);
            char lowByte = (char)(charCode & 0xff);
            int key = subHeaderKey[highByte]>>3; // index into subHeaders
            char mapMe;

            if (key != 0) {
                mapMe = lowByte;
            } else {
                mapMe = highByte;
                if (mapMe == 0) {
                    mapMe = lowByte;
                }
            }

//          System.err.println("charCode="+Integer.toHexString(charCode)+
//                             " key="+key+ " mapMe="+Integer.toHexString(mapMe));
            char firstCode = firstCodeArray[key];
            if (mapMe < firstCode) {
                return 0;
            } else {
                mapMe -= firstCode;
            }

            if (mapMe < entryCountArray[key]) {
                /* "address" arithmetic is needed to calculate the offset
                 * into glyphIndexArray. "idRangeOffSetArray[key]" specifies
                 * the number of bytes from that location in the table where
                 * the subarray of glyphIndexes starting at "firstCode" begins.
                 * Each entry in the subHeader table is 8 bytes, and the
                 * idRangeOffSetArray field is at offset 6 in the entry.
                 * The glyphIndexArray immediately follows the subHeaders.
                 * So if there are "N" entries then the number of bytes to the
                 * start of glyphIndexArray is (N-key)*8-6.
                 * Subtract this from the idRangeOffSetArray value to get
                 * the number of bytes into glyphIndexArray and divide by 2 to
                 * get the (char) array index.
                 */
                int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;
                int glyphSubArrayStart =
                        (idRangeOffSetArray[key] - glyphArrayOffset)/2;
                char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];
                if (glyphCode != 0) {
                    glyphCode += idDeltaArray[key]; //idDelta
                    return glyphCode;
                }
            }
            return 0;
        }
    }

    // Format 6: Trimmed table mapping
    static class CMapFormat6 extends CMap {

        char firstCode;
        char entryCount;
        char[] glyphIdArray;

        /**
         * Reads the format 6 subtable found at the given offset.
         *
         * @param bbuffer buffer holding the whole cmap table
         * @param offset offset of the subtable within the buffer
         * @param xlat Unicode->encoding translation array, or null
         */
        CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {

             /* getGlyph consults this.xlat, so it has to be stored here;
              * otherwise the translation array passed in for a non-unicode
              * subtable would silently be ignored.
              */
             this.xlat = xlat;

             bbuffer.position(offset+6);
             CharBuffer buffer = bbuffer.asCharBuffer();
             firstCode = buffer.get();
             entryCount = buffer.get();
             glyphIdArray = new char[entryCount];
             for (int i=0; i< entryCount; i++) {
                 glyphIdArray[i] = buffer.get();
             }
         }

         /**
          * @param charCode the character code to look up
          * @return the glyph code, or 0 if the char code lies outside the
          *         range of entryCount codes starting at firstCode
          */
         @Override
         char getGlyph(int charCode) {
            int controlGlyph = getControlCodeGlyph(charCode, true);
            if (controlGlyph >= 0) {
                return (char)controlGlyph;
            }

             if (xlat != null) {
                 charCode = xlat[charCode];
             }

             charCode -= firstCode;
             if (charCode < 0 || charCode >= entryCount) {
                  return 0;
             } else {
                  return glyphIdArray[charCode];
             }
         }
    }

    // Format 8: mixed 16-bit and 32-bit coverage
    // Seems unlikely this code will ever get tested as we look for
    // MS platform Cmaps and MS states (in the Opentype spec on their website)
    // that MS doesn't support this format
    static class CMapFormat8 extends CMap {
         byte[] is32 = new byte[8192];
         int nGroups;
         int[] startCharCode;
         int[] endCharCode;
         int[] startGlyphID;

         /**
          * Reads the is32 bit array and the group count of the format 8
          * subtable found at the given offset. The group arrays are
          * allocated but never filled in, and getGlyph never maps
          * anything.
          *
          * @param bbuffer buffer holding the whole cmap table
          * @param offset offset of the subtable within the buffer
          * @param xlat not stored by this constructor
          */
         CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {

             /* The is32 array follows the 12 byte subtable header
              * (format, reserved, length, language). The position has to be
              * relative to the subtable, not to the start of the cmap table.
              */
             bbuffer.position(offset+12);
             bbuffer.get(is32);
             nGroups = bbuffer.getInt();
             startCharCode = new int[nGroups];
             endCharCode   = new int[nGroups];
             startGlyphID  = new int[nGroups];
         }

        /**
         * @param charCode ignored
         * @return always 0
         * @throws RuntimeException if a translation array is set
         */
        @Override
        char getGlyph(int charCode) {
            if (xlat != null) {
                throw new RuntimeException("xlat array for cmap fmt=8");
            }
            return 0;
        }

    }


    // Format 4-byte 10: Trimmed table mapping
    // Seems unlikely this code will ever get tested as we look for
    // MS platform Cmaps and MS states (in the Opentype spec on their website)
    // that MS doesn't support this format
    static class CMapFormat10 extends CMap {

         long firstCode;
         int entryCount;
         char[] glyphIdArray;

         /**
          * Reads the format 10 subtable found at the given offset.
          *
          * @param bbuffer buffer holding the whole cmap table
          * @param offset offset of the subtable within the buffer
          * @param xlat not stored by this constructor
          * @throws RuntimeException if the entry count is negative or the
          *         glyph array would extend past the end of the buffer
          */
         CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {

             /* The 12 byte header (format, reserved, length, language) is
              * followed by startCharCode and numChars, and the glyph array
              * begins at offset 20. Both fields are read at their absolute
              * position: the buffer is shared and its current position on
              * entry is unrelated to this subtable.
              */
             firstCode = bbuffer.getInt(offset+12) & UINT32_MASK;
             entryCount = bbuffer.getInt(offset+16);
             bbuffer.position(offset+20);
             // each glyph is a uint16, so 2 bytes per value.
             if (entryCount < 0 || bbuffer.remaining() < 2L * entryCount) {
                 throw new RuntimeException("Format 10 table exceeded");
             }
             CharBuffer buffer = bbuffer.asCharBuffer();
             glyphIdArray = new char[entryCount];
             for (int i=0; i< entryCount; i++) {
                 glyphIdArray[i] = buffer.get();
             }
         }

         /**
          * @param charCode the character code to look up
          * @return the glyph code, or 0 if the char code lies outside the
          *         range of entryCount codes starting at firstCode
          * @throws RuntimeException if a translation array is set
          */
         @Override
         char getGlyph(int charCode) {

             if (xlat != null) {
                 throw new RuntimeException("xlat array for cmap fmt=10");
             }

             int code = (int)(charCode - firstCode);
             if (code < 0 || code >= entryCount) {
                 return 0;
             } else {
                 return glyphIdArray[code];
             }
         }
    }

    // Format 12: Segmented coverage for UCS-4 (fonts supporting
    // surrogate pairs)
    static class CMapFormat12 extends CMap {

        int numGroups;
        int highBit =0;
        int power;
        int extra;
        long[] startCharCode;
        long[] endCharCode;
        int[] startGlyphID;

        /**
         * Reads the groups of the format 12 subtable found at the given
         * offset and pre-computes the values used by the search in
         * getGlyph.
         *
         * @param buffer buffer holding the whole cmap table
         * @param offset offset of the subtable within the buffer
         * @param xlat must be null
         * @throws RuntimeException if xlat is not null
         */
        CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {
            if (xlat != null) {
                throw new RuntimeException("xlat array for cmap fmt=12");
            }

            numGroups = buffer.getInt(offset+12);
            startCharCode = new long[numGroups];
            endCharCode = new long[numGroups];
            startGlyphID = new int[numGroups];
            buffer.position(offset+16);
            buffer = buffer.slice();
            IntBuffer ibuffer = buffer.asIntBuffer();
            for (int i=0; i<numGroups; i++) {
                /* char codes are unsigned 32 bit values held in longs */
                startCharCode[i] = ibuffer.get() & UINT32_MASK;
                endCharCode[i] = ibuffer.get() & UINT32_MASK;
                startGlyphID[i] = ibuffer.get();
            }

            /* Finds the high bit by binary searching through the bits */
            int value = numGroups;

            if (value >= 1 << 16) {
                value >>= 16;
                highBit += 16;
            }
            if (value >= 1 << 8) {
                value >>= 8;
                highBit += 8;
            }
            if (value >= 1 << 4) {
                value >>= 4;
                highBit += 4;
            }
            if (value >= 1 << 2) {
                value >>= 2;
                highBit += 2;
            }
            if (value >= 1 << 1) {
                value >>= 1;
                highBit += 1;
            }

            /* power is the largest power of two that is <= numGroups
             * (1 when numGroups is 0) and extra is the remainder.
             */
            power = 1 << highBit;
            extra = numGroups - power;
        }

        /**
         * Searches the groups for the one containing the char code.
         *
         * Indexes are not checked here, so a subtable without groups
         * surfaces as an unchecked exception from this method.
         *
         * @param charCode the character code to look up
         * @return the glyph code, or 0 if no group contains the char code
         */
        @Override
        char getGlyph(int charCode) {
            int controlGlyph = getControlCodeGlyph(charCode, false);
            if (controlGlyph >= 0) {
                return (char)controlGlyph;
            }
            int probe = power;
            int range = 0;

            if (startCharCode[extra] <= charCode) {
                range = extra;
            }

            while (probe > 1) {
                probe >>= 1;

                if (startCharCode[range+probe] <= charCode) {
                    range += probe;
                }
            }

            if (startCharCode[range] <= charCode &&
                  endCharCode[range] >= charCode) {
                return (char)
                    (startGlyphID[range] + (charCode - startCharCode[range]));
            }

            return 0;
        }

    }

    /* Used to substitute for bad Cmaps. */
    static class NullCMapClass extends CMap {

        /**
         * @param charCode ignored
         * @return always 0
         */
        @Override
        char getGlyph(int charCode) {
            return 0;
        }
    }

    public static final NullCMapClass theNullCmap = new NullCMapClass();

    /**
     * Handles the char codes that are treated the same way by every
     * subtable format.
     *
     * @param charCode the character code being looked up
     * @param noSurrogates true if char codes of 0xFFFF and above cannot be
     *        mapped by the calling cmap
     * @return CharToGlyphMapper.INVISIBLE_GLYPH_ID for tab, line feed and
     *         carriage return and for the ranges 0x200c-0x200f,
     *         0x2028-0x202e and 0x206a-0x206f; 0 if noSurrogates is true
     *         and the char code is 0xFFFF or above; otherwise -1, meaning
     *         the caller has to look the char code up itself
     */
    final int getControlCodeGlyph(int charCode, boolean noSurrogates) {
        if (charCode < 0x0010) {
            switch (charCode) {
            case 0x0009:
            case 0x000a:
            case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
            }
        } else if (charCode >= 0x200c) {
            if ((charCode <= 0x200f) ||
                (charCode >= 0x2028 && charCode <= 0x202e) ||
                (charCode >= 0x206a && charCode <= 0x206f)) {
                return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
            } else if (noSurrogates && charCode >= 0xFFFF) {
                return 0;
            }
        }
        return -1;
    }
}
// Other Java examples (source code examples)
// Here is a short list of links related to this Java CMap.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.