|
Java example source code file (CharacterScript.java)
This example Java source code file (CharacterScript.java) is included in the alvinalexander.com
"Java Source Code
Warehouse" project. The intent of this project is to help you "Learn
Java by Example" TM.
Learn more about this Java project at its project page.
The CharacterScript.java Java example source code
package build.tools.generatecharacter;
import java.util.regex.*;
import java.util.*;
import java.io.*;
public class CharacterScript {
// generate the code needed for j.l.C.UnicodeScript
static void fortest(String fmt, Object... o) {
//System.out.printf(fmt, o);
}
static void print(String fmt, Object... o) {
System.out.printf(fmt, o);
}
static void debug(String fmt, Object... o) {
//System.out.printf(fmt, o);
}
public static void main(String args[]){
try {
if (args.length != 1) {
System.out.println("java CharacterScript script.txt out");
System.exit(1);
}
int i, j;
BufferedReader sbfr = new BufferedReader(new FileReader(args[0]));
HashMap<String,Integer> scriptMap = new HashMap();
String line = null;
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
int prevS = -1;
int prevE = -1;
String prevN = null;
int[][] scripts = new int[1024][3];
int scriptSize = 0;
while ((line = sbfr.readLine()) != null) {
if (line.length() <= 1 || line.charAt(0) == '#') {
continue;
}
m.reset(line);
if (m.matches()) {
int start = Integer.parseInt(m.group(1), 16);
int end = (m.group(2)==null)?start
:Integer.parseInt(m.group(2), 16);
String name = m.group(3);
if (name.equals(prevN) && start == prevE + 1) {
prevE = end;
} else {
if (prevS != -1) {
if (scriptMap.get(prevN) == null) {
scriptMap.put(prevN, scriptMap.size());
}
scripts[scriptSize][0] = prevS;
scripts[scriptSize][1] = prevE;
scripts[scriptSize][2] = scriptMap.get(prevN);
scriptSize++;
}
debug("%x-%x\t%s%n", prevS, prevE, prevN);
prevS = start; prevE = end; prevN = name;
}
} else {
debug("Warning: Unrecognized line <%s>%n", line);
}
}
//last one.
if (scriptMap.get(prevN) == null) {
scriptMap.put(prevN, scriptMap.size());
}
scripts[scriptSize][0] = prevS;
scripts[scriptSize][1] = prevE;
scripts[scriptSize][2] = scriptMap.get(prevN);
scriptSize++;
debug("%x-%x\t%s%n", prevS, prevE, prevN);
debug("-----------------%n");
debug("Total scripts=%s%n", scriptMap.size());
debug("-----------------%n%n");
String[] names = new String[scriptMap.size()];
for (String name: scriptMap.keySet()) {
names[scriptMap.get(name).intValue()] = name;
}
for (j = 0; j < scriptSize; j++) {
for (int cp = scripts[j][0]; cp <= scripts[j][1]; cp++) {
String name = names[scripts[j][2]].toUpperCase(Locale.ENGLISH);;
if (cp > 0xffff)
System.out.printf("%05X %s%n", cp, name);
else
System.out.printf("%05X %s%n", cp, name);
}
}
Arrays.sort(scripts, 0, scriptSize,
new Comparator<int[]>() {
public int compare(int[] a1, int[] a2) {
return a1[0] - a2[0];
}
public boolean compare(Object obj) {
return obj == this;
}
});
// Consolidation: there are lots of "reserved" code points
// embedded in those otherwise "sequential" blocks.
// To make the lookup table smaller, we combine those
// separated segments with the assumption that the lookup
// implementation checks
// Character.getType() != Character.UNASSIGNED
// first (return UNKNOWN for unassigned)
ArrayList<int[]> list = new ArrayList();
list.add(scripts[0]);
int[] last = scripts[0];
for (i = 1; i < scriptSize; i++) {
if (scripts[i][0] != (last[1] + 1)) {
boolean isNotUnassigned = false;
for (int cp = last[1] + 1; cp < scripts[i][0]; cp++) {
if (Character.getType(cp) != Character.UNASSIGNED) {
isNotUnassigned = true;
debug("Warning: [%x] is ASSIGNED but in NON script%n", cp);
break;
}
}
if (isNotUnassigned) {
// surrogates only?
int[] a = new int[3];
a[0] = last[1] + 1;
a[1] = scripts[i][0] - 1;
a[2] = -1; // unknown
list.add(a);
} else {
if (last[2] == scripts[i][2]) {
//combine
last[1] = scripts[i][1];
continue;
} else {
// expand last
last[1] = scripts[i][0] - 1;
}
}
}
list.add(scripts[i]);
last = scripts[i];
}
for (i = 0; i < list.size(); i++) {
int[] a = (int[])list.get(i);
String name = "UNKNOWN";
if (a[2] != -1)
name = names[a[2]].toUpperCase(Locale.US);
debug("0x%05x, 0x%05x %s%n", a[0], a[1], name);
}
debug("--->total=%d%n", list.size());
//////////////////OUTPUT//////////////////////////////////
print("public class Scripts {%n%n");
print(" public static enum UnicodeScript {%n");
for (i = 0; i < names.length; i++) {
print(" /**%n * Unicode script \"%s\".%n */%n", names[i]);
print(" %s,%n%n", names[i].toUpperCase(Locale.US));
}
print(" /**%n * Unicode script \"Unknown\".%n */%n UNKNOWN;%n%n");
// lookup table
print(" private static final int[] scriptStarts = {%n");
for (int[] a : list) {
String name = "UNKNOWN";
if (a[2] != -1)
name = names[a[2]].toUpperCase(Locale.US);
if (a[0] < 0x10000)
print(" 0x%04X, // %04X..%04X; %s%n",
a[0], a[0], a[1], name);
else
print(" 0x%05X, // %05X..%05X; %s%n",
a[0], a[0], a[1], name);
}
last = list.get(list.size() -1);
if (last[1] != Character.MAX_CODE_POINT)
print(" 0x%05X // %05X..%06X; %s%n",
last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT,
"UNKNOWN");
print("%n };%n%n");
print(" private static final UnicodeScript[] scripts = {%n");
for (int[] a : list) {
String name = "UNKNOWN";
if (a[2] != -1)
name = names[a[2]].toUpperCase(Locale.US);
print(" %s,%n", name);
}
if (last[1] != Character.MAX_CODE_POINT)
print(" UNKNOWN%n");
print(" };%n");
print(" }%n");
print("}%n");
} catch (Exception e) {
e.printStackTrace();
}
}
}
Other Java examples (source code examples)
Here is a short list of links related to this Java CharacterScript.java source code file:
|