alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (CharacterScript.java)

This example Java source code file (CharacterScript.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Learn more about this Java project at its project page.

Java - Java tags/keywords

arraylist, bufferedreader, characterscript, comparator, hashmap, matcher, object, regex, string, total, unicodescript, unknown, unrecognized, util, warning, xdigit

The CharacterScript.java Java example source code

package build.tools.generatecharacter;

import java.util.regex.*;
import java.util.*;
import java.io.*;

/**
 * Generates the code needed for {@code java.lang.Character.UnicodeScript}.
 *
 * <p>The tool takes a single argument, the path of a script data file whose
 * data lines look like
 * <pre>
 *   0041..005A    ; Latin # comment
 *   00AA          ; Latin # comment
 * </pre>
 * and writes two things to standard output, in this order:
 * <ol>
 *   <li>one line per code point listed in the file, giving the code point in
 *       hex and the upper-cased script name (in file order);</li>
 *   <li>the source of a {@code Scripts} class containing a
 *       {@code UnicodeScript} enum together with the {@code scriptStarts}
 *       and {@code scripts} lookup tables.</li>
 * </ol>
 *
 * <p>Any failure (unreadable file, file without data lines, ...) is reported
 * as a stack trace on standard error, as before.
 */
public class CharacterScript {

    /**
     * One data line: a hex code point, an optional "..hex" range end, the
     * script name, and a trailing '#' comment.
     */
    private static final Pattern SCRIPT_LINE = Pattern.compile(
        "(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*");

    /** Orders {start, end, scriptIndex} triples by their start code point. */
    private static final Comparator<int[]> BY_START = new Comparator<int[]>() {
        @Override
        public int compare(int[] a1, int[] a2) {
            return a1[0] < a2[0] ? -1 : (a1[0] > a2[0] ? 1 : 0);
        }
    };

    // Tracing hook; the printf is intentionally disabled.
    static void fortest(String fmt, Object... o) {
        //System.out.printf(fmt, o);
    }

    /** Writes formatted text of the generated source to standard output. */
    static void print(String fmt, Object... o) {
        System.out.printf(fmt, o);
    }

    // Debug hook; un-comment the printf to trace parsing and consolidation.
    static void debug(String fmt, Object... o) {
        //System.out.printf(fmt, o);
    }

    /**
     * Entry point.
     *
     * @param args exactly one element: the path of the script data file.
     *             With any other argument count a usage line is printed and
     *             the VM exits with status 1.
     */
    public static void main(String args[]){
        try {
            if (args.length != 1) {
                // Only the input file is accepted; output goes to stdout.
                System.out.println("java CharacterScript script.txt");
                System.exit(1);
            }

            // script name -> index, assigned in order of first completed run
            Map<String, Integer> scriptMap = new HashMap<String, Integer>();
            List<int[]> scripts = readRanges(args[0], scriptMap);
            if (scripts.isEmpty()) {
                // Previously this surfaced as an unexplained NullPointerException.
                throw new IOException("No script ranges found in " + args[0]);
            }

            debug("-----------------%n");
            debug("Total scripts=%s%n", scriptMap.size());
            debug("-----------------%n%n");

            // Invert the map: names[index] is the script name as spelled in the file.
            String[] names = new String[scriptMap.size()];
            for (Map.Entry<String, Integer> e : scriptMap.entrySet()) {
                names[e.getValue().intValue()] = e.getKey();
            }

            // The per-code-point listing is emitted in file order, before sorting.
            dumpCodePoints(scripts, names);

            Collections.sort(scripts, BY_START);
            List<int[]> list = consolidate(scripts);

            for (int[] a : list) {
                debug("0x%05x, 0x%05x  %s%n", a[0], a[1], constantName(names, a[2]));
            }
            debug("--->total=%d%n", list.size());

            printScriptsClass(names, list);

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the data file and returns its runs as {start, end, scriptIndex}
     * triples, in file order. Consecutive lines that name the same script and
     * are contiguous (next start == previous end + 1) are merged into one run.
     * Lines of length <= 1, lines starting with '#', and lines that do not
     * match the data-line pattern are skipped.
     *
     * @param path      file to read; the reader is always closed on return
     * @param scriptMap receives a fresh index for every script name seen
     * @return the runs found; empty when the file has no data lines
     * @throws IOException if the file cannot be opened or read
     */
    private static List<int[]> readRanges(String path, Map<String, Integer> scriptMap)
            throws IOException {
        List<int[]> ranges = new ArrayList<int[]>();
        Matcher m = SCRIPT_LINE.matcher("");

        // The run currently being accumulated; prevN == null means "none yet".
        int prevS = -1;
        int prevE = -1;
        String prevN = null;

        BufferedReader sbfr = new BufferedReader(new FileReader(path));
        try {
            String line;
            while ((line = sbfr.readLine()) != null) {
                if (line.length() <= 1 || line.charAt(0) == '#') {
                    continue;
                }
                m.reset(line);
                if (!m.matches()) {
                    debug("Warning: Unrecognized line <%s>%n", line);
                    continue;
                }
                int start = Integer.parseInt(m.group(1), 16);
                int end = (m.group(2) == null) ? start
                                               : Integer.parseInt(m.group(2), 16);
                String name = m.group(3);
                if (name.equals(prevN) && start == prevE + 1) {
                    prevE = end;            // extends the current run
                    continue;
                }
                if (prevN != null) {
                    addRange(ranges, scriptMap, prevS, prevE, prevN);
                }
                debug("%x-%x\t%s%n", prevS, prevE, prevN);
                prevS = start;
                prevE = end;
                prevN = name;
            }
        } finally {
            sbfr.close();
        }

        // Flush the last run, if the file contained any data line at all.
        if (prevN != null) {
            addRange(ranges, scriptMap, prevS, prevE, prevN);
            debug("%x-%x\t%s%n", prevS, prevE, prevN);
        }
        return ranges;
    }

    /** Appends one run, giving its script name the next free index if it is new. */
    private static void addRange(List<int[]> ranges, Map<String, Integer> scriptMap,
                                 int start, int end, String name) {
        Integer index = scriptMap.get(name);
        if (index == null) {
            index = Integer.valueOf(scriptMap.size());
            scriptMap.put(name, index);
        }
        ranges.add(new int[] { start, end, index.intValue() });
    }

    /** Prints "code point, upper-cased script name" for every code point of every run. */
    private static void dumpCodePoints(List<int[]> ranges, String[] names) {
        for (int[] r : ranges) {
            String name = names[r[2]].toUpperCase(Locale.ENGLISH);
            for (int cp = r[0]; cp <= r[1]; cp++) {
                System.out.printf("%05X    %s%n", cp, name);
            }
        }
    }

    /**
     * Consolidation: there are lots of "reserved" code points embedded in
     * those otherwise "sequential" blocks. To make the lookup table smaller,
     * we combine those separated segments with the assumption that the lookup
     * implementation checks
     *    Character.getType() != Character.UNASSIGNED
     * first (return UNKNOWN for unassigned).
     *
     * <p>A gap that contains at least one code point the running JDK reports
     * as assigned becomes an explicit entry with script index -1 (unknown).
     * A fully unassigned gap is absorbed: the two neighbours are merged when
     * they share a script, otherwise the left neighbour is stretched over the
     * gap. The triples in {@code sorted} are modified in place.
     *
     * @param sorted non-empty list of runs ordered by start code point
     * @return the consolidated table, ordered by start code point
     */
    private static List<int[]> consolidate(List<int[]> sorted) {
        List<int[]> list = new ArrayList<int[]>();
        int[] last = sorted.get(0);
        list.add(last);

        for (int i = 1; i < sorted.size(); i++) {
            int[] cur = sorted.get(i);
            if (cur[0] != last[1] + 1) {
                if (hasAssignedCodePoint(last[1] + 1, cur[0])) {
                    // surrogates only?
                    list.add(new int[] { last[1] + 1, cur[0] - 1, -1 /* unknown */ });
                } else if (last[2] == cur[2]) {
                    // combine
                    last[1] = cur[1];
                    continue;
                } else {
                    // expand last
                    last[1] = cur[0] - 1;
                }
            }
            list.add(cur);
            last = cur;
        }
        return list;
    }

    /** Returns true if any code point in [from, to) is not UNASSIGNED per Character.getType. */
    private static boolean hasAssignedCodePoint(int from, int to) {
        for (int cp = from; cp < to; cp++) {
            if (Character.getType(cp) != Character.UNASSIGNED) {
                debug("Warning: [%x] is ASSIGNED but in NON script%n", cp);
                return true;
            }
        }
        return false;
    }

    /** Enum constant name for a script index; -1 maps to "UNKNOWN". */
    private static String constantName(String[] names, int index) {
        return (index == -1) ? "UNKNOWN" : names[index].toUpperCase(Locale.US);
    }

    /**
     * Emits the generated {@code Scripts} source: the enum constants (one per
     * script, plus UNKNOWN), then the parallel {@code scriptStarts} and
     * {@code scripts} tables. When the table stops short of
     * Character.MAX_CODE_POINT a final UNKNOWN entry covers the remainder.
     */
    private static void printScriptsClass(String[] names, List<int[]> list) {
        print("public class Scripts {%n%n");
        print("    public static enum UnicodeScript {%n");
        for (int i = 0; i < names.length; i++) {
            print("        /**%n         * Unicode script \"%s\".%n         */%n", names[i]);
            print("        %s,%n%n",  names[i].toUpperCase(Locale.US));
        }
        print("        /**%n         * Unicode script \"Unknown\".%n         */%n        UNKNOWN;%n%n");

        int[] last = list.get(list.size() - 1);
        boolean needsTail = last[1] != Character.MAX_CODE_POINT;

        // lookup table
        print("        private static final int[] scriptStarts = {%n");
        for (int[] a : list) {
            String name = constantName(names, a[2]);
            if (a[0] < 0x10000) {
                print("            0x%04X,   // %04X..%04X; %s%n",
                      a[0], a[0], a[1], name);
            } else {
                print("            0x%05X,  // %05X..%05X; %s%n",
                      a[0], a[0], a[1], name);
            }
        }
        if (needsTail) {
            print("            0x%05X   // %05X..%06X; %s%n",
                  last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT,
                  "UNKNOWN");
        }
        print("%n        };%n%n");

        print("        private static final UnicodeScript[] scripts = {%n");
        for (int[] a : list) {
            print("            %s,%n", constantName(names, a[2]));
        }
        if (needsTail) {
            print("            UNKNOWN%n");
        }
        print("        };%n");
        print("    }%n");
        print("}%n");
    }
}

Other Java examples (source code examples)

Here is a short list of links related to this Java CharacterScript.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.