alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (CheckEncodingPropertiesFile.java)

This example Java source code file (CheckEncodingPropertiesFile.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Learn more about this Java project at its project page.

Java - Java tags/keywords

checkcharsetmapping, collection, conflictingcharseterror, conflictingpreferredmimenameerror, encodinginfo, exception, hashmap, hashset, java, list, map, missingvalidcharsetnameerror, reflection, string, stringbuilder, util

The CheckEncodingPropertiesFile.java Java example source code

/*
 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/**
 * @test
 * @bug 8008738
 * @summary checks that the mapping implemented by
 *      com.sun.org.apache.xml.internal.serializer.Encodings
 *      correctly identifies valid Charset names and
 *      correctly maps them to their preferred mime names.
 *      Also checks that the Encodings.properties resource file
 *      is consistent.
 * @compile -XDignore.symbol.file CheckEncodingPropertiesFile.java
 * @run main CheckEncodingPropertiesFile
 * @author Daniel Fuchs
 */

import com.sun.org.apache.xml.internal.serializer.EncodingInfo;
import com.sun.org.apache.xml.internal.serializer.Encodings;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.StringTokenizer;

public class CheckEncodingPropertiesFile {

    /** Class-path relative name of the resource file checked by this test. */
    private static final String ENCODINGS_FILE = "com/sun/org/apache/xml/internal/serializer/Encodings.properties";

    /**
     * Loads the {@link #ENCODINGS_FILE} resource through the system class
     * loader and hands the resulting properties to {@link #test(Properties)}.
     *
     * @param args command line arguments (not used).
     * @throws IllegalStateException if the resource cannot be located.
     * @throws Exception if the resource cannot be read, or if thrown by
     *         {@link #test(Properties)}.
     */
    public static void main(String[] args) throws Exception {
        final InputStream in = ClassLoader.getSystemResourceAsStream(ENCODINGS_FILE);
        if (in == null) {
            // Report the missing resource by name rather than letting the
            // reader constructor fail with an anonymous NullPointerException.
            throw new IllegalStateException("Resource not found: " + ENCODINGS_FILE);
        }

        final Properties props = new Properties();
        // Decode as ISO-8859-1 (what Properties.load(InputStream) uses)
        // so that the result does not depend on the platform default charset.
        try (InputStreamReader is = new InputStreamReader(in, StandardCharsets.ISO_8859_1)) {
            props.load(is);
        }

        // Debugging aid: uncomment to dump every charset known to the platform.
        //printAllCharsets();

        test(props);
    }


    private static final class CheckCharsetMapping {

        /**
         * A map that maps Java or XML name to canonical charset names.
         * key:    upper cased value of Java or XML name.
         * value:  case-sensitive canonical name of charset.
         */
        private final Map<String, String> charsetMap = new HashMap<>();

        private final Map<String, String> preferredMime = new HashMap<>();

        /**
         * Unresolved alias names.
         * For a given set of names pointing to the same unresolved charset,
         * this map will contain, for each alias in the set, a mapping
         * with the alias.toUpperValue() as key and the set of known aliases
         * as value.
         */
        private final Map<String, Collection unresolved = new HashMap<>();

        public final static class ConflictingCharsetError extends Error {
            ConflictingCharsetError(String a, String cs1, String cs2) {
                super("Conflicting charset mapping for '"+a+"': '"+cs1+"' and '"+cs2+"'");
            }
        }

        public final static class MissingValidCharsetNameError extends Error {
            MissingValidCharsetNameError(String name, Collection<String> aliases) {
                super(name+": Line "+aliases+" has no recognized charset alias");
            }
        }

        public final static class ConflictingPreferredMimeNameError extends Error {
            ConflictingPreferredMimeNameError(String a, String cs1, String cs2) {
                super("Conflicting preferred mime name for '"+a+"': '"+cs1+"' and '"+cs2+"'");
            }
        }

        /**
         * For each alias in aliases, attempt to find the canonical
         * charset name.
         * All names in aliases are supposed to point to the same charset.
         * Names in aliases can be java names or XML names, indifferently.
         * @param aliases list of names (aliases) for a given charset.
         * @return The canonical name of the charset, if found, null otherwise.
         */
        private String findCharsetNameFor(String[] aliases) {
            String cs = null;
            String res = null;
            for (String a : aliases) {
                final String k = a.toUpperCase();
                String cachedCs = charsetMap.get(k);
                if (cs == null) {
                    cs = cachedCs;
                }
                if (cachedCs != null && cs != null
                        && !Charset.forName(cachedCs).name().equals(Charset.forName(cs).name())) {
                    throw new ConflictingCharsetError(a,cs,cachedCs);
                }
                try {
                    final String rcs = Charset.forName(a).name();
                    if (cs != null && !Charset.forName(cs).name().equals(rcs)) {
                        throw new ConflictingCharsetError(a,cs,rcs);
                    }
                    if (res == null) {
                        if (a.equals(aliases[0])) {
                            res = a;
                        } else {
                            res = cs;
                        }
                    }
                    cs = rcs;
                    charsetMap.put(k, res == null ? cs : res);
                } catch (Exception x) {
                    continue;
                }
            }
            return res == null ? cs : res;
        }

        /**
         * Register a canonical charset name for a given set of aliases.
         *
         * @param charsetName the canonical charset name.
         * @param aliases a list of aliases for the given charset.
         */
        private void registerCharsetNameFor(String charsetName, String[] aliases) {
            if (charsetName == null) throw new NullPointerException();

            for (String a : aliases) {
                String k = a.toUpperCase();
                String csv = charsetMap.get(k);
                if (csv == null) {
                    charsetMap.put(k, charsetName);
                    csv = charsetName;
                } else if (!csv.equals(charsetName)) {
                    throw new ConflictingCharsetError(a,charsetName,csv);
                }

                final Collection<String> c = unresolved.get(k);
                if (c != null) {
                    for (String aa : c) {
                        k = aa.toUpperCase();
                        String csvv = charsetMap.get(k);
                        if (csvv == null) charsetMap.put(k, csv);
                        unresolved.remove(k);
                    }
                    throw new MissingValidCharsetNameError(charsetName,c);
                }
            }
        }

        /**
         * Register a set of aliases as being unresolved.
         * @param names    the list of names - this should be what is returned by
         *                 nameSet.toArray(new String[nameSet.size()])
         * @param nameSet  the set of unresolved aliases.
         */
        private void registerUnresolvedNamesFor(String[] names, Collection<String> nameSet) {
            // This is not necessarily an error: it could happen that some
            //    charsets are simply not supported on some OS/Arch
            System.err.println("Warning: unresolved charset names: '"+ nameSet
                    + "' This is not necessarily an error "
                    + "- this charset may not be supported on this platform.");
            for (String a : names) {
                final String k = a.toUpperCase();
                final Collection<String> c = unresolved.get(k);
                if (c != null) {
                    //System.out.println("Found: "+a+" -> "+c);
                    //System.out.println("\t merging "+ c + " with " + nameSet);
                    nameSet.addAll(c);
                    for (String aa : c) {
                        unresolved.put(aa.toUpperCase(), nameSet);
                    }
                }
                unresolved.put(k, nameSet);
            }
        }


        /**
         * Add a new charset name mapping
         * @param javaName the (supposedly) java name of the charset.
         * @param xmlNames a list of corresponding XML names for that charset.
         */
        void addMapping(String javaName, Collection<String> xmlNames) {
            final LinkedHashSet<String> aliasNames = new LinkedHashSet<>();
            aliasNames.add(javaName);
            aliasNames.addAll(xmlNames);
            final String[] aliases = aliasNames.toArray(new String[aliasNames.size()]);
            final String cs = findCharsetNameFor(aliases);
            if (cs != null) {
                registerCharsetNameFor(cs, aliases);
                if (xmlNames.size() > 0) {
                    String preferred = xmlNames.iterator().next();
                    String cachedPreferred = preferredMime.get(cs.toUpperCase());
                    if (cachedPreferred != null && !cachedPreferred.equals(preferred)) {
                        throw new ConflictingPreferredMimeNameError(cs, cachedPreferred, preferred);
                    }
                    preferredMime.put(cs.toUpperCase(), preferred);
                }
            } else {
                registerUnresolvedNamesFor(aliases, aliasNames);
            }
        }

        /**
         * Returns the canonical name of the charset for the given Java or XML
         * alias name.
         * @param alias the alias name
         * @return the canonical charset name - or null if unknown.
         */
        public String getCharsetNameFor(String alias) {
            return charsetMap.get(alias.toUpperCase());
        }

    }

    public static void test(Properties props) throws Exception {

        // First, build a mapping from the properties read from the resource
        // file.
        // We're going to check the consistency of the resource file
        // while building this mapping, and throw errors if the file
        // does not meet our assumptions.
        //
        Map<String, Collection lines = new HashMap<>();
        final CheckCharsetMapping mapping = new CheckCharsetMapping();

        for (String key : props.stringPropertyNames()) {
            Collection<String> values = getValues(props.getProperty(key));
            lines.put(key, values);
            mapping.addMapping(key, values);
        }

        // Then build maps of EncodingInfos, and print along debugging
        // information that should help understand the content of the
        // resource file and the mapping it defines.
        //
        Map<String, EncodingInfo> javaInfos = new HashMap<>(); // Map indexed by java names
        Map<String, EncodingInfo> xmlMap = new HashMap<>();    // Map indexed by XML names
        Map<String, String> preferred =
                new HashMap<>(mapping.preferredMime);          // Java Name -> Preferred Mime Name
        List<EncodingInfo> all = new ArrayList<>();            // unused...
        for (Entry<String, Collection e : lines.entrySet()) {
            final String charsetName = mapping.getCharsetNameFor(e.getKey());
            if (charsetName == null) {
                System.out.println("!! No charset for: "+e.getKey()+ " "+ e.getValue());
                continue;
            }
            Charset c = Charset.forName(charsetName);
            EncodingInfo info;
            final String k = e.getKey().toUpperCase();
            final String kc = charsetName.toUpperCase();
            StringBuilder sb = new StringBuilder();
            for (String xml : e.getValue()) {
                final String kx = xml.toUpperCase();
                info = xmlMap.get(kx);
                if (info == null) {
                    info = new EncodingInfo(xml, charsetName);
                    System.out.println("** XML: "+xml+" -> "+charsetName);
                    xmlMap.put(kx, info);
                    all.add(info);
                }
                if (!javaInfos.containsKey(k)) {
                    javaInfos.put(k, info);
                    if (!preferred.containsKey(k)) {
                        preferred.put(k, xml);
                    }
                    sb.append("** Java: ").append(k).append(" -> ")
                            .append(xml).append(" (charset: ")
                            .append(charsetName).append(")\n");
                }
                if (!javaInfos.containsKey(kc)) {
                    if (!preferred.containsKey(kc)) {
                        preferred.put(kc, xml);
                    }
                    javaInfos.put(kc, info);
                    sb.append("** Java: ").append(kc).append(" -> ")
                            .append(xml).append(" (charset: ")
                            .append(charsetName).append(")\n");
                }
                if (!javaInfos.containsKey(c.name().toUpperCase())) {
                    if (!preferred.containsKey(c.name().toUpperCase())) {
                        preferred.put(c.name().toUpperCase(), xml);
                    }
                    javaInfos.put(c.name().toUpperCase(), info);
                    sb.append("** Java: ").append(c.name().toUpperCase()).append(" -> ")
                            .append(xml).append(" (charset: ")
                            .append(charsetName).append(")\n");
                }
            }
            if (sb.length() == 0) {
                System.out.println("Nothing new for "+charsetName+": "+e.getKey()+" -> "+e.getValue());
            } else {
                System.out.print(sb);
            }

        }

        // Now we're going to verify that Encodings.java has done its job
        // correctly. We're going to ask Encodings to convert java names to mime
        // names and mime names to java names - and verify that the returned
        // java names do map to recognized charsets.
        //
        // We're also going to verify that Encodings has recorded the preferred
        // mime name correctly.

        Method m = Encodings.class.getDeclaredMethod("getMimeEncoding", String.class);
        m.setAccessible(true);

        Set<String> xNames = new HashSet<>();
        Set<String> jNames = new HashSet<>();
        for (String name: xmlMap.keySet()) {
            final String javaName = checkConvertMime2Java(name);
            checkPreferredMime(m, javaName, preferred);
            jNames.add(javaName);
            xNames.add(name);
        }


        for (String javaName : lines.keySet()) {
            final String javaCharsetName = mapping.getCharsetNameFor(javaName.toUpperCase());
            if (javaCharsetName == null) continue;
            if (!jNames.contains(javaName)) {
                checkPreferredMime(m, javaName, preferred);
                jNames.add(javaName);
            }
            for (String xml : lines.get(javaName)) {
                if (xNames.contains(xml)) continue;
                final String jName = checkConvertMime2Java(xml);
                xNames.add(xml);
                if (jNames.contains(jName)) continue;
                checkPreferredMime(m, jName, preferred);
            }
        }
    }

    /**
     * Asks {@code Encodings} for the java encoding name matching the given
     * mime name, and checks that the answer is a name accepted by
     * {@link Charset#forName}.
     *
     * @param xml the mime (XML) name to convert.
     * @return the java name returned by Encodings.convertMime2JavaEncoding.
     * @throws Error if the returned name is not a recognized charset name.
     */
    private static String checkConvertMime2Java(String xml) {
        final String jName = Encodings.convertMime2JavaEncoding(xml);
        // Textual form of the call, shared by the error and trace messages.
        final String call = "Encodings.convertMime2JavaEncoding(\"" + xml + "\")";

        final Charset resolved;
        try {
            resolved = Charset.forName(jName);
        } catch (Exception x) {
            throw new Error("Unrecognized charset returned by " + call, x);
        }

        System.out.println(call + " = \"" + jName + "\" (" + resolved.name() + ")");
        return jName;
    }

    /**
     * Invokes the static method {@code m} with {@code javaName} and checks
     * that the result is the preferred mime name recorded for that name.
     *
     * @param m         static method taking a java name and returning a
     *                  mime name.
     * @param javaName  the java name to look up.
     * @param preferred expected mime names, indexed by upper cased java name.
     * @throws Error if the returned name differs from the expected one
     *         (two nulls are considered equal).
     * @throws Exception if the reflective invocation fails.
     */
    private static void checkPreferredMime(Method m, String javaName, Map<String,String> preferred)
            throws Exception {
        final Object returned = m.invoke(null, javaName);
        final String mime = (String) returned;
        final String expected = preferred.get(javaName.toUpperCase());

        // Null-safe comparison: either side may legitimately be null.
        final boolean matches = (mime == null) ? (expected == null) : mime.equals(expected);
        if (!matches) {
            throw new Error("Bad preferred mime type for: '"+javaName+"': expected '"+
                expected+"' but got '"+mime+"'");
        }
        System.out.println("Encodings.getMimeEncoding(\""+javaName+"\") = \""+mime+"\"");
    }

    /**
     * Extracts the names listed in a property value.
     * When the value contains a space, the text before the first space is
     * split on commas and whatever follows the space is ignored. When it
     * contains no space, the whole value is returned as a single name.
     *
     * @param val the property value.
     * @return the names found in the value.
     */
    private static Collection<String> getValues(String val) {
        final int space = val.indexOf(' ');
        if (space >= 0) {
            // Only the part before the first space holds names; the
            // remainder of the value is not used by this test.
            final StringTokenizer names =
                new StringTokenizer(val.substring(0, space), ",");
            final int count = names.countTokens();
            final List<String> result = new ArrayList<>(count);
            for (int i = 0; i < count; i++) {
                result.add(names.nextToken());
            }
            return result;
        }
        return Collections.singletonList(val);
    }

    /**
     * Debugging helper, meant to be called from main() when needed.
     * Prints the name of every charset returned by
     * Charset.availableCharsets() together with its aliases as
     * returned by the Charset API.
     */
    private static void printAllCharsets() {
        final Map<String, Charset> available = Charset.availableCharsets();
        System.out.println("\n=========================================\n");
        for (Entry<String, Charset> entry : available.entrySet()) {
            System.out.println(entry.getKey() + ": " + entry.getValue().aliases());
        }
    }
}

Other Java examples (source code examples)

Here is a short list of links related to this Java CheckEncodingPropertiesFile.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.