Java example source code file (DTDParser.java)

This example Java source code file (DTDParser.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" ^TM.

Learn more about this Java project at its project page.
Java - Java tags/keywords

character, externalentity, f\-021, inputentity, internalentity, ioexception, namecacheentry, object, sax, saxexception, saxparseexception, simplehashtable, string, stringbuffer, util
The DTDParser.java Java example source code

/*
 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package com.sun.xml.internal.dtdparser;

import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Set;
import java.util.Vector;

/**
 * This implements parsing of XML 1.0 DTDs.
 * <p/>
 * This conforms to the portion of the XML 1.0 specification related
 * to the external DTD subset.
 * <p/>
 * For multi-language applications (such as web servers using XML
 * processing to create dynamic content), a method supports choosing
 * a locale for parser diagnostics which is both understood by the
 * message recipient and supported by the parser.
 * <p/>
 * This parser produces a stream of parse events.  It supports some
 * features (exposing comments, CDATA sections, and entity references)
 * which are not required to be reported by conformant XML processors.
 *
 * @author David Brownell
 * @author Janet Koenig
 * @author Kohsuke KAWAGUCHI
 * @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $
 */
public class DTDParser {
    public final static String TYPE_CDATA = "CDATA";
    public final static String TYPE_ID = "ID";
    public final static String TYPE_IDREF = "IDREF";
    public final static String TYPE_IDREFS = "IDREFS";
    public final static String TYPE_ENTITY = "ENTITY";
    public final static String TYPE_ENTITIES = "ENTITIES";
    public final static String TYPE_NMTOKEN = "NMTOKEN";
    public final static String TYPE_NMTOKENS = "NMTOKENS";
    public final static String TYPE_NOTATION = "NOTATION";
    public final static String TYPE_ENUMERATION = "ENUMERATION";


    // stack of input entities being merged
    private InputEntity in;

    // temporaries reused during parsing
    private StringBuffer strTmp;
    private char nameTmp [];
    private NameCache nameCache;
    private char charTmp [] = new char[2];

    // temporary DTD parsing state
    private boolean doLexicalPE;

    // DTD state, used during parsing
//    private SimpleHashtable    elements = new SimpleHashtable (47);
    protected final Set declaredElements = new java.util.HashSet();
    private SimpleHashtable params = new SimpleHashtable(7);

    // exposed to package-private subclass
    Hashtable notations = new Hashtable(7);
    SimpleHashtable entities = new SimpleHashtable(17);

    private SimpleHashtable ids = new SimpleHashtable();

    // listeners for DTD parsing events
    private DTDEventListener dtdHandler;

    private EntityResolver resolver;
    private Locale locale;

    // string constants -- use these copies so "==" works
    // package private
    static final String strANY = "ANY";
    static final String strEMPTY = "EMPTY";

    /**
     * Used by applications to request locale for diagnostics.
     *
     * @param l The locale to use, or null to use system defaults
     *          (which may include only message IDs).
     */
    public void setLocale(Locale l) throws SAXException {

        if (l != null && !messages.isLocaleSupported(l.toString())) {
            throw new SAXException(messages.getMessage(locale,
                    "P-078", new Object[]{l}));
        }
        locale = l;
    }

    /**
     * Returns the diagnostic locale.
     */
    public Locale getLocale() {
        return locale;
    }

    /**
     * Chooses a client locale to use for diagnostics, using the first
     * language specified in the list that is supported by this parser.
     * That locale is then set using <a href="#setLocale(java.util.Locale)">
     * setLocale()</a>.  Such a list could be provided by a variety of user
     * preference mechanisms, including the HTTP <em>Accept-Language
     * header field.
     *
     * @param languages Array of language specifiers, ordered with the most
     *                  preferable one at the front.  For example, "en-ca" then "fr-ca",
     *                  followed by "zh_CN".  Both RFC 1766 and Java styles are supported.
     * @return The chosen locale, or null.
     * @see MessageCatalog
     */
    public Locale chooseLocale(String languages [])
            throws SAXException {

        Locale l = messages.chooseLocale(languages);

        if (l != null) {
            setLocale(l);
        }
        return l;
    }

    /**
     * Lets applications control entity resolution.
     */
    public void setEntityResolver(EntityResolver r) {

        resolver = r;
    }

    /**
     * Returns the object used to resolve entities
     */
    public EntityResolver getEntityResolver() {

        return resolver;
    }

    /**
     * Used by applications to set handling of DTD parsing events.
     */
    public void setDtdHandler(DTDEventListener handler) {
        dtdHandler = handler;
        if (handler != null)
            handler.setDocumentLocator(new Locator() {
                public String getPublicId() {
                    return DTDParser.this.getPublicId();
                }

                public String getSystemId() {
                    return DTDParser.this.getSystemId();
                }

                public int getLineNumber() {
                    return DTDParser.this.getLineNumber();
                }

                public int getColumnNumber() {
                    return DTDParser.this.getColumnNumber();
                }
            });
    }

    /**
     * Returns the handler used to for DTD parsing events.
     */
    public DTDEventListener getDtdHandler() {
        return dtdHandler;
    }

    /**
     * Parse a DTD.
     */
    public void parse(InputSource in)
            throws IOException, SAXException {
        init();
        parseInternal(in);
    }

    /**
     * Parse a DTD.
     */
    public void parse(String uri)
            throws IOException, SAXException {
        InputSource in;

        init();
        // System.out.println ("parse (\"" + uri + "\")");
        in = resolver.resolveEntity(null, uri);

        // If custom resolver punts resolution to parser, handle it ...
        if (in == null) {
            in = Resolver.createInputSource(new java.net.URL(uri), false);

            // ... or if custom resolver doesn't correctly construct the
            // input entity, patch it up enough so relative URIs work, and
            // issue a warning to minimize later confusion.
        } else if (in.getSystemId() == null) {
            warning("P-065", null);
            in.setSystemId(uri);
        }

        parseInternal(in);
    }

    // makes sure the parser is reset to "before a document"
    private void init() {
        in = null;

        // alloc temporary data used in parsing
        strTmp = new StringBuffer();
        nameTmp = new char[20];
        nameCache = new NameCache();

        // reset doc info
//        isInAttribute = false;

        doLexicalPE = false;

        entities.clear();
        notations.clear();
        params.clear();
        //    elements.clear ();
        declaredElements.clear();

        // initialize predefined references ... re-interpreted later
        builtin("amp", "&");
        builtin("lt", "<");
        builtin("gt", ">");
        builtin("quot", "\"");
        builtin("apos", "'");

        if (locale == null)
            locale = Locale.getDefault();
        if (resolver == null)
            resolver = new Resolver();
        if (dtdHandler == null)
            dtdHandler = new DTDHandlerBase();
    }

    private void builtin(String entityName, String entityValue) {
        InternalEntity entity;
        entity = new InternalEntity(entityName, entityValue.toCharArray());
        entities.put(entityName, entity);
    }


    ////////////////////////////////////////////////////////////////
    //
    // parsing is by recursive descent, code roughly
    // following the BNF rules except tweaked for simple
    // lookahead.  rules are more or less in numeric order,
    // except where code sharing suggests other structures.
    //
    // a classic benefit of recursive descent parsers:  it's
    // relatively easy to get diagnostics that make sense.
    //
    ////////////////////////////////////////////////////////////////


    private void parseInternal(InputSource input)
            throws IOException, SAXException {

        if (input == null)
            fatal("P-000");

        try {
            in = InputEntity.getInputEntity(dtdHandler, locale);
            in.init(input, null, null, false);

            dtdHandler.startDTD(in);

            // [30] extSubset ::= TextDecl? extSubsetDecl
            // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
            //        | PEReference | S )*
            //    ... same as [79] extPE, which is where the code is

            ExternalEntity externalSubset = new ExternalEntity(in);
            externalParameterEntity(externalSubset);

            if (!in.isEOF()) {
                fatal("P-001", new Object[]
                {Integer.toHexString(((int) getc()))});
            }
            afterRoot();
            dtdHandler.endDTD();

        } catch (EndOfInputException e) {
            if (!in.isDocument()) {
                String name = in.getName();
                do {    // force a relevant URI and line number
                    in = in.pop();
                } while (in.isInternal());
                fatal("P-002", new Object[]{name});
            } else {
                fatal("P-003", null);
            }
        } catch (RuntimeException e) {
            // Don't discard location that triggered the exception
            // ## Should properly wrap exception
            System.err.print("Internal DTD parser error: "); // ##
            e.printStackTrace();
            throw new SAXParseException(e.getMessage() != null
                    ? e.getMessage() : e.getClass().getName(),
                    getPublicId(), getSystemId(),
                    getLineNumber(), getColumnNumber());

        } finally {
            // recycle temporary data used during parsing
            strTmp = null;
            nameTmp = null;
            nameCache = null;

            // ditto input sources etc
            if (in != null) {
                in.close();
                in = null;
            }

            // get rid of all DTD info ... some of it would be
            // useful for editors etc, investigate later.

            params.clear();
            entities.clear();
            notations.clear();
            declaredElements.clear();
//        elements.clear();
            ids.clear();
        }
    }

    void afterRoot() throws SAXException {
        // Make sure all IDREFs match declared ID attributes.  We scan
        // after the document element is parsed, since XML allows forward
        // references, and only now can we know if they're all resolved.

        for (Enumeration e = ids.keys();
             e.hasMoreElements();
                ) {
            String id = (String) e.nextElement();
            Boolean value = (Boolean) ids.get(id);
            if (Boolean.FALSE == value)
                error("V-024", new Object[]{id});
        }
    }


    // role is for diagnostics
    private void whitespace(String roleId)
            throws IOException, SAXException {

        // [3] S ::= (#x20 | #x9 | #xd | #xa)+
        if (!maybeWhitespace()) {
            fatal("P-004", new Object[]
            {messages.getMessage(locale, roleId)});
        }
    }

    // S?
    private boolean maybeWhitespace()
            throws IOException, SAXException {

        if (!doLexicalPE)
            return in.maybeWhitespace();

        // see getc() for the PE logic -- this lets us splice
        // expansions of PEs in "anywhere".  getc() has smarts,
        // so for external PEs we don't bypass it.

        // XXX we can marginally speed PE handling, and certainly
        // be cleaner (hence potentially more correct), by using
        // the observations that expanded PEs only start and stop
        // where whitespace is allowed.  getc wouldn't need any
        // "lexical" PE expansion logic, and no other method needs
        // to handle termination of PEs.  (parsing of literals would
        // still need to pop entities, but not parsing of references
        // in content.)

        char c = getc();
        boolean saw = false;

        while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
            saw = true;

            // this gracefully ends things when we stop playing
            // with internal parameters.  caller should have a
            // grammar rule allowing whitespace at end of entity.
            if (in.isEOF() && !in.isInternal())
                return saw;
            c = getc();
        }
        ungetc();
        return saw;
    }

    private String maybeGetName()
            throws IOException, SAXException {

        NameCacheEntry entry = maybeGetNameCacheEntry();
        return (entry == null) ? null : entry.name;
    }

    private NameCacheEntry maybeGetNameCacheEntry()
            throws IOException, SAXException {

        // [5] Name ::= (Letter|'_'|':') (Namechar)*
        char c = getc();

        if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
            ungetc();
            return null;
        }
        return nameCharString(c);
    }

    // Used when parsing enumerations
    private String getNmtoken()
            throws IOException, SAXException {

        // [7] Nmtoken ::= (Namechar)+
        char c = getc();
        if (!XmlChars.isNameChar(c))
            fatal("P-006", new Object[]{new Character(c)});
        return nameCharString(c).name;
    }

    // n.b. this gets used when parsing attribute values (for
    // internal references) so we can't use strTmp; it's also
    // a hotspot for CPU and memory in the parser (called at least
    // once for each element) so this has been optimized a bit.

    private NameCacheEntry nameCharString(char c)
            throws IOException, SAXException {

        int i = 1;

        nameTmp[0] = c;
        for (; ;) {
            if ((c = in.getNameChar()) == 0)
                break;
            if (i >= nameTmp.length) {
                char tmp [] = new char[nameTmp.length + 10];
                System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
                nameTmp = tmp;
            }
            nameTmp[i++] = c;
        }
        return nameCache.lookupEntry(nameTmp, i);
    }

    //
    // much similarity between parsing entity values in DTD
    // and attribute values (in DTD or content) ... both follow
    // literal parsing rules, newline canonicalization, etc
    //
    // leaves value in 'strTmp' ... either a "replacement text" (4.5),
    // or else partially normalized attribute value (the first bit
    // of 3.3.3's spec, without the "if not CDATA" bits).
    //
    private void parseLiteral(boolean isEntityValue)
            throws IOException, SAXException {

        // [9] EntityValue ::=
        //    '"' ([^"&%] | Reference | PEReference)* '"'
        //    |    "'" ([^'&%] | Reference | PEReference)* "'"
        // [10] AttValue ::=
        //    '"' ([^"&]  | Reference             )* '"'
        //    |    "'" ([^'&]  | Reference             )* "'"
        char quote = getc();
        char c;
        InputEntity source = in;

        if (quote != '\'' && quote != '"') {
            fatal("P-007");
        }

        // don't report entity expansions within attributes,
        // they're reported "fully expanded" via SAX
//    isInAttribute = !isEntityValue;

        // get value into strTmp
        strTmp = new StringBuffer();

        // scan, allowing entity push/pop wherever ...
        // expanded entities can't terminate the literal!
        for (; ;) {
            if (in != source && in.isEOF()) {
                // we don't report end of parsed entities
                // within attributes (no SAX hooks)
                in = in.pop();
                continue;
            }
            if ((c = getc()) == quote && in == source) {
                break;
            }

            //
            // Basically the "reference in attribute value"
            // row of the chart in section 4.4 of the spec
            //
            if (c == '&') {
                String entityName = maybeGetName();

                if (entityName != null) {
                    nextChar(';', "F-020", entityName);

                    // 4.4 says:  bypass these here ... we'll catch
                    // forbidden refs to unparsed entities on use
                    if (isEntityValue) {
                        strTmp.append('&');
                        strTmp.append(entityName);
                        strTmp.append(';');
                        continue;
                    }
                    expandEntityInLiteral(entityName, entities, isEntityValue);


                    // character references are always included immediately
                } else if ((c = getc()) == '#') {
                    int tmp = parseCharNumber();

                    if (tmp > 0xffff) {
                        tmp = surrogatesToCharTmp(tmp);
                        strTmp.append(charTmp[0]);
                        if (tmp == 2)
                            strTmp.append(charTmp[1]);
                    } else
                        strTmp.append((char) tmp);
                } else
                    fatal("P-009");
                continue;

            }

            // expand parameter entities only within entity value literals
            if (c == '%' && isEntityValue) {
                String entityName = maybeGetName();

                if (entityName != null) {
                    nextChar(';', "F-021", entityName);
                    expandEntityInLiteral(entityName, params, isEntityValue);
                    continue;
                } else
                    fatal("P-011");
            }

            // For attribute values ...
            if (!isEntityValue) {
                // 3.3.3 says whitespace normalizes to space...
                if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                    strTmp.append(' ');
                    continue;
                }

                // "<" not legal in parsed literals ...
                if (c == '<')
                    fatal("P-012");
            }

            strTmp.append(c);
        }
//    isInAttribute = false;
    }

    // does a SINGLE expansion of the entity (often reparsed later)
    private void expandEntityInLiteral(String name, SimpleHashtable table,
                                       boolean isEntityValue)
            throws IOException, SAXException {

        Object entity = table.get(name);

        if (entity instanceof InternalEntity) {
            InternalEntity value = (InternalEntity) entity;
            pushReader(value.buf, name, !value.isPE);

        } else if (entity instanceof ExternalEntity) {
            if (!isEntityValue)    // must be a PE ...
                fatal("P-013", new Object[]{name});
            // XXX if this returns false ...
            pushReader((ExternalEntity) entity);

        } else if (entity == null) {
            //
            // Note:  much confusion about whether spec requires such
            // errors to be fatal in many cases, but none about whether
            // it allows "normal" errors to be unrecoverable!
            //
            fatal((table == params) ? "V-022" : "P-014",
                    new Object[]{name});
        }
    }

    // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
    // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'

    // NOTE:  XML spec should explicitly say that PE ref syntax is
    // ignored in PIs, comments, SystemLiterals, and Pubid Literal
    // values ... can't process the XML spec's own DTD without doing
    // that for comments.

    private String getQuotedString(String type, String extra)
            throws IOException, SAXException {

        // use in.getc to bypass PE processing
        char quote = in.getc();

        if (quote != '\'' && quote != '"')
            fatal("P-015", new Object[]{
                messages.getMessage(locale, type, new Object[]{extra})
            });

        char c;

        strTmp = new StringBuffer();
        while ((c = in.getc()) != quote)
            strTmp.append((char) c);
        return strTmp.toString();
    }


    private String parsePublicId() throws IOException, SAXException {

        // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
        // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
        String retval = getQuotedString("F-033", null);
        for (int i = 0; i < retval.length(); i++) {
            char c = retval.charAt(i);
            if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
                    && !(c >= 'A' && c <= 'Z')
                    && !(c >= 'a' && c <= 'z'))
                fatal("P-016", new Object[]{new Character(c)});
        }
        strTmp = new StringBuffer();
        strTmp.append(retval);
        return normalize(false);
    }

    // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
    // handled by:  InputEntity.parsedContent()

    private boolean maybeComment(boolean skipStart)
            throws IOException, SAXException {

        // [15] Comment ::= '<!--'
        //        ( (Char - '-') | ('-' (Char - '-'))*
        //        '-->'
        if (!in.peek(skipStart ? "!--" : "<!--", null))
            return false;

        boolean savedLexicalPE = doLexicalPE;
        boolean saveCommentText;

        doLexicalPE = false;
        saveCommentText = false;
        if (saveCommentText)
            strTmp = new StringBuffer();

        oneComment:
        for (; ;) {
            try {
                // bypass PE expansion, but permit PEs
                // to complete ... valid docs won't care.
                for (; ;) {
                    int c = getc();
                    if (c == '-') {
                        c = getc();
                        if (c != '-') {
                            if (saveCommentText)
                                strTmp.append('-');
                            ungetc();
                            continue;
                        }
                        nextChar('>', "F-022", null);
                        break oneComment;
                    }
                    if (saveCommentText)
                        strTmp.append((char) c);
                }
            } catch (EndOfInputException e) {
                //
                // This is fatal EXCEPT when we're processing a PE...
                // in which case a validating processor reports an error.
                // External PEs are easy to detect; internal ones we
                // infer by being an internal entity outside an element.
                //
                if (in.isInternal()) {
                    error("V-021", null);
                }
                fatal("P-017");
            }
        }
        doLexicalPE = savedLexicalPE;
        if (saveCommentText)
            dtdHandler.comment(strTmp.toString());
        return true;
    }

    private boolean maybePI(boolean skipStart)
            throws IOException, SAXException {

        // [16] PI ::= '<?' PITarget
        //        (S (Char* - (Char* '?>' Char*)))?
        //        '?>'
        // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
        boolean savedLexicalPE = doLexicalPE;

        if (!in.peek(skipStart ? "?" : "<?", null))
            return false;
        doLexicalPE = false;

        String target = maybeGetName();

        if (target == null) {
            fatal("P-018");
        }
        if ("xml".equals(target)) {
            fatal("P-019");
        }
        if ("xml".equalsIgnoreCase(target)) {
            fatal("P-020", new Object[]{target});
        }

        if (maybeWhitespace()) {
            strTmp = new StringBuffer();
            try {
                for (; ;) {
                    // use in.getc to bypass PE processing
                    char c = in.getc();
                    //Reached the end of PI.
                    if (c == '?' && in.peekc('>'))
                        break;
                    strTmp.append(c);
                }
            } catch (EndOfInputException e) {
                fatal("P-021");
            }
            dtdHandler.processingInstruction(target, strTmp.toString());
        } else {
            if (!in.peek("?>", null)) {
                fatal("P-022");
            }
            dtdHandler.processingInstruction(target, "");
        }

        doLexicalPE = savedLexicalPE;
        return true;
    }

    // [18] CDSect ::= CDStart CData CDEnd
    // [19] CDStart ::= '<![CDATA['
    // [20] CData ::= (Char* - (Char* ']]>' Char*))
    // [21] CDEnd ::= ']]>'
    //
    //    ... handled by InputEntity.unparsedContent()

    // collapsing several rules together ...
    // simpler than attribute literals -- no reference parsing!
    private String maybeReadAttribute(String name, boolean must)
            throws IOException, SAXException {

        // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
        // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
        // [32] SDDecl ::=  S 'standalone' Eq \'|\" ... \'|\"
        if (!maybeWhitespace()) {
            if (!must) {
                return null;
            }
            fatal("P-024", new Object[]{name});
            // NOTREACHED
        }

        if (!peek(name)) {
            if (must) {
                fatal("P-024", new Object[]{name});
            } else {
                // To ensure that the whitespace is there so that when we
                // check for the next attribute we assure that the
                // whitespace still exists.
                ungetc();
                return null;
            }
        }

        // [25] Eq ::= S? '=' S?
        maybeWhitespace();
        nextChar('=', "F-023", null);
        maybeWhitespace();

        return getQuotedString("F-035", name);
    }

    private void readVersion(boolean must, String versionNum)
            throws IOException, SAXException {

        String value = maybeReadAttribute("version", must);

        // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+

        if (must && value == null)
            fatal("P-025", new Object[]{versionNum});
        if (value != null) {
            int length = value.length();
            for (int i = 0; i < length; i++) {
                char c = value.charAt(i);
                if (!((c >= '0' && c <= '9')
                        || c == '_' || c == '.'
                        || (c >= 'a' && c <= 'z')
                        || (c >= 'A' && c <= 'Z')
                        || c == ':' || c == '-')
                )
                    fatal("P-026", new Object[]{value});
            }
        }
        if (value != null && !value.equals(versionNum))
            error("P-027", new Object[]{versionNum, value});
    }

    // common code used by most markup declarations
    // ... S (Q)Name ...
    private String getMarkupDeclname(String roleId, boolean qname)
            throws IOException, SAXException {

        String name;

        whitespace(roleId);
        name = maybeGetName();
        if (name == null)
            fatal("P-005", new Object[]
            {messages.getMessage(locale, roleId)});
        return name;
    }

    private boolean maybeMarkupDecl()
            throws IOException, SAXException {

        // [29] markupdecl ::= elementdecl | Attlistdecl
        //           | EntityDecl | NotationDecl | PI | Comment
        return maybeElementDecl()
                || maybeAttlistDecl()
                || maybeEntityDecl()
                || maybeNotationDecl()
                || maybePI(false)
                || maybeComment(false);
    }

    private static final String XmlLang = "xml:lang";

    private boolean isXmlLang(String value) {

        // [33] LanguageId ::= Langcode ('-' Subcode)*
        // [34] Langcode ::= ISO639Code | IanaCode | UserCode
        // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
        // [36] IanaCode ::= [iI] '-' SubCode
        // [37] UserCode ::= [xX] '-' SubCode
        // [38] SubCode ::= [a-zA-Z]+

        // the ISO and IANA codes (and subcodes) are registered,
        // but that's neither a WF nor a validity constraint.

        int nextSuffix;
        char c;

        if (value.length() < 2)
            return false;
        c = value.charAt(1);
        if (c == '-') {        // IANA, or user, code
            c = value.charAt(0);
            if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
                return false;
            nextSuffix = 1;
        } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
            // 2 letter ISO code, or error
            c = value.charAt(0);
            if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
                return false;
            nextSuffix = 2;
        } else
            return false;

        // here "suffix" ::= '-' [a-zA-Z]+ suffix*
        while (nextSuffix < value.length()) {
            c = value.charAt(nextSuffix);
            if (c != '-')
                break;
            while (++nextSuffix < value.length()) {
                c = value.charAt(nextSuffix);
                if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
                    break;
            }
        }
        return value.length() == nextSuffix && c != '-';
    }


    //
    // CHAPTER 3:  Logical Structures
    //

    /**
     * To validate, subclassers should at this time make sure that
     * values are of the declared types:<UL>
     * <LI> ID and IDREF(S) values are Names
     * <LI> NMTOKEN(S) are Nmtokens
     * <LI> ENUMERATION values match one of the tokens
     * <LI> NOTATION values match a notation name
     * <LI> ENTITIY(IES) values match an unparsed external entity
     * </UL>
     * <p/>
     * <P> Separately, make sure IDREF values match some ID
     * provided in the document (in the afterRoot method).
     */
/*    void validateAttributeSyntax (Attribute attr, String value)
         throws DTDParseException {
        // ID, IDREF(S) ... values are Names
        if (Attribute.ID == attr.type()) {
            if (!XmlNames.isName (value))
                error ("V-025", new Object [] { value });

            Boolean             b = (Boolean) ids.getNonInterned (value);
            if (b == null || b.equals (Boolean.FALSE))
                ids.put (value.intern (), Boolean.TRUE);
            else
                error ("V-026", new Object [] { value });

        } else if (Attribute.IDREF == attr.type()) {
            if (!XmlNames.isName (value))
                error ("V-027", new Object [] { value });

            Boolean             b = (Boolean) ids.getNonInterned (value);
            if (b == null)
                ids.put (value.intern (), Boolean.FALSE);

        } else if (Attribute.IDREFS == attr.type()) {
            StringTokenizer     tokenizer = new StringTokenizer (value);
            Boolean             b;
            boolean             sawValue = false;

            while (tokenizer.hasMoreTokens ()) {
                value = tokenizer.nextToken ();
                if (!XmlNames.isName (value))
                    error ("V-027", new Object [] { value });
                b = (Boolean) ids.getNonInterned (value);
                if (b == null)
                    ids.put (value.intern (), Boolean.FALSE);
                sawValue = true;
            }
            if (!sawValue)
                error ("V-039", null);


        // NMTOKEN(S) ... values are Nmtoken(s)
        } else if (Attribute.NMTOKEN == attr.type()) {
            if (!XmlNames.isNmtoken (value))
                error ("V-028", new Object [] { value });

        } else if (Attribute.NMTOKENS == attr.type()) {
            StringTokenizer     tokenizer = new StringTokenizer (value);
            boolean             sawValue = false;

            while (tokenizer.hasMoreTokens ()) {
                value = tokenizer.nextToken ();
                if (!XmlNames.isNmtoken (value))
                    error ("V-028", new Object [] { value });
                sawValue = true;
            }
            if (!sawValue)
                error ("V-032", null);

        // ENUMERATION ... values match one of the tokens
        } else if (Attribute.ENUMERATION == attr.type()) {
            for (int i = 0; i < attr.values().length; i++)
                if (value.equals (attr.values()[i]))
                    return;
            error ("V-029", new Object [] { value });

        // NOTATION values match a notation name
        } else if (Attribute.NOTATION == attr.type()) {
            //
            // XXX XML 1.0 spec should probably list references to
            // externally defined notations in standalone docs as
            // validity errors.  Ditto externally defined unparsed
            // entities; neither should show up in attributes, else
            // one needs to read the external declarations in order
            // to make sense of the document (exactly what tagging
            // a doc as "standalone" intends you won't need to do).
            //
            for (int i = 0; i < attr.values().length; i++)
                if (value.equals (attr.values()[i]))
                    return;
            error ("V-030", new Object [] { value });

        // ENTITY(IES) values match an unparsed entity(ies)
        } else if (Attribute.ENTITY == attr.type()) {
            // see note above re standalone
            if (!isUnparsedEntity (value))
                error ("V-031", new Object [] { value });

        } else if (Attribute.ENTITIES == attr.type()) {
            StringTokenizer     tokenizer = new StringTokenizer (value);
            boolean             sawValue = false;

            while (tokenizer.hasMoreTokens ()) {
                value = tokenizer.nextToken ();
                // see note above re standalone
                if (!isUnparsedEntity (value))
                    error ("V-031", new Object [] { value });
                sawValue = true;
            }
            if (!sawValue)
                error ("V-040", null);

        } else if (Attribute.CDATA != attr.type())
            throw new InternalError (attr.type());
    }
*/
/*
    private boolean isUnparsedEntity (String name)
    {
        Object e = entities.getNonInterned (name);
        if (e == null || !(e instanceof ExternalEntity))
            return false;
        return ((ExternalEntity)e).notation != null;
    }
*/
    private boolean maybeElementDecl()
            throws IOException, SAXException {

        // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
        // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
        InputEntity start = peekDeclaration("!ELEMENT");

        if (start == null)
            return false;

        // n.b. for content models where inter-element whitespace is
        // ignorable, we mark that fact here.
        String name = getMarkupDeclname("F-015", true);
//    Element        element = (Element) elements.get (name);
//    boolean        declEffective = false;

/*
    if (element != null) {
        if (element.contentModel() != null) {
            error ("V-012", new Object [] { name });
        } // else <!ATTLIST name ...> came first
    } else {
        element = new Element(name);
        elements.put (element.name(), element);
        declEffective = true;
    }
*/
        if (declaredElements.contains(name))
            error("V-012", new Object[]{name});
        else {
            declaredElements.add(name);
//        declEffective = true;
        }

        short modelType;
        whitespace("F-000");
        if (peek(strEMPTY)) {
///        // leave element.contentModel as null for this case.
            dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
        } else if (peek(strANY)) {
///        element.setContentModel(new StringModel(StringModelType.ANY));
            dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY);
        } else {
            modelType = getMixedOrChildren(name);
        }

        dtdHandler.endContentModel(name, modelType);

        maybeWhitespace();
        char c = getc();
        if (c != '>')
            fatal("P-036", new Object[]{name, new Character(c)});
        if (start != in)
            error("V-013", null);

///        dtdHandler.elementDecl(element);

        return true;
    }

    // We're leaving the content model as a regular expression;
    // it's an efficient natural way to express such things, and
    // libraries often interpret them.  No whitespace in the
    // model we store, though!

    /**
     * returns content model type.
     */
    private short getMixedOrChildren(String elementName/*Element element*/)
            throws IOException, SAXException {

        InputEntity start;

        // [47] children ::= (choice|seq) ('?'|'*'|'+')?
        strTmp = new StringBuffer();

        nextChar('(', "F-028", elementName);
        start = in;
        maybeWhitespace();
        strTmp.append('(');

        short modelType;
        if (peek("#PCDATA")) {
            strTmp.append("#PCDATA");
            dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED);
            getMixed(elementName, start);
        } else {
            dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN);
            getcps(elementName, start);
        }

        return modelType;
    }

    // '(' S? already consumed
    // matching ')' must be in "start" entity if validating
    private void getcps(/*Element element,*/String elementName, InputEntity start)
            throws IOException, SAXException {

        // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
        // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
        // [50] seq    ::= '(' S? cp (S? ',' S? cp)* S? ')'
        boolean decided = false;
        char type = 0;
//        ContentModel       retval, temp, current;

//        retval = temp = current = null;

        dtdHandler.startModelGroup();

        do {
            String tag;

            tag = maybeGetName();
            if (tag != null) {
                strTmp.append(tag);
//                temp = new ElementModel(tag);
//                getFrequency((RepeatableContent)temp);
///->
                dtdHandler.childElement(tag, getFrequency());
///<-
            } else if (peek("(")) {
                InputEntity next = in;
                strTmp.append('(');
                maybeWhitespace();
//                temp = getcps(element, next);
//                getFrequency(temp);
///->
                getcps(elementName, next);
///                getFrequency();        <- this looks like a bug
///<-
            } else
                fatal((type == 0) ? "P-039" :
                        ((type == ',') ? "P-037" : "P-038"),
                        new Object[]{new Character(getc())});

            maybeWhitespace();
            if (decided) {
                char c = getc();

//                if (current != null) {
//                    current.addChild(temp);
//                }
                if (c == type) {
                    strTmp.append(type);
                    maybeWhitespace();
                    reportConnector(type);
                    continue;
                } else if (c == '\u0029') {    // rparen
                    ungetc();
                    continue;
                } else {
                    fatal((type == 0) ? "P-041" : "P-040",
                            new Object[]{
                                new Character(c),
                                new Character(type)
                            });
                }
            } else {
                type = getc();
                switch (type) {
                case '|':
                case ',':
                    reportConnector(type);
                    break;
                default:
//                        retval = temp;
                    ungetc();
                    continue;
                }
//                retval = (ContentModel)current;
                decided = true;
//                current.addChild(temp);
                strTmp.append(type);
            }
            maybeWhitespace();
        } while (!peek(")"));

        if (in != start)
            error("V-014", new Object[]{elementName});
        strTmp.append(')');

        dtdHandler.endModelGroup(getFrequency());
//        return retval;
    }

    private void reportConnector(char type) throws SAXException {
        switch (type) {
        case '|':
            dtdHandler.connector(DTDEventListener.CHOICE);    ///<-
            return;
        case ',':
            dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
            return;
        default:
            throw new Error();    //assertion failed.
        }
    }

    private short getFrequency()
            throws IOException, SAXException {

        final char c = getc();

        if (c == '?') {
            strTmp.append(c);
            return DTDEventListener.OCCURENCE_ZERO_OR_ONE;
            //        original.setRepeat(Repeat.ZERO_OR_ONE);
        } else if (c == '+') {
            strTmp.append(c);
            return DTDEventListener.OCCURENCE_ONE_OR_MORE;
            //        original.setRepeat(Repeat.ONE_OR_MORE);
        } else if (c == '*') {
            strTmp.append(c);
            return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
            //        original.setRepeat(Repeat.ZERO_OR_MORE);
        } else {
            ungetc();
            return DTDEventListener.OCCURENCE_ONCE;
        }
    }

    // '(' S? '#PCDATA' already consumed
    // matching ')' must be in "start" entity if validating
    private void getMixed(String elementName, /*Element element,*/ InputEntity start)
            throws IOException, SAXException {

        // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
        //        | '(' S? '#PCDATA'                   S? ')'
        maybeWhitespace();
        if (peek("\u0029*") || peek("\u0029")) {
            if (in != start)
                error("V-014", new Object[]{elementName});
            strTmp.append(')');
//            element.setContentModel(new StringModel(StringModelType.PCDATA));
            return;
        }

        ArrayList l = new ArrayList();
//    l.add(new StringModel(StringModelType.PCDATA));


        while (peek("|")) {
            String name;

            strTmp.append('|');
            maybeWhitespace();

            doLexicalPE = true;
            name = maybeGetName();
            if (name == null)
                fatal("P-042", new Object[]
                {elementName, Integer.toHexString(getc())});
            if (l.contains(name)) {
                error("V-015", new Object[]{name});
            } else {
                l.add(name);
                dtdHandler.mixedElement(name);
            }
            strTmp.append(name);
            maybeWhitespace();
        }

        if (!peek("\u0029*"))    // right paren
            fatal("P-043", new Object[]
            {elementName, new Character(getc())});
        if (in != start)
            error("V-014", new Object[]{elementName});
        strTmp.append(')');
//        ChoiceModel cm = new ChoiceModel((Collection)l);
//    cm.setRepeat(Repeat.ZERO_OR_MORE);
//       element.setContentModel(cm);
    }

    private boolean maybeAttlistDecl()
            throws IOException, SAXException {

        // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
        InputEntity start = peekDeclaration("!ATTLIST");

        if (start == null)
            return false;

        String elementName = getMarkupDeclname("F-016", true);
//    Element    element = (Element) elements.get (name);

//    if (element == null) {
//        // not yet declared -- no problem.
//        element = new Element(name);
//        elements.put(name, element);
//    }

        while (!peek(">")) {

            // [53] AttDef ::= S Name S AttType S DefaultDecl
            // [54] AttType ::= StringType | TokenizedType | EnumeratedType

            // look for global attribute definitions, don't expand for now...
            maybeWhitespace();
            char c = getc();
            if (c == '%') {
                String entityName = maybeGetName();
                if (entityName != null) {
                    nextChar(';', "F-021", entityName);
                    whitespace("F-021");
                    continue;
                } else
                    fatal("P-011");
            }

            ungetc();
            // look for attribute name otherwise
            String attName = maybeGetName();
            if (attName == null) {
                fatal("P-044", new Object[]{new Character(getc())});
            }
            whitespace("F-001");

///        Attribute    a = new Attribute (name);

            String typeName;
            Vector values = null;    // notation/enumeration values

            // Note:  use the type constants from Attribute
            // so that "==" may be used (faster)

            // [55] StringType ::= 'CDATA'
            if (peek(TYPE_CDATA))
///            a.setType(Attribute.CDATA);
                typeName = TYPE_CDATA;

            // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
            //        | 'ENTITY' | 'ENTITIES'
            //        | 'NMTOKEN' | 'NMTOKENS'
            // n.b. if "IDREFS" is there, both "ID" and "IDREF"
            // match peekahead ... so this order matters!
            else if (peek(TYPE_IDREFS))
                typeName = TYPE_IDREFS;
            else if (peek(TYPE_IDREF))
                typeName = TYPE_IDREF;
            else if (peek(TYPE_ID)) {
                typeName = TYPE_ID;
// TODO: should implement this error check?
///        if (element.id() != null) {
///                    error ("V-016", new Object [] { element.id() });
///        } else
///            element.setId(name);
            } else if (peek(TYPE_ENTITY))
                typeName = TYPE_ENTITY;
            else if (peek(TYPE_ENTITIES))
                typeName = TYPE_ENTITIES;
            else if (peek(TYPE_NMTOKENS))
                typeName = TYPE_NMTOKENS;
            else if (peek(TYPE_NMTOKEN))
                typeName = TYPE_NMTOKEN;

            // [57] EnumeratedType ::= NotationType | Enumeration
            // [58] NotationType ::= 'NOTATION' S '(' S? Name
            //        (S? '|' S? Name)* S? ')'
            else if (peek(TYPE_NOTATION)) {
                typeName = TYPE_NOTATION;
                whitespace("F-002");
                nextChar('(', "F-029", null);
                maybeWhitespace();

                values = new Vector();
                do {
                    String name;
                    if ((name = maybeGetName()) == null)
                        fatal("P-068");
                    // permit deferred declarations
                    if (notations.get(name) == null)
                        notations.put(name, name);
                    values.addElement(name);
                    maybeWhitespace();
                    if (peek("|"))
                        maybeWhitespace();
                } while (!peek(")"));
///            a.setValues(new String [v.size ()]);
///            for (int i = 0; i < v.size (); i++)
///                a.setValue(i, (String)v.elementAt(i));

                // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
            } else if (peek("(")) {
///            a.setType(Attribute.ENUMERATION);
                typeName = TYPE_ENUMERATION;

                maybeWhitespace();

///            Vector v = new Vector ();
                values = new Vector();
                do {
                    String name = getNmtoken();
///                v.addElement (name);
                    values.addElement(name);
                    maybeWhitespace();
                    if (peek("|"))
                        maybeWhitespace();
                } while (!peek(")"));
///            a.setValues(new String [v.size ()]);
///            for (int i = 0; i < v.size (); i++)
///                a.setValue(i, (String)v.elementAt(i));
            } else {
                fatal("P-045",
                        new Object[]{attName, new Character(getc())});
                typeName = null;
            }

            short attributeUse;
            String defaultValue = null;

            // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
            //        | (('#FIXED' S)? AttValue)
            whitespace("F-003");
            if (peek("#REQUIRED"))
                attributeUse = DTDEventListener.USE_REQUIRED;
///            a.setIsRequired(true);
            else if (peek("#FIXED")) {
///            if (a.type() == Attribute.ID)
                if (typeName == TYPE_ID)
                    error("V-017", new Object[]{attName});
///            a.setIsFixed(true);
                attributeUse = DTDEventListener.USE_FIXED;
                whitespace("F-004");
                parseLiteral(false);
///            if (a.type() != Attribute.CDATA)
///                a.setDefaultValue(normalize(false));
///            else
///                a.setDefaultValue(strTmp.toString());

                if (typeName == TYPE_CDATA)
                    defaultValue = normalize(false);
                else
                    defaultValue = strTmp.toString();

// TODO: implement this check
///            if (a.type() != Attribute.CDATA)
///                validateAttributeSyntax (a, a.defaultValue());
            } else if (!peek("#IMPLIED")) {
                attributeUse = DTDEventListener.USE_IMPLIED;

///            if (a.type() == Attribute.ID)
                if (typeName == TYPE_ID)
                    error("V-018", new Object[]{attName});
                parseLiteral(false);
///            if (a.type() != Attribute.CDATA)
///                a.setDefaultValue(normalize(false));
///            else
///                a.setDefaultValue(strTmp.toString());
                if (typeName == TYPE_CDATA)
                    defaultValue = normalize(false);
                else
                    defaultValue = strTmp.toString();

// TODO: implement this check
///            if (a.type() != Attribute.CDATA)
///                validateAttributeSyntax (a, a.defaultValue());
            } else {
                // TODO: this looks like an fatal error.
                attributeUse = DTDEventListener.USE_NORMAL;
            }

            if (XmlLang.equals(attName)
                    && defaultValue/* a.defaultValue()*/ != null
                    && !isXmlLang(defaultValue/*a.defaultValue()*/))
                error("P-033", new Object[]{defaultValue /*a.defaultValue()*/});

// TODO: isn't it an error to specify the same attribute twice?
///        if (!element.attributes().contains(a)) {
///            element.addAttribute(a);
///            dtdHandler.attributeDecl(a);
///        }

            String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null;
            dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue);
            maybeWhitespace();
        }
        if (start != in)
            error("V-013", null);
        return true;
    }

    // used when parsing literal attribute values,
    // or public identifiers.
    //
    // input in strTmp
    private String normalize(boolean invalidIfNeeded) {

        // this can allocate an extra string...

        String s = strTmp.toString();
        String s2 = s.trim();
        boolean didStrip = false;

        if (s != s2) {
            s = s2;
            s2 = null;
            didStrip = true;
        }
        strTmp = new StringBuffer();
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (!XmlChars.isSpace(c)) {
                strTmp.append(c);
                continue;
            }
            strTmp.append(' ');
            while (++i < s.length() && XmlChars.isSpace(s.charAt(i)))
                didStrip = true;
            i--;
        }
        if (didStrip)
            return strTmp.toString();
        else
            return s;
    }

    private boolean maybeConditionalSect()
            throws IOException, SAXException {

        // [61] conditionalSect ::= includeSect | ignoreSect

        if (!peek("<!["))
            return false;

        String keyword;
        InputEntity start = in;

        maybeWhitespace();

        if ((keyword = maybeGetName()) == null)
            fatal("P-046");
        maybeWhitespace();
        nextChar('[', "F-030", null);

        // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
        //                extSubsetDecl ']]>'
        if ("INCLUDE".equals(keyword)) {
            for (; ;) {
                while (in.isEOF() && in != start)
                    in = in.pop();
                if (in.isEOF()) {
                    error("V-020", null);
                }
                if (peek("]]>"))
                    break;

                doLexicalPE = false;
                if (maybeWhitespace())
                    continue;
                if (maybePEReference())
                    continue;
                doLexicalPE = true;
                if (maybeMarkupDecl() || maybeConditionalSect())
                    continue;

                fatal("P-047");
            }

            // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
            //            ignoreSectcontents ']]>'
            // [64] ignoreSectcontents ::= Ignore ('<!['
            //            ignoreSectcontents ']]>' Ignore)*
            // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
        } else if ("IGNORE".equals(keyword)) {
            int nestlevel = 1;
            // ignoreSectcontents
            doLexicalPE = false;
            while (nestlevel > 0) {
                char c = getc();    // will pop input entities
                if (c == '<') {
                    if (peek("!["))
                        nestlevel++;
                } else if (c == ']') {
                    if (peek("]>"))
                        nestlevel--;
                } else
                    continue;
            }
        } else
            fatal("P-048", new Object[]{keyword});
        return true;
    }


    //
    // CHAPTER 4:  Physical Structures
    //

    // parse decimal or hex numeric character reference
    private int parseCharNumber()
            throws IOException, SAXException {

        char c;
        int retval = 0;

        // n.b. we ignore overflow ...
        if (getc() != 'x') {
            ungetc();
            for (; ;) {
                c = getc();
                if (c >= '0' && c <= '9') {
                    retval *= 10;
                    retval += (c - '0');
                    continue;
                }
                if (c == ';')
                    return retval;
                fatal("P-049");
            }
        } else
            for (; ;) {
                c = getc();
                if (c >= '0' && c <= '9') {
                    retval <<= 4;
                    retval += (c - '0');
                    continue;
                }
                if (c >= 'a' && c <= 'f') {
                    retval <<= 4;
                    retval += 10 + (c - 'a');
                    continue;
                }
                if (c >= 'A' && c <= 'F') {
                    retval <<= 4;
                    retval += 10 + (c - 'A');
                    continue;
                }
                if (c == ';')
                    return retval;
                fatal("P-050");
            }
    }

    // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
    // though still subject to the 'Char' construct in XML
    private int surrogatesToCharTmp(int ucs4)
            throws SAXException {

        if (ucs4 <= 0xffff) {
            if (XmlChars.isChar(ucs4)) {
                charTmp[0] = (char) ucs4;
                return 1;
            }
        } else if (ucs4 <= 0x0010ffff) {
            // we represent these as UNICODE surrogate pairs
            ucs4 -= 0x10000;
            charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
            charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
            return 2;
        }
        fatal("P-051", new Object[]{Integer.toHexString(ucs4)});
        // NOTREACHED
        return -1;
    }

    private boolean maybePEReference()
            throws IOException, SAXException {

        // This is the SYNTACTIC version of this construct.
        // When processing external entities, there is also
        // a LEXICAL version; see getc() and doLexicalPE.

        // [69] PEReference ::= '%' Name ';'
        if (!in.peekc('%'))
            return false;

        String name = maybeGetName();
        Object entity;

        if (name == null)
            fatal("P-011");
        nextChar(';', "F-021", name);
        entity = params.get(name);

        if (entity instanceof InternalEntity) {
            InternalEntity value = (InternalEntity) entity;
            pushReader(value.buf, name, false);

        } else if (entity instanceof ExternalEntity) {
            pushReader((ExternalEntity) entity);
            externalParameterEntity((ExternalEntity) entity);

        } else if (entity == null) {
            error("V-022", new Object[]{name});
        }
        return true;
    }

    private boolean maybeEntityDecl()
            throws IOException, SAXException {

        // [70] EntityDecl ::= GEDecl | PEDecl
        // [71] GEDecl ::= '<!ENTITY' S       Name S EntityDef S? '>'
        // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF     S? '>'
        // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
        // [74] PEDef     ::= EntityValue |  ExternalID
        //
        InputEntity start = peekDeclaration("!ENTITY");

        if (start == null)
            return false;

        String entityName;
        SimpleHashtable defns;
        ExternalEntity externalId;
        boolean doStore;

        // PE expansion gets selectively turned off several places:
        // in ENTITY declarations (here), in comments, in PIs.

        // Here, we allow PE entities to be declared, and allows
        // literals to include PE refs without the added spaces
        // required with their expansion in markup decls.

        doLexicalPE = false;
        whitespace("F-005");
        if (in.peekc('%')) {
            whitespace("F-006");
            defns = params;
        } else
            defns = entities;

        ungetc();    // leave some whitespace
        doLexicalPE = true;
        entityName = getMarkupDeclname("F-017", false);
        whitespace("F-007");
        externalId = maybeExternalID();

        //
        // first definition sticks ... e.g. internal subset PEs are used
        // to override DTD defaults.  It's also an "error" to incorrectly
        // redefine builtin internal entities, but since reporting such
        // errors is optional we only give warnings ("just in case") for
        // non-parameter entities.
        //
        doStore = (defns.get(entityName) == null);
        if (!doStore && defns == entities)
            warning("P-054", new Object[]{entityName});

        // internal entities
        if (externalId == null) {
            char value [];
            InternalEntity entity;

            doLexicalPE = false;        // "ab%bar;cd" -maybe-> "abcd"
            parseLiteral(true);
            doLexicalPE = true;
            if (doStore) {
                value = new char[strTmp.length()];
                if (value.length != 0)
                    strTmp.getChars(0, value.length, value, 0);
                entity = new InternalEntity(entityName, value);
                entity.isPE = (defns == params);
                entity.isFromInternalSubset = false;
                defns.put(entityName, entity);
                if (defns == entities)
                    dtdHandler.internalGeneralEntityDecl(entityName,
                            new String(value));
            }

            // external entities (including unparsed)
        } else {
            // [76] NDataDecl ::= S 'NDATA' S Name
            if (defns == entities && maybeWhitespace()
                    && peek("NDATA")) {
                externalId.notation = getMarkupDeclname("F-018", false);

                // flag undeclared notation for checking after
                // the DTD is fully processed
                if (notations.get(externalId.notation) == null)
                    notations.put(externalId.notation, Boolean.TRUE);
            }
            externalId.name = entityName;
            externalId.isPE = (defns == params);
            externalId.isFromInternalSubset = false;
            if (doStore) {
                defns.put(entityName, externalId);
                if (externalId.notation != null)
                    dtdHandler.unparsedEntityDecl(entityName,
                            externalId.publicId, externalId.systemId,
                            externalId.notation);
                else if (defns == entities)
                    dtdHandler.externalGeneralEntityDecl(entityName,
                            externalId.publicId, externalId.systemId);
            }
        }
        maybeWhitespace();
        nextChar('>', "F-031", entityName);
        if (start != in)
            error("V-013", null);
        return true;
    }

    private ExternalEntity maybeExternalID()
            throws IOException, SAXException {

        // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
        //        | 'PUBLIC' S' PubidLiteral S Systemliteral
        String temp = null;
        ExternalEntity retval;

        if (peek("PUBLIC")) {
            whitespace("F-009");
            temp = parsePublicId();
        } else if (!peek("SYSTEM"))
            return null;

        retval = new ExternalEntity(in);
        retval.publicId = temp;
        whitespace("F-008");
        retval.systemId = parseSystemId();
        return retval;
    }

    private String parseSystemId()
            throws IOException, SAXException {

        String uri = getQuotedString("F-034", null);
        int temp = uri.indexOf(':');

        // resolve relative URIs ... must do it here since
        // it's relative to the source file holding the URI!

        // "new java.net.URL (URL, string)" conforms to RFC 1630,
        // but we can't use that except when the URI is a URL.
        // The entity resolver is allowed to handle URIs that are
        // not URLs, so we pass URIs through with scheme intact
        if (temp == -1 || uri.indexOf('/') < temp) {
            String baseURI;

            baseURI = in.getSystemId();
            if (baseURI == null)
                fatal("P-055", new Object[]{uri});
            if (uri.length() == 0)
                uri = ".";
            baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
            if (uri.charAt(0) != '/')
                uri = baseURI + uri;
            else {
                // XXX slashes at the beginning of a relative URI are
                // a special case we don't handle.
                throw new InternalError();
            }

            // letting other code map any "/xxx/../" or "/./" to "/",
            // since all URIs must handle it the same.
        }
        // check for fragment ID in URI
        if (uri.indexOf('#') != -1)
            error("P-056", new Object[]{uri});
        return uri;
    }

    private void maybeTextDecl()
            throws IOException, SAXException {

        // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
        if (peek("<?xml")) {
            readVersion(false, "1.0");
            readEncoding(true);
            maybeWhitespace();
            if (!peek("?>"))
                fatal("P-057");
        }
    }

    private void externalParameterEntity(ExternalEntity next)
            throws IOException, SAXException {

        //
        // Reap the intended benefits of standalone declarations:
        // don't deal with external parameter entities, except to
        // validate the standalone declaration.
        //

        // n.b. "in external parameter entities" (and external
        // DTD subset, same grammar) parameter references can
        // occur "within" markup declarations ... expansions can
        // cross syntax rules.  Flagged here; affects getc().

        // [79] ExtPE ::= TextDecl? extSubsetDecl
        // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
        //        | PEReference | S )*
        InputEntity pe;

        // XXX if this returns false ...

        pe = in;
        maybeTextDecl();
        while (!pe.isEOF()) {
            // pop internal PEs (and whitespace before/after)
            if (in.isEOF()) {
                in = in.pop();
                continue;
            }
            doLexicalPE = false;
            if (maybeWhitespace())
                continue;
            if (maybePEReference())
                continue;
            doLexicalPE = true;
            if (maybeMarkupDecl() || maybeConditionalSect())
                continue;
            break;
        }
        // if (in != pe) throw new InternalError("who popped my PE?");
        if (!pe.isEOF())
            fatal("P-059", new Object[]{in.getName()});
    }

    private void readEncoding(boolean must)
            throws IOException, SAXException {

        // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
        String name = maybeReadAttribute("encoding", must);

        if (name == null)
            return;
        for (int i = 0; i < name.length(); i++) {
            char c = name.charAt(i);
            if ((c >= 'A' && c <= 'Z')
                    || (c >= 'a' && c <= 'z'))
                continue;
            if (i != 0
                    && ((c >= '0' && c <= '9')
                    || c == '-'
                    || c == '_'
                    || c == '.'
                    ))
                continue;
            fatal("P-060", new Object[]{new Character(c)});
        }

        //
        // This should be the encoding in use, and it's even an error for
        // it to be anything else (in certain cases that are impractical to
        // to test, and may even be insufficient).  So, we do the best we
        // can, and warn if things look suspicious.  Note that Java doesn't
        // uniformly expose the encodings, and that the names it uses
        // internally are nonstandard.  Also, that the XML spec allows
        // such "errors" not to be reported at all.
        //
        String currentEncoding = in.getEncoding();

        if (currentEncoding != null
                && !name.equalsIgnoreCase(currentEncoding))
            warning("P-061", new Object[]{name, currentEncoding});
    }

    private boolean maybeNotationDecl()
            throws IOException, SAXException {

        // [82] NotationDecl ::= '<!NOTATION' S Name S
        //        (ExternalID | PublicID) S? '>'
        // [83] PublicID ::= 'PUBLIC' S PubidLiteral
        InputEntity start = peekDeclaration("!NOTATION");

        if (start == null)
            return false;

        String name = getMarkupDeclname("F-019", false);
        ExternalEntity entity = new ExternalEntity(in);

        whitespace("F-011");
        if (peek("PUBLIC")) {
            whitespace("F-009");
            entity.publicId = parsePublicId();
            if (maybeWhitespace()) {
                if (!peek(">"))
                    entity.systemId = parseSystemId();
                else
                    ungetc();
            }
        } else if (peek("SYSTEM")) {
            whitespace("F-008");
            entity.systemId = parseSystemId();
        } else
            fatal("P-062");
        maybeWhitespace();
        nextChar('>', "F-032", name);
        if (start != in)
            error("V-013", null);
        if (entity.systemId != null && entity.systemId.indexOf('#') != -1)
            error("P-056", new Object[]{entity.systemId});

        Object value = notations.get(name);
        if (value != null && value instanceof ExternalEntity)
            warning("P-063", new Object[]{name});

        else {
            notations.put(name, entity);
            dtdHandler.notationDecl(name, entity.publicId,
                    entity.systemId);
        }
        return true;
    }


    ////////////////////////////////////////////////////////////////
    //
    //    UTILITIES
    //
    ////////////////////////////////////////////////////////////////

    private char getc() throws IOException, SAXException {

        if (!doLexicalPE) {
            char c = in.getc();
            return c;
        }

        //
        // External parameter entities get funky processing of '%param;'
        // references.  It's not clearly defined in the XML spec; but it
        // boils down to having those refs be _lexical_ in most cases to
        // include partial syntax productions.  It also needs selective
        // enabling; "<!ENTITY % foo ...>" must work, for example, and
        // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
        // if it's expanded in a literal, else "ab  cd".  PEs also do
        // not expand within comments or PIs, and external PEs are only
        // allowed to have markup decls (and so aren't handled lexically).
        //
        // This PE handling should be merged into maybeWhitespace, where
        // it can be dealt with more consistently.
        //
        // Also, there are some validity constraints in this area.
        //
        char c;

        while (in.isEOF()) {
            if (in.isInternal() || (doLexicalPE && !in.isDocument()))
                in = in.pop();
            else {
                fatal("P-064", new Object[]{in.getName()});
            }
        }
        if ((c = in.getc()) == '%' && doLexicalPE) {
            // PE ref ::= '%' name ';'
            String name = maybeGetName();
            Object entity;

            if (name == null)
                fatal("P-011");
            nextChar(';', "F-021", name);
            entity = params.get(name);

            // push a magic "entity" before and after the
            // real one, so ungetc() behaves uniformly
            pushReader(" ".toCharArray(), null, false);
            if (entity instanceof InternalEntity)
                pushReader(((InternalEntity) entity).buf, name, false);
            else if (entity instanceof ExternalEntity)
            // PEs can't be unparsed!
            // XXX if this returns false ...
                pushReader((ExternalEntity) entity);
            else if (entity == null)
            // see note in maybePEReference re making this be nonfatal.
                fatal("V-022");
            else
                throw new InternalError();
            pushReader(" ".toCharArray(), null, false);
            return in.getc();
        }
        return c;
    }

    private void ungetc() {

        in.ungetc();
    }

    private boolean peek(String s)
            throws IOException, SAXException {

        return in.peek(s, null);
    }

    // Return the entity starting the specified declaration
    // (for validating declaration nesting) else null.

    private InputEntity peekDeclaration(String s)
            throws IOException, SAXException {

        InputEntity start;

        if (!in.peekc('<'))
            return null;
        start = in;
        if (in.peek(s, null))
            return start;
        in.ungetc();
        return null;
    }

    private void nextChar(char c, String location, String near)
            throws IOException, SAXException {

        while (in.isEOF() && !in.isDocument())
            in = in.pop();
        if (!in.peekc(c))
            fatal("P-008", new Object[]
            {new Character(c),
             messages.getMessage(locale, location),
             (near == null ? "" : ('"' + near + '"'))});
    }


    private void pushReader(char buf [], String name, boolean isGeneral)
            throws SAXException {

        InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
        r.init(buf, name, in, !isGeneral);
        in = r;
    }

    private boolean pushReader(ExternalEntity next)
            throws IOException, SAXException {

        InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
        InputSource s;
        try {
            s = next.getInputSource(resolver);
        } catch (IOException e) {
            String msg =
                    "unable to open the external entity from :" + next.systemId;
            if (next.publicId != null)
                msg += " (public id:" + next.publicId + ")";

            SAXParseException spe = new SAXParseException(msg,
                    getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e);
            dtdHandler.fatalError(spe);
            throw e;
        }

        r.init(s, next.name, in, next.isPE);
        in = r;
        return true;
    }

    public String getPublicId() {

        return (in == null) ? null : in.getPublicId();
    }

    public String getSystemId() {

        return (in == null) ? null : in.getSystemId();
    }

    public int getLineNumber() {

        return (in == null) ? -1 : in.getLineNumber();
    }

    public int getColumnNumber() {

        return (in == null) ? -1 : in.getColumnNumber();
    }

    // error handling convenience routines

    private void warning(String messageId, Object parameters [])
            throws SAXException {

        SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
                getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());

        dtdHandler.warning(e);
    }

    void error(String messageId, Object parameters [])
            throws SAXException {

        SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
                getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());

        dtdHandler.error(e);
    }

    private void fatal(String messageId) throws SAXException {

        fatal(messageId, null);
    }

    private void fatal(String messageId, Object parameters [])
            throws SAXException {

        SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
                getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());

        dtdHandler.fatalError(e);

        throw e;
    }

    //
    // Map char arrays to strings ... cuts down both on memory and
    // CPU usage for element/attribute/other names that are reused.
    //
    // Documents typically repeat names a lot, so we more or less
    // intern all the strings within the document; since some strings
    // are repeated in multiple documents (e.g. stylesheets) we go
    // a bit further, and intern globally.
    //
    static class NameCache {
        //
        // Unless we auto-grow this, the default size should be a
        // reasonable bit larger than needed for most XML files
        // we've yet seen (and be prime).  If it's too small, the
        // penalty is just excess cache collisions.
        //
        NameCacheEntry hashtable [] = new NameCacheEntry[541];

        //
        // Usually we just want to get the 'symbol' for these chars
        //
        String lookup(char value [], int len) {

            return lookupEntry(value, len).name;
        }

        //
        // Sometimes we need to scan the chars in the resulting
        // string, so there's an accessor which exposes them.
        // (Mostly for element end tags.)
        //
        NameCacheEntry lookupEntry(char value [], int len) {

            int index = 0;
            NameCacheEntry entry;

            // hashing to get index
            for (int i = 0; i < len; i++)
                index = index * 31 + value[i];
            index &= 0x7fffffff;
            index %= hashtable.length;

            // return entry if one's there ...
            for (entry = hashtable[index];
                 entry != null;
                 entry = entry.next) {
                if (entry.matches(value, len))
                    return entry;
            }

            // else create new one
            entry = new NameCacheEntry();
            entry.chars = new char[len];
            System.arraycopy(value, 0, entry.chars, 0, len);
            entry.name = new String(entry.chars);
            //
            // NOTE:  JDK 1.1 has a fixed size string intern table,
            // with non-GC'd entries.  It can panic here; that's a
            // JDK problem, use 1.2 or later with many identifiers.
            //
            entry.name = entry.name.intern();        // "global" intern
            entry.next = hashtable[index];
            hashtable[index] = entry;
            return entry;
        }
    }

    static class NameCacheEntry {

        String name;
        char chars [];
        NameCacheEntry next;

        boolean matches(char value [], int len) {

            if (chars.length != len)
                return false;
            for (int i = 0; i < len; i++)
                if (value[i] != chars[i])
                    return false;
            return true;
        }
    }

    //
    // Message catalog for diagnostics.
    //
    static final Catalog messages = new Catalog();

    static final class Catalog extends MessageCatalog {

        Catalog() {
            super(DTDParser.class);
        }
    }

}
Other Java examples (source code examples)

Here is a short list of links related to this Java DTDParser.java source code file:
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.
... this post is sponsored by my books ...
#1 New Release!	FP Best Seller
Java example source code file (DTDParser.java)

Java - Java tags/keywords

The DTDParser.java Java example source code

Other Java examples (source code examples)

new blog posts