|
Java example source code file (DocumentParser.java)
The DocumentParser.java Java example source code/* * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package javax.swing.text.html.parser; import javax.swing.text.SimpleAttributeSet; import javax.swing.text.html.HTMLEditorKit; import javax.swing.text.html.HTML; import javax.swing.text.ChangedCharSetException; import java.util.*; import java.io.*; import java.net.*; /** * A Parser for HTML Documents (actually, you can specify a DTD, but * you should really only use this class with the html dtd in swing). * Reads an InputStream of HTML and * invokes the appropriate methods in the ParserCallback class. This * is the default parser used by HTMLEditorKit to parse HTML url's. * <p>This will message the callback for all valid tags, as well as * tags that are implied but not explicitly specified. 
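For example, the
* html string (&lt;p&gt;blah) only has a p tag defined. The callback
* will see the following methods:
* <ol><li><i>handleStartTag(html, ...)</i></li>
* <li><i>handleStartTag(head, ...)</i></li>
* <li><i>handleEndTag(head)</i></li>
* <li><i>handleStartTag(body, ...)</i></li>
* <li>handleStartTag(p, ...)</li>
* <li>handleText(...)</li>
* <li><i>handleEndTag(p)</i></li>
* <li><i>handleEndTag(body)</i></li>
* <li><i>handleEndTag(html)</i></li>
* </ol>
* The items in <i>italic</i> are implied, that is, although they were not
* explicitly specified, to be correct html they should have been present
* (head isn't necessary, but it is still generated). For tags that
* are implied, the AttributeSet argument will have a value of
* <code>Boolean.TRUE</code> for the key
* <code>HTMLEditorKit.ParserCallback.IMPLIED</code>.
* <p>
* For attributes that do not have a value, eg in the html
* string <code>&lt;foo blah&gt;</code> the attribute <code>blah</code>
* does not have a value, there are two possible values that will be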
* placed in the AttributeSet's value:
* <ul>
* <li>If the DTD does not contain a definition for the element, or the
* definition does not have an explicit value then the value in the
* AttributeSet will be <code>HTML.NULL_ATTRIBUTE_VALUE</code>.
* <li>If the DTD contains an explicit value, as in:
* <code>&lt;!ATTLIST OPTION selected (selected) #IMPLIED&gt;</code>
* this value from the dtd (in this case selected) will be used.
* </ul>
* <p>
* Once the stream has been parsed, the callback is notified of the most
* likely end of line string. The end of line string will be one of
* \n, \r or \r\n, whichever is encountered the most in parsing the
* stream.
*
* @author Sunita Mani
*/
public class DocumentParser extends javax.swing.text.html.parser.Parser {

    /** Number of {@code body} start tags seen minus {@code body} end tags seen. */
    private int inbody;

    /** Number of {@code title} start tags seen minus {@code title} end tags seen. */
    private int intitle;

    /** Number of {@code head} start tags seen minus {@code head} end tags seen. */
    private int inhead;

    /** Number of {@code style} start tags seen minus {@code style} end tags seen. */
    private int instyle;

    /** Number of {@code script} start tags seen minus {@code script} end tags seen. */
    private int inscript;

    /** Set once a {@code title} end tag has been handled; later title text is not forwarded. */
    private boolean seentitle;

    /** Receiver of all parse events; assigned by {@link #parse(Reader, HTMLEditorKit.ParserCallback, boolean)}. */
    private HTMLEditorKit.ParserCallback callback = null;

    /** When {@code true}, {@code meta} tags are not inspected for a character-set change. */
    private boolean ignoreCharSet = false;

    /** Compile-time switch for the diagnostic output written through {@link #debug(String)}. */
    private static final boolean debugFlag = false;

    /**
     * Creates a parser that uses the given DTD.
     *
     * @param dtd the DTD handed to the superclass constructor
     */
    public DocumentParser(DTD dtd) {
        super(dtd);
    }

    /**
     * Parses the given stream, reporting every event to {@code callback}, and
     * finally reports the end-of-line string obtained from
     * {@code getEndOfLineString()}.
     *
     * <p>All per-document bookkeeping is reset before parsing starts so that
     * an instance which is used for more than one document does not carry
     * state (most notably the "title already seen" flag) from a previous run
     * into the next one.
     *
     * @param in            the character stream to parse
     * @param callback      the receiver of the parse events
     * @param ignoreCharSet if {@code true}, {@code meta} tags never trigger a
     *                      {@link ChangedCharSetException}
     * @throws IOException if thrown by the underlying {@code parse(Reader)}
     */
    public void parse(Reader in, HTMLEditorKit.ParserCallback callback, boolean ignoreCharSet) throws IOException {
        this.ignoreCharSet = ignoreCharSet;
        this.callback = callback;

        // Start every document from a clean slate; without this a reused
        // parser would suppress the title text of every document but the first.
        inbody = 0;
        intitle = 0;
        inhead = 0;
        instyle = 0;
        inscript = 0;
        seentitle = false;

        parse(in);
        // end of line
        callback.handleEndOfLineString(getEndOfLineString());
    }

    /**
     * Handle Start Tag. Updates the nesting counter that belongs to the tag's
     * element (if any) and forwards the tag to the callback. Fictional tags
     * are forwarded with an attribute set that only contains the
     * {@code IMPLIED} marker; real tags are forwarded with the current
     * attributes, which are flushed afterwards.
     *
     * @param tag the start tag being handled
     */
    @Override
    protected void handleStartTag(TagElement tag) {
        Element elem = tag.getElement();
        if (elem == dtd.body) {
            inbody++;
        } else if (elem == dtd.html) {
            // no counter is kept for the html element
        } else if (elem == dtd.head) {
            inhead++;
        } else if (elem == dtd.title) {
            intitle++;
        } else if (elem == dtd.style) {
            instyle++;
        } else if (elem == dtd.script) {
            inscript++;
        }

        if (debugFlag) {
            if (tag.fictional()) {
                debug("Start Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos());
            } else {
                debug("Start Tag: " + tag.getHTMLTag() + " attributes: " +
                      getAttributes() + " pos: " + getCurrentPos());
            }
        }

        if (tag.fictional()) {
            callback.handleStartTag(tag.getHTMLTag(), impliedAttributes(),
                                    getBlockStartPosition());
        } else {
            callback.handleStartTag(tag.getHTMLTag(), getAttributes(),
                                    getBlockStartPosition());
            flushAttributes();
        }
    }

    /**
     * Handle Comment. Forwards the comment text to the callback together with
     * the block start position.
     *
     * @param text the characters of the comment
     */
    @Override
    protected void handleComment(char[] text) {
        if (debugFlag) {
            debug("comment: ->" + new String(text) + "<-"
                  + " pos: " + getCurrentPos());
        }
        callback.handleComment(text, getBlockStartPosition());
    }

    /**
     * Handle Empty Tag. A {@code meta} tag is first checked for a
     * character-set declaration (unless that check is disabled). The tag is
     * then forwarded to the callback as a simple tag if a body is open or the
     * element is one of {@code meta}, {@code base}, {@code isindex},
     * {@code style} or {@code link}; otherwise it is dropped.
     *
     * @param tag the empty tag being handled
     * @throws ChangedCharSetException if a {@code meta} tag declares a content
     *         type other than {@code text/html} / {@code text/plain}, or
     *         carries an {@code http-equiv} of {@code charset}
     */
    @Override
    protected void handleEmptyTag(TagElement tag) throws ChangedCharSetException {
        Element elem = tag.getElement();
        if (elem == dtd.meta && !ignoreCharSet) {
            checkMetaCharSet();
        }

        boolean forward = inbody != 0
                || elem == dtd.meta
                || elem == dtd.base
                || elem == dtd.isindex
                || elem == dtd.style
                || elem == dtd.link;
        if (!forward) {
            return;
        }

        if (debugFlag) {
            if (tag.fictional()) {
                debug("Empty Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos());
            } else {
                debug("Empty Tag: " + tag.getHTMLTag() + " attributes: "
                      + getAttributes() + " pos: " + getCurrentPos());
            }
        }

        if (tag.fictional()) {
            callback.handleSimpleTag(tag.getHTMLTag(), impliedAttributes(),
                                     getBlockStartPosition());
        } else {
            callback.handleSimpleTag(tag.getHTMLTag(), getAttributes(),
                                     getBlockStartPosition());
            flushAttributes();
        }
    }

    /**
     * Handle End Tag. Decrements the nesting counter that belongs to the tag's
     * element (if any), remembers that a title has been closed, and forwards
     * the tag to the callback.
     *
     * @param tag the end tag being handled
     */
    @Override
    protected void handleEndTag(TagElement tag) {
        Element elem = tag.getElement();
        if (elem == dtd.body) {
            inbody--;
        } else if (elem == dtd.title) {
            intitle--;
            seentitle = true;
        } else if (elem == dtd.head) {
            inhead--;
        } else if (elem == dtd.style) {
            instyle--;
        } else if (elem == dtd.script) {
            inscript--;
        }

        if (debugFlag) {
            debug("End Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos());
        }
        callback.handleEndTag(tag.getHTMLTag(), getBlockStartPosition());
    }

    /**
     * Handle Text. Text inside a {@code script} element is reported to the
     * callback as a comment. Other text is reported as text only while a
     * {@code body} or {@code style} element is open, or while a {@code title}
     * element is open and no title has been closed yet. {@code null} data is
     * ignored.
     *
     * @param data the characters of the text run, may be {@code null}
     */
    @Override
    protected void handleText(char[] data) {
        if (data == null) {
            return;
        }
        if (inscript != 0) {
            callback.handleComment(data, getBlockStartPosition());
            return;
        }

        boolean inFirstTitle = intitle != 0 && !seentitle;
        if (inbody != 0 || instyle != 0 || inFirstTitle) {
            if (debugFlag) {
                debug("text: ->" + new String(data) + "<-" + " pos: " + getCurrentPos());
            }
            callback.handleText(data, getBlockStartPosition());
        }
    }

    /**
     * Error handling. Forwards the message to the callback together with the
     * current position.
     *
     * @param ln       the line number supplied by the caller; not used here
     * @param errorMsg the error message to report
     */
    @Override
    protected void handleError(int ln, String errorMsg) {
        if (debugFlag) {
            debug("Error: ->" + errorMsg + "<-" + " pos: " + getCurrentPos());
        }
        /* PENDING: need to improve the error string. */
        callback.handleError(errorMsg, getCurrentPos());
    }

    /**
     * Inspects the current attributes of a {@code meta} tag and signals a
     * character-set change where one is declared.
     *
     * @throws ChangedCharSetException if {@code http-equiv} is
     *         {@code content-type} and {@code content} is neither
     *         {@code text/html} nor {@code text/plain}, or if
     *         {@code http-equiv} is {@code charset}
     */
    private void checkMetaCharSet() throws ChangedCharSetException {
        SimpleAttributeSet atts = getAttributes();
        if (atts == null) {
            return;
        }
        String content = (String) atts.getAttribute(HTML.Attribute.CONTENT);
        if (content == null) {
            return;
        }

        String httpEquiv = (String) atts.getAttribute(HTML.Attribute.HTTPEQUIV);
        if ("content-type".equalsIgnoreCase(httpEquiv)) {
            if (!content.equalsIgnoreCase("text/html") &&
                !content.equalsIgnoreCase("text/plain")) {
                throw new ChangedCharSetException(content, false);
            }
        } else if ("charset".equalsIgnoreCase(httpEquiv)) {
            throw new ChangedCharSetException(content, true);
        }
    }

    /**
     * Builds the attribute set passed to the callback for fictional tags: a
     * fresh set whose only entry maps {@code IMPLIED} to {@code Boolean.TRUE}.
     *
     * @return a new attribute set carrying only the implied marker
     */
    private static SimpleAttributeSet impliedAttributes() {
        SimpleAttributeSet attrs = new SimpleAttributeSet();
        attrs.addAttribute(HTMLEditorKit.ParserCallback.IMPLIED,
                           Boolean.TRUE);
        return attrs;
    }

    /**
     * Writes a diagnostic message to standard output.
     *
     * @param msg the message to print
     */
    private void debug(String msg) {
        System.out.println(msg);
    }
}
Other Java examples (source code examples): here is a short list of links related to this Java DocumentParser.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.