|
What this is
Other links
The source code// $Header: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java,v 1.12 2004/03/24 03:00:56 sebb Exp $ /* * Copyright 2003-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package org.apache.jmeter.protocol.http.parser; import java.io.ByteArrayInputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.Iterator; import org.apache.jorphan.logging.LoggingManager; import org.apache.log.Logger; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.tidy.Tidy; import org.xml.sax.SAXException; /** * HtmlParser implementation using JTidy. * * @version $Revision: 1.12 $ updated on $Date: 2004/03/24 03:00:56 $ */ class JTidyHTMLParser extends HTMLParser { /** Used to store the Logger (used for debug and error messages). */ transient private static Logger log = LoggingManager.getLoggerForClass(); protected JTidyHTMLParser() { super(); } protected boolean isReusable() { return true; } /* (non-Javadoc) * @see org.apache.jmeter.protocol.http.parser.HTMLParser#getEmbeddedResourceURLs(byte[], java.net.URL) */ public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, URLCollection urls) throws HTMLParseException { Document dom = null; try { dom = (Document)getDOM(html); } catch(SAXException se) { throw new HTMLParseException(se); } // Now parse the DOM tree scanNodes(dom,urls, baseUrl); return urls.iterator(); } /** * Scan nodes recursively, looking for embedded resources * @param node - initial node * @param urls - container for URLs * @param baseUrl - used to create absolute URLs * * @return new base URL */ private URL scanNodes(Node node, URLCollection urls, URL baseUrl) throws HTMLParseException { if ( node == null ) { return baseUrl; } String name = node.getNodeName(); int type = node.getNodeType(); switch ( type ) { case Node.DOCUMENT_NODE: scanNodes(((Document)node).getDocumentElement(),urls,baseUrl); break; case Node.ELEMENT_NODE: NamedNodeMap attrs = node.getAttributes(); if (name.equalsIgnoreCase("base")) { String tmp=getValue(attrs,"href"); if (tmp!=null) try { baseUrl= new URL(baseUrl, tmp); } catch (MalformedURLException e) { throw new HTMLParseException(e); } break; } if (name.equalsIgnoreCase("img")) { urls.addURL(getValue(attrs,"src"),baseUrl); break; } if (name.equalsIgnoreCase("applet")) { urls.addURL(getValue(attrs,"code"),baseUrl); break; } if (name.equalsIgnoreCase("input")) { String src=getValue(attrs,"src"); String typ=getValue(attrs,"type"); if ((src!=null) &&(typ.equalsIgnoreCase("image")) ){ urls.addURL(src,baseUrl); } break; } if (name.equalsIgnoreCase("link")) { urls.addURL(getValue(attrs,"href"),baseUrl); break; } if (name.equalsIgnoreCase("script")) { urls.addURL(getValue(attrs,"src"),baseUrl); break; } String back=getValue(attrs,"background"); if (back != null){ urls.addURL(back,baseUrl); break; } NodeList children = node.getChildNodes(); if ( children != null ) { int len = children.getLength(); for ( int i = 0; i < len; i++ ) { baseUrl= scanNodes(children.item(i),urls,baseUrl); } } break; // case Node.TEXT_NODE: // break; } return baseUrl; } /* * Helper method to get an attribute value, if it exists * @param attrs list of attributs * @param attname attribute name * @return */ private String getValue(NamedNodeMap attrs, String attname) { String v=null; Node n = attrs.getNamedItem(attname); if (n != null) v=n.getNodeValue(); return v; } /** * Returns |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.