// $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tests/tagTests/TagTest.java,v 1.2 2004/02/11 02:16:57 woolfel Exp $
/*
* ====================================================================
* Copyright 2002-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// The developers of JMeter and Apache are grateful to the developers
// of HTMLParser for giving Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work of the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
package org.htmlparser.tests.tagTests;
import java.util.Hashtable;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.StringNode;
import org.htmlparser.tags.Div;
import org.htmlparser.tags.EndTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.Tag;
import org.htmlparser.tests.ParserTestCase;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.ParserException;
public class TagTest extends ParserTestCase
{
/**
 * Creates the test case and hands the test name to the base class.
 *
 * @param name the name of the test
 */
public TagTest(String name) {
    super(name);
}
/**
* The bug being reproduced is this :
* <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000
* vLink=#551a8b>
* The above line is incorrectly parsed in that, the BODY tag is not identified.
*/
public void testBodyTagBug1() throws ParserException
{
    // The input literal had been reduced to an empty string, which cannot
    // yield the single BODY tag asserted below. It is rebuilt here from the
    // tag quoted in this test's Javadoc and from the expected tag text.
    // NOTE(review): the line break is written as "\n" while the expected
    // text contains "\r\n"; this presumes the parser rewrites line breaks
    // with a "\r\n" separator (testEmptyTag4 sets that explicitly) - confirm.
    createParser(
        "<BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000\n"
            + "vLink=#551a8b>");
    parseAndAssertNodeCount(1);
    // The only node must be a Tag whose text spans both source lines.
    assertTrue("Node should be a Tag", node[0] instanceof Tag);
    Tag tag = (Tag) node[0];
    assertEquals(
        "Contents of the tag",
        "BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000\r\nvLink=#551a8b",
        tag.getText());
}
/**
* The following should be identified as a tag :
* <MYTAG abcd\n"+
* "efgh\n"+
* "ijkl\n"+
* "mnop>
* Creation date: (6/17/2001 5:27:42 PM)
*/
public void testLargeTagBug() throws ParserException
{
    // The input literal had been reduced to an empty string, which cannot
    // yield the single MYTAG tag asserted below. It is rebuilt here from the
    // tag quoted in this test's Javadoc and from the expected tag text.
    // NOTE(review): line breaks are written as "\n" while the expected text
    // contains "\r\n"; this presumes the parser rewrites line breaks with a
    // "\r\n" separator (testEmptyTag4 sets that explicitly) - confirm.
    createParser(
        "<MYTAG abcd\n"
            + "efgh\n"
            + "ijkl\n"
            + "mnop>");
    parseAndAssertNodeCount(1);
    // The only node must be a Tag whose text spans all four source lines.
    assertTrue("Node should be a Tag", node[0] instanceof Tag);
    Tag tag = (Tag) node[0];
    assertEquals(
        "Contents of the tag",
        "MYTAG abcd\r\nefgh\r\nijkl\r\nmnop",
        tag.getText());
}
/**
* Bug reported by Gordon Deudney 2002-03-15
* Nested JSP Tags were not working
*/
public void testNestedTags() throws ParserException
{
    // Tag contents with a JSP expression nested inside an attribute value.
    String s = "input type=\"text\" value=\"<%=\"test\"%>\" name=\"text\"";
    String line = "<" + s + ">";
    createParser(line);
    parseAndAssertNodeCount(1);
    assertTrue(
        "The node found should have been an Tag",
        node[0] instanceof Tag);
    Tag tag = (Tag) node[0];
    // The tag text must be exactly what was placed between '<' and '>'.
    assertEquals("Tag Contents", s, tag.getText());
}
/**
* Test parseParameter method
* Created by Kaarle Kaila (august 2001)
* the tag name is here G
*/
public void testParseParameter3() throws ParserException
{
    // Kept outside the try block so the catch clause can report its class.
    Node node = null;
    // NOTE(review): the input is an empty string, so the iterator presumably
    // yields no nodes and the assertion below is never reached. The markup
    // (a tag carrying class="userData") appears to have been lost - restore
    // it from the project's history.
    String lin1 = "";
    createParser(lin1);
    NodeIterator en = parser.elements();
    try
    {
        if (en.hasMoreNodes())
        {
            node = en.nextNode();
            Tag tag = (Tag) node;
            Hashtable h = tag.getAttributes();
            // The attribute is looked up under the upper-case key "CLASS".
            String classValue = (String) h.get("CLASS");
            assertEquals(
                "The class value should be ",
                "userData",
                classValue);
        }
    }
    catch (ClassCastException ce)
    {
        // The first node was not a Tag; report what it actually was.
        fail("Bad class element = " + node.getClass().getName());
    }
}
/**
* Test parseParameter method
* Created by Kaarle Kaila (august 2001)
* the tag name is here A (and should be eaten up by linkScanner)
*/
public void testParseParameterA() throws ParserException
{
Tag tag;
EndTag etag;
StringNode snode;
Node node = null;
String lin1 =
"Kaarle's homepage
Paragraph
";
createParser(lin1);
NodeIterator en = parser.elements();
Hashtable h;
boolean testEnd = true; // test end of first part
String a, href, myPara, myValue, nice;
try
{
if (en.hasMoreNodes())
{
node = en.nextNode();
tag = (Tag) node;
h = tag.getAttributes();
a = (String) h.get(Tag.TAGNAME);
href = (String) h.get("HREF");
myValue = (String) h.get("MYPARAMETER");
nice = (String) h.get("YOURPARAMETER");
assertEquals("Link tag (A)", "A", a);
assertEquals("href value", "http://www.iki.fi/kaila", href);
assertEquals("myparameter value", "", myValue);
assertEquals("yourparameter value", "Kaarle Kaaila", nice);
}
if (!(node instanceof LinkTag))
{
// linkscanner has eaten up this piece
if (en.hasMoreNodes())
{
node = en.nextNode();
snode = (StringNode) node;
assertEquals(
"Value of element",
snode.getText(),
"Kaarle's homepage");
}
if (en.hasMoreNodes())
{
node = en.nextNode();
etag = (EndTag) node;
assertEquals("endtag of link", etag.getText(), "A");
}
}
// testing rest
if (en.hasMoreNodes())
{
node = en.nextNode();
tag = (Tag) node;
assertEquals("following paragraph begins", tag.getText(), "p");
}
if (en.hasMoreNodes())
{
node = en.nextNode();
snode = (StringNode) node;
assertEquals(
"paragraph contents",
snode.getText(),
"Paragraph");
}
if (en.hasMoreNodes())
{
node = en.nextNode();
etag = (EndTag) node;
assertEquals("paragrapg endtag", etag.getText(), "p");
}
}
catch (ClassCastException ce)
{
fail("Bad class element = " + node.getClass().getName());
}
}
/**
* Test parseParameter method
* Created by Kaarle Kaila (august 2001)
* the tag name is here G
*/
public void testParseParameterG() throws ParserException
{
Tag tag;
EndTag etag;
StringNode snode;
Node node = null;
String lin1 =
"Kaarle's homepage
Paragraph
";
createParser(lin1);
NodeIterator en = parser.elements();
Hashtable h;
boolean testEnd = true; // test end of first part
String a, href, myPara, myValue, nice;
try
{
if (en.hasMoreNodes())
{
node = en.nextNode();
tag = (Tag) node;
h = tag.getAttributes();
a = (String) h.get(Tag.TAGNAME);
href = (String) h.get("HREF");
myValue = (String) h.get("MYPARAMETER");
nice = (String) h.get("YOURPARAMETER");
assertEquals("The tagname should be G", a, "G");
assertEquals(
"Check the http address",
href,
"http://www.iki.fi/kaila");
assertEquals("myValue is empty", myValue, "");
assertEquals("The second parameter value", nice, "Kaila");
}
if (en.hasMoreNodes())
{
node = en.nextNode();
snode = (StringNode) node;
assertEquals(
"The text of the element",
snode.getText(),
"Kaarle's homepage");
}
if (en.hasMoreNodes())
{
node = en.nextNode();
etag = (EndTag) node;
assertEquals("Endtag is G", etag.getText(), "G");
}
// testing rest
if (en.hasMoreNodes())
{
node = en.nextNode();
tag = (Tag) node;
assertEquals("Follow up by p-tag", tag.getText(), "p");
}
if (en.hasMoreNodes())
{
node = en.nextNode();
snode = (StringNode) node;
assertEquals(
"Verify the paragraph text",
snode.getText(),
"Paragraph");
}
if (en.hasMoreNodes())
{
node = en.nextNode();
etag = (EndTag) node;
assertEquals("Still patragraph endtag", etag.getText(), "p");
}
}
catch (ClassCastException ce)
{
fail("Bad class element = " + node.getClass().getName());
}
}
/**
* Test parseParameter method
* Created by Kaarle Kaila (august 2002)
* the tag name is here A (and should be eaten up by linkScanner)
* Tests elements where = sign is surrounded by spaces
*/
public void testParseParameterSpace() throws ParserException
{
    // Kept outside the try block so the catch clause can report its class.
    Node node = null;
    // The original literal held only the link text, so the first node could
    // not be cast to Tag. The markup is rebuilt from the assertions below and
    // from this test's Javadoc ("= sign is surrounded by spaces").
    String lin1 = "<A yourParameter = \"Kaarle\">Kaarle's homepage</A>";
    createParser(lin1);
    NodeIterator en = parser.elements();
    try
    {
        if (en.hasMoreNodes())
        {
            node = en.nextNode();
            Tag tag = (Tag) node;
            Hashtable h = tag.getAttributes();
            String a = (String) h.get(Tag.TAGNAME);
            String nice = (String) h.get("YOURPARAMETER");
            // Expected value first, so a failure message reads correctly.
            assertEquals("Link tag (A)", "A", a);
            assertEquals("yourParameter value", "Kaarle", nice);
        }
        if (!(node instanceof LinkTag))
        {
            // The first node is not a LinkTag, so the link text and the end
            // tag are checked as separate nodes.
            if (en.hasMoreNodes())
            {
                node = en.nextNode();
                StringNode snode = (StringNode) node;
                assertEquals(
                    "Value of element",
                    "Kaarle's homepage",
                    snode.getText());
            }
            if (en.hasMoreNodes())
            {
                node = en.nextNode();
                EndTag etag = (EndTag) node;
                assertEquals(
                    "Still patragraph endtag",
                    "A",
                    etag.getText());
            }
        }
    }
    catch (ClassCastException ce)
    {
        // A node had an unexpected type; report what it actually was.
        fail("Bad class element = " + node.getClass().getName());
    }
}
/**
* Reproduction of a bug reported by Annette Doyle
* This is actually a pretty good example of dirty html - we are in a fix
* here, because the font tag (the first one) has an erroneous inverted comma. In Tag,
* we ignore anything in inverted commas, and don't if it's outside. This kind of messes
* up our parsing almost completely.
*/
public void testStrictParsing() throws ParserException
{
// NOTE(review): the literal below is split across two lines and holds no
// markup, so it is not a valid Java string literal. The HTML it carried
// (a div wrapping the malformed font tag checked below) appears to have
// been lost - restore it from the project's history.
String testHTML =
"
";
// Parse with a base URL and with the scanners registered.
createParser(testHTML, "http://www.cia.gov");
parser.registerScanners();
parseAndAssertNodeCount(1);
// Check the tags
assertType("node", Div.class, node[0]);
Div div = (Div) node[0];
// The first child of the div is the font tag under test.
Tag fontTag = (Tag) div.children().nextNode();
assertEquals(
"Second tag should be corrected",
"font face=\"Arial,helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"",
fontTag.getText());
// Try to parse the parameters from this tag.
Hashtable table = fontTag.getAttributes();
assertNotNull("Parameters table", table);
// Attributes are looked up under upper-case keys.
assertEquals(
"font sans-serif parameter",
"sans-serif",
table.get("SANS-SERIF"));
assertEquals(
"font face parameter",
"Arial,helvetica,",
table.get("FACE"));
}
/**
 * Parses a three-line input, expects seven nodes, and compares the
 * toHtml() output of the first, second and fifth nodes with fixed strings.
 */
public void testToHTML() throws ParserException {
    // NOTE(review): the input and the expected strings contain no markup
    // although tags are asserted below; they look like they lost their HTML
    // - confirm against the project's history.
    String testHTML =
        new String(
            "\n"
                + "Hello\n"
                + "Hey");
    createParser(testHTML);
    parseAndAssertNodeCount(7);

    // First node: must be a Tag, and its HTML is compared via assertStringEquals.
    assertTrue("1st Node should be a Tag", node[0] instanceof Tag);
    Tag firstTag = (Tag) node[0];
    assertStringEquals("toHTML()", "", firstTag.toHtml());

    // Both remaining type checks run before either of those nodes is cast.
    assertTrue("2nd Node should be a Tag", node[1] instanceof Tag);
    assertTrue("5th Node should be a Tag", node[4] instanceof Tag);

    Tag secondTag = (Tag) node[1];
    assertEquals("Raw String of the tag", "", secondTag.toHtml());

    Tag fifthTag = (Tag) node[4];
    assertEquals("Raw String of the tag", "", fifthTag.toHtml());
}
/**
* Test parseParameter method
* Created by Kaarle Kaila (22 Oct 2001)
* This test just wants the text in the element
*/
public void testWithoutParseParameter() throws ParserException
{
// NOTE(review): tag, etag and snode are declared but never used here.
Tag tag;
EndTag etag;
StringNode snode;
// Kept outside the try block so the catch clause can report its class.
Node node = null;
// NOTE(review): this literal and the expected one further down are split
// across lines and hold no markup, so neither is a valid Java string
// literal. The HTML appears to have been lost - restore it from the
// project's history.
String testHTML =
"Kaarle's homepage
Paragraph
";
createParser(testHTML);
NodeIterator en = parser.elements();
// Accumulates the toHtml() output of every node, in iteration order.
String result = "";
try
{
while (en.hasMoreNodes())
{
node = en.nextNode();
result += node.toHtml();
}
String expected =
"Kaarle's homepage
Paragraph
";
// The concatenated HTML of all nodes is compared with the expected text.
assertStringEquals(
"Check collected contents to original",
expected,
result);
}
catch (ClassCastException ce)
{
fail("Bad class element = " + node.getClass().getName());
}
}
/**
* Test parseParameter method
* Created by Kaarle Kaila (09 Jan 2003)
* This test just wants the text in the element
*/
public void testEmptyTagParseParameter() throws ParserException
{
    // Kept outside the try block so the catch clause can report its class.
    Node node = null;
    // NOTE(review): input and expected text are both empty strings, so the
    // comparison below presumably succeeds without any node being parsed.
    // The markup appears to have been lost - restore it from the project's
    // history.
    String testHTML =
        "";
    createParser(testHTML);
    NodeIterator en = parser.elements();
    String result = "";
    try
    {
        while (en.hasMoreNodes())
        {
            node = en.nextNode();
            // Plain assignment: only the last node's HTML is retained.
            result = node.toHtml();
        }
        String expected =
            "";
        assertStringEquals(
            "Check collected contents to original",
            expected,
            result);
    }
    catch (ClassCastException ce)
    {
        fail("Bad class element = " + node.getClass().getName());
    }
}
/**
 * Expects the input to parse into a single Tag whose "src" attribute
 * is "af.css".
 */
public void testStyleSheetTag() throws ParserException {
    // NOTE(review): the input is an empty string although a tag with a src
    // attribute is asserted; the markup looks like it was lost - confirm.
    String testHTML1 = new String("");
    createParser(testHTML1, "http://www.google.com/test/index.html");
    parseAndAssertNodeCount(1);

    assertTrue("Node should be a tag", node[0] instanceof Tag);
    Tag styleSheetTag = (Tag) node[0];
    String source = styleSheetTag.getAttribute("src");
    assertEquals("StyleSheet Source", "af.css", source);
}
/**
* Bug report by Cedric Rosa, causing null pointer exceptions when encountering a broken tag,
* and if this has no further lines to parse
*/
public void testBrokenTag() throws ParserException
{
    // The original passed an undeclared variable (testHTML) to createParser,
    // and its literal had shrunk to the tail " \">" of the tag. The input is
    // rebuilt from the tag text asserted below, which ends with that tail.
    // NOTE(review): the attribute value may originally have held more than
    // "World " - confirm against the project's history.
    String testHTML1 = new String("<META name=\"Hello\" value=\"World \">");
    createParser(testHTML1);
    parseAndAssertNodeCount(1);
    assertTrue("Node should be a tag", node[0] instanceof Tag);
    Tag tag = (Tag) node[0];
    assertEquals(
        "Node contents",
        "META name=\"Hello\" value=\"World \"",
        tag.getText());
    // The trailing space inside the quotes is part of the expected value.
    assertEquals("Meta Content", "World ", tag.getAttribute("value"));
}
/**
 * Expects the input to parse into a single META tag, then checks the tag
 * text and its CONTENT attribute.
 */
public void testIncorrectInvertedCommas() throws ParserException {
    // NOTE(review): the input is an empty string although a META tag is
    // asserted; the markup looks like it was lost - confirm.
    String testHTML =
        new String("");
    createParser(testHTML);
    parseAndAssertNodeCount(1);

    assertTrue("Node should be a tag", node[0] instanceof Tag);
    Tag metaTag = (Tag) node[0];
    assertStringEquals(
        "Node contents",
        "META NAME=\"Author\" CONTENT=\"DORIER-APPRILL E., GERVAIS-LAMBONY P., MORICONI-EBRARD F., NAVEZ-BOUCHANINE F.\"",
        metaTag.getText());

    // The attribute table is fetched here, as in the original, but not read.
    Hashtable attributes = metaTag.getAttributes();
    String content = metaTag.getAttribute("CONTENT");
    assertEquals(
        "Meta Content",
        "DORIER-APPRILL E., GERVAIS-LAMBONY P., MORICONI-EBRARD F., NAVEZ-BOUCHANINE F.",
        content);
}
/**
 * Expects the input to parse into a single META tag and checks its text.
 */
public void testIncorrectInvertedCommas2() throws ParserException {
    // NOTE(review): the input is an empty string although a META tag is
    // asserted; the markup looks like it was lost - confirm.
    String testHTML =
        new String("");
    createParser(testHTML);
    parseAndAssertNodeCount(1);

    assertTrue("Node should be a tag", node[0] instanceof Tag);
    Tag metaTag = (Tag) node[0];
    String contents = metaTag.getText();
    assertStringEquals(
        "Node contents",
        "META NAME=\"Keywords\" CONTENT=\"Moscou, modernisation, politique urbaine, spécificités culturelles, municipalité, Moscou, modernisation, urban politics, cultural specificities, municipality\"",
        contents);
}
/**
 * Expects the input to parse into a single meta tag and checks its text.
 */
public void testIncorrectInvertedCommas3() throws ParserException {
    // NOTE(review): the input is an empty string although a meta tag is
    // asserted; the markup looks like it was lost - confirm.
    String testHTML =
        new String("");
    createParser(testHTML);
    parseAndAssertNodeCount(1);

    assertTrue("Node should be a tag", node[0] instanceof Tag);
    Tag metaTag = (Tag) node[0];
    String contents = metaTag.getText();
    assertEquals(
        "Node contents",
        "meta name=\"description\" content=\"Une base de données sur les thèses de gographie soutenues en France\"",
        contents);
}
/**
* Ignore empty tags.
*/
public void testEmptyTag() throws ParserException {
    // NOTE(review): five nodes are expected from an input showing only "<>"
    // and text; surrounding markup may have been lost - confirm.
    createParser("<>text");
    parser.registerScanners();
    parseAndAssertNodeCount(5);

    // The "<>" must come back as plain text inside the third node.
    Object third = node[2];
    assertTrue(
        "Third node should be a string node",
        third instanceof StringNode);
    String actual = ((StringNode) third).getText();
    assertEquals("Third node has incorrect text", "<>text", actual);
}
/**
* Ignore empty tags.
*/
public void testEmptyTag2() throws ParserException {
    // NOTE(review): five nodes are expected from an input showing only text
    // and "<>"; surrounding markup may have been lost - confirm.
    createParser("text<>");
    parser.registerScanners();
    parseAndAssertNodeCount(5);

    // The "<>" must come back as plain text inside the third node.
    Object third = node[2];
    assertTrue(
        "Third node should be a string node",
        third instanceof StringNode);
    String actual = ((StringNode) third).getText();
    assertEquals("Third node has incorrect text", "text<>", actual);
}
/**
* Ignore empty tags.
*/
public void testEmptyTag3() throws ParserException {
    // NOTE(review): five nodes are expected from an input showing only text
    // around "<>"; surrounding markup may have been lost - confirm.
    createParser("text<>text");
    parser.registerScanners();
    parseAndAssertNodeCount(5);

    // The "<>" must come back as plain text inside the third node.
    Object third = node[2];
    assertTrue(
        "Third node should be a string node",
        third instanceof StringNode);
    String actual = ((StringNode) third).getText();
    assertEquals("Third node has incorrect text", "text<>text", actual);
}
/**
* Ignore empty tags.
*/
public void testEmptyTag4() throws ParserException {
    // NOTE(review): five nodes are expected from this short input;
    // surrounding markup may have been lost - confirm.
    createParser("text\n<>text");
    parser.registerScanners();
    // Static setter on Parser: the expected text below uses "\r\n".
    Parser.setLineSeparator("\r\n");
    parseAndAssertNodeCount(5);

    Object third = node[2];
    assertTrue(
        "Third node should be a string node",
        third instanceof StringNode);
    StringNode textNode = (StringNode) third;
    assertEquals(
        "Third node has incorrect text",
        "text\r\n<>text",
        textNode.getText());
}
/**
* Ignore empty tags.
*/
public void testEmptyTag5() throws ParserException {
    // NOTE(review): five nodes are expected from this short input;
    // surrounding markup may have been lost - confirm.
    createParser("text<\n>text");
    parser.registerScanners();
    // Static setter on Parser: the expected text below uses "\r\n".
    Parser.setLineSeparator("\r\n");
    parseAndAssertNodeCount(5);

    Object third = node[2];
    assertTrue(
        "Third node should be a string node",
        third instanceof StringNode);
    StringNode textNode = (StringNode) third;
    assertEquals(
        "Third node has incorrect text",
        "text<\r\n>text",
        textNode.getText());
}
/**
* Ignore empty tags.
*/
public void testEmptyTag6() throws ParserException {
    // NOTE(review): five nodes are expected from this short input;
    // surrounding markup may have been lost - confirm.
    createParser("text<>\ntext");
    parser.registerScanners();
    // Static setter on Parser: the expected text below uses "\r\n".
    Parser.setLineSeparator("\r\n");
    parseAndAssertNodeCount(5);

    Object third = node[2];
    assertTrue(
        "Third node should be a string node",
        third instanceof StringNode);
    StringNode textNode = (StringNode) third;
    assertEquals(
        "Third node has incorrect text",
        "text<>\r\ntext",
        textNode.getText());
}
public void testAttributesReconstruction() throws ParserException
{
String testHTML = "";
createParser(testHTML);
parseAndAssertNodeCount(2);
assertTrue("First node should be an HTMLtag", node[0] instanceof Tag);
Tag htmlTag = (Tag) node[0];
String expectedHTML = "