summary | refs | log | tree | commit | diff
path: root/src/lxml/tests/test_htmlparser.py
diff options
context:
space:
mode:
author: Stefan Behnel <stefan_ml@behnel.de> 2012-04-20 15:34:37 +0200
committer: Stefan Behnel <stefan_ml@behnel.de> 2012-04-20 15:34:37 +0200
commit: e1f3f2affc774fdaaf414f59e2601c298b33f954 (patch)
tree: f3238de1917928150a7d6ce7094d735cd22a8f63 /src/lxml/tests/test_htmlparser.py
parent: d9c0234b990a5ac425cff717f812ef218e97cec2 (diff)
download: python-lxml-e1f3f2affc774fdaaf414f59e2601c298b33f954.tar.gz
fix crash in sax parser when finding invalid DOCTYPE
Diffstat (limited to 'src/lxml/tests/test_htmlparser.py')
-rw-r--r-- src/lxml/tests/test_htmlparser.py 96
1 file changed, 96 insertions, 0 deletions
diff --git a/src/lxml/tests/test_htmlparser.py b/src/lxml/tests/test_htmlparser.py
index 7e2eac18..18e14141 100644
--- a/src/lxml/tests/test_htmlparser.py
+++ b/src/lxml/tests/test_htmlparser.py
@@ -304,6 +304,102 @@ class HtmlParserTestCase(HelperTestCase):
('start', root[1]), ('start', root[1][0])],
events)
+    def test_html_parser_target_tag(self):
+        """Check the events a parser target receives for plain HTML tags.
+
+        Feeds a minimal document without a DOCTYPE to an ``HTMLParser``
+        created with a custom target object, then verifies the recorded
+        start/end events and the value returned by ``parser.close()``.
+        """
+        # Bound locally because ``self`` inside Target refers to the target
+        # instance, not to this test case.
+        assertFalse = self.assertFalse
+        events = []
+        class Target(object):
+            def start(self, tag, attrib):
+                events.append(("start", tag))
+                # the test markup carries no attributes
+                assertFalse(attrib)
+            def end(self, tag):
+                events.append(("end", tag))
+            def close(self):
+                return "DONE"
+
+        parser = self.etree.HTMLParser(target=Target())
+
+        parser.feed("<html><body></body></html>")
+        done = parser.close()
+
+        # parser.close() must hand back whatever Target.close() returned
+        self.assertEqual("DONE", done)
+        self.assertEqual([
+            ("start", "html"), ("start", "body"),
+            ("end", "body"), ("end", "html")], events)
+
+    def test_html_parser_target_doctype_empty(self):
+        """Check that an empty ``<!DOCTYPE>`` is reported to the target.
+
+        The target's ``doctype()`` callback is expected to be called once,
+        before the first start event, with three ``None`` arguments.
+        """
+        # Bound locally because ``self`` inside Target refers to the target
+        # instance, not to this test case.
+        assertFalse = self.assertFalse
+        events = []
+        class Target(object):
+            def start(self, tag, attrib):
+                events.append(("start", tag))
+                # the test markup carries no attributes
+                assertFalse(attrib)
+            def end(self, tag):
+                events.append(("end", tag))
+            def doctype(self, *args):
+                # record the raw argument tuple so the test can check
+                # both the number of arguments and their values
+                events.append(("doctype", args))
+            def close(self):
+                return "DONE"
+
+        parser = self.etree.HTMLParser(target=Target())
+        parser.feed("<!DOCTYPE><html><body></body></html>")
+        done = parser.close()
+
+        # parser.close() must hand back whatever Target.close() returned
+        self.assertEqual("DONE", done)
+        self.assertEqual([
+            ("doctype", (None, None, None)),
+            ("start", "html"), ("start", "body"),
+            ("end", "body"), ("end", "html")], events)
+
+    def test_html_parser_target_doctype_html(self):
+        """Check that ``<!DOCTYPE html>`` is reported to the target.
+
+        The target's ``doctype()`` callback is expected to receive the
+        name ``"html"`` followed by two ``None`` values, ahead of the
+        element events.
+        """
+        # Bound locally because ``self`` inside Target refers to the target
+        # instance, not to this test case.
+        assertFalse = self.assertFalse
+        events = []
+        class Target(object):
+            def start(self, tag, attrib):
+                events.append(("start", tag))
+                # the test markup carries no attributes
+                assertFalse(attrib)
+            def end(self, tag):
+                events.append(("end", tag))
+            def doctype(self, *args):
+                # record the raw argument tuple so the test can check
+                # both the number of arguments and their values
+                events.append(("doctype", args))
+            def close(self):
+                return "DONE"
+
+        parser = self.etree.HTMLParser(target=Target())
+        parser.feed("<!DOCTYPE html><html><body></body></html>")
+        done = parser.close()
+
+        # parser.close() must hand back whatever Target.close() returned
+        self.assertEqual("DONE", done)
+        self.assertEqual([
+            ("doctype", ("html", None, None)),
+            ("start", "html"), ("start", "body"),
+            ("end", "body"), ("end", "html")], events)
+
+    def test_html_parser_target_doctype_html_full(self):
+        """Check that a complete PUBLIC DOCTYPE is reported to the target.
+
+        The target's ``doctype()`` callback is expected to receive the
+        name, the public identifier and the system identifier, in that
+        order, ahead of the element events.
+        """
+        # Bound locally because ``self`` inside Target refers to the target
+        # instance, not to this test case.
+        assertFalse = self.assertFalse
+        events = []
+        class Target(object):
+            def start(self, tag, attrib):
+                events.append(("start", tag))
+                # the test markup carries no attributes
+                assertFalse(attrib)
+            def end(self, tag):
+                events.append(("end", tag))
+            def doctype(self, *args):
+                # record the raw argument tuple so the test can check
+                # both the number of arguments and their values
+                events.append(("doctype", args))
+            def close(self):
+                return "DONE"
+
+        parser = self.etree.HTMLParser(target=Target())
+        parser.feed('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "sys.dtd">'
+                    '<html><body></body></html>')
+        done = parser.close()
+
+        # parser.close() must hand back whatever Target.close() returned
+        self.assertEqual("DONE", done)
+        self.assertEqual([
+            ("doctype", ("html", "-//W3C//DTD HTML 4.01//EN", "sys.dtd")),
+            ("start", "html"), ("start", "body"),
+            ("end", "body"), ("end", "html")], events)
+
+
def test_suite():
suite = unittest.TestSuite()
suite.addTests([unittest.makeSuite(HtmlParserTestCase)])