diff options
| author | Stefan Behnel <stefan_ml@behnel.de> | 2012-04-20 15:34:37 +0200 |
|---|---|---|
| committer | Stefan Behnel <stefan_ml@behnel.de> | 2012-04-20 15:34:37 +0200 |
| commit | e1f3f2affc774fdaaf414f59e2601c298b33f954 (patch) | |
| tree | f3238de1917928150a7d6ce7094d735cd22a8f63 /src/lxml/tests/test_htmlparser.py | |
| parent | d9c0234b990a5ac425cff717f812ef218e97cec2 (diff) | |
| download | python-lxml-e1f3f2affc774fdaaf414f59e2601c298b33f954.tar.gz | |
fix crash in sax parser when finding invalid DOCTYPE
Diffstat (limited to 'src/lxml/tests/test_htmlparser.py')
| -rw-r--r-- | src/lxml/tests/test_htmlparser.py | 96 |
1 file changed, 96 insertions, 0 deletions
```diff
diff --git a/src/lxml/tests/test_htmlparser.py b/src/lxml/tests/test_htmlparser.py
index 7e2eac18..18e14141 100644
--- a/src/lxml/tests/test_htmlparser.py
+++ b/src/lxml/tests/test_htmlparser.py
@@ -304,6 +304,102 @@ class HtmlParserTestCase(HelperTestCase):
              ('start', root[1]), ('start', root[1][0])],
             events)
 
+    def test_html_parser_target_tag(self):
+        assertFalse = self.assertFalse
+        events = []
+        class Target(object):
+            def start(self, tag, attrib):
+                events.append(("start", tag))
+                assertFalse(attrib)
+            def end(self, tag):
+                events.append(("end", tag))
+            def close(self):
+                return "DONE"
+
+        parser = self.etree.HTMLParser(target=Target())
+
+        parser.feed("<html><body></body></html>")
+        done = parser.close()
+
+        self.assertEquals("DONE", done)
+        self.assertEquals([
+            ("start", "html"), ("start", "body"),
+            ("end", "body"), ("end", "html")], events)
+
+    def test_html_parser_target_doctype_empty(self):
+        assertFalse = self.assertFalse
+        events = []
+        class Target(object):
+            def start(self, tag, attrib):
+                events.append(("start", tag))
+                assertFalse(attrib)
+            def end(self, tag):
+                events.append(("end", tag))
+            def doctype(self, *args):
+                events.append(("doctype", args))
+            def close(self):
+                return "DONE"
+
+        parser = self.etree.HTMLParser(target=Target())
+        parser.feed("<!DOCTYPE><html><body></body></html>")
+        done = parser.close()
+
+        self.assertEquals("DONE", done)
+        self.assertEquals([
+            ("doctype", (None, None, None)),
+            ("start", "html"), ("start", "body"),
+            ("end", "body"), ("end", "html")], events)
+
+    def test_html_parser_target_doctype_html(self):
+        assertFalse = self.assertFalse
+        events = []
+        class Target(object):
+            def start(self, tag, attrib):
+                events.append(("start", tag))
+                assertFalse(attrib)
+            def end(self, tag):
+                events.append(("end", tag))
+            def doctype(self, *args):
+                events.append(("doctype", args))
+            def close(self):
+                return "DONE"
+
+        parser = self.etree.HTMLParser(target=Target())
+        parser.feed("<!DOCTYPE html><html><body></body></html>")
+        done = parser.close()
+
+        self.assertEquals("DONE", done)
+        self.assertEquals([
+            ("doctype", ("html", None, None)),
+            ("start", "html"), ("start", "body"),
+            ("end", "body"), ("end", "html")], events)
+
+    def test_html_parser_target_doctype_html_full(self):
+        assertFalse = self.assertFalse
+        events = []
+        class Target(object):
+            def start(self, tag, attrib):
+                events.append(("start", tag))
+                assertFalse(attrib)
+            def end(self, tag):
+                events.append(("end", tag))
+            def doctype(self, *args):
+                events.append(("doctype", args))
+            def close(self):
+                return "DONE"
+
+        parser = self.etree.HTMLParser(target=Target())
+        parser.feed('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "sys.dtd">'
+                    '<html><body></body></html>')
+        done = parser.close()
+
+        self.assertEquals("DONE", done)
+        self.assertEquals([
+            ("doctype", ("html", "-//W3C//DTD HTML 4.01//EN", "sys.dtd")),
+            ("start", "html"), ("start", "body"),
+            ("end", "body"), ("end", "html")], events)
+
+
 def test_suite():
     suite = unittest.TestSuite()
     suite.addTests([unittest.makeSuite(HtmlParserTestCase)])
```
