diff options
| author | scoder <none@none> | 2008-05-21 21:47:20 +0200 |
|---|---|---|
| committer | scoder <none@none> | 2008-05-21 21:47:20 +0200 |
| commit | 3d3ce5594e2f1f95d40289f2a38d864910082e8f (patch) | |
| tree | 04c0c05a82bce347853c78fb74606566417c93f5 /src/lxml/tests/test_htmlparser.py | |
| parent | f1c88245f3a6a146274af2bb9c16accace75dbd8 (diff) | |
| download | python-lxml-3d3ce5594e2f1f95d40289f2a38d864910082e8f.tar.gz | |
[svn r3710] r4268@delle: sbehnel | 2008-05-21 13:11:54 +0200
Py3 test fixes and workarounds
--HG--
branch : trunk
Diffstat (limited to 'src/lxml/tests/test_htmlparser.py')
| -rw-r--r-- | src/lxml/tests/test_htmlparser.py | 46 |
1 files changed, 23 insertions, 23 deletions
diff --git a/src/lxml/tests/test_htmlparser.py b/src/lxml/tests/test_htmlparser.py index 703c44d5..932bb52f 100644 --- a/src/lxml/tests/test_htmlparser.py +++ b/src/lxml/tests/test_htmlparser.py @@ -7,7 +7,7 @@ HTML parser test cases for etree import unittest import tempfile, os -from common_imports import StringIO, etree, fileInTestDir +from common_imports import etree, StringIO, BytesIO, fileInTestDir, _bytes, _str from common_imports import SillyFileLike, HelperTestCase class HtmlParserTestCase(HelperTestCase): @@ -15,15 +15,15 @@ class HtmlParserTestCase(HelperTestCase): """ etree = etree - html_str = "<html><head><title>test</title></head><body><h1>page title</h1></body></html>" - html_str_pretty = """\ + html_str = _bytes("<html><head><title>test</title></head><body><h1>page title</h1></body></html>") + html_str_pretty = _bytes("""\ <html> <head><title>test</title></head> <body><h1>page title</h1></body> </html> -""" - broken_html_str = "<html><head><title>test<body><h1>page title</h3></p></html>" - uhtml_str = u"<html><head><title>test á\uF8D2</title></head><body><h1>page á\uF8D2 title</h1></body></html>" +""") + broken_html_str = _bytes("<html><head><title>test<body><h1>page title</h3></p></html>") + uhtml_str = _str("<html><head><title>test á\uF8D2</title></head><body><h1>page á\uF8D2 title</h1></body></html>") def tearDown(self): super(HtmlParserTestCase, self).tearDown() @@ -47,7 +47,7 @@ class HtmlParserTestCase(HelperTestCase): def test_module_parse_html_error(self): parser = self.etree.HTMLParser(recover=False) parse = self.etree.parse - f = StringIO("<html></body>") + f = BytesIO("<html></body>") self.assertRaises(self.etree.XMLSyntaxError, parse, f, parser) @@ -148,37 +148,37 @@ class HtmlParserTestCase(HelperTestCase): def test_module_parse_html_norecover(self): parser = self.etree.HTMLParser(recover=False) parse = self.etree.parse - f = StringIO(self.broken_html_str) + f = BytesIO(self.broken_html_str) self.assertRaises(self.etree.XMLSyntaxError, 
parse, f, parser) def test_parse_encoding_8bit_explicit(self): - text = u'Søk på nettet' - html_latin1 = (u'<p>%s</p>' % text).encode('iso-8859-1') + text = _str('Søk på nettet') + html_latin1 = (_str('<p>%s</p>') % text).encode('iso-8859-1') tree = self.etree.parse( - StringIO(html_latin1), + BytesIO(html_latin1), self.etree.HTMLParser(encoding="iso-8859-1")) p = tree.find("//p") self.assertEquals(p.text, text) def test_parse_encoding_8bit_override(self): - text = u'Søk på nettet' - wrong_head = ''' + text = _str('Søk på nettet') + wrong_head = _str(''' <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> - </head>''' - html_latin1 = (u'<html>%s<body><p>%s</p></body></html>' % (wrong_head, - text) + </head>''') + html_latin1 = (_str('<html>%s<body><p>%s</p></body></html>') % (wrong_head, + text) ).encode('iso-8859-1') self.assertRaises(self.etree.ParseError, self.etree.parse, - StringIO(html_latin1)) + BytesIO(html_latin1)) tree = self.etree.parse( - StringIO(html_latin1), + BytesIO(html_latin1), self.etree.HTMLParser(encoding="iso-8859-1")) p = tree.find("//p") self.assertEquals(p.text, text) @@ -190,7 +190,7 @@ class HtmlParserTestCase(HelperTestCase): def test_module_HTML_cdata(self): # by default, libxml2 generates CDATA nodes for <script> content - html = '<html><head><style>foo</style></head></html>' + html = _bytes('<html><head><style>foo</style></head></html>') element = self.etree.HTML(html) self.assertEquals(element[0][0].text, "foo") @@ -233,22 +233,22 @@ class HtmlParserTestCase(HelperTestCase): def test_default_parser_HTML_broken(self): self.assertRaises(self.etree.XMLSyntaxError, - self.etree.parse, StringIO(self.broken_html_str)) + self.etree.parse, BytesIO(self.broken_html_str)) self.etree.set_default_parser( self.etree.HTMLParser() ) - tree = self.etree.parse(StringIO(self.broken_html_str)) + tree = self.etree.parse(BytesIO(self.broken_html_str)) self.assertEqual(self.etree.tostring(tree.getroot()), self.html_str) 
self.etree.set_default_parser() self.assertRaises(self.etree.XMLSyntaxError, - self.etree.parse, StringIO(self.broken_html_str)) + self.etree.parse, BytesIO(self.broken_html_str)) def test_html_iterparse(self): iterparse = self.etree.iterparse - f = StringIO( + f = BytesIO( '<html><head><title>TITLE</title><body><p>P</p></body></html>') iterator = iterparse(f, html=True) |
