summary | refs | log | tree | commit | diff
path: root/src/lxml/tests/test_htmlparser.py
diff options
context:
space:
mode:
author: scoder <none@none> 2008-05-21 21:47:20 +0200
committer: scoder <none@none> 2008-05-21 21:47:20 +0200
commit: 3d3ce5594e2f1f95d40289f2a38d864910082e8f (patch)
tree: 04c0c05a82bce347853c78fb74606566417c93f5 /src/lxml/tests/test_htmlparser.py
parent: f1c88245f3a6a146274af2bb9c16accace75dbd8 (diff)
download: python-lxml-3d3ce5594e2f1f95d40289f2a38d864910082e8f.tar.gz
[svn r3710] r4268@delle: sbehnel | 2008-05-21 13:11:54 +0200
Py3 test fixes and work arounds --HG-- branch : trunk
Diffstat (limited to 'src/lxml/tests/test_htmlparser.py')
-rw-r--r-- src/lxml/tests/test_htmlparser.py 46
1 files changed, 23 insertions, 23 deletions
diff --git a/src/lxml/tests/test_htmlparser.py b/src/lxml/tests/test_htmlparser.py
index 703c44d5..932bb52f 100644
--- a/src/lxml/tests/test_htmlparser.py
+++ b/src/lxml/tests/test_htmlparser.py
@@ -7,7 +7,7 @@ HTML parser test cases for etree
import unittest
import tempfile, os
-from common_imports import StringIO, etree, fileInTestDir
+from common_imports import etree, StringIO, BytesIO, fileInTestDir, _bytes, _str
from common_imports import SillyFileLike, HelperTestCase
class HtmlParserTestCase(HelperTestCase):
@@ -15,15 +15,15 @@ class HtmlParserTestCase(HelperTestCase):
"""
etree = etree
- html_str = "<html><head><title>test</title></head><body><h1>page title</h1></body></html>"
- html_str_pretty = """\
+ html_str = _bytes("<html><head><title>test</title></head><body><h1>page title</h1></body></html>")
+ html_str_pretty = _bytes("""\
<html>
<head><title>test</title></head>
<body><h1>page title</h1></body>
</html>
-"""
- broken_html_str = "<html><head><title>test<body><h1>page title</h3></p></html>"
- uhtml_str = u"<html><head><title>test á\uF8D2</title></head><body><h1>page á\uF8D2 title</h1></body></html>"
+""")
+ broken_html_str = _bytes("<html><head><title>test<body><h1>page title</h3></p></html>")
+ uhtml_str = _str("<html><head><title>test á\uF8D2</title></head><body><h1>page á\uF8D2 title</h1></body></html>")
def tearDown(self):
super(HtmlParserTestCase, self).tearDown()
@@ -47,7 +47,7 @@ class HtmlParserTestCase(HelperTestCase):
def test_module_parse_html_error(self):
parser = self.etree.HTMLParser(recover=False)
parse = self.etree.parse
- f = StringIO("<html></body>")
+ f = BytesIO("<html></body>")
self.assertRaises(self.etree.XMLSyntaxError,
parse, f, parser)
@@ -148,37 +148,37 @@ class HtmlParserTestCase(HelperTestCase):
def test_module_parse_html_norecover(self):
parser = self.etree.HTMLParser(recover=False)
parse = self.etree.parse
- f = StringIO(self.broken_html_str)
+ f = BytesIO(self.broken_html_str)
self.assertRaises(self.etree.XMLSyntaxError,
parse, f, parser)
def test_parse_encoding_8bit_explicit(self):
- text = u'Søk på nettet'
- html_latin1 = (u'<p>%s</p>' % text).encode('iso-8859-1')
+ text = _str('Søk på nettet')
+ html_latin1 = (_str('<p>%s</p>') % text).encode('iso-8859-1')
tree = self.etree.parse(
- StringIO(html_latin1),
+ BytesIO(html_latin1),
self.etree.HTMLParser(encoding="iso-8859-1"))
p = tree.find("//p")
self.assertEquals(p.text, text)
def test_parse_encoding_8bit_override(self):
- text = u'Søk på nettet'
- wrong_head = '''
+ text = _str('Søk på nettet')
+ wrong_head = _str('''
<head>
<meta http-equiv="Content-Type"
content="text/html; charset=UTF-8" />
- </head>'''
- html_latin1 = (u'<html>%s<body><p>%s</p></body></html>' % (wrong_head,
- text)
+ </head>''')
+ html_latin1 = (_str('<html>%s<body><p>%s</p></body></html>') % (wrong_head,
+ text)
).encode('iso-8859-1')
self.assertRaises(self.etree.ParseError,
self.etree.parse,
- StringIO(html_latin1))
+ BytesIO(html_latin1))
tree = self.etree.parse(
- StringIO(html_latin1),
+ BytesIO(html_latin1),
self.etree.HTMLParser(encoding="iso-8859-1"))
p = tree.find("//p")
self.assertEquals(p.text, text)
@@ -190,7 +190,7 @@ class HtmlParserTestCase(HelperTestCase):
def test_module_HTML_cdata(self):
# by default, libxml2 generates CDATA nodes for <script> content
- html = '<html><head><style>foo</style></head></html>'
+ html = _bytes('<html><head><style>foo</style></head></html>')
element = self.etree.HTML(html)
self.assertEquals(element[0][0].text, "foo")
@@ -233,22 +233,22 @@ class HtmlParserTestCase(HelperTestCase):
def test_default_parser_HTML_broken(self):
self.assertRaises(self.etree.XMLSyntaxError,
- self.etree.parse, StringIO(self.broken_html_str))
+ self.etree.parse, BytesIO(self.broken_html_str))
self.etree.set_default_parser( self.etree.HTMLParser() )
- tree = self.etree.parse(StringIO(self.broken_html_str))
+ tree = self.etree.parse(BytesIO(self.broken_html_str))
self.assertEqual(self.etree.tostring(tree.getroot()),
self.html_str)
self.etree.set_default_parser()
self.assertRaises(self.etree.XMLSyntaxError,
- self.etree.parse, StringIO(self.broken_html_str))
+ self.etree.parse, BytesIO(self.broken_html_str))
def test_html_iterparse(self):
iterparse = self.etree.iterparse
- f = StringIO(
+ f = BytesIO(
'<html><head><title>TITLE</title><body><p>P</p></body></html>')
iterator = iterparse(f, html=True)