summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author scoder <stefan_ml@behnel.de> 2013-04-27 08:24:40 -0700
committer scoder <stefan_ml@behnel.de> 2013-04-27 08:24:40 -0700
commit a8a5a39a135a2c2c31a764f0f759217a937df3bb (patch)
tree e1f53ebc63d334442b9729eb854333e30954e381
parent d96e70fa8fd551bd3a7724ba17cad01dacaa4c4b (diff)
parent 7b7958e175f0218cea58d4f42644f8ee07437f2e (diff)
download python-lxml-a8a5a39a135a2c2c31a764f0f759217a937df3bb.tar.gz
Merge pull request #119 from seelmann/599318-690319-fromstring-error-if-no-body
Avoid error in lxml.html.fromstring() if content contains no body (bugs ...
-rw-r--r-- src/lxml/html/__init__.py 2
-rw-r--r-- src/lxml/html/tests/test_basic.txt 42
2 files changed, 44 insertions, 0 deletions
diff --git a/src/lxml/html/__init__.py b/src/lxml/html/__init__.py
index 23a96c1e..78b0e9f4 100644
--- a/src/lxml/html/__init__.py
+++ b/src/lxml/html/__init__.py
@@ -699,6 +699,8 @@ def fromstring(html, base_url=None, parser=None, **kw):
# We don't care about text or tail in a head
other_head.drop_tree()
return doc
+ if body is None:
+ return doc
if (len(body) == 1 and (not body.text or not body.text.strip())
and (not body[-1].tail or not body[-1].tail.strip())):
# The body has just one element, so it was probably a single
diff --git a/src/lxml/html/tests/test_basic.txt b/src/lxml/html/tests/test_basic.txt
index 074dddef..d7066402 100644
--- a/src/lxml/html/tests/test_basic.txt
+++ b/src/lxml/html/tests/test_basic.txt
@@ -118,3 +118,45 @@ least if an encoding is given.
1
>>> print(docs[0])
Test
+
+Bug 599318: Calling fromstring with a frameset fragment should not raise an
+error; the whole document is returned.
+
+ >>> import lxml.html
+ >>> content='''
+ ... <frameset>
+ ... <frame src="main.php" name="srcpg">
+ ... </frameset>'''
+ >>> etree_document = lxml.html.fromstring(content)
+ >>> print(tostring(etree_document, encoding=unicode))
+ <html><frameset><frame src="main.php" name="srcpg"></frameset></html>
+
+Bug 599318: Calling fromstring with a div fragment should not raise an error;
+only the element is returned.
+
+ >>> import lxml.html
+ >>> content='<div></div>'
+ >>> etree_document = lxml.html.fromstring(content)
+ >>> print(tostring(etree_document, encoding=unicode))
+ <div></div>
+
+Bug 599318: Calling fromstring with a head fragment should not raise an error;
+the whole document is returned.
+
+ >>> import lxml.html
+ >>> content='<head></head>'
+ >>> etree_document = lxml.html.fromstring(content)
+ >>> print(tostring(etree_document, encoding=unicode))
+ <html><head></head></html>
+
+Bug 690319: Leading whitespace before doctype declaration should not raise an error.
+
+ >>> import lxml.html
+ >>> content='''
+ ... <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+ ... <html>
+ ... </html>'''
+ >>> etree_document = lxml.html.fromstring(content)
+ >>> print(tostring(etree_document, encoding=unicode))
+ <html></html>
+