diff options
author | Stefan Seelmann <mail@stefan-seelmann.de> | 2013-04-27 16:34:16 +0200 |
---|---|---|
committer | Stefan Seelmann <mail@stefan-seelmann.de> | 2013-04-27 16:53:08 +0200 |
commit | 7b7958e175f0218cea58d4f42644f8ee07437f2e (patch) | |
tree | e1f53ebc63d334442b9729eb854333e30954e381 | |
parent | d96e70fa8fd551bd3a7724ba17cad01dacaa4c4b (diff) | |
download | python-lxml-7b7958e175f0218cea58d4f42644f8ee07437f2e.tar.gz |
Avoid error in lxml.html.fromstring() if content contains no body (bugs 599318, 690319)
-rw-r--r-- | src/lxml/html/__init__.py | 2 |
-rw-r--r-- | src/lxml/html/tests/test_basic.txt | 42 |
2 files changed, 44 insertions, 0 deletions
diff --git a/src/lxml/html/__init__.py b/src/lxml/html/__init__.py index 23a96c1e..78b0e9f4 100644 --- a/src/lxml/html/__init__.py +++ b/src/lxml/html/__init__.py @@ -699,6 +699,8 @@ def fromstring(html, base_url=None, parser=None, **kw): # We don't care about text or tail in a head other_head.drop_tree() return doc + if body is None: + return doc if (len(body) == 1 and (not body.text or not body.text.strip()) and (not body[-1].tail or not body[-1].tail.strip())): # The body has just one element, so it was probably a single diff --git a/src/lxml/html/tests/test_basic.txt b/src/lxml/html/tests/test_basic.txt index 074dddef..d7066402 100644 --- a/src/lxml/html/tests/test_basic.txt +++ b/src/lxml/html/tests/test_basic.txt @@ -118,3 +118,45 @@ least if an encoding is given. 1 >>> print(docs[0]) Test + +Bug 599318: Call fromstring with a frameset fragment should not raise an error, +the whole document is returned. + + >>> import lxml.html + >>> content=''' + ... <frameset> + ... <frame src="main.php" name="srcpg"> + ... </frameset>''' + >>> etree_document = lxml.html.fromstring(content) + >>> print(tostring(etree_document, encoding=unicode)) + <html><frameset><frame src="main.php" name="srcpg"></frameset></html> + +Bug 599318: Call fromstring with a div fragment should not raise an error, +only the element is returned + + >>> import lxml.html + >>> content='<div></div>' + >>> etree_document = lxml.html.fromstring(content) + >>> print(tostring(etree_document, encoding=unicode)) + <div></div> + +Bug 599318: Call fromstring with a head fragment should not raise an error, +the whole document is returned. + + >>> import lxml.html + >>> content='<head></head>' + >>> etree_document = lxml.html.fromstring(content) + >>> print(tostring(etree_document, encoding=unicode)) + <html><head></head></html> + +Bug 690319: Leading whitespace before doctype declaration should not raise an error. + + >>> import lxml.html + >>> content=''' + ... 
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> + ... <html> + ... </html>''' + >>> etree_document = lxml.html.fromstring(content) + >>> print(tostring(etree_document, encoding=unicode)) + <html></html> + |