summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Patrick Strawderman <patrick@kilink.net>, 2012-11-18 01:52:42 -0500
committer: Patrick Strawderman <patrick@kilink.net>, 2012-11-18 01:52:42 -0500
commit: 4f34ff6ad55abb1a57d7109ce2a4d868271bc981 (patch)
tree: d3005385791b995e486d3d5f0b21514472d2fc84
parent: 39e7f3637b440b3a70f369e84b79e3e3d8852b39 (diff)
download: webtest-4f34ff6ad55abb1a57d7109ce2a4d868271bc981.tar.gz
decode all HTML entities
-rw-r--r-- tests/test_click.py 5
-rw-r--r-- webtest/app.py 23
-rw-r--r-- webtest/compat.py 2
3 files changed, 24 insertions, 6 deletions
diff --git a/tests/test_click.py b/tests/test_click.py
index 5c27c53..b02179a 100644
--- a/tests/test_click.py
+++ b/tests/test_click.py
@@ -130,3 +130,8 @@ class TestClick(unittest.TestCase):
self.assertEqual(_parse_attrs("href='foo' id=\"bar\" "), {'href': 'foo', 'id': 'bar'})
self.assertEqual(_parse_attrs("href='foo' id='bar' "), {'href': 'foo', 'id': 'bar'})
self.assertEqual(_parse_attrs("tag='foo\"'"), {'tag': 'foo"'})
+ self.assertEqual(
+ _parse_attrs('value="&lt;&gt;&amp;&quot;&#123;"'),
+ {'value': u('<>&"{')})
+ self.assertEqual(_parse_attrs('value="&sum;"'), {'value': u('∑')})
+ self.assertEqual(_parse_attrs('value="&#x20ac;"'), {'value': u('€')})
diff --git a/webtest/app.py b/webtest/app.py
index 406a846..32a0ef9 100644
--- a/webtest/app.py
+++ b/webtest/app.py
@@ -21,6 +21,7 @@ from webtest.compat import StringIO
from webtest.compat import BytesIO
from webtest.compat import SimpleCookie, CookieError
from webtest.compat import cookie_quote
+from webtest.compat import name2codepoint
from webtest.compat import urlencode
from webtest.compat import splittype
from webtest.compat import splithost
@@ -1819,15 +1820,25 @@ def _make_pattern(pat):
"Cannot make callable pattern object out of %r" % pat)
# Matches a named entity (&amp;), a decimal reference (&#123;) or a
# hexadecimal reference (&#x20ac;); group 1 is the text between "&" and ";".
entity_pattern = re.compile(r"&(\w+|#\d+|#[xX][a-fA-F0-9]+);")

try:
    _to_chr = unichr  # Python 2: chr() only covers byte values
except NameError:
    _to_chr = chr  # Python 3: chr() handles every code point


def html_unquote(v):
    """
    Unquote entities in HTML.

    Named entities found in ``name2codepoint`` as well as decimal
    (``&#123;``) and hexadecimal (``&#x20ac;``) character references are
    replaced by the character they denote.  The text is scanned once, so
    the output of one replacement is never decoded again
    (``&amp;lt;`` becomes ``&lt;``, not ``<``).

    Anything that cannot be decoded -- an unknown entity name or a numeric
    reference outside the range accepted by the character constructor --
    is left in the text exactly as it was written.

    :param v: text that may contain HTML entities
    :returns: ``v`` with every decodable entity replaced
    """
    def repl(match):
        ref = match.group(1)
        if ref.startswith("#"):
            # The pattern guarantees at least one character after "#".
            if ref[1] in "xX":
                codepoint = int(ref[2:], 16)
            else:
                codepoint = int(ref[1:])
        elif ref in name2codepoint:
            codepoint = name2codepoint[ref]
        else:
            # Unknown name: returning None here would make re.sub drop
            # the entity from the output, so hand back the original text.
            return match.group(0)
        try:
            return _to_chr(codepoint)
        except (ValueError, OverflowError):
            # Code point outside the supported range; keep the raw text.
            return match.group(0)
    return entity_pattern.sub(repl, v)
def encode_params(params, content_type):
diff --git a/webtest/compat.py b/webtest/compat.py
index 1c30a4f..b4d39ac 100644
--- a/webtest/compat.py
+++ b/webtest/compat.py
@@ -8,6 +8,7 @@ if sys.version_info[0] > 2:
binary_type = bytes
from json import loads
from json import dumps
+ from html.entities import name2codepoint
from io import StringIO
from io import BytesIO
from urllib.parse import urlencode
@@ -40,6 +41,7 @@ else:
string_types = basestring
text_type = unicode
binary_type = str
+ from htmlentitydefs import name2codepoint
from urllib import splittype
from urllib import splithost
from urllib import urlencode