summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorR David Murray <rdmurray@bitdance.com>2014-02-08 13:12:00 -0500
committerR David Murray <rdmurray@bitdance.com>2014-02-08 13:12:00 -0500
commit0400d33928e6b463db164836da670700f03edc5d (patch)
tree70c96ca143468dfe5e7fcfc827fc32bf37d4ff4b
parent905c8c3d8dfe081d91e399aa5fd93d1659655264 (diff)
downloadcpython-git-0400d33928e6b463db164836da670700f03edc5d.tar.gz
#16983: Apply Postel's law to encoded words inside quoted strings.
This applies only to the new parser. The old parser decodes encoded words inside quoted strings already, although it gets the whitespace wrong when it does so. This version of the patch only handles the most common case (a single encoded word surrounded by quotes), but I haven't seen any other variations of this in the wild yet, so it's good enough for now.
-rw-r--r--Lib/email/_header_value_parser.py7
-rw-r--r--Lib/test/test_email/test__header_value_parser.py9
-rw-r--r--Lib/test/test_email/test_headerregistry.py10
-rw-r--r--Misc/NEWS3
4 files changed, 29 insertions, 0 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 291437c586..0369e01547 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1559,6 +1559,13 @@ def get_bare_quoted_string(value):
while value and value[0] != '"':
if value[0] in WSP:
token, value = get_fws(value)
+ elif value[:2] == '=?':
+ try:
+ token, value = get_encoded_word(value)
+ bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
+ "encoded word inside quoted string"))
+ except errors.HeaderParseError:
+ token, value = get_qcontent(value)
else:
token, value = get_qcontent(value)
bare_quoted_string.append(token)
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 646082b4a4..32996ca4c8 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -540,6 +540,15 @@ class TestParser(TestParserMixin, TestEmailBase):
self._test_get_x(parser.get_bare_quoted_string,
'""', '""', '', [], '')
+ # Issue 16983: apply postel's law to some bad encoding.
+ def test_encoded_word_inside_quotes(self):
+ self._test_get_x(parser.get_bare_quoted_string,
+ '"=?utf-8?Q?not_really_valid?="',
+ '"not really valid"',
+ 'not really valid',
+ [errors.InvalidHeaderDefect],
+ '')
+
# get_comment
def test_get_comment_only(self):
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index f829f83e32..adaf3e8fe4 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -1143,6 +1143,16 @@ class TestAddressHeader(TestHeaderBase):
'example.com',
None),
+ 'rfc2047_atom_in_quoted_string_is_decoded':
+ ('"=?utf-8?q?=C3=89ric?=" <foo@example.com>',
+ [errors.InvalidHeaderDefect],
+ 'Éric <foo@example.com>',
+ 'Éric',
+ 'foo@example.com',
+ 'foo',
+ 'example.com',
+ None),
+
}
# XXX: Need many more examples, and in particular some with names in
diff --git a/Misc/NEWS b/Misc/NEWS
index cedd4e52ac..3ee074392b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -48,6 +48,9 @@ Core and Builtins
Library
-------
+- Issue #16983: the new email header parsing code will now decode encoded words
+ that are (incorrectly) surrounded by quotes, and register a defect.
+
- Issue #19772: email.generator no longer mutates the message object when
doing a down-transform from 8bit to 7bit CTEs.