#22233: Only split headers on \r and/or \n, per email RFCs.

Original patch by Martin Panter, new policy fixes by me.
author: R David Murray <rdmurray@bitdance.com> 2016-09-07 17:44:34 -0400
committer: R David Murray <rdmurray@bitdance.com> 2016-09-07 17:44:34 -0400
commit: dc1650ca062a99d41a029a6645dc72fd7d820c94 (patch)
tree: 7719487f2ea0d6a95d2e024e365dbedacf697534 /Lib/test/test_email
parent: 6b46ec7733ad7391b9e008d2b273c556f140f88e (diff)
download: cpython-git-dc1650ca062a99d41a029a6645dc72fd7d820c94.tar.gz
2 files changed, 45 insertions, 2 deletions
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 90fd9e1970..8e407f70da 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -3444,10 +3444,12 @@ class TestFeedParsers(TestEmailBase):
         self.assertEqual(m.keys(), ['a', 'b'])
         m = self.parse(['a:\r', '\nb:\n'])
         self.assertEqual(m.keys(), ['a', 'b'])
+
+        # Only CR and LF should break header fields
         m = self.parse(['a:\x85b:\u2028c:\n'])
-        self.assertEqual(m.items(), [('a', '\x85'), ('b', '\u2028'), ('c', '')])
+        self.assertEqual(m.items(), [('a', '\x85b:\u2028c:')])
         m = self.parse(['a:\r', 'b:\x85', 'c:\n'])
-        self.assertEqual(m.items(), [('a', ''), ('b', '\x85'), ('c', '')])
+        self.assertEqual(m.items(), [('a', ''), ('b', '\x85c:')])
 
     def test_long_lines(self):
         # Expected peak memory use on 32-bit platform: 6*N*M bytes.
diff --git a/Lib/test/test_email/test_parser.py b/Lib/test/test_email/test_parser.py
index b54fdd7589..8ddc176389 100644
--- a/Lib/test/test_email/test_parser.py
+++ b/Lib/test/test_email/test_parser.py
@@ -2,6 +2,7 @@ import io
 import email
 import unittest
 from email.message import Message
+from email.policy import default
 from test.test_email import TestEmailBase
 
 
@@ -32,5 +33,45 @@ class TestCustomMessage(TestEmailBase):
     # XXX add tests for other functions that take Message arg.
 
 
+class TestParserBase:
+
+    def test_only_split_on_cr_lf(self):
+        # The unicode line splitter splits on unicode linebreaks, which are
+        # more numerous than allowed by the email RFCs; make sure we are only
+        # splitting on CR and LF.
+        msg = self.parser(
+            "Next-Line: not\x85broken\r\n"
+            "Null: not\x00broken\r\n"
+            "Vertical-Tab: not\vbroken\r\n"
+            "Form-Feed: not\fbroken\r\n"
+            "File-Separator: not\x1Cbroken\r\n"
+            "Group-Separator: not\x1Dbroken\r\n"
+            "Record-Separator: not\x1Ebroken\r\n"
+            "Line-Separator: not\u2028broken\r\n"
+            "Paragraph-Separator: not\u2029broken\r\n"
+            "\r\n",
+            policy=default,
+        )
+        self.assertEqual(msg.items(), [
+            ("Next-Line", "not\x85broken"),
+            ("Null", "not\x00broken"),
+            ("Vertical-Tab", "not\vbroken"),
+            ("Form-Feed", "not\fbroken"),
+            ("File-Separator", "not\x1Cbroken"),
+            ("Group-Separator", "not\x1Dbroken"),
+            ("Record-Separator", "not\x1Ebroken"),
+            ("Line-Separator", "not\u2028broken"),
+            ("Paragraph-Separator", "not\u2029broken"),
+        ])
+        self.assertEqual(msg.get_payload(), "")
+
+class TestParser(TestParserBase, TestEmailBase):
+    parser = staticmethod(email.message_from_string)
+
+class TestBytesParser(TestParserBase, TestEmailBase):
+    def parser(self, s, *args, **kw):
+        return email.message_from_bytes(s.encode(), *args, **kw)
+
+
 if __name__ == '__main__':
     unittest.main()
author	R David Murray <rdmurray@bitdance.com>	2016-09-07 17:44:34 -0400
committer	R David Murray <rdmurray@bitdance.com>	2016-09-07 17:44:34 -0400
commit	dc1650ca062a99d41a029a6645dc72fd7d820c94 (patch)
tree	7719487f2ea0d6a95d2e024e365dbedacf697534 /Lib/test/test_email
parent	6b46ec7733ad7391b9e008d2b273c556f140f88e (diff)
download	cpython-git-dc1650ca062a99d41a029a6645dc72fd7d820c94.tar.gz