4 files changed, 47 insertions, 5 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index a9bdf4458b..f264191dc4 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -320,17 +320,18 @@ class TokenList(list):
         return ''.join(res)
 
     def _fold(self, folded):
+        encoding = 'utf-8' if folded.policy.utf8 else 'ascii'
         for part in self.parts:
             tstr = str(part)
             tlen = len(tstr)
             try:
-                str(part).encode('us-ascii')
+                str(part).encode(encoding)
             except UnicodeEncodeError:
                 if any(isinstance(x, errors.UndecodableBytesDefect)
                         for x in part.all_defects):
                     charset = 'unknown-8bit'
                 else:
-                    # XXX: this should be a policy setting
+                    # XXX: this should be a policy setting when utf8 is False.
                     charset = 'utf-8'
                 tstr = part.cte_encode(charset, folded.policy)
                 tlen = len(tstr)
@@ -394,11 +395,12 @@ class UnstructuredTokenList(TokenList):
 
     def _fold(self, folded):
         last_ew = None
+        encoding = 'utf-8' if folded.policy.utf8 else 'ascii'
         for part in self.parts:
             tstr = str(part)
             is_ew = False
             try:
-                str(part).encode('us-ascii')
+                str(part).encode(encoding)
             except UnicodeEncodeError:
                 if any(isinstance(x, errors.UndecodableBytesDefect)
                        for x in part.all_defects):
@@ -475,12 +477,13 @@ class Phrase(TokenList):
         # comment that becomes a barrier across which we can't compose encoded
         # words.
         last_ew = None
+        encoding = 'utf-8' if folded.policy.utf8 else 'ascii'
         for part in self.parts:
             tstr = str(part)
             tlen = len(tstr)
             has_ew = False
             try:
-                str(part).encode('us-ascii')
+                str(part).encode(encoding)
             except UnicodeEncodeError:
                 if any(isinstance(x, errors.UndecodableBytesDefect)
                         for x in part.all_defects):
diff --git a/Lib/email/policy.py b/Lib/email/policy.py
index f0b20f4b19..6ac64a5683 100644
--- a/Lib/email/policy.py
+++ b/Lib/email/policy.py
@@ -35,6 +35,13 @@ class EmailPolicy(Policy):
     In addition to the settable attributes listed above that apply to
     all Policies, this policy adds the following additional attributes:
 
+    utf8                -- if False (the default) message headers will be
+                           serialized as ASCII, using encoded words to encode
+                           any non-ASCII characters in the source strings.  If
+                           True, the message headers will be serialized as
+                           UTF-8 and will not contain encoded words (see RFC
+                           6532 for more on this serialization format).
+
     refold_source       -- if the value for a header in the Message object
                            came from the parsing of some source, this attribute
                            indicates whether or not a generator should refold
@@ -72,6 +79,7 @@ class EmailPolicy(Policy):
 
     """
 
+    utf8 = False
     refold_source = 'long'
     header_factory = HeaderRegistry()
     content_manager = raw_data_manager
@@ -175,9 +183,13 @@ class EmailPolicy(Policy):
         refold_header setting, since there is no way to know whether the binary
         data consists of single byte characters or multibyte characters.
 
+        If utf8 is true, headers are encoded to UTF-8; otherwise they are
+        encoded to ASCII, with non-ASCII characters rendered as encoded words.
+
         """
         folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
-        return folded.encode('ascii', 'surrogateescape')
+        charset = 'utf8' if self.utf8 else 'ascii'
+        return folded.encode(charset, 'surrogateescape')
 
     def _fold(self, name, value, refold_binary=False):
         if hasattr(value, 'name'):
@@ -199,3 +211,4 @@ del default.header_factory
 strict = default.clone(raise_on_defect=True)
 SMTP = default.clone(linesep='\r\n')
 HTTP = default.clone(linesep='\r\n', max_line_length=None)
+SMTPUTF8 = SMTP.clone(utf8=True)
diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py
index 8917408171..920f870c23 100644
--- a/Lib/test/test_email/test_generator.py
+++ b/Lib/test/test_email/test_generator.py
@@ -2,6 +2,7 @@ import io
 import textwrap
 import unittest
 from email import message_from_string, message_from_bytes
+from email.message import EmailMessage
 from email.generator import Generator, BytesGenerator
 from email import policy
 from test.test_email import TestEmailBase, parameterize
@@ -194,6 +195,27 @@ class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
         g.flatten(msg)
         self.assertEqual(s.getvalue(), expected)
 
+    def test_smtputf8_policy(self):
+        msg = EmailMessage()
+        msg['From'] = "Páolo <főo@bar.com>"
+        msg['To'] = 'Dinsdale'
+        msg['Subject'] = 'Nudge nudge, wink, wink \U0001F609'  # winking face
+        msg.set_content("oh là là, know what I mean, know what I mean?")
+        expected = textwrap.dedent("""\
+            From: Páolo <főo@bar.com>
+            To: Dinsdale
+            Subject: Nudge nudge, wink, wink \U0001F609
+            Content-Type: text/plain; charset="utf-8"
+            Content-Transfer-Encoding: 8bit
+            MIME-Version: 1.0
+
+            oh là là, know what I mean, know what I mean?
+            """).encode('utf-8').replace(b'\n', b'\r\n')
+        s = io.BytesIO()
+        g = BytesGenerator(s, policy=policy.SMTPUTF8)
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), expected)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py
index e797f36b72..4b0a04e594 100644
--- a/Lib/test/test_email/test_policy.py
+++ b/Lib/test/test_email/test_policy.py
@@ -27,6 +27,7 @@ class PolicyAPITests(unittest.TestCase):
     # If any of these defaults change, the docs must be updated.
     policy_defaults = compat32_defaults.copy()
     policy_defaults.update({
+        'utf8':                     False,
         'raise_on_defect':          False,
         'header_factory':           email.policy.EmailPolicy.header_factory,
         'refold_source':            'long',
@@ -42,6 +43,9 @@ class PolicyAPITests(unittest.TestCase):
         email.policy.default: make_defaults(policy_defaults, {}),
         email.policy.SMTP: make_defaults(policy_defaults,
                                          {'linesep': '\r\n'}),
+        email.policy.SMTPUTF8: make_defaults(policy_defaults,
+                                             {'linesep': '\r\n',
+                                              'utf8': True}),
         email.policy.HTTP: make_defaults(policy_defaults,
                                          {'linesep': '\r\n',
                                           'max_line_length': None}),