diff options
author | R. David Murray <rdmurray@bitdance.com> | 2010-10-23 22:19:56 +0000 |
---|---|---|
committer | R. David Murray <rdmurray@bitdance.com> | 2010-10-23 22:19:56 +0000 |
commit | 8451c4b6e044f83efc2298a79af58c3e56d946a2 (patch) | |
tree | edaad1a89627de27ad30b465b7a416c468850653 /Lib/email | |
parent | 29aad0005dd56634363dabd74cf6708c9a255b43 (diff) | |
download | cpython-git-8451c4b6e044f83efc2298a79af58c3e56d946a2.tar.gz |
#1349106: add linesep argument to generator.flatten and header.encode.
Diffstat (limited to 'Lib/email')
-rw-r--r-- | Lib/email/generator.py | 74 | ||||
-rw-r--r-- | Lib/email/header.py | 16 | ||||
-rw-r--r-- | Lib/email/test/data/msg_26.txt | 3 | ||||
-rw-r--r-- | Lib/email/test/test_email.py | 24 |
4 files changed, 80 insertions, 37 deletions
diff --git a/Lib/email/generator.py b/Lib/email/generator.py index 40b95c4f4f..05019d91fc 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -17,7 +17,7 @@ from email.header import Header from email.message import _has_surrogates UNDERSCORE = '_' -NL = '\n' +NL = '\n' # XXX: no longer used by the code below. fcre = re.compile(r'^From ', re.MULTILINE) @@ -58,7 +58,7 @@ class Generator: # Just delegate to the file object self._fp.write(s) - def flatten(self, msg, unixfrom=False): + def flatten(self, msg, unixfrom=False, linesep='\n'): """Print the message object tree rooted at msg to the output file specified when the Generator instance was created. @@ -68,12 +68,23 @@ class Generator: is False to inhibit the printing of any From_ delimiter. Note that for subobjects, no From_ line is printed. + + linesep specifies the characters used to indicate a new line in + the output. """ + # We use the _XXX constants for operating on data that comes directly + # from the msg, and _encoded_XXX constants for operating on data that + # has already been converted (to bytes in the BytesGenerator) and + # inserted into a temporary buffer. + self._NL = linesep + self._encoded_NL = self._encode(linesep) + self._EMPTY = '' + self._encoded_EMTPY = self._encode('') if unixfrom: ufrom = msg.get_unixfrom() if not ufrom: ufrom = 'From nobody ' + time.ctime(time.time()) - self.write(ufrom + NL) + self.write(ufrom + self._NL) self._write(msg) def clone(self, fp): @@ -93,20 +104,18 @@ class Generator: # it has already transformed the input; but, since this whole thing is a # hack anyway this seems good enough. - # We use these class constants when we need to manipulate data that has - # already been written to a buffer (ex: constructing a re to check the - # boundary), and the module level NL constant when adding new output to a - # buffer via self.write, because 'write' always takes strings. - # Having write always take strings makes the code simpler, but there are - # a few occasions when we need to write previously created data back - # to the buffer or to a new buffer; for those cases we use self._fp.write. - _NL = NL - _EMPTY = '' + # Similarly, we have _XXX and _encoded_XXX attributes that are used on + # source and buffer data, respectively. + _encoded_EMPTY = '' def _new_buffer(self): # BytesGenerator overrides this to return BytesIO. return StringIO() + def _encode(self, s): + # BytesGenerator overrides this to encode strings to bytes. + return s + def _write(self, msg): # We can't write the headers yet because of the following scenario: # say a multipart message includes the boundary string somewhere in @@ -158,14 +167,15 @@ class Generator: for h, v in msg.items(): self.write('%s: ' % h) if isinstance(v, Header): - self.write(v.encode(maxlinelen=self._maxheaderlen)+NL) + self.write(v.encode( + maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL) else: # Header's got lots of smarts, so use it. header = Header(v, maxlinelen=self._maxheaderlen, header_name=h) - self.write(header.encode()+NL) + self.write(header.encode(linesep=self._NL)+self._NL) # A blank line always separates headers from body - self.write(NL) + self.write(self._NL) # # Handlers for writing types and subtypes @@ -208,11 +218,11 @@ class Generator: for part in subparts: s = self._new_buffer() g = self.clone(s) - g.flatten(part, unixfrom=False) + g.flatten(part, unixfrom=False, linesep=self._NL) msgtexts.append(s.getvalue()) # Now make sure the boundary we've selected doesn't appear in any of # the message texts. - alltext = self._NL.join(msgtexts) + alltext = self._encoded_NL.join(msgtexts) # BAW: What about boundaries that are wrapped in double-quotes? boundary = msg.get_boundary(failobj=self._make_boundary(alltext)) # If we had to calculate a new boundary because the body text @@ -225,9 +235,9 @@ class Generator: msg.set_boundary(boundary) # If there's a preamble, write it out, with a trailing CRLF if msg.preamble is not None: - self.write(msg.preamble + NL) + self.write(msg.preamble + self._NL) # dash-boundary transport-padding CRLF - self.write('--' + boundary + NL) + self.write('--' + boundary + self._NL) # body-part if msgtexts: self._fp.write(msgtexts.pop(0)) @@ -236,13 +246,13 @@ class Generator: # --> CRLF body-part for body_part in msgtexts: # delimiter transport-padding CRLF - self.write('\n--' + boundary + NL) + self.write(self._NL + '--' + boundary + self._NL) # body-part self._fp.write(body_part) # close-delimiter transport-padding - self.write('\n--' + boundary + '--') + self.write(self._NL + '--' + boundary + '--') if msg.epilogue is not None: - self.write(NL) + self.write(self._NL) self.write(msg.epilogue) def _handle_multipart_signed(self, msg): @@ -266,16 +276,16 @@ class Generator: g = self.clone(s) g.flatten(part, unixfrom=False) text = s.getvalue() - lines = text.split(self._NL) + lines = text.split(self._encoded_NL) # Strip off the unnecessary trailing empty line - if lines and lines[-1] == self._EMPTY: - blocks.append(self._NL.join(lines[:-1])) + if lines and lines[-1] == self._encoded_EMPTY: + blocks.append(self._encoded_NL.join(lines[:-1])) else: blocks.append(text) # Now join all the blocks with an empty line. This has the lovely # effect of separating each block with an empty line, but not adding # an extra one after the last one. - self._fp.write(self._NL.join(blocks)) + self._fp.write(self._encoded_NL.join(blocks)) def _handle_message(self, msg): s = self._new_buffer() @@ -333,10 +343,9 @@ class BytesGenerator(Generator): The outfp object must accept bytes in its write method. """ - # Bytes versions of these constants for use in manipulating data from + # Bytes versions of this constant for use in manipulating data from # the BytesIO buffer. - _NL = NL.encode('ascii') - _EMPTY = b'' + _encoded_EMPTY = b'' def write(self, s): self._fp.write(s.encode('ascii', 'surrogateescape')) @@ -344,6 +353,9 @@ class BytesGenerator(Generator): def _new_buffer(self): return BytesIO() + def _encode(self, s): + return s.encode('ascii') + def _write_headers(self, msg): # This is almost the same as the string version, except for handling # strings with 8bit bytes. @@ -363,9 +375,9 @@ class BytesGenerator(Generator): # Header's got lots of smarts and this string is safe... header = Header(v, maxlinelen=self._maxheaderlen, header_name=h) - self.write(header.encode()+NL) + self.write(header.encode(linesep=self._NL)+self._NL) # A blank line always separates headers from body - self.write(NL) + self.write(self._NL) def _handle_text(self, msg): # If the string has surrogates the original source was bytes, so diff --git a/Lib/email/header.py b/Lib/email/header.py index 89c1391052..88fa80f57e 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -272,7 +272,7 @@ class Header: output_string = input_bytes.decode(output_charset, errors) self._chunks.append((output_string, charset)) - def encode(self, splitchars=';, \t', maxlinelen=None): + def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): """Encode a message header into an RFC-compliant format. There are many issues involved in converting a given string for use in @@ -293,6 +293,11 @@ class Header: Optional splitchars is a string containing characters to split long ASCII lines on, in rough support of RFC 2822's `highest level syntactic breaks'. This doesn't affect RFC 2047 encoded lines. + + Optional linesep is a string to be used to separate the lines of + the value. The default value is the most useful for typical + Python applications, but it can be set to \r\n to produce RFC-compliant + line separators when needed. """ self._normalize() if maxlinelen is None: @@ -311,7 +316,7 @@ class Header: if len(lines) > 1: formatter.newline() formatter.add_transition() - return str(formatter) + return formatter._str(linesep) def _normalize(self): # Step 1: Normalize the chunks so that all runs of identical charsets @@ -342,9 +347,12 @@ class _ValueFormatter: self._lines = [] self._current_line = _Accumulator(headerlen) - def __str__(self): + def _str(self, linesep): self.newline() - return NL.join(self._lines) + return linesep.join(self._lines) + + def __str__(self): + return self._str(NL) def newline(self): end_of_line = self._current_line.pop() diff --git a/Lib/email/test/data/msg_26.txt b/Lib/email/test/data/msg_26.txt index 6c71bced9a..58efaa9c9a 100644 --- a/Lib/email/test/data/msg_26.txt +++ b/Lib/email/test/data/msg_26.txt @@ -24,7 +24,8 @@ Simple email with attachment. --1618492860--2051301190--113853680
-Content-Type: application/riscos; name="clock.bmp,69c"; type=BMP; load=&fff69c4b; exec=&355dd4d1; access=&03
+Content-Type: application/riscos; name="clock.bmp,69c"; type=BMP;
+ load=&fff69c4b; exec=&355dd4d1; access=&03
Content-Disposition: attachment; filename="clock.bmp"
Content-Transfer-Encoding: base64
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index e5e51c6ffc..f40d77081d 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -77,7 +77,7 @@ class TestMessageAPI(TestEmailBase): eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org']) eq(msg.get_all('xx', 'n/a'), 'n/a') - def test_getset_charset(self): + def TEst_getset_charset(self): eq = self.assertEqual msg = Message() eq(msg.get_charset(), None) @@ -2600,6 +2600,18 @@ Here's the message body part2 = msg.get_payload(1) eq(part2.get_content_type(), 'application/riscos') + def test_crlf_flatten(self): + # Using newline='\n' preserves the crlfs in this input file. + with openfile('msg_26.txt', newline='\n') as fp: + text = fp.read() + msg = email.message_from_string(text) + s = StringIO() + g = Generator(s) + g.flatten(msg, linesep='\r\n') + self.assertEqual(s.getvalue(), text) + + maxDiff = None + def test_multipart_digest_with_extra_mime_headers(self): eq = self.assertEqual neq = self.ndiffAssertEqual @@ -2931,6 +2943,16 @@ class Test8BitBytesHandling(unittest.TestCase): m = bfp.close() self.assertEqual(str(m), self.latin_bin_msg_as7bit) + def test_crlf_flatten(self): + with openfile('msg_26.txt', 'rb') as fp: + text = fp.read() + msg = email.message_from_bytes(text) + s = BytesIO() + g = email.generator.BytesGenerator(s) + g.flatten(msg, linesep='\r\n') + self.assertEqual(s.getvalue(), text) + maxDiff = None + class TestBytesGeneratorIdempotent(TestIdempotent): |