diff options
Diffstat (limited to 'Lib/email/Header.py')
| -rw-r--r-- | Lib/email/Header.py | 90 | 
1 files changed, 61 insertions, 29 deletions
| diff --git a/Lib/email/Header.py b/Lib/email/Header.py index 70e0bac862..0f2eb32ea8 100644 --- a/Lib/email/Header.py +++ b/Lib/email/Header.py @@ -1,9 +1,11 @@  # Copyright (C) 2002 Python Software Foundation -# Author: che@debian.org (Ben Gertzfield) +# Author: che@debian.org (Ben Gertzfield), barry@zope.com (Barry Warsaw)  """Header encoding and decoding functionality."""  import re +from types import StringType, UnicodeType +  import email.quopriMIME  import email.base64MIME  from email.Charset import Charset @@ -14,6 +16,12 @@ except SyntaxError:      # Python 2.1 spells integer division differently      from email._compat21 import _floordiv +try: +    True, False +except NameError: +    True = 1 +    False = 0 +  CRLFSPACE = '\r\n '  CRLF = '\r\n'  NL = '\n' @@ -25,6 +33,9 @@ MAXLINELEN = 76  ENCODE = 1  DECODE = 2 +USASCII = Charset('us-ascii') +UTF8 = Charset('utf-8') +  # Match encoded-word strings in the form =?charset?q?Hello_World?=  ecre = re.compile(r'''    =\?                   # literal =? @@ -117,21 +128,19 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None,  class Header:      def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,                   continuation_ws=' '): -        """Create a MIME-compliant header that can contain many languages. +        """Create a MIME-compliant header that can contain many character sets. -        Specify the initial header value in s.  If None, the initial header -        value is not set. +        Optional s is the initial header value.  If None, the initial header +        value is not set.  You can later append to the header with .append() +        method calls.  s may be a byte string or a Unicode string, but see the +        .append() documentation for semantics. -        Specify both s's character set, and the default character set by -        setting the charset argument to a Charset object (not a character set -        name string!).  
If None, a us-ascii Charset is used as both s's -        initial charset and as the default character set for subsequent -        .append() calls. - -        You can later append to the header with append(s, charset) below; -        charset does not have to be the same as the one initially specified -        here.  In fact, it's optional, and if not given, defaults to the -        charset specified in the constructor. +        Optional charset serves two purposes: it has the same meaning as the +        charset argument to the .append() method.  It also sets the default +        character set for all subsequent .append() calls that omit the charset +        argument.  If charset is not provided in the constructor, the us-ascii +        charset is used both as s's initial charset and as the default for +        subsequent .append() calls.          The maximum line length can be specified explicit via maxlinelen.  For          splitting the first line to a shorter value (to account for the field @@ -143,7 +152,7 @@ class Header:          lines.          """          if charset is None: -            charset = Charset() +            charset = USASCII          self._charset = charset          self._continuation_ws = continuation_ws          cws_expanded_len = len(continuation_ws.replace('\t', SPACE8)) @@ -186,20 +195,43 @@ class Header:          return not self == other      def append(self, s, charset=None): -        """Append string s with Charset charset to the MIME header. - -        If charset is given, it should be a Charset instance, or the name of a -        character set (which will be converted to a Charset instance).  A -        value of None (the default) means charset is the one given in the -        class constructor. +        """Append a string to the MIME header. + +        Optional charset, if given, should be a Charset instance or the name +        of a character set (which will be converted to a Charset instance).  
A +        value of None (the default) means that the charset given in the +        constructor is used. + +        s may be a byte string or a Unicode string.  If it is a byte string +        (i.e. isinstance(s, StringType) is true), then charset is the encoding +        of that byte string, and a UnicodeError will be raised if the string +        cannot be decoded with that charset.  If `s' is a Unicode string, then +        charset is a hint specifying the character set of the characters in +        the string.  In this case, when producing an RFC 2822 compliant header +        using RFC 2047 rules, the Unicode string will be encoded using the +        following charsets in order: us-ascii, the charset hint, utf-8.          """          if charset is None:              charset = self._charset          elif not isinstance(charset, Charset):              charset = Charset(charset) +        # Normalize and check the string +        if isinstance(s, StringType): +            # Possibly raise UnicodeError if it can't be decoded +            unicode(s, charset.get_output_charset()) +        elif isinstance(s, UnicodeType): +            # Convert Unicode to byte string for later concatenation +            for charset in USASCII, charset, UTF8: +                try: +                    s = s.encode(charset.get_output_charset()) +                    break +                except UnicodeError: +                    pass +            else: +                assert False, 'Could not encode to utf-8'          self._chunks.append((s, charset)) -    def _split(self, s, charset, firstline=0): +    def _split(self, s, charset, firstline=False):          # Split up a header safely for use with encode_chunks.  BAW: this          # appears to be a private convenience method.          
splittable = charset.to_splittable(s) @@ -227,13 +259,13 @@ class Header:              # We can split on _maxlinelen boundaries because we know that the              # encoding won't change the size of the string              splitpnt = self._maxlinelen -            first = charset.from_splittable(splittable[:splitpnt], 0) -            last = charset.from_splittable(splittable[splitpnt:], 0) +            first = charset.from_splittable(splittable[:splitpnt], False) +            last = charset.from_splittable(splittable[splitpnt:], False)          else:              # Divide and conquer.              halfway = _floordiv(len(splittable), 2) -            first = charset.from_splittable(splittable[:halfway], 0) -            last = charset.from_splittable(splittable[halfway:], 0) +            first = charset.from_splittable(splittable[:halfway], False) +            last = charset.from_splittable(splittable[halfway:], False)          # Do the split          return self._split(first, charset, firstline) + \                 self._split(last, charset) @@ -248,7 +280,7 @@ class Header:              line = lines.pop(0)              if firstline:                  maxlinelen = self._firstlinelen -                firstline = 0 +                firstline = False              else:                  #line = line.lstrip()                  maxlinelen = self._maxlinelen @@ -338,7 +370,7 @@ class Header:                  # There's no encoding for this chunk's charsets                  _max_append(chunks, header, self._maxlinelen)              else: -                _max_append(chunks, charset.header_encode(header, 0), +                _max_append(chunks, charset.header_encode(header),                              self._maxlinelen, ' ')          joiner = NL + self._continuation_ws          return joiner.join(chunks) @@ -363,6 +395,6 @@ class Header:          """          newchunks = []          for s, charset in self._chunks: -            newchunks += self._split(s, charset, 1) +        
    newchunks += self._split(s, charset, True)          self._chunks = newchunks          return self._encode_chunks() | 
