diff options
| author | Barry Warsaw <barry@python.org> | 2002-09-26 17:19:34 +0000 | 
|---|---|---|
| committer | Barry Warsaw <barry@python.org> | 2002-09-26 17:19:34 +0000 | 
| commit | 15aefa94d065cbb7408484ff98406cffd5002e2b (patch) | |
| tree | 6aab24ba6555383e55ed566d72459d5be5769955 /Lib/email/Message.py | |
| parent | 9b1a80baf407e5a0bee40e28357d35e64263233e (diff) | |
| download | cpython-git-15aefa94d065cbb7408484ff98406cffd5002e2b.tar.gz | |
Fixing some RFC 2231-related issues reported in the Spambayes
project, with assistance from Oleg Broytmann.  Specifically:
get_param(), get_params(): Document that these methods may return
parameter values that are either strings or, for RFC 2231 encoded
parameters, 3-tuples of the form (CHARSET, LANGUAGE, VALUE).  The
application should be prepared to deal with both kinds of return value.
get_boundary(): Be prepared to deal with RFC 2231 encoded boundary
parameters.  It makes little sense to have boundaries that are
anything but ASCII, so if we get back a 3-tuple from get_param() we
decode its value using the declared charset, re-encode it as us-ascii,
and let any failures propagate to the caller.
get_content_charset(): New method which handles an RFC 2231 encoded
charset parameter the same way get_boundary() handles an encoded
boundary parameter.  Note that the name "get_charset()" was already
taken: it returns the default Charset object.
get_charsets(): Rewrite to use get_content_charset().
Diffstat (limited to 'Lib/email/Message.py')
| -rw-r--r-- | Lib/email/Message.py | 48 | 
1 files changed, 39 insertions, 9 deletions
| diff --git a/Lib/email/Message.py b/Lib/email/Message.py index c018ae702b..8bc82a6b11 100644 --- a/Lib/email/Message.py +++ b/Lib/email/Message.py @@ -53,7 +53,7 @@ def _formatparam(param, value=None, quote=1):  def _unquotevalue(value):      if isinstance(value, TupleType): -        return (value[0], value[1], Utils.unquote(value[2])) +        return value[0], value[1], Utils.unquote(value[2])      else:          return Utils.unquote(value) @@ -509,8 +509,8 @@ class Message:          The elements of the returned list are 2-tuples of key/value pairs, as          split on the `=' sign.  The left hand side of the `=' is the key,          while the right hand side is the value.  If there is no `=' sign in -        the parameter the value is the empty string.  The value is always -        unquoted, unless unquote is set to a false value. +        the parameter the value is the empty string.  The value is as +        described in the get_param() method.          Optional failobj is the object to return if there is no Content-Type:          header.  Optional header is the header to search instead of @@ -529,11 +529,23 @@ class Message:          """Return the parameter value if found in the Content-Type: header.          Optional failobj is the object to return if there is no Content-Type: -        header.  Optional header is the header to search instead of -        Content-Type: - -        Parameter keys are always compared case insensitively.  Values are -        always unquoted, unless unquote is set to a false value. +        header, or the Content-Type header has no such parameter.  Optional +        header is the header to search instead of Content-Type: + +        Parameter keys are always compared case insensitively.  The return +        value can either be a string, or a 3-tuple if the parameter was RFC +        2231 encoded.  
When it's a 3-tuple, the elements of the value are of +        the form (CHARSET, LANGUAGE, VALUE), where LANGUAGE may be the empty +        string.  Your application should be prepared to deal with these, and +        can convert the parameter to a Unicode string like so: + +            param = msg.get_param('foo') +            if isinstance(param, tuple): +                param = unicode(param[2], param[0]) + +        In any case, the parameter value (either the returned string, or the +        VALUE item in the 3-tuple) is always unquoted, unless unquote is set +        to a false value.          """          if not self.has_key(header):              return failobj @@ -674,6 +686,9 @@ class Message:          boundary = self.get_param('boundary', missing)          if boundary is missing:              return failobj +        if isinstance(boundary, TupleType): +            # RFC 2231 encoded, so decode.  It better end up as ascii +            return unicode(boundary[2], boundary[0]).encode('us-ascii')          return _unquotevalue(boundary.strip())      def set_boundary(self, boundary): @@ -727,6 +742,21 @@ class Message:          # Must be using Python 2.1          from email._compat21 import walk +    def get_content_charset(self, failobj=None): +        """Return the charset parameter of the Content-Type header. + +        If there is no Content-Type header, or if that header has no charset +        parameter, failobj is returned. +        """ +        missing = [] +        charset = self.get_param('charset', missing) +        if charset is missing: +            return failobj +        if isinstance(charset, TupleType): +            # RFC 2231 encoded, so decode it, and it better end up as ascii. +            return unicode(charset[2], charset[0]).encode('us-ascii') +        return charset +      def get_charsets(self, failobj=None):          """Return a list containing the charset(s) used in this message. 
@@ -743,4 +773,4 @@ class Message:          one for the container message (i.e. self), so that a non-multipart          message will still return a list of length 1.          """ -        return [part.get_param('charset', failobj) for part in self.walk()] +        return [part.get_content_charset(failobj) for part in self.walk()] | 
