3 files changed, 395 insertions, 48 deletions
diff --git a/paste/util/multidict.py b/paste/util/multidict.py
index 0d1567d..2a9a4d8 100644
--- a/paste/util/multidict.py
+++ b/paste/util/multidict.py
@@ -1,5 +1,8 @@
 # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
 # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
+import cgi
+import copy
+import sys
 from UserDict import DictMixin
 
 class MultiDict(DictMixin):
@@ -167,7 +170,7 @@ class MultiDict(DictMixin):
 
     def __repr__(self):
         items = ', '.join(['(%r, %r)' % v for v in self._items])
-        return 'MultiDict([%s])' % items
+        return '%s([%s])' % (self.__class__.__name__, items)
 
     def __len__(self):
         return len(self._items)
@@ -198,6 +201,162 @@ class MultiDict(DictMixin):
         for k, v in self._items:
             yield v
 
+class UnicodeMultiDict(DictMixin):
+    """
+    A MultiDict wrapper that decodes returned key/values to unicode on the
+    fly. Decoding is not applied to assigned key/values.
+
+    The key/value contents are assumed to be ``str``/``str`` or
+    ``str``/``FieldStorage`` pairs (as returned by the paste.request
+    ``parse_*`` functions).
+
+    ``FieldStorage`` instances are cloned, and the clone's ``name`` and
+    ``filename`` variables are decoded.
+
+    """
+    def __init__(self, multi=None, encoding=None, errors='strict'):
+        if multi is None:
+            # no dict supplied: wrap an empty one so every method works
+            multi = MultiDict()
+        self.multi = multi
+        if encoding is None:
+            encoding = sys.getdefaultencoding()
+        self.encoding = encoding
+        self.errors = errors
+
+    def _decode_value(self, value):
+        """
+        Decode ``value`` to unicode; objects without ``decode`` pass through.
+        ``FieldStorage`` objects are shallow-copied and only their ``str``
+        ``name``/``filename`` are decoded (either may be ``None``).
+        """
+        if isinstance(value, cgi.FieldStorage):
+            value = copy.copy(value)
+            if isinstance(value.name, str):
+                value.name = value.name.decode(self.encoding, self.errors)
+            if isinstance(value.filename, str):
+                value.filename = value.filename.decode(self.encoding,
+                                                       self.errors)
+        else:
+            try:
+                value = value.decode(self.encoding, self.errors)
+            except AttributeError:
+                pass
+        return value
+
+    def __getitem__(self, key):
+        return self._decode_value(self.multi.__getitem__(key))
+
+    def __setitem__(self, key, value):
+        self.multi.__setitem__(key, value)
+
+    def add(self, key, value):
+        """
+        Add the key and value, not overwriting any previous value.
+        """
+        self.multi.add(key, value)
+
+    def getall(self, key):
+        """
+        Return a list of all values matching the key (may be an empty list)
+        """
+        return [self._decode_value(v) for v in self.multi.getall(key)]
+
+    def getone(self, key):
+        """
+        Get one value matching the key, raising a KeyError if multiple
+        values were found.
+        """
+        return self._decode_value(self.multi.getone(key))
+
+    def mixed(self):
+        """
+        Returns a dictionary where the values are either single
+        values, or a list of values when a key/value appears more than
+        once in this dictionary.  This is similar to the kind of
+        dictionary often used to represent the variables in a web
+        request.
+        """
+        unicode_mixed = {}
+        for key, value in self.multi.mixed().iteritems():
+            if isinstance(value, list):
+                value = [self._decode_value(v) for v in value]
+            else:
+                value = self._decode_value(value)
+            unicode_mixed[key.decode(self.encoding, self.errors)] = value
+        return unicode_mixed
+
+    def dict_of_lists(self):
+        """
+        Returns a dictionary where each key is associated with a
+        list of values.
+        """
+        unicode_dict = {}
+        for key, values in self.multi.dict_of_lists().iteritems():
+            values = [self._decode_value(v) for v in values]
+            unicode_dict[key.decode(self.encoding, self.errors)] = values
+        return unicode_dict
+
+    def __delitem__(self, key):
+        self.multi.__delitem__(key)
+
+    def __contains__(self, key):
+        return self.multi.__contains__(key)
+
+    has_key = __contains__
+
+    def clear(self):
+        self.multi.clear()
+
+    def copy(self):
+        return UnicodeMultiDict(self.multi.copy(), self.encoding, self.errors)
+
+    def setdefault(self, key, default=None):
+        return self._decode_value(self.multi.setdefault(key, default))
+
+    def pop(self, key, *args):
+        return self._decode_value(self.multi.pop(key, *args))
+
+    def popitem(self):
+        k, v = self.multi.popitem()
+        return k.decode(self.encoding, self.errors), self._decode_value(v)
+
+    def __repr__(self):
+        items = ', '.join(['(%r, %r)' % v for v in self.items()])
+        return '%s([%s])' % (self.__class__.__name__, items)
+
+    def __len__(self):
+        return self.multi.__len__()
+
+    ##
+    ## All the iteration:
+    ##
+
+    def keys(self):
+        return [k.decode(self.encoding, self.errors)
+                for k in self.multi.iterkeys()]
+
+    def iterkeys(self):
+        for k in self.multi.iterkeys():
+            yield k.decode(self.encoding, self.errors)
+
+    __iter__ = iterkeys
+
+    def items(self):
+        return [(k.decode(self.encoding, self.errors), self._decode_value(v))
+                for k, v in self.multi.iteritems()]
+
+    def iteritems(self):
+        for k, v in self.multi.iteritems():
+            yield k.decode(self.encoding, self.errors), self._decode_value(v)
+
+    def values(self):
+        return [self._decode_value(v) for v in self.multi.itervalues()]
+
+    def itervalues(self):
+        for v in self.multi.itervalues():
+            yield self._decode_value(v)
+
 __test__ = {
     'general': """
     >>> d = MultiDict(a=1, b=2)
diff --git a/paste/wsgiwrappers.py b/paste/wsgiwrappers.py
index 66a667c..decc275 100644
--- a/paste/wsgiwrappers.py
+++ b/paste/wsgiwrappers.py
@@ -7,19 +7,26 @@ to deal with an incoming request and sending a response.
 """
 import re
 import warnings
-from paste.request import EnvironHeaders, parse_formvars, parse_dict_querystring, get_cookie_dict
-from paste.util.multidict import MultiDict
+from Cookie import SimpleCookie
+from paste.request import EnvironHeaders, get_cookie_dict, \
+    parse_dict_querystring, parse_formvars
+from paste.util.multidict import MultiDict, UnicodeMultiDict
+from paste.registry import StackedObjectProxy
 from paste.response import HeaderDict
 from paste.wsgilib import encode_unicode_app_iter
-import paste.registry as registry
-from Cookie import SimpleCookie
 
-# settings should be set with the registry to a dict having at least:
-#     content_type, charset
-# With the optional:
-#     encoding_errors (specifies a codec error handler, defaults to 'strict')
-settings = registry.StackedObjectProxy(default=dict(content_type='text/html', 
-    charset='UTF-8', encoding_errors='strict'))
+_CHARSET_RE = re.compile(r'.*;\s*charset=(.*?)(;|$)', re.I)
+
+class DeprecatedSettings(StackedObjectProxy):  # legacy ``settings`` shim
+    def _push_object(self, obj):
+        warnings.warn('paste.wsgiwrappers.settings is deprecated: Please use '
+                      'paste.wsgiwrappers.WSGIResponse.defaults instead',
+                      DeprecationWarning, 3)
+        WSGIResponse.defaults._push_object(obj)  # mirror into the new proxy
+        StackedObjectProxy._push_object(self, obj)
+
+# settings is deprecated: use WSGIResponse.defaults instead
+settings = DeprecatedSettings(default=dict())
 
 class environ_getter(object):
     """For delegating an attribute to a key in self.environ."""
@@ -46,18 +53,44 @@ class WSGIRequest(object):
     """WSGI Request API Object
 
     This object represents a WSGI request with a more friendly interface.
-    This does not expose every detail of the WSGI environment, and does not
-    in any way express anything beyond what is available in the environment
-    dictionary.  *All* state is kept in the environment dictionary; this
-    is essential for interoperability.
+    This does not expose every detail of the WSGI environment, and attempts
+    to express nothing beyond what is available in the environment
+    dictionary.
+
+    The only state maintained in this object is the desired ``charset``
+    and its associated ``errors`` handler. The incoming parameters will
+    be automatically coerced to unicode objects of the ``charset``
+    encoding when ``charset`` is set.
+
+    When unicode is expected, ``charset`` will be overridden by the
+    value of the ``Content-Type`` header's charset parameter if one was
+    specified by the client.
+
+    The class variable ``defaults`` specifies default values for
+    ``charset`` and ``errors``. These can be overridden for the current
+    request via the registry.
+
+    *All* other state is kept in the environment dictionary; this is
+    essential for interoperability.
 
     You are free to subclass this object.
 
     """
+    defaults = StackedObjectProxy(default=dict(charset=None, errors='strict'))
     def __init__(self, environ):
         self.environ = environ
         # This isn't "state" really, since the object is derivative:
         self.headers = EnvironHeaders(environ)
+
+        defaults = self.defaults._current_obj()
+        self.charset = defaults.get('charset')
+        if self.charset:
+            # There's a charset: params will be coerced to unicode. In that
+            # case, attempt to use the charset specified by the browser
+            charset = self.determine_browser_charset()
+            if charset:
+                self.charset = charset
+        self.errors = defaults.get('errors', 'strict')
     
     body = environ_getter('wsgi.input')
     scheme = environ_getter('wsgi.url_scheme')
@@ -76,33 +109,54 @@ class WSGIRequest(object):
         return self.environ.get('HTTP_HOST', self.environ.get('SERVER_NAME'))
     host = property(host, doc=host.__doc__)
 
+    def _GET(self):  # query-string dict before any unicode wrapping
+        return parse_dict_querystring(self.environ)
+
     def GET(self):
         """
         Dictionary-like object representing the QUERY_STRING
         parameters. Always present, if possibly empty.
 
-        If the same key is present in the query string multiple
-        times, it will be present as a list.
+        If the same key is present in the query string multiple times, a
+        list of its values can be retrieved from the ``MultiDict`` via
+        the ``getall`` method.
+
+        Returns a ``MultiDict`` container or a ``UnicodeMultiDict`` when
+        ``charset`` is set.
         """
-        return parse_dict_querystring(self.environ)
+        params = self._GET()
+        if self.charset:
+            params = UnicodeMultiDict(params, self.charset, self.errors)
+        return params
     GET = property(GET, doc=GET.__doc__)
 
+    def _POST(self):  # form-body dict (no GET vars) before unicode wrapping
+        return parse_formvars(self.environ, include_get_vars=False)
+
     def POST(self):
         """Dictionary-like object representing the POST body.
 
-        Most values are strings, but file uploads can be FieldStorage
-        objects. If this is not a POST request, or the body is not
-        encoded fields (e.g., an XMLRPC request) then this will be empty.
+        Most values are encoded strings, or unicode strings when
+        ``charset`` is set. There may also be FieldStorage objects
+        representing file uploads. If this is not a POST request, or the
+        body is not encoded fields (e.g., an XMLRPC request) then this
+        will be empty.
 
         This will consume wsgi.input when first accessed if applicable,
-        but the output will be put in environ['paste.post_vars']
-        
+        but the raw version will be put in
+        environ['paste.parsed_formvars'].
+
+        Returns a ``MultiDict`` container or a ``UnicodeMultiDict`` when
+        ``charset`` is set.
         """
-        return parse_formvars(self.environ, include_get_vars=False)
+        params = self._POST()
+        if self.charset:
+            params = UnicodeMultiDict(params, self.charset, self.errors)
+        return params
     POST = property(POST, doc=POST.__doc__)
 
     def params(self):
-        """MultiDict of keys from POST, GET, URL dicts
+        """Dictionary-like object of keys from POST, GET, URL dicts
 
         Return a key value from the parameters, they are checked in the
         following order: POST, GET, URL
@@ -112,11 +166,16 @@ class WSGIRequest(object):
         ``getlist(key)``
             Returns a list of all the values by that key, collected from
             POST, GET, URL dicts
+
+        Returns a ``MultiDict`` container or a ``UnicodeMultiDict`` when
+        ``charset`` is set.
         """
-        pms = MultiDict()
-        pms.update(self.POST)
-        pms.update(self.GET)
-        return pms
+        params = MultiDict()
+        params.update(self._POST())
+        params.update(self._GET())
+        if self.charset:
+            params = UnicodeMultiDict(params, self.charset, self.errors)
+        return params
     params = property(params, doc=params.__doc__)
 
     def cookies(self):
@@ -128,9 +187,27 @@ class WSGIRequest(object):
         return get_cookie_dict(self.environ)
     cookies = property(cookies, doc=cookies.__doc__)
 
-_CHARSET_RE = re.compile(r'.*;\s*charset=(.*?)(;|$)', re.I)
+    def determine_browser_charset(self):
+        """
+        Return the client's Content-Type charset parameter (stripped of
+        whitespace and quoting), or ``None`` when the header has none
+        """
+        charset_match = _CHARSET_RE.match(self.headers.get('Content-Type', ''))
+        if charset_match:
+            return charset_match.group(1).strip().strip('"')
+
 class WSGIResponse(object):
-    """A basic HTTP response with content, headers, and out-bound cookies"""
+    """A basic HTTP response with content, headers, and out-bound cookies
+
+    The class variable ``defaults`` specifies default values for
+    ``content_type``, ``charset`` and ``errors``. These can be overridden
+    for the current request via the registry.
+
+    """
+    defaults = StackedObjectProxy(
+        default=dict(content_type='text/html',
+                     charset='UTF-8', errors='strict')
+        )
     def __init__(self, content='', mimetype=None, code=200):
         self._iter = None
         self._is_str_iter = True
@@ -139,15 +216,15 @@ class WSGIResponse(object):
         self.headers = HeaderDict()
         self.cookies = SimpleCookie()
         self.status_code = code
+
+        defaults = self.defaults._current_obj()
         if not mimetype:
-            mimetype = "%s; charset=%s" % (settings['content_type'],
-                                           settings['charset'])
+            mimetype = defaults.get('content_type', 'text/html')
+            charset = defaults.get('charset')
+            if charset:
+                mimetype = '%s; charset=%s' % (mimetype, charset)
         self.headers['Content-Type'] = mimetype
-
-        if 'encoding_errors' in settings:
-            self.encoding_errors = settings['encoding_errors']
-        else:
-            self.encoding_errors = 'strict'
+        self.errors = defaults.get('errors', 'strict')
 
     def __str__(self):
         """Returns a rendition of the full HTTP message, including headers.
@@ -156,7 +233,7 @@ class WSGIResponse(object):
         output of str(iterator) (to avoid exhausting the iterator).
         """
         if self._is_str_iter:
-            content = ''.join(self.get_content_as_string())
+            content = ''.join(self.get_content())
         else:
             content = str(self.content)
         return '\n'.join(['%s: %s' % (key, value)
@@ -190,9 +267,9 @@ class WSGIResponse(object):
             return environ['wsgi.file_wrapper'](self.content)
         elif is_file:
             return iter(lambda: self.content.read(), '')
-        return self.get_content_as_string()
+        return self.get_content()
     
-    def determine_encoding(self):
+    def determine_charset(self):
         """
         Determine the encoding as specified by the Content-Type's charset
         parameter, if one is set
@@ -200,8 +277,6 @@ class WSGIResponse(object):
         charset_match = _CHARSET_RE.match(self.headers.get('Content-Type', ''))
         if charset_match:
             return charset_match.group(1)
-        # No charset specified, default to iso-8859-1 as per RFC2616
-        return 'iso-8859-1'
     
     def has_header(self, header):
         """
@@ -253,13 +328,16 @@ class WSGIResponse(object):
                        'that yields strings, or an iterable object that '
                        'produces strings.')
 
-    def get_content_as_string(self):
+    def get_content(self):
         """
         Returns the content as an iterable of strings, encoding each element of
         the iterator from a Unicode object if necessary.
         """
-        return encode_unicode_app_iter(self.content, self.determine_encoding(),
-                                       self.encoding_errors)
+        charset = self.determine_charset()
+        if charset:
+            return encode_unicode_app_iter(self.content, charset, self.errors)
+        else:
+            return self.content
     
     def wsgi_response(self):
         """
@@ -271,7 +349,7 @@ class WSGIResponse(object):
         response_headers = self.headers.headeritems()
         for c in self.cookies.values():
             response_headers.append(('Set-Cookie', c.output(header='')))
-        return status, response_headers, self.get_content_as_string()
+        return status, response_headers, self.get_content()
     
     # The remaining methods partially implement the file-like object interface.
     # See http://docs.python.org/lib/bltin-file-objects.html
diff --git a/tests/test_multidict.py b/tests/test_multidict.py
index 494b0b2..a6787e8 100644
--- a/tests/test_multidict.py
+++ b/tests/test_multidict.py
@@ -1,24 +1,134 @@
-from paste.util.multidict import MultiDict
+# -*- coding: utf-8 -*-
+# (c) 2007 Ian Bicking and Philip Jenvey; written for Paste (http://pythonpaste.org)
+# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
+import cgi
+from StringIO import StringIO
+from paste.fixture import TestApp
+from paste.wsgiwrappers import WSGIRequest
+from paste.util.multidict import MultiDict, UnicodeMultiDict
 from py.test import raises
 
 def test_dict():
     d = MultiDict({'a': 1})
     assert d.items() == [('a', 1)]
+
     d['b'] = 2
     d['c'] = 3
     assert d.items() == [('a', 1), ('b', 2), ('c', 3)]
+
     d['b'] = 4
     assert d.items() == [('a', 1), ('c', 3), ('b', 4)]
+
     d.add('b', 5)
     raises(KeyError, 'd.getone("b")')
     assert d.getall('b') == [4, 5]
     assert d.items() == [('a', 1), ('c', 3), ('b', 4), ('b', 5)]
+
     del d['b']
     assert d.items() == [('a', 1), ('c', 3)]
     assert d.pop('xxx', 5) == 5
     assert d.getone('a') == 1
     assert d.popitem() == ('c', 3)
     assert d.items() == [('a', 1)]
+
     item = []
     assert d.setdefault('z', item) is item
     assert d.items() == [('a', 1), ('z', item)]
+
+    assert d.setdefault('y', 6) == 6
+
+    assert d.mixed() == {'a': 1, 'y': 6, 'z': item}
+    assert d.dict_of_lists() == {'a': [1], 'y': [6], 'z': [item]}
+
+    assert 'a' in d
+    dcopy = d.copy()
+    assert dcopy is not d
+    assert dcopy == d
+    d['x'] = 'x test'
+    assert dcopy != d
+
+def test_unicode_dict():  # UnicodeMultiDict decodes keys/values on read
+    def assert_unicode(obj):
+        assert isinstance(obj, unicode)
+
+    def assert_unicode_items(obj):
+        key, value = obj
+        assert isinstance(key, unicode)
+        assert isinstance(value, unicode)
+
+    d = UnicodeMultiDict(MultiDict({'a': 'a test'}))  # wrap a str MultiDict
+    d.encoding = 'utf-8'
+    d.errors = 'ignore'
+    assert d.items() == [('a', u'a test')]
+    map(assert_unicode, d.keys())
+    map(assert_unicode, d.values())
+
+    d['b'] = '2 test'  # stored as str, read back as unicode
+    d['c'] = '3 test'
+    assert d.items() == [('a', u'a test'), ('b', u'2 test'), ('c', u'3 test')]
+    map(assert_unicode_items, d.items())
+
+    d['b'] = '4 test'  # re-assignment moves 'b' to the end
+    assert d.items() == [('a', u'a test'), ('c', u'3 test'), ('b', u'4 test')]
+    map(assert_unicode_items, d.items())
+
+    d.add('b', '5 test')  # second value for 'b': getone() must fail
+    raises(KeyError, 'd.getone("b")')
+    assert d.getall('b') == [u'4 test', u'5 test']
+    map(assert_unicode, d.getall('b'))
+    assert d.items() == [('a', u'a test'), ('c', u'3 test'), ('b', u'4 test'),
+                         ('b', u'5 test')]
+    map(assert_unicode_items, d.items())
+
+    del d['b']  # removes every 'b' entry
+    assert d.items() == [('a', u'a test'), ('c', u'3 test')]
+    map(assert_unicode_items, d.items())
+    assert d.pop('xxx', u'5 test') == u'5 test'  # missing key -> default
+    assert isinstance(d.pop('xxx', u'5 test'), unicode)
+    assert d.getone('a') == u'a test'
+    assert isinstance(d.getone('a'), unicode)
+    assert d.popitem() == ('c', u'3 test')
+    d['c'] = '3 test'  # put it back to check popitem()'s types
+    map(assert_unicode, d.popitem())
+    assert d.items() == [('a', u'a test')]
+    map(assert_unicode_items, d.items())
+
+    item = []  # non-string values come back undecoded, same object
+    assert d.setdefault('z', item) is item
+    items = d.items()
+    assert items == [('a', u'a test'), ('z', item)]
+    assert isinstance(items[1][0], unicode)
+    assert isinstance(items[1][1], list)
+
+    assert isinstance(d.setdefault('y', 'y test'), unicode)
+    assert isinstance(d['y'], unicode)
+
+    assert d.mixed() == {u'a': u'a test', u'y': u'y test', u'z': item}
+    assert d.dict_of_lists() == {u'a': [u'a test'], u'y': [u'y test'],
+                                 u'z': [item]}
+    del d['z']  # drop the list value so every remaining value is unicode
+    map(assert_unicode_items, d.mixed().iteritems())
+    map(assert_unicode_items, [(k, v[0]) for \
+                                   k, v in d.dict_of_lists().iteritems()])
+
+    assert u'a' in d
+    dcopy = d.copy()  # copy is a distinct, equal object until d changes
+    assert dcopy is not d
+    assert dcopy == d
+    d['x'] = 'x test'
+    assert dcopy != d
+
+    fs = cgi.FieldStorage()  # FieldStorage values are copied, names decoded
+    fs.name = 'thefile'
+    fs.filename = 'hello.txt'
+    fs.file = StringIO('hello')
+    d['f'] = fs
+    ufs = d['f']
+    assert isinstance(ufs, cgi.FieldStorage)
+    assert ufs is not fs
+    assert ufs.name == fs.name
+    assert isinstance(ufs.name, unicode)
+    assert ufs.filename == fs.filename
+    assert isinstance(ufs.filename, unicode)
+    assert isinstance(ufs.value, str)  # the payload itself is not decoded
+    assert ufs.value == 'hello'