diff options
-rw-r--r-- | paste/util/multidict.py | 161 | ||||
-rw-r--r-- | paste/wsgiwrappers.py | 170 | ||||
-rw-r--r-- | tests/test_multidict.py | 112 |
3 files changed, 395 insertions, 48 deletions
diff --git a/paste/util/multidict.py b/paste/util/multidict.py index 0d1567d..2a9a4d8 100644 --- a/paste/util/multidict.py +++ b/paste/util/multidict.py @@ -1,5 +1,8 @@ # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php +import cgi +import copy +import sys from UserDict import DictMixin class MultiDict(DictMixin): @@ -167,7 +170,7 @@ class MultiDict(DictMixin): def __repr__(self): items = ', '.join(['(%r, %r)' % v for v in self._items]) - return 'MultiDict([%s])' % items + return '%s([%s])' % (self.__class__.__name__, items) def __len__(self): return len(self._items) @@ -198,6 +201,162 @@ class MultiDict(DictMixin): for k, v in self._items: yield v +class UnicodeMultiDict(DictMixin): + """ + A MultiDict wrapper that decodes returned key/values to unicode on the + fly. Decoding is not applied to assigned key/values. + + The key/value contents are assumed to be ``str``/``strs`` or + ``str``/``FieldStorages`` (as is returned by the paste.request.parse_ + functions). + + ``FieldStorage`` instances are cloned, and the clone's ``name`` and + ``filename`` variables are decoded. + + """ + def __init__(self, multi=None, encoding=None, errors='strict'): + self.multi = multi + if encoding is None: + encoding = sys.getdefaultencoding() + self.encoding = encoding + self.errors = errors + + def _decode_value(self, value): + """ + Decode the specified value to unicode. Assumes value is a ``str`` or + ``FieldStorage`` object. + + ``FieldStorage`` objects are specially handled. 
+ """ + if isinstance(value, cgi.FieldStorage): + # decode FieldStorage's field name and filename + value = copy.copy(value) + value.name = value.name.decode(self.encoding, self.errors) + value.filename = value.filename.decode(self.encoding, self.errors) + else: + try: + value = value.decode(self.encoding, self.errors) + except AttributeError: + pass + return value + + def __getitem__(self, key): + return self._decode_value(self.multi.__getitem__(key)) + + def __setitem__(self, key, value): + self.multi.__setitem__(key, value) + + def add(self, key, value): + """ + Add the key and value, not overwriting any previous value. + """ + self.multi.add(key, value) + + def getall(self, key): + """ + Return a list of all values matching the key (may be an empty list) + """ + return [self._decode_value(v) for v in self.multi.getall(key)] + + def getone(self, key): + """ + Get one value matching the key, raising a KeyError if multiple + values were found. + """ + return self._decode_value(self.multi.getone(key)) + + def mixed(self): + """ + Returns a dictionary where the values are either single + values, or a list of values when a key/value appears more than + once in this dictionary. This is similar to the kind of + dictionary often used to represent the variables in a web + request. + """ + unicode_mixed = {} + for key, value in self.multi.mixed().iteritems(): + if isinstance(value, list): + value = [self._decode_value(value) for value in value] + else: + value = self._decode_value(value) + unicode_mixed[key.decode(self.encoding, self.errors)] = \ + value + return unicode_mixed + + def dict_of_lists(self): + """ + Returns a dictionary where each key is associated with a + list of values. 
+ """ + unicode_dict = {} + for key, value in self.multi.dict_of_lists().iteritems(): + value = [self._decode_value(value) for value in value] + unicode_dict[key.decode(self.encoding, self.errors)] = \ + value + return unicode_dict + + def __delitem__(self, key): + self.multi.__delitem__(key) + + def __contains__(self, key): + return self.multi.__contains__(key) + + has_key = __contains__ + + def clear(self): + self.multi.clear() + + def copy(self): + return UnicodeMultiDict(self.multi.copy(), self.encoding, self.errors) + + def setdefault(self, key, default=None): + return self._decode_value(self.multi.setdefault(key, default)) + + def pop(self, key, *args): + return self._decode_value(self.multi.pop(key, *args)) + + def popitem(self): + k, v = self.multi.popitem() + return (k.decode(self.encoding, self.errors), + self._decode_value(v)) + + def __repr__(self): + items = ', '.join(['(%r, %r)' % v for v in self.items()]) + return '%s([%s])' % (self.__class__.__name__, items) + + def __len__(self): + return self.multi.__len__() + + ## + ## All the iteration: + ## + + def keys(self): + return [k.decode(self.encoding, self.errors) for \ + k in self.multi.iterkeys()] + + def iterkeys(self): + for k in self.multi.iterkeys(): + yield k.decode(self.encoding, self.errors) + + __iter__ = iterkeys + + def items(self): + return [(k.decode(self.encoding, self.errors), self._decode_value(v)) \ + for k, v in self.multi.iteritems()] + + def iteritems(self): + for k, v in self.multi.iteritems(): + yield (k.decode(self.encoding, self.errors), + self._decode_value(v)) + + def values(self): + return [self._decode_value(v) for v in self.multi.itervalues()] + + def itervalues(self): + for v in self.multi.itervalues(): + yield self._decode_value(v) + __test__ = { 'general': """ >>> d = MultiDict(a=1, b=2) diff --git a/paste/wsgiwrappers.py b/paste/wsgiwrappers.py index 66a667c..decc275 100644 --- a/paste/wsgiwrappers.py +++ b/paste/wsgiwrappers.py @@ -7,19 +7,26 @@ to deal with an 
incoming request and sending a response. """ import re import warnings -from paste.request import EnvironHeaders, parse_formvars, parse_dict_querystring, get_cookie_dict -from paste.util.multidict import MultiDict +from Cookie import SimpleCookie +from paste.request import EnvironHeaders, get_cookie_dict, \ + parse_dict_querystring, parse_formvars +from paste.util.multidict import MultiDict, UnicodeMultiDict +from paste.registry import StackedObjectProxy from paste.response import HeaderDict from paste.wsgilib import encode_unicode_app_iter -import paste.registry as registry -from Cookie import SimpleCookie -# settings should be set with the registry to a dict having at least: -# content_type, charset -# With the optional: -# encoding_errors (specifies a codec error handler, defaults to 'strict') -settings = registry.StackedObjectProxy(default=dict(content_type='text/html', - charset='UTF-8', encoding_errors='strict')) +_CHARSET_RE = re.compile(r'.*;\s*charset=(.*?)(;|$)', re.I) + +class DeprecatedSettings(StackedObjectProxy): + def _push_object(self, obj): + warnings.warn('paste.wsgiwrappers.settings is deprecated: Please use ' + 'paste.wsgiwrappers.WSGIRequest.defaults instead', + DeprecationWarning, 3) + WSGIResponse.defaults._push_object(obj) + StackedObjectProxy._push_object(self, obj) + +# settings is deprecated: use WSGIResponse.defaults instead +settings = DeprecatedSettings(default=dict()) class environ_getter(object): """For delegating an attribute to a key in self.environ.""" @@ -46,18 +53,44 @@ class WSGIRequest(object): """WSGI Request API Object This object represents a WSGI request with a more friendly interface. - This does not expose every detail of the WSGI environment, and does not - in any way express anything beyond what is available in the environment - dictionary. *All* state is kept in the environment dictionary; this - is essential for interoperability. 
+ This does not expose every detail of the WSGI environment, and attempts + to express nothing beyond what is available in the environment + dictionary. + + The only state maintained in this object is the desired ``charset`` + and its associated ``errors`` handler. The incoming parameters will + be automatically coerced to unicode objects of the ``charset`` + encoding when ``charset`` is set. + + When unicode is expected, ``charset`` will be overridden by the + value of the ``Content-Type`` header's charset parameter if one was + specified by the client. + + The class variable ``defaults`` specifies default values for + ``charset`` and ``errors``. These can be overridden for the current + request via the registry. + + *All* other state is kept in the environment dictionary; this is + essential for interoperability. You are free to subclass this object. """ + defaults = StackedObjectProxy(default=dict(charset=None, errors='strict')) def __init__(self, environ): self.environ = environ # This isn't "state" really, since the object is derivative: self.headers = EnvironHeaders(environ) + + defaults = self.defaults._current_obj() + self.charset = defaults.get('charset') + if self.charset: + # There's a charset: params will be coerced to unicode. In that + # case, attempt to use the charset specified by the browser + charset = self.determine_browser_charset() + if charset: + self.charset = charset + self.errors = defaults.get('errors', 'strict') body = environ_getter('wsgi.input') scheme = environ_getter('wsgi.url_scheme') @@ -76,33 +109,54 @@ class WSGIRequest(object): return self.environ.get('HTTP_HOST', self.environ.get('SERVER_NAME')) host = property(host, doc=host.__doc__) + def _GET(self): + return parse_dict_querystring(self.environ) + def GET(self): """ Dictionary-like object representing the QUERY_STRING parameters. Always present, if possibly empty. - If the same key is present in the query string multiple - times, it will be present as a list. 
+ If the same key is present in the query string multiple times, a + list of its values can be retrieved from the ``MultiDict`` via + the ``getall`` method. + + Returns a ``MultiDict`` container or a ``UnicodeMultiDict`` when + ``charset`` is set. """ - return parse_dict_querystring(self.environ) + params = self._GET() + if self.charset: + params = UnicodeMultiDict(params, self.charset, self.errors) + return params GET = property(GET, doc=GET.__doc__) + def _POST(self): + return parse_formvars(self.environ, include_get_vars=False) + def POST(self): """Dictionary-like object representing the POST body. - Most values are strings, but file uploads can be FieldStorage - objects. If this is not a POST request, or the body is not - encoded fields (e.g., an XMLRPC request) then this will be empty. + Most values are encoded strings, or unicode strings when + ``charset`` is set. There may also be FieldStorage objects + representing file uploads. If this is not a POST request, or the + body is not encoded fields (e.g., an XMLRPC request) then this + will be empty. This will consume wsgi.input when first accessed if applicable, - but the output will be put in environ['paste.post_vars'] - + but the raw version will be put in + environ['paste.parsed_formvars']. + + Returns a ``MultiDict`` container or a ``UnicodeMultiDict`` when + ``charset`` is set. 
""" - return parse_formvars(self.environ, include_get_vars=False) + params = self._POST() + if self.charset: + params = UnicodeMultiDict(params, self.charset, self.errors) + return params POST = property(POST, doc=POST.__doc__) def params(self): - """MultiDict of keys from POST, GET, URL dicts + """Dictionary-like object of keys from POST, GET, URL dicts Return a key value from the parameters, they are checked in the following order: POST, GET, URL @@ -112,11 +166,16 @@ class WSGIRequest(object): ``getlist(key)`` Returns a list of all the values by that key, collected from POST, GET, URL dicts + + Returns a ``MultiDict`` container or a ``UnicodeMultiDict`` when + ``charset`` is set. """ - pms = MultiDict() - pms.update(self.POST) - pms.update(self.GET) - return pms + params = MultiDict() + params.update(self._POST()) + params.update(self._GET()) + if self.charset: + params = UnicodeMultiDict(params, self.charset, self.errors) + return params params = property(params, doc=params.__doc__) def cookies(self): @@ -128,9 +187,27 @@ class WSGIRequest(object): return get_cookie_dict(self.environ) cookies = property(cookies, doc=cookies.__doc__) -_CHARSET_RE = re.compile(r'.*;\s*charset=(.*?)(;|$)', re.I) + def determine_browser_charset(self): + """ + Determine the encoding as specified by the browser via the + Content-Type's charset parameter, if one is set + """ + charset_match = _CHARSET_RE.match(self.headers.get('Content-Type', '')) + if charset_match: + return charset_match.group(1) + class WSGIResponse(object): - """A basic HTTP response with content, headers, and out-bound cookies""" + """A basic HTTP response with content, headers, and out-bound cookies + + The class variable ``defaults`` specifies default values for + ``content_type``, ``charset`` and ``errors``. These can be overridden + for the current request via the registry. 
+ + """ + defaults = StackedObjectProxy( + default=dict(content_type='text/html', + charset='UTF-8', errors='strict') + ) def __init__(self, content='', mimetype=None, code=200): self._iter = None self._is_str_iter = True @@ -139,15 +216,15 @@ class WSGIResponse(object): self.headers = HeaderDict() self.cookies = SimpleCookie() self.status_code = code + + defaults = self.defaults._current_obj() if not mimetype: - mimetype = "%s; charset=%s" % (settings['content_type'], - settings['charset']) + mimetype = defaults.get('content_type', 'text/html') + charset = defaults.get('charset') + if charset: + mimetype = '%s; charset=%s' % (mimetype, charset) self.headers['Content-Type'] = mimetype - - if 'encoding_errors' in settings: - self.encoding_errors = settings['encoding_errors'] - else: - self.encoding_errors = 'strict' + self.errors = defaults.get('errors', 'strict') def __str__(self): """Returns a rendition of the full HTTP message, including headers. @@ -156,7 +233,7 @@ class WSGIResponse(object): output of str(iterator) (to avoid exhausting the iterator). 
""" if self._is_str_iter: - content = ''.join(self.get_content_as_string()) + content = ''.join(self.get_content()) else: content = str(self.content) return '\n'.join(['%s: %s' % (key, value) @@ -190,9 +267,9 @@ class WSGIResponse(object): return environ['wsgi.file_wrapper'](self.content) elif is_file: return iter(lambda: self.content.read(), '') - return self.get_content_as_string() + return self.get_content() - def determine_encoding(self): + def determine_charset(self): """ Determine the encoding as specified by the Content-Type's charset parameter, if one is set @@ -200,8 +277,6 @@ class WSGIResponse(object): charset_match = _CHARSET_RE.match(self.headers.get('Content-Type', '')) if charset_match: return charset_match.group(1) - # No charset specified, default to iso-8859-1 as per RFC2616 - return 'iso-8859-1' def has_header(self, header): """ @@ -253,13 +328,16 @@ class WSGIResponse(object): 'that yields strings, or an iterable object that ' 'produces strings.') - def get_content_as_string(self): + def get_content(self): """ Returns the content as an iterable of strings, encoding each element of the iterator from a Unicode object if necessary. """ - return encode_unicode_app_iter(self.content, self.determine_encoding(), - self.encoding_errors) + charset = self.determine_charset() + if charset: + return encode_unicode_app_iter(self.content, charset, self.errors) + else: + return self.content def wsgi_response(self): """ @@ -271,7 +349,7 @@ class WSGIResponse(object): response_headers = self.headers.headeritems() for c in self.cookies.values(): response_headers.append(('Set-Cookie', c.output(header=''))) - return status, response_headers, self.get_content_as_string() + return status, response_headers, self.get_content() # The remaining methods partially implement the file-like object interface. 
# See http://docs.python.org/lib/bltin-file-objects.html diff --git a/tests/test_multidict.py b/tests/test_multidict.py index 494b0b2..a6787e8 100644 --- a/tests/test_multidict.py +++ b/tests/test_multidict.py @@ -1,24 +1,134 @@ -from paste.util.multidict import MultiDict +# -*- coding: utf-8 -*- +# (c) 2007 Ian Bicking and Philip Jenvey; written for Paste (http://pythonpaste.org) +# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php +import cgi +from StringIO import StringIO +from paste.fixture import TestApp +from paste.wsgiwrappers import WSGIRequest +from paste.util.multidict import MultiDict, UnicodeMultiDict from py.test import raises def test_dict(): d = MultiDict({'a': 1}) assert d.items() == [('a', 1)] + d['b'] = 2 d['c'] = 3 assert d.items() == [('a', 1), ('b', 2), ('c', 3)] + d['b'] = 4 assert d.items() == [('a', 1), ('c', 3), ('b', 4)] + d.add('b', 5) raises(KeyError, 'd.getone("b")') assert d.getall('b') == [4, 5] assert d.items() == [('a', 1), ('c', 3), ('b', 4), ('b', 5)] + del d['b'] assert d.items() == [('a', 1), ('c', 3)] assert d.pop('xxx', 5) == 5 assert d.getone('a') == 1 assert d.popitem() == ('c', 3) assert d.items() == [('a', 1)] + item = [] assert d.setdefault('z', item) is item assert d.items() == [('a', 1), ('z', item)] + + assert d.setdefault('y', 6) == 6 + + assert d.mixed() == {'a': 1, 'y': 6, 'z': item} + assert d.dict_of_lists() == {'a': [1], 'y': [6], 'z': [item]} + + assert 'a' in d + dcopy = d.copy() + assert dcopy is not d + assert dcopy == d + d['x'] = 'x test' + assert dcopy != d + +def test_unicode_dict(): + def assert_unicode(obj): + assert isinstance(obj, unicode) + + def assert_unicode_items(obj): + key, value = obj + assert isinstance(key, unicode) + assert isinstance(value, unicode) + + d = UnicodeMultiDict(MultiDict({'a': 'a test'})) + d.encoding = 'utf-8' + d.errors = 'ignore' + assert d.items() == [('a', u'a test')] + map(assert_unicode, d.keys()) + map(assert_unicode, d.values()) + + 
d['b'] = '2 test' + d['c'] = '3 test' + assert d.items() == [('a', u'a test'), ('b', u'2 test'), ('c', u'3 test')] + map(assert_unicode_items, d.items()) + + d['b'] = '4 test' + assert d.items() == [('a', u'a test'), ('c', u'3 test'), ('b', u'4 test')] + map(assert_unicode_items, d.items()) + + d.add('b', '5 test') + raises(KeyError, 'd.getone("b")') + assert d.getall('b') == [u'4 test', u'5 test'] + map(assert_unicode, d.getall('b')) + assert d.items() == [('a', u'a test'), ('c', u'3 test'), ('b', u'4 test'), + ('b', u'5 test')] + map(assert_unicode_items, d.items()) + + del d['b'] + assert d.items() == [('a', u'a test'), ('c', u'3 test')] + map(assert_unicode_items, d.items()) + assert d.pop('xxx', u'5 test') == u'5 test' + assert isinstance(d.pop('xxx', u'5 test'), unicode) + assert d.getone('a') == u'a test' + assert isinstance(d.getone('a'), unicode) + assert d.popitem() == ('c', u'3 test') + d['c'] = '3 test' + map(assert_unicode, d.popitem()) + assert d.items() == [('a', u'a test')] + map(assert_unicode_items, d.items()) + + item = [] + assert d.setdefault('z', item) is item + items = d.items() + assert items == [('a', u'a test'), ('z', item)] + assert isinstance(items[1][0], unicode) + assert isinstance(items[1][1], list) + + assert isinstance(d.setdefault('y', 'y test'), unicode) + assert isinstance(d['y'], unicode) + + assert d.mixed() == {u'a': u'a test', u'y': u'y test', u'z': item} + assert d.dict_of_lists() == {u'a': [u'a test'], u'y': [u'y test'], + u'z': [item]} + del d['z'] + map(assert_unicode_items, d.mixed().iteritems()) + map(assert_unicode_items, [(k, v[0]) for \ + k, v in d.dict_of_lists().iteritems()]) + + assert u'a' in d + dcopy = d.copy() + assert dcopy is not d + assert dcopy == d + d['x'] = 'x test' + assert dcopy != d + + fs = cgi.FieldStorage() + fs.name = 'thefile' + fs.filename = 'hello.txt' + fs.file = StringIO('hello') + d['f'] = fs + ufs = d['f'] + assert isinstance(ufs, cgi.FieldStorage) + assert ufs is not fs + assert ufs.name 
== fs.name + assert isinstance(ufs.name, unicode) + assert ufs.filename == fs.filename + assert isinstance(ufs.filename, unicode) + assert isinstance(ufs.value, str) + assert ufs.value == 'hello' |