summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- paste/util/multidict.py 161
-rw-r--r-- paste/wsgiwrappers.py 170
-rw-r--r-- tests/test_multidict.py 112
3 files changed, 395 insertions, 48 deletions
diff --git a/paste/util/multidict.py b/paste/util/multidict.py
index 0d1567d..2a9a4d8 100644
--- a/paste/util/multidict.py
+++ b/paste/util/multidict.py
@@ -1,5 +1,8 @@
# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
+import cgi
+import copy
+import sys
from UserDict import DictMixin
class MultiDict(DictMixin):
@@ -167,7 +170,7 @@ class MultiDict(DictMixin):
def __repr__(self):
items = ', '.join(['(%r, %r)' % v for v in self._items])
- return 'MultiDict([%s])' % items
+ return '%s([%s])' % (self.__class__.__name__, items)
def __len__(self):
return len(self._items)
@@ -198,6 +201,162 @@ class MultiDict(DictMixin):
for k, v in self._items:
yield v
+class UnicodeMultiDict(DictMixin):
+ """
+ A MultiDict wrapper that decodes returned key/values to unicode on the
+ fly. Decoding is not applied to assigned key/values.
+
+ The key/value contents are assumed to be ``str``/``strs`` or
+ ``str``/``FieldStorages`` (as is returned by the paste.request.parse_
+ functions).
+
+ ``FieldStorage`` instances are cloned, and the clone's ``name`` and
+ ``filename`` variables are decoded.
+
+ """
+ def __init__(self, multi=None, encoding=None, errors='strict'):
+ self.multi = multi
+ if encoding is None:
+ encoding = sys.getdefaultencoding()
+ self.encoding = encoding
+ self.errors = errors
+
+ def _decode_value(self, value):
+ """
+ Decode the specified value to unicode. Assumes value is a ``str`` or
+ ``FieldStorage`` object.
+
+ ``FieldStorage`` objects are specially handled.
+ """
+ if isinstance(value, cgi.FieldStorage):
+ # decode FieldStorage's field name and filename
+ value = copy.copy(value)
+ value.name = value.name.decode(self.encoding, self.errors)
+ value.filename = value.filename.decode(self.encoding, self.errors)
+ else:
+ try:
+ value = value.decode(self.encoding, self.errors)
+ except AttributeError:
+ pass
+ return value
+
+ def __getitem__(self, key):
+ return self._decode_value(self.multi.__getitem__(key))
+
+ def __setitem__(self, key, value):
+ self.multi.__setitem__(key, value)
+
+ def add(self, key, value):
+ """
+ Add the key and value, not overwriting any previous value.
+ """
+ self.multi.add(key, value)
+
+ def getall(self, key):
+ """
+ Return a list of all values matching the key (may be an empty list)
+ """
+ return [self._decode_value(v) for v in self.multi.getall(key)]
+
+ def getone(self, key):
+ """
+ Get one value matching the key, raising a KeyError if multiple
+ values were found.
+ """
+ return self._decode_value(self.multi.getone(key))
+
+ def mixed(self):
+ """
+ Returns a dictionary where the values are either single
+ values, or a list of values when a key/value appears more than
+ once in this dictionary. This is similar to the kind of
+ dictionary often used to represent the variables in a web
+ request.
+ """
+ unicode_mixed = {}
+ for key, value in self.multi.mixed().iteritems():
+ if isinstance(value, list):
+ value = [self._decode_value(value) for value in value]
+ else:
+ value = self._decode_value(value)
+ unicode_mixed[key.decode(self.encoding, self.errors)] = \
+ value
+ return unicode_mixed
+
+ def dict_of_lists(self):
+ """
+ Returns a dictionary where each key is associated with a
+ list of values.
+ """
+ unicode_dict = {}
+ for key, value in self.multi.dict_of_lists().iteritems():
+ value = [self._decode_value(value) for value in value]
+ unicode_dict[key.decode(self.encoding, self.errors)] = \
+ value
+ return unicode_dict
+
+ def __delitem__(self, key):
+ self.multi.__delitem__(key)
+
+ def __contains__(self, key):
+ return self.multi.__contains__(key)
+
+ has_key = __contains__
+
+ def clear(self):
+ self.multi.clear()
+
+ def copy(self):
+ return UnicodeMultiDict(self.multi.copy(), self.encoding, self.errors)
+
+ def setdefault(self, key, default=None):
+ return self._decode_value(self.multi.setdefault(key, default))
+
+ def pop(self, key, *args):
+ return self._decode_value(self.multi.pop(key, *args))
+
+ def popitem(self):
+ k, v = self.multi.popitem()
+ return (k.decode(self.encoding, self.errors),
+ self._decode_value(v))
+
+ def __repr__(self):
+ items = ', '.join(['(%r, %r)' % v for v in self.items()])
+ return '%s([%s])' % (self.__class__.__name__, items)
+
+ def __len__(self):
+ return self.multi.__len__()
+
+ ##
+ ## All the iteration:
+ ##
+
+ def keys(self):
+ return [k.decode(self.encoding, self.errors) for \
+ k in self.multi.iterkeys()]
+
+ def iterkeys(self):
+ for k in self.multi.iterkeys():
+ yield k.decode(self.encoding, self.errors)
+
+ __iter__ = iterkeys
+
+ def items(self):
+ return [(k.decode(self.encoding, self.errors), self._decode_value(v)) \
+ for k, v in self.multi.iteritems()]
+
+ def iteritems(self):
+ for k, v in self.multi.iteritems():
+ yield (k.decode(self.encoding, self.errors),
+ self._decode_value(v))
+
+ def values(self):
+ return [self._decode_value(v) for v in self.multi.itervalues()]
+
+ def itervalues(self):
+ for v in self.multi.itervalues():
+ yield self._decode_value(v)
+
__test__ = {
'general': """
>>> d = MultiDict(a=1, b=2)
diff --git a/paste/wsgiwrappers.py b/paste/wsgiwrappers.py
index 66a667c..decc275 100644
--- a/paste/wsgiwrappers.py
+++ b/paste/wsgiwrappers.py
@@ -7,19 +7,26 @@ to deal with an incoming request and sending a response.
"""
import re
import warnings
-from paste.request import EnvironHeaders, parse_formvars, parse_dict_querystring, get_cookie_dict
-from paste.util.multidict import MultiDict
+from Cookie import SimpleCookie
+from paste.request import EnvironHeaders, get_cookie_dict, \
+ parse_dict_querystring, parse_formvars
+from paste.util.multidict import MultiDict, UnicodeMultiDict
+from paste.registry import StackedObjectProxy
from paste.response import HeaderDict
from paste.wsgilib import encode_unicode_app_iter
-import paste.registry as registry
-from Cookie import SimpleCookie
-# settings should be set with the registry to a dict having at least:
-# content_type, charset
-# With the optional:
-# encoding_errors (specifies a codec error handler, defaults to 'strict')
-settings = registry.StackedObjectProxy(default=dict(content_type='text/html',
- charset='UTF-8', encoding_errors='strict'))
+_CHARSET_RE = re.compile(r'.*;\s*charset=(.*?)(;|$)', re.I)
+
+class DeprecatedSettings(StackedObjectProxy):
+ def _push_object(self, obj):
+ warnings.warn('paste.wsgiwrappers.settings is deprecated: Please use '
+ 'paste.wsgiwrappers.WSGIRequest.defaults instead',
+ DeprecationWarning, 3)
+ WSGIResponse.defaults._push_object(obj)
+ StackedObjectProxy._push_object(self, obj)
+
+# settings is deprecated: use WSGIResponse.defaults instead
+settings = DeprecatedSettings(default=dict())
class environ_getter(object):
"""For delegating an attribute to a key in self.environ."""
@@ -46,18 +53,44 @@ class WSGIRequest(object):
"""WSGI Request API Object
This object represents a WSGI request with a more friendly interface.
- This does not expose every detail of the WSGI environment, and does not
- in any way express anything beyond what is available in the environment
- dictionary. *All* state is kept in the environment dictionary; this
- is essential for interoperability.
+ This does not expose every detail of the WSGI environment, and attempts
+ to express nothing beyond what is available in the environment
+ dictionary.
+
+ The only state maintained in this object is the desired ``charset``
+ and its associated ``errors`` handler. The incoming parameters will
+ be automatically coerced to unicode objects of the ``charset``
+ encoding when ``charset`` is set.
+
+ When unicode is expected, ``charset`` will be overridden by the
+ value of the ``Content-Type`` header's charset parameter if one was
+ specified by the client.
+
+ The class variable ``defaults`` specifies default values for
+ ``charset`` and ``errors``. These can be overridden for the current
+ request via the registry.
+
+ *All* other state is kept in the environment dictionary; this is
+ essential for interoperability.
You are free to subclass this object.
"""
+ defaults = StackedObjectProxy(default=dict(charset=None, errors='strict'))
def __init__(self, environ):
self.environ = environ
# This isn't "state" really, since the object is derivative:
self.headers = EnvironHeaders(environ)
+
+ defaults = self.defaults._current_obj()
+ self.charset = defaults.get('charset')
+ if self.charset:
+ # There's a charset: params will be coerced to unicode. In that
+ # case, attempt to use the charset specified by the browser
+ charset = self.determine_browser_charset()
+ if charset:
+ self.charset = charset
+ self.errors = defaults.get('errors', 'strict')
body = environ_getter('wsgi.input')
scheme = environ_getter('wsgi.url_scheme')
@@ -76,33 +109,54 @@ class WSGIRequest(object):
return self.environ.get('HTTP_HOST', self.environ.get('SERVER_NAME'))
host = property(host, doc=host.__doc__)
+ def _GET(self):
+ return parse_dict_querystring(self.environ)
+
def GET(self):
"""
Dictionary-like object representing the QUERY_STRING
parameters. Always present, if possibly empty.
- If the same key is present in the query string multiple
- times, it will be present as a list.
+ If the same key is present in the query string multiple times, a
+ list of its values can be retrieved from the ``MultiDict`` via
+ the ``getall`` method.
+
+ Returns a ``MultiDict`` container or a ``UnicodeMultiDict`` when
+ ``charset`` is set.
"""
- return parse_dict_querystring(self.environ)
+ params = self._GET()
+ if self.charset:
+ params = UnicodeMultiDict(params, self.charset, self.errors)
+ return params
GET = property(GET, doc=GET.__doc__)
+ def _POST(self):
+ return parse_formvars(self.environ, include_get_vars=False)
+
def POST(self):
"""Dictionary-like object representing the POST body.
- Most values are strings, but file uploads can be FieldStorage
- objects. If this is not a POST request, or the body is not
- encoded fields (e.g., an XMLRPC request) then this will be empty.
+ Most values are encoded strings, or unicode strings when
+ ``charset`` is set. There may also be FieldStorage objects
+ representing file uploads. If this is not a POST request, or the
+ body is not encoded fields (e.g., an XMLRPC request) then this
+ will be empty.
This will consume wsgi.input when first accessed if applicable,
- but the output will be put in environ['paste.post_vars']
-
+ but the raw version will be put in
+ environ['paste.parsed_formvars'].
+
+ Returns a ``MultiDict`` container or a ``UnicodeMultiDict`` when
+ ``charset`` is set.
"""
- return parse_formvars(self.environ, include_get_vars=False)
+ params = self._POST()
+ if self.charset:
+ params = UnicodeMultiDict(params, self.charset, self.errors)
+ return params
POST = property(POST, doc=POST.__doc__)
def params(self):
- """MultiDict of keys from POST, GET, URL dicts
+ """Dictionary-like object of keys from POST, GET, URL dicts
Return a key value from the parameters, they are checked in the
following order: POST, GET, URL
@@ -112,11 +166,16 @@ class WSGIRequest(object):
``getlist(key)``
Returns a list of all the values by that key, collected from
POST, GET, URL dicts
+
+ Returns a ``MultiDict`` container or a ``UnicodeMultiDict`` when
+ ``charset`` is set.
"""
- pms = MultiDict()
- pms.update(self.POST)
- pms.update(self.GET)
- return pms
+ params = MultiDict()
+ params.update(self._POST())
+ params.update(self._GET())
+ if self.charset:
+ params = UnicodeMultiDict(params, self.charset, self.errors)
+ return params
params = property(params, doc=params.__doc__)
def cookies(self):
@@ -128,9 +187,27 @@ class WSGIRequest(object):
return get_cookie_dict(self.environ)
cookies = property(cookies, doc=cookies.__doc__)
-_CHARSET_RE = re.compile(r'.*;\s*charset=(.*?)(;|$)', re.I)
+ def determine_browser_charset(self):
+ """
+ Determine the encoding as specified by the browser via the
+ Content-Type's charset parameter, if one is set
+ """
+ charset_match = _CHARSET_RE.match(self.headers.get('Content-Type', ''))
+ if charset_match:
+ return charset_match.group(1)
+
class WSGIResponse(object):
- """A basic HTTP response with content, headers, and out-bound cookies"""
+ """A basic HTTP response with content, headers, and out-bound cookies
+
+ The class variable ``defaults`` specifies default values for
+ ``content_type``, ``charset`` and ``errors``. These can be overridden
+ for the current request via the registry.
+
+ """
+ defaults = StackedObjectProxy(
+ default=dict(content_type='text/html',
+ charset='UTF-8', errors='strict')
+ )
def __init__(self, content='', mimetype=None, code=200):
self._iter = None
self._is_str_iter = True
@@ -139,15 +216,15 @@ class WSGIResponse(object):
self.headers = HeaderDict()
self.cookies = SimpleCookie()
self.status_code = code
+
+ defaults = self.defaults._current_obj()
if not mimetype:
- mimetype = "%s; charset=%s" % (settings['content_type'],
- settings['charset'])
+ mimetype = defaults.get('content_type', 'text/html')
+ charset = defaults.get('charset')
+ if charset:
+ mimetype = '%s; charset=%s' % (mimetype, charset)
self.headers['Content-Type'] = mimetype
-
- if 'encoding_errors' in settings:
- self.encoding_errors = settings['encoding_errors']
- else:
- self.encoding_errors = 'strict'
+ self.errors = defaults.get('errors', 'strict')
def __str__(self):
"""Returns a rendition of the full HTTP message, including headers.
@@ -156,7 +233,7 @@ class WSGIResponse(object):
output of str(iterator) (to avoid exhausting the iterator).
"""
if self._is_str_iter:
- content = ''.join(self.get_content_as_string())
+ content = ''.join(self.get_content())
else:
content = str(self.content)
return '\n'.join(['%s: %s' % (key, value)
@@ -190,9 +267,9 @@ class WSGIResponse(object):
return environ['wsgi.file_wrapper'](self.content)
elif is_file:
return iter(lambda: self.content.read(), '')
- return self.get_content_as_string()
+ return self.get_content()
- def determine_encoding(self):
+ def determine_charset(self):
"""
Determine the encoding as specified by the Content-Type's charset
parameter, if one is set
@@ -200,8 +277,6 @@ class WSGIResponse(object):
charset_match = _CHARSET_RE.match(self.headers.get('Content-Type', ''))
if charset_match:
return charset_match.group(1)
- # No charset specified, default to iso-8859-1 as per RFC2616
- return 'iso-8859-1'
def has_header(self, header):
"""
@@ -253,13 +328,16 @@ class WSGIResponse(object):
'that yields strings, or an iterable object that '
'produces strings.')
- def get_content_as_string(self):
+ def get_content(self):
"""
Returns the content as an iterable of strings, encoding each element of
the iterator from a Unicode object if necessary.
"""
- return encode_unicode_app_iter(self.content, self.determine_encoding(),
- self.encoding_errors)
+ charset = self.determine_charset()
+ if charset:
+ return encode_unicode_app_iter(self.content, charset, self.errors)
+ else:
+ return self.content
def wsgi_response(self):
"""
@@ -271,7 +349,7 @@ class WSGIResponse(object):
response_headers = self.headers.headeritems()
for c in self.cookies.values():
response_headers.append(('Set-Cookie', c.output(header='')))
- return status, response_headers, self.get_content_as_string()
+ return status, response_headers, self.get_content()
# The remaining methods partially implement the file-like object interface.
# See http://docs.python.org/lib/bltin-file-objects.html
diff --git a/tests/test_multidict.py b/tests/test_multidict.py
index 494b0b2..a6787e8 100644
--- a/tests/test_multidict.py
+++ b/tests/test_multidict.py
@@ -1,24 +1,134 @@
-from paste.util.multidict import MultiDict
+# -*- coding: utf-8 -*-
+# (c) 2007 Ian Bicking and Philip Jenvey; written for Paste (http://pythonpaste.org)
+# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
+import cgi
+from StringIO import StringIO
+from paste.fixture import TestApp
+from paste.wsgiwrappers import WSGIRequest
+from paste.util.multidict import MultiDict, UnicodeMultiDict
from py.test import raises
def test_dict():
d = MultiDict({'a': 1})
assert d.items() == [('a', 1)]
+
d['b'] = 2
d['c'] = 3
assert d.items() == [('a', 1), ('b', 2), ('c', 3)]
+
d['b'] = 4
assert d.items() == [('a', 1), ('c', 3), ('b', 4)]
+
d.add('b', 5)
raises(KeyError, 'd.getone("b")')
assert d.getall('b') == [4, 5]
assert d.items() == [('a', 1), ('c', 3), ('b', 4), ('b', 5)]
+
del d['b']
assert d.items() == [('a', 1), ('c', 3)]
assert d.pop('xxx', 5) == 5
assert d.getone('a') == 1
assert d.popitem() == ('c', 3)
assert d.items() == [('a', 1)]
+
item = []
assert d.setdefault('z', item) is item
assert d.items() == [('a', 1), ('z', item)]
+
+ assert d.setdefault('y', 6) == 6
+
+ assert d.mixed() == {'a': 1, 'y': 6, 'z': item}
+ assert d.dict_of_lists() == {'a': [1], 'y': [6], 'z': [item]}
+
+ assert 'a' in d
+ dcopy = d.copy()
+ assert dcopy is not d
+ assert dcopy == d
+ d['x'] = 'x test'
+ assert dcopy != d
+
+def test_unicode_dict():
+ def assert_unicode(obj):
+ assert isinstance(obj, unicode)
+
+ def assert_unicode_items(obj):
+ key, value = obj
+ assert isinstance(key, unicode)
+ assert isinstance(value, unicode)
+
+ d = UnicodeMultiDict(MultiDict({'a': 'a test'}))
+ d.encoding = 'utf-8'
+ d.errors = 'ignore'
+ assert d.items() == [('a', u'a test')]
+ map(assert_unicode, d.keys())
+ map(assert_unicode, d.values())
+
+ d['b'] = '2 test'
+ d['c'] = '3 test'
+ assert d.items() == [('a', u'a test'), ('b', u'2 test'), ('c', u'3 test')]
+ map(assert_unicode_items, d.items())
+
+ d['b'] = '4 test'
+ assert d.items() == [('a', u'a test'), ('c', u'3 test'), ('b', u'4 test')]
+ map(assert_unicode_items, d.items())
+
+ d.add('b', '5 test')
+ raises(KeyError, 'd.getone("b")')
+ assert d.getall('b') == [u'4 test', u'5 test']
+ map(assert_unicode, d.getall('b'))
+ assert d.items() == [('a', u'a test'), ('c', u'3 test'), ('b', u'4 test'),
+ ('b', u'5 test')]
+ map(assert_unicode_items, d.items())
+
+ del d['b']
+ assert d.items() == [('a', u'a test'), ('c', u'3 test')]
+ map(assert_unicode_items, d.items())
+ assert d.pop('xxx', u'5 test') == u'5 test'
+ assert isinstance(d.pop('xxx', u'5 test'), unicode)
+ assert d.getone('a') == u'a test'
+ assert isinstance(d.getone('a'), unicode)
+ assert d.popitem() == ('c', u'3 test')
+ d['c'] = '3 test'
+ map(assert_unicode, d.popitem())
+ assert d.items() == [('a', u'a test')]
+ map(assert_unicode_items, d.items())
+
+ item = []
+ assert d.setdefault('z', item) is item
+ items = d.items()
+ assert items == [('a', u'a test'), ('z', item)]
+ assert isinstance(items[1][0], unicode)
+ assert isinstance(items[1][1], list)
+
+ assert isinstance(d.setdefault('y', 'y test'), unicode)
+ assert isinstance(d['y'], unicode)
+
+ assert d.mixed() == {u'a': u'a test', u'y': u'y test', u'z': item}
+ assert d.dict_of_lists() == {u'a': [u'a test'], u'y': [u'y test'],
+ u'z': [item]}
+ del d['z']
+ map(assert_unicode_items, d.mixed().iteritems())
+ map(assert_unicode_items, [(k, v[0]) for \
+ k, v in d.dict_of_lists().iteritems()])
+
+ assert u'a' in d
+ dcopy = d.copy()
+ assert dcopy is not d
+ assert dcopy == d
+ d['x'] = 'x test'
+ assert dcopy != d
+
+ fs = cgi.FieldStorage()
+ fs.name = 'thefile'
+ fs.filename = 'hello.txt'
+ fs.file = StringIO('hello')
+ d['f'] = fs
+ ufs = d['f']
+ assert isinstance(ufs, cgi.FieldStorage)
+ assert ufs is not fs
+ assert ufs.name == fs.name
+ assert isinstance(ufs.name, unicode)
+ assert ufs.filename == fs.filename
+ assert isinstance(ufs.filename, unicode)
+ assert isinstance(ufs.value, str)
+ assert ufs.value == 'hello'