diff options
author | Marcel Hellkamp <marc@gsites.de> | 2012-06-26 00:35:40 +0200 |
---|---|---|
committer | Marcel Hellkamp <marc@gsites.de> | 2012-06-26 00:35:40 +0200 |
commit | c8cc11409978761194c5fce16eefeead43fb3cb1 (patch) | |
tree | f9f301f04a6d66893eadb7e48c1114764e736aa9 | |
parent | 4edde9b1d58914e2227c201bc559441bc5be0991 (diff) | |
download | bottle-c8cc11409978761194c5fce16eefeead43fb3cb1.tar.gz |
docs: Unicode issues with form values.
-rw-r--r-- | bottle.py | 11 | ||||
-rwxr-xr-x | docs/tutorial.rst | 20 |
2 files changed, 25 insertions, 6 deletions
@@ -60,6 +60,7 @@ except ImportError: # pragma: no cover py = sys.version_info py3k = py >= (3,0,0) py25 = py < (2,6,0) +py31 = (3,1,0) <= py < (3,2,0) # Workaround for the missing "as" keyword in py3k. def _e(): return sys.exc_info()[1] @@ -116,10 +117,8 @@ def touni(s, enc='utf8', err='strict'): tonat = touni if py3k else tob # 3.2 fixes cgi.FieldStorage to accept bytes (which makes a lot of sense). -# but defaults to utf-8 (which is not always true) # 3.1 needs a workaround. -NCTextIOWrapper = None -if (3,0,0) < py < (3,2,0): +if py31: from io import TextIOWrapper class NCTextIOWrapper(TextIOWrapper): def close(self): pass # Keep wrapped buffer open. @@ -1080,11 +1079,11 @@ class BaseRequest(object): for key in ('REQUEST_METHOD', 'CONTENT_TYPE', 'CONTENT_LENGTH'): if key in self.environ: safe_env[key] = self.environ[key] args = dict(fp=self.body, environ=safe_env, keep_blank_values=True) - if py >= (3,2,0): - args['encoding'] = 'ISO-8859-1' - if NCTextIOWrapper: + if py31: args['fp'] = NCTextIOWrapper(args['fp'], encoding='ISO-8859-1', newline='\n') + elif py3k: + args['encoding'] = 'ISO-8859-1' data = cgi.FieldStorage(**args) for item in (data.list or [])[:self.MAX_PARAMS]: post[item.name] = item if item.filename else item.value diff --git a/docs/tutorial.rst b/docs/tutorial.rst index b7645d2..0f1ee65 100755 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -565,6 +565,26 @@ Here is an example for a simple file upload form: return "You missed a field." +Unicode issues +----------------------- + +In **Python 2** all keys and values are byte-strings. If you need unicode, you can call :meth:`FormDict.getunicode` or fetch values via attribute access. Both methods try to decode the string (default: utf8) and return an empty string if that fails. 
No need to catch :exc:`UnicodeError`:: + + >>> request.query['city'] + 'G\xc3\xb6ttingen' # A utf8 byte string + >>> request.query.city + u'Göttingen' # The same string as unicode + +In **Python 3** all strings are unicode, but HTTP is a byte-based wire protocol. The server has to decode the byte strings somehow before they are passed to the application. To be on the safe side, WSGI suggests ISO-8859-1 (aka latin1), a reversible single-byte codec that can be re-encoded with a different encoding later. Bottle does that for :meth:`FormsDict.getunicode` and attribute access, but not for the dict-access methods. These return the unchanged values as provided by the server implementation, which is probably not what you want. + + >>> request.query['city'] + 'GÃ¶ttingen' # A utf8 string provisionally decoded as ISO-8859-1 by the server + >>> request.query.city + 'Göttingen' # The same string correctly re-encoded as utf8 by bottle + +If you need the whole dictionary with correctly decoded values (e.g. for WTForms), you can call :meth:`FormsDict.decode` to get a re-encoded copy. + + WSGI Environment -------------------------------------------------------------------------------- |