summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarcel Hellkamp <marc@gsites.de>2012-06-26 00:35:40 +0200
committerMarcel Hellkamp <marc@gsites.de>2012-06-26 00:35:40 +0200
commitc8cc11409978761194c5fce16eefeead43fb3cb1 (patch)
treef9f301f04a6d66893eadb7e48c1114764e736aa9
parent4edde9b1d58914e2227c201bc559441bc5be0991 (diff)
downloadbottle-c8cc11409978761194c5fce16eefeead43fb3cb1.tar.gz
docs: Unicode issues with form values.
-rw-r--r--bottle.py11
-rwxr-xr-xdocs/tutorial.rst20
2 files changed, 25 insertions, 6 deletions
diff --git a/bottle.py b/bottle.py
index ca70659..652da08 100644
--- a/bottle.py
+++ b/bottle.py
@@ -60,6 +60,7 @@ except ImportError: # pragma: no cover
py = sys.version_info
py3k = py >= (3,0,0)
py25 = py < (2,6,0)
+py31 = (3,1,0) <= py < (3,2,0)
# Workaround for the missing "as" keyword in py3k.
def _e(): return sys.exc_info()[1]
@@ -116,10 +117,8 @@ def touni(s, enc='utf8', err='strict'):
tonat = touni if py3k else tob
# 3.2 fixes cgi.FieldStorage to accept bytes (which makes a lot of sense).
-# but defaults to utf-8 (which is not always true)
# 3.1 needs a workaround.
-NCTextIOWrapper = None
-if (3,0,0) < py < (3,2,0):
+if py31:
from io import TextIOWrapper
class NCTextIOWrapper(TextIOWrapper):
def close(self): pass # Keep wrapped buffer open.
@@ -1080,11 +1079,11 @@ class BaseRequest(object):
for key in ('REQUEST_METHOD', 'CONTENT_TYPE', 'CONTENT_LENGTH'):
if key in self.environ: safe_env[key] = self.environ[key]
args = dict(fp=self.body, environ=safe_env, keep_blank_values=True)
- if py >= (3,2,0):
- args['encoding'] = 'ISO-8859-1'
- if NCTextIOWrapper:
+ if py31:
args['fp'] = NCTextIOWrapper(args['fp'], encoding='ISO-8859-1',
newline='\n')
+ elif py3k:
+ args['encoding'] = 'ISO-8859-1'
data = cgi.FieldStorage(**args)
for item in (data.list or [])[:self.MAX_PARAMS]:
post[item.name] = item if item.filename else item.value
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index b7645d2..0f1ee65 100755
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -565,6 +565,26 @@ Here is an example for a simple file upload form:
return "You missed a field."
+Unicode issues
+-----------------------
+
+In **Python 2** all keys and values are byte-strings. If you need unicode, you can call :meth:`FormsDict.getunicode` or fetch values via attribute access. Both methods try to decode the string (default: utf8) and return an empty string if that fails. No need to catch :exc:`UnicodeError`::
+
+ >>> request.query['city']
+ 'G\xc3\xb6ttingen' # A utf8 byte string
+ >>> request.query.city
+ u'Göttingen' # The same string as unicode
+
+In **Python 3** all strings are unicode, but HTTP is a byte-based wire protocol. The server has to decode the byte strings somehow before they are passed to the application. To be on the safe side, WSGI suggests ISO-8859-1 (aka latin1), a reversible single-byte codec that can be re-encoded with a different encoding later. Bottle does that for :meth:`FormsDict.getunicode` and attribute access, but not for the dict-access methods. These return the unchanged values as provided by the server implementation, which is probably not what you want.
+
+ >>> request.query['city']
+ 'GÃ¶ttingen' # A utf8 string provisionally decoded as ISO-8859-1 by the server
+ >>> request.query.city
+ 'Göttingen' # The same string correctly re-encoded as utf8 by bottle
+
+If you need the whole dictionary with correctly decoded values (e.g. for WTForms), you can call :meth:`FormsDict.decode` to get a re-encoded copy.
+
+
WSGI Environment
--------------------------------------------------------------------------------