summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris McDonough <chrism@plope.com>2012-01-08 16:24:58 -0500
committerChris McDonough <chrism@plope.com>2012-01-08 16:24:58 -0500
commit2fd0945e7fac522e76ca180fe53067f46d728278 (patch)
treee6daaed503e66c2f33cb9edbf759d38ad1f9a2ef
parent63302b67d06d519c569f325528c98bae5bcbf9d5 (diff)
downloadwebob-feature.pathinfogeddon.tar.gz
add tests, add docs describing all changesfeature.pathinfogeddon
-rw-r--r--docs/news.txt69
-rw-r--r--tests/test_request.py4
-rw-r--r--tests/test_response.py21
-rw-r--r--webob/request.py17
-rw-r--r--webob/response.py13
-rw-r--r--webob/util.py2
6 files changed, 113 insertions, 13 deletions
diff --git a/docs/news.txt b/docs/news.txt
index caa27af..280038f 100644
--- a/docs/news.txt
+++ b/docs/news.txt
@@ -15,6 +15,75 @@ Next release
* Removed (non-API) ``webob.descriptors.upath_property``.
+* ``request.path_info_pop`` and ``request.path_info_peek`` now return
+ bytestrings on Python 3. Previously, they returned text. Rationale:
+ the raw value of ``PATH_INFO`` on Python 3 is "WSGI-encoded" (raw bytes
+ decoded from Latin-1), so it makes no sense to operate on it directly. We
+ normalize behavior on Python 2 and Python 3 by explicitly working against
+ and returning bytes on both platforms.
+
+* ``Request.blank(<url_with_nonascii_url_encoded_value>)`` and
+ ``Request.blank(base_url=<url_with_nonascii_url_encoded_value>)`` now
+ produce a request environment with correct ``PATH_INFO`` and
+ ``SCRIPT_NAME`` environment variables.
+
+* ``Request.from_file`` now produces a request environment with correct
+ ``PATH_INFO`` and ``SCRIPT_NAME`` environment variables when the header
+ line of the request contains non-ASCII characters in the URI.
+
+* Response ``location`` header value mutation (converting relative paths to
+ absolute) when a response is generated will now create the correct URL when
+ ``SCRIPT_NAME`` or ``PATH_INFO`` exists in the environment. Also fixed the
+ same code so that it no longer fails when non-ASCII characters are in the
+ location.
+
+* BaseRequest now accepts a ``url_encoding`` argument, which defaults to
+ 'utf-8'. It represents the presumed encoding of the SCRIPT_NAME and
+ PATH_INFO environment variables.
+
+* Four new descriptor APIs have been added to BaseRequest: ``pathinfo``,
+ ``pathinfo_bytes``, ``scriptname``, and ``scriptname_bytes``. These
+ supersede the existing ``path_info`` and ``script_name`` descriptors and
+ should be used going forward instead. The ``path_info`` and ``script_name``
+ descriptors have been deprecated.
+
+ This was done to address Python 3-related PEP 3333 issues. The older
+ ``path_info`` and ``script_name`` descriptors have historically operated
+ against the raw ``PATH_INFO`` and ``SCRIPT_NAME`` values in the WSGI
+ environment, treating them as if they were bytestring values. However,
+ because PEP 3333 specifies that PATH_INFO and SCRIPT_NAME are *text* (raw
+ bytes decoded from Latin-1) on Python 3, operating on the raw environ
+ values as if they are bytes is not sane on that platform, and will not
+ work.
+
+ At the same time, we can't just make ``path_info`` and ``script_name``
+ return decoded (Unicode) values on Python 2 without breaking existing
+ consumer code, and having these return bytestrings on Python 3 is not what
+ anyone expects. It's generally much saner to operate against text (decoded)
+ values on both platforms, and we'd prefer to make this the default
+ name-wise going forward. So we've chosen to deprecate both ``path_info``
+ and ``script_name``. They'll continue operating like they always have,
+ which is to say, they'll work fine on Python 2, and they'll return nonsense
+ on Python 3. We'll leave both in place for a long time, but they'll emit a
+ warning when used.
+
+ ``pathinfo`` is a replacement for the older deprecated ``path_info``
+ descriptor; it returns the PATH_INFO as unicode/text (decoded using the
+ request's url_encoding). It's a descriptor, so you can also set it using a
+ text value. It will raise an exception if you try to set it using a
+ non-text value. ``pathinfo_bytes`` is the bytes-oriented version, which
+ you can use to get and set PATH_INFO using a bytes value.
+
+ ``scriptname`` is a replacement for the older deprecated ``script_name``
+ descriptor; it returns the SCRIPT_NAME as unicode/text (decoded using the
+ request's url_encoding). It's a descriptor, so you can also set it using a
+ text value. It will raise an exception if you try to set it using a
+ non-text value. ``scriptname_bytes`` is the bytes-oriented version, which
+ you can use to get and set SCRIPT_NAME using a bytes value.
+
+ The pre-existing ``upath_info`` descriptor is now aliased to the new
+ ``pathinfo`` descriptor. The pre-existing ``uscript_name`` descriptor is
+ now aliased to the new ``scriptname`` descriptor.
+
1.2b2
------
diff --git a/tests/test_request.py b/tests/test_request.py
index 016bd0e..462dc0f 100644
--- a/tests/test_request.py
+++ b/tests/test_request.py
@@ -797,7 +797,7 @@ class BaseRequestTests(unittest.TestCase):
def test_path_info_pop_non_empty_w_pattern_miss(self):
import re
- PATTERN = re.compile('miss')
+ PATTERN = re.compile(b'miss')
environ = {'wsgi.url_scheme': 'http',
'SERVER_NAME': 'example.com',
'SERVER_PORT': '80',
@@ -812,7 +812,7 @@ class BaseRequestTests(unittest.TestCase):
def test_path_info_pop_non_empty_w_pattern_hit(self):
import re
- PATTERN = re.compile('path')
+ PATTERN = re.compile(b'path')
environ = {'wsgi.url_scheme': 'http',
'SERVER_NAME': 'example.com',
'SERVER_PORT': '80',
diff --git a/tests/test_response.py b/tests/test_response.py
index 792d77d..8cef916 100644
--- a/tests/test_response.py
+++ b/tests/test_response.py
@@ -997,11 +997,30 @@ def test_decode_content_gzip():
def test__abs_headerlist_location_with_scheme():
res = Response()
- res.content_encoding = 'gzip'
res.headerlist = [('Location', 'http:')]
result = res._abs_headerlist({})
eq_(result, [('Location', 'http:')])
+def test__abs_headerlist_location_with_relative():
+ encoded_path_info = b'/\xe6\xb5\x81'
+ encoded_script_name = b'/\xe6\xb5\x82'
+ if PY3:
+ wsgiencoded_path_info = encoded_path_info.decode('latin-1')
+ wsgiencoded_script_name = encoded_script_name.decode('latin-1')
+ else:
+ wsgiencoded_path_info = encoded_path_info
+ wsgiencoded_script_name = encoded_script_name
+ environ = {
+ 'wsgi.url_scheme': 'http',
+ 'HTTP_HOST': 'test.com',
+ 'SCRIPT_NAME': wsgiencoded_script_name,
+ 'PATH_INFO': wsgiencoded_path_info,
+ }
+ res = Response()
+ res.headerlist = [('Location', 'foo')]
+ result = res._abs_headerlist(environ)
+ eq_(result, [('Location', 'http://test.com/%E6%B5%82/%E6%B5%81/foo')])
+
def test_response_set_body_file1():
data = b'abc'
file = io.BytesIO(data)
diff --git a/webob/request.py b/webob/request.py
index f76b754..50bb407 100644
--- a/webob/request.py
+++ b/webob/request.py
@@ -281,8 +281,16 @@ class BaseRequest(object):
parse_int, serialize_int, 'int')
# raw wsgi values (bytes on py2, bytes-tunneled-via-text on py3)
- script_name = environ_getter('SCRIPT_NAME', '')
- path_info = environ_getter('PATH_INFO')
+ script_name = deprecated_property(
+ environ_getter('SCRIPT_NAME', ''),
+ 'script_name',
+ 'deprecated in WebOb 1.2, use scriptname or scriptname_bytes instead',
+ '1.4')
+ path_info = deprecated_property(
+ environ_getter('PATH_INFO'),
+ 'path_info',
+ 'deprecated in WebOb 1.2, use pathinfo or pathinfo_bytes instead',
+ '1.4')
if PY3: # pragma: no cover
def _bytes_to_wsgi(self, val):
@@ -671,7 +679,8 @@ class BaseRequest(object):
Optional ``pattern`` argument is a regexp to match the return value
before returning. If there is no match, no changes are made to the
- request and None is returned.
+ request and None is returned. The pattern must always match against
+ a bytes object (not unicode).
"""
path = self.pathinfo_bytes
if not path:
@@ -684,7 +693,7 @@ class BaseRequest(object):
if idx == -1:
idx = len(path)
r = path[:idx]
- if pattern is None or re.match(pattern, r.decode(self.url_encoding)):
+ if pattern is None or re.match(pattern, r):
self.scriptname_bytes += slashes + r
self.pathinfo_bytes = path[idx:]
return r
diff --git a/webob/response.py b/webob/response.py
index 83931f8..20b0e47 100644
--- a/webob/response.py
+++ b/webob/response.py
@@ -934,8 +934,10 @@ class Response(object):
if name.lower() == 'location':
if SCHEME_RE.search(value):
break
- new_location = urlparse.urljoin(
- _request_uri(environ), value)
+ uri = _request_uri(environ)
+ if not uri.endswith('/'):
+ uri += '/'
+ new_location = urlparse.urljoin(uri, value)
headerlist = list(headerlist)
idx = headerlist.index((name, value))
headerlist[idx] = (name, new_location)
@@ -1152,16 +1154,17 @@ def _request_uri(environ):
script_name = environ.get('SCRIPT_NAME') or '/'
path_info = environ.get('PATH_INFO','')
+
if PY3: # pragma: no cover
script_name = script_name.encode('latin-1').decode('utf-8')
path_info = path_info.encode('latin-1').decode('utf-8')
url += url_quote(script_name)
path_info = url_quote(path_info)
- if not environ.get('SCRIPT_NAME'):
- url += path_info[1:]
- else:
+ if environ.get('SCRIPT_NAME'):
url += path_info
+ else:
+ url += path_info[1:]
return url
diff --git a/webob/util.py b/webob/util.py
index b740088..6b838b4 100644
--- a/webob/util.py
+++ b/webob/util.py
@@ -49,7 +49,7 @@ def warn_deprecation(text, version, stacklevel): # pragma: no cover
# version specifies when to start raising exceptions instead of warnings
if version == '1.2':
raise DeprecationWarning(text)
- elif version == '1.3':
+ elif version in ('1.3', '1.4'):
cls = DeprecationWarning
else:
cls = DeprecationWarning