diff options
| author | Chris McDonough <chrism@plope.com> | 2012-01-08 16:24:58 -0500 |
|---|---|---|
| committer | Chris McDonough <chrism@plope.com> | 2012-01-08 16:24:58 -0500 |
| commit | 2fd0945e7fac522e76ca180fe53067f46d728278 (patch) | |
| tree | e6daaed503e66c2f33cb9edbf759d38ad1f9a2ef | |
| parent | 63302b67d06d519c569f325528c98bae5bcbf9d5 (diff) | |
| download | webob-feature.pathinfogeddon.tar.gz | |
add tests, add docs describing all changes (feature.pathinfogeddon)
| -rw-r--r-- | docs/news.txt | 69 | ||||
| -rw-r--r-- | tests/test_request.py | 4 | ||||
| -rw-r--r-- | tests/test_response.py | 21 | ||||
| -rw-r--r-- | webob/request.py | 17 | ||||
| -rw-r--r-- | webob/response.py | 13 | ||||
| -rw-r--r-- | webob/util.py | 2 |
6 files changed, 113 insertions, 13 deletions
diff --git a/docs/news.txt b/docs/news.txt index caa27af..280038f 100644 --- a/docs/news.txt +++ b/docs/news.txt @@ -15,6 +15,75 @@ Next release * Removed (non-API) ``webob.descriptors.upath_property``. +* ``request.path_info_pop`` and ``request.path_info_peek`` now return + bytestrings on Python 3. Previously, they would return text. Rationale: + the raw value of ``PATH_INFO`` on Python 3 is "WSGI-encoded", and doesn't + make sense to work against at all. We normalize behavior on Python 2 and + Python 3 by explicitly working against and returning bytes on both + platforms. + +* ``Request.blank(<url_with_nonascii_url_encoded_value>)`` and + ``Request.blank(base_url=<url_with_nonascii_url_encoded_value>)`` now + produce a request environment with correct ``PATH_INFO`` and + ``SCRIPT_NAME`` environment variables. + +* Request.from_file now produces a request environment with correct + ``PATH_INFO`` and ``SCRIPT_NAME`` environment variables when the header + line of the request contains nonascii characters in the URI. + +* Response ``location`` header value mutation (converting relative paths to + absolute) when a response is generated will now create the correct URL when + SCRIPT_NAME or PATH_INFO exists in the environment. Also fixed the same code + to not barf when nonascii characters are in the location. + +* BaseRequest now accepts a ``url_encoding`` argument, which defaults to + 'utf-8'. It represents the presumed encoding of the SCRIPT_NAME and + PATH_INFO environment variables. + +* Four new descriptor APIs have been added to BaseRequest: ``pathinfo``, + ``pathinfo_bytes``, ``scriptname``, and ``scriptname_bytes``. These + supersede the existing ``path_info`` and ``script_name`` descriptors and + should be used going forward instead. The ``path_info`` and ``script_name`` + descriptors have been deprecated. + + This was done to address Python 3-related PEP 3333 issues. 
The older + ``path_info`` and ``script_name`` descriptors have historically operated + against the raw ``PATH_INFO`` and ``SCRIPT_NAME`` values in the WSGI + environment, treating them as if they were bytestring values. However, + because PEP 3333 specifies that PATH_INFO and SCRIPT_NAME are *text* (raw + bytes decoded from Latin-1) on Python 3, operating on the raw environ + values as if they are bytes is not sane on that platform, and will not + work. + + In the meantime, we can't just make ``path_info`` and ``script_name`` + return decoded (Unicode) values on Python 2 without breaking existing + consumer code, and having these return bytestrings on Python 3 is what no + one expects. It's generally much saner to operate against text (decoded) + values on both platforms, and we'd prefer to make this the default + name-wise going forward. So we've chosen to deprecate both ``path_info`` + and ``script_name``. They'll continue operating like they always have, + which is to say, they'll work fine on Python 2, and they'll return nonsense + on Python 3. We'll leave both in place for a long time, but they'll emit a + warning when used. + + ``pathinfo`` is a replacement for the older deprecated ``path_info`` + descriptor; it returns the PATH_INFO as unicode/text (decoded using the + request's url_encoding). It's a descriptor, so you can also set it using a + text value. It will raise an exception if you try to set it using a + non-text value. ``pathinfo_bytes`` is the bytes-oriented version, which + you can use to get and set PATH_INFO using a bytes value. + + ``scriptname`` is a replacement for the older deprecated ``script_name`` + descriptor; it returns the SCRIPT_NAME as unicode/text (decoded using the + request's url_encoding). It's a descriptor, so you can also set it using a + text value. It will raise an exception if you try to set it using a + non-text value. 
``scriptname_bytes`` is the bytes-oriented version, which + you can use to get and set SCRIPT_NAME using a bytes value. + + The pre-existing ``upath_info`` descriptor is now aliased to the new + ``pathinfo`` descriptor. The pre-existing ``uscript_name`` descriptor is + now aliased to the new ``scriptname`` descriptor. + 1.2b2 ------ diff --git a/tests/test_request.py b/tests/test_request.py index 016bd0e..462dc0f 100644 --- a/tests/test_request.py +++ b/tests/test_request.py @@ -797,7 +797,7 @@ class BaseRequestTests(unittest.TestCase): def test_path_info_pop_non_empty_w_pattern_miss(self): import re - PATTERN = re.compile('miss') + PATTERN = re.compile(b'miss') environ = {'wsgi.url_scheme': 'http', 'SERVER_NAME': 'example.com', 'SERVER_PORT': '80', @@ -812,7 +812,7 @@ class BaseRequestTests(unittest.TestCase): def test_path_info_pop_non_empty_w_pattern_hit(self): import re - PATTERN = re.compile('path') + PATTERN = re.compile(b'path') environ = {'wsgi.url_scheme': 'http', 'SERVER_NAME': 'example.com', 'SERVER_PORT': '80', diff --git a/tests/test_response.py b/tests/test_response.py index 792d77d..8cef916 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -997,11 +997,30 @@ def test_decode_content_gzip(): def test__abs_headerlist_location_with_scheme(): res = Response() - res.content_encoding = 'gzip' res.headerlist = [('Location', 'http:')] result = res._abs_headerlist({}) eq_(result, [('Location', 'http:')]) +def test__abs_headerlist_location_with_relative(): + encoded_path_info = b'/\xe6\xb5\x81' + encoded_script_name = b'/\xe6\xb5\x82' + if PY3: + wsgiencoded_path_info = encoded_path_info.decode('latin-1') + wsgiencoded_script_name = encoded_script_name.decode('latin-1') + else: + wsgiencoded_path_info = encoded_path_info + wsgiencoded_script_name = encoded_script_name + environ = { + 'wsgi.url_scheme': 'http', + 'HTTP_HOST': 'test.com', + 'SCRIPT_NAME': wsgiencoded_script_name, + 'PATH_INFO': wsgiencoded_path_info, + } + res = Response() + 
res.headerlist = [('Location', 'foo')] + result = res._abs_headerlist(environ) + eq_(result, [('Location', 'http://test.com/%E6%B5%82/%E6%B5%81/foo')]) + def test_response_set_body_file1(): data = b'abc' file = io.BytesIO(data) diff --git a/webob/request.py b/webob/request.py index f76b754..50bb407 100644 --- a/webob/request.py +++ b/webob/request.py @@ -281,8 +281,16 @@ class BaseRequest(object): parse_int, serialize_int, 'int') # raw wsgi values (bytes on py2, bytes-tunneled-via-text on py3) - script_name = environ_getter('SCRIPT_NAME', '') - path_info = environ_getter('PATH_INFO') + script_name = deprecated_property( + environ_getter('SCRIPT_NAME', ''), + 'script_name', + 'deprecated in WebOb 1.2, use scriptname or scriptname_bytes instead', + '1.4') + path_info = deprecated_property( + environ_getter('PATH_INFO'), + 'path_info', + 'deprecated in WebOb 1.2, use pathinfo or pathinfo_bytes instead', + '1.4') if PY3: # pragma: no cover def _bytes_to_wsgi(self, val): @@ -671,7 +679,8 @@ class BaseRequest(object): Optional ``pattern`` argument is a regexp to match the return value before returning. If there is no match, no changes are made to the - request and None is returned. + request and None is returned. The pattern must always match against + a bytes object (not unicode). 
""" path = self.pathinfo_bytes if not path: @@ -684,7 +693,7 @@ class BaseRequest(object): if idx == -1: idx = len(path) r = path[:idx] - if pattern is None or re.match(pattern, r.decode(self.url_encoding)): + if pattern is None or re.match(pattern, r): self.scriptname_bytes += slashes + r self.pathinfo_bytes = path[idx:] return r diff --git a/webob/response.py b/webob/response.py index 83931f8..20b0e47 100644 --- a/webob/response.py +++ b/webob/response.py @@ -934,8 +934,10 @@ class Response(object): if name.lower() == 'location': if SCHEME_RE.search(value): break - new_location = urlparse.urljoin( - _request_uri(environ), value) + uri = _request_uri(environ) + if not uri.endswith('/'): + uri += '/' + new_location = urlparse.urljoin(uri, value) headerlist = list(headerlist) idx = headerlist.index((name, value)) headerlist[idx] = (name, new_location) @@ -1152,16 +1154,17 @@ def _request_uri(environ): script_name = environ.get('SCRIPT_NAME') or '/' path_info = environ.get('PATH_INFO','') + if PY3: # pragma: no cover script_name = script_name.encode('latin-1').decode('utf-8') path_info = path_info.encode('latin-1').decode('utf-8') url += url_quote(script_name) path_info = url_quote(path_info) - if not environ.get('SCRIPT_NAME'): - url += path_info[1:] - else: + if environ.get('SCRIPT_NAME'): url += path_info + else: + url += path_info[1:] return url diff --git a/webob/util.py b/webob/util.py index b740088..6b838b4 100644 --- a/webob/util.py +++ b/webob/util.py @@ -49,7 +49,7 @@ def warn_deprecation(text, version, stacklevel): # pragma: no cover # version specifies when to start raising exceptions instead of warnings if version == '1.2': raise DeprecationWarning(text) - elif version == '1.3': + elif version in ('1.3', '1.4'): cls = DeprecationWarning else: cls = DeprecationWarning |
