diff options
author | Sergey Shepelev <temotor@gmail.com> | 2018-03-07 10:48:07 +0300 |
---|---|---|
committer | Sergey Shepelev <temotor@gmail.com> | 2018-03-11 01:04:18 +0300 |
commit | 4e576cbca07cd2c50da4baffd033a67c2ab10d1a (patch) | |
tree | fbad231c2f01d3ce73dd538df5db7df4b38ba6d8 | |
parent | 6c6cfc565f743c9de88fd131ec392cfb2162a984 (diff) | |
download | eventlet-4e576cbca07cd2c50da4baffd033a67c2ab10d1a.tar.gz |
wsgi: latin-1 encoding dance for environ[PATH_INFO]
https://www.python.org/dev/peps/pep-0333/#unicode-issues
https://github.com/eventlet/eventlet/issues/468
-rw-r--r-- | eventlet/wsgi.py | 8 | ||||
-rw-r--r-- | tests/wsgi_test.py | 25 | ||||
-rw-r--r-- | tox.ini | 4 |
3 files changed, 33 insertions, 4 deletions
```diff
diff --git a/eventlet/wsgi.py b/eventlet/wsgi.py
index 7576f0e..9f49090 100644
--- a/eventlet/wsgi.py
+++ b/eventlet/wsgi.py
@@ -59,6 +59,12 @@ def addr_to_host_port(addr):
     return (host, port)
 
 
+def encode_dance(s):
+    if not isinstance(s, bytes):
+        s = s.encode('utf-8', 'replace')
+    return s.decode('latin1')
+
+
 # Collections of error codes to compare against. Not all attributes are set
 # on errno module on all platforms, so some are literals :(
 BAD_SOCK = set((errno.EBADF, 10053))
@@ -631,7 +637,7 @@ class HttpProtocol(BaseHTTPServer.BaseHTTPRequestHandler):
 
         pq = self.path.split('?', 1)
         env['RAW_PATH_INFO'] = pq[0]
-        env['PATH_INFO'] = urllib.parse.unquote(pq[0])
+        env['PATH_INFO'] = encode_dance(urllib.parse.unquote(pq[0]))
         if len(pq) > 1:
             env['QUERY_STRING'] = pq[1]
 
diff --git a/tests/wsgi_test.py b/tests/wsgi_test.py
index 48da085..d680154 100644
--- a/tests/wsgi_test.py
+++ b/tests/wsgi_test.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 import cgi
 import collections
 import errno
@@ -1398,10 +1399,32 @@ class TestHttpd(_TestBase):
         sock = eventlet.connect(self.server_addr)
         sock.sendall(b'GET /a*b@%40%233 HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n')
         result = read_http(sock)
-        self.assertEqual(result.status, 'HTTP/1.1 200 OK')
+        assert result.status == 'HTTP/1.1 200 OK'
         assert b'decoded: /a*b@@#3' in result.body
         assert b'raw: /a*b@%40%233' in result.body
 
+    def test_path_info_latin1(self):
+        # https://github.com/eventlet/eventlet/issues/468
+        g = []
+
+        def wsgi_app(environ, start_response):
+            g.append(environ['PATH_INFO'])
+            start_response("200 OK", [])
+            return b''
+
+        self.site.application = wsgi_app
+        sock = eventlet.connect(self.server_addr)
+        sock.sendall(b'GET /%E4%BD%A0%E5%A5%BD HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n')
+        result = read_http(sock)
+        assert result.status == 'HTTP/1.1 200 OK'
+        # that was only preparation, actual test below
+        # Per PEP-0333 https://www.python.org/dev/peps/pep-0333/#unicode-issues
+        # in all WSGI environment strings application must observe either bytes in latin-1 (ISO-8859-1)
+        # or unicode code points \u0000..\u00ff
+        # wsgi_decoding_dance from Werkzeug to emulate concerned application
+        decoded = g[0].encode('latin1').decode('utf-8', 'replace')
+        assert decoded == u'/你好'
+
     @tests.skip_if_no_ipv6
     def test_ipv6(self):
         try:
diff --git a/tox.ini b/tox.ini
--- a/tox.ini
+++ b/tox.ini
@@ -3,13 +3,13 @@
 [flake8]
 exclude = *.egg*,.env,.git,.hg,.tox,_*,build*,dist*,venv*,six.py,mock.py,eventlet/green/http/*,eventlet/support/dns/*,eventlet/support/monotonic.py
 ignore = E261,E402,E731,W503
-max-line-length = 101
+max-line-length = 123
 
 [pep8]
 count = 1
 exclude = *.egg*,.env,.git,.hg,.tox,_*,build*,dist*,venv*,six.py,mock.py,eventlet/green/http/*,eventlet/support/dns/*,eventlet/support/monotonic.py
 ignore = E261,E402,E731,W503
-max-line-length = 101
+max-line-length = 123
 show-source = 1
 statistics = 1
 
```