diff options
| author | Sergey Shepelev <temotor@gmail.com> | 2018-03-07 10:48:07 +0300 |
|---|---|---|
| committer | Sergey Shepelev <temotor@gmail.com> | 2018-03-10 21:45:43 +0300 |
| commit | bf18bda2db948a6921cbb817880eb1594da2c1e7 (patch) | |
| tree | df5e4cb7f51e2e58446947df1718043ced94f8dc | |
| parent | 6c6cfc565f743c9de88fd131ec392cfb2162a984 (diff) | |
| download | eventlet-468-wsgi-latin1.tar.gz | |
wsgi: latin-1 encoding dance for environ[PATH_INFO] (ref: 468-wsgi-latin1)
https://www.python.org/dev/peps/pep-0333/#unicode-issues
https://github.com/eventlet/eventlet/issues/468
| -rw-r--r-- | eventlet/wsgi.py | 8 |
| -rw-r--r-- | tests/wsgi_test.py | 25 |
2 files changed, 31 insertions, 2 deletions
```diff
diff --git a/eventlet/wsgi.py b/eventlet/wsgi.py
index 7576f0e..9f49090 100644
--- a/eventlet/wsgi.py
+++ b/eventlet/wsgi.py
@@ -59,6 +59,12 @@ def addr_to_host_port(addr):
     return (host, port)
 
 
+def encode_dance(s):
+    if not isinstance(s, bytes):
+        s = s.encode('utf-8', 'replace')
+    return s.decode('latin1')
+
+
 # Collections of error codes to compare against. Not all attributes are set
 # on errno module on all platforms, so some are literals :(
 BAD_SOCK = set((errno.EBADF, 10053))
@@ -631,7 +637,7 @@ class HttpProtocol(BaseHTTPServer.BaseHTTPRequestHandler):
 
         pq = self.path.split('?', 1)
         env['RAW_PATH_INFO'] = pq[0]
-        env['PATH_INFO'] = urllib.parse.unquote(pq[0])
+        env['PATH_INFO'] = encode_dance(urllib.parse.unquote(pq[0]))
         if len(pq) > 1:
             env['QUERY_STRING'] = pq[1]
 
diff --git a/tests/wsgi_test.py b/tests/wsgi_test.py
index 48da085..d680154 100644
--- a/tests/wsgi_test.py
+++ b/tests/wsgi_test.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 import cgi
 import collections
 import errno
@@ -1398,10 +1399,32 @@ class TestHttpd(_TestBase):
         sock = eventlet.connect(self.server_addr)
         sock.sendall(b'GET /a*b@%40%233 HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n')
         result = read_http(sock)
-        self.assertEqual(result.status, 'HTTP/1.1 200 OK')
+        assert result.status == 'HTTP/1.1 200 OK'
         assert b'decoded: /a*b@@#3' in result.body
         assert b'raw: /a*b@%40%233' in result.body
 
+    def test_path_info_latin1(self):
+        # https://github.com/eventlet/eventlet/issues/468
+        g = []
+
+        def wsgi_app(environ, start_response):
+            g.append(environ['PATH_INFO'])
+            start_response("200 OK", [])
+            return b''
+
+        self.site.application = wsgi_app
+        sock = eventlet.connect(self.server_addr)
+        sock.sendall(b'GET /%E4%BD%A0%E5%A5%BD HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n')
+        result = read_http(sock)
+        assert result.status == 'HTTP/1.1 200 OK'
+        # that was only preparation, actual test below
+        # Per PEP-0333 https://www.python.org/dev/peps/pep-0333/#unicode-issues
+        # in all WSGI environment strings application must observe either bytes in latin-1 (ISO-8859-1)
+        # or unicode code points \u0000..\u00ff
+        # wsgi_decoding_dance from Werkzeug to emulate concerned application
+        decoded = g[0].encode('latin1').decode('utf-8', 'replace')
+        assert decoded == u'/你好'
+
     @tests.skip_if_no_ipv6
     def test_ipv6(self):
         try:
```
