diff options
Diffstat (limited to 'Lib')
| -rw-r--r-- | Lib/test/test_wsgiref.py | 4 |
| -rw-r--r-- | Lib/wsgiref/handlers.py | 115 |
| -rw-r--r-- | Lib/wsgiref/simple_server.py | 5 |
3 files changed, 116 insertions, 8 deletions
| diff --git a/Lib/test/test_wsgiref.py b/Lib/test/test_wsgiref.py index 49d372d6c6..8051b4a081 100644 --- a/Lib/test/test_wsgiref.py +++ b/Lib/test/test_wsgiref.py @@ -131,7 +131,7 @@ class IntegrationTests(TestCase):      def check_hello(self, out, has_length=True):          self.assertEqual(out,              ("HTTP/1.0 200 OK\r\n" -            "Server: WSGIServer/0.1 Python/"+sys.version.split()[0]+"\r\n" +            "Server: WSGIServer/0.2 Python/"+sys.version.split()[0]+"\r\n"              "Content-Type: text/plain\r\n"              "Date: Mon, 05 Jun 2006 18:49:54 GMT\r\n" +              (has_length and  "Content-Length: 13\r\n" or "") + @@ -187,7 +187,7 @@ class IntegrationTests(TestCase):          ver = sys.version.split()[0].encode('ascii')          self.assertEqual(                  b"HTTP/1.0 200 OK\r\n" -                b"Server: WSGIServer/0.1 Python/" + ver + b"\r\n" +                b"Server: WSGIServer/0.2 Python/" + ver + b"\r\n"                  b"Content-Type: text/plain; charset=utf-8\r\n"                  b"Date: Wed, 24 Dec 2008 13:29:32 GMT\r\n"                  b"\r\n" diff --git a/Lib/wsgiref/handlers.py b/Lib/wsgiref/handlers.py index 3e11219095..6d6f80ffd7 100644 --- a/Lib/wsgiref/handlers.py +++ b/Lib/wsgiref/handlers.py @@ -5,7 +5,10 @@ from .headers import Headers  import sys, os, time -__all__ = ['BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler'] +__all__ = [ +    'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler', +    'IISCGIHandler', 'read_environ' +]  # Weekday and month names for HTTP date/time formatting; always English!  
_weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] @@ -19,6 +22,74 @@ def format_date_time(timestamp):          _weekdayname[wd], day, _monthname[month], year, hh, mm, ss      ) +_is_request = { +    'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE', +    'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT', +}.__contains__ + +def _needs_transcode(k): +    return _is_request(k) or k.startswith('HTTP_') or k.startswith('SSL_') \ +        or (k.startswith('REDIRECT_') and _needs_transcode(k[9:])) + +def read_environ(): +    """Read environment, fixing HTTP variables""" +    enc = sys.getfilesystemencoding() +    esc = 'surrogateescape' +    try: +        ''.encode('utf-8', esc) +    except LookupError: +        esc = 'replace' +    environ = {} + +    # Take the basic environment from native-unicode os.environ. Attempt to +    # fix up the variables that come from the HTTP request to compensate for +    # the bytes->unicode decoding step that will already have taken place. +    for k, v in os.environ.items(): +        if _needs_transcode(k): + +            # On win32, the os.environ is natively Unicode. Different servers +            # decode the request bytes using different encodings. +            if sys.platform == 'win32': +                software = os.environ.get('SERVER_SOFTWARE', '').lower() + +                # On IIS, the HTTP request will be decoded as UTF-8 as long +                # as the input is a valid UTF-8 sequence. Otherwise it is +                # decoded using the system code page (mbcs), with no way to +                # detect this has happened. Because UTF-8 is the more likely +                # encoding, and mbcs is inherently unreliable (an mbcs string +                # that happens to be valid UTF-8 will not be decoded as mbcs) +                # always recreate the original bytes as UTF-8. 
+                if software.startswith('microsoft-iis/'): +                    v = v.encode('utf-8').decode('iso-8859-1') + +                # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct +                # to the Unicode environ. No modification needed. +                elif software.startswith('apache/'): +                    pass + +                # Python 3's http.server.CGIHTTPRequestHandler decodes +                # using the urllib.unquote default of UTF-8, amongst other +                # issues. +                elif ( +                    software.startswith('simplehttp/') +                    and 'python/3' in software +                ): +                    v = v.encode('utf-8').decode('iso-8859-1') + +                # For other servers, guess that they have written bytes to +                # the environ using stdio byte-oriented interfaces, ending up +                # with the system code page. +                else: +                    v = v.encode(enc, 'replace').decode('iso-8859-1') + +            # Recover bytes from unicode environ, using surrogate escapes +            # where available (Python 3.1+). +            else: +                v = v.encode(enc, esc).decode('iso-8859-1') + +        environ[k] = v +    return environ +  class BaseHandler:      """Manage the invocation of a WSGI application""" @@ -36,7 +107,7 @@ class BaseHandler:      # os_environ is used to supply configuration from the OS environment:      # by default it's a copy of 'os.environ' as of import time, but you can      # override this in e.g. your __init__ method. 
-    os_environ = dict(os.environ.items()) +    os_environ= read_environ()      # Collaborator classes      wsgi_file_wrapper = FileWrapper     # set to None to disable @@ -431,6 +502,42 @@ class CGIHandler(BaseCGIHandler):      def __init__(self):          BaseCGIHandler.__init__( -            self, sys.stdin, sys.stdout, sys.stderr, dict(os.environ.items()), -            multithread=False, multiprocess=True +            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr, +            read_environ(), multithread=False, multiprocess=True +        ) + + +class IISCGIHandler(BaseCGIHandler): +    """CGI-based invocation with workaround for IIS path bug + +    This handler should be used in preference to CGIHandler when deploying on +    Microsoft IIS without having set the config allowPathInfo option (IIS>=7) +    or metabase allowPathInfoForScriptMappings (IIS<7). +    """ +    wsgi_run_once = True +    os_environ = {} + +    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at +    # the front, causing problems for WSGI applications that wish to implement +    # routing. This handler strips any such duplicated path. + +    # IIS can be configured to pass the correct PATH_INFO, but this causes +    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is +    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the +    # setting can only be made on a vhost level, affecting all other script +    # mappings, many of which break when exposed to the PATH_TRANSLATED bug. +    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7 +    # rarely uses it because there is still no UI for it.) + +    # There is no way for CGI code to tell whether the option was set, so a +    # separate handler class is provided. 
+    def __init__(self): +        environ= read_environ() +        path = environ.get('PATH_INFO', '') +        script = environ.get('SCRIPT_NAME', '') +        if (path+'/').startswith(script+'/'): +            environ['PATH_INFO'] = path[len(script):] +        BaseCGIHandler.__init__( +            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr, +            environ, multithread=False, multiprocess=True          ) diff --git a/Lib/wsgiref/simple_server.py b/Lib/wsgiref/simple_server.py index 550f4d86a9..af82f953c5 100644 --- a/Lib/wsgiref/simple_server.py +++ b/Lib/wsgiref/simple_server.py @@ -15,7 +15,7 @@ import sys  import urllib.parse  from wsgiref.handlers import SimpleHandler -__version__ = "0.1" +__version__ = "0.2"  __all__ = ['WSGIServer', 'WSGIRequestHandler', 'demo_app', 'make_server'] @@ -74,13 +74,14 @@ class WSGIRequestHandler(BaseHTTPRequestHandler):      def get_environ(self):          env = self.server.base_environ.copy()          env['SERVER_PROTOCOL'] = self.request_version +        env['SERVER_SOFTWARE'] = self.server_version          env['REQUEST_METHOD'] = self.command          if '?' in self.path:              path,query = self.path.split('?',1)          else:              path,query = self.path,'' -        env['PATH_INFO'] = urllib.parse.unquote(path) +        env['PATH_INFO'] = urllib.parse.unquote_to_bytes(path).decode('iso-8859-1')          env['QUERY_STRING'] = query          host = self.address_string() | 
