1 files changed, 21 insertions, 1 deletions
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 0b39b6eaf7..e2b6f133e1 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -431,11 +431,31 @@ def urlsplit(url, scheme='', allow_fragments=True):
     netloc = query = fragment = ''
     i = url.find(':')
     if i > 0:
+        if url[:i] == 'http': # optimize the common case
+            url = url[i+1:]
+            if url[:2] == '//':
+                netloc, url = _splitnetloc(url, 2)
+                if (('[' in netloc and ']' not in netloc) or
+                        (']' in netloc and '[' not in netloc)):
+                    raise ValueError("Invalid IPv6 URL")
+            if allow_fragments and '#' in url:
+                url, fragment = url.split('#', 1)
+            if '?' in url:
+                url, query = url.split('?', 1)
+            _checknetloc(netloc)
+            v = SplitResult('http', netloc, url, query, fragment)
+            _parse_cache[key] = v
+            return _coerce_result(v)
         for c in url[:i]:
             if c not in scheme_chars:
                 break
         else:
-            scheme, url = url[:i].lower(), url[i+1:]
+            # make sure "url" is not actually a port number (in which case
+            # "scheme" is really part of the path)
+            rest = url[i+1:]
+            if not rest or any(c not in '0123456789' for c in rest):
+                # not a port number
+                scheme, url = url[:i].lower(), rest
 
     if url[:2] == '//':
         netloc, url = _splitnetloc(url, 2)