diff options
Diffstat (limited to 'Lib/urllib/parse.py')
-rw-r--r-- | Lib/urllib/parse.py | 22 |
1 files changed, 21 insertions, 1 deletions
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 0b39b6eaf7..e2b6f133e1 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -431,11 +431,31 @@ def urlsplit(url, scheme='', allow_fragments=True): netloc = query = fragment = '' i = url.find(':') if i > 0: + if url[:i] == 'http': # optimize the common case + url = url[i+1:] + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + _checknetloc(netloc) + v = SplitResult('http', netloc, url, query, fragment) + _parse_cache[key] = v + return _coerce_result(v) for c in url[:i]: if c not in scheme_chars: break else: - scheme, url = url[:i].lower(), url[i+1:] + # make sure "url" is not actually a port number (in which case + # "scheme" is really part of the path) + rest = url[i+1:] + if not rest or any(c not in '0123456789' for c in rest): + # not a port number + scheme, url = url[:i].lower(), rest if url[:2] == '//': netloc, url = _splitnetloc(url, 2) |