summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Senthil Kumaran <skumaran@gatech.edu> 2020-02-16 04:21:41 -0800
committer: GitHub <noreply@github.com> 2020-02-16 04:21:41 -0800
commit: bb9ff2500aec3581430eaa3d97ce31df457ff42e (patch)
tree: 112fdc85b989ffe82d455832d4e770757359c751
parent: 0d860dd43c72dc7046a5d18fc72d495cadd4a2df (diff)
download: cpython-git-revert-16839-backport-5a88d50-3.8.tar.gz
Revert "[3.8] bpo-27657: Fix urlparse() with numeric paths (GH-661) (#16839)" [ref: revert-16839-backport-5a88d50-3.8]
This reverts commit 0f3187c1ce3b3ace60f6c1691dfa3d4e744f0384.
-rw-r--r-- Lib/test/test_urlparse.py | 10
-rw-r--r-- Lib/urllib/parse.py | 22
2 files changed, 25 insertions, 7 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 762500789f..4ae6ed3385 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -709,17 +709,15 @@ class UrlParseTestCase(unittest.TestCase):
def test_portseparator(self):
# Issue 754016 makes changes for port separator ':' from scheme separator
- self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
- self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
- self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
+ self.assertEqual(urllib.parse.urlparse("path:80"),
+ ('','','path:80','','',''))
self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
('http','www.python.org:80','','','',''))
# As usual, need to check bytes input as well
- self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
- self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
- self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
+ self.assertEqual(urllib.parse.urlparse(b"path:80"),
+ (b'',b'',b'path:80',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 0b39b6eaf7..e2b6f133e1 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -431,11 +431,31 @@ def urlsplit(url, scheme='', allow_fragments=True):
netloc = query = fragment = ''
i = url.find(':')
if i > 0:
+ if url[:i] == 'http': # optimize the common case
+ url = url[i+1:]
+ if url[:2] == '//':
+ netloc, url = _splitnetloc(url, 2)
+ if (('[' in netloc and ']' not in netloc) or
+ (']' in netloc and '[' not in netloc)):
+ raise ValueError("Invalid IPv6 URL")
+ if allow_fragments and '#' in url:
+ url, fragment = url.split('#', 1)
+ if '?' in url:
+ url, query = url.split('?', 1)
+ _checknetloc(netloc)
+ v = SplitResult('http', netloc, url, query, fragment)
+ _parse_cache[key] = v
+ return _coerce_result(v)
for c in url[:i]:
if c not in scheme_chars:
break
else:
- scheme, url = url[:i].lower(), url[i+1:]
+ # make sure "url" is not actually a port number (in which case
+ # "scheme" is really part of the path)
+ rest = url[i+1:]
+ if not rest or any(c not in '0123456789' for c in rest):
+ # not a port number
+ scheme, url = url[:i].lower(), rest
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)