summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tim Graham <timograham@gmail.com> 2019-10-18 09:07:20 -0400
committer Senthil Kumaran <senthil@uthcode.com> 2019-10-18 06:51:12 -0700
commit 94f32caf49627bbc30e188af49f8ee69c9850ad9 (patch)
tree e034c70610185281f4ac3658bdb3d1e41b07bf71
parent de812682a674cdf2bac0f9547200f107069781ad (diff)
download cpython-git-backport-5a88d50-3.8.tar.gz
[3.8] bpo-27657: Fix urlparse() with numeric paths (GH-661) backport-5a88d50-3.8
* bpo-27657: Fix urlparse() with numeric paths

  Revert parsing decision from bpo-754016 in favor of the documented
  consensus in bpo-16932 of how to treat strings without a // to
  designate the netloc.

* bpo-22891: Remove urlsplit() optimization for 'http' prefixed inputs.

(cherry picked from commit 5a88d50ff013a64fbdb25b877c87644a9034c969)

Co-authored-by: Tim Graham <timograham@gmail.com>
-rw-r--r-- Lib/test/test_urlparse.py 10
-rw-r--r-- Lib/urllib/parse.py 22
-rw-r--r-- Misc/NEWS.d/next/Library/2017-12-26-14-32-23.bpo-27657.6BhyVK.rst 2
3 files changed, 9 insertions, 25 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 4ae6ed3385..762500789f 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -709,15 +709,17 @@ class UrlParseTestCase(unittest.TestCase):
def test_portseparator(self):
# Issue 754016 makes changes for port separator ':' from scheme separator
- self.assertEqual(urllib.parse.urlparse("path:80"),
- ('','','path:80','','',''))
+ self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
+ self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
+ self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
('http','www.python.org:80','','','',''))
# As usual, need to check bytes input as well
- self.assertEqual(urllib.parse.urlparse(b"path:80"),
- (b'',b'',b'path:80',b'',b'',b''))
+ self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
+ self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
+ self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index b6608783a8..d497925b94 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -431,31 +431,11 @@ def urlsplit(url, scheme='', allow_fragments=True):
netloc = query = fragment = ''
i = url.find(':')
if i > 0:
- if url[:i] == 'http': # optimize the common case
- url = url[i+1:]
- if url[:2] == '//':
- netloc, url = _splitnetloc(url, 2)
- if (('[' in netloc and ']' not in netloc) or
- (']' in netloc and '[' not in netloc)):
- raise ValueError("Invalid IPv6 URL")
- if allow_fragments and '#' in url:
- url, fragment = url.split('#', 1)
- if '?' in url:
- url, query = url.split('?', 1)
- _checknetloc(netloc)
- v = SplitResult('http', netloc, url, query, fragment)
- _parse_cache[key] = v
- return _coerce_result(v)
for c in url[:i]:
if c not in scheme_chars:
break
else:
- # make sure "url" is not actually a port number (in which case
- # "scheme" is really part of the path)
- rest = url[i+1:]
- if not rest or any(c not in '0123456789' for c in rest):
- # not a port number
- scheme, url = url[:i].lower(), rest
+ scheme, url = url[:i].lower(), url[i+1:]
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
diff --git a/Misc/NEWS.d/next/Library/2017-12-26-14-32-23.bpo-27657.6BhyVK.rst b/Misc/NEWS.d/next/Library/2017-12-26-14-32-23.bpo-27657.6BhyVK.rst
new file mode 100644
index 0000000000..77746c0ce6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-12-26-14-32-23.bpo-27657.6BhyVK.rst
@@ -0,0 +1,2 @@
+Fix urllib.parse.urlparse() with numeric paths. A string like "path:80" is
+no longer parsed as a path but as a scheme ("path") and a path ("80").