From 90e01e50ef8a9e6c91f30d965563c378a4ad26de Mon Sep 17 00:00:00 2001
From: postmasters
Date: Tue, 20 Jun 2017 06:02:44 -0700
Subject: urllib: Simplify splithost by calling into urlparse. (#1849)

The current regex based splitting produces a wrong result. For example::

  http://abc#@def

Web browsers parse that URL as ``http://abc/#@def``, that is, the host
is ``abc``, the path is ``/``, and the fragment is ``#@def``.
---
 Lib/urllib/parse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Lib/urllib/parse.py')

diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 1af2906e36..01eb54906c 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -947,7 +947,7 @@ def splithost(url):
     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
     global _hostprog
     if _hostprog is None:
-        _hostprog = re.compile('//([^/?]*)(.*)', re.DOTALL)
+        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
 
     match = _hostprog.match(url)
     if match:
-- 
cgit v1.2.1