summary refs log tree commit diff
path: root/Lib/http
diff options
context:
space:
mode:
author Gregory P. Smith <greg@krypto.org> 2019-04-30 19:12:21 -0700
committer GitHub <noreply@github.com> 2019-04-30 19:12:21 -0700
commit c4e671eec20dfcb29b18596a89ef075f826c9f96 (patch)
tree ed97dd046a1467e029caed8416ed6de7182ef53a /Lib/http
parent 5f38b8407b071acd96da2c8cde411d0e26967735 (diff)
download cpython-git-c4e671eec20dfcb29b18596a89ef075f826c9f96.tar.gz
bpo-30458: Disallow control chars in http URLs. (GH-12755)
Disallow control chars in http URLs in urllib.urlopen. This addresses a potential security problem for applications that do not sanity check their URLs where http request headers could be injected.
Diffstat (limited to 'Lib/http')
-rw-r--r-- Lib/http/client.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 5a2225276b..99d6a68cf4 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -137,6 +137,16 @@ _MAXHEADERS = 100
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
+# These characters are not allowed within HTTP URL paths.
+# See https://tools.ietf.org/html/rfc3986#section-3.3 and the
+# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
+# Prevents CVE-2019-9740. Includes control characters such as \r\n.
+# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
+# Arguably only these _should_ be allowed:
+# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
+# We are more lenient for assumed real world compatibility purposes.
+
# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
@@ -1079,6 +1089,10 @@ class HTTPConnection:
self._method = method
if not url:
url = '/'
+ # Prevent CVE-2019-9740.
+ if match := _contains_disallowed_url_pchar_re.search(url):
+ raise ValueError(f"URL can't contain control characters. {url!r} "
+ f"(found at least {match.group()!r})")
request = '%s %s %s' % (method, url, self._http_vsn_str)
# Non-ASCII characters should have been eliminated earlier