From 21024f06622c4c55b666adb130797a4ee205d005 Mon Sep 17 00:00:00 2001 From: Ratnadeep Debnath Date: Sat, 25 Feb 2017 14:30:28 +0530 Subject: bpo-16285: Update urllib quoting to RFC 3986 (#173) * bpo-16285: Update urllib quoting to RFC 3986 urllib.parse.quote is now based on RFC 3986, and hence includes `'~'` in the set of characters that are not escaped by default. Patch by Christian Theune and Ratnadeep Debnath. --- Lib/urllib/parse.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'Lib/urllib') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 1d08730a89..f3a309aacc 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -704,7 +704,7 @@ def unquote_plus(string, encoding='utf-8', errors='replace'): _ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' b'abcdefghijklmnopqrstuvwxyz' b'0123456789' - b'_.-') + b'_.-~') _ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) _safe_quoters = {} @@ -736,15 +736,18 @@ def quote(string, safe='/', encoding=None, errors=None): Each part of a URL, e.g. the path info, the query, etc., has a different set of reserved characters that must be quoted. - RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists + RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists the following reserved characters. reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | - "$" | "," + "$" | "," | "!" | "*" | "'" | "(" | ")" | "#" | "[" | "]" Each of these characters is reserved in some component of a URL, but not necessarily in all of them. + Python 3.7 updates from using RFC 2396 to RFC 3986 to quote URL strings. + Now, "~" is included in the set of unreserved characters. + By default, the quote function is intended for quoting the path section of a URL. Thus, it will not encode '/'. This character is reserved, but in typical usage the quote function is being -- cgit v1.2.1