diff options
author | Ratnadeep Debnath <rtnpro@gmail.com> | 2017-02-25 14:30:28 +0530 |
---|---|---|
committer | Nick Coghlan <ncoghlan@gmail.com> | 2017-02-25 19:00:28 +1000 |
commit | 21024f06622c4c55b666adb130797a4ee205d005 (patch) | |
tree | 8b5f5381deb999d248430f3b2b8e351936e72fe8 /Lib/urllib | |
parent | 140792bd514ee4ba739fda899785bea3ce746f05 (diff) | |
download | cpython-git-21024f06622c4c55b666adb130797a4ee205d005.tar.gz |
bpo-16285: Update urllib quoting to RFC 3986 (#173)
* bpo-16285: Update urllib quoting to RFC 3986
urllib.parse.quote is now based on RFC 3986, and hence
includes `'~'` in the set of characters that is not escaped
by default.
Patch by Christian Theune and Ratnadeep Debnath.
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- | Lib/urllib/parse.py | 9 |
1 file changed, 6 insertions, 3 deletions
```diff
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 1d08730a89..f3a309aacc 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -704,7 +704,7 @@ def unquote_plus(string, encoding='utf-8', errors='replace'):
 _ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                          b'abcdefghijklmnopqrstuvwxyz'
                          b'0123456789'
-                         b'_.-')
+                         b'_.-~')
 _ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
 _safe_quoters = {}
 
@@ -736,15 +736,18 @@ def quote(string, safe='/', encoding=None, errors=None):
     Each part of a URL, e.g. the path info, the query, etc., has a
     different set of reserved characters that must be quoted.
 
-    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
+    RFC 3986 Uniform Resource Identifiers (URI): Generic Syntax lists
     the following reserved characters.
 
     reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
-                  "$" | ","
+                  "$" | "," | "~"
 
     Each of these characters is reserved in some component of a URL,
     but not necessarily in all of them.
 
+    Python 3.7 updates from using RFC 2396 to RFC 3986 to quote URL strings.
+    Now, "~" is included in the set of reserved characters.
+
     By default, the quote function is intended for quoting the path
     section of a URL.  Thus, it will not encode '/'.  This character
     is reserved, but in typical usage the quote function is being
```