summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jörn Hees <joernhees@users.noreply.github.com> 2019-04-10 02:31:18 +0200
committer: Senthil Kumaran <skumaran@gatech.edu> 2019-04-09 17:31:18 -0700
commit: 750d74fac5c510e39958b3f79641fe54096ee54f (patch)
tree: 7c045d3bb464ee68a4cfd3f4f204ae4bcacf8e18
parent: 63b5fc5f42c95a9ef25f9ef9f69ef218763d28bd (diff)
download: cpython-git-750d74fac5c510e39958b3f79641fe54096ee54f.tar.gz
bpo-12910: update and correct quote docstring (#2568)
Fixes some mistakes and misleading statements in the quote function docstring:
- reserved chars are never actually used by the quote code; unreserved chars are
- the listed reserved chars were wrong and incomplete
- mention that the use case is not minimal quoting with respect to the RFC, but cautious quoting
-rw-r--r-- Lib/urllib/parse.py 33
1 files changed, 20 insertions, 13 deletions
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 8b6c9b1060..fb518a9774 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -785,25 +785,32 @@ def quote(string, safe='/', encoding=None, errors=None):
"""quote('abc def') -> 'abc%20def'
Each part of a URL, e.g. the path info, the query, etc., has a
- different set of reserved characters that must be quoted.
+ different set of reserved characters that must be quoted. The
+ quote function offers a cautious (not minimal) way to quote a
+ string for most of these parts.
- RFC 3986 Uniform Resource Identifiers (URI): Generic Syntax lists
- the following reserved characters.
+ RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists
+ the following (un)reserved characters.
- reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
- "$" | "," | "~"
+ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ reserved = gen-delims / sub-delims
+ gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ / "*" / "+" / "," / ";" / "="
- Each of these characters is reserved in some component of a URL,
+ Each of the reserved characters is reserved in some component of a URL,
but not necessarily in all of them.
- Python 3.7 updates from using RFC 2396 to RFC 3986 to quote URL strings.
- Now, "~" is included in the set of reserved characters.
+ The quote function %-escapes all characters that are neither in the
+ unreserved chars ("always safe") nor the additional chars set via the
+ safe arg.
+
+ The default for the safe arg is '/'. The character is reserved, but in
+ typical usage the quote function is being called on a path where the
+ existing slash characters are to be preserved.
- By default, the quote function is intended for quoting the path
- section of a URL. Thus, it will not encode '/'. This character
- is reserved, but in typical usage the quote function is being
- called on a path where the existing slash characters are used as
- reserved characters.
+ Python 3.7 updates from using RFC 2396 to RFC 3986 to quote URL strings.
+ Now, "~" is included in the set of unreserved characters.
string and safe may be either str or bytes objects. encoding and errors
must not be specified if string is a bytes object.