From d988c13ffa4f287039dd8a45148dcdee7684c9a6 Mon Sep 17 00:00:00 2001 From: David Lord Date: Mon, 4 Nov 2019 08:04:24 -0800 Subject: clarify quoting behavior of urlencode filter --- jinja2/filters.py | 39 ++++++++++++++++++++++++--------------- jinja2/utils.py | 31 ++++++++++++++++++++----------- tests/test_filters.py | 29 +++++++++++++++++------------ 3 files changed, 61 insertions(+), 38 deletions(-) diff --git a/jinja2/filters.py b/jinja2/filters.py index 666df40..b73f627 100644 --- a/jinja2/filters.py +++ b/jinja2/filters.py @@ -19,7 +19,7 @@ from jinja2.utils import Markup, escape, pformat, urlize, soft_unicode, \ unicode_urlencode, htmlsafe_json_dumps from jinja2.runtime import Undefined from jinja2.exceptions import FilterArgumentError -from jinja2._compat import imap, string_types, text_type, iteritems, PY2 +from jinja2._compat import imap, string_types, text_type, iteritems, abc _word_re = re.compile(r'\w+', re.UNICODE) @@ -129,24 +129,33 @@ def do_forceescape(value): def do_urlencode(value): - """Escape strings for use in URLs (uses UTF-8 encoding). It accepts both - dictionaries and regular strings as well as pairwise iterables. + """Quote data for use in a URL path or query using UTF-8. + + Basic wrapper around :func:`urllib.parse.quote` when given a + string, or :func:`urllib.parse.urlencode` for a dict or iterable. + + :param value: Data to quote. A string will be quoted directly. A + dict or iterable of ``(key, value)`` pairs will be joined as a + query string. + + When given a string, "/" is not quoted. HTTP servers treat "/" and + "%2F" equivalently in paths. If you need quoted slashes, use the + ``|replace("/", "%2F")`` filter. .. 
versionadded:: 2.7 """ - itemiter = None - if isinstance(value, dict): - itemiter = iteritems(value) - elif not isinstance(value, string_types): - try: - itemiter = iter(value) - except TypeError: - pass - if itemiter is None: + if isinstance(value, string_types) or not isinstance(value, abc.Iterable): return unicode_urlencode(value) - return u'&'.join(unicode_urlencode(k) + '=' + - unicode_urlencode(v, for_qs=True) - for k, v in itemiter) + + if isinstance(value, dict): + items = iteritems(value) + else: + items = iter(value) + + return u"&".join( + "%s=%s" % (unicode_urlencode(k, for_qs=True), unicode_urlencode(v, for_qs=True)) + for k, v in items + ) @evalcontextfilter diff --git a/jinja2/utils.py b/jinja2/utils.py index d2759e2..49501c7 100644 --- a/jinja2/utils.py +++ b/jinja2/utils.py @@ -14,8 +14,7 @@ import json import warnings from collections import deque from threading import Lock -from jinja2._compat import text_type, string_types, implements_iterator, \ - url_quote, abc +from jinja2._compat import text_type, string_types, url_quote, abc _word_split_re = re.compile(r'(\s+)') @@ -282,22 +281,32 @@ def generate_lorem_ipsum(n=5, html=True, min=20, max=100): return Markup(u'\n'.join(u'

<p>%s</p>

' % escape(x) for x in result)) -def unicode_urlencode(obj, charset='utf-8', for_qs=False): - """URL escapes a single bytestring or unicode string with the - given charset if applicable to URL safe quoting under all rules - that need to be considered under all supported Python versions. +def unicode_urlencode(obj, charset="utf-8", for_qs=False): + """Quote a string for use in a URL using the given charset. - If non strings are provided they are converted to their unicode - representation first. + This function is misnamed, it is a wrapper around + :func:`urllib.parse.quote`. + + :param obj: String or bytes to quote. Other types are converted to + string then encoded to bytes using the given charset. + :param charset: Encode text to bytes using this charset. + :param for_qs: Quote "/" and use "+" for spaces. """ if not isinstance(obj, string_types): obj = text_type(obj) + if isinstance(obj, text_type): obj = obj.encode(charset) - safe = not for_qs and b'/' or b'' - rv = text_type(url_quote(obj, safe)) + + safe = b"" if for_qs else b"/" + rv = url_quote(obj, safe) + + if not isinstance(rv, text_type): + rv = rv.decode("utf-8") + if for_qs: - rv = rv.replace('%20', '+') + rv = rv.replace("%20", "+") + return rv diff --git a/tests/test_filters.py b/tests/test_filters.py index 456412f..9492663 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -596,18 +596,23 @@ class TestFilter(object): tmpl = env.from_string('{{ "
<div>foo</div>
" }}') assert tmpl.render() == '&lt;div&gt;foo&lt;/div&gt;' - def test_urlencode(self, env): - env = Environment(autoescape=True) - tmpl = env.from_string('{{ "Hello, world!"|urlencode }}') - assert tmpl.render() == 'Hello%2C%20world%21' - tmpl = env.from_string('{{ o|urlencode }}') - assert tmpl.render(o=u"Hello, world\u203d") \ - == "Hello%2C%20world%E2%80%BD" - assert tmpl.render(o=(("f", 1),)) == "f=1" - assert tmpl.render(o=(('f', 1), ("z", 2))) == "f=1&amp;z=2" - assert tmpl.render(o=((u"\u203d", 1),)) == "%E2%80%BD=1" - assert tmpl.render(o={u"\u203d": 1}) == "%E2%80%BD=1" - assert tmpl.render(o={0: 1}) == "0=1" + @pytest.mark.parametrize( + ("value", "expect"), + [ + ("Hello, world!", "Hello%2C%20world%21"), + (u"Hello, world\u203d", "Hello%2C%20world%E2%80%BD"), + ({"f": 1}, "f=1"), + ([('f', 1), ("z", 2)], "f=1&amp;z=2"), + ({u"\u203d": 1}, "%E2%80%BD=1"), + ({0: 1}, "0=1"), + ([("a b/c", "a b/c")], "a+b%2Fc=a+b%2Fc"), + ("a b/c", "a%20b/c") + ], + ) + def test_urlencode(self, value, expect): + e = Environment(autoescape=True) + t = e.from_string("{{ value|urlencode }}") + assert t.render(value=value) == expect def test_simple_map(self, env): env = Environment() -- cgit v1.2.1