diff options
author | Bebleo <james.warne@outlook.com> | 2020-04-19 05:42:12 -0400 |
---|---|---|
committer | David Lord <davidism@gmail.com> | 2021-01-30 06:25:03 -0800 |
commit | d504e1d1e2798d7b4661462b9ef4cd77dd270ff9 (patch) | |
tree | 7b97d883edb56768cc5d72351dfe9f8946dff61d | |
parent | c3b34a06f340234939df5ad77bbe6327ca7fc3f0 (diff) | |
download | jinja2-d504e1d1e2798d7b4661462b9ef4cd77dd270ff9.tar.gz |
Improve and extend urlize
-rw-r--r-- | docs/api.rst | 4 | ||||
-rw-r--r-- | src/jinja2/defaults.py | 1 | ||||
-rw-r--r-- | src/jinja2/filters.py | 39 | ||||
-rw-r--r-- | src/jinja2/utils.py | 80 | ||||
-rw-r--r-- | tests/test_filters.py | 23 | ||||
-rw-r--r-- | tests/test_regression.py | 9 |
6 files changed, 129 insertions, 27 deletions
diff --git a/docs/api.rst b/docs/api.rst index ec083a8..9189642 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -550,6 +550,10 @@ Example:: The default target that is issued for links from the `urlize` filter if no other target is defined by the call explicitly. +``urlize.extra_uri_schemes``: + Additional URI scheme prefixes that will generate links from the + `urlize` filter in addition to http://, https://, and mailto:. + ``json.dumps_function``: If this is set to a value other than `None` then the `tojson` filter will dump with this function instead of the default one. Note that diff --git a/src/jinja2/defaults.py b/src/jinja2/defaults.py index 1f0b0ab..d582836 100644 --- a/src/jinja2/defaults.py +++ b/src/jinja2/defaults.py @@ -35,6 +35,7 @@ DEFAULT_POLICIES = { "compiler.ascii_str": True, "urlize.rel": "noopener", "urlize.target": None, + "urlize.extra_uri_schemes": None, "truncate.leeway": 5, "json.dumps_function": None, "json.dumps_kwargs": {"sort_keys": True}, diff --git a/src/jinja2/filters.py b/src/jinja2/filters.py index 7a554a0..0d1639f 100644 --- a/src/jinja2/filters.py +++ b/src/jinja2/filters.py @@ -20,6 +20,7 @@ from .utils import urlize _word_re = re.compile(r"\w+") _word_beginning_split_re = re.compile(r"([-\s({\[<]+)") +_uri_scheme_re = re.compile(r"^([\w\.\+-]{2,}:(/){0,2})$") def contextfilter(f): @@ -569,7 +570,13 @@ def do_pprint(value): @evalcontextfilter def do_urlize( - eval_ctx, value, trim_url_limit=None, nofollow=False, target=None, rel=None + eval_ctx, + value, + trim_url_limit=None, + nofollow=False, + target=None, + rel=None, + extra_uri_schemes=None, ): """Converts URLs in plain text into clickable links. @@ -589,18 +596,44 @@ def do_urlize( {{ mytext|urlize(40, target='_blank') }} + If *extra_uri_schemes* are added then links will be generated for those + in addition to http(s): and mailto: schemes. + + .. sourcecode:: jinja + + {{ mytext|urlize(extra_uri_schemes=['tel:', 'ftp://']) }} + links are generated for tel and ftp. 
+ .. versionchanged:: 2.8 The ``target`` parameter was added. + + .. versionchanged:: 3.0 + The ``extra_uri_schemes`` parameter was added. """ policies = eval_ctx.environment.policies + rel = set((rel or "").split() or []) if nofollow: rel.add("nofollow") rel.update((policies["urlize.rel"] or "").split()) + rel = " ".join(sorted(rel)) or None + if target is None: target = policies["urlize.target"] - rel = " ".join(sorted(rel)) or None - rv = urlize(value, trim_url_limit, rel=rel, target=target) + + if extra_uri_schemes is None: + extra_uri_schemes = policies["urlize.extra_uri_schemes"] or [] + for uri_scheme in extra_uri_schemes: + if _uri_scheme_re.fullmatch(uri_scheme) is None: + raise FilterArgumentError(f"{uri_scheme} is not a valid URI scheme prefix.") + + rv = urlize( + value, + trim_url_limit, + rel=rel, + target=target, + extra_uri_schemes=extra_uri_schemes, + ) if eval_ctx.autoescape: rv = Markup(rv) return rv diff --git a/src/jinja2/utils.py b/src/jinja2/utils.py index 8ee0295..9ab5eb0 100644 --- a/src/jinja2/utils.py +++ b/src/jinja2/utils.py @@ -17,6 +17,14 @@ _trail_pattern = "|".join(map(re.escape, (".", ",", ")", ">", "\n", ">"))) _punctuation_re = re.compile( fr"^(?P<lead>(?:{_lead_pattern})*)(?P<middle>.*?)(?P<trail>(?:{_trail_pattern})*)$" ) +_simple_http_https_re = re.compile( + r"^((https?://|www\.)(([\w%-]+\.)+)?([a-z]{2,63}|xn--[\w%]{2,59})|" + r"([\w%-]{2,63}\.)+(com|net|int|edu|gov|org|info|mil)|" + r"(https?://)((([\d]{1,3})(\.[\d]{1,3}){3})|" + r"(\[([\da-f]{0,4}:){2}([\da-f]{0,4}:?){1,6}\])))" + r"(?::[\d]{1,5})?(?:[/?#]\S*)?$", + re.IGNORECASE, +) _simple_email_re = re.compile(r"^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$") _striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)") _entity_re = re.compile(r"&([^;]+);") @@ -175,11 +183,11 @@ def pformat(obj): return pformat(obj) -def urlize(text, trim_url_limit=None, rel=None, target=None): +def urlize(text, trim_url_limit=None, rel=None, target=None, extra_uri_schemes=None): """Converts any URLs in 
text into clickable links. Works on http://, - https:// and www. links. Links can have trailing punctuation (periods, - commas, close-parens) and leading punctuation (opening parens) and - it'll still do the right thing. + https://, www., mailto:, and email links. Links can have trailing + punctuation (periods, commas, close-parens) and leading punctuation + (opening parens) and it'll still do the right thing. If trim_url_limit is not None, the URLs in link text will be limited to trim_url_limit characters. @@ -188,6 +196,13 @@ def urlize(text, trim_url_limit=None, rel=None, target=None): attribute. If target is not None, a target attribute will be added to the link. + + Known Limitations: + - Will not urlize emails or mailto: links if they include header fields + (for example, mailto:address@example.com?cc=copy@example.com). + + .. versionchanged:: 3.0 + Adds limited support for mailto: links """ def trim_url(x, limit=trim_url_limit): @@ -204,26 +219,30 @@ def urlize(text, trim_url_limit=None, rel=None, target=None): match = _punctuation_re.match(word) if match: lead, middle, trail = match.groups() - if middle.startswith("www.") or ( - "@" not in middle - and not middle.startswith("http://") - and not middle.startswith("https://") - and len(middle) > 0 - and middle[0] in _letters + _digits - and ( - middle.endswith(".org") - or middle.endswith(".net") - or middle.endswith(".com") - ) - ): - middle = ( - f'<a href="http://{middle}"{rel_attr}{target_attr}>' - f"{trim_url(middle)}</a>" - ) - if middle.startswith("http://") or middle.startswith("https://"): - middle = ( - f'<a href="{middle}"{rel_attr}{target_attr}>{trim_url(middle)}</a>' - ) + # fix for mismatched opening and closing parentheses + pairs = [("(", ")"), ("<", ">"), ("<", ">")] + for start_char in re.findall(_lead_pattern, middle): + end_char = next(c for o, c in pairs if o == start_char) + while ( + middle.count(start_char) > middle.count(end_char) + and end_char in trail + ): + end_char_index = 
trail.index(end_char) + middle = middle + trail[: end_char_index + len(end_char)] + trail = trail[end_char_index + len(end_char) :] + + if _simple_http_https_re.match(middle): + if middle.startswith("https://") or middle.startswith("http://"): + middle = ( + f'<a href="{middle}"{rel_attr}{target_attr}>' + f"{trim_url(middle)}</a>" + ) + else: + middle = ( + f'<a href="https://{middle}"{rel_attr}{target_attr}>' + f"{trim_url(middle)}</a>" + ) + if ( "@" in middle and not middle.startswith("www.") @@ -231,8 +250,21 @@ def urlize(text, trim_url_limit=None, rel=None, target=None): and _simple_email_re.match(middle) ): middle = f'<a href="mailto:{middle}">{middle}</a>' + if middle.startswith("mailto:") and _simple_email_re.match(middle[7:]): + middle = f'<a href="{middle}">{middle[7:]}</a>' + + if extra_uri_schemes is not None: + schemes = {x for x in extra_uri_schemes if middle.startswith(x)} + for uri_scheme in schemes: + if len(middle) > len(uri_scheme): + middle = ( + f'<a href="{middle}"{rel_attr}{target_attr}>' + f"{middle}</a>" + ) + if lead + middle + trail != word: words[i] = lead + middle + trail + return "".join(words) diff --git a/tests/test_filters.py b/tests/test_filters.py index 8087a24..bf00f06 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -337,11 +337,23 @@ class TestFilter: assert tmpl.render() == "FOO" def test_urlize(self, env): + tmpl = env.from_string('{{ "foo example.org bar"|urlize }}') + assert tmpl.render() == ( + 'foo <a href="https://example.org" rel="noopener">' "example.org</a> bar" + ) tmpl = env.from_string('{{ "foo http://www.example.com/ bar"|urlize }}') assert tmpl.render() == ( 'foo <a href="http://www.example.com/" rel="noopener">' "http://www.example.com/</a> bar" ) + tmpl = env.from_string('{{ "foo mailto:email@example.com bar"|urlize }}') + assert tmpl.render() == ( + 'foo <a href="mailto:email@example.com">email@example.com</a> bar' + ) + tmpl = env.from_string('{{ "foo email@example.com bar"|urlize }}') + 
assert tmpl.render() == ( + 'foo <a href="mailto:email@example.com">email@example.com</a> bar' + ) def test_urlize_rel_policy(self): env = Environment() @@ -361,6 +373,17 @@ class TestFilter: "http://www.example.com/</a> bar" ) + def test_urlize_extra_uri_schemes_parameter(self, env): + tmpl = env.from_string( + '{{ "foo tel:+1-514-555-1234 ftp://localhost bar"|' + 'urlize(extra_uri_schemes=["tel:", "ftp:"]) }}' + ) + assert tmpl.render() == ( + 'foo <a href="tel:+1-514-555-1234" rel="noopener">' + 'tel:+1-514-555-1234</a> <a href="ftp://localhost" rel="noopener">' + "ftp://localhost</a> bar" + ) + def test_wordcount(self, env): tmpl = env.from_string('{{ "foo bar baz"|wordcount }}') assert tmpl.render() == "3" diff --git a/tests/test_regression.py b/tests/test_regression.py index d052f43..21a6d92 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -109,6 +109,15 @@ class TestBug: "http://www.example.org/<foo</a>" ) + def test_urlize_filter_closing_punctuation(self, env): + tmpl = env.from_string( + '{{ "(see http://www.example.org/?page=subj_<desc.h>)"|urlize }}' + ) + assert tmpl.render() == ( + '(see <a href="http://www.example.org/?page=subj_<desc.h>" ' + 'rel="noopener">http://www.example.org/?page=subj_<desc.h></a>)' + ) + def test_loop_call_loop(self, env): tmpl = env.from_string( """ |