summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bebleo <james.warne@outlook.com> 2020-04-19 05:42:12 -0400
committer David Lord <davidism@gmail.com> 2021-01-30 06:25:03 -0800
commit d504e1d1e2798d7b4661462b9ef4cd77dd270ff9 (patch)
tree 7b97d883edb56768cc5d72351dfe9f8946dff61d
parent c3b34a06f340234939df5ad77bbe6327ca7fc3f0 (diff)
download jinja2-d504e1d1e2798d7b4661462b9ef4cd77dd270ff9.tar.gz
Improve and extend urlize
-rw-r--r-- docs/api.rst 4
-rw-r--r-- src/jinja2/defaults.py 1
-rw-r--r-- src/jinja2/filters.py 39
-rw-r--r-- src/jinja2/utils.py 80
-rw-r--r-- tests/test_filters.py 23
-rw-r--r-- tests/test_regression.py 9
6 files changed, 129 insertions, 27 deletions
diff --git a/docs/api.rst b/docs/api.rst
index ec083a8..9189642 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -550,6 +550,10 @@ Example::
The default target that is issued for links from the `urlize` filter
if no other target is defined by the call explicitly.
+``urlize.extra_uri_schemes``:
+ Additional URI scheme prefixes that will generate links from the
+ `urlize` filter in addition to http://, https://, and mailto:.
+
``json.dumps_function``:
If this is set to a value other than `None` then the `tojson` filter
will dump with this function instead of the default one. Note that
diff --git a/src/jinja2/defaults.py b/src/jinja2/defaults.py
index 1f0b0ab..d582836 100644
--- a/src/jinja2/defaults.py
+++ b/src/jinja2/defaults.py
@@ -35,6 +35,7 @@ DEFAULT_POLICIES = {
"compiler.ascii_str": True,
"urlize.rel": "noopener",
"urlize.target": None,
+ "urlize.extra_uri_schemes": None,
"truncate.leeway": 5,
"json.dumps_function": None,
"json.dumps_kwargs": {"sort_keys": True},
diff --git a/src/jinja2/filters.py b/src/jinja2/filters.py
index 7a554a0..0d1639f 100644
--- a/src/jinja2/filters.py
+++ b/src/jinja2/filters.py
@@ -20,6 +20,7 @@ from .utils import urlize
_word_re = re.compile(r"\w+")
_word_beginning_split_re = re.compile(r"([-\s({\[<]+)")
+_uri_scheme_re = re.compile(r"^([\w\.\+-]{2,}:(/){0,2})$")
def contextfilter(f):
@@ -569,7 +570,13 @@ def do_pprint(value):
@evalcontextfilter
def do_urlize(
- eval_ctx, value, trim_url_limit=None, nofollow=False, target=None, rel=None
+ eval_ctx,
+ value,
+ trim_url_limit=None,
+ nofollow=False,
+ target=None,
+ rel=None,
+ extra_uri_schemes=None,
):
"""Converts URLs in plain text into clickable links.
@@ -589,18 +596,44 @@ def do_urlize(
{{ mytext|urlize(40, target='_blank') }}
+ If *extra_uri_schemes* are added then links will be generated for those
+ in addition to http(s): and mailto: schemes.
+
+ .. sourcecode:: jinja
+
+ {{ mytext|urlize(extra_uri_schemes=['tel:', 'ftp://']) }}
+ links are generated for tel and ftp.
+
.. versionchanged:: 2.8
The ``target`` parameter was added.
+
+ .. versionchanged:: 3.0
+ The ``extra_uri_schemes`` parameter was added.
"""
policies = eval_ctx.environment.policies
+
rel = set((rel or "").split() or [])
if nofollow:
rel.add("nofollow")
rel.update((policies["urlize.rel"] or "").split())
+ rel = " ".join(sorted(rel)) or None
+
if target is None:
target = policies["urlize.target"]
- rel = " ".join(sorted(rel)) or None
- rv = urlize(value, trim_url_limit, rel=rel, target=target)
+
+ if extra_uri_schemes is None:
+ extra_uri_schemes = policies["urlize.extra_uri_schemes"] or []
+ for uri_scheme in extra_uri_schemes:
+ if _uri_scheme_re.fullmatch(uri_scheme) is None:
+ raise FilterArgumentError(f"{uri_scheme} is not a valid URI scheme prefix.")
+
+ rv = urlize(
+ value,
+ trim_url_limit,
+ rel=rel,
+ target=target,
+ extra_uri_schemes=extra_uri_schemes,
+ )
if eval_ctx.autoescape:
rv = Markup(rv)
return rv
diff --git a/src/jinja2/utils.py b/src/jinja2/utils.py
index 8ee0295..9ab5eb0 100644
--- a/src/jinja2/utils.py
+++ b/src/jinja2/utils.py
@@ -17,6 +17,14 @@ _trail_pattern = "|".join(map(re.escape, (".", ",", ")", ">", "\n", "&gt;")))
_punctuation_re = re.compile(
fr"^(?P<lead>(?:{_lead_pattern})*)(?P<middle>.*?)(?P<trail>(?:{_trail_pattern})*)$"
)
+_simple_http_https_re = re.compile(
+ r"^((https?://|www\.)(([\w%-]+\.)+)?([a-z]{2,63}|xn--[\w%]{2,59})|"
+ r"([\w%-]{2,63}\.)+(com|net|int|edu|gov|org|info|mil)|"
+ r"(https?://)((([\d]{1,3})(\.[\d]{1,3}){3})|"
+ r"(\[([\da-f]{0,4}:){2}([\da-f]{0,4}:?){1,6}\])))"
+ r"(?::[\d]{1,5})?(?:[/?#]\S*)?$",
+ re.IGNORECASE,
+)
_simple_email_re = re.compile(r"^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$")
_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)")
_entity_re = re.compile(r"&([^;]+);")
@@ -175,11 +183,11 @@ def pformat(obj):
return pformat(obj)
-def urlize(text, trim_url_limit=None, rel=None, target=None):
+def urlize(text, trim_url_limit=None, rel=None, target=None, extra_uri_schemes=None):
"""Converts any URLs in text into clickable links. Works on http://,
- https:// and www. links. Links can have trailing punctuation (periods,
- commas, close-parens) and leading punctuation (opening parens) and
- it'll still do the right thing.
+ https://, www., mailto:, and email links. Links can have trailing
+ punctuation (periods, commas, close-parens) and leading punctuation
+ (opening parens) and it'll still do the right thing.
If trim_url_limit is not None, the URLs in link text will be limited
to trim_url_limit characters.
@@ -188,6 +196,13 @@ def urlize(text, trim_url_limit=None, rel=None, target=None):
attribute.
If target is not None, a target attribute will be added to the link.
+
+ Known Limitations:
+ - Will not urlize emails or mailto: links if they include header fields
+ (for example, mailto:address@example.com?cc=copy@example.com).
+
+ .. versionchanged:: 3.0
+ Adds limited support for mailto: links
"""
def trim_url(x, limit=trim_url_limit):
@@ -204,26 +219,30 @@ def urlize(text, trim_url_limit=None, rel=None, target=None):
match = _punctuation_re.match(word)
if match:
lead, middle, trail = match.groups()
- if middle.startswith("www.") or (
- "@" not in middle
- and not middle.startswith("http://")
- and not middle.startswith("https://")
- and len(middle) > 0
- and middle[0] in _letters + _digits
- and (
- middle.endswith(".org")
- or middle.endswith(".net")
- or middle.endswith(".com")
- )
- ):
- middle = (
- f'<a href="http://{middle}"{rel_attr}{target_attr}>'
- f"{trim_url(middle)}</a>"
- )
- if middle.startswith("http://") or middle.startswith("https://"):
- middle = (
- f'<a href="{middle}"{rel_attr}{target_attr}>{trim_url(middle)}</a>'
- )
+ # fix for mismatched opening and closing parentheses
+ pairs = [("(", ")"), ("<", ">"), ("&lt;", "&gt;")]
+ for start_char in re.findall(_lead_pattern, middle):
+ end_char = next(c for o, c in pairs if o == start_char)
+ while (
+ middle.count(start_char) > middle.count(end_char)
+ and end_char in trail
+ ):
+ end_char_index = trail.index(end_char)
+ middle = middle + trail[: end_char_index + len(end_char)]
+ trail = trail[end_char_index + len(end_char) :]
+
+ if _simple_http_https_re.match(middle):
+ if middle.startswith("https://") or middle.startswith("http://"):
+ middle = (
+ f'<a href="{middle}"{rel_attr}{target_attr}>'
+ f"{trim_url(middle)}</a>"
+ )
+ else:
+ middle = (
+ f'<a href="https://{middle}"{rel_attr}{target_attr}>'
+ f"{trim_url(middle)}</a>"
+ )
+
if (
"@" in middle
and not middle.startswith("www.")
@@ -231,8 +250,21 @@ def urlize(text, trim_url_limit=None, rel=None, target=None):
and _simple_email_re.match(middle)
):
middle = f'<a href="mailto:{middle}">{middle}</a>'
+ if middle.startswith("mailto:") and _simple_email_re.match(middle[7:]):
+ middle = f'<a href="{middle}">{middle[7:]}</a>'
+
+ if extra_uri_schemes is not None:
+ schemes = {x for x in extra_uri_schemes if middle.startswith(x)}
+ for uri_scheme in schemes:
+ if len(middle) > len(uri_scheme):
+ middle = (
+ f'<a href="{middle}"{rel_attr}{target_attr}>'
+ f"{middle}</a>"
+ )
+
if lead + middle + trail != word:
words[i] = lead + middle + trail
+
return "".join(words)
diff --git a/tests/test_filters.py b/tests/test_filters.py
index 8087a24..bf00f06 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -337,11 +337,23 @@ class TestFilter:
assert tmpl.render() == "FOO"
def test_urlize(self, env):
+ tmpl = env.from_string('{{ "foo example.org bar"|urlize }}')
+ assert tmpl.render() == (
+ 'foo <a href="https://example.org" rel="noopener">' "example.org</a> bar"
+ )
tmpl = env.from_string('{{ "foo http://www.example.com/ bar"|urlize }}')
assert tmpl.render() == (
'foo <a href="http://www.example.com/" rel="noopener">'
"http://www.example.com/</a> bar"
)
+ tmpl = env.from_string('{{ "foo mailto:email@example.com bar"|urlize }}')
+ assert tmpl.render() == (
+ 'foo <a href="mailto:email@example.com">email@example.com</a> bar'
+ )
+ tmpl = env.from_string('{{ "foo email@example.com bar"|urlize }}')
+ assert tmpl.render() == (
+ 'foo <a href="mailto:email@example.com">email@example.com</a> bar'
+ )
def test_urlize_rel_policy(self):
env = Environment()
@@ -361,6 +373,17 @@ class TestFilter:
"http://www.example.com/</a> bar"
)
+ def test_urlize_extra_uri_schemes_parameter(self, env):
+ tmpl = env.from_string(
+ '{{ "foo tel:+1-514-555-1234 ftp://localhost bar"|'
+ 'urlize(extra_uri_schemes=["tel:", "ftp:"]) }}'
+ )
+ assert tmpl.render() == (
+ 'foo <a href="tel:+1-514-555-1234" rel="noopener">'
+ 'tel:+1-514-555-1234</a> <a href="ftp://localhost" rel="noopener">'
+ "ftp://localhost</a> bar"
+ )
+
def test_wordcount(self, env):
tmpl = env.from_string('{{ "foo bar baz"|wordcount }}')
assert tmpl.render() == "3"
diff --git a/tests/test_regression.py b/tests/test_regression.py
index d052f43..21a6d92 100644
--- a/tests/test_regression.py
+++ b/tests/test_regression.py
@@ -109,6 +109,15 @@ class TestBug:
"http://www.example.org/&lt;foo</a>"
)
+ def test_urlize_filter_closing_punctuation(self, env):
+ tmpl = env.from_string(
+ '{{ "(see http://www.example.org/?page=subj_<desc.h>)"|urlize }}'
+ )
+ assert tmpl.render() == (
+ '(see <a href="http://www.example.org/?page=subj_&lt;desc.h&gt;" '
+ 'rel="noopener">http://www.example.org/?page=subj_&lt;desc.h&gt;</a>)'
+ )
+
def test_loop_call_loop(self, env):
tmpl = env.from_string(
"""