diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/jinja2/defaults.py | 1 | ||||
-rw-r--r-- | src/jinja2/filters.py | 39 | ||||
-rw-r--r-- | src/jinja2/utils.py | 80 |
3 files changed, 93 insertions, 27 deletions
diff --git a/src/jinja2/defaults.py b/src/jinja2/defaults.py index 1f0b0ab..d582836 100644 --- a/src/jinja2/defaults.py +++ b/src/jinja2/defaults.py @@ -35,6 +35,7 @@ DEFAULT_POLICIES = { "compiler.ascii_str": True, "urlize.rel": "noopener", "urlize.target": None, + "urlize.extra_uri_schemes": None, "truncate.leeway": 5, "json.dumps_function": None, "json.dumps_kwargs": {"sort_keys": True}, diff --git a/src/jinja2/filters.py b/src/jinja2/filters.py index 7a554a0..0d1639f 100644 --- a/src/jinja2/filters.py +++ b/src/jinja2/filters.py @@ -20,6 +20,7 @@ from .utils import urlize _word_re = re.compile(r"\w+") _word_beginning_split_re = re.compile(r"([-\s({\[<]+)") +_uri_scheme_re = re.compile(r"^([\w\.\+-]{2,}:(/){0,2})$") def contextfilter(f): @@ -569,7 +570,13 @@ def do_pprint(value): @evalcontextfilter def do_urlize( - eval_ctx, value, trim_url_limit=None, nofollow=False, target=None, rel=None + eval_ctx, + value, + trim_url_limit=None, + nofollow=False, + target=None, + rel=None, + extra_uri_schemes=None, ): """Converts URLs in plain text into clickable links. @@ -589,18 +596,44 @@ def do_urlize( {{ mytext|urlize(40, target='_blank') }} + If *extra_uri_schemes* are added then links will be generated for those + in addition to http(s): and mailto: schemes. + + .. sourcecode:: jinja + + {{ mytext|urlize(extra_uri_schemes=['tel:', 'ftp://']) }} + links are generated for tel and ftp. + .. versionchanged:: 2.8 The ``target`` parameter was added. + + .. versionchanged:: 3.0 + The ``extra_uri_schemes`` parameter was added. """ policies = eval_ctx.environment.policies + rel = set((rel or "").split() or []) if nofollow: rel.add("nofollow") rel.update((policies["urlize.rel"] or "").split()) + rel = " ".join(sorted(rel)) or None + if target is None: target = policies["urlize.target"] - rel = " ".join(sorted(rel)) or None - rv = urlize(value, trim_url_limit, rel=rel, target=target) + + if extra_uri_schemes is None: + extra_uri_schemes = policies["urlize.extra_uri_schemes"] or [] + for uri_scheme in extra_uri_schemes: + if _uri_scheme_re.fullmatch(uri_scheme) is None: + raise FilterArgumentError(f"{uri_scheme} is not a valid URI scheme prefix.") + + rv = urlize( + value, + trim_url_limit, + rel=rel, + target=target, + extra_uri_schemes=extra_uri_schemes, + ) if eval_ctx.autoescape: rv = Markup(rv) return rv diff --git a/src/jinja2/utils.py b/src/jinja2/utils.py index 8ee0295..9ab5eb0 100644 --- a/src/jinja2/utils.py +++ b/src/jinja2/utils.py @@ -17,6 +17,14 @@ _trail_pattern = "|".join(map(re.escape, (".", ",", ")", ">", "\n", ">"))) _punctuation_re = re.compile( fr"^(?P<lead>(?:{_lead_pattern})*)(?P<middle>.*?)(?P<trail>(?:{_trail_pattern})*)$" ) +_simple_http_https_re = re.compile( + r"^((https?://|www\.)(([\w%-]+\.)+)?([a-z]{2,63}|xn--[\w%]{2,59})|" + r"([\w%-]{2,63}\.)+(com|net|int|edu|gov|org|info|mil)|" + r"(https?://)((([\d]{1,3})(\.[\d]{1,3}){3})|" + r"(\[([\da-f]{0,4}:){2}([\da-f]{0,4}:?){1,6}\])))" + r"(?::[\d]{1,5})?(?:[/?#]\S*)?$", + re.IGNORECASE, +) _simple_email_re = re.compile(r"^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$") _striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)") _entity_re = re.compile(r"&([^;]+);") @@ -175,11 +183,11 @@ def pformat(obj): return pformat(obj) -def urlize(text, trim_url_limit=None, rel=None, target=None): +def urlize(text, trim_url_limit=None, rel=None, target=None, extra_uri_schemes=None): """Converts any URLs in text into clickable links. Works on http://, - https:// and www. links. Links can have trailing punctuation (periods, - commas, close-parens) and leading punctuation (opening parens) and - it'll still do the right thing. + https://, www., mailto:, and email links. Links can have trailing + punctuation (periods, commas, close-parens) and leading punctuation + (opening parens) and it'll still do the right thing. If trim_url_limit is not None, the URLs in link text will be limited to trim_url_limit characters. @@ -188,6 +196,13 @@ def urlize(text, trim_url_limit=None, rel=None, target=None): attribute. If target is not None, a target attribute will be added to the link. + + Known Limitations: + - Will not urlize emails or mailto: links if they include header fields + (for example, mailto:address@example.com?cc=copy@example.com). + + .. versionchanged:: 3.0 + Adds limited support for mailto: links """ def trim_url(x, limit=trim_url_limit): @@ -204,26 +219,30 @@ def urlize(text, trim_url_limit=None, rel=None, target=None): match = _punctuation_re.match(word) if match: lead, middle, trail = match.groups() - if middle.startswith("www.") or ( - "@" not in middle - and not middle.startswith("http://") - and not middle.startswith("https://") - and len(middle) > 0 - and middle[0] in _letters + _digits - and ( - middle.endswith(".org") - or middle.endswith(".net") - or middle.endswith(".com") - ) - ): - middle = ( - f'<a href="http://{middle}"{rel_attr}{target_attr}>' - f"{trim_url(middle)}</a>" - ) - if middle.startswith("http://") or middle.startswith("https://"): - middle = ( - f'<a href="{middle}"{rel_attr}{target_attr}>{trim_url(middle)}</a>' - ) + # fix for mismatched opening and closing parentheses + pairs = [("(", ")"), ("<", ">"), ("<", ">")] + for start_char in re.findall(_lead_pattern, middle): + end_char = next(c for o, c in pairs if o == start_char) + while ( + middle.count(start_char) > middle.count(end_char) + and end_char in trail + ): + end_char_index = trail.index(end_char) + middle = middle + trail[: end_char_index + len(end_char)] + trail = trail[end_char_index + len(end_char) :] + + if _simple_http_https_re.match(middle): + if middle.startswith("https://") or middle.startswith("http://"): + middle = ( + f'<a href="{middle}"{rel_attr}{target_attr}>' + f"{trim_url(middle)}</a>" + ) + else: + middle = ( + f'<a href="https://{middle}"{rel_attr}{target_attr}>' + f"{trim_url(middle)}</a>" + ) + if ( "@" in middle and not middle.startswith("www.") @@ -231,8 +250,21 @@ def urlize(text, trim_url_limit=None, rel=None, target=None): and _simple_email_re.match(middle) ): middle = f'<a href="mailto:{middle}">{middle}</a>' + if middle.startswith("mailto:") and _simple_email_re.match(middle[7:]): + middle = f'<a href="{middle}">{middle[7:]}</a>' + + if extra_uri_schemes is not None: + schemes = {x for x in extra_uri_schemes if middle.startswith(x)} + for uri_scheme in schemes: + if len(middle) > len(uri_scheme): + middle = ( + f'<a href="{middle}"{rel_attr}{target_attr}>' + f"{middle}</a>" + ) + if lead + middle + trail != word: words[i] = lead + middle + trail + return "".join(words) |