from django.template.defaultfilters import urlize
from django.test import SimpleTestCase
from django.utils.functional import lazy
from django.utils.safestring import mark_safe

from ..utils import setup

# NOTE(review): the expected-output literals in this module spell out the
# anchor markup and HTML entities produced by urlize. The exact href encoding
# (IDN punycode, percent-quoting, default "http://" scheme) should be confirmed
# against the Django version under test.


class UrlizeTests(SimpleTestCase):
    """Exercise the ``urlize`` filter through rendered templates.

    Each test renders a template registered by the ``setup`` decorator and
    compares the rendered string, with auto-escaping either on or off.
    """

    @setup(
        {
            "urlize01": (
                "{% autoescape off %}{{ a|urlize }} {{ b|urlize }}{% endautoescape %}"
            )
        }
    )
    def test_urlize01(self):
        output = self.engine.render_to_string(
            "urlize01",
            {
                "a": "http://example.com/?x=&y=",
                "b": mark_safe("http://example.com?x=&amp;y=&lt;2&gt;"),
            },
        )
        self.assertEqual(
            output,
            '<a href="http://example.com/?x=&amp;y=" rel="nofollow">'
            "http://example.com/?x=&y=</a> "
            '<a href="http://example.com?x=&amp;y=%3C2%3E" rel="nofollow">'
            "http://example.com?x=&amp;y=&lt;2&gt;</a>",
        )

    @setup({"urlize02": "{{ a|urlize }} {{ b|urlize }}"})
    def test_urlize02(self):
        output = self.engine.render_to_string(
            "urlize02",
            {
                "a": "http://example.com/?x=&y=",
                "b": mark_safe("http://example.com?x=&amp;y="),
            },
        )
        self.assertEqual(
            output,
            '<a href="http://example.com/?x=&amp;y=" rel="nofollow">'
            "http://example.com/?x=&amp;y=</a> "
            '<a href="http://example.com?x=&amp;y=" rel="nofollow">'
            "http://example.com?x=&amp;y=</a>",
        )

    @setup({"urlize03": "{% autoescape off %}{{ a|urlize }}{% endautoescape %}"})
    def test_urlize03(self):
        output = self.engine.render_to_string("urlize03", {"a": mark_safe("a &amp; b")})
        self.assertEqual(output, "a &amp; b")

    @setup({"urlize04": "{{ a|urlize }}"})
    def test_urlize04(self):
        output = self.engine.render_to_string("urlize04", {"a": mark_safe("a &amp; b")})
        self.assertEqual(output, "a &amp; b")

    # This will lead to a nonsense result, but at least it won't be
    # exploitable for XSS purposes when auto-escaping is on.
    @setup({"urlize05": "{% autoescape off %}{{ a|urlize }}{% endautoescape %}"})
    def test_urlize05(self):
        output = self.engine.render_to_string(
            "urlize05", {"a": "<script>alert('foo')</script>"}
        )
        self.assertEqual(output, "<script>alert('foo')</script>")

    @setup({"urlize06": "{{ a|urlize }}"})
    def test_urlize06(self):
        output = self.engine.render_to_string(
            "urlize06", {"a": "<script>alert('foo')</script>"}
        )
        self.assertEqual(
            output, "&lt;script&gt;alert(&#x27;foo&#x27;)&lt;/script&gt;"
        )

    # mailto: testing for urlize
    @setup({"urlize07": "{{ a|urlize }}"})
    def test_urlize07(self):
        output = self.engine.render_to_string(
            "urlize07", {"a": "Email me at me@example.com"}
        )
        self.assertEqual(
            output,
            'Email me at <a href="mailto:me@example.com">me@example.com</a>',
        )

    @setup({"urlize08": "{{ a|urlize }}"})
    def test_urlize08(self):
        output = self.engine.render_to_string(
            "urlize08", {"a": "Email me at <me@example.com>"}
        )
        self.assertEqual(
            output,
            'Email me at &lt;<a href="mailto:me@example.com">me@example.com</a>&gt;',
        )

    @setup({"urlize09": "{% autoescape off %}{{ a|urlize }}{% endautoescape %}"})
    def test_urlize09(self):
        output = self.engine.render_to_string(
            "urlize09", {"a": "http://example.com/?x=&amp;y=&lt;2&gt;"}
        )
        self.assertEqual(
            output,
            '<a href="http://example.com/?x=&amp;y=%3C2%3E" rel="nofollow">'
            "http://example.com/?x=&amp;y=&lt;2&gt;</a>",
        )


class FunctionTests(SimpleTestCase):
    """Call ``urlize`` directly and compare the returned markup."""

    def test_urls(self):
        self.assertEqual(
            urlize("http://google.com"),
            '<a href="http://google.com" rel="nofollow">http://google.com</a>',
        )
        self.assertEqual(
            urlize("http://google.com/"),
            '<a href="http://google.com/" rel="nofollow">http://google.com/</a>',
        )
        self.assertEqual(
            urlize("www.google.com"),
            '<a href="http://www.google.com" rel="nofollow">www.google.com</a>',
        )
        self.assertEqual(
            urlize("djangoproject.org"),
            '<a href="http://djangoproject.org" rel="nofollow">djangoproject.org</a>',
        )
        self.assertEqual(
            urlize("djangoproject.org/"),
            '<a href="http://djangoproject.org/" rel="nofollow">djangoproject.org/</a>',
        )

    def test_url_split_chars(self):
        # Quotes (single and double) and angle brackets shouldn't be considered
        # part of URLs.
        self.assertEqual(
            urlize('www.server.com"abc'),
            '<a href="http://www.server.com" rel="nofollow">www.server.com</a>&quot;'
            "abc",
        )
        self.assertEqual(
            urlize("www.server.com'abc"),
            '<a href="http://www.server.com" rel="nofollow">www.server.com</a>&#x27;'
            "abc",
        )
        self.assertEqual(
            urlize("www.server.com<abc"),
            '<a href="http://www.server.com" rel="nofollow">www.server.com</a>&lt;abc',
        )
        self.assertEqual(
            urlize("www.server.com>abc"),
            '<a href="http://www.server.com" rel="nofollow">www.server.com</a>&gt;abc',
        )

    def test_email(self):
        self.assertEqual(
            urlize("info@djangoproject.org"),
            '<a href="mailto:info@djangoproject.org">info@djangoproject.org</a>',
        )

    def test_word_with_dot(self):
        # A dotted word without a recognised TLD is returned untouched.
        self.assertEqual(urlize("some.organization"), "some.organization")

    def test_https(self):
        self.assertEqual(
            urlize("https://google.com"),
            '<a href="https://google.com" rel="nofollow">https://google.com</a>',
        )

    def test_quoting(self):
        """
        #9655 - Check urlize doesn't overquote already quoted urls. The
        teststring is the urlquoted version of 'http://hi.baidu.com/重新开始'
        """
        self.assertEqual(
            urlize("http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B"),
            '<a href="http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B" '
            'rel="nofollow">http://hi.baidu.com/%E9%87%8D%E6%96%B0%E5%BC%80%E5%A7%8B'
            "</a>",
        )

    def test_urlencoded(self):
        self.assertEqual(
            urlize("www.mystore.com/30%OffCoupons!"),
            '<a href="http://www.mystore.com/30%25OffCoupons" rel="nofollow">'
            "www.mystore.com/30%OffCoupons</a>!",
        )
        self.assertEqual(
            urlize("https://en.wikipedia.org/wiki/Caf%C3%A9"),
            '<a href="https://en.wikipedia.org/wiki/Caf%C3%A9" rel="nofollow">'
            "https://en.wikipedia.org/wiki/Caf%C3%A9</a>",
        )

    def test_unicode(self):
        self.assertEqual(
            urlize("https://en.wikipedia.org/wiki/Café"),
            '<a href="https://en.wikipedia.org/wiki/Caf%C3%A9" rel="nofollow">'
            "https://en.wikipedia.org/wiki/Café</a>",
        )

    def test_parenthesis(self):
        """
        #11911 - Check urlize keeps balanced parentheses
        """
        self.assertEqual(
            urlize("https://en.wikipedia.org/wiki/Django_(web_framework)"),
            '<a href="https://en.wikipedia.org/wiki/Django_(web_framework)" '
            'rel="nofollow">https://en.wikipedia.org/wiki/Django_(web_framework)</a>',
        )
        self.assertEqual(
            urlize("(see https://en.wikipedia.org/wiki/Django_(web_framework))"),
            '(see <a href="https://en.wikipedia.org/wiki/Django_(web_framework)" '
            'rel="nofollow">https://en.wikipedia.org/wiki/Django_(web_framework)</a>)',
        )

    def test_nofollow(self):
        """
        #12183 - Check urlize adds nofollow properly
        """
        self.assertEqual(
            urlize("foo@bar.com or www.bar.com"),
            '<a href="mailto:foo@bar.com">foo@bar.com</a> or '
            '<a href="http://www.bar.com" rel="nofollow">www.bar.com</a>',
        )

    def test_idn(self):
        """
        #13704 - Check urlize handles IDN correctly
        """
        self.assertEqual(
            urlize("http://c✶.ws"),
            '<a href="http://xn--c-lgq.ws" rel="nofollow">http://c✶.ws</a>',
        )
        self.assertEqual(
            urlize("www.c✶.ws"),
            '<a href="http://www.xn--c-lgq.ws" rel="nofollow">www.c✶.ws</a>',
        )
        self.assertEqual(
            urlize("c✶.org"), '<a href="http://xn--c-lgq.org" rel="nofollow">c✶.org</a>'
        )
        self.assertEqual(
            urlize("info@c✶.org"), '<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>'
        )

    def test_malformed(self):
        """
        #16395 - Check urlize doesn't highlight malformed URIs
        """
        self.assertEqual(urlize("http:///www.google.com"), "http:///www.google.com")
        self.assertEqual(urlize("http://.google.com"), "http://.google.com")
        self.assertEqual(urlize("http://@foo.com"), "http://@foo.com")

    def test_tlds(self):
        """
        #16656 - Check urlize accepts more TLDs
        """
        self.assertEqual(
            urlize("usa.gov"), '<a href="http://usa.gov" rel="nofollow">usa.gov</a>'
        )

    def test_invalid_email(self):
        """
        #17592 - Check urlize doesn't crash on invalid email with dot-starting
        domain
        """
        self.assertEqual(urlize("email@.stream.ru"), "email@.stream.ru")

    def test_uppercase(self):
        """
        #18071 - Check urlize accepts uppercased URL schemes
        """
        self.assertEqual(
            urlize("HTTPS://github.com/"),
            '<a href="https://github.com/" rel="nofollow">HTTPS://github.com/</a>',
        )

    def test_trailing_period(self):
        """
        #18644 - Check urlize trims trailing period when followed by parenthesis
        """
        self.assertEqual(
            urlize("(Go to http://www.example.com/foo.)"),
            '(Go to <a href="http://www.example.com/foo" rel="nofollow">'
            "http://www.example.com/foo</a>.)",
        )

    def test_trailing_multiple_punctuation(self):
        # Runs of trailing punctuation stay outside the generated link.
        self.assertEqual(
            urlize("A test http://testing.com/example.."),
            'A test <a href="http://testing.com/example" rel="nofollow">'
            "http://testing.com/example</a>..",
        )
        self.assertEqual(
            urlize("A test http://testing.com/example!!"),
            'A test <a href="http://testing.com/example" rel="nofollow">'
            "http://testing.com/example</a>!!",
        )
        self.assertEqual(
            urlize("A test http://testing.com/example!!!"),
            'A test <a href="http://testing.com/example" rel="nofollow">'
            "http://testing.com/example</a>!!!",
        )
        self.assertEqual(
            urlize('A test http://testing.com/example.,:;)"!'),
            'A test <a href="http://testing.com/example" rel="nofollow">'
            "http://testing.com/example</a>.,:;)&quot;!",
        )

    def test_brackets(self):
        """
        #19070 - Check urlize handles brackets properly
        """
        self.assertEqual(
            urlize("[see www.example.com]"),
            '[see <a href="http://www.example.com" rel="nofollow">www.example.com</a>]',
        )
        self.assertEqual(
            urlize("see test[at[example.com"),
            'see <a href="http://test[at[example.com" rel="nofollow">'
            "test[at[example.com</a>",
        )
        self.assertEqual(
            urlize("[http://168.192.0.1](http://168.192.0.1)"),
            '[<a href="http://168.192.0.1](http://168.192.0.1)" rel="nofollow">'
            "http://168.192.0.1](http://168.192.0.1)</a>",
        )

    def test_wrapping_characters(self):
        # Each entry maps a two-character wrapper to the (opening, closing)
        # text expected around the link once auto-escaping has been applied.
        wrapping_chars = (
            ("()", ("(", ")")),
            ("<>", ("&lt;", "&gt;")),
            ("[]", ("[", "]")),
            ('""', ("&quot;", "&quot;")),
            ("''", ("&#x27;", "&#x27;")),
        )
        for wrapping_in, (start_out, end_out) in wrapping_chars:
            with self.subTest(wrapping_in=wrapping_in):
                start_in, end_in = wrapping_in
                self.assertEqual(
                    urlize(start_in + "https://www.example.org/" + end_in),
                    f'{start_out}<a href="https://www.example.org/" rel="nofollow">'
                    f"https://www.example.org/</a>{end_out}",
                )

    def test_ipv4(self):
        self.assertEqual(
            urlize("http://192.168.0.15/api/9"),
            '<a href="http://192.168.0.15/api/9" rel="nofollow">'
            "http://192.168.0.15/api/9</a>",
        )

    def test_ipv6(self):
        self.assertEqual(
            urlize("http://[2001:db8:cafe::2]/api/9"),
            '<a href="http://[2001:db8:cafe::2]/api/9" rel="nofollow">'
            "http://[2001:db8:cafe::2]/api/9</a>",
        )

    def test_quotation_marks(self):
        """
        #20364 - Check urlize correctly include quotation marks in links
        """
        self.assertEqual(
            urlize('before "hi@example.com" afterward', autoescape=False),
            'before "<a href="mailto:hi@example.com">hi@example.com</a>" afterward',
        )
        self.assertEqual(
            urlize('before hi@example.com" afterward', autoescape=False),
            'before <a href="mailto:hi@example.com">hi@example.com</a>" afterward',
        )
        self.assertEqual(
            urlize('before "hi@example.com afterward', autoescape=False),
            'before "<a href="mailto:hi@example.com">hi@example.com</a> afterward',
        )
        self.assertEqual(
            urlize("before 'hi@example.com' afterward", autoescape=False),
            "before '<a href=\"mailto:hi@example.com\">hi@example.com</a>' afterward",
        )
        self.assertEqual(
            urlize("before hi@example.com' afterward", autoescape=False),
            'before <a href="mailto:hi@example.com">hi@example.com</a>\' afterward',
        )
        self.assertEqual(
            urlize("before 'hi@example.com afterward", autoescape=False),
            'before \'<a href="mailto:hi@example.com">hi@example.com</a> afterward',
        )

    def test_quote_commas(self):
        """
        #20364 - Check urlize copes with commas following URLs in quotes
        """
        self.assertEqual(
            urlize(
                'Email us at "hi@example.com", or phone us at +xx.yy', autoescape=False
            ),
            'Email us at "<a href="mailto:hi@example.com">hi@example.com</a>", or '
            "phone us at +xx.yy",
        )

    def test_exclamation_marks(self):
        """
        #23715 - Check urlize correctly handles exclamation marks after TLDs
        or query string
        """
        self.assertEqual(
            urlize("Go to djangoproject.com! and enjoy."),
            'Go to <a href="http://djangoproject.com" rel="nofollow">djangoproject.com'
            "</a>! and enjoy.",
        )
        self.assertEqual(
            urlize("Search for google.com/?q=! and see."),
            'Search for <a href="http://google.com/?q=" rel="nofollow">google.com/?q='
            "</a>! and see.",
        )
        self.assertEqual(
            urlize("Search for google.com/?q=dj!`? and see."),
            'Search for <a href="http://google.com/?q=dj%21%60%3F" rel="nofollow">'
            "google.com/?q=dj!`?</a> and see.",
        )
        self.assertEqual(
            urlize("Search for google.com/?q=dj!`?! and see."),
            'Search for <a href="http://google.com/?q=dj%21%60%3F" rel="nofollow">'
            "google.com/?q=dj!`?</a>! and see.",
        )

    def test_non_string_input(self):
        self.assertEqual(urlize(123), "123")

    def test_autoescape(self):
        # With auto-escaping (the default here) the surrounding markup is
        # escaped and only the bare domain becomes a link.
        self.assertEqual(
            urlize('foo<a href=" google.com ">bar</a>buz'),
            'foo&lt;a href=&quot; <a href="http://google.com" rel="nofollow">google.com'
            "</a> &quot;&gt;bar&lt;/a&gt;buz",
        )

    def test_autoescape_off(self):
        self.assertEqual(
            urlize('foo<a href=" google.com ">bar</a>buz', autoescape=False),
            'foo<a href=" <a href="http://google.com" rel="nofollow">google.com</a> ">'
            "bar</a>buz",
        )

    def test_lazystring(self):
        # A lazily evaluated string must be handled like a plain str.
        prepend_www = lazy(lambda url: "www." + url, str)
        self.assertEqual(
            urlize(prepend_www("google.com")),
            '<a href="http://www.google.com" rel="nofollow">www.google.com</a>',
        )