From 27508918fbbbfda6f5e3b697bbea6bf2c4a6b8b8 Mon Sep 17 00:00:00 2001 From: Aymeric Augustin Date: Sun, 8 Jan 2012 15:43:32 +0000 Subject: [PATCH] Fixed #16395 -- Prevented urlize from highlighting some malformed URLs. Thanks BernhardEssl for the report and initial patch. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17358 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/utils/html.py | 8 ++++---- tests/regressiontests/defaultfilters/tests.py | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/django/utils/html.py b/django/utils/html.py index a0ad5f3d00..207620ed86 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -23,6 +23,8 @@ word_split_re = re.compile(r'(\s+)') punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \ ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]), '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION]))) +simple_url_re = re.compile(r'^https?://\w') +simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|net|org)$') simple_email_re = re.compile(r'^\S+@\S+\.\S+$') link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+') html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) @@ -150,11 +152,9 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): # Make URL we want to point to. url = None nofollow_attr = ' rel="nofollow"' if nofollow else '' - if middle.startswith('http://') or middle.startswith('https://'): + if simple_url_re.match(middle): url = smart_urlquote(middle) - elif middle.startswith('www.') or ('@' not in middle and \ - middle and middle[0] in string.ascii_letters + string.digits and \ - (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))): + elif simple_url_2_re.match(middle): url = smart_urlquote('http://%s' % middle) elif not ':' in middle and simple_email_re.match(middle): local, domain = middle.rsplit('@', 1) diff --git a/tests/regressiontests/defaultfilters/tests.py b/tests/regressiontests/defaultfilters/tests.py index 2c7cdb17a6..2f3d012346 100644 --- a/tests/regressiontests/defaultfilters/tests.py +++ b/tests/regressiontests/defaultfilters/tests.py @@ -268,6 +268,14 @@ class DefaultFiltersTests(TestCase): self.assertEqual(urlize('info@c✶.org'), u'
<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>') + # Check urlize doesn't highlight malformed URIs - see #16395 + self.assertEqual(urlize('http:///www.google.com'), + u'http:///www.google.com') + self.assertEqual(urlize('http://.google.com'), + u'http://.google.com') + self.assertEqual(urlize('http://@foo.com'), + u'http://@foo.com') + def test_wordcount(self): self.assertEqual(wordcount(''), 0) self.assertEqual(wordcount(u'oneword'), 1)