diff --git a/django/utils/html.py b/django/utils/html.py index 8eeaa66330..cb786db1e4 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -6,6 +6,7 @@ import string from django.utils.safestring import SafeData, mark_safe from django.utils.encoding import force_unicode from django.utils.functional import allow_lazy +from django.utils.http import urlquote # Configuration for urlize() function LEADING_PUNCTUATION = ['(', '<', '<'] @@ -101,14 +102,24 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and \ len(middle) > 0 and middle[0] in string.letters + string.digits and \ (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))): - middle = '%s' % (middle, nofollow_attr, trim_url(middle)) + middle = '%s' % ( + urlquote(middle, safe='/&=:;#?+'), nofollow_attr, + trim_url(middle)) if middle.startswith('http://') or middle.startswith('https://'): - middle = '%s' % (middle, nofollow_attr, trim_url(middle)) - if '@' in middle and not middle.startswith('www.') and not ':' in middle \ - and simple_email_re.match(middle): + middle = '%s' % ( + urlquote(middle, safe='/&=:;#?+'), nofollow_attr, + trim_url(middle)) + if '@' in middle and not middle.startswith('www.') and \ + not ':' in middle and simple_email_re.match(middle): middle = '%s' % (middle, middle) if lead + middle + trail != word: words[i] = lead + middle + trail + elif autoescape and not safe_input: + words[i] = escape(word) + elif safe_input: + words[i] = mark_safe(word) + elif autoescape: + words[i] = escape(word) return u''.join(words) urlize = allow_lazy(urlize, unicode) diff --git a/tests/regressiontests/templates/filters.py b/tests/regressiontests/templates/filters.py index 00dfbe3c35..36d15216b8 100644 --- a/tests/regressiontests/templates/filters.py +++ b/tests/regressiontests/templates/filters.py @@ -94,6 +94,11 @@ def get_filter_tests(): 'filter-urlize03': ('{% autoescape off %}{{ a|urlize }}{% endautoescape %}', {"a": mark_safe("a & b")}, 'a & b'), 'filter-urlize04': ('{{ a|urlize }}', {"a": mark_safe("a & b")}, 'a & b'), + # This will lead to a nonsense result, but at least it won't be + # exploitable for XSS purposes when auto-escaping is on. + 'filter-urlize05': ('{% autoescape off %}{{ a|urlize }}{% endautoescape %}', {"a": ""}, ""), + 'filter-urlize06': ('{{ a|urlize }}', {"a": ""}, '<script>alert('foo')</script>'), + 'filter-urlizetrunc01': ('{% autoescape off %}{{ a|urlizetrunc:"8" }} {{ b|urlizetrunc:"8" }}{% endautoescape %}', {"a": "http://example.com/x=&y=", "b": mark_safe("http://example.com?x=&y=")}, u'http:... http:...'), 'filter-urlizetrunc02': ('{{ a|urlizetrunc:"8" }} {{ b|urlizetrunc:"8" }}', {"a": "http://example.com/x=&y=", "b": mark_safe("http://example.com?x=&y=")}, u'http:... http:...'),