diff --git a/django/utils/html.py b/django/utils/html.py
index 8eeaa66330..cb786db1e4 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -6,6 +6,7 @@ import string
from django.utils.safestring import SafeData, mark_safe
from django.utils.encoding import force_unicode
from django.utils.functional import allow_lazy
+from django.utils.http import urlquote
# Configuration for urlize() function
LEADING_PUNCTUATION = ['(', '<', '<']
@@ -101,14 +102,24 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and \
len(middle) > 0 and middle[0] in string.letters + string.digits and \
(middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
- middle = '%s' % (middle, nofollow_attr, trim_url(middle))
+ middle = '%s' % (
+ urlquote(middle, safe='/&=:;#?+'), nofollow_attr,
+ trim_url(middle))
if middle.startswith('http://') or middle.startswith('https://'):
- middle = '%s' % (middle, nofollow_attr, trim_url(middle))
- if '@' in middle and not middle.startswith('www.') and not ':' in middle \
- and simple_email_re.match(middle):
+ middle = '%s' % (
+ urlquote(middle, safe='/&=:;#?+'), nofollow_attr,
+ trim_url(middle))
+ if '@' in middle and not middle.startswith('www.') and \
+ not ':' in middle and simple_email_re.match(middle):
middle = '%s' % (middle, middle)
if lead + middle + trail != word:
words[i] = lead + middle + trail
+ elif autoescape and not safe_input:
+ words[i] = escape(word)
+ elif safe_input:
+ words[i] = mark_safe(word)
+ elif autoescape:
+ words[i] = escape(word)
return u''.join(words)
urlize = allow_lazy(urlize, unicode)
diff --git a/tests/regressiontests/templates/filters.py b/tests/regressiontests/templates/filters.py
index 00dfbe3c35..36d15216b8 100644
--- a/tests/regressiontests/templates/filters.py
+++ b/tests/regressiontests/templates/filters.py
@@ -94,6 +94,11 @@ def get_filter_tests():
'filter-urlize03': ('{% autoescape off %}{{ a|urlize }}{% endautoescape %}', {"a": mark_safe("a & b")}, 'a & b'),
'filter-urlize04': ('{{ a|urlize }}', {"a": mark_safe("a & b")}, 'a & b'),
+ # This will lead to a nonsense result, but at least it won't be
+ # exploitable for XSS purposes when auto-escaping is on.
+ 'filter-urlize05': ('{% autoescape off %}{{ a|urlize }}{% endautoescape %}', {"a": ""}, ""),
+ 'filter-urlize06': ('{{ a|urlize }}', {"a": ""}, '<script>alert('foo')</script>'),
+
'filter-urlizetrunc01': ('{% autoescape off %}{{ a|urlizetrunc:"8" }} {{ b|urlizetrunc:"8" }}{% endautoescape %}', {"a": "http://example.com/x=&y=", "b": mark_safe("http://example.com?x=&y=")}, u'http:... http:...'),
'filter-urlizetrunc02': ('{{ a|urlizetrunc:"8" }} {{ b|urlizetrunc:"8" }}', {"a": "http://example.com/x=&y=", "b": mark_safe("http://example.com?x=&y=")}, u'http:... http:...'),