mirror of
https://github.com/django/django.git
synced 2025-10-24 14:16:09 +00:00
Fixed #36013 -- Removed use of IDNA-2003 in django.utils.html.
Removed the obsolete and potentially problematic IDNA 2003 ("punycode") encoding of internationalized domain names in smart_urlquote() and Urlizer, which are used (only) by AdminURLFieldWidget and the urlize/urlizetrunc template filters. Domain names are now encoded as percent-encoded UTF-8, which defers IDNA details to the browser (like other URLs rendered by Django).
This commit is contained in:
committed by
Sarah Boyce
parent
23c960a98e
commit
29ba75e6e5
1
AUTHORS
1
AUTHORS
@@ -735,6 +735,7 @@ answer newbie questions, and generally made Django that much better:
|
|||||||
Mihai Preda <mihai_preda@yahoo.com>
|
Mihai Preda <mihai_preda@yahoo.com>
|
||||||
Mikaël Barbero <mikael.barbero nospam at nospam free.fr>
|
Mikaël Barbero <mikael.barbero nospam at nospam free.fr>
|
||||||
Mike Axiak <axiak@mit.edu>
|
Mike Axiak <axiak@mit.edu>
|
||||||
|
Mike Edmunds <medmunds@gmail.com>
|
||||||
Mike Grouchy <https://mikegrouchy.com/>
|
Mike Grouchy <https://mikegrouchy.com/>
|
||||||
Mike Malone <mjmalone@gmail.com>
|
Mike Malone <mjmalone@gmail.com>
|
||||||
Mike Richardson
|
Mike Richardson
|
||||||
|
@@ -9,7 +9,6 @@ from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsp
|
|||||||
|
|
||||||
from django.core.exceptions import SuspiciousOperation, ValidationError
|
from django.core.exceptions import SuspiciousOperation, ValidationError
|
||||||
from django.core.validators import EmailValidator
|
from django.core.validators import EmailValidator
|
||||||
from django.utils.encoding import punycode
|
|
||||||
from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
|
from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
|
||||||
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
|
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
|
||||||
from django.utils.regex_helper import _lazy_re_compile
|
from django.utils.regex_helper import _lazy_re_compile
|
||||||
@@ -237,17 +236,16 @@ def smart_urlquote(url):
|
|||||||
# see also https://bugs.python.org/issue16285
|
# see also https://bugs.python.org/issue16285
|
||||||
return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")
|
return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")
|
||||||
|
|
||||||
# Handle IDN before quoting.
|
|
||||||
try:
|
try:
|
||||||
scheme, netloc, path, query, fragment = urlsplit(url)
|
scheme, netloc, path, query, fragment = urlsplit(url)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# invalid IPv6 URL (normally square brackets in hostname part).
|
# invalid IPv6 URL (normally square brackets in hostname part).
|
||||||
return unquote_quote(url)
|
return unquote_quote(url)
|
||||||
|
|
||||||
try:
|
# Handle IDN as percent-encoded UTF-8 octets, per WHATWG URL Specification
|
||||||
netloc = punycode(netloc) # IDN -> ACE
|
# section 3.5 and RFC 3986 section 3.2.2. Defer any IDNA to the user agent.
|
||||||
except UnicodeError: # invalid domain part
|
# See #36013.
|
||||||
return unquote_quote(url)
|
netloc = unquote_quote(netloc)
|
||||||
|
|
||||||
if query:
|
if query:
|
||||||
# Separately unquoting key/value, so as to not mix querystring separators
|
# Separately unquoting key/value, so as to not mix querystring separators
|
||||||
@@ -348,10 +346,8 @@ class Urlizer:
|
|||||||
url = smart_urlquote("http://%s" % html.unescape(middle))
|
url = smart_urlquote("http://%s" % html.unescape(middle))
|
||||||
elif ":" not in middle and self.is_email_simple(middle):
|
elif ":" not in middle and self.is_email_simple(middle):
|
||||||
local, domain = middle.rsplit("@", 1)
|
local, domain = middle.rsplit("@", 1)
|
||||||
try:
|
# Encode per RFC 6068 Section 2 (items 1, 4, 5). Defer any IDNA
|
||||||
domain = punycode(domain)
|
# to the user agent. See #36013.
|
||||||
except UnicodeError:
|
|
||||||
return word
|
|
||||||
local = quote(local, safe="")
|
local = quote(local, safe="")
|
||||||
domain = quote(domain, safe="")
|
domain = quote(domain, safe="")
|
||||||
url = self.mailto_template.format(local=local, domain=domain)
|
url = self.mailto_template.format(local=local, domain=domain)
|
||||||
|
@@ -486,11 +486,13 @@ class AdminURLWidgetTest(SimpleTestCase):
|
|||||||
w = widgets.AdminURLFieldWidget()
|
w = widgets.AdminURLFieldWidget()
|
||||||
self.assertHTMLEqual(
|
self.assertHTMLEqual(
|
||||||
w.render("test", "http://example-äüö.com"),
|
w.render("test", "http://example-äüö.com"),
|
||||||
'<p class="url">Currently: <a href="http://xn--example--7za4pnc.com">'
|
'<p class="url">Currently: <a href="http://example-%C3%A4%C3%BC%C3%B6.com">'
|
||||||
"http://example-äüö.com</a><br>"
|
"http://example-äüö.com</a><br>"
|
||||||
'Change:<input class="vURLField" name="test" type="url" '
|
'Change:<input class="vURLField" name="test" type="url" '
|
||||||
'value="http://example-äüö.com"></p>',
|
'value="http://example-äüö.com"></p>',
|
||||||
)
|
)
|
||||||
|
# Does not use obsolete IDNA-2003 encoding (#36013).
|
||||||
|
self.assertNotIn("fass.example.com", w.render("test", "http://faß.example.com"))
|
||||||
|
|
||||||
def test_render_quoting(self):
|
def test_render_quoting(self):
|
||||||
"""
|
"""
|
||||||
@@ -517,7 +519,8 @@ class AdminURLWidgetTest(SimpleTestCase):
|
|||||||
output = w.render("test", "http://example-äüö.com/<sometag>some-text</sometag>")
|
output = w.render("test", "http://example-äüö.com/<sometag>some-text</sometag>")
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
HREF_RE.search(output)[1],
|
HREF_RE.search(output)[1],
|
||||||
"http://xn--example--7za4pnc.com/%3Csometag%3Esome-text%3C/sometag%3E",
|
"http://example-%C3%A4%C3%BC%C3%B6.com/"
|
||||||
|
"%3Csometag%3Esome-text%3C/sometag%3E",
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
TEXT_RE.search(output)[1],
|
TEXT_RE.search(output)[1],
|
||||||
|
@@ -229,19 +229,34 @@ class FunctionTests(SimpleTestCase):
|
|||||||
"""
|
"""
|
||||||
#13704 - Check urlize handles IDN correctly
|
#13704 - Check urlize handles IDN correctly
|
||||||
"""
|
"""
|
||||||
|
# The "✶" below is \N{SIX POINTED BLACK STAR}, not "*" \N{ASTERISK}.
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
urlize("http://c✶.ws"),
|
urlize("http://c✶.ws"),
|
||||||
'<a href="http://xn--c-lgq.ws" rel="nofollow">http://c✶.ws</a>',
|
'<a href="http://c%E2%9C%B6.ws" rel="nofollow">http://c✶.ws</a>',
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
urlize("www.c✶.ws"),
|
urlize("www.c✶.ws"),
|
||||||
'<a href="http://www.xn--c-lgq.ws" rel="nofollow">www.c✶.ws</a>',
|
'<a href="http://www.c%E2%9C%B6.ws" rel="nofollow">www.c✶.ws</a>',
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
urlize("c✶.org"), '<a href="http://xn--c-lgq.org" rel="nofollow">c✶.org</a>'
|
urlize("c✶.org"),
|
||||||
|
'<a href="http://c%E2%9C%B6.org" rel="nofollow">c✶.org</a>',
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
urlize("info@c✶.org"), '<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>'
|
urlize("info@c✶.org"),
|
||||||
|
'<a href="mailto:info@c%E2%9C%B6.org">info@c✶.org</a>',
|
||||||
|
)
|
||||||
|
|
||||||
|
# Pre-encoded IDNA is urlized but not re-encoded.
|
||||||
|
self.assertEqual(
|
||||||
|
urlize("www.xn--iny-zx5a.com/idna2003"),
|
||||||
|
'<a href="http://www.xn--iny-zx5a.com/idna2003"'
|
||||||
|
' rel="nofollow">www.xn--iny-zx5a.com/idna2003</a>',
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
urlize("www.xn--fa-hia.com/idna2008"),
|
||||||
|
'<a href="http://www.xn--fa-hia.com/idna2008"'
|
||||||
|
' rel="nofollow">www.xn--fa-hia.com/idna2008</a>',
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_malformed(self):
|
def test_malformed(self):
|
||||||
|
@@ -264,8 +264,26 @@ class TestUtilsHtml(SimpleTestCase):
|
|||||||
|
|
||||||
def test_smart_urlquote(self):
|
def test_smart_urlquote(self):
|
||||||
items = (
|
items = (
|
||||||
("http://öäü.com/", "http://xn--4ca9at.com/"),
|
# IDN is encoded as percent-encoded ("quoted") UTF-8 (#36013).
|
||||||
("http://öäü.com/öäü/", "http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/"),
|
("http://öäü.com/", "http://%C3%B6%C3%A4%C3%BC.com/"),
|
||||||
|
("https://faß.example.com", "https://fa%C3%9F.example.com"),
|
||||||
|
(
|
||||||
|
"http://öäü.com/öäü/",
|
||||||
|
"http://%C3%B6%C3%A4%C3%BC.com/%C3%B6%C3%A4%C3%BC/",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
# Valid under IDNA 2008, but was invalid in IDNA 2003.
|
||||||
|
"https://މިހާރު.com",
|
||||||
|
"https://%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.com",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
# Valid under WHATWG URL Specification but not IDNA 2008.
|
||||||
|
"http://👓.ws",
|
||||||
|
"http://%F0%9F%91%93.ws",
|
||||||
|
),
|
||||||
|
# Pre-encoded IDNA is left unchanged.
|
||||||
|
("http://xn--iny-zx5a.com/idna2003", "http://xn--iny-zx5a.com/idna2003"),
|
||||||
|
("http://xn--fa-hia.com/idna2008", "http://xn--fa-hia.com/idna2008"),
|
||||||
# Everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered
|
# Everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered
|
||||||
# safe as per RFC.
|
# safe as per RFC.
|
||||||
(
|
(
|
||||||
@@ -287,8 +305,10 @@ class TestUtilsHtml(SimpleTestCase):
|
|||||||
"django",
|
"django",
|
||||||
),
|
),
|
||||||
("http://.www.f oo.bar/", "http://.www.f%20oo.bar/"),
|
("http://.www.f oo.bar/", "http://.www.f%20oo.bar/"),
|
||||||
|
('http://example.com">', "http://example.com%22%3E"),
|
||||||
|
("http://10.22.1.1/", "http://10.22.1.1/"),
|
||||||
|
("http://[fd00::1]/", "http://[fd00::1]/"),
|
||||||
)
|
)
|
||||||
# IDNs are properly quoted
|
|
||||||
for value, output in items:
|
for value, output in items:
|
||||||
with self.subTest(value=value, output=output):
|
with self.subTest(value=value, output=output):
|
||||||
self.assertEqual(smart_urlquote(value), output)
|
self.assertEqual(smart_urlquote(value), output)
|
||||||
@@ -361,11 +381,21 @@ class TestUtilsHtml(SimpleTestCase):
|
|||||||
lazystr("Search for google.com/?q=!"),
|
lazystr("Search for google.com/?q=!"),
|
||||||
'Search for <a href="http://google.com/?q=">google.com/?q=</a>!',
|
'Search for <a href="http://google.com/?q=">google.com/?q=</a>!',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
"http://www.foo.bar/",
|
||||||
|
'<a href="http://www.foo.bar/">http://www.foo.bar/</a>',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Look on www.نامهای.com.",
|
||||||
|
"Look on <a "
|
||||||
|
'href="http://www.%D9%86%D8%A7%D9%85%D9%87%E2%80%8C%D8%A7%DB%8C.com"'
|
||||||
|
">www.نامهای.com</a>.",
|
||||||
|
),
|
||||||
("foo@example.com", '<a href="mailto:foo@example.com">foo@example.com</a>'),
|
("foo@example.com", '<a href="mailto:foo@example.com">foo@example.com</a>'),
|
||||||
(
|
(
|
||||||
"test@" + "한.글." * 15 + "aaa",
|
"test@" + "한.글." * 15 + "aaa",
|
||||||
'<a href="mailto:test@'
|
'<a href="mailto:test@'
|
||||||
+ "xn--6q8b.xn--bj0b." * 15
|
+ "%ED%95%9C.%EA%B8%80." * 15
|
||||||
+ 'aaa">'
|
+ 'aaa">'
|
||||||
+ "test@"
|
+ "test@"
|
||||||
+ "한.글." * 15
|
+ "한.글." * 15
|
||||||
@@ -378,6 +408,15 @@ class TestUtilsHtml(SimpleTestCase):
|
|||||||
'<a href="mailto:yes%2Bthis%3Dis%26a%25valid%21email@example.com"'
|
'<a href="mailto:yes%2Bthis%3Dis%26a%25valid%21email@example.com"'
|
||||||
">yes+this=is&a%valid!email@example.com</a>",
|
">yes+this=is&a%valid!email@example.com</a>",
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
"foo@faß.example.com",
|
||||||
|
'<a href="mailto:foo@fa%C3%9F.example.com">foo@faß.example.com</a>',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"idna-2008@މިހާރު.example.mv",
|
||||||
|
'<a href="mailto:idna-2008@%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.ex'
|
||||||
|
'ample.mv">idna-2008@މިހާރު.example.mv</a>',
|
||||||
|
),
|
||||||
)
|
)
|
||||||
for value, output in tests:
|
for value, output in tests:
|
||||||
with self.subTest(value=value):
|
with self.subTest(value=value):
|
||||||
|
Reference in New Issue
Block a user