1
0
mirror of https://github.com/django/django.git synced 2024-12-22 17:16:24 +00:00

Fixed #36013 -- Removed use of IDNA-2003 in django.utils.html.

Removed obsolete and potentially problematic IDNA 2003 ("punycode")
encoding of internationalized domain names in smart_urlquote() and Urlizer,
which are used (only) by AdminURLFieldWidget and the urlize/urlizetrunc
template filters. Changed to use percent-encoded UTF-8, which defers
IDNA details to the browser (like other URLs rendered by Django).

See additional discussion in ticket-36013 (refs #36013).
This commit is contained in:
Mike Edmunds 2024-12-14 16:54:42 -08:00 committed by Sarah Boyce
parent 7e41a7a47d
commit 921dd5820c
5 changed files with 74 additions and 20 deletions

View File

@ -735,6 +735,7 @@ answer newbie questions, and generally made Django that much better:
Mihai Preda <mihai_preda@yahoo.com>
Mikaël Barbero <mikael.barbero nospam at nospam free.fr>
Mike Axiak <axiak@mit.edu>
Mike Edmunds <medmunds@gmail.com>
Mike Grouchy <https://mikegrouchy.com/>
Mike Malone <mjmalone@gmail.com>
Mike Richardson

View File

@ -10,7 +10,6 @@ from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsp
from django.core.exceptions import SuspiciousOperation
from django.utils.deprecation import RemovedInDjango60Warning
from django.utils.encoding import punycode
from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
from django.utils.regex_helper import _lazy_re_compile
@ -244,17 +243,16 @@ def smart_urlquote(url):
# see also https://bugs.python.org/issue16285
return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")
# Handle IDN before quoting.
try:
scheme, netloc, path, query, fragment = urlsplit(url)
except ValueError:
# invalid IPv6 URL (normally square brackets in hostname part).
return unquote_quote(url)
try:
netloc = punycode(netloc) # IDN -> ACE
except UnicodeError: # invalid domain part
return unquote_quote(url)
# Handle IDN as percent-encoded UTF-8 octets, per WHATWG URL Specification
# section 3.5 and RFC 3986 section 3.2.2. Defer any IDNA to the user agent.
# See #36013.
netloc = unquote_quote(netloc)
if query:
# Separately unquoting key/value, so as to not mix querystring separators
@ -353,10 +351,8 @@ class Urlizer:
url = smart_urlquote("http://%s" % html.unescape(middle))
elif ":" not in middle and self.is_email_simple(middle):
local, domain = middle.rsplit("@", 1)
try:
domain = punycode(domain)
except UnicodeError:
return word
# Encode per RFC 6068 Section 2 (items 1, 4, 5). Defer any IDNA
# to the user agent. See #36013.
local = quote(local, safe="")
domain = quote(domain, safe="")
url = self.mailto_template.format(local=local, domain=domain)

View File

@ -490,11 +490,13 @@ class AdminURLWidgetTest(SimpleTestCase):
w = widgets.AdminURLFieldWidget()
self.assertHTMLEqual(
w.render("test", "http://example-äüö.com"),
'<p class="url">Currently: <a href="http://xn--example--7za4pnc.com">'
'<p class="url">Currently: <a href="http://example-%C3%A4%C3%BC%C3%B6.com">'
"http://example-äüö.com</a><br>"
'Change:<input class="vURLField" name="test" type="url" '
'value="http://example-äüö.com"></p>',
)
# Does not use obsolete IDNA-2003 encoding (#36013).
self.assertNotIn("fass.example.com", w.render("test", "http://faß.example.com"))
def test_render_quoting(self):
"""
@ -521,7 +523,8 @@ class AdminURLWidgetTest(SimpleTestCase):
output = w.render("test", "http://example-äüö.com/<sometag>some-text</sometag>")
self.assertEqual(
HREF_RE.search(output)[1],
"http://xn--example--7za4pnc.com/%3Csometag%3Esome-text%3C/sometag%3E",
"http://example-%C3%A4%C3%BC%C3%B6.com/"
"%3Csometag%3Esome-text%3C/sometag%3E",
)
self.assertEqual(
TEXT_RE.search(output)[1],

View File

@ -226,19 +226,34 @@ class FunctionTests(SimpleTestCase):
"""
#13704 - Check urlize handles IDN correctly
"""
# (The "✶" below is \N{SIX POINTED BLACK STAR}, not "*" \N{ASTERISK}.)
self.assertEqual(
urlize("http://c✶.ws"),
'<a href="http://xn--c-lgq.ws" rel="nofollow">http://c✶.ws</a>',
'<a href="http://c%E2%9C%B6.ws" rel="nofollow">http://c✶.ws</a>',
)
self.assertEqual(
urlize("www.c✶.ws"),
'<a href="http://www.xn--c-lgq.ws" rel="nofollow">www.c✶.ws</a>',
'<a href="http://www.c%E2%9C%B6.ws" rel="nofollow">www.c✶.ws</a>',
)
self.assertEqual(
urlize("c✶.org"), '<a href="http://xn--c-lgq.org" rel="nofollow">c✶.org</a>'
urlize("c✶.org"),
'<a href="http://c%E2%9C%B6.org" rel="nofollow">c✶.org</a>',
)
self.assertEqual(
urlize("info@c✶.org"), '<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>'
urlize("info@c✶.org"),
'<a href="mailto:info@c%E2%9C%B6.org">info@c✶.org</a>',
)
# Pre-encoded IDNA is urlized but not re-encoded.
self.assertEqual(
urlize("www.xn--iny-zx5a.com/idna2003"),
'<a href="http://www.xn--iny-zx5a.com/idna2003"'
' rel="nofollow">www.xn--iny-zx5a.com/idna2003</a>',
)
self.assertEqual(
urlize("www.xn--fa-hia.com/idna2008"),
'<a href="http://www.xn--fa-hia.com/idna2008"'
' rel="nofollow">www.xn--fa-hia.com/idna2008</a>',
)
def test_malformed(self):

View File

@ -269,8 +269,26 @@ class TestUtilsHtml(SimpleTestCase):
def test_smart_urlquote(self):
items = (
("http://öäü.com/", "http://xn--4ca9at.com/"),
("http://öäü.com/öäü/", "http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/"),
# IDN is encoded as percent-encoded ("quoted") UTF-8 (#36013).
("http://öäü.com/", "http://%C3%B6%C3%A4%C3%BC.com/"),
("https://faß.example.com", "https://fa%C3%9F.example.com"),
(
"http://öäü.com/öäü/",
"http://%C3%B6%C3%A4%C3%BC.com/%C3%B6%C3%A4%C3%BC/",
),
(
# Valid under IDNA 2008, but was invalid in IDNA 2003.
"https://މިހާރު.com",
"https://%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.com",
),
(
# Valid under WHATWG URL Specification but not IDNA 2008.
"http://👓.ws",
"http://%F0%9F%91%93.ws",
),
# Pre-encoded IDNA is left unchanged.
("http://xn--iny-zx5a.com/idna2003", "http://xn--iny-zx5a.com/idna2003"),
("http://xn--fa-hia.com/idna2008", "http://xn--fa-hia.com/idna2008"),
# Everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered
# safe as per RFC.
(
@ -292,8 +310,10 @@ class TestUtilsHtml(SimpleTestCase):
"django",
),
("http://.www.f oo.bar/", "http://.www.f%20oo.bar/"),
('http://example.com">', "http://example.com%22%3E"),
("http://10.22.1.1/", "http://10.22.1.1/"),
("http://[fd00::1]/", "http://[fd00::1]/"),
)
# IDNs are properly quoted
for value, output in items:
with self.subTest(value=value, output=output):
self.assertEqual(smart_urlquote(value), output)
@ -366,11 +386,21 @@ class TestUtilsHtml(SimpleTestCase):
lazystr("Search for google.com/?q=!"),
'Search for <a href="http://google.com/?q=">google.com/?q=</a>!',
),
(
"http://www.foo.bar/",
'<a href="http://www.foo.bar/">http://www.foo.bar/</a>',
),
(
"Look on www.نامه‌ای.com.",
"Look on <a "
'href="http://www.%D9%86%D8%A7%D9%85%D9%87%E2%80%8C%D8%A7%DB%8C.com"'
">www.نامه‌ای.com</a>.",
),
("foo@example.com", '<a href="mailto:foo@example.com">foo@example.com</a>'),
(
"test@" + "한.글." * 15 + "aaa",
'<a href="mailto:test@'
+ "xn--6q8b.xn--bj0b." * 15
+ "%ED%95%9C.%EA%B8%80." * 15
+ 'aaa">'
+ "test@"
+ "한.글." * 15
@ -389,6 +419,15 @@ class TestUtilsHtml(SimpleTestCase):
"test@example.com?org",
'<a href="mailto:test@example.com%3Forg">test@example.com?org</a>',
),
(
"foo@faß.example.com",
'<a href="mailto:foo@fa%C3%9F.example.com">foo@faß.example.com</a>',
),
(
"idna-2008@މިހާރު.example.mv",
'<a href="mailto:idna-2008@%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.ex'
'ample.mv">idna-2008@މިހާރު.example.mv</a>',
),
)
for value, output in tests:
with self.subTest(value=value):