1
0
mirror of https://github.com/django/django.git synced 2025-03-06 15:32:33 +00:00

[5.2.x] Fixed #36013 -- Removed use of IDNA-2003 in django.utils.html.

Removed obsolete and potentially problematic IDNA 2003 ("punycode")
encoding of internationalized domain names in smart_urlquote() and Urlizer,
which are used (only) by AdminURLFieldWidget and the urlize/urlizetrunc
template filters. Changed to use percent-encoded UTF-8, which defers
IDNA details to the browser (like other URLs rendered by Django).

Backport of 29ba75e6e57414f0e6f9528d08a520b8b931fb28 from main.
This commit is contained in:
Mike Edmunds 2024-12-15 01:54:42 +01:00 committed by Sarah Boyce
parent d6c2b67884
commit 698d05c11c
5 changed files with 74 additions and 20 deletions

View File

@ -735,6 +735,7 @@ answer newbie questions, and generally made Django that much better:
Mihai Preda <mihai_preda@yahoo.com> Mihai Preda <mihai_preda@yahoo.com>
Mikaël Barbero <mikael.barbero nospam at nospam free.fr> Mikaël Barbero <mikael.barbero nospam at nospam free.fr>
Mike Axiak <axiak@mit.edu> Mike Axiak <axiak@mit.edu>
Mike Edmunds <medmunds@gmail.com>
Mike Grouchy <https://mikegrouchy.com/> Mike Grouchy <https://mikegrouchy.com/>
Mike Malone <mjmalone@gmail.com> Mike Malone <mjmalone@gmail.com>
Mike Richardson Mike Richardson

View File

@ -11,7 +11,6 @@ from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsp
from django.core.exceptions import SuspiciousOperation, ValidationError from django.core.exceptions import SuspiciousOperation, ValidationError
from django.core.validators import EmailValidator from django.core.validators import EmailValidator
from django.utils.deprecation import RemovedInDjango60Warning from django.utils.deprecation import RemovedInDjango60Warning
from django.utils.encoding import punycode
from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
from django.utils.regex_helper import _lazy_re_compile from django.utils.regex_helper import _lazy_re_compile
@ -245,17 +244,16 @@ def smart_urlquote(url):
# see also https://bugs.python.org/issue16285 # see also https://bugs.python.org/issue16285
return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~") return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")
# Handle IDN before quoting.
try: try:
scheme, netloc, path, query, fragment = urlsplit(url) scheme, netloc, path, query, fragment = urlsplit(url)
except ValueError: except ValueError:
# invalid IPv6 URL (normally square brackets in hostname part). # invalid IPv6 URL (normally square brackets in hostname part).
return unquote_quote(url) return unquote_quote(url)
try: # Handle IDN as percent-encoded UTF-8 octets, per WHATWG URL Specification
netloc = punycode(netloc) # IDN -> ACE # section 3.5 and RFC 3986 section 3.2.2. Defer any IDNA to the user agent.
except UnicodeError: # invalid domain part # See #36013.
return unquote_quote(url) netloc = unquote_quote(netloc)
if query: if query:
# Separately unquoting key/value, so as to not mix querystring separators # Separately unquoting key/value, so as to not mix querystring separators
@ -356,10 +354,8 @@ class Urlizer:
url = smart_urlquote("http://%s" % html.unescape(middle)) url = smart_urlquote("http://%s" % html.unescape(middle))
elif ":" not in middle and self.is_email_simple(middle): elif ":" not in middle and self.is_email_simple(middle):
local, domain = middle.rsplit("@", 1) local, domain = middle.rsplit("@", 1)
try: # Encode per RFC 6068 Section 2 (items 1, 4, 5). Defer any IDNA
domain = punycode(domain) # to the user agent. See #36013.
except UnicodeError:
return word
local = quote(local, safe="") local = quote(local, safe="")
domain = quote(domain, safe="") domain = quote(domain, safe="")
url = self.mailto_template.format(local=local, domain=domain) url = self.mailto_template.format(local=local, domain=domain)

View File

@ -490,11 +490,13 @@ class AdminURLWidgetTest(SimpleTestCase):
w = widgets.AdminURLFieldWidget() w = widgets.AdminURLFieldWidget()
self.assertHTMLEqual( self.assertHTMLEqual(
w.render("test", "http://example-äüö.com"), w.render("test", "http://example-äüö.com"),
'<p class="url">Currently: <a href="http://xn--example--7za4pnc.com">' '<p class="url">Currently: <a href="http://example-%C3%A4%C3%BC%C3%B6.com">'
"http://example-äüö.com</a><br>" "http://example-äüö.com</a><br>"
'Change:<input class="vURLField" name="test" type="url" ' 'Change:<input class="vURLField" name="test" type="url" '
'value="http://example-äüö.com"></p>', 'value="http://example-äüö.com"></p>',
) )
# Does not use obsolete IDNA-2003 encoding (#36013).
self.assertNotIn("fass.example.com", w.render("test", "http://faß.example.com"))
def test_render_quoting(self): def test_render_quoting(self):
""" """
@ -521,7 +523,8 @@ class AdminURLWidgetTest(SimpleTestCase):
output = w.render("test", "http://example-äüö.com/<sometag>some-text</sometag>") output = w.render("test", "http://example-äüö.com/<sometag>some-text</sometag>")
self.assertEqual( self.assertEqual(
HREF_RE.search(output)[1], HREF_RE.search(output)[1],
"http://xn--example--7za4pnc.com/%3Csometag%3Esome-text%3C/sometag%3E", "http://example-%C3%A4%C3%BC%C3%B6.com/"
"%3Csometag%3Esome-text%3C/sometag%3E",
) )
self.assertEqual( self.assertEqual(
TEXT_RE.search(output)[1], TEXT_RE.search(output)[1],

View File

@ -229,19 +229,34 @@ class FunctionTests(SimpleTestCase):
""" """
#13704 - Check urlize handles IDN correctly #13704 - Check urlize handles IDN correctly
""" """
# The "✶" below is \N{SIX POINTED BLACK STAR}, not "*" \N{ASTERISK}.
self.assertEqual( self.assertEqual(
urlize("http://c✶.ws"), urlize("http://c✶.ws"),
'<a href="http://xn--c-lgq.ws" rel="nofollow">http://c✶.ws</a>', '<a href="http://c%E2%9C%B6.ws" rel="nofollow">http://c✶.ws</a>',
) )
self.assertEqual( self.assertEqual(
urlize("www.c✶.ws"), urlize("www.c✶.ws"),
'<a href="http://www.xn--c-lgq.ws" rel="nofollow">www.c✶.ws</a>', '<a href="http://www.c%E2%9C%B6.ws" rel="nofollow">www.c✶.ws</a>',
) )
self.assertEqual( self.assertEqual(
urlize("c✶.org"), '<a href="http://xn--c-lgq.org" rel="nofollow">c✶.org</a>' urlize("c✶.org"),
'<a href="http://c%E2%9C%B6.org" rel="nofollow">c✶.org</a>',
) )
self.assertEqual( self.assertEqual(
urlize("info@c✶.org"), '<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>' urlize("info@c✶.org"),
'<a href="mailto:info@c%E2%9C%B6.org">info@c✶.org</a>',
)
# Pre-encoded IDNA is urlized but not re-encoded.
self.assertEqual(
urlize("www.xn--iny-zx5a.com/idna2003"),
'<a href="http://www.xn--iny-zx5a.com/idna2003"'
' rel="nofollow">www.xn--iny-zx5a.com/idna2003</a>',
)
self.assertEqual(
urlize("www.xn--fa-hia.com/idna2008"),
'<a href="http://www.xn--fa-hia.com/idna2008"'
' rel="nofollow">www.xn--fa-hia.com/idna2008</a>',
) )
def test_malformed(self): def test_malformed(self):

View File

@ -269,8 +269,26 @@ class TestUtilsHtml(SimpleTestCase):
def test_smart_urlquote(self): def test_smart_urlquote(self):
items = ( items = (
("http://öäü.com/", "http://xn--4ca9at.com/"), # IDN is encoded as percent-encoded ("quoted") UTF-8 (#36013).
("http://öäü.com/öäü/", "http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/"), ("http://öäü.com/", "http://%C3%B6%C3%A4%C3%BC.com/"),
("https://faß.example.com", "https://fa%C3%9F.example.com"),
(
"http://öäü.com/öäü/",
"http://%C3%B6%C3%A4%C3%BC.com/%C3%B6%C3%A4%C3%BC/",
),
(
# Valid under IDNA 2008, but was invalid in IDNA 2003.
"https://މިހާރު.com",
"https://%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.com",
),
(
# Valid under WHATWG URL Specification but not IDNA 2008.
"http://👓.ws",
"http://%F0%9F%91%93.ws",
),
# Pre-encoded IDNA is left unchanged.
("http://xn--iny-zx5a.com/idna2003", "http://xn--iny-zx5a.com/idna2003"),
("http://xn--fa-hia.com/idna2008", "http://xn--fa-hia.com/idna2008"),
# Everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered # Everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered
# safe as per RFC. # safe as per RFC.
( (
@ -292,8 +310,10 @@ class TestUtilsHtml(SimpleTestCase):
"django", "django",
), ),
("http://.www.f oo.bar/", "http://.www.f%20oo.bar/"), ("http://.www.f oo.bar/", "http://.www.f%20oo.bar/"),
('http://example.com">', "http://example.com%22%3E"),
("http://10.22.1.1/", "http://10.22.1.1/"),
("http://[fd00::1]/", "http://[fd00::1]/"),
) )
# IDNs are properly quoted
for value, output in items: for value, output in items:
with self.subTest(value=value, output=output): with self.subTest(value=value, output=output):
self.assertEqual(smart_urlquote(value), output) self.assertEqual(smart_urlquote(value), output)
@ -366,11 +386,21 @@ class TestUtilsHtml(SimpleTestCase):
lazystr("Search for google.com/?q=!"), lazystr("Search for google.com/?q=!"),
'Search for <a href="http://google.com/?q=">google.com/?q=</a>!', 'Search for <a href="http://google.com/?q=">google.com/?q=</a>!',
), ),
(
"http://www.foo.bar/",
'<a href="http://www.foo.bar/">http://www.foo.bar/</a>',
),
(
"Look on www.نامه‌ای.com.",
"Look on <a "
'href="http://www.%D9%86%D8%A7%D9%85%D9%87%E2%80%8C%D8%A7%DB%8C.com"'
">www.نامه‌ای.com</a>.",
),
("foo@example.com", '<a href="mailto:foo@example.com">foo@example.com</a>'), ("foo@example.com", '<a href="mailto:foo@example.com">foo@example.com</a>'),
( (
"test@" + "한.글." * 15 + "aaa", "test@" + "한.글." * 15 + "aaa",
'<a href="mailto:test@' '<a href="mailto:test@'
+ "xn--6q8b.xn--bj0b." * 15 + "%ED%95%9C.%EA%B8%80." * 15
+ 'aaa">' + 'aaa">'
+ "test@" + "test@"
+ "한.글." * 15 + "한.글." * 15
@ -383,6 +413,15 @@ class TestUtilsHtml(SimpleTestCase):
'<a href="mailto:yes%2Bthis%3Dis%26a%25valid%21email@example.com"' '<a href="mailto:yes%2Bthis%3Dis%26a%25valid%21email@example.com"'
">yes+this=is&a%valid!email@example.com</a>", ">yes+this=is&a%valid!email@example.com</a>",
), ),
(
"foo@faß.example.com",
'<a href="mailto:foo@fa%C3%9F.example.com">foo@faß.example.com</a>',
),
(
"idna-2008@މިހާރު.example.mv",
'<a href="mailto:idna-2008@%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.ex'
'ample.mv">idna-2008@މިހާރު.example.mv</a>',
),
) )
for value, output in tests: for value, output in tests:
with self.subTest(value=value): with self.subTest(value=value):