mirror of
https://github.com/django/django.git
synced 2024-12-22 17:16:24 +00:00
Fixed #36013 -- Removed use of IDNA-2003 in django.utils.html.
Removed the obsolete and potentially problematic IDNA 2003 ("punycode") encoding of internationalized domain names in smart_urlquote() and Urlizer, which are used only by AdminURLFieldWidget and the urlize/urlizetrunc template filters. Domain names are now emitted as percent-encoded UTF-8, which defers IDNA details to the browser (like other URLs rendered by Django). See the additional discussion in ticket #36013.
This commit is contained in:
parent
7e41a7a47d
commit
921dd5820c
1
AUTHORS
1
AUTHORS
@ -735,6 +735,7 @@ answer newbie questions, and generally made Django that much better:
|
||||
Mihai Preda <mihai_preda@yahoo.com>
|
||||
Mikaël Barbero <mikael.barbero nospam at nospam free.fr>
|
||||
Mike Axiak <axiak@mit.edu>
|
||||
Mike Edmunds <medmunds@gmail.com>
|
||||
Mike Grouchy <https://mikegrouchy.com/>
|
||||
Mike Malone <mjmalone@gmail.com>
|
||||
Mike Richardson
|
||||
|
@ -10,7 +10,6 @@ from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsp
|
||||
|
||||
from django.core.exceptions import SuspiciousOperation
|
||||
from django.utils.deprecation import RemovedInDjango60Warning
|
||||
from django.utils.encoding import punycode
|
||||
from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
|
||||
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
|
||||
from django.utils.regex_helper import _lazy_re_compile
|
||||
@ -244,17 +243,16 @@ def smart_urlquote(url):
|
||||
# see also https://bugs.python.org/issue16285
|
||||
return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")
|
||||
|
||||
# Handle IDN before quoting.
|
||||
try:
|
||||
scheme, netloc, path, query, fragment = urlsplit(url)
|
||||
except ValueError:
|
||||
# invalid IPv6 URL (normally square brackets in hostname part).
|
||||
return unquote_quote(url)
|
||||
|
||||
try:
|
||||
netloc = punycode(netloc) # IDN -> ACE
|
||||
except UnicodeError: # invalid domain part
|
||||
return unquote_quote(url)
|
||||
# Handle IDN as percent-encoded UTF-8 octets, per WHATWG URL Specification
|
||||
# section 3.5 and RFC 3986 section 3.2.2. Defer any IDNA to the user agent.
|
||||
# See #36013.
|
||||
netloc = unquote_quote(netloc)
|
||||
|
||||
if query:
|
||||
# Separately unquoting key/value, so as to not mix querystring separators
|
||||
@ -353,10 +351,8 @@ class Urlizer:
|
||||
url = smart_urlquote("http://%s" % html.unescape(middle))
|
||||
elif ":" not in middle and self.is_email_simple(middle):
|
||||
local, domain = middle.rsplit("@", 1)
|
||||
try:
|
||||
domain = punycode(domain)
|
||||
except UnicodeError:
|
||||
return word
|
||||
# Encode per RFC 6068 Section 2 (items 1, 4, 5). Defer any IDNA
|
||||
# to the user agent. See #36013.
|
||||
local = quote(local, safe="")
|
||||
domain = quote(domain, safe="")
|
||||
url = self.mailto_template.format(local=local, domain=domain)
|
||||
|
@ -490,11 +490,13 @@ class AdminURLWidgetTest(SimpleTestCase):
|
||||
w = widgets.AdminURLFieldWidget()
|
||||
self.assertHTMLEqual(
|
||||
w.render("test", "http://example-äüö.com"),
|
||||
'<p class="url">Currently: <a href="http://xn--example--7za4pnc.com">'
|
||||
'<p class="url">Currently: <a href="http://example-%C3%A4%C3%BC%C3%B6.com">'
|
||||
"http://example-äüö.com</a><br>"
|
||||
'Change:<input class="vURLField" name="test" type="url" '
|
||||
'value="http://example-äüö.com"></p>',
|
||||
)
|
||||
# Does not use obsolete IDNA-2003 encoding (#36013).
|
||||
self.assertNotIn("fass.example.com", w.render("test", "http://faß.example.com"))
|
||||
|
||||
def test_render_quoting(self):
|
||||
"""
|
||||
@ -521,7 +523,8 @@ class AdminURLWidgetTest(SimpleTestCase):
|
||||
output = w.render("test", "http://example-äüö.com/<sometag>some-text</sometag>")
|
||||
self.assertEqual(
|
||||
HREF_RE.search(output)[1],
|
||||
"http://xn--example--7za4pnc.com/%3Csometag%3Esome-text%3C/sometag%3E",
|
||||
"http://example-%C3%A4%C3%BC%C3%B6.com/"
|
||||
"%3Csometag%3Esome-text%3C/sometag%3E",
|
||||
)
|
||||
self.assertEqual(
|
||||
TEXT_RE.search(output)[1],
|
||||
|
@ -226,19 +226,34 @@ class FunctionTests(SimpleTestCase):
|
||||
"""
|
||||
#13704 - Check urlize handles IDN correctly
|
||||
"""
|
||||
# (The "✶" below is \N{SIX POINTED BLACK STAR}, not "*" \N{ASTERISK}.)
|
||||
self.assertEqual(
|
||||
urlize("http://c✶.ws"),
|
||||
'<a href="http://xn--c-lgq.ws" rel="nofollow">http://c✶.ws</a>',
|
||||
'<a href="http://c%E2%9C%B6.ws" rel="nofollow">http://c✶.ws</a>',
|
||||
)
|
||||
self.assertEqual(
|
||||
urlize("www.c✶.ws"),
|
||||
'<a href="http://www.xn--c-lgq.ws" rel="nofollow">www.c✶.ws</a>',
|
||||
'<a href="http://www.c%E2%9C%B6.ws" rel="nofollow">www.c✶.ws</a>',
|
||||
)
|
||||
self.assertEqual(
|
||||
urlize("c✶.org"), '<a href="http://xn--c-lgq.org" rel="nofollow">c✶.org</a>'
|
||||
urlize("c✶.org"),
|
||||
'<a href="http://c%E2%9C%B6.org" rel="nofollow">c✶.org</a>',
|
||||
)
|
||||
self.assertEqual(
|
||||
urlize("info@c✶.org"), '<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>'
|
||||
urlize("info@c✶.org"),
|
||||
'<a href="mailto:info@c%E2%9C%B6.org">info@c✶.org</a>',
|
||||
)
|
||||
|
||||
# Pre-encoded IDNA is urlized but not re-encoded.
|
||||
self.assertEqual(
|
||||
urlize("www.xn--iny-zx5a.com/idna2003"),
|
||||
'<a href="http://www.xn--iny-zx5a.com/idna2003"'
|
||||
' rel="nofollow">www.xn--iny-zx5a.com/idna2003</a>',
|
||||
)
|
||||
self.assertEqual(
|
||||
urlize("www.xn--fa-hia.com/idna2008"),
|
||||
'<a href="http://www.xn--fa-hia.com/idna2008"'
|
||||
' rel="nofollow">www.xn--fa-hia.com/idna2008</a>',
|
||||
)
|
||||
|
||||
def test_malformed(self):
|
||||
|
@ -269,8 +269,26 @@ class TestUtilsHtml(SimpleTestCase):
|
||||
|
||||
def test_smart_urlquote(self):
|
||||
items = (
|
||||
("http://öäü.com/", "http://xn--4ca9at.com/"),
|
||||
("http://öäü.com/öäü/", "http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/"),
|
||||
# IDN is encoded as percent-encoded ("quoted") UTF-8 (#36013).
|
||||
("http://öäü.com/", "http://%C3%B6%C3%A4%C3%BC.com/"),
|
||||
("https://faß.example.com", "https://fa%C3%9F.example.com"),
|
||||
(
|
||||
"http://öäü.com/öäü/",
|
||||
"http://%C3%B6%C3%A4%C3%BC.com/%C3%B6%C3%A4%C3%BC/",
|
||||
),
|
||||
(
|
||||
# Valid under IDNA 2008, but was invalid in IDNA 2003.
|
||||
"https://މިހާރު.com",
|
||||
"https://%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.com",
|
||||
),
|
||||
(
|
||||
# Valid under WHATWG URL Specification but not IDNA 2008.
|
||||
"http://👓.ws",
|
||||
"http://%F0%9F%91%93.ws",
|
||||
),
|
||||
# Pre-encoded IDNA is left unchanged.
|
||||
("http://xn--iny-zx5a.com/idna2003", "http://xn--iny-zx5a.com/idna2003"),
|
||||
("http://xn--fa-hia.com/idna2008", "http://xn--fa-hia.com/idna2008"),
|
||||
# Everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered
|
||||
# safe as per RFC.
|
||||
(
|
||||
@ -292,8 +310,10 @@ class TestUtilsHtml(SimpleTestCase):
|
||||
"django",
|
||||
),
|
||||
("http://.www.f oo.bar/", "http://.www.f%20oo.bar/"),
|
||||
('http://example.com">', "http://example.com%22%3E"),
|
||||
("http://10.22.1.1/", "http://10.22.1.1/"),
|
||||
("http://[fd00::1]/", "http://[fd00::1]/"),
|
||||
)
|
||||
# IDNs are properly quoted
|
||||
for value, output in items:
|
||||
with self.subTest(value=value, output=output):
|
||||
self.assertEqual(smart_urlquote(value), output)
|
||||
@ -366,11 +386,21 @@ class TestUtilsHtml(SimpleTestCase):
|
||||
lazystr("Search for google.com/?q=!"),
|
||||
'Search for <a href="http://google.com/?q=">google.com/?q=</a>!',
|
||||
),
|
||||
(
|
||||
"http://www.foo.bar/",
|
||||
'<a href="http://www.foo.bar/">http://www.foo.bar/</a>',
|
||||
),
|
||||
(
|
||||
"Look on www.نامهای.com.",
|
||||
"Look on <a "
|
||||
'href="http://www.%D9%86%D8%A7%D9%85%D9%87%E2%80%8C%D8%A7%DB%8C.com"'
|
||||
">www.نامهای.com</a>.",
|
||||
),
|
||||
("foo@example.com", '<a href="mailto:foo@example.com">foo@example.com</a>'),
|
||||
(
|
||||
"test@" + "한.글." * 15 + "aaa",
|
||||
'<a href="mailto:test@'
|
||||
+ "xn--6q8b.xn--bj0b." * 15
|
||||
+ "%ED%95%9C.%EA%B8%80." * 15
|
||||
+ 'aaa">'
|
||||
+ "test@"
|
||||
+ "한.글." * 15
|
||||
@ -389,6 +419,15 @@ class TestUtilsHtml(SimpleTestCase):
|
||||
"test@example.com?org",
|
||||
'<a href="mailto:test@example.com%3Forg">test@example.com?org</a>',
|
||||
),
|
||||
(
|
||||
"foo@faß.example.com",
|
||||
'<a href="mailto:foo@fa%C3%9F.example.com">foo@faß.example.com</a>',
|
||||
),
|
||||
(
|
||||
"idna-2008@މިހާރު.example.mv",
|
||||
'<a href="mailto:idna-2008@%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.ex'
|
||||
'ample.mv">idna-2008@މިހާރު.example.mv</a>',
|
||||
),
|
||||
)
|
||||
for value, output in tests:
|
||||
with self.subTest(value=value):
|
||||
|
Loading…
Reference in New Issue
Block a user