diff --git a/django/utils/html.py b/django/utils/html.py
index 22d3ae42fa..1d96cfe6db 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -9,7 +9,7 @@ from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsp
from django.utils.deprecation import RemovedInDjango60Warning
from django.utils.encoding import punycode
-from django.utils.functional import Promise, keep_lazy, keep_lazy_text
+from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
from django.utils.regex_helper import _lazy_re_compile
from django.utils.safestring import SafeData, SafeString, mark_safe
@@ -257,6 +257,16 @@ def smart_urlquote(url):
return urlunsplit((scheme, netloc, path, query, fragment))
+class CountsDict(dict):
+    """
+    Dict that lazily caches how many times a key occurs in `word`.
+
+    A missing key is resolved with `word.count(key)` and the result is
+    stored, so callers can afterwards adjust the cached value in place.
+    """
+
+    def __init__(self, *args, word, **kwargs):
+        # `word` is keyword-only; everything else is forwarded to dict().
+        # Keyword arguments must be unpacked with `**`: unpacking them with
+        # `*` would pass their *names* to dict() as positional arguments.
+        super().__init__(*args, **kwargs)
+        self.word = word
+
+    def __missing__(self, key):
+        # Called by dict.__getitem__ for absent keys: count once, then cache.
+        self[key] = self.word.count(key)
+        return self[key]
+
class Urlizer:
"""
Convert any URLs in text into clickable links.
@@ -362,40 +372,72 @@ class Urlizer:
return x
return "%s…" % x[: max(0, limit - 1)]
+    @cached_property
+    def wrapping_punctuation_openings(self):
+        """
+        Return the opening character of every `wrapping_punctuation` pair,
+        joined into a single string.
+        """
+        openings = dict(self.wrapping_punctuation)
+        return "".join(openings)
+
+    @cached_property
+    def trailing_punctuation_chars_no_semicolon(self):
+        """Return `trailing_punctuation_chars` with every ";" removed."""
+        kept = (ch for ch in self.trailing_punctuation_chars if ch != ";")
+        return "".join(kept)
+
+    @cached_property
+    def trailing_punctuation_chars_has_semicolon(self):
+        """Return whether ";" is one of `trailing_punctuation_chars`."""
+        chars = self.trailing_punctuation_chars
+        return ";" in chars
+
def trim_punctuation(self, word):
"""
Trim trailing and wrapping punctuation from `word`. Return the items of
the new state.
"""
- lead, middle, trail = "", word, ""
+        # Strip all opening wrapping punctuation.
+        middle = word.lstrip(self.wrapping_punctuation_openings)
+        lead = word[: len(word) - len(middle)]
+        trail = ""
+
# Continue trimming until middle remains unchanged.
trimmed_something = True
- while trimmed_something:
+        # Character counts are taken from `middle` as it is right now, computed
+        # on first access and then adjusted by hand whenever closing
+        # punctuation is removed, so the loop never rescans the whole string.
+        counts = CountsDict(word=middle)
+        while trimmed_something and middle:
trimmed_something = False
# Trim wrapping punctuation.
for opening, closing in self.wrapping_punctuation:
- if middle.startswith(opening):
- middle = middle.removeprefix(opening)
- lead += opening
- trimmed_something = True
- # Keep parentheses at the end only if they're balanced.
- if (
- middle.endswith(closing)
- and middle.count(closing) == middle.count(opening) + 1
- ):
- middle = middle.removesuffix(closing)
- trail = closing + trail
- trimmed_something = True
- # Trim trailing punctuation (after trimming wrapping punctuation,
- # as encoded entities contain ';'). Unescape entities to avoid
- # breaking them by removing ';'.
- middle_unescaped = html.unescape(middle)
- stripped = middle_unescaped.rstrip(self.trailing_punctuation_chars)
- if middle_unescaped != stripped:
- punctuation_count = len(middle_unescaped) - len(stripped)
- trail = middle[-punctuation_count:] + trail
- middle = middle[:-punctuation_count]
+                if counts[opening] < counts[closing]:
+                    rstripped = middle.rstrip(closing)
+                    if rstripped != middle:
+                        # Remove the surplus of closing characters, but never
+                        # more than the run that is really at the end: the
+                        # surplus can be larger (e.g. "a)b)"), and slicing by
+                        # it would move non-punctuation into `trail`.
+                        strip = min(
+                            counts[closing] - counts[opening],
+                            len(middle) - len(rstripped),
+                        )
+                        # Prepend so that punctuation already collected in
+                        # `trail` (by an earlier pair or an earlier pass) is
+                        # kept rather than overwritten.
+                        trail = middle[-strip:] + trail
+                        middle = middle[:-strip]
+                        trimmed_something = True
+                        counts[closing] -= strip
+
+            # Trim trailing punctuation other than ';', which needs the entity
+            # check below.
+            rstripped = middle.rstrip(self.trailing_punctuation_chars_no_semicolon)
+            if rstripped != middle:
+                trail = middle[len(rstripped) :] + trail
+                middle = rstripped
trimmed_something = True
+
+            if self.trailing_punctuation_chars_has_semicolon and middle.endswith(";"):
+                # Only strip if not part of an HTML entity.
+                amp = middle.rfind("&")
+                if amp == -1:
+                    can_strip = True
+                else:
+                    # Stripping is allowed when unescaping the text from the
+                    # last '&' changes nothing, or when the unescaped text
+                    # still ends with ';'.
+                    potential_entity = middle[amp:]
+                    escaped = html.unescape(potential_entity)
+                    can_strip = (escaped == potential_entity) or escaped.endswith(";")
+
+                if can_strip:
+                    rstripped = middle.rstrip(";")
+                    amount_stripped = len(middle) - len(rstripped)
+                    if amp > -1 and amount_stripped > 1:
+                        # Leave a trailing semicolon as might be an entity.
+                        trail = middle[len(rstripped) + 1 :] + trail
+                        middle = rstripped + ";"
+                    else:
+                        trail = middle[len(rstripped) :] + trail
+                        middle = rstripped
+                    trimmed_something = True
+
return lead, middle, trail
@staticmethod
diff --git a/docs/releases/4.2.14.txt b/docs/releases/4.2.14.txt
index a0d95a477b..f32c0cf8d4 100644
--- a/docs/releases/4.2.14.txt
+++ b/docs/releases/4.2.14.txt
@@ -7,3 +7,9 @@ Django 4.2.14 release notes
Django 4.2.14 fixes two security issues with severity "moderate" and two
security issues with severity "low" in 4.2.13.
+CVE-2024-38875: Potential denial-of-service vulnerability in ``django.utils.html.urlize()``
+===========================================================================================
+
+:tfilter:`urlize` and :tfilter:`urlizetrunc` were subject to a potential
+denial-of-service attack via certain inputs with a very large number of
+brackets.
diff --git a/docs/releases/5.0.7.txt b/docs/releases/5.0.7.txt
index 0bee6d57ce..f25bc1f37d 100644
--- a/docs/releases/5.0.7.txt
+++ b/docs/releases/5.0.7.txt
@@ -7,6 +7,13 @@ Django 5.0.7 release notes
Django 5.0.7 fixes two security issues with severity "moderate", two security
issues with severity "low", and several bugs in 5.0.6.
+CVE-2024-38875: Potential denial-of-service vulnerability in ``django.utils.html.urlize()``
+===========================================================================================
+
+:tfilter:`urlize` and :tfilter:`urlizetrunc` were subject to a potential
+denial-of-service attack via certain inputs with a very large number of
+brackets.
+
Bugfixes
========
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index ad31b8cc5b..9fe782ed2f 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -352,6 +352,13 @@ class TestUtilsHtml(SimpleTestCase):
"foo@.example.com",
"foo@localhost",
"foo@localhost.",
+ # trim_punctuation catastrophic tests
+ "(" * 100_000 + ":" + ")" * 100_000,
+ "(" * 100_000 + "&:" + ")" * 100_000,
+ "([" * 100_000 + ":" + "])" * 100_000,
+ "[(" * 100_000 + ":" + ")]" * 100_000,
+ "([[" * 100_000 + ":" + "]])" * 100_000,
+ "&:" + ";" * 100_000,
)
for value in tests:
with self.subTest(value=value):