class CountsDict(dict):
    """
    Dict that lazily counts occurrences of a key inside a fixed string.

    Looking up a key that is not stored yet computes ``word.count(key)``,
    stores the result, and returns it, so each distinct key scans ``word`` at
    most once. Stored values are ordinary dict items and may be adjusted by
    the caller afterwards (e.g. decremented as characters are trimmed).

    Introduced with the CVE-2024-38875 mitigation for ``urlize`` /
    ``urlizetrunc``.
    """

    def __init__(self, *args, word, **kwargs):
        """
        Initialize like ``dict(*args, **kwargs)`` and remember ``word``.

        ``word`` is keyword-only and is the string whose substrings are
        counted on a missing-key lookup.
        """
        # Forward keyword arguments as keywords. Unpacking them with a single
        # star would pass the keyword *names* to dict() as positional
        # arguments.
        super().__init__(*args, **kwargs)
        self.word = word

    def __missing__(self, key):
        """Count ``key`` in ``self.word``, cache the count, and return it."""
        count = self[key] = self.word.count(key)
        return count
# Methods of ``Urlizer`` (the class header lies outside this span).
@cached_property
def wrapping_punctuation_openings(self):
    """Return all opening characters of ``wrapping_punctuation`` as one string."""
    return "".join(dict(self.wrapping_punctuation))


@cached_property
def trailing_punctuation_chars_no_semicolon(self):
    """Return ``trailing_punctuation_chars`` with every ';' removed."""
    return self.trailing_punctuation_chars.replace(";", "")


@cached_property
def trailing_punctuation_chars_has_semicolon(self):
    """Return whether ';' is one of the trailing punctuation characters."""
    return ";" in self.trailing_punctuation_chars


def trim_punctuation(self, word):
    """
    Trim trailing and wrapping punctuation from `word`. Return the items of
    the new state.

    The result is a ``(lead, middle, trail)`` tuple of strings:
    ``lead`` holds the opening wrapping characters stripped from the start,
    ``trail`` holds the punctuation stripped from the end (in its original
    order), and ``middle`` is what remains between them.
    """
    # Strip all opening wrapping punctuation.
    middle = word.lstrip(self.wrapping_punctuation_openings)
    lead = word[: len(word) - len(middle)]
    trail = ""

    # Continue trimming until middle remains unchanged.
    trimmed_something = True
    # Per-character counts are computed once per character and then updated
    # incrementally, rather than rescanning `middle` on every pass.
    counts = CountsDict(word=middle)
    while trimmed_something and middle:
        trimmed_something = False
        # Trim wrapping punctuation: drop only the unbalanced closers.
        for opening, closing in self.wrapping_punctuation:
            if counts[opening] < counts[closing]:
                rstripped = middle.rstrip(closing)
                if rstripped != middle:
                    # Never remove more characters than the run of closers
                    # actually present at the end; otherwise URL characters
                    # would be moved into `trail` and `counts` would no
                    # longer describe `middle`.
                    strip = min(
                        counts[closing] - counts[opening],
                        len(middle) - len(rstripped),
                    )
                    # Prepend so punctuation trimmed on earlier passes is
                    # kept rather than overwritten.
                    trail = middle[-strip:] + trail
                    middle = middle[:-strip]
                    trimmed_something = True
                    counts[closing] -= strip

        # Trim trailing punctuation other than ';', which needs the entity
        # check below.
        rstripped = middle.rstrip(self.trailing_punctuation_chars_no_semicolon)
        if rstripped != middle:
            trail = middle[len(rstripped) :] + trail
            middle = rstripped
            trimmed_something = True

        if self.trailing_punctuation_chars_has_semicolon and middle.endswith(";"):
            # Only strip if not part of an HTML entity.
            amp = middle.rfind("&")
            if amp == -1:
                can_strip = True
            else:
                potential_entity = middle[amp:]
                escaped = html.unescape(potential_entity)
                # Strippable when nothing was unescaped, or when the
                # unescaped text still ends with a ';' of its own.
                can_strip = (escaped == potential_entity) or escaped.endswith(";")

            if can_strip:
                rstripped = middle.rstrip(";")
                amount_stripped = len(middle) - len(rstripped)
                if amp > -1 and amount_stripped > 1:
                    # Leave a trailing semicolon as might be an entity.
                    trail = middle[len(rstripped) + 1 :] + trail
                    middle = rstripped + ";"
                else:
                    trail = middle[len(rstripped) :] + trail
                    middle = rstripped
                trimmed_something = True

    return lead, middle, trail
+CVE-2024-38875: Potential denial-of-service vulnerability in ``django.utils.html.urlize()`` +=========================================================================================== + +:tfilter:`urlize` and :tfilter:`urlizetrunc` were subject to a potential +denial-of-service attack via certain inputs with a very large number of +brackets. diff --git a/docs/releases/5.0.7.txt b/docs/releases/5.0.7.txt index 0bee6d57ce..f25bc1f37d 100644 --- a/docs/releases/5.0.7.txt +++ b/docs/releases/5.0.7.txt @@ -7,6 +7,13 @@ Django 5.0.7 release notes Django 5.0.7 fixes two security issues with severity "moderate", two security issues with severity "low", and several bugs in 5.0.6. +CVE-2024-38875: Potential denial-of-service vulnerability in ``django.utils.html.urlize()`` +=========================================================================================== + +:tfilter:`urlize` and :tfilter:`urlizetrunc` were subject to a potential +denial-of-service attack via certain inputs with a very large number of +brackets. + Bugfixes ======== diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py index ad31b8cc5b..9fe782ed2f 100644 --- a/tests/utils_tests/test_html.py +++ b/tests/utils_tests/test_html.py @@ -352,6 +352,13 @@ class TestUtilsHtml(SimpleTestCase): "foo@.example.com", "foo@localhost", "foo@localhost.", + # trim_punctuation catastrophic tests + "(" * 100_000 + ":" + ")" * 100_000, + "(" * 100_000 + "&:" + ")" * 100_000, + "([" * 100_000 + ":" + "])" * 100_000, + "[(" * 100_000 + ":" + ")]" * 100_000, + "([[" * 100_000 + ":" + "]])" * 100_000, + "&:" + ";" * 100_000, ) for value in tests: with self.subTest(value=value):