From 3c9a2771cc80821e041b16eb36c1c37af5349d4a Mon Sep 17 00:00:00 2001
From: Shai Berger
Date: Mon, 19 Feb 2024 13:56:37 +0100
Subject: [PATCH] [4.2.x] Fixed CVE-2024-27351 -- Prevented potential ReDoS in Truncator.words().

Thanks Seokchan Yoon for the report.

Co-Authored-By: Mariusz Felisiak
---
 django/utils/text.py           | 57 ++++++++++++++++++++++++++++++++--
 docs/releases/3.2.25.txt       |  8 +++++
 docs/releases/4.2.11.txt       |  8 +++++
 tests/utils_tests/test_text.py | 26 ++++++++++++++++
 4 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/django/utils/text.py b/django/utils/text.py
index 26631641e9..e1b835e0e2 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -23,8 +23,61 @@ def capfirst(x):
     return x[0].upper() + x[1:]
 
 
-# Set up regular expressions
-re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
+# ----- Begin security-related performance workaround -----
+
+# We used to have, below
+#
+# re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
+#
+# But it was shown that this regex, in the way we use it here, has some
+# catastrophic edge-case performance features. Namely, when it is applied to
+# text with only open brackets "<<<...". The class below provides the services
+# and correct answers for the use cases, but in these edge cases does it much
+# faster.
+re_notag = _lazy_re_compile(r"([^<>\s]+)", re.S)
+re_prt = _lazy_re_compile(r"<|([^<>\s]+)", re.S)
+
+
+class WordsRegex:
+    @staticmethod
+    def search(text, pos):
+        # Look for "<" or a non-tag word.
+        partial = re_prt.search(text, pos)
+        if partial is None or partial[1] is not None:
+            return partial
+
+        # "<" was found, look for a closing ">".
+        end = text.find(">", partial.end(0))
+        if end < 0:
+            # ">" cannot be found, look for a word.
+            return re_notag.search(text, pos + 1)
+        else:
+            # "<" followed by a ">" was found -- fake a match.
+            end += 1
+            return FakeMatch(text[partial.start(0) : end], end)
+
+
+class FakeMatch:
+    __slots__ = ["_text", "_end"]
+
+    def end(self, group=0):
+        assert group == 0, "This specific object takes only group=0"
+        return self._end
+
+    def __getitem__(self, group):
+        if group == 1:
+            return None
+        assert group == 0, "This specific object takes only group in {0,1}"
+        return self._text
+
+    def __init__(self, text, end):
+        self._text, self._end = text, end
+
+
+# ----- End security-related performance workaround -----
+
+# Set up regular expressions.
+re_words = WordsRegex
 re_chars = _lazy_re_compile(r"<[^>]+?>|(.)", re.S)
 re_tag = _lazy_re_compile(r"<(/)?(\S+?)(?:(\s*/)|\s.*?)?>", re.S)
 re_newlines = _lazy_re_compile(r"\r\n|\r")  # Used in normalize_newlines
diff --git a/docs/releases/3.2.25.txt b/docs/releases/3.2.25.txt
index aa81c720d5..a3a90986ff 100644
--- a/docs/releases/3.2.25.txt
+++ b/docs/releases/3.2.25.txt
@@ -7,6 +7,14 @@ Django 3.2.25 release notes
 Django 3.2.25 fixes a security issue with severity "moderate" and a regression
 in 3.2.24.
 
+CVE-2024-27351: Potential regular expression denial-of-service in ``django.utils.text.Truncator.words()``
+=========================================================================================================
+
+``django.utils.text.Truncator.words()`` method (with ``html=True``) and
+:tfilter:`truncatewords_html` template filter were subject to a potential
+regular expression denial-of-service attack using a suitably crafted string
+(follow up to :cve:`2019-14232` and :cve:`2023-43665`).
+
 Bugfixes
 ========
 
diff --git a/docs/releases/4.2.11.txt b/docs/releases/4.2.11.txt
index 82c691fcb7..c562e47866 100644
--- a/docs/releases/4.2.11.txt
+++ b/docs/releases/4.2.11.txt
@@ -7,6 +7,14 @@ Django 4.2.11 release notes
 Django 4.2.11 fixes a security issue with severity "moderate" and a regression
 in 4.2.10.
 
+CVE-2024-27351: Potential regular expression denial-of-service in ``django.utils.text.Truncator.words()``
+=========================================================================================================
+
+``django.utils.text.Truncator.words()`` method (with ``html=True``) and
+:tfilter:`truncatewords_html` template filter were subject to a potential
+regular expression denial-of-service attack using a suitably crafted string
+(follow up to :cve:`2019-14232` and :cve:`2023-43665`).
+
 Bugfixes
 ========
 
diff --git a/tests/utils_tests/test_text.py b/tests/utils_tests/test_text.py
index 7d20445b1e..d1890e7b6d 100644
--- a/tests/utils_tests/test_text.py
+++ b/tests/utils_tests/test_text.py
@@ -183,6 +183,32 @@ class TestUtilsText(SimpleTestCase):
 
         truncator = text.Truncator("

I <3 python, what about you?

") self.assertEqual("

I <3 python,…

", truncator.words(3, html=True)) + # Only open brackets. + test = "<" * 60_000 + truncator = text.Truncator(test) + self.assertEqual(truncator.words(1, html=True), test) + + # Tags with special chars in attrs. + truncator = text.Truncator( + """Hello, my dear lady!""" + ) + self.assertEqual( + """Hello, my dear…""", + truncator.words(3, html=True), + ) + + # Tags with special non-latin chars in attrs. + truncator = text.Truncator("""

Hello, my dear lady!

""") + self.assertEqual( + """

Hello, my dear…

""", + truncator.words(3, html=True), + ) + + # Misplaced brackets. + truncator = text.Truncator("hello >< world") + self.assertEqual(truncator.words(1, html=True), "hello…") + self.assertEqual(truncator.words(2, html=True), "hello >< world") + @patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000) def test_truncate_words_html_size_limit(self): max_len = text.Truncator.MAX_LENGTH_HTML