From 17b51094d778b421bb2b3aae0c270894b050455d Mon Sep 17 00:00:00 2001 From: Natalia <124304+nessita@users.noreply.github.com> Date: Tue, 19 Sep 2023 09:51:48 -0300 Subject: [PATCH] Fixed CVE-2023-43665 -- Mitigated potential DoS in django.utils.text.Truncator when truncating HTML text. Thanks Wenchao Li of Alibaba Group for the report. --- django/utils/text.py | 21 ++++++++++++++++++--- docs/ref/templates/builtins.txt | 20 ++++++++++++++++++++ docs/releases/3.2.22.txt | 18 +++++++++++++++++- docs/releases/4.1.12.txt | 18 +++++++++++++++++- docs/releases/4.2.6.txt | 18 ++++++++++++++++++ tests/utils_tests/test_text.py | 33 +++++++++++++++++++++++++-------- 6 files changed, 115 insertions(+), 13 deletions(-) diff --git a/django/utils/text.py b/django/utils/text.py index 082673a0cc..295f919b51 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -83,8 +83,14 @@ def add_truncation_text(text, truncate=None): class Truncator(SimpleLazyObject): """ An object used to truncate text, either by characters or words. + + When truncating HTML text (either chars or words), input will be limited to + at most `MAX_LENGTH_HTML` characters. """ + # 5 million characters are approximately 4000 text pages or 3 web pages. 
+ MAX_LENGTH_HTML = 5_000_000 + def __init__(self, text): super().__init__(lambda: str(text)) @@ -165,6 +171,11 @@ class Truncator(SimpleLazyObject): if words and length <= 0: return "" + size_limited = False + if len(text) > self.MAX_LENGTH_HTML: + text = text[: self.MAX_LENGTH_HTML] + size_limited = True + html4_singlets = ( "br", "col", @@ -221,10 +232,14 @@ class Truncator(SimpleLazyObject): # Add it to the start of the open tags list open_tags.insert(0, tagname) - if current_len <= length: - return text - out = text[:end_text_pos] truncate_text = add_truncation_text("", truncate) + + if current_len <= length: + if size_limited and truncate_text: + text += truncate_text + return text + + out = text[:end_text_pos] if truncate_text: out += truncate_text # Close any tags still open diff --git a/docs/ref/templates/builtins.txt b/docs/ref/templates/builtins.txt index e0e06dafe8..038a2093c4 100644 --- a/docs/ref/templates/builtins.txt +++ b/docs/ref/templates/builtins.txt @@ -2648,6 +2648,16 @@ If ``value`` is ``"

<p>Joel is a slug</p>"``, the output will be ``"<p>Joel i…</p>"``. Newlines in the HTML content will be preserved. +.. admonition:: Size of input string + + Processing large, potentially malformed HTML strings can be + resource-intensive and impact service performance. ``truncatechars_html`` + limits input to the first five million characters. + +.. versionchanged:: 3.2.22 + + In older versions, strings over five million characters were processed. + .. templatefilter:: truncatewords ``truncatewords`` @@ -2690,6 +2700,16 @@ If ``value`` is ``"

<p>Joel is a slug</p>"``, the output will be ``"<p>Joel is …</p>"``. Newlines in the HTML content will be preserved. +.. admonition:: Size of input string + + Processing large, potentially malformed HTML strings can be + resource-intensive and impact service performance. ``truncatewords_html`` + limits input to the first five million characters. + +.. versionchanged:: 3.2.22 + + In older versions, strings over five million characters were processed. + .. templatefilter:: unordered_list ``unordered_list`` diff --git a/docs/releases/3.2.22.txt b/docs/releases/3.2.22.txt index 6e1815de11..cfedc41de8 100644 --- a/docs/releases/3.2.22.txt +++ b/docs/releases/3.2.22.txt @@ -6,4 +6,20 @@ Django 3.2.22 release notes Django 3.2.22 fixes a security issue with severity "moderate" in 3.2.21. -... +CVE-2023-43665: Denial-of-service possibility in ``django.utils.text.Truncator`` +================================================================================ + +Following the fix for :cve:`2019-14232`, the regular expressions used in the +implementation of ``django.utils.text.Truncator``'s ``chars()`` and ``words()`` +methods (with ``html=True``) were revised and improved. However, these regular +expressions still exhibited linear backtracking complexity, so when given a +very long, potentially malformed HTML input, the evaluation would still be +slow, leading to a potential denial of service vulnerability. + +The ``chars()`` and ``words()`` methods are used to implement the +:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template +filters, which were thus also vulnerable. + +The input processed by ``Truncator``, when operating in HTML mode, has been +limited to the first five million characters in order to avoid potential +performance and memory issues. 
diff --git a/docs/releases/4.1.12.txt b/docs/releases/4.1.12.txt index d81df3aefd..6c331dd318 100644 --- a/docs/releases/4.1.12.txt +++ b/docs/releases/4.1.12.txt @@ -6,4 +6,20 @@ Django 4.1.12 release notes Django 4.1.12 fixes a security issue with severity "moderate" in 4.1.11. -... +CVE-2023-43665: Denial-of-service possibility in ``django.utils.text.Truncator`` +================================================================================ + +Following the fix for :cve:`2019-14232`, the regular expressions used in the +implementation of ``django.utils.text.Truncator``'s ``chars()`` and ``words()`` +methods (with ``html=True``) were revised and improved. However, these regular +expressions still exhibited linear backtracking complexity, so when given a +very long, potentially malformed HTML input, the evaluation would still be +slow, leading to a potential denial of service vulnerability. + +The ``chars()`` and ``words()`` methods are used to implement the +:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template +filters, which were thus also vulnerable. + +The input processed by ``Truncator``, when operating in HTML mode, has been +limited to the first five million characters in order to avoid potential +performance and memory issues. diff --git a/docs/releases/4.2.6.txt b/docs/releases/4.2.6.txt index 31929468a6..9b99d8c622 100644 --- a/docs/releases/4.2.6.txt +++ b/docs/releases/4.2.6.txt @@ -7,6 +7,24 @@ Django 4.2.6 release notes Django 4.2.6 fixes a security issue with severity "moderate" and several bugs in 4.2.5. +CVE-2023-43665: Denial-of-service possibility in ``django.utils.text.Truncator`` +================================================================================ + +Following the fix for :cve:`2019-14232`, the regular expressions used in the +implementation of ``django.utils.text.Truncator``'s ``chars()`` and ``words()`` +methods (with ``html=True``) were revised and improved. 
However, these regular +expressions still exhibited linear backtracking complexity, so when given a +very long, potentially malformed HTML input, the evaluation would still be +slow, leading to a potential denial of service vulnerability. + +The ``chars()`` and ``words()`` methods are used to implement the +:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template +filters, which were thus also vulnerable. + +The input processed by ``Truncator``, when operating in HTML mode, has been +limited to the first five million characters in order to avoid potential +performance and memory issues. + Bugfixes ======== diff --git a/tests/utils_tests/test_text.py b/tests/utils_tests/test_text.py index 12328d7693..77e637ae6c 100644 --- a/tests/utils_tests/test_text.py +++ b/tests/utils_tests/test_text.py @@ -1,5 +1,6 @@ import json import sys +from unittest.mock import patch from django.core.exceptions import SuspiciousFileOperation from django.test import SimpleTestCase @@ -94,11 +95,17 @@ class TestUtilsText(SimpleTestCase): text.Truncator(lazystr("The quick brown fox")).chars(10), "The quick…" ) - def test_truncate_chars_html(self): + @patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000) + def test_truncate_chars_html_size_limit(self): + max_len = text.Truncator.MAX_LENGTH_HTML + bigger_len = text.Truncator.MAX_LENGTH_HTML + 1 + valid_html = "

Joel is a slug

" # 14 chars perf_test_values = [ - (("", None), - ("&" * 50000, "&" * 9 + "…"), + ("", None), + ("", "", None), + (valid_html * bigger_len, "

Joel is a…

"), # 10 chars ] for value, expected in perf_test_values: with self.subTest(value=value): @@ -176,15 +183,25 @@ class TestUtilsText(SimpleTestCase): truncator = text.Truncator("

I <3 python, what about you?

") self.assertEqual("

I <3 python,…

", truncator.words(3, html=True)) + @patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000) + def test_truncate_words_html_size_limit(self): + max_len = text.Truncator.MAX_LENGTH_HTML + bigger_len = text.Truncator.MAX_LENGTH_HTML + 1 + valid_html = "

Joel is a slug

" # 4 words perf_test_values = [ - ("", - "&" * 50000, - "_X<<<<<<<<<<<>", + ("", None), + ("", "", None), + (valid_html * bigger_len, valid_html * 12 + "

Joel is…

"), # 50 words ] - for value in perf_test_values: + for value, expected in perf_test_values: with self.subTest(value=value): truncator = text.Truncator(value) - self.assertEqual(value, truncator.words(50, html=True)) + self.assertEqual( + expected if expected else value, truncator.words(50, html=True) + ) def test_wrap(self): digits = "1234 67 9"