1
0
mirror of https://github.com/django/django.git synced 2025-03-31 19:46:42 +00:00

Fixed CVE-2023-43665 -- Mitigated potential DoS in django.utils.text.Truncator when truncating HTML text.

Thanks Wenchao Li of Alibaba Group for the report.
This commit is contained in:
Natalia 2023-09-19 09:51:48 -03:00
parent 1dae65dc63
commit 17b51094d7
6 changed files with 115 additions and 13 deletions

View File

@ -83,8 +83,14 @@ def add_truncation_text(text, truncate=None):
class Truncator(SimpleLazyObject): class Truncator(SimpleLazyObject):
""" """
An object used to truncate text, either by characters or words. An object used to truncate text, either by characters or words.
When truncating HTML text (either chars or words), input will be limited to
at most `MAX_LENGTH_HTML` characters.
""" """
# 5 million characters are approximately 4000 text pages or 3 web pages.
MAX_LENGTH_HTML = 5_000_000
def __init__(self, text): def __init__(self, text):
super().__init__(lambda: str(text)) super().__init__(lambda: str(text))
@ -165,6 +171,11 @@ class Truncator(SimpleLazyObject):
if words and length <= 0: if words and length <= 0:
return "" return ""
size_limited = False
if len(text) > self.MAX_LENGTH_HTML:
text = text[: self.MAX_LENGTH_HTML]
size_limited = True
html4_singlets = ( html4_singlets = (
"br", "br",
"col", "col",
@ -221,10 +232,14 @@ class Truncator(SimpleLazyObject):
# Add it to the start of the open tags list # Add it to the start of the open tags list
open_tags.insert(0, tagname) open_tags.insert(0, tagname)
if current_len <= length:
return text
out = text[:end_text_pos]
truncate_text = add_truncation_text("", truncate) truncate_text = add_truncation_text("", truncate)
if current_len <= length:
if size_limited and truncate_text:
text += truncate_text
return text
out = text[:end_text_pos]
if truncate_text: if truncate_text:
out += truncate_text out += truncate_text
# Close any tags still open # Close any tags still open

View File

@ -2648,6 +2648,16 @@ If ``value`` is ``"<p>Joel is a slug</p>"``, the output will be
Newlines in the HTML content will be preserved. Newlines in the HTML content will be preserved.
.. admonition:: Size of input string
Processing large, potentially malformed HTML strings can be
resource-intensive and impact service performance. ``truncatechars_html``
limits input to the first five million characters.
.. versionchanged:: 3.2.22
In older versions, strings longer than five million characters were processed in full, with no size limit applied.
.. templatefilter:: truncatewords .. templatefilter:: truncatewords
``truncatewords`` ``truncatewords``
@ -2690,6 +2700,16 @@ If ``value`` is ``"<p>Joel is a slug</p>"``, the output will be
Newlines in the HTML content will be preserved. Newlines in the HTML content will be preserved.
.. admonition:: Size of input string
Processing large, potentially malformed HTML strings can be
resource-intensive and impact service performance. ``truncatewords_html``
limits input to the first five million characters.
.. versionchanged:: 3.2.22
In older versions, strings longer than five million characters were processed in full, with no size limit applied.
.. templatefilter:: unordered_list .. templatefilter:: unordered_list
``unordered_list`` ``unordered_list``

View File

@ -6,4 +6,20 @@ Django 3.2.22 release notes
Django 3.2.22 fixes a security issue with severity "moderate" in 3.2.21.
CVE-2023-43665: Denial-of-service possibility in ``django.utils.text.Truncator``
================================================================================
Following the fix for :cve:`2019-14232`, the regular expressions used in the
implementation of ``django.utils.text.Truncator``'s ``chars()`` and ``words()``
methods (with ``html=True``) were revised and improved. However, these regular
expressions still exhibited linear backtracking complexity, so when given a
very long, potentially malformed HTML input, the evaluation would still be
slow, leading to a potential denial of service vulnerability.
The ``chars()`` and ``words()`` methods are used to implement the
:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template
filters, which were thus also vulnerable.
The input processed by ``Truncator``, when operating in HTML mode, has been
limited to the first five million characters in order to avoid potential
performance and memory issues.

View File

@ -6,4 +6,20 @@ Django 4.1.12 release notes
Django 4.1.12 fixes a security issue with severity "moderate" in 4.1.11.
CVE-2023-43665: Denial-of-service possibility in ``django.utils.text.Truncator``
================================================================================
Following the fix for :cve:`2019-14232`, the regular expressions used in the
implementation of ``django.utils.text.Truncator``'s ``chars()`` and ``words()``
methods (with ``html=True``) were revised and improved. However, these regular
expressions still exhibited linear backtracking complexity, so when given a
very long, potentially malformed HTML input, the evaluation would still be
slow, leading to a potential denial of service vulnerability.
The ``chars()`` and ``words()`` methods are used to implement the
:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template
filters, which were thus also vulnerable.
The input processed by ``Truncator``, when operating in HTML mode, has been
limited to the first five million characters in order to avoid potential
performance and memory issues.

View File

@ -7,6 +7,24 @@ Django 4.2.6 release notes
Django 4.2.6 fixes a security issue with severity "moderate" and several bugs
in 4.2.5.
CVE-2023-43665: Denial-of-service possibility in ``django.utils.text.Truncator``
================================================================================
Following the fix for :cve:`2019-14232`, the regular expressions used in the
implementation of ``django.utils.text.Truncator``'s ``chars()`` and ``words()``
methods (with ``html=True``) were revised and improved. However, these regular
expressions still exhibited linear backtracking complexity, so when given a
very long, potentially malformed HTML input, the evaluation would still be
slow, leading to a potential denial of service vulnerability.
The ``chars()`` and ``words()`` methods are used to implement the
:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template
filters, which were thus also vulnerable.
The input processed by ``Truncator``, when operating in HTML mode, has been
limited to the first five million characters in order to avoid potential
performance and memory issues.
Bugfixes Bugfixes
======== ========

View File

@ -1,5 +1,6 @@
import json import json
import sys import sys
from unittest.mock import patch
from django.core.exceptions import SuspiciousFileOperation from django.core.exceptions import SuspiciousFileOperation
from django.test import SimpleTestCase from django.test import SimpleTestCase
@ -94,11 +95,17 @@ class TestUtilsText(SimpleTestCase):
text.Truncator(lazystr("The quick brown fox")).chars(10), "The quick…" text.Truncator(lazystr("The quick brown fox")).chars(10), "The quick…"
) )
def test_truncate_chars_html(self): @patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000)
def test_truncate_chars_html_size_limit(self):
max_len = text.Truncator.MAX_LENGTH_HTML
bigger_len = text.Truncator.MAX_LENGTH_HTML + 1
valid_html = "<p>Joel is a slug</p>" # 14 chars
perf_test_values = [ perf_test_values = [
(("</a" + "\t" * 50000) + "//>", None), ("</a" + "\t" * (max_len - 6) + "//>", None),
("&" * 50000, "&" * 9 + ""), ("</p" + "\t" * bigger_len + "//>", "</p" + "\t" * 6 + ""),
("&" * bigger_len, "&" * 9 + ""),
("_X<<<<<<<<<<<>", None), ("_X<<<<<<<<<<<>", None),
(valid_html * bigger_len, "<p>Joel is a…</p>"), # 10 chars
] ]
for value, expected in perf_test_values: for value, expected in perf_test_values:
with self.subTest(value=value): with self.subTest(value=value):
@ -176,15 +183,25 @@ class TestUtilsText(SimpleTestCase):
truncator = text.Truncator("<p>I &lt;3 python, what about you?</p>") truncator = text.Truncator("<p>I &lt;3 python, what about you?</p>")
self.assertEqual("<p>I &lt;3 python,…</p>", truncator.words(3, html=True)) self.assertEqual("<p>I &lt;3 python,…</p>", truncator.words(3, html=True))
@patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000)
def test_truncate_words_html_size_limit(self):
max_len = text.Truncator.MAX_LENGTH_HTML
bigger_len = text.Truncator.MAX_LENGTH_HTML + 1
valid_html = "<p>Joel is a slug</p>" # 4 words
perf_test_values = [ perf_test_values = [
("</a" + "\t" * 50000) + "//>", ("</a" + "\t" * (max_len - 6) + "//>", None),
"&" * 50000, ("</p" + "\t" * bigger_len + "//>", "</p" + "\t" * (max_len - 3) + ""),
"_X<<<<<<<<<<<>", ("&" * max_len, None), # no change
("&" * bigger_len, "&" * max_len + ""),
("_X<<<<<<<<<<<>", None),
(valid_html * bigger_len, valid_html * 12 + "<p>Joel is…</p>"), # 50 words
] ]
for value in perf_test_values: for value, expected in perf_test_values:
with self.subTest(value=value): with self.subTest(value=value):
truncator = text.Truncator(value) truncator = text.Truncator(value)
self.assertEqual(value, truncator.words(50, html=True)) self.assertEqual(
expected if expected else value, truncator.words(50, html=True)
)
def test_wrap(self): def test_wrap(self):
digits = "1234 67 9" digits = "1234 67 9"