From 790eb058b0716c536a2f2e8d1c6d5079d776c22b Mon Sep 17 00:00:00 2001
From: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com>
Date: Wed, 13 Nov 2024 15:06:23 +0100
Subject: [PATCH] [4.2.x] Fixed CVE-2024-53907 -- Mitigated potential DoS in
strip_tags().
Thanks to jiangniao for the report, and Shai Berger and Natalia Bidart
for the reviews.
---
django/utils/html.py | 10 ++++++++--
docs/releases/4.2.17.txt | 16 ++++++++++++++++
tests/utils_tests/test_html.py | 7 +++++++
3 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/django/utils/html.py b/django/utils/html.py
index df38c20519..a3a7238cba 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -6,6 +6,7 @@ import re
from html.parser import HTMLParser
from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit
+from django.core.exceptions import SuspiciousOperation
from django.utils.encoding import punycode
from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
@@ -14,6 +15,7 @@ from django.utils.safestring import SafeData, SafeString, mark_safe
from django.utils.text import normalize_newlines
MAX_URL_LENGTH = 2048
+MAX_STRIP_TAGS_DEPTH = 50
@keep_lazy(SafeString)
@@ -172,15 +174,19 @@ def _strip_once(value):
@keep_lazy_text
def strip_tags(value):
"""Return the given HTML with all tags stripped."""
- # Note: in typical case this loop executes _strip_once once. Loop condition
- # is redundant, but helps to reduce number of executions of _strip_once.
value = str(value)
+ # Note: in typical case this loop executes _strip_once twice (the second
+ # execution does not remove any more tags).
+ strip_tags_depth = 0
while "<" in value and ">" in value:
+ if strip_tags_depth >= MAX_STRIP_TAGS_DEPTH:
+ raise SuspiciousOperation
new_value = _strip_once(value)
if value.count("<") == new_value.count("<"):
# _strip_once wasn't able to detect more tags.
break
value = new_value
+ strip_tags_depth += 1
return value
diff --git a/docs/releases/4.2.17.txt b/docs/releases/4.2.17.txt
index 5139d7034d..9db07f6da7 100644
--- a/docs/releases/4.2.17.txt
+++ b/docs/releases/4.2.17.txt
@@ -6,3 +6,19 @@ Django 4.2.17 release notes
Django 4.2.17 fixes one security issue with severity "high" and one security
issue with severity "moderate" in 4.2.16.
+
+CVE-2024-53907: Denial-of-service possibility in ``strip_tags()``
+=================================================================
+
+:func:`~django.utils.html.strip_tags` would be extremely slow to evaluate
+certain inputs containing large sequences of nested incomplete HTML entities.
+The ``strip_tags()`` function is used to implement the corresponding
+:tfilter:`striptags` template filter, which was thus also vulnerable.
+
+``strip_tags()`` now makes at most 50 passes through ``HTMLParser`` and raises
+a :exc:`.SuspiciousOperation` exception when more passes would be needed.
+
+Remember that absolutely NO guarantee is provided about the results of
+``strip_tags()`` being HTML safe. So NEVER mark safe the result of a
+``strip_tags()`` call without escaping it first, for example with
+:func:`django.utils.html.escape`.
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index 7ff5020fb6..579bb2a1e3 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -1,6 +1,7 @@
import os
from datetime import datetime
+from django.core.exceptions import SuspiciousOperation
from django.core.serializers.json import DjangoJSONEncoder
from django.test import SimpleTestCase
from django.utils.functional import lazystr
@@ -113,12 +114,18 @@ class TestUtilsHtml(SimpleTestCase):
("&h", "alert()h"),
(">br>br>br>X", "XX"),
+ ("<" * 50 + "a>" * 50, ""),
)
for value, output in items:
with self.subTest(value=value, output=output):
self.check_output(strip_tags, value, output)
self.check_output(strip_tags, lazystr(value), output)
+ def test_strip_tags_suspicious_operation(self):
+     value = "<" * 51 + "a>" * 51  # One more level than the 50 allowed.
+     with self.assertRaises(SuspiciousOperation):
+         strip_tags(value)
+
def test_strip_tags_files(self):
# Test with more lengthy content (also catching performance regressions)
for filename in ("strip_tags1.html", "strip_tags2.txt"):