Fixed #35440 -- Simplified parse_header_parameters by leveraging stdlib's Message.

The `parse_header_parameters` function historically used Python's `cgi` module (now deprecated). In 34e2148fc7, the logic was inlined to work around this deprecation (#33173). Later, in d4d5427571, the header parsing logic was further cleaned up to align with `multipartparser.py` (#33697). This change takes it a step further by replacing the copied `cgi` logic with Python's `email.message.Message` API for a more robust and maintainable header parsing implementation. Thanks to Raphael Gaschignard for testing, and to Adam Johnson and Shai Berger for reviews. Co-authored-by: Ben Cail <bcail@crossway.org> Co-authored-by: Natalia <124304+nessita@users.noreply.github.com>
2025-11-07 07:15:35 +00:00 · 2024-07-29 22:05:10 +03:00
parent 0d92428d77
commit 9aabe7eae3
3 changed files with 69 additions and 38 deletions
--- a/django/utils/http.py
+++ b/django/utils/http.py
@@ -3,8 +3,9 @@ import re
 import unicodedata
 from binascii import Error as BinasciiError
 from datetime import UTC, datetime
-from email.utils import formatdate
+from email.message import Message
-from urllib.parse import quote, unquote
+from email.utils import collapse_rfc2231_value, formatdate
 from urllib.parse import quote
 from urllib.parse import urlencode as original_urlencode
 from urllib.parse import urlsplit
@@ -24,6 +25,7 @@ ETAG_MATCH = _lazy_re_compile(
    re.X,
 )
 MAX_HEADER_LENGTH = 10_000
 MONTHS = "jan feb mar apr may jun jul aug sep oct nov dec".split()
 __D = r"(?P<day>[0-9]{2})"
 __D2 = r"(?P<day>[ 0-9][0-9])"
@@ -310,46 +312,28 @@ def escape_leading_slashes(url):
    return url
-def _parseparam(s):
+def parse_header_parameters(line, max_length=MAX_HEADER_LENGTH):
    while s[:1] == ";":
        s = s[1:]
        end = s.find(";")
        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
            end = s.find(";", end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        yield f.strip()
        s = s[end:]
 def parse_header_parameters(line):
    """
    Parse a Content-type like header.
    Return the main content-type and a dictionary of options.
    If `line` is longer than `max_length`, `ValueError` is raised.
    """
-    parts = _parseparam(";" + line)
+    if max_length is not None and line and len(line) > max_length:
-    key = parts.__next__().lower()
+        raise ValueError("Unable to parse header parameters (value too long).")
    m = Message()
    m["content-type"] = line
    params = m.get_params()
    pdict = {}
-    for p in parts:
+    key = params.pop(0)[0].lower()
-        i = p.find("=")
+    for name, value in params:
-        if i >= 0:
+        if not name:
-            has_encoding = False
+            continue
-            name = p[:i].strip().lower()
+        if isinstance(value, tuple):
-            if name.endswith("*"):
+            value = collapse_rfc2231_value(value)
-                # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
+        pdict[name] = value
                # https://tools.ietf.org/html/rfc2231#section-4
                name = name[:-1]
                if p.count("'") == 2:
                    has_encoding = True
            value = p[i + 1 :].strip()
            if len(value) >= 2 and value[0] == value[-1] == '"':
                value = value[1:-1]
                value = value.replace("\\\\", "\\").replace('\\"', '"')
            if has_encoding:
                encoding, lang, value = value.split("'")
                value = unquote(value, encoding=encoding)
            pdict[name] = value
    return key, pdict
--- a/docs/releases/6.0.txt
+++ b/docs/releases/6.0.txt
@@ -311,6 +311,10 @@ Miscellaneous
 * The :ref:`JSON <serialization-formats-json>` serializer now writes a newline
  at the end of the output, even without the ``indent`` option set.
 * The undocumented ``django.utils.http.parse_header_parameters()`` function is
  refactored to use Python's :py:class:`email.message.Message` for parsing.
  Input headers exceeding 10000 characters will now raise :exc:`ValueError`.
 .. _deprecated-features-6.0:
 Features deprecated in 6.0
--- a/tests/utils_tests/test_http.py
+++ b/tests/utils_tests/test_http.py
@@ -6,6 +6,7 @@ from unittest import mock
 from django.test import SimpleTestCase
 from django.utils.datastructures import MultiValueDict
 from django.utils.http import (
    MAX_HEADER_LENGTH,
    base36_to_int,
    content_disposition_header,
    escape_leading_slashes,
@@ -424,6 +425,8 @@ class EscapeLeadingSlashesTests(unittest.TestCase):
 class ParseHeaderParameterTests(unittest.TestCase):
    def test_basic(self):
        tests = [
            ("", ("", {})),
            (None, ("none", {})),
            ("text/plain", ("text/plain", {})),
            ("text/vnd.just.made.this.up ; ", ("text/vnd.just.made.this.up", {})),
            ("text/plain;charset=us-ascii", ("text/plain", {"charset": "us-ascii"})),
@@ -447,10 +450,18 @@ class ParseHeaderParameterTests(unittest.TestCase):
                'attachment; filename="strange;name";size=123;',
                ("attachment", {"filename": "strange;name", "size": "123"}),
            ),
            (
                'attachment; filename="strange;name";;;;size=123;;;',
                ("attachment", {"filename": "strange;name", "size": "123"}),
            ),
            (
                'form-data; name="files"; filename="fo\\"o;bar"',
                ("form-data", {"name": "files", "filename": 'fo"o;bar'}),
            ),
            (
                'form-data; name="files"; filename="\\"fo\\"o;b\\\\ar\\""',
                ("form-data", {"name": "files", "filename": '"fo"o;b\\ar"'}),
            ),
        ]
        for header, expected in tests:
            with self.subTest(header=header):
@@ -480,12 +491,13 @@ class ParseHeaderParameterTests(unittest.TestCase):
        """
        Test wrongly formatted RFC 2231 headers (missing double single quotes).
        Parsing should not crash (#24209).
        But stdlib email still decodes (#35440).
        """
        test_data = (
            (
                "Content-Type: application/x-stuff; "
                "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
-                "'This%20is%20%2A%2A%2Afun%2A%2A%2A",
+                "'This is ***fun***",
            ),
            ("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
            ("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
@@ -494,6 +506,37 @@ class ParseHeaderParameterTests(unittest.TestCase):
            parsed = parse_header_parameters(raw_line)
            self.assertEqual(parsed[1]["title"], expected_title)
    def test_header_max_length(self):
        # Headers that do not exceed the limit must parse without raising.
        # Each case is (total line length, extra kwargs passed to the parser).
        base_header = "Content-Type: application/x-stuff; title*="
        base_header_len = len(base_header)
        test_data = [
            # Exactly at the default limit: allowed (the check is "longer than").
            (MAX_HEADER_LENGTH, {}),
            # max_length=None disables the length check, at and above the
            # default limit.
            (MAX_HEADER_LENGTH, {"max_length": None}),
            (MAX_HEADER_LENGTH + 1, {"max_length": None}),
            # Exactly at a caller-supplied limit: allowed.
            (100, {"max_length": 100}),
        ]
        for line_length, kwargs in test_data:
            with self.subTest(line_length=line_length, kwargs=kwargs):
                # Pad the title so the whole line is exactly line_length long.
                title = "x" * (line_length - base_header_len)
                line = base_header + title
                # Sanity check on the fixture itself, not on the parser.
                assert len(line) == line_length
                parsed = parse_header_parameters(line, **kwargs)
                # The "Content-Type: " prefix is part of the input line, so it
                # shows up (lowercased) in the returned key.
                expected = ("content-type: application/x-stuff", {"title": title})
                self.assertEqual(parsed, expected)
    def test_header_too_long(self):
        # Headers one character over the limit must be rejected with
        # ValueError, for both the default and a caller-supplied max_length.
        test_data = [
            ("x" * (MAX_HEADER_LENGTH + 1), {}),
            ("x" * 101, {"max_length": 100}),
        ]
        for line, kwargs in test_data:
            # Report the length rather than the (very long) line in subTest.
            with self.subTest(line_length=len(line), kwargs=kwargs):
                with self.assertRaises(ValueError):
                    parse_header_parameters(line, **kwargs)
 class ContentDispositionHeaderTests(unittest.TestCase):
    def test_basic(self):