mirror of
https://github.com/django/django.git
synced 2025-04-09 07:56:43 +00:00
Fixed #35440 -- Simplified parse_header_parameters by leveraging stdlib's Message.
The `parse_header_parameters` function historically used Python's `cgi` module (now deprecated). In 34e2148fc725e7200050f74130d7523e3cd8507a, the logic was inlined to work around this deprecation (#33173). Later, in d4d5427571b4bf3a21c902276c2a00215c2a37cc, the header parsing logic was further cleaned up to align with `multipartparser.py` (#33697). This change takes it a step further by replacing the copied `cgi` logic with Python's `email.message.Message` API for a more robust and maintainable header parsing implementation. Thanks to Raphael Gaschignard for testing, and to Adam Johnson and Shai Berger for reviews. Co-authored-by: Ben Cail <bcail@crossway.org> Co-authored-by: Natalia <124304+nessita@users.noreply.github.com>
This commit is contained in:
parent
0d92428d77
commit
9aabe7eae3
@ -3,8 +3,9 @@ import re
|
||||
import unicodedata
|
||||
from binascii import Error as BinasciiError
|
||||
from datetime import UTC, datetime
|
||||
from email.utils import formatdate
|
||||
from urllib.parse import quote, unquote
|
||||
from email.message import Message
|
||||
from email.utils import collapse_rfc2231_value, formatdate
|
||||
from urllib.parse import quote
|
||||
from urllib.parse import urlencode as original_urlencode
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
@ -24,6 +25,7 @@ ETAG_MATCH = _lazy_re_compile(
|
||||
re.X,
|
||||
)
|
||||
|
||||
MAX_HEADER_LENGTH = 10_000
|
||||
MONTHS = "jan feb mar apr may jun jul aug sep oct nov dec".split()
|
||||
__D = r"(?P<day>[0-9]{2})"
|
||||
__D2 = r"(?P<day>[ 0-9][0-9])"
|
||||
@ -310,46 +312,28 @@ def escape_leading_slashes(url):
|
||||
return url
|
||||
|
||||
|
||||
def _parseparam(s):
|
||||
while s[:1] == ";":
|
||||
s = s[1:]
|
||||
end = s.find(";")
|
||||
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
|
||||
end = s.find(";", end + 1)
|
||||
if end < 0:
|
||||
end = len(s)
|
||||
f = s[:end]
|
||||
yield f.strip()
|
||||
s = s[end:]
|
||||
|
||||
|
||||
def parse_header_parameters(line):
|
||||
def parse_header_parameters(line, max_length=MAX_HEADER_LENGTH):
|
||||
"""
|
||||
Parse a Content-type like header.
|
||||
Return the main content-type and a dictionary of options.
|
||||
|
||||
If `line` is longer than `max_length`, `ValueError` is raised.
|
||||
"""
|
||||
parts = _parseparam(";" + line)
|
||||
key = parts.__next__().lower()
|
||||
if max_length is not None and line and len(line) > max_length:
|
||||
raise ValueError("Unable to parse header parameters (value too long).")
|
||||
|
||||
m = Message()
|
||||
m["content-type"] = line
|
||||
params = m.get_params()
|
||||
|
||||
pdict = {}
|
||||
for p in parts:
|
||||
i = p.find("=")
|
||||
if i >= 0:
|
||||
has_encoding = False
|
||||
name = p[:i].strip().lower()
|
||||
if name.endswith("*"):
|
||||
# Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
|
||||
# https://tools.ietf.org/html/rfc2231#section-4
|
||||
name = name[:-1]
|
||||
if p.count("'") == 2:
|
||||
has_encoding = True
|
||||
value = p[i + 1 :].strip()
|
||||
if len(value) >= 2 and value[0] == value[-1] == '"':
|
||||
value = value[1:-1]
|
||||
value = value.replace("\\\\", "\\").replace('\\"', '"')
|
||||
if has_encoding:
|
||||
encoding, lang, value = value.split("'")
|
||||
value = unquote(value, encoding=encoding)
|
||||
pdict[name] = value
|
||||
key = params.pop(0)[0].lower()
|
||||
for name, value in params:
|
||||
if not name:
|
||||
continue
|
||||
if isinstance(value, tuple):
|
||||
value = collapse_rfc2231_value(value)
|
||||
pdict[name] = value
|
||||
return key, pdict
|
||||
|
||||
|
||||
|
@ -311,6 +311,10 @@ Miscellaneous
|
||||
* The :ref:`JSON <serialization-formats-json>` serializer now writes a newline
|
||||
at the end of the output, even without the ``indent`` option set.
|
||||
|
||||
* The undocumented ``django.utils.http.parse_header_parameters()`` function is
|
||||
refactored to use Python's :py:class:`email.message.Message` for parsing.
|
||||
Input headers exceeding 10000 characters will now raise :exc:`ValueError`.
|
||||
|
||||
.. _deprecated-features-6.0:
|
||||
|
||||
Features deprecated in 6.0
|
||||
|
@ -6,6 +6,7 @@ from unittest import mock
|
||||
from django.test import SimpleTestCase
|
||||
from django.utils.datastructures import MultiValueDict
|
||||
from django.utils.http import (
|
||||
MAX_HEADER_LENGTH,
|
||||
base36_to_int,
|
||||
content_disposition_header,
|
||||
escape_leading_slashes,
|
||||
@ -424,6 +425,8 @@ class EscapeLeadingSlashesTests(unittest.TestCase):
|
||||
class ParseHeaderParameterTests(unittest.TestCase):
|
||||
def test_basic(self):
|
||||
tests = [
|
||||
("", ("", {})),
|
||||
(None, ("none", {})),
|
||||
("text/plain", ("text/plain", {})),
|
||||
("text/vnd.just.made.this.up ; ", ("text/vnd.just.made.this.up", {})),
|
||||
("text/plain;charset=us-ascii", ("text/plain", {"charset": "us-ascii"})),
|
||||
@ -447,10 +450,18 @@ class ParseHeaderParameterTests(unittest.TestCase):
|
||||
'attachment; filename="strange;name";size=123;',
|
||||
("attachment", {"filename": "strange;name", "size": "123"}),
|
||||
),
|
||||
(
|
||||
'attachment; filename="strange;name";;;;size=123;;;',
|
||||
("attachment", {"filename": "strange;name", "size": "123"}),
|
||||
),
|
||||
(
|
||||
'form-data; name="files"; filename="fo\\"o;bar"',
|
||||
("form-data", {"name": "files", "filename": 'fo"o;bar'}),
|
||||
),
|
||||
(
|
||||
'form-data; name="files"; filename="\\"fo\\"o;b\\\\ar\\""',
|
||||
("form-data", {"name": "files", "filename": '"fo"o;b\\ar"'}),
|
||||
),
|
||||
]
|
||||
for header, expected in tests:
|
||||
with self.subTest(header=header):
|
||||
@ -480,12 +491,13 @@ class ParseHeaderParameterTests(unittest.TestCase):
|
||||
"""
|
||||
Test wrongly formatted RFC 2231 headers (missing double single quotes).
|
||||
Parsing should not crash (#24209).
|
||||
But stdlib email still decodes (#35440).
|
||||
"""
|
||||
test_data = (
|
||||
(
|
||||
"Content-Type: application/x-stuff; "
|
||||
"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
|
||||
"'This%20is%20%2A%2A%2Afun%2A%2A%2A",
|
||||
"'This is ***fun***",
|
||||
),
|
||||
("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
|
||||
("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
|
||||
@ -494,6 +506,37 @@ class ParseHeaderParameterTests(unittest.TestCase):
|
||||
parsed = parse_header_parameters(raw_line)
|
||||
self.assertEqual(parsed[1]["title"], expected_title)
|
||||
|
||||
def test_header_max_length(self):
|
||||
base_header = "Content-Type: application/x-stuff; title*="
|
||||
base_header_len = len(base_header)
|
||||
|
||||
test_data = [
|
||||
(MAX_HEADER_LENGTH, {}),
|
||||
(MAX_HEADER_LENGTH, {"max_length": None}),
|
||||
(MAX_HEADER_LENGTH + 1, {"max_length": None}),
|
||||
(100, {"max_length": 100}),
|
||||
]
|
||||
for line_length, kwargs in test_data:
|
||||
with self.subTest(line_length=line_length, kwargs=kwargs):
|
||||
title = "x" * (line_length - base_header_len)
|
||||
line = base_header + title
|
||||
assert len(line) == line_length
|
||||
|
||||
parsed = parse_header_parameters(line, **kwargs)
|
||||
|
||||
expected = ("content-type: application/x-stuff", {"title": title})
|
||||
self.assertEqual(parsed, expected)
|
||||
|
||||
def test_header_too_long(self):
|
||||
test_data = [
|
||||
("x" * (MAX_HEADER_LENGTH + 1), {}),
|
||||
("x" * 101, {"max_length": 100}),
|
||||
]
|
||||
for line, kwargs in test_data:
|
||||
with self.subTest(line_length=len(line), kwargs=kwargs):
|
||||
with self.assertRaises(ValueError):
|
||||
parse_header_parameters(line, **kwargs)
|
||||
|
||||
|
||||
class ContentDispositionHeaderTests(unittest.TestCase):
|
||||
def test_basic(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user