mirror of
https://github.com/django/django.git
synced 2025-06-05 03:29:12 +00:00
Fixed #35440 -- Simplified parse_header_parameters by leveraging stdlib's Message.
The `parse_header_parameters` function historically used Python's `cgi` module (now deprecated). In 34e2148fc725e7200050f74130d7523e3cd8507a, the logic was inlined to work around this deprecation (#33173). Later, in d4d5427571b4bf3a21c902276c2a00215c2a37cc, the header parsing logic was further cleaned up to align with `multipartparser.py` (#33697). This change takes it a step further by replacing the copied `cgi` logic with Python's `email.message.Message` API for a more robust and maintainable header parsing implementation. Thanks to Raphael Gaschignard for testing, and to Adam Johnson and Shai Berger for reviews. Co-authored-by: Ben Cail <bcail@crossway.org> Co-authored-by: Natalia <124304+nessita@users.noreply.github.com>
This commit is contained in:
parent
0d92428d77
commit
9aabe7eae3
@ -3,8 +3,9 @@ import re
|
|||||||
import unicodedata
|
import unicodedata
|
||||||
from binascii import Error as BinasciiError
|
from binascii import Error as BinasciiError
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from email.utils import formatdate
|
from email.message import Message
|
||||||
from urllib.parse import quote, unquote
|
from email.utils import collapse_rfc2231_value, formatdate
|
||||||
|
from urllib.parse import quote
|
||||||
from urllib.parse import urlencode as original_urlencode
|
from urllib.parse import urlencode as original_urlencode
|
||||||
from urllib.parse import urlsplit
|
from urllib.parse import urlsplit
|
||||||
|
|
||||||
@ -24,6 +25,7 @@ ETAG_MATCH = _lazy_re_compile(
|
|||||||
re.X,
|
re.X,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
MAX_HEADER_LENGTH = 10_000
|
||||||
MONTHS = "jan feb mar apr may jun jul aug sep oct nov dec".split()
|
MONTHS = "jan feb mar apr may jun jul aug sep oct nov dec".split()
|
||||||
__D = r"(?P<day>[0-9]{2})"
|
__D = r"(?P<day>[0-9]{2})"
|
||||||
__D2 = r"(?P<day>[ 0-9][0-9])"
|
__D2 = r"(?P<day>[ 0-9][0-9])"
|
||||||
@ -310,46 +312,28 @@ def escape_leading_slashes(url):
|
|||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
def _parseparam(s):
|
def parse_header_parameters(line, max_length=MAX_HEADER_LENGTH):
|
||||||
while s[:1] == ";":
|
|
||||||
s = s[1:]
|
|
||||||
end = s.find(";")
|
|
||||||
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
|
|
||||||
end = s.find(";", end + 1)
|
|
||||||
if end < 0:
|
|
||||||
end = len(s)
|
|
||||||
f = s[:end]
|
|
||||||
yield f.strip()
|
|
||||||
s = s[end:]
|
|
||||||
|
|
||||||
|
|
||||||
def parse_header_parameters(line):
|
|
||||||
"""
|
"""
|
||||||
Parse a Content-type like header.
|
Parse a Content-type like header.
|
||||||
Return the main content-type and a dictionary of options.
|
Return the main content-type and a dictionary of options.
|
||||||
|
|
||||||
|
If `line` is longer than `max_length`, `ValueError` is raised.
|
||||||
"""
|
"""
|
||||||
parts = _parseparam(";" + line)
|
if max_length is not None and line and len(line) > max_length:
|
||||||
key = parts.__next__().lower()
|
raise ValueError("Unable to parse header parameters (value too long).")
|
||||||
|
|
||||||
|
m = Message()
|
||||||
|
m["content-type"] = line
|
||||||
|
params = m.get_params()
|
||||||
|
|
||||||
pdict = {}
|
pdict = {}
|
||||||
for p in parts:
|
key = params.pop(0)[0].lower()
|
||||||
i = p.find("=")
|
for name, value in params:
|
||||||
if i >= 0:
|
if not name:
|
||||||
has_encoding = False
|
continue
|
||||||
name = p[:i].strip().lower()
|
if isinstance(value, tuple):
|
||||||
if name.endswith("*"):
|
value = collapse_rfc2231_value(value)
|
||||||
# Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
|
pdict[name] = value
|
||||||
# https://tools.ietf.org/html/rfc2231#section-4
|
|
||||||
name = name[:-1]
|
|
||||||
if p.count("'") == 2:
|
|
||||||
has_encoding = True
|
|
||||||
value = p[i + 1 :].strip()
|
|
||||||
if len(value) >= 2 and value[0] == value[-1] == '"':
|
|
||||||
value = value[1:-1]
|
|
||||||
value = value.replace("\\\\", "\\").replace('\\"', '"')
|
|
||||||
if has_encoding:
|
|
||||||
encoding, lang, value = value.split("'")
|
|
||||||
value = unquote(value, encoding=encoding)
|
|
||||||
pdict[name] = value
|
|
||||||
return key, pdict
|
return key, pdict
|
||||||
|
|
||||||
|
|
||||||
|
@ -311,6 +311,10 @@ Miscellaneous
|
|||||||
* The :ref:`JSON <serialization-formats-json>` serializer now writes a newline
|
* The :ref:`JSON <serialization-formats-json>` serializer now writes a newline
|
||||||
at the end of the output, even without the ``indent`` option set.
|
at the end of the output, even without the ``indent`` option set.
|
||||||
|
|
||||||
|
* The undocumented ``django.utils.http.parse_header_parameters()`` function is
|
||||||
|
refactored to use Python's :py:class:`email.message.Message` for parsing.
|
||||||
|
Input headers exceeding 10000 characters will now raise :exc:`ValueError`.
|
||||||
|
|
||||||
.. _deprecated-features-6.0:
|
.. _deprecated-features-6.0:
|
||||||
|
|
||||||
Features deprecated in 6.0
|
Features deprecated in 6.0
|
||||||
|
@ -6,6 +6,7 @@ from unittest import mock
|
|||||||
from django.test import SimpleTestCase
|
from django.test import SimpleTestCase
|
||||||
from django.utils.datastructures import MultiValueDict
|
from django.utils.datastructures import MultiValueDict
|
||||||
from django.utils.http import (
|
from django.utils.http import (
|
||||||
|
MAX_HEADER_LENGTH,
|
||||||
base36_to_int,
|
base36_to_int,
|
||||||
content_disposition_header,
|
content_disposition_header,
|
||||||
escape_leading_slashes,
|
escape_leading_slashes,
|
||||||
@ -424,6 +425,8 @@ class EscapeLeadingSlashesTests(unittest.TestCase):
|
|||||||
class ParseHeaderParameterTests(unittest.TestCase):
|
class ParseHeaderParameterTests(unittest.TestCase):
|
||||||
def test_basic(self):
|
def test_basic(self):
|
||||||
tests = [
|
tests = [
|
||||||
|
("", ("", {})),
|
||||||
|
(None, ("none", {})),
|
||||||
("text/plain", ("text/plain", {})),
|
("text/plain", ("text/plain", {})),
|
||||||
("text/vnd.just.made.this.up ; ", ("text/vnd.just.made.this.up", {})),
|
("text/vnd.just.made.this.up ; ", ("text/vnd.just.made.this.up", {})),
|
||||||
("text/plain;charset=us-ascii", ("text/plain", {"charset": "us-ascii"})),
|
("text/plain;charset=us-ascii", ("text/plain", {"charset": "us-ascii"})),
|
||||||
@ -447,10 +450,18 @@ class ParseHeaderParameterTests(unittest.TestCase):
|
|||||||
'attachment; filename="strange;name";size=123;',
|
'attachment; filename="strange;name";size=123;',
|
||||||
("attachment", {"filename": "strange;name", "size": "123"}),
|
("attachment", {"filename": "strange;name", "size": "123"}),
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'attachment; filename="strange;name";;;;size=123;;;',
|
||||||
|
("attachment", {"filename": "strange;name", "size": "123"}),
|
||||||
|
),
|
||||||
(
|
(
|
||||||
'form-data; name="files"; filename="fo\\"o;bar"',
|
'form-data; name="files"; filename="fo\\"o;bar"',
|
||||||
("form-data", {"name": "files", "filename": 'fo"o;bar'}),
|
("form-data", {"name": "files", "filename": 'fo"o;bar'}),
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'form-data; name="files"; filename="\\"fo\\"o;b\\\\ar\\""',
|
||||||
|
("form-data", {"name": "files", "filename": '"fo"o;b\\ar"'}),
|
||||||
|
),
|
||||||
]
|
]
|
||||||
for header, expected in tests:
|
for header, expected in tests:
|
||||||
with self.subTest(header=header):
|
with self.subTest(header=header):
|
||||||
@ -480,12 +491,13 @@ class ParseHeaderParameterTests(unittest.TestCase):
|
|||||||
"""
|
"""
|
||||||
Test wrongly formatted RFC 2231 headers (missing double single quotes).
|
Test wrongly formatted RFC 2231 headers (missing double single quotes).
|
||||||
Parsing should not crash (#24209).
|
Parsing should not crash (#24209).
|
||||||
|
But stdlib email still decodes (#35440).
|
||||||
"""
|
"""
|
||||||
test_data = (
|
test_data = (
|
||||||
(
|
(
|
||||||
"Content-Type: application/x-stuff; "
|
"Content-Type: application/x-stuff; "
|
||||||
"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
|
"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
|
||||||
"'This%20is%20%2A%2A%2Afun%2A%2A%2A",
|
"'This is ***fun***",
|
||||||
),
|
),
|
||||||
("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
|
("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
|
||||||
("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
|
("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
|
||||||
@ -494,6 +506,37 @@ class ParseHeaderParameterTests(unittest.TestCase):
|
|||||||
parsed = parse_header_parameters(raw_line)
|
parsed = parse_header_parameters(raw_line)
|
||||||
self.assertEqual(parsed[1]["title"], expected_title)
|
self.assertEqual(parsed[1]["title"], expected_title)
|
||||||
|
|
||||||
|
def test_header_max_length(self):
|
||||||
|
base_header = "Content-Type: application/x-stuff; title*="
|
||||||
|
base_header_len = len(base_header)
|
||||||
|
|
||||||
|
test_data = [
|
||||||
|
(MAX_HEADER_LENGTH, {}),
|
||||||
|
(MAX_HEADER_LENGTH, {"max_length": None}),
|
||||||
|
(MAX_HEADER_LENGTH + 1, {"max_length": None}),
|
||||||
|
(100, {"max_length": 100}),
|
||||||
|
]
|
||||||
|
for line_length, kwargs in test_data:
|
||||||
|
with self.subTest(line_length=line_length, kwargs=kwargs):
|
||||||
|
title = "x" * (line_length - base_header_len)
|
||||||
|
line = base_header + title
|
||||||
|
assert len(line) == line_length
|
||||||
|
|
||||||
|
parsed = parse_header_parameters(line, **kwargs)
|
||||||
|
|
||||||
|
expected = ("content-type: application/x-stuff", {"title": title})
|
||||||
|
self.assertEqual(parsed, expected)
|
||||||
|
|
||||||
|
def test_header_too_long(self):
|
||||||
|
test_data = [
|
||||||
|
("x" * (MAX_HEADER_LENGTH + 1), {}),
|
||||||
|
("x" * 101, {"max_length": 100}),
|
||||||
|
]
|
||||||
|
for line, kwargs in test_data:
|
||||||
|
with self.subTest(line_length=len(line), kwargs=kwargs):
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
parse_header_parameters(line, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class ContentDispositionHeaderTests(unittest.TestCase):
|
class ContentDispositionHeaderTests(unittest.TestCase):
|
||||||
def test_basic(self):
|
def test_basic(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user