1
0
mirror of https://github.com/django/django.git synced 2024-12-22 17:16:24 +00:00

Fixed #35440 -- Updated parse_header_parameters logic.

Updated parse_header_parameters to leverage the parsing logic
from the (stdlib) email Message implementation. Limited the number
of parameters that are parsed by default to two.
This commit is contained in:
Khudyakov Artem 2024-07-29 22:05:10 +03:00
parent 0e94f292cd
commit 2f690d25ee
2 changed files with 83 additions and 35 deletions

View File

@ -3,6 +3,7 @@ import re
import unicodedata
from binascii import Error as BinasciiError
from datetime import datetime, timezone
from email.message import Message
from email.utils import formatdate
from urllib.parse import quote, unquote
from urllib.parse import urlencode as original_urlencode
@ -310,46 +311,31 @@ def escape_leading_slashes(url):
return url
def _parseparam(s):
while s[:1] == ";":
s = s[1:]
end = s.find(";")
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
end = s.find(";", end + 1)
if end < 0:
end = len(s)
f = s[:end]
yield f.strip()
s = s[end:]
def _cgi_compatible_params(params, limit):
pdict = {}
key = params.pop(0)[0].strip().lower()
params = list(filter(lambda x: x[0], params))[:limit]
for name, value in params:
if isinstance(value, tuple):
encoding, lang, value = value
# email.message params always unquoted with "latin-1"
if encoding:
value = unquote(quote(value, encoding="latin-1"), encoding=encoding)
pdict[name] = value
return key, pdict
def parse_header_parameters(line, limit=2):
    """
    Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    Parsing is delegated to the stdlib ``email.message.Message``
    implementation. At most ``limit`` named parameters are included in the
    returned dictionary (two by default); pass ``limit=None`` to keep all of
    them.
    """
    m = Message()
    # Assigning the raw line as a header value lets Message.get_params()
    # split it into (name, value) pairs; the first pair is the main value.
    m["content-type"] = line
    return _cgi_compatible_params(m.get_params(), limit)

View File

@ -424,6 +424,7 @@ class EscapeLeadingSlashesTests(unittest.TestCase):
class ParseHeaderParameterTests(unittest.TestCase):
def test_basic(self):
tests = [
("", ("", {})),
("text/plain", ("text/plain", {})),
("text/vnd.just.made.this.up ; ", ("text/vnd.just.made.this.up", {})),
("text/plain;charset=us-ascii", ("text/plain", {"charset": "us-ascii"})),
@ -447,10 +448,18 @@ class ParseHeaderParameterTests(unittest.TestCase):
'attachment; filename="strange;name";size=123;',
("attachment", {"filename": "strange;name", "size": "123"}),
),
(
'attachment; filename="strange;name";;;;size=123;;;',
("attachment", {"filename": "strange;name", "size": "123"}),
),
(
'form-data; name="files"; filename="fo\\"o;bar"',
("form-data", {"name": "files", "filename": 'fo"o;bar'}),
),
(
'form-data; name="files"; filename="\\"fo\\"o;b\\\\ar\\""',
("form-data", {"name": "files", "filename": '"fo"o;b\\ar"'}),
),
]
for header, expected in tests:
with self.subTest(header=header):
@ -480,12 +489,13 @@ class ParseHeaderParameterTests(unittest.TestCase):
"""
Test wrongly formatted RFC 2231 headers (missing double single quotes).
Parsing should not crash (#24209).
But stdlib email still decodes (#35440).
"""
test_data = (
(
"Content-Type: application/x-stuff; "
"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
"'This%20is%20%2A%2A%2Afun%2A%2A%2A",
"'This is ***fun***",
),
("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
@ -494,6 +504,58 @@ class ParseHeaderParameterTests(unittest.TestCase):
parsed = parse_header_parameters(raw_line)
self.assertEqual(parsed[1]["title"], expected_title)
def test_params_limitation(self):
    """
    parse_header_parameters() returns at most `limit` parameters (two by
    default) and ignores any that follow.
    """
    # The raw lines include the header name, so it ends up (lowercased) in
    # the returned key.
    test_data = (
        (
            "Content-Disposition: form-data",
            ("content-disposition: form-data", {}),
        ),
        (
            "Content-Disposition: form-data; ",
            ("content-disposition: form-data", {}),
        ),
        (
            'Content-Disposition: form-data; name="field2"',
            ("content-disposition: form-data", {"name": "field2"}),
        ),
        (
            'Content-Disposition: form-data; name="field2"; filename="example.txt"',
            (
                "content-disposition: form-data",
                {"name": "field2", "filename": "example.txt"},
            ),
        ),
        (
            'Content-Disposition: form-data; name="field2"; '
            'filename="example.txt"; unexpected="value"',
            (
                "content-disposition: form-data",
                {"name": "field2", "filename": "example.txt"},
            ),
        ),
        (
            "Content-Disposition: form-data"
            + "".join(f"; field{i}=value{i}" for i in range(1, 50)),
            (
                "content-disposition: form-data",
                {"field1": "value1", "field2": "value2"},
            ),
        ),
    )
    for raw_line, expected_resp in test_data:
        # subTest keeps the remaining cases running after a failure.
        with self.subTest(raw_line=raw_line):
            parsed = parse_header_parameters(raw_line)
            self.assertEqual(parsed, expected_resp)

    # With an explicit limit equal to the number of parameters, all of them
    # are returned.
    for params_count in range(10):
        with self.subTest(limit=params_count):
            fields = {f"field{i}": f"value{i}" for i in range(params_count)}
            raw_line = "Content-Disposition: form-data" + "".join(
                f"; {k}={v}" for k, v in fields.items()
            )
            expected_resp = ("content-disposition: form-data", fields)
            parsed = parse_header_parameters(raw_line, limit=params_count)
            self.assertEqual(parsed, expected_resp)
class ContentDispositionHeaderTests(unittest.TestCase):
def test_basic(self):