1
0
mirror of https://github.com/django/django.git synced 2024-12-22 17:16:24 +00:00

Fixed #35440 -- Updated parse_header_parameters logic.

Updated parse_header_parameters to leverage the parsing logic
from the (stdlib) email Message implementation. Limited the number
of parameters that are parsed by default to two.
This commit is contained in:
Khudyakov Artem 2024-07-29 22:05:10 +03:00
parent 0e94f292cd
commit 2f690d25ee
2 changed files with 83 additions and 35 deletions

View File

@ -3,6 +3,7 @@ import re
import unicodedata import unicodedata
from binascii import Error as BinasciiError from binascii import Error as BinasciiError
from datetime import datetime, timezone from datetime import datetime, timezone
from email.message import Message
from email.utils import formatdate from email.utils import formatdate
from urllib.parse import quote, unquote from urllib.parse import quote, unquote
from urllib.parse import urlencode as original_urlencode from urllib.parse import urlencode as original_urlencode
@ -310,46 +311,31 @@ def escape_leading_slashes(url):
return url return url
def _parseparam(s): def _cgi_compatible_params(params, limit):
while s[:1] == ";": pdict = {}
s = s[1:] key = params.pop(0)[0].strip().lower()
end = s.find(";") params = list(filter(lambda x: x[0], params))[:limit]
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: for name, value in params:
end = s.find(";", end + 1) if isinstance(value, tuple):
if end < 0: encoding, lang, value = value
end = len(s) # email.message params always unquoted with "latin-1"
f = s[:end] if encoding:
yield f.strip() value = unquote(quote(value, encoding="latin-1"), encoding=encoding)
s = s[end:] pdict[name] = value
return key, pdict
def parse_header_parameters(line): def parse_header_parameters(line, limit=2):
""" """
Parse a Content-type like header. Parse a Content-type like header.
Return the main content-type and a dictionary of options. Return the main content-type and a dictionary of options.
""" """
parts = _parseparam(";" + line) m = Message()
key = parts.__next__().lower() m["content-type"] = line
pdict = {}
for p in parts: key, pdict = _cgi_compatible_params(m.get_params(), limit)
i = p.find("=")
if i >= 0:
has_encoding = False
name = p[:i].strip().lower()
if name.endswith("*"):
# Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
# https://tools.ietf.org/html/rfc2231#section-4
name = name[:-1]
if p.count("'") == 2:
has_encoding = True
value = p[i + 1 :].strip()
if len(value) >= 2 and value[0] == value[-1] == '"':
value = value[1:-1]
value = value.replace("\\\\", "\\").replace('\\"', '"')
if has_encoding:
encoding, lang, value = value.split("'")
value = unquote(value, encoding=encoding)
pdict[name] = value
return key, pdict return key, pdict

View File

@ -424,6 +424,7 @@ class EscapeLeadingSlashesTests(unittest.TestCase):
class ParseHeaderParameterTests(unittest.TestCase): class ParseHeaderParameterTests(unittest.TestCase):
def test_basic(self): def test_basic(self):
tests = [ tests = [
("", ("", {})),
("text/plain", ("text/plain", {})), ("text/plain", ("text/plain", {})),
("text/vnd.just.made.this.up ; ", ("text/vnd.just.made.this.up", {})), ("text/vnd.just.made.this.up ; ", ("text/vnd.just.made.this.up", {})),
("text/plain;charset=us-ascii", ("text/plain", {"charset": "us-ascii"})), ("text/plain;charset=us-ascii", ("text/plain", {"charset": "us-ascii"})),
@ -447,10 +448,18 @@ class ParseHeaderParameterTests(unittest.TestCase):
'attachment; filename="strange;name";size=123;', 'attachment; filename="strange;name";size=123;',
("attachment", {"filename": "strange;name", "size": "123"}), ("attachment", {"filename": "strange;name", "size": "123"}),
), ),
(
'attachment; filename="strange;name";;;;size=123;;;',
("attachment", {"filename": "strange;name", "size": "123"}),
),
( (
'form-data; name="files"; filename="fo\\"o;bar"', 'form-data; name="files"; filename="fo\\"o;bar"',
("form-data", {"name": "files", "filename": 'fo"o;bar'}), ("form-data", {"name": "files", "filename": 'fo"o;bar'}),
), ),
(
'form-data; name="files"; filename="\\"fo\\"o;b\\\\ar\\""',
("form-data", {"name": "files", "filename": '"fo"o;b\\ar"'}),
),
] ]
for header, expected in tests: for header, expected in tests:
with self.subTest(header=header): with self.subTest(header=header):
@ -480,12 +489,13 @@ class ParseHeaderParameterTests(unittest.TestCase):
""" """
Test wrongly formatted RFC 2231 headers (missing double single quotes). Test wrongly formatted RFC 2231 headers (missing double single quotes).
Parsing should not crash (#24209). Parsing should not crash (#24209).
But stdlib email still decodes (#35440).
""" """
test_data = ( test_data = (
( (
"Content-Type: application/x-stuff; " "Content-Type: application/x-stuff; "
"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A", "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
"'This%20is%20%2A%2A%2Afun%2A%2A%2A", "'This is ***fun***",
), ),
("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"), ("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
("Content-Type: application/x-stuff; title*=bar.html", "bar.html"), ("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
@ -494,6 +504,58 @@ class ParseHeaderParameterTests(unittest.TestCase):
parsed = parse_header_parameters(raw_line) parsed = parse_header_parameters(raw_line)
self.assertEqual(parsed[1]["title"], expected_title) self.assertEqual(parsed[1]["title"], expected_title)
def test_params_limitation(self):
test_data = (
(
"Content-Disposition: form-data",
("content-disposition: form-data", {}),
),
(
"Content-Disposition: form-data; ",
("content-disposition: form-data", {}),
),
(
'Content-Disposition: form-data; name="field2"',
("content-disposition: form-data", {"name": "field2"}),
),
(
'Content-Disposition: form-data; name="field2"; filename="example.txt"',
(
"content-disposition: form-data",
{"name": "field2", "filename": "example.txt"},
),
),
(
'Content-Disposition: form-data; name="field2"; '
'filename="example.txt"; unexpected="value"',
(
"content-disposition: form-data",
{"name": "field2", "filename": "example.txt"},
),
),
(
"Content-Disposition: form-data"
f'{"".join([f"; field{i}=value{i}" for i in range(1, 50)])}',
(
"content-disposition: form-data",
{"field1": "value1", "field2": "value2"},
),
),
)
for raw_line, expected_resp in test_data:
parsed = parse_header_parameters(raw_line)
self.assertEqual(parsed, expected_resp)
for params_count in range(0, 10):
fields = {f"field{i}": f"value{i}" for i in range(params_count)}
test_data = (
"Content-Disposition: form-data"
f'{"".join([f"; {k}={v}" for k, v in fields.items()])}'
)
expected_resp = ("content-disposition: form-data", fields)
parsed = parse_header_parameters(test_data, limit=params_count)
self.assertEqual(parsed, expected_resp)
class ContentDispositionHeaderTests(unittest.TestCase): class ContentDispositionHeaderTests(unittest.TestCase):
def test_basic(self): def test_basic(self):