From 2f690d25eeefb21b9e33b03723f1458c81b75119 Mon Sep 17 00:00:00 2001 From: Khudyakov Artem Date: Mon, 29 Jul 2024 22:05:10 +0300 Subject: [PATCH] Fixed #35440 -- Updated parse_header_parameters logic. Updated parse_header_parameters to leverage the parsing logic from the (stdlib) email Message implementation. Limited the number of parameters that are parsed by default to two. --- django/utils/http.py | 54 +++++++++++----------------- tests/utils_tests/test_http.py | 64 +++++++++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 35 deletions(-) diff --git a/django/utils/http.py b/django/utils/http.py index bf783562dd..8e181d7d47 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -3,6 +3,7 @@ import re import unicodedata from binascii import Error as BinasciiError from datetime import datetime, timezone +from email.message import Message from email.utils import formatdate from urllib.parse import quote, unquote from urllib.parse import urlencode as original_urlencode @@ -310,46 +311,31 @@ def escape_leading_slashes(url): return url -def _parseparam(s): - while s[:1] == ";": - s = s[1:] - end = s.find(";") - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(";", end + 1) - if end < 0: - end = len(s) - f = s[:end] - yield f.strip() - s = s[end:] +def _cgi_compatible_params(params, limit): + pdict = {} + key = params.pop(0)[0].strip().lower() + params = list(filter(lambda x: x[0], params))[:limit] + for name, value in params: + if isinstance(value, tuple): + encoding, lang, value = value + # email.message params always unquoted with "latin-1" + if encoding: + value = unquote(quote(value, encoding="latin-1"), encoding=encoding) + pdict[name] = value + + return key, pdict -def parse_header_parameters(line, limit=2): """ Parse a Content-type like header. Return the main content-type and a dictionary of options. 
""" - parts = _parseparam(";" + line) - key = parts.__next__().lower() - pdict = {} - for p in parts: - i = p.find("=") - if i >= 0: - has_encoding = False - name = p[:i].strip().lower() - if name.endswith("*"): - # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext") - # https://tools.ietf.org/html/rfc2231#section-4 - name = name[:-1] - if p.count("'") == 2: - has_encoding = True - value = p[i + 1 :].strip() - if len(value) >= 2 and value[0] == value[-1] == '"': - value = value[1:-1] - value = value.replace("\\\\", "\\").replace('\\"', '"') - if has_encoding: - encoding, lang, value = value.split("'") - value = unquote(value, encoding=encoding) - pdict[name] = value + m = Message() + m["content-type"] = line + + key, pdict = _cgi_compatible_params(m.get_params(), limit) + return key, pdict diff --git a/tests/utils_tests/test_http.py b/tests/utils_tests/test_http.py index 68df04696a..e329a9ea16 100644 --- a/tests/utils_tests/test_http.py +++ b/tests/utils_tests/test_http.py @@ -424,6 +424,7 @@ class EscapeLeadingSlashesTests(unittest.TestCase): class ParseHeaderParameterTests(unittest.TestCase): def test_basic(self): tests = [ + ("", ("", {})), ("text/plain", ("text/plain", {})), ("text/vnd.just.made.this.up ; ", ("text/vnd.just.made.this.up", {})), ("text/plain;charset=us-ascii", ("text/plain", {"charset": "us-ascii"})), @@ -447,10 +448,18 @@ class ParseHeaderParameterTests(unittest.TestCase): 'attachment; filename="strange;name";size=123;', ("attachment", {"filename": "strange;name", "size": "123"}), ), + ( + 'attachment; filename="strange;name";;;;size=123;;;', + ("attachment", {"filename": "strange;name", "size": "123"}), + ), ( 'form-data; name="files"; filename="fo\\"o;bar"', ("form-data", {"name": "files", "filename": 'fo"o;bar'}), ), + ( + 'form-data; name="files"; filename="\\"fo\\"o;b\\\\ar\\""', + ("form-data", {"name": "files", "filename": '"fo"o;b\\ar"'}), + ), ] for header, expected in tests: with self.subTest(header=header): @@ 
-480,12 +489,13 @@ class ParseHeaderParameterTests(unittest.TestCase): """ Test wrongly formatted RFC 2231 headers (missing double single quotes). Parsing should not crash (#24209). + But stdlib email still decodes (#35440). """ test_data = ( ( "Content-Type: application/x-stuff; " "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A", - "'This%20is%20%2A%2A%2Afun%2A%2A%2A", + "'This is ***fun***", ), ("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"), ("Content-Type: application/x-stuff; title*=bar.html", "bar.html"), @@ -494,6 +504,58 @@ class ParseHeaderParameterTests(unittest.TestCase): parsed = parse_header_parameters(raw_line) self.assertEqual(parsed[1]["title"], expected_title) + def test_params_limitation(self): + test_data = ( + ( + "Content-Disposition: form-data", + ("content-disposition: form-data", {}), + ), + ( + "Content-Disposition: form-data; ", + ("content-disposition: form-data", {}), + ), + ( + 'Content-Disposition: form-data; name="field2"', + ("content-disposition: form-data", {"name": "field2"}), + ), + ( + 'Content-Disposition: form-data; name="field2"; filename="example.txt"', + ( + "content-disposition: form-data", + {"name": "field2", "filename": "example.txt"}, + ), + ), + ( + 'Content-Disposition: form-data; name="field2"; ' + 'filename="example.txt"; unexpected="value"', + ( + "content-disposition: form-data", + {"name": "field2", "filename": "example.txt"}, + ), + ), + ( + "Content-Disposition: form-data" + f'{"".join([f"; field{i}=value{i}" for i in range(1, 50)])}', + ( + "content-disposition: form-data", + {"field1": "value1", "field2": "value2"}, + ), + ), + ) + for raw_line, expected_resp in test_data: + parsed = parse_header_parameters(raw_line) + self.assertEqual(parsed, expected_resp) + + for params_count in range(0, 10): + fields = {f"field{i}": f"value{i}" for i in range(params_count)} + test_data = ( + "Content-Disposition: form-data" + f'{"".join([f"; {k}={v}" for k, v in fields.items()])}' + ) + expected_resp = 
("content-disposition: form-data", fields) + parsed = parse_header_parameters(test_data, limit=params_count) + self.assertEqual(parsed, expected_resp) + class ContentDispositionHeaderTests(unittest.TestCase): def test_basic(self):