From ca0ac66ac2762610878b64f2d0f92da08374e65f Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Fri, 6 Dec 2024 10:47:31 -0500 Subject: [PATCH] Fixed #36023 -- Handle control chars in content_disposition_header. The current implementation would happily insert a newline character into the returned string, which is not valid for an HTTP header value. To use the simple `filename="..."` form, the value must conform to the official grammar from RFC 6266[^1]: filename-parm = "filename" "=" value value = <value, defined in [RFC2616], Section 3.6> ; token | quoted-string The `quoted-string` definition comes from RFC 9110[^2]: ``` quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE qdtext = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text The backslash octet ("\") can be used as a single-octet quoting mechanism within quoted-string and comment constructs. Recipients that process the value of a quoted-string MUST handle a quoted-pair as if it were replaced by the octet following the backslash. quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) A sender SHOULD NOT generate a quoted-pair in a quoted-string except where necessary to quote DQUOTE and backslash octets occurring within that string. ``` That is, quoted strings are able to express horizontal tabs, space characters, and everything in the range from 0x21 to 0x7e, with the exception of 0x22 (`"`) and 0x5C (`\`), which can still be expressed but must be escaped with their own `\`. We ignore the case of `obs-text`, which is defined as the range 0x80-0xFF, since its presence is there for permissive parsing of accidental high-bit characters, and it should not be generated by conforming implementations. Transform this character range into a regex and apply it in addition to the "is ASCII" check. This ensures that all simple filenames are expressed in the simple format, and that all filenames with newlines and other control characters are properly expressed with the percent-encoded `filename*=...` form. 
[^1]: https://datatracker.ietf.org/doc/html/rfc6266#section-4.1 [^2]: https://datatracker.ietf.org/doc/html/rfc9110#name-quoted-strings --- django/utils/http.py | 7 ++++++- tests/utils_tests/test_http.py | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/django/utils/http.py b/django/utils/http.py index bf783562dd..dd1f5d6c0d 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -362,10 +362,15 @@ def content_disposition_header(as_attachment, filename): disposition = "attachment" if as_attachment else "inline" try: filename.encode("ascii") + is_ascii = True + except UnicodeEncodeError: + is_ascii = False + # https://datatracker.ietf.org/doc/html/rfc9110#name-quoted-strings + if is_ascii and re.match(r"^[\t \x21-\x7e]*$", filename): file_expr = 'filename="{}"'.format( filename.replace("\\", "\\\\").replace('"', r"\"") ) - except UnicodeEncodeError: + else: file_expr = "filename*=utf-8''{}".format(quote(filename)) return f"{disposition}; {file_expr}" elif as_attachment: diff --git a/tests/utils_tests/test_http.py b/tests/utils_tests/test_http.py index 68df04696a..c04f0a03d7 100644 --- a/tests/utils_tests/test_http.py +++ b/tests/utils_tests/test_http.py @@ -511,6 +511,7 @@ class ContentDispositionHeaderTests(unittest.TestCase): (True, '"espécimen" filename'), "attachment; filename*=utf-8''%22esp%C3%A9cimen%22%20filename", ), + ((True, "some\nfile"), "attachment; filename*=utf-8''some%0Afile"), ) for (is_attachment, filename), expected in tests: