From c075508b4de8edf9db553b409f8a8ed2f26ecead Mon Sep 17 00:00:00 2001 From: Jake Howard Date: Tue, 27 May 2025 17:00:29 +0100 Subject: [PATCH] Fixed #36411 -- Made HttpRequest.get_preferred_type() consider media type parameters. HttpRequest.get_preferred_type() did not account for parameters in Accept header media types (e.g., "text/vcard; version=3.0"). This caused incorrect content negotiation when multiple types differed only by parameters, reducing specificity as per RFC 7231 section 5.3.2 (https://datatracker.ietf.org/doc/html/rfc7231.html#section-5.3.2). This fix updates get_preferred_type() to treat media types with parameters as distinct, allowing more precise and standards-compliant matching. Thanks to magicfelix for the report, and to David Sanders and Sarah Boyce for the reviews. --- django/http/request.py | 61 +++++++--- docs/ref/request-response.txt | 35 ++++++ docs/releases/5.2.2.txt | 5 + tests/requests_tests/test_accept_header.py | 130 ++++++++++++++++++--- 4 files changed, 200 insertions(+), 31 deletions(-) diff --git a/django/http/request.py b/django/http/request.py index 986d4eee89..6438f26268 100644 --- a/django/http/request.py +++ b/django/http/request.py @@ -93,8 +93,12 @@ class HttpRequest: """Return a list of MediaType instances, in order of preference.""" header_value = self.headers.get("Accept", "*/*") return sorted( - (MediaType(token) for token in header_value.split(",") if token.strip()), - key=operator.attrgetter("quality", "specificity"), + ( + media_type + for token in header_value.split(",") + if token.strip() and (media_type := MediaType(token)).quality != 0 + ), + key=operator.attrgetter("specificity", "quality"), reverse=True, ) @@ -102,11 +106,12 @@ class HttpRequest: """ Return the preferred MediaType instance which matches the given media type. """ + media_type = MediaType(media_type) return next( ( accepted_type for accepted_type in self.accepted_types - if accepted_type.match(media_type) + if media_type.match(accepted_type) ), None, ) @@ -689,13 +694,13 @@ class QueryDict(MultiValueDict): class MediaType: def __init__(self, media_type_raw_line): - full_type, self.params = parse_header_parameters( + full_type, self._params = parse_header_parameters( media_type_raw_line if media_type_raw_line else "" ) self.main_type, _, self.sub_type = full_type.partition("/") def __str__(self): - params_str = "".join("; %s=%s" % (k, v) for k, v in self.params.items()) + params_str = "".join("; %s=%s" % (k, v) for k, v in self._params.items()) return "%s%s%s" % ( self.main_type, ("/%s" % self.sub_type) if self.sub_type else "", @@ -705,23 +710,45 @@ class MediaType: def __repr__(self): return "<%s: %s>" % (self.__class__.__qualname__, self) - @property - def is_all_types(self): - return self.main_type == "*" and self.sub_type == "*" + @cached_property + def params(self): + params = self._params.copy() + params.pop("q", None) + return params def match(self, other): - if self.is_all_types: - return True - other = MediaType(other) - return self.main_type == other.main_type and self.sub_type in { - "*", - other.sub_type, - } + if not other: + return False + + if not isinstance(other, MediaType): + other = MediaType(other) + + main_types = [self.main_type, other.main_type] + sub_types = [self.sub_type, other.sub_type] + + # Main types and sub types must be defined. + if not all((*main_types, *sub_types)): + return False + + # Main types must match or one be "*", same for sub types. + for this_type, other_type in (main_types, sub_types): + if this_type != other_type and this_type != "*" and other_type != "*": + return False + + if bool(self.params) == bool(other.params): + # If both have params or neither have params, they must be identical. + result = self.params == other.params + else: + # If self has params and other does not, it's a match. + # If other has params and self does not, don't match. + result = bool(self.params or not other.params) + + return result @cached_property def quality(self): try: - quality = float(self.params.get("q", 1)) + quality = float(self._params.get("q", 1)) except ValueError: # Discard invalid values. return 1 @@ -741,7 +768,7 @@ class MediaType: return 0 elif self.sub_type == "*": return 1 - elif self.quality == 1: + elif not self.params: return 2 return 3 diff --git a/docs/ref/request-response.txt b/docs/ref/request-response.txt index 9846a3a4b5..805b36023a 100644 --- a/docs/ref/request-response.txt +++ b/docs/ref/request-response.txt @@ -445,6 +445,41 @@ Methods >>> request.get_preferred_type(["application/xml", "text/plain"]) None + If the mime type includes parameters, these are also considered when + determining the preferred media type. For example, with an ``Accept`` + header of ``text/vcard;version=3.0,text/html;q=0.5``, the return value of + ``request.get_preferred_type()`` depends on the available media types: + + .. code-block:: pycon + + >>> request.get_preferred_type( + ... [ + ... "text/vcard; version=4.0", + ... "text/vcard; version=3.0", + ... "text/vcard", + ... "text/directory", + ... ] + ... ) + "text/vcard; version=3.0" + >>> request.get_preferred_type( + ... [ + ... "text/vcard; version=4.0", + ... "text/html", + ... ] + ... ) + "text/html" + >>> request.get_preferred_type( + ... [ + ... "text/vcard; version=4.0", + ... "text/vcard", + ... "text/directory", + ... ] + ... ) + None + + (For further details on how content negotiation is performed, see + :rfc:`7231#section-5.3.2`.) + Most browsers send ``Accept: */*`` by default, meaning they don't have a preference, in which case the first item in ``media_types`` would be returned. diff --git a/docs/releases/5.2.2.txt b/docs/releases/5.2.2.txt index 3870a3efa4..1a363ad55d 100644 --- a/docs/releases/5.2.2.txt +++ b/docs/releases/5.2.2.txt @@ -38,3 +38,8 @@ Bugfixes * Fixed a bug in Django 5.2 where calling ``QuerySet.in_bulk()`` with an ``id_list`` argument on models with a ``CompositePrimaryKey`` failed to observe database parameter limits (:ticket:`36416`). + +* Fixed a bug in Django 5.2 where :meth:`HttpRequest.get_preferred_type() + ` did not account for media type + parameters in ``Accept`` headers, reducing specificity in content negotiation + (:ticket:`36411`). diff --git a/tests/requests_tests/test_accept_header.py b/tests/requests_tests/test_accept_header.py index 6585fec678..37826278e2 100644 --- a/tests/requests_tests/test_accept_header.py +++ b/tests/requests_tests/test_accept_header.py @@ -6,10 +6,9 @@ from django.http.request import MediaType class MediaTypeTests(TestCase): def test_empty(self): - for empty_media_type in (None, ""): + for empty_media_type in (None, "", " "): with self.subTest(media_type=empty_media_type): media_type = MediaType(empty_media_type) - self.assertIs(media_type.is_all_types, False) self.assertEqual(str(media_type), "") self.assertEqual(repr(media_type), "") @@ -24,21 +23,18 @@ class MediaTypeTests(TestCase): "", ) - def test_is_all_types(self): - self.assertIs(MediaType("*/*").is_all_types, True) - self.assertIs(MediaType("*/*; q=0.8").is_all_types, True) - self.assertIs(MediaType("text/*").is_all_types, False) - self.assertIs(MediaType("application/xml").is_all_types, False) - def test_match(self): tests = [ ("*/*; q=0.8", "*/*"), ("*/*", "application/json"), (" */* ", "application/json"), ("application/*", "application/json"), + ("application/*", "application/*"), ("application/xml", "application/xml"), (" application/xml ", "application/xml"), ("application/xml", " application/xml "), + ("text/vcard; version=4.0", "text/vcard; version=4.0"), + ("text/vcard; version=4.0", "text/vcard"), ] for accepted_type, mime_type in tests: with self.subTest(accepted_type, mime_type=mime_type): @@ -46,11 +42,23 @@ class MediaTypeTests(TestCase): def test_no_match(self): tests = [ - (None, "*/*"), - ("", "*/*"), - ("; q=0.8", "*/*"), + # other is falsey. + ("*/*", None), + ("*/*", ""), + # other is malformed. + ("*/*", "; q=0.8"), + # main_type is falsey. + ("/*", "*/*"), + # other.main_type is falsey. + ("*/*", "/*"), + # main sub_type is falsey. + ("application", "application/*"), + # other.sub_type is falsey. + ("application/*", "application"), + # All main and sub types are defined, but there is no match. ("application/xml", "application/html"), - ("application/xml", "*/*"), + ("text/vcard; version=4.0", "text/vcard; version=3.0"), + ("text/vcard", "text/vcard; version=3.0"), ] for accepted_type, mime_type in tests: with self.subTest(accepted_type, mime_type=mime_type): @@ -65,6 +73,8 @@ class MediaTypeTests(TestCase): ("*/*; q=-1", 1), ("*/*; q=2", 1), ("*/*; q=h", 1), + ("*/*; q=inf", 1), + ("*/*; q=0", 0), ("*/*", 1), ] for accepted_type, quality in tests: @@ -79,7 +89,8 @@ class MediaTypeTests(TestCase): ("text/*;q=0.5", 1), ("text/html", 2), ("text/html;q=1", 2), - ("text/html;q=0.5", 3), + ("text/html;q=0.5", 2), + ("text/html;version=5", 3), ] for accepted_type, specificity in tests: with self.subTest(accepted_type, specificity=specificity): @@ -105,12 +116,38 @@ class AcceptHeaderTests(TestCase): [ "text/html", "application/xhtml+xml", - "text/*", "application/xml; q=0.9", + "text/*", "*/*; q=0.8", ], ) + def test_zero_quality(self): + request = HttpRequest() + request.META["HTTP_ACCEPT"] = "text/*;q=0,text/html" + self.assertEqual( + [str(accepted_type) for accepted_type in request.accepted_types], + ["text/html"], + ) + + def test_precedence(self): + """ + Taken from https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.2. + """ + request = HttpRequest() + request.META["HTTP_ACCEPT"] = ( + "text/*, text/plain, text/plain;format=flowed, */*" + ) + self.assertEqual( + [str(accepted_type) for accepted_type in request.accepted_types], + [ + "text/plain; format=flowed", + "text/plain", + "text/*", + "*/*", + ], + ) + def test_request_accepts_any(self): request = HttpRequest() request.META["HTTP_ACCEPT"] = "*/*" @@ -175,3 +212,68 @@ class AcceptHeaderTests(TestCase): self.assertIsNone( request.get_preferred_type(["application/json", "text/plain"]) ) + + def test_accept_with_param(self): + request = HttpRequest() + request.META["HTTP_ACCEPT"] = "text/vcard; version=3.0, text/html;q=0.5" + + for media_types, expected in [ + ( + [ + "text/vcard; version=4.0", + "text/vcard; version=3.0", + "text/vcard", + "text/directory", + ], + "text/vcard; version=3.0", + ), + (["text/vcard; version=4.0", "text/vcard", "text/directory"], None), + (["text/vcard; version=4.0", "text/html"], "text/html"), + ]: + self.assertEqual(request.get_preferred_type(media_types), expected) + + def test_quality(self): + """ + Taken from https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.2. + """ + request = HttpRequest() + request.META["HTTP_ACCEPT"] = ( + "text/*;q=0.3,text/html;q=0.7,text/html;level=1,text/html;level=2;q=0.4," + "*/*;q=0.5" + ) + + for media_type, quality in [ + ("text/html;level=1", 1), + ("text/html", 0.7), + ("text/plain", 0.3), + ("image/jpeg", 0.5), + ("text/html;level=2", 0.4), + ("text/html;level=3", 0.7), + ]: + with self.subTest(media_type): + accepted_media_type = request.accepted_type(media_type) + self.assertIsNotNone(accepted_media_type) + self.assertEqual(accepted_media_type.quality, quality) + + for media_types, expected in [ + (["text/html", "text/html; level=1"], "text/html; level=1"), + (["text/html; level=2", "text/html; level=3"], "text/html; level=2"), + ]: + self.assertEqual(request.get_preferred_type(media_types), expected) + + def test_quality_breaks_specificity(self): + """ + With the same specificity, the quality breaks the tie. + """ + request = HttpRequest() + request.META["HTTP_ACCEPT"] = "text/plain;q=0.5,text/html" + + self.assertEqual(request.accepted_type("text/plain").quality, 0.5) + self.assertEqual(request.accepted_type("text/plain").specificity, 2) + + self.assertEqual(request.accepted_type("text/html").quality, 1) + self.assertEqual(request.accepted_type("text/html").specificity, 2) + + self.assertEqual( + request.get_preferred_type(["text/html", "text/plain"]), "text/html" + )