From f663277a4c22ef96cbdebfd0ed76155b9d37b4f8 Mon Sep 17 00:00:00 2001
From: Mariusz Felisiak <felisiak.mariusz@gmail.com>
Date: Sun, 1 Dec 2024 12:31:12 +0100
Subject: [PATCH] [4.2.x] Refs CVE-2024-11168 -- Updated vendored _urlsplit()
 to properly validate IPv6 and IPvFuture addresses.

Refs Python CVE-2024-11168. Django should not be affected, but others
who incorrectly use the internal function _urlsplit() with unsanitized
input could be at risk.

https://github.com/python/cpython/pull/103849
---
 django/utils/http.py           | 19 ++++++++++++++++
 tests/utils_tests/test_http.py | 41 ++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/django/utils/http.py b/django/utils/http.py
index 3e7acb5835..94ad60bdbc 100644
--- a/django/utils/http.py
+++ b/django/utils/http.py
@@ -1,5 +1,6 @@
 import base64
 import datetime
+import ipaddress
 import re
 import unicodedata
 from binascii import Error as BinasciiError
@@ -309,6 +310,21 @@ def _remove_unsafe_bytes_from_url(url):
     return url
 
 
+# TODO: Remove when dropping support for PY38.
+def _check_bracketed_host(hostname):
+    # Raise ValueError unless hostname (the text between "[" and "]") is an
+    # IPv6 or IPvFuture literal; see https://url.spec.whatwg.org/ and
+    # https://www.rfc-editor.org/rfc/rfc3986#page-49.
+    if hostname.startswith("v"):
+        if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", hostname):
+            raise ValueError("IPvFuture address is invalid")
+    else:
+        # ip_address() raises ValueError if hostname is not IPv4 or IPv6.
+        ip = ipaddress.ip_address(hostname)
+        if isinstance(ip, ipaddress.IPv4Address):
+            raise ValueError("An IPv4 address cannot be in brackets")
+
+
 # TODO: Remove when dropping support for PY38.
 # Backport of urllib.parse.urlsplit() from Python 3.9.
 def _urlsplit(url, scheme="", allow_fragments=True):
@@ -336,6 +352,9 @@ def _urlsplit(url, scheme="", allow_fragments=True):
             "]" in netloc and "[" not in netloc
         ):
             raise ValueError("Invalid IPv6 URL")
+        if "[" in netloc and "]" in netloc:
+            bracketed_host = netloc.partition("[")[2].partition("]")[0]
+            _check_bracketed_host(bracketed_host)
     if allow_fragments and "#" in url:
         url, fragment = url.split("#", 1)
     if "?" in url:
diff --git a/tests/utils_tests/test_http.py b/tests/utils_tests/test_http.py
index 2290fe85fb..265f74346c 100644
--- a/tests/utils_tests/test_http.py
+++ b/tests/utils_tests/test_http.py
@@ -6,6 +6,7 @@ from unittest import mock
 from django.test import SimpleTestCase
 from django.utils.datastructures import MultiValueDict
 from django.utils.http import (
+    _urlsplit,
     base36_to_int,
     content_disposition_header,
     escape_leading_slashes,
@@ -291,6 +292,46 @@ class URLHasAllowedHostAndSchemeTests(unittest.TestCase):
                     False,
                 )
 
+    # TODO: Remove when dropping support for PY38.
+    def test_invalid_bracketed_hosts(self):
+        # Port of urllib.parse.urlsplit() tests from CPython: each URL must raise.
+        tests = [
+            "Scheme://user@[192.0.2.146]/Path?Query",
+            "Scheme://user@[important.com:8000]/Path?Query",
+            "Scheme://user@[v123r.IP]/Path?Query",
+            "Scheme://user@[v12ae]/Path?Query",
+            "Scheme://user@[v.IP]/Path?Query",
+            "Scheme://user@[v123.]/Path?Query",
+            "Scheme://user@[v]/Path?Query",
+            "Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query",
+            "Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/"
+            "Path?Query",
+            "Scheme://user@]v6a.ip[/Path",
+        ]
+        for invalid_url in tests:
+            with self.subTest(invalid_url=invalid_url):
+                self.assertRaises(ValueError, _urlsplit, invalid_url)
+
+    # TODO: Remove when dropping support for PY38.
+    def test_splitting_bracketed_hosts(self):
+        # Port of urllib.parse.urlsplit() tests from CPython: valid hosts still split.
+        p1 = _urlsplit("scheme://user@[v6a.ip]/path?query")
+        self.assertEqual(p1.hostname, "v6a.ip")
+        self.assertEqual(p1.username, "user")
+        self.assertEqual(p1.path, "/path")
+        # The '%test' suffix was removed from the ported tests: %scope_id suffixes
+        # were added in Python 3.9: https://docs.python.org/3/whatsnew/3.9.html#ipaddress
+        p2 = _urlsplit("scheme://user@[0439:23af:2309::fae7]/path?query")
+        self.assertEqual(p2.hostname, "0439:23af:2309::fae7")
+        self.assertEqual(p2.username, "user")
+        self.assertEqual(p2.path, "/path")
+        p3 = _urlsplit(
+            "scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146]/path?query"
+        )
+        self.assertEqual(p3.hostname, "0439:23af:2309::fae7:1234:192.0.2.146")
+        self.assertEqual(p3.username, "user")
+        self.assertEqual(p3.path, "/path")
+
 
 class URLSafeBase64Tests(unittest.TestCase):
     def test_roundtrip(self):