From 03eec9ff6cc78e7c1bcf88bb76ecd11f0d433c72 Mon Sep 17 00:00:00 2001 From: Michael Manfre Date: Wed, 29 Jun 2022 20:39:51 -0400 Subject: [PATCH] Updated vendored _urlsplit() to strip newline and tabs. Refs Python CVE-2022-0391. Django is not affected, but others who incorrectly use internal function url_has_allowed_host_and_scheme() with unsanitized input could be at risk. --- django/utils/http.py | 19 +++++++++++++++++-- docs/releases/4.2.txt | 7 +++++++ tests/utils_tests/test_http.py | 1 + 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/django/utils/http.py b/django/utils/http.py index 51fdc4b149..d2ec2638b0 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -46,6 +46,10 @@ ASCTIME_DATE = _lazy_re_compile(r"^\w{3} %s %s %s %s$" % (__M, __D2, __T, __Y)) RFC3986_GENDELIMS = ":/?#[]@" RFC3986_SUBDELIMS = "!$&'()*+,;=" +# TODO: Remove when dropping support for PY38. +# Unsafe bytes to be removed per WHATWG spec. +_UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"] + def urlencode(query, doseq=False): """ @@ -278,6 +282,7 @@ def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False): ) +# TODO: Remove when dropping support for PY38. # Copied from urllib.parse.urlparse() but uses fixed urlsplit() function. def _urlparse(url, scheme="", allow_fragments=True): """Parse a URL into 6 components: @@ -296,8 +301,15 @@ def _urlparse(url, scheme="", allow_fragments=True): return _coerce_result(result) -# Copied from urllib.parse.urlsplit() with -# https://github.com/python/cpython/pull/661 applied. +# TODO: Remove when dropping support for PY38. +def _remove_unsafe_bytes_from_url(url): + for b in _UNSAFE_URL_BYTES_TO_REMOVE: + url = url.replace(b, "") + return url + + +# TODO: Remove when dropping support for PY38. +# Backport of urllib.parse.urlsplit() from Python 3.9. 
def _urlsplit(url, scheme="", allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> @@ -305,6 +317,9 @@ def _urlsplit(url, scheme="", allow_fragments=True): Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" url, scheme, _coerce_result = _coerce_args(url, scheme) + url = _remove_unsafe_bytes_from_url(url) + scheme = _remove_unsafe_bytes_from_url(scheme) + netloc = query = fragment = "" i = url.find(":") if i > 0: diff --git a/docs/releases/4.2.txt b/docs/releases/4.2.txt index fad89ea008..34f8362be9 100644 --- a/docs/releases/4.2.txt +++ b/docs/releases/4.2.txt @@ -217,6 +217,13 @@ Utilities * The new ``encoder`` parameter for :meth:`django.utils.html.json_script` function allows customizing a JSON encoder class. +* The private internal vendored copy of ``urllib.parse.urlsplit()`` now strips + ``'\r'``, ``'\n'``, and ``'\t'`` (see :cve:`2022-0391` and :bpo:`43882`). + This is to protect projects that may be incorrectly using the internal + ``url_has_allowed_host_and_scheme()`` function, instead of using one of the + documented functions for handling URL redirects. The Django functions were + not affected. + Validators ~~~~~~~~~~ diff --git a/tests/utils_tests/test_http.py b/tests/utils_tests/test_http.py index b2754b4ddb..9978c7bb52 100644 --- a/tests/utils_tests/test_http.py +++ b/tests/utils_tests/test_http.py @@ -177,6 +177,7 @@ class URLHasAllowedHostAndSchemeTests(unittest.TestCase): r"http:/\example.com", 'javascript:alert("XSS")', "\njavascript:alert(x)", + "java\nscript:alert(x)", "\x08//example.com", r"http://otherserver\@example.com", r"http:\\testserver\@example.com",