mirror of
				https://github.com/django/django.git
				synced 2025-10-31 09:41:08 +00:00 
			
		
		
		
	Fixed #35467 -- Replaced urlparse with urlsplit where appropriate.
This work should not cause any change in functionality, and `urlsplit` is approximately 6x faster. The two functions differ only in that `urlparse` additionally splits `;params` off the path, so the path is the only component that can be affected. Most uses of `urlparse` didn't touch the path, so they can be converted to `urlsplit` without any issue. Most of those which do use `.path` simply parse the URL, mutate the querystring, then put the URL back together, which is also fine (so long as `urlunsplit` is used).
This commit is contained in:
		| @@ -6,7 +6,7 @@ import warnings | ||||
| from functools import partial, update_wrapper | ||||
| from urllib.parse import parse_qsl | ||||
| from urllib.parse import quote as urlquote | ||||
| from urllib.parse import urlparse | ||||
| from urllib.parse import urlsplit | ||||
|  | ||||
| from django import forms | ||||
| from django.conf import settings | ||||
| @@ -1384,7 +1384,7 @@ class ModelAdmin(BaseModelAdmin): | ||||
|         ) | ||||
|  | ||||
|     def _get_preserved_qsl(self, request, preserved_filters): | ||||
|         query_string = urlparse(request.build_absolute_uri()).query | ||||
|         query_string = urlsplit(request.build_absolute_uri()).query | ||||
|         return parse_qsl(query_string.replace(preserved_filters, "")) | ||||
|  | ||||
|     def response_add(self, request, obj, post_url_continue=None): | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| from urllib.parse import parse_qsl, unquote, urlparse, urlunparse | ||||
| from urllib.parse import parse_qsl, unquote, urlsplit, urlunsplit | ||||
|  | ||||
| from django import template | ||||
| from django.contrib.admin.utils import quote | ||||
| @@ -24,8 +24,8 @@ def add_preserved_filters(context, url, popup=False, to_field=None): | ||||
|     preserved_filters = context.get("preserved_filters") | ||||
|     preserved_qsl = context.get("preserved_qsl") | ||||
|  | ||||
|     parsed_url = list(urlparse(url)) | ||||
|     parsed_qs = dict(parse_qsl(parsed_url[4])) | ||||
|     parsed_url = list(urlsplit(url)) | ||||
|     parsed_qs = dict(parse_qsl(parsed_url[3])) | ||||
|     merged_qs = {} | ||||
|  | ||||
|     if preserved_qsl: | ||||
| @@ -66,5 +66,5 @@ def add_preserved_filters(context, url, popup=False, to_field=None): | ||||
|  | ||||
|     merged_qs.update(parsed_qs) | ||||
|  | ||||
|     parsed_url[4] = urlencode(merged_qs) | ||||
|     return urlunparse(parsed_url) | ||||
|     parsed_url[3] = urlencode(merged_qs) | ||||
|     return urlunsplit(parsed_url) | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| import asyncio | ||||
| from functools import wraps | ||||
| from urllib.parse import urlparse | ||||
| from urllib.parse import urlsplit | ||||
|  | ||||
| from asgiref.sync import async_to_sync, sync_to_async | ||||
|  | ||||
| @@ -25,8 +25,8 @@ def user_passes_test( | ||||
|             resolved_login_url = resolve_url(login_url or settings.LOGIN_URL) | ||||
|             # If the login url is the same scheme and net location then just | ||||
|             # use the path as the "next" url. | ||||
|             login_scheme, login_netloc = urlparse(resolved_login_url)[:2] | ||||
|             current_scheme, current_netloc = urlparse(path)[:2] | ||||
|             login_scheme, login_netloc = urlsplit(resolved_login_url)[:2] | ||||
|             current_scheme, current_netloc = urlsplit(path)[:2] | ||||
|             if (not login_scheme or login_scheme == current_scheme) and ( | ||||
|                 not login_netloc or login_netloc == current_netloc | ||||
|             ): | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| from functools import partial | ||||
| from urllib.parse import urlparse | ||||
| from urllib.parse import urlsplit | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.contrib import auth | ||||
| @@ -74,8 +74,8 @@ class LoginRequiredMiddleware(MiddlewareMixin): | ||||
|         resolved_login_url = resolve_url(self.get_login_url(view_func)) | ||||
|         # If the login url is the same scheme and net location then use the | ||||
|         # path as the "next" url. | ||||
|         login_scheme, login_netloc = urlparse(resolved_login_url)[:2] | ||||
|         current_scheme, current_netloc = urlparse(path)[:2] | ||||
|         login_scheme, login_netloc = urlsplit(resolved_login_url)[:2] | ||||
|         current_scheme, current_netloc = urlsplit(path)[:2] | ||||
|         if (not login_scheme or login_scheme == current_scheme) and ( | ||||
|             not login_netloc or login_netloc == current_netloc | ||||
|         ): | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| from urllib.parse import urlparse | ||||
| from urllib.parse import urlsplit | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.contrib.auth import REDIRECT_FIELD_NAME | ||||
| @@ -51,8 +51,8 @@ class AccessMixin: | ||||
|         resolved_login_url = resolve_url(self.get_login_url()) | ||||
|         # If the login url is the same scheme and net location then use the | ||||
|         # path as the "next" url. | ||||
|         login_scheme, login_netloc = urlparse(resolved_login_url)[:2] | ||||
|         current_scheme, current_netloc = urlparse(path)[:2] | ||||
|         login_scheme, login_netloc = urlsplit(resolved_login_url)[:2] | ||||
|         current_scheme, current_netloc = urlsplit(path)[:2] | ||||
|         if (not login_scheme or login_scheme == current_scheme) and ( | ||||
|             not login_netloc or login_netloc == current_netloc | ||||
|         ): | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| from urllib.parse import urlparse, urlunparse | ||||
| from urllib.parse import urlsplit, urlunsplit | ||||
|  | ||||
| from django.conf import settings | ||||
|  | ||||
| @@ -183,13 +183,13 @@ def redirect_to_login(next, login_url=None, redirect_field_name=REDIRECT_FIELD_N | ||||
|     """ | ||||
|     resolved_url = resolve_url(login_url or settings.LOGIN_URL) | ||||
|  | ||||
|     login_url_parts = list(urlparse(resolved_url)) | ||||
|     login_url_parts = list(urlsplit(resolved_url)) | ||||
|     if redirect_field_name: | ||||
|         querystring = QueryDict(login_url_parts[4], mutable=True) | ||||
|         querystring = QueryDict(login_url_parts[3], mutable=True) | ||||
|         querystring[redirect_field_name] = next | ||||
|         login_url_parts[4] = querystring.urlencode(safe="/") | ||||
|         login_url_parts[3] = querystring.urlencode(safe="/") | ||||
|  | ||||
|     return HttpResponseRedirect(urlunparse(login_url_parts)) | ||||
|     return HttpResponseRedirect(urlunsplit(login_url_parts)) | ||||
|  | ||||
|  | ||||
| # Class-based password reset views | ||||
|   | ||||
| @@ -36,13 +36,13 @@ class StaticFilesHandlerMixin: | ||||
|         * the host is provided as part of the base_url | ||||
|         * the request's path isn't under the media path (or equal) | ||||
|         """ | ||||
|         return path.startswith(self.base_url[2]) and not self.base_url[1] | ||||
|         return path.startswith(self.base_url.path) and not self.base_url.netloc | ||||
|  | ||||
|     def file_path(self, url): | ||||
|         """ | ||||
|         Return the relative path to the media file on disk for the given URL. | ||||
|         """ | ||||
|         relative_url = url.removeprefix(self.base_url[2]) | ||||
|         relative_url = url.removeprefix(self.base_url.path) | ||||
|         return url2pathname(relative_url) | ||||
|  | ||||
|     def serve(self, request): | ||||
|   | ||||
| @@ -792,13 +792,13 @@ class URLField(CharField): | ||||
|     def to_python(self, value): | ||||
|         def split_url(url): | ||||
|             """ | ||||
|             Return a list of url parts via urlparse.urlsplit(), or raise | ||||
|             Return a list of url parts via urlsplit(), or raise | ||||
|             ValidationError for some malformed URLs. | ||||
|             """ | ||||
|             try: | ||||
|                 return list(urlsplit(url)) | ||||
|             except ValueError: | ||||
|                 # urlparse.urlsplit can raise a ValueError with some | ||||
|                 # urlsplit can raise a ValueError with some | ||||
|                 # misformatted URLs. | ||||
|                 raise ValidationError(self.error_messages["invalid"], code="invalid") | ||||
|  | ||||
|   | ||||
| @@ -9,7 +9,7 @@ import time | ||||
| import warnings | ||||
| from email.header import Header | ||||
| from http.client import responses | ||||
| from urllib.parse import urlparse | ||||
| from urllib.parse import urlsplit | ||||
|  | ||||
| from asgiref.sync import async_to_sync, sync_to_async | ||||
|  | ||||
| @@ -616,7 +616,7 @@ class HttpResponseRedirectBase(HttpResponse): | ||||
|     def __init__(self, redirect_to, *args, **kwargs): | ||||
|         super().__init__(*args, **kwargs) | ||||
|         self["Location"] = iri_to_uri(redirect_to) | ||||
|         parsed = urlparse(str(redirect_to)) | ||||
|         parsed = urlsplit(str(redirect_to)) | ||||
|         if parsed.scheme and parsed.scheme not in self.allowed_schemes: | ||||
|             raise DisallowedRedirect( | ||||
|                 "Unsafe redirect to URL with protocol '%s'" % parsed.scheme | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| import re | ||||
| from urllib.parse import urlparse | ||||
| from urllib.parse import urlsplit | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.exceptions import PermissionDenied | ||||
| @@ -171,7 +171,7 @@ class BrokenLinkEmailsMiddleware(MiddlewareMixin): | ||||
|  | ||||
|         # The referer is equal to the current URL, ignoring the scheme (assumed | ||||
|         # to be a poorly implemented bot). | ||||
|         parsed_referer = urlparse(referer) | ||||
|         parsed_referer = urlsplit(referer) | ||||
|         if parsed_referer.netloc in ["", domain] and parsed_referer.path == uri: | ||||
|             return True | ||||
|  | ||||
|   | ||||
| @@ -8,7 +8,7 @@ against request forgeries from other sites. | ||||
| import logging | ||||
| import string | ||||
| from collections import defaultdict | ||||
| from urllib.parse import urlparse | ||||
| from urllib.parse import urlsplit | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.exceptions import DisallowedHost, ImproperlyConfigured | ||||
| @@ -174,7 +174,7 @@ class CsrfViewMiddleware(MiddlewareMixin): | ||||
|     @cached_property | ||||
|     def csrf_trusted_origins_hosts(self): | ||||
|         return [ | ||||
|             urlparse(origin).netloc.lstrip("*") | ||||
|             urlsplit(origin).netloc.lstrip("*") | ||||
|             for origin in settings.CSRF_TRUSTED_ORIGINS | ||||
|         ] | ||||
|  | ||||
| @@ -190,7 +190,7 @@ class CsrfViewMiddleware(MiddlewareMixin): | ||||
|         """ | ||||
|         allowed_origin_subdomains = defaultdict(list) | ||||
|         for parsed in ( | ||||
|             urlparse(origin) | ||||
|             urlsplit(origin) | ||||
|             for origin in settings.CSRF_TRUSTED_ORIGINS | ||||
|             if "*" in origin | ||||
|         ): | ||||
| @@ -284,7 +284,7 @@ class CsrfViewMiddleware(MiddlewareMixin): | ||||
|         if request_origin in self.allowed_origins_exact: | ||||
|             return True | ||||
|         try: | ||||
|             parsed_origin = urlparse(request_origin) | ||||
|             parsed_origin = urlsplit(request_origin) | ||||
|         except ValueError: | ||||
|             return False | ||||
|         request_scheme = parsed_origin.scheme | ||||
| @@ -300,7 +300,7 @@ class CsrfViewMiddleware(MiddlewareMixin): | ||||
|             raise RejectRequest(REASON_NO_REFERER) | ||||
|  | ||||
|         try: | ||||
|             referer = urlparse(referer) | ||||
|             referer = urlsplit(referer) | ||||
|         except ValueError: | ||||
|             raise RejectRequest(REASON_MALFORMED_REFERER) | ||||
|  | ||||
|   | ||||
| @@ -8,7 +8,7 @@ from functools import partial | ||||
| from http import HTTPStatus | ||||
| from importlib import import_module | ||||
| from io import BytesIO, IOBase | ||||
| from urllib.parse import unquote_to_bytes, urljoin, urlparse, urlsplit | ||||
| from urllib.parse import unquote_to_bytes, urljoin, urlsplit | ||||
|  | ||||
| from asgiref.sync import sync_to_async | ||||
|  | ||||
| @@ -458,11 +458,7 @@ class RequestFactory: | ||||
|         return json.dumps(data, cls=self.json_encoder) if should_encode else data | ||||
|  | ||||
|     def _get_path(self, parsed): | ||||
|         path = parsed.path | ||||
|         # If there are parameters, add them | ||||
|         if parsed.params: | ||||
|             path += ";" + parsed.params | ||||
|         path = unquote_to_bytes(path) | ||||
|         path = unquote_to_bytes(parsed.path) | ||||
|         # Replace the behavior where non-ASCII values in the WSGI environ are | ||||
|         # arbitrarily decoded with ISO-8859-1. | ||||
|         # Refs comment in `get_bytes_from_wsgi()`. | ||||
| @@ -647,7 +643,7 @@ class RequestFactory: | ||||
|         **extra, | ||||
|     ): | ||||
|         """Construct an arbitrary HTTP request.""" | ||||
|         parsed = urlparse(str(path))  # path can be lazy | ||||
|         parsed = urlsplit(str(path))  # path can be lazy | ||||
|         data = force_bytes(data, settings.DEFAULT_CHARSET) | ||||
|         r = { | ||||
|             "PATH_INFO": self._get_path(parsed), | ||||
| @@ -671,8 +667,7 @@ class RequestFactory: | ||||
|         # If QUERY_STRING is absent or empty, we want to extract it from the URL. | ||||
|         if not r.get("QUERY_STRING"): | ||||
|             # WSGI requires latin-1 encoded strings. See get_path_info(). | ||||
|             query_string = parsed[4].encode().decode("iso-8859-1") | ||||
|             r["QUERY_STRING"] = query_string | ||||
|             r["QUERY_STRING"] = parsed.query.encode().decode("iso-8859-1") | ||||
|         return self.request(**r) | ||||
|  | ||||
|  | ||||
| @@ -748,7 +743,7 @@ class AsyncRequestFactory(RequestFactory): | ||||
|         **extra, | ||||
|     ): | ||||
|         """Construct an arbitrary HTTP request.""" | ||||
|         parsed = urlparse(str(path))  # path can be lazy. | ||||
|         parsed = urlsplit(str(path))  # path can be lazy. | ||||
|         data = force_bytes(data, settings.DEFAULT_CHARSET) | ||||
|         s = { | ||||
|             "method": method, | ||||
| @@ -772,7 +767,7 @@ class AsyncRequestFactory(RequestFactory): | ||||
|         else: | ||||
|             # If QUERY_STRING is absent or empty, we want to extract it from | ||||
|             # the URL. | ||||
|             s["query_string"] = parsed[4] | ||||
|             s["query_string"] = parsed.query | ||||
|         if headers: | ||||
|             extra.update(HttpHeaders.to_asgi_names(headers)) | ||||
|         s["headers"] += [ | ||||
|   | ||||
| @@ -21,7 +21,7 @@ from urllib.parse import ( | ||||
|     urljoin, | ||||
|     urlparse, | ||||
|     urlsplit, | ||||
|     urlunparse, | ||||
|     urlunsplit, | ||||
| ) | ||||
| from urllib.request import url2pathname | ||||
|  | ||||
| @@ -541,11 +541,9 @@ class SimpleTestCase(unittest.TestCase): | ||||
|         def normalize(url): | ||||
|             """Sort the URL's query string parameters.""" | ||||
|             url = str(url)  # Coerce reverse_lazy() URLs. | ||||
|             scheme, netloc, path, params, query, fragment = urlparse(url) | ||||
|             scheme, netloc, path, query, fragment = urlsplit(url) | ||||
|             query_parts = sorted(parse_qsl(query)) | ||||
|             return urlunparse( | ||||
|                 (scheme, netloc, path, params, urlencode(query_parts), fragment) | ||||
|             ) | ||||
|             return urlunsplit((scheme, netloc, path, urlencode(query_parts), fragment)) | ||||
|  | ||||
|         if msg_prefix: | ||||
|             msg_prefix += ": " | ||||
| @@ -1637,11 +1635,11 @@ class FSFilesHandler(WSGIHandler): | ||||
|         * the host is provided as part of the base_url | ||||
|         * the request's path isn't under the media path (or equal) | ||||
|         """ | ||||
|         return path.startswith(self.base_url[2]) and not self.base_url[1] | ||||
|         return path.startswith(self.base_url.path) and not self.base_url.netloc | ||||
|  | ||||
|     def file_path(self, url): | ||||
|         """Return the relative path to the file on disk for the given URL.""" | ||||
|         relative_url = url.removeprefix(self.base_url[2]) | ||||
|         relative_url = url.removeprefix(self.base_url.path) | ||||
|         return url2pathname(relative_url) | ||||
|  | ||||
|     def get_response(self, request): | ||||
|   | ||||
| @@ -6,7 +6,7 @@ from datetime import datetime, timezone | ||||
| from email.utils import formatdate | ||||
| from urllib.parse import quote, unquote | ||||
| from urllib.parse import urlencode as original_urlencode | ||||
| from urllib.parse import urlparse | ||||
| from urllib.parse import urlsplit | ||||
|  | ||||
| from django.utils.datastructures import MultiValueDict | ||||
| from django.utils.regex_helper import _lazy_re_compile | ||||
| @@ -271,11 +271,11 @@ def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False): | ||||
|  | ||||
| def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False): | ||||
|     # Chrome considers any URL with more than two slashes to be absolute, but | ||||
|     # urlparse is not so flexible. Treat any url with three slashes as unsafe. | ||||
|     # urlsplit is not so flexible. Treat any url with three slashes as unsafe. | ||||
|     if url.startswith("///"): | ||||
|         return False | ||||
|     try: | ||||
|         url_info = urlparse(url) | ||||
|         url_info = urlsplit(url) | ||||
|     except ValueError:  # e.g. invalid IPv6 addresses | ||||
|         return False | ||||
|     # Forbid URLs like http:///example.com - with a scheme, but without a hostname. | ||||
|   | ||||
| @@ -203,7 +203,7 @@ A :class:`ResolverMatch` object can also be assigned to a triple:: | ||||
| One possible use of :func:`~django.urls.resolve` would be to test whether a | ||||
| view would raise a ``Http404`` error before redirecting to it:: | ||||
|  | ||||
|     from urllib.parse import urlparse | ||||
|     from urllib.parse import urlsplit | ||||
|     from django.urls import resolve | ||||
|     from django.http import Http404, HttpResponseRedirect | ||||
|  | ||||
| @@ -215,7 +215,7 @@ view would raise a ``Http404`` error before redirecting to it:: | ||||
|         # modify the request and response as required, e.g. change locale | ||||
|         # and set corresponding locale cookie | ||||
|  | ||||
|         view, args, kwargs = resolve(urlparse(next)[2]) | ||||
|         view, args, kwargs = resolve(urlsplit(next).path) | ||||
|         kwargs["request"] = request | ||||
|         try: | ||||
|             view(*args, **kwargs) | ||||
|   | ||||
| @@ -4,7 +4,7 @@ import re | ||||
| import unittest | ||||
| import zoneinfo | ||||
| from unittest import mock | ||||
| from urllib.parse import parse_qsl, urljoin, urlparse | ||||
| from urllib.parse import parse_qsl, urljoin, urlsplit | ||||
|  | ||||
| from django import forms | ||||
| from django.contrib import admin | ||||
| @@ -357,7 +357,7 @@ class AdminViewBasicTest(AdminViewBasicTestCase): | ||||
|                             **save_option, | ||||
|                         }, | ||||
|                     ) | ||||
|                     parsed_url = urlparse(response.url) | ||||
|                     parsed_url = urlsplit(response.url) | ||||
|                     self.assertEqual(parsed_url.query, qsl) | ||||
|  | ||||
|     def test_change_query_string_persists(self): | ||||
| @@ -386,7 +386,7 @@ class AdminViewBasicTest(AdminViewBasicTestCase): | ||||
|                             **save_option, | ||||
|                         }, | ||||
|                     ) | ||||
|                     parsed_url = urlparse(response.url) | ||||
|                     parsed_url = urlsplit(response.url) | ||||
|                     self.assertEqual(parsed_url.query, qsl) | ||||
|  | ||||
|     def test_basic_edit_GET(self): | ||||
| @@ -8032,11 +8032,11 @@ class AdminKeepChangeListFiltersTests(TestCase): | ||||
|         Assert that two URLs are equal despite the ordering | ||||
|         of their querystring. Refs #22360. | ||||
|         """ | ||||
|         parsed_url1 = urlparse(url1) | ||||
|         parsed_url1 = urlsplit(url1) | ||||
|         path1 = parsed_url1.path | ||||
|         parsed_qs1 = dict(parse_qsl(parsed_url1.query)) | ||||
|  | ||||
|         parsed_url2 = urlparse(url2) | ||||
|         parsed_url2 = urlsplit(url2) | ||||
|         path2 = parsed_url2.path | ||||
|         parsed_qs2 = dict(parse_qsl(parsed_url2.query)) | ||||
|  | ||||
|   | ||||
| @@ -709,25 +709,21 @@ class CsrfViewMiddlewareTestMixin(CsrfFunctionTestMixin): | ||||
|         response = mw.process_view(req, post_form_view, (), {}) | ||||
|         self.assertContains(response, malformed_referer_msg, status_code=403) | ||||
|         # missing scheme | ||||
|         # >>> urlparse('//example.com/') | ||||
|         # ParseResult( | ||||
|         #   scheme='', netloc='example.com', path='/', params='', query='', fragment='', | ||||
|         # ) | ||||
|         # >>> urlsplit('//example.com/') | ||||
|         # SplitResult(scheme='', netloc='example.com', path='/', query='', fragment='') | ||||
|         req.META["HTTP_REFERER"] = "//example.com/" | ||||
|         self._check_referer_rejects(mw, req) | ||||
|         response = mw.process_view(req, post_form_view, (), {}) | ||||
|         self.assertContains(response, malformed_referer_msg, status_code=403) | ||||
|         # missing netloc | ||||
|         # >>> urlparse('https://') | ||||
|         # ParseResult( | ||||
|         #   scheme='https', netloc='', path='', params='', query='', fragment='', | ||||
|         # ) | ||||
|         # >>> urlsplit('https://') | ||||
|         # SplitResult(scheme='https', netloc='', path='', query='', fragment='') | ||||
|         req.META["HTTP_REFERER"] = "https://" | ||||
|         self._check_referer_rejects(mw, req) | ||||
|         response = mw.process_view(req, post_form_view, (), {}) | ||||
|         self.assertContains(response, malformed_referer_msg, status_code=403) | ||||
|         # Invalid URL | ||||
|         # >>> urlparse('https://[') | ||||
|         # >>> urlsplit('https://[') | ||||
|         # ValueError: Invalid IPv6 URL | ||||
|         req.META["HTTP_REFERER"] = "https://[" | ||||
|         self._check_referer_rejects(mw, req) | ||||
| @@ -979,7 +975,7 @@ class CsrfViewMiddlewareTestMixin(CsrfFunctionTestMixin): | ||||
|     @override_settings(ALLOWED_HOSTS=["www.example.com"]) | ||||
|     def test_bad_origin_cannot_be_parsed(self): | ||||
|         """ | ||||
|         A POST request with an origin that can't be parsed by urlparse() is | ||||
|         A POST request with an origin that can't be parsed by urlsplit() is | ||||
|         rejected. | ||||
|         """ | ||||
|         req = self._get_POST_request_with_token() | ||||
|   | ||||
		Reference in New Issue
	
	Block a user