From bdf3e156b4b47d45b8e37823164b598afc533ce0 Mon Sep 17 00:00:00 2001 From: Ad Timmering Date: Fri, 26 Nov 2021 10:44:54 +0900 Subject: [PATCH] Fixed #28628 -- Changed \d to [0-9] in regexes where appropriate. --- django/contrib/gis/geometry.py | 4 ++-- django/core/validators.py | 9 ++++++--- django/http/request.py | 2 +- django/test/client.py | 2 +- django/utils/http.py | 10 +++++----- django/utils/translation/trans_real.py | 6 +++--- docs/topics/http/urls.txt | 4 ++-- tests/gis_tests/geos_tests/test_geos.py | 7 +++++++ tests/i18n/tests.py | 1 + tests/utils_tests/test_http.py | 7 +++++++ 10 files changed, 35 insertions(+), 17 deletions(-) diff --git a/django/contrib/gis/geometry.py b/django/contrib/gis/geometry.py index d92a17c964..f7a70618fa 100644 --- a/django/contrib/gis/geometry.py +++ b/django/contrib/gis/geometry.py @@ -7,11 +7,11 @@ from django.utils.regex_helper import _lazy_re_compile # library. Not a substitute for good web security programming practices. hex_regex = _lazy_re_compile(r'^[0-9A-F]+$', re.I) wkt_regex = _lazy_re_compile( - r'^(SRID=(?P<srid>\-?\d+);)?' + r'^(SRID=(?P<srid>\-?[0-9]+);)?' r'(?P<wkt>' r'(?P<type>POINT|LINESTRING|LINEARRING|POLYGON|MULTIPOINT|' r'MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)' - r'[ACEGIMLONPSRUTYZ\d,\.\-\+\(\) ]+)$', + r'[ACEGIMLONPSRUTYZ0-9,\.\-\+\(\) ]+)$', re.I ) json_regex = _lazy_re_compile(r'^(\s+)?\{.*}(\s+)?$', re.DOTALL) diff --git a/django/core/validators.py b/django/core/validators.py index eec7803714..9ad90f006f 100644 --- a/django/core/validators.py +++ b/django/core/validators.py @@ -64,7 +64,10 @@ class URLValidator(RegexValidator): ul = '\u00a1-\uffff' # Unicode letters range (must not be a raw string). 
# IP patterns - ipv4_re = r'(?:0|25[0-5]|2[0-4]\d|1\d?\d?|[1-9]\d?)(?:\.(?:0|25[0-5]|2[0-4]\d|1\d?\d?|[1-9]\d?)){3}' + ipv4_re = ( + r'(?:0|25[0-5]|2[0-4][0-9]|1[0-9]?[0-9]?|[1-9][0-9]?)' + r'(?:\.(?:0|25[0-5]|2[0-4][0-9]|1[0-9]?[0-9]?|[1-9][0-9]?)){3}' + ) ipv6_re = r'\[[0-9a-f:.]+\]' # (simple regex, validated later) # Host patterns @@ -85,7 +88,7 @@ class URLValidator(RegexValidator): r'^(?:[a-z0-9.+-]*)://' # scheme is validated separately r'(?:[^\s:@/]+(?::[^\s:@/]*)?@)?' # user:pass authentication r'(?:' + ipv4_re + '|' + ipv6_re + '|' + host_re + ')' - r'(?::\d{1,5})?' # port + r'(?::[0-9]{1,5})?' # port r'(?:[/?#][^\s]*)?' # resource path r'\Z', re.IGNORECASE) message = _('Enter a valid URL.') @@ -128,7 +131,7 @@ class URLValidator(RegexValidator): raise else: # Now verify IPv6 in the netloc part - host_match = re.search(r'^\[(.+)\](?::\d{1,5})?$', splitted_url.netloc) + host_match = re.search(r'^\[(.+)\](?::[0-9]{1,5})?$', splitted_url.netloc) if host_match: potential_ip = host_match[1] try: diff --git a/django/http/request.py b/django/http/request.py index c7922e59e9..5971203261 100644 --- a/django/http/request.py +++ b/django/http/request.py @@ -23,7 +23,7 @@ from django.utils.regex_helper import _lazy_re_compile from .multipartparser import parse_header RAISE_ERROR = object() -host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$") +host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:[0-9]+)?$") class UnreadablePostError(OSError): diff --git a/django/test/client.py b/django/test/client.py index 560f994876..6e8d0f4d9d 100644 --- a/django/test/client.py +++ b/django/test/client.py @@ -38,7 +38,7 @@ __all__ = ( BOUNDARY = 'BoUnDaRyStRiNg' MULTIPART_CONTENT = 'multipart/form-data; boundary=%s' % BOUNDARY -CONTENT_TYPE_RE = _lazy_re_compile(r'.*; charset=([\w\d-]+);?') +CONTENT_TYPE_RE = _lazy_re_compile(r'.*; charset=([\w-]+);?') # Structured suffix spec: 
https://tools.ietf.org/html/rfc6838#section-4.2.8 JSON_CONTENT_TYPE_RE = _lazy_re_compile(r'^application\/(.+\+)?json') diff --git a/django/utils/http.py b/django/utils/http.py index 6aa45a2cd6..ab90f1e377 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -23,12 +23,12 @@ ETAG_MATCH = _lazy_re_compile(r''' ''', re.X) MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split() -__D = r'(?P<day>\d{2})' -__D2 = r'(?P<day>[ \d]\d)' +__D = r'(?P<day>[0-9]{2})' +__D2 = r'(?P<day>[ 0-9][0-9])' __M = r'(?P<mon>\w{3})' -__Y = r'(?P<year>\d{4})' -__Y2 = r'(?P<year>\d{2})' -__T = r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})' +__Y = r'(?P<year>[0-9]{4})' +__Y2 = r'(?P<year>[0-9]{2})' +__T = r'(?P<hour>[0-9]{2}):(?P<min>[0-9]{2}):(?P<sec>[0-9]{2})' RFC1123_DATE = _lazy_re_compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T)) RFC850_DATE = _lazy_re_compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T)) ASCTIME_DATE = _lazy_re_compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y)) diff --git a/django/utils/translation/trans_real.py b/django/utils/translation/trans_real.py index 7c20f5f32b..7a6bc81406 100644 --- a/django/utils/translation/trans_real.py +++ b/django/utils/translation/trans_real.py @@ -33,9 +33,9 @@ CONTEXT_SEPARATOR = "\x04" # Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9 # and RFC 3066, section 2.1 accept_language_re = _lazy_re_compile(r''' - ([A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*|\*) # "en", "en-au", "x-y-z", "es-419", "*" - (?:\s*;\s*q=(0(?:\.\d{,3})?|1(?:\.0{,3})?))? # Optional "q=1.00", "q=0.8" - (?:\s*,\s*|$) # Multiple accepts per header. + ([A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*|\*) # "en", "en-au", "x-y-z", "es-419", "*" + (?:\s*;\s*q=(0(?:\.[0-9]{,3})?|1(?:\.0{,3})?))? # Optional "q=1.00", "q=0.8" + (?:\s*,\s*|$) # Multiple accepts per header. 
''', re.VERBOSE) language_code_re = _lazy_re_compile( diff --git a/docs/topics/http/urls.txt b/docs/topics/http/urls.txt index d5d732e660..ab612547b0 100644 --- a/docs/topics/http/urls.txt +++ b/docs/topics/http/urls.txt @@ -246,8 +246,8 @@ following URL patterns which optionally take a page argument:: from django.urls import re_path urlpatterns = [ - re_path(r'^blog/(page-(\d+)/)?$', blog_articles), # bad - re_path(r'^comments/(?:page-(?P<page_number>\d+)/)?$', comments), # good + re_path(r'^blog/(page-([0-9]+)/)?$', blog_articles), # bad + re_path(r'^comments/(?:page-(?P<page_number>[0-9]+)/)?$', comments), # good ] Both patterns use nested arguments and will resolve: for example, diff --git a/tests/gis_tests/geos_tests/test_geos.py b/tests/gis_tests/geos_tests/test_geos.py index 27208e005d..08169f38ab 100644 --- a/tests/gis_tests/geos_tests/test_geos.py +++ b/tests/gis_tests/geos_tests/test_geos.py @@ -31,6 +31,13 @@ class GEOSTest(SimpleTestCase, TestDataMixin): if geom.hasz: self.assertEqual(g.ewkt, geom.wkt) + def test_wkt_invalid(self): + msg = 'String input unrecognized as WKT EWKT, and HEXEWKB.' + with self.assertRaisesMessage(ValueError, msg): + fromstr('POINT(٠٠١ ٠)') + with self.assertRaisesMessage(ValueError, msg): + fromstr('SRID=٧٥٨٣;POINT(100 0)') + def test_hex(self): "Testing HEX output." 
for g in self.geometries.hex_wkt: diff --git a/tests/i18n/tests.py b/tests/i18n/tests.py index bebe313aa5..3dbb8236d7 100644 --- a/tests/i18n/tests.py +++ b/tests/i18n/tests.py @@ -1406,6 +1406,7 @@ class MiscTests(SimpleTestCase): ('12-345', []), ('', []), ('en;q=1e0', []), + ('en-au;q=１.０', []), ] for value, expected in tests: with self.subTest(value=value): diff --git a/tests/utils_tests/test_http.py b/tests/utils_tests/test_http.py index 6867ed8274..77b91a7174 100644 --- a/tests/utils_tests/test_http.py +++ b/tests/utils_tests/test_http.py @@ -328,6 +328,13 @@ class HttpDateProcessingTests(unittest.TestCase): datetime(1994, 11, 6, 8, 49, 37, tzinfo=timezone.utc), ) + def test_parsing_asctime_nonascii_digits(self): + """Non-ASCII unicode decimals raise an error.""" + with self.assertRaises(ValueError): + parse_http_date('Sun Nov  6 08:49:37 １９９４') + with self.assertRaises(ValueError): + parse_http_date('Sun Nov １２ 08:49:37 1994') + def test_parsing_year_less_than_70(self): parsed = parse_http_date('Sun Nov  6 08:49:37 0037') self.assertEqual(