2013-09-06 15:28:28 +00:00
|
|
|
import datetime
|
2023-08-22 06:53:03 +00:00
|
|
|
import inspect
|
2019-07-19 15:04:53 +00:00
|
|
|
import sys
|
2015-01-28 12:35:27 +00:00
|
|
|
import unittest
|
2019-10-29 20:15:18 +00:00
|
|
|
from pathlib import Path
|
2017-02-23 01:54:55 +00:00
|
|
|
from unittest import mock
|
2023-08-22 06:53:03 +00:00
|
|
|
from urllib.parse import quote, quote_plus
|
2013-07-01 12:22:27 +00:00
|
|
|
|
2017-02-01 20:48:53 +00:00
|
|
|
from django.test import SimpleTestCase
|
2014-10-31 15:43:34 +00:00
|
|
|
from django.utils.encoding import (
|
2017-02-01 20:48:53 +00:00
|
|
|
DjangoUnicodeDecodeError,
|
|
|
|
escape_uri_path,
|
|
|
|
filepath_to_uri,
|
|
|
|
force_bytes,
|
2019-07-19 15:04:53 +00:00
|
|
|
force_str,
|
|
|
|
get_system_encoding,
|
|
|
|
iri_to_uri,
|
|
|
|
repercent_broken_unicode,
|
|
|
|
smart_bytes,
|
|
|
|
smart_str,
|
|
|
|
uri_to_iri,
|
2014-10-31 15:43:34 +00:00
|
|
|
)
|
2015-05-26 20:46:13 +00:00
|
|
|
from django.utils.functional import SimpleLazyObject
|
2017-02-23 01:54:55 +00:00
|
|
|
from django.utils.translation import gettext_lazy
|
2012-09-04 07:24:39 +00:00
|
|
|
|
|
|
|
|
2017-02-01 20:48:53 +00:00
|
|
|
class TestEncodingUtils(SimpleTestCase):
    """Exercise the str/bytes coercion helpers imported from django.utils.encoding."""

    def test_force_str_exception(self):
        """
        The error caused by a broken __str__ is not swallowed by force_str().
        """

        class BytesReturningStr:
            def __str__(self):
                return b"\xc3\xb6\xc3\xa4\xc3\xbc"

        broken = BytesReturningStr()
        # str(s) raises a TypeError if the result is not a text type.
        with self.assertRaises(TypeError):
            force_str(broken)

    def test_force_str_lazy(self):
        # A lazy object wrapping a str must come back as a plain str.
        lazy_text = SimpleLazyObject(lambda: "x")
        coerced = force_str(lazy_text)
        self.assertIs(type(coerced), str)

    def test_force_str_DjangoUnicodeDecodeError(self):
        # Undecodable bytes are reported together with the offending input.
        expected_message = (
            "'utf-8' codec can't decode byte 0xff in position 0: invalid "
            "start byte. You passed in b'\\xff' (<class 'bytes'>)"
        )
        with self.assertRaisesMessage(DjangoUnicodeDecodeError, expected_message):
            force_str(b"\xff")

    def test_force_bytes_exception(self):
        """
        An exception whose args hold non-ASCII text can be converted to bytes
        by force_bytes().
        """
        text = "This is an exception, voilà"
        error = ValueError(text)
        self.assertEqual(force_bytes(error), text.encode())
        # With errors="ignore", the character ASCII can't represent is dropped.
        ascii_only = force_bytes(error, encoding="ascii", errors="ignore")
        self.assertEqual(ascii_only, b"This is an exception, voil")

    def test_force_bytes_strings_only(self):
        # strings_only=True hands a date back untouched.
        current_date = datetime.date.today()
        self.assertEqual(force_bytes(current_date, strings_only=True), current_date)

    def test_force_bytes_encoding(self):
        # Bytes input is re-encoded to the requested target encoding.
        utf8_payload = "This is an exception, voilà".encode()
        self.assertEqual(
            force_bytes(utf8_payload, encoding="ascii", errors="ignore"),
            b"This is an exception, voil",
        )

    def test_force_bytes_memory_view(self):
        raw = b"abc"
        converted = force_bytes(memoryview(raw))
        # memoryview(bytes) == bytes, so equality alone cannot prove the type.
        self.assertIs(type(converted), bytes)
        self.assertEqual(converted, raw)

    def test_smart_bytes(self):
        class NonAsciiStr:
            def __str__(self):
                return "ŠĐĆŽćžšđ"

        # Lazy translation objects are returned as the very same object.
        lazy_func = gettext_lazy("x")
        self.assertIs(smart_bytes(lazy_func), lazy_func)

        conversions = (
            (
                NonAsciiStr(),
                b"\xc5\xa0\xc4\x90\xc4\x86\xc5\xbd\xc4\x87\xc5\xbe\xc5\xa1\xc4\x91",
            ),
            (1, b"1"),
            ("foo", b"foo"),
        )
        for value, expected in conversions:
            self.assertEqual(smart_bytes(value), expected)

    def test_smart_str(self):
        class NonAsciiStr:
            def __str__(self):
                return "ŠĐĆŽćžšđ"

        # Lazy translation objects are returned as the very same object.
        lazy_func = gettext_lazy("x")
        self.assertIs(smart_str(lazy_func), lazy_func)

        conversions = (
            (NonAsciiStr(), "\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111"),
            (1, "1"),
            ("foo", "foo"),
        )
        for value, expected in conversions:
            self.assertEqual(smart_str(value), expected)

    def test_get_default_encoding(self):
        # When locale.getlocale() blows up, "ascii" is the expected result.
        failing_getlocale = mock.patch("locale.getlocale", side_effect=Exception)
        with failing_getlocale:
            self.assertEqual(get_system_encoding(), "ascii")

    def test_repercent_broken_unicode_recursion_error(self):
        # One invalid byte per allowed stack frame: a recursive implementation
        # of the tested function would exceed the recursion limit.
        limit = sys.getrecursionlimit()
        broken_bytes = b"\xfc" * limit
        expected = b"%FC" * limit
        try:
            self.assertEqual(repercent_broken_unicode(broken_bytes), expected)
        except RecursionError:
            self.fail("Unexpected RecursionError raised.")

    def test_repercent_broken_unicode_small_fragments(self):
        broken_path = b"test\xfctest\xfctest\xfc"
        decoded_paths = []

        def recording_quote(*args, **kwargs):
            # The calling frame belongs to repercent_broken_unicode(); record
            # the value of its local "path" at the time quote() is invoked.
            caller = inspect.currentframe().f_back
            decoded_paths.append(caller.f_locals["path"])
            return quote(*args, **kwargs)

        with mock.patch("django.utils.encoding.quote", recording_quote):
            repaired = repercent_broken_unicode(broken_path)
            self.assertEqual(repaired, b"test%FCtest%FCtest%FC")

        # Each recorded path is a shorter fragment than the previous one.
        expected_fragments = [
            b"test\xfctest\xfctest\xfc",
            b"test\xfctest\xfc",
            b"test\xfc",
        ]
        self.assertEqual(decoded_paths, expected_fragments)
|
|
|
|
|
2014-07-22 12:25:22 +00:00
|
|
|
|
|
|
|
class TestRFC3987IEncodingUtils(unittest.TestCase):
    """Exercise the URI/IRI conversion and path-escaping helpers."""

    def test_filepath_to_uri(self):
        self.assertIsNone(filepath_to_uri(None))
        # Backslashes become forward slashes; non-ASCII is percent-encoded.
        self.assertEqual(
            filepath_to_uri("upload\\чубака.mp4"),
            "upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4",
        )
        # pathlib.Path inputs are accepted as well.
        for path in (Path("upload/test.png"), Path("upload\\test.png")):
            self.assertEqual(filepath_to_uri(path), "upload/test.png")

    def test_iri_to_uri(self):
        cases = [
            # Valid UTF-8 sequences get percent-encoded.
            ("red%09rosé#red", "red%09ros%C3%A9#red"),
            ("/blog/for/Jürgen Münster/", "/blog/for/J%C3%BCrgen%20M%C3%BCnster/"),
            (
                "locations/%s" % quote_plus("Paris & Orléans"),
                "locations/Paris+%26+Orl%C3%A9ans",
            ),
            # Reserved characters are left unescaped.
            ("%&", "%&"),
            ("red&♥ros%#red", "red&%E2%99%A5ros%#red"),
            (gettext_lazy("red&♥ros%#red"), "red&%E2%99%A5ros%#red"),
        ]

        for iri, uri in cases:
            with self.subTest(iri):
                converted = iri_to_uri(iri)
                self.assertEqual(converted, uri)

                # Converting an already converted value must change nothing.
                self.assertEqual(iri_to_uri(converted), uri)

    def test_uri_to_iri(self):
        cases = [
            (None, None),
            # Valid UTF-8 sequences get decoded.
            ("/%e2%89%Ab%E2%99%a5%E2%89%aB/", "/≫♥≫/"),
            ("/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93", "/♥♥/?utf8=✓"),
            ("/%41%5a%6B/", "/AZk/"),
            # Reserved and non-URL valid ASCII characters stay encoded.
            ("/%25%20%02%41%7b/", "/%25%20%02A%7b/"),
            # Broken UTF-8 sequences stay escaped.
            ("/%AAd%AAj%AAa%AAn%AAg%AAo%AA/", "/%AAd%AAj%AAa%AAn%AAg%AAo%AA/"),
            ("/%E2%99%A5%E2%E2%99%A5/", "/♥%E2♥/"),
            ("/%E2%99%A5%E2%99%E2%99%A5/", "/♥%E2%99♥/"),
            ("/%E2%E2%99%A5%E2%99%A5%99/", "/%E2♥♥%99/"),
            (
                "/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93",
                "/♥♥/?utf8=%9C%93✓%9C%93",
            ),
        ]

        for uri, iri in cases:
            with self.subTest(uri):
                converted = uri_to_iri(uri)
                self.assertEqual(converted, iri)

                # Converting an already converted value must change nothing.
                self.assertEqual(uri_to_iri(converted), iri)

    def test_complementarity(self):
        # Each pair must survive a round trip through both converters.
        cases = [
            (
                "/blog/for/J%C3%BCrgen%20M%C3%BCnster/",
                "/blog/for/J\xfcrgen%20M\xfcnster/",
            ),
            ("%&", "%&"),
            ("red&%E2%99%A5ros%#red", "red&♥ros%#red"),
            ("/%E2%99%A5%E2%99%A5/", "/♥♥/"),
            ("/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93", "/♥♥/?utf8=✓"),
            ("/%25%20%02%7b/", "/%25%20%02%7b/"),
            ("/%AAd%AAj%AAa%AAn%AAg%AAo%AA/", "/%AAd%AAj%AAa%AAn%AAg%AAo%AA/"),
            ("/%E2%99%A5%E2%E2%99%A5/", "/♥%E2♥/"),
            ("/%E2%99%A5%E2%99%E2%99%A5/", "/♥%E2%99♥/"),
            ("/%E2%E2%99%A5%E2%99%A5%99/", "/%E2♥♥%99/"),
            (
                "/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93",
                "/♥♥/?utf8=%9C%93✓%9C%93",
            ),
        ]

        for uri, iri in cases:
            with self.subTest(uri):
                uri_round_trip = iri_to_uri(uri_to_iri(uri))
                self.assertEqual(uri_round_trip, uri)
                iri_round_trip = uri_to_iri(iri_to_uri(iri))
                self.assertEqual(iri_round_trip, iri)

    def test_escape_uri_path(self):
        cases = [
            (
                "/;some/=awful/?path/:with/@lots/&of/+awful/chars",
                "/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars",
            ),
            ("/foo#bar", "/foo%23bar"),
            ("/foo?bar", "/foo%3Fbar"),
        ]
        for uri, expected in cases:
            with self.subTest(uri):
                escaped = escape_uri_path(uri)
                self.assertEqual(escaped, expected)
|