2013-09-06 10:28:28 -05:00
|
|
|
import datetime
|
2015-01-28 07:35:27 -05:00
|
|
|
import unittest
|
2017-01-26 14:25:15 +01:00
|
|
|
from urllib.parse import quote_plus
|
2013-07-01 14:22:27 +02:00
|
|
|
|
2017-02-01 15:48:53 -05:00
|
|
|
from django.test import SimpleTestCase
|
2014-10-31 17:43:34 +02:00
|
|
|
from django.utils.encoding import (
|
2017-02-01 15:48:53 -05:00
|
|
|
DjangoUnicodeDecodeError, escape_uri_path, filepath_to_uri, force_bytes,
|
|
|
|
force_text, iri_to_uri, smart_text, uri_to_iri,
|
2014-10-31 17:43:34 +02:00
|
|
|
)
|
2015-05-26 16:46:13 -04:00
|
|
|
from django.utils.functional import SimpleLazyObject
|
2012-09-04 09:24:39 +02:00
|
|
|
|
|
|
|
|
2017-02-01 15:48:53 -05:00
|
|
|
class TestEncodingUtils(SimpleTestCase):
    """Tests for force_text(), force_bytes() and smart_text()."""

    def test_force_text_exception(self):
        """
        Broken __str__ actually raises an error.
        """
        class MyString:
            def __str__(self):
                # Deliberately wrong: __str__ must return str, not bytes.
                return b'\xc3\xb6\xc3\xa4\xc3\xbc'

        # str(s) raises a TypeError if the result is not a text type.
        with self.assertRaises(TypeError):
            force_text(MyString())

    def test_force_text_lazy(self):
        """
        force_text() on a SimpleLazyObject wrapping a str returns an exact
        str instance.
        """
        s = SimpleLazyObject(lambda: 'x')
        # assertIs, not assertTrue: assertTrue(x, str) would treat ``str`` as
        # the failure message and pass for any (always truthy) type object.
        self.assertIs(type(force_text(s)), str)

    def test_force_text_DjangoUnicodeDecodeError(self):
        """
        Undecodable bytes raise DjangoUnicodeDecodeError with a message that
        includes the offending input and its type.
        """
        msg = (
            "'utf-8' codec can't decode byte 0xff in position 0: invalid "
            "start byte. You passed in b'\\xff' (<class 'bytes'>)"
        )
        with self.assertRaisesMessage(DjangoUnicodeDecodeError, msg):
            force_text(b'\xff')

    def test_force_bytes_exception(self):
        """
        force_bytes knows how to convert to bytes an exception
        containing non-ASCII characters in its args.
        """
        error_msg = "This is an exception, voilà"
        exc = ValueError(error_msg)
        self.assertEqual(force_bytes(exc), error_msg.encode())
        # With errors='ignore', the non-ASCII 'à' is dropped from the output.
        self.assertEqual(force_bytes(exc, encoding='ascii', errors='ignore'), b'This is an exception, voil')

    def test_force_bytes_strings_only(self):
        """
        With strings_only=True, a date object is returned unchanged.
        """
        today = datetime.date.today()
        self.assertEqual(force_bytes(today, strings_only=True), today)

    def test_smart_text(self):
        """
        smart_text() uses __str__ for arbitrary objects (even when __bytes__
        is also defined) and handles ints and plain strings.
        """
        class Test:
            def __str__(self):
                return 'ŠĐĆŽćžšđ'

        class TestU:
            def __str__(self):
                return 'ŠĐĆŽćžšđ'

            def __bytes__(self):
                # Must not be picked up by smart_text(); see assertion below.
                return b'Foo'

        self.assertEqual(smart_text(Test()), '\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111')
        self.assertEqual(smart_text(TestU()), '\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111')
        self.assertEqual(smart_text(1), '1')
        self.assertEqual(smart_text('foo'), 'foo')
|
2014-10-31 17:43:34 +02:00
|
|
|
|
2014-07-22 17:55:22 +05:30
|
|
|
|
|
|
|
class TestRFC3987IEncodingUtils(unittest.TestCase):
    """
    Tests for the IRI/URI helpers: filepath_to_uri(), iri_to_uri(),
    uri_to_iri() and escape_uri_path().
    """

    def test_filepath_to_uri(self):
        """
        Backslashes become forward slashes and non-ASCII characters are
        percent-encoded as UTF-8.
        """
        self.assertEqual(filepath_to_uri('upload\\чубака.mp4'), 'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')

    def test_iri_to_uri(self):
        """
        iri_to_uri() percent-encodes non-ASCII characters, leaves reserved
        characters alone, and is idempotent.
        """
        cases = [
            # Valid UTF-8 sequences are encoded.
            ('red%09rosé#red', 'red%09ros%C3%A9#red'),
            ('/blog/for/Jürgen Münster/', '/blog/for/J%C3%BCrgen%20M%C3%BCnster/'),
            ('locations/%s' % quote_plus('Paris & Orléans'), 'locations/Paris+%26+Orl%C3%A9ans'),

            # Reserved chars remain unescaped.
            ('%&', '%&'),
            ('red&♥ros%#red', 'red&%E2%99%A5ros%#red'),
        ]

        for iri, uri in cases:
            # subTest reports the failing input and lets the remaining cases
            # run instead of aborting at the first failure.
            with self.subTest(iri=iri):
                self.assertEqual(iri_to_uri(iri), uri)

                # Test idempotency.
                self.assertEqual(iri_to_uri(iri_to_uri(iri)), uri)

    def test_uri_to_iri(self):
        """
        uri_to_iri() decodes valid percent-encoded UTF-8, keeps reserved and
        broken sequences escaped, and is idempotent.
        """
        cases = [
            # Valid UTF-8 sequences are decoded.
            ('/%e2%89%Ab%E2%99%a5%E2%89%aB/', '/≫♥≫/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
            ('/%41%5a%6B/', '/AZk/'),
            # Reserved and non-URL valid ASCII chars are not decoded.
            ('/%25%20%02%41%7b/', '/%25%20%02A%7b/'),
            # Broken UTF-8 sequences remain escaped.
            ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
            ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
            ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
            ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
        ]

        for uri, iri in cases:
            # subTest reports the failing input and lets the remaining cases
            # run instead of aborting at the first failure.
            with self.subTest(uri=uri):
                self.assertEqual(uri_to_iri(uri), iri)

                # Test idempotency.
                self.assertEqual(uri_to_iri(uri_to_iri(uri)), iri)

    def test_complementarity(self):
        """
        iri_to_uri() and uri_to_iri() round-trip each (uri, iri) pair in
        both directions.
        """
        cases = [
            ('/blog/for/J%C3%BCrgen%20M%C3%BCnster/', '/blog/for/J\xfcrgen%20M\xfcnster/'),
            ('%&', '%&'),
            ('red&%E2%99%A5ros%#red', 'red&♥ros%#red'),
            ('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
            ('/%25%20%02%7b/', '/%25%20%02%7b/'),
            ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
            ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
            ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
            ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
            ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
        ]

        for uri, iri in cases:
            # subTest reports the failing pair and lets the remaining cases
            # run instead of aborting at the first failure.
            with self.subTest(uri=uri, iri=iri):
                self.assertEqual(iri_to_uri(uri_to_iri(uri)), uri)
                self.assertEqual(uri_to_iri(iri_to_uri(iri)), iri)

    def test_escape_uri_path(self):
        """
        escape_uri_path() escapes ';', '=', '?' and '#' while leaving ':',
        '@', '&' and '+' untouched.
        """
        self.assertEqual(
            escape_uri_path('/;some/=awful/?path/:with/@lots/&of/+awful/chars'),
            '/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars'
        )
        self.assertEqual(escape_uri_path('/foo#bar'), '/foo%23bar')
        self.assertEqual(escape_uri_path('/foo?bar'), '/foo%3Fbar')
|