2015-01-29 07:59:41 +00:00
|
|
|
import json
|
2018-07-14 10:38:18 +02:00
|
|
|
import sys
|
2015-01-29 07:59:41 +00:00
|
|
|
|
2021-04-14 18:23:44 +02:00
|
|
|
from django.core.exceptions import SuspiciousFileOperation
|
2021-01-07 08:09:04 +01:00
|
|
|
from django.test import SimpleTestCase
|
2016-12-29 16:27:49 +01:00
|
|
|
from django.utils import text
|
2015-11-07 14:30:20 +01:00
|
|
|
from django.utils.functional import lazystr
|
2016-08-24 18:18:17 +02:00
|
|
|
from django.utils.text import format_lazy
|
2017-01-26 20:58:33 +01:00
|
|
|
from django.utils.translation import gettext_lazy, override
|
2014-09-23 19:45:59 +07:00
|
|
|
|
2022-02-03 20:24:19 +01:00
|
|
|
# True when the single code point U+1F4A9 (outside the Basic Multilingual
# Plane) has a string length of 1, i.e. it is not exposed as two code units.
# NOTE(review): not referenced in the visible part of this file - confirm it
# is still needed.
IS_WIDE_BUILD = len("\U0001F4A9") == 1
|
2014-02-18 18:07:07 +01:00
|
|
|
|
2013-11-02 16:34:05 -05:00
|
|
|
|
2012-05-03 15:27:01 +02:00
|
|
|
class TestUtilsText(SimpleTestCase):
|
2014-09-23 19:45:59 +07:00
|
|
|
def test_get_text_list(self):
    """get_text_list() joins items with commas and a final conjunction."""
    # (positional arguments, expected rendering) in the active language.
    cases = (
        ((["a", "b", "c", "d"],), "a, b, c or d"),
        ((["a", "b", "c"], "and"), "a, b and c"),
        ((["a", "b"], "and"), "a and b"),
        ((["a"],), "a"),
        (([],), ""),
    )
    for call_args, rendered in cases:
        self.assertEqual(text.get_text_list(*call_args), rendered)
    # With Arabic active, both the separator and the conjunction change.
    with override("ar"):
        arabic = text.get_text_list(["a", "b", "c"])
        self.assertEqual(arabic, "a، b أو c")
|
2014-09-23 19:45:59 +07:00
|
|
|
|
|
|
|
def test_smart_split(self):
    """smart_split() splits on whitespace but keeps quoted runs together."""
    # Pairs of (input, tokens smart_split() is expected to yield).
    cases = (
        ('This is "a person" test.', ["This", "is", '"a person"', "test."]),
        ('This is "a person\'s" test.', ["This", "is", '"a person\'s"', "test."]),
        ('This is "a person\\"s" test.', ["This", "is", '"a person\\"s"', "test."]),
        ("\"a 'one", ['"a', "'one"]),
        ("all friends' tests", ["all", "friends'", "tests"]),
        (
            'url search_page words="something else"',
            ["url", "search_page", 'words="something else"'],
        ),
        (
            "url search_page words='something else'",
            ["url", "search_page", "words='something else'"],
        ),
        (
            'url search_page words "something else"',
            ["url", "search_page", "words", '"something else"'],
        ),
        (
            'url search_page words-"something else"',
            ["url", "search_page", 'words-"something else"'],
        ),
        ("url search_page words=hello", ["url", "search_page", "words=hello"]),
        (
            'url search_page words="something else',
            ["url", "search_page", 'words="something', "else"],
        ),
        ("cut:','|cut:' '", ["cut:','|cut:' '"]),
        (lazystr("a b c d"), ["a", "b", "c", "d"]),  # Test for #20231
    )
    for raw, tokens in cases:
        with self.subTest(value=raw):
            produced = list(text.smart_split(raw))
            self.assertEqual(produced, tokens)
|
2014-09-23 19:45:59 +07:00
|
|
|
|
2011-07-14 13:47:10 +00:00
|
|
|
def test_truncate_chars(self):
    """Truncator.chars() cuts to a character budget and appends a suffix."""
    truncator = text.Truncator("The quick brown fox jumped over the lazy dog.")
    # A budget larger than the text returns it unchanged.
    self.assertEqual(
        "The quick brown fox jumped over the lazy dog.", truncator.chars(100)
    )
    self.assertEqual("The quick brown fox …", truncator.chars(21))
    self.assertEqual("The quick brown fo.....", truncator.chars(23, "....."))
    self.assertEqual(".....", truncator.chars(4, "....."))

    # Precomposed (NFC) and decomposed (NFD) inputs give the same results.
    nfc = text.Truncator("o\xfco\xfco\xfco\xfc")
    nfd = text.Truncator("ou\u0308ou\u0308ou\u0308ou\u0308")
    self.assertEqual("oüoüoüoü", nfc.chars(8))
    self.assertEqual("oüoüoüoü", nfd.chars(8))
    self.assertEqual("oü…", nfc.chars(3))
    self.assertEqual("oü…", nfd.chars(3))

    # Ensure the final length is calculated correctly when there are
    # combining characters with no precomposed form, and that combining
    # characters are not split up.
    truncator = text.Truncator("-B\u030AB\u030A----8")
    self.assertEqual("-B\u030A…", truncator.chars(3))
    self.assertEqual("-B\u030AB\u030A-…", truncator.chars(5))
    self.assertEqual("-B\u030AB\u030A----8", truncator.chars(8))

    # Ensure the length of the end text is correctly calculated when it
    # contains combining characters with no precomposed form.
    truncator = text.Truncator("-----")
    self.assertEqual("---B\u030A", truncator.chars(4, "B\u030A"))
    self.assertEqual("-----", truncator.chars(5, "B\u030A"))

    # Make a best effort to shorten to the desired length, but requesting
    # a length shorter than the ellipsis shouldn't break
    self.assertEqual("…", text.Truncator("asdf").chars(0))
    # lazy strings are handled correctly
    self.assertEqual(
        text.Truncator(lazystr("The quick brown fox")).chars(10), "The quick…"
    )
|
2011-07-14 13:47:10 +00:00
|
|
|
|
2019-07-15 11:46:09 +02:00
|
|
|
def test_truncate_chars_html(self):
    """Truncator.chars(html=True) handles very long or malformed markup."""
    # (input, expected); None means the input must come back unchanged.
    perf_cases = (
        (("</a" + "\t" * 50000) + "//>", None),
        ("&" * 50000, "&" * 9 + "…"),
        ("_X<<<<<<<<<<<>", None),
    )
    for markup, truncated in perf_cases:
        with self.subTest(value=markup):
            wanted = truncated or markup
            result = text.Truncator(markup).chars(10, html=True)
            self.assertEqual(wanted, result)
|
2019-07-15 11:46:09 +02:00
|
|
|
|
2010-09-27 15:15:04 +00:00
|
|
|
def test_truncate_words(self):
    """Truncator.words() keeps the first N words and appends a suffix."""
    sentence = "The quick brown fox jumped over the lazy dog."
    plain = text.Truncator(sentence)
    # The sentence has fewer than 10 words, so it is returned unchanged.
    self.assertEqual(sentence, plain.words(10))
    self.assertEqual("The quick brown fox…", plain.words(4))
    self.assertEqual("The quick brown fox[snip]", plain.words(4, "[snip]"))
    # lazy strings are handled correctly
    lazy = text.Truncator(lazystr(sentence))
    self.assertEqual("The quick brown fox…", lazy.words(4))
|
2011-07-14 13:47:10 +00:00
|
|
|
|
|
|
|
def test_truncate_html_words(self):
    """Truncator.words(html=True) counts words and closes any open tags."""
    markup = (
        '<p id="par"><strong><em>The quick brown fox jumped over the lazy dog.</em>'
        "</strong></p>"
    )
    truncator = text.Truncator(markup)
    # Asking for more words than are present returns the markup untouched.
    self.assertEqual(markup, truncator.words(10, html=True))
    self.assertEqual(
        '<p id="par"><strong><em>The quick brown fox…</em></strong></p>',
        truncator.words(4, html=True),
    )
    self.assertEqual(
        '<p id="par"><strong><em>The quick brown fox....</em></strong></p>',
        truncator.words(4, "....", html=True),
    )
    self.assertEqual(
        '<p id="par"><strong><em>The quick brown fox</em></strong></p>',
        truncator.words(4, "", html=True),
    )

    # Test with new line inside tag
    truncator = text.Truncator(
        '<p>The quick <a href="xyz.html"\n id="mylink">brown fox</a> jumped over '
        "the lazy dog.</p>"
    )
    self.assertEqual(
        '<p>The quick <a href="xyz.html"\n id="mylink">brown…</a></p>',
        truncator.words(3, html=True),
    )

    # Test self-closing tags
    truncator = text.Truncator(
        "<br/>The <hr />quick brown fox jumped over the lazy dog."
    )
    self.assertEqual("<br/>The <hr />quick brown…", truncator.words(3, html=True))
    truncator = text.Truncator(
        "<br>The <hr/>quick <em>brown fox</em> jumped over the lazy dog."
    )
    self.assertEqual(
        "<br>The <hr/>quick <em>brown…</em>", truncator.words(3, html=True)
    )

    # Test html entities: the inputs must contain real entities (named and
    # numeric), and the expected output keeps them as written.
    truncator = text.Truncator(
        "<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo est&aacute;?</i>"
    )
    self.assertEqual(
        "<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo…</i>",
        truncator.words(3, html=True),
    )
    # "&lt;" must stay escaped: a literal "<" would be read as a tag opener.
    truncator = text.Truncator("<p>I &lt;3 python, what about you?</p>")
    self.assertEqual("<p>I &lt;3 python,…</p>", truncator.words(3, html=True))

    # Very long or malformed inputs must come back unchanged.
    perf_test_values = [
        ("</a" + "\t" * 50000) + "//>",
        "&" * 50000,
        "_X<<<<<<<<<<<>",
    ]
    for value in perf_test_values:
        with self.subTest(value=value):
            truncator = text.Truncator(value)
            self.assertEqual(value, truncator.words(50, html=True))
|
2018-02-24 16:22:43 -05:00
|
|
|
|
2010-11-30 21:21:37 +00:00
|
|
|
def test_wrap(self):
    """wrap() breaks lines at spaces and never splits a long word."""
    digits = "1234 67 9"
    # Widths at or above the text length leave it on one line.
    for width in (100, 9):
        self.assertEqual(text.wrap(digits, width), "1234 67 9")
    self.assertEqual(text.wrap(digits, 8), "1234 67\n9")

    # Existing newlines are kept and each line is wrapped on its own.
    rewrapped = text.wrap("short\na long line", 7)
    self.assertEqual(rewrapped, "short\na long\nline")
    hyphenated = text.wrap("do-not-break-long-words please? ok", 8)
    self.assertEqual(hyphenated, "do-not-break-long-words\nplease?\nok")

    # A word longer than the width is placed on its own line, unbroken.
    long_word = "l%sng" % ("o" * 20)
    self.assertEqual(text.wrap(long_word, 20), long_word)
    sentence = "a %s word" % long_word
    self.assertEqual(text.wrap(sentence, 10), "a\n%s\nword" % long_word)
    # Lazy strings are accepted as input.
    self.assertEqual(text.wrap(lazystr(digits), 100), "1234 67 9")
|
2012-08-18 13:53:22 +01:00
|
|
|
|
2013-12-07 16:28:22 +08:00
|
|
|
def test_normalize_newlines(self):
    """normalize_newlines() rewrites every "\\r\\n" and "\\r" as "\\n"."""
    mixed = "abc\ndef\rghi\r\n"
    # (input, expected output); the last entry feeds in a lazy string.
    cases = (
        (mixed, "abc\ndef\nghi\n"),
        ("\n\r\r\n\r", "\n\n\n\n"),
        ("abcdefghi", "abcdefghi"),
        ("", ""),
        (lazystr(mixed), "abc\ndef\nghi\n"),
    )
    for raw, normalized in cases:
        self.assertEqual(text.normalize_newlines(raw), normalized)
|
2013-12-07 16:28:22 +08:00
|
|
|
|
2015-11-07 14:30:20 +01:00
|
|
|
def test_phone2numeric(self):
    """phone2numeric() maps keypad letters to digits for str and lazy input."""
    numeric = text.phone2numeric("0800 flowers")
    self.assertEqual(numeric, "0800 3569377")
    # Pass the lazy string *into* phone2numeric(); wrapping the result in
    # lazystr() instead would never exercise the function's lazy handling.
    lazy_numeric = text.phone2numeric(lazystr("0800 flowers"))
    self.assertEqual(lazy_numeric, "0800 3569377")
|
2015-11-07 14:30:20 +01:00
|
|
|
|
2012-08-18 13:53:22 +01:00
|
|
|
def test_slugify(self):
    """slugify() produces the expected slug with and without allow_unicode."""
    # (given, expected slug, value passed as allow_unicode)
    cases = [
        ("Hello, World!", "hello-world", False),
        ("spam & eggs", "spam-eggs", False),
        (" multiple---dash and  space ", "multiple-dash-and-space", False),
        ("\t whitespace-in-value \n", "whitespace-in-value", False),
        ("underscore_in-value", "underscore_in-value", False),
        ("__strip__underscore-value___", "strip__underscore-value", False),
        ("--strip-dash-value---", "strip-dash-value", False),
        ("__strip-mixed-value---", "strip-mixed-value", False),
        ("_ -strip-mixed-value _-", "strip-mixed-value", False),
        ("spam & ıçüş", "spam-ıçüş", True),
        ("foo ıç bar", "foo-ıç-bar", True),
        ("    foo ıç bar", "foo-ıç-bar", True),
        ("你好", "你好", True),
        ("İstanbul", "istanbul", True),
    ]
    for given, slug, unicode_ok in cases:
        with self.subTest(value=given):
            produced = text.slugify(given, allow_unicode=unicode_ok)
            self.assertEqual(produced, slug)
    # Interning the result may be useful, e.g. when fed to Path.
    with self.subTest("intern"):
        interned = sys.intern(text.slugify("a"))
        self.assertEqual(interned, "a")
|
2013-09-27 17:00:42 +02:00
|
|
|
|
2015-11-07 14:30:20 +01:00
|
|
|
def test_unescape_string_literal(self):
    """unescape_string_literal() strips the outer quotes of a literal."""
    # (quoted literal, expected content), checked for str and lazy input.
    cases = (
        ('"abc"', "abc"),
        ("'abc'", "abc"),
        ('"a "bc""', 'a "bc"'),
        ("''ab' c'", "'ab' c"),
    )
    for literal, content in cases:
        with self.subTest(value=literal):
            for candidate in (literal, lazystr(literal)):
                self.assertEqual(text.unescape_string_literal(candidate), content)
|
2013-11-24 16:10:21 +08:00
|
|
|
|
2021-12-14 20:16:41 +01:00
|
|
|
def test_unescape_string_literal_invalid_value(self):
    """unescape_string_literal() raises ValueError for non-literals."""
    # Empty, unquoted, and mismatched-quote inputs.
    items = ["", "abc", "'abc\""]
    for item in items:
        # One subTest per value so a failure does not hide the later ones.
        with self.subTest(value=item):
            msg = f"Not a string literal: {item!r}"
            with self.assertRaisesMessage(ValueError, msg):
                text.unescape_string_literal(item)
|
|
|
|
|
2013-11-24 16:10:21 +08:00
|
|
|
def test_get_valid_filename(self):
    """get_valid_filename() sanitizes names and rejects unusable ones."""
    raw_name = "^&'@{}[],$=!-#()%+~_123.txt"
    # Plain and lazy inputs sanitize to the same name.
    for candidate in (raw_name, lazystr(raw_name)):
        self.assertEqual(text.get_valid_filename(candidate), "-_123.txt")
    rejected = (
        ("???", "Could not derive file name from '???'"),
        # After sanitizing this would yield '..'.
        ("$.$.$", "Could not derive file name from '$.$.$'"),
    )
    for bad_name, msg in rejected:
        with self.assertRaisesMessage(SuspiciousFileOperation, msg):
            text.get_valid_filename(bad_name)
|
2015-01-29 07:59:41 +00:00
|
|
|
|
|
|
|
def test_compress_sequence(self):
    """compress_sequence() output is smaller than the joined input chunks."""
    payload = [{"key": i} for i in range(10)]
    # Encode the JSON incrementally so the input is a list of byte chunks.
    chunks = [piece.encode() for piece in json.JSONEncoder().iterencode(payload)]
    raw_size = sum(len(chunk) for chunk in chunks)
    compressed = text.compress_sequence(chunks)
    compressed_size = len(b"".join(compressed))
    self.assertLess(compressed_size, raw_size)
|
2016-08-24 18:18:17 +02:00
|
|
|
|
|
|
|
def test_format_lazy(self):
    """format_lazy() accepts str.format() arguments and evaluates lazily."""
    expected = "django/test"
    positional = format_lazy("{}/{}", "django", lazystr("test"))
    self.assertEqual(expected, positional)
    unpacked = format_lazy("{0}/{1}", *("django", "test"))
    self.assertEqual(expected, unpacked)
    by_keyword = format_lazy("{a}/{b}", **{"a": "django", "b": "test"})
    self.assertEqual(expected, by_keyword)
    by_index = format_lazy("{a[0]}/{a[1]}", a=("django", "test"))
    self.assertEqual(expected, by_index)

    # The mapping is empty when format_lazy() is called; values set later
    # are picked up each time the result is compared.
    shared = {}
    deferred = format_lazy("{0[a]}-{p[a]}", shared, p=shared)
    shared["a"] = lazystr("django")
    self.assertEqual("django-django", deferred)
    shared["a"] = "update"
    self.assertEqual("update-update", deferred)

    # The format string can be lazy. (string comes from contrib.admin)
    template = gettext_lazy("Added {name} “{object}”.")
    message = format_lazy(template, name="article", object="My first try")
    with override("fr"):
        self.assertEqual("Ajout de article «\xa0My first try\xa0».", message)
|