2015-01-29 07:59:41 +00:00
|
|
|
import json
|
2018-07-14 10:38:18 +02:00
|
|
|
import sys
|
2015-01-29 07:59:41 +00:00
|
|
|
|
2021-04-14 18:23:44 +02:00
|
|
|
from django.core.exceptions import SuspiciousFileOperation
|
2021-01-07 08:09:04 +01:00
|
|
|
from django.test import SimpleTestCase
|
2016-12-29 16:27:49 +01:00
|
|
|
from django.utils import text
|
2015-11-07 14:30:20 +01:00
|
|
|
from django.utils.functional import lazystr
|
2016-08-24 18:18:17 +02:00
|
|
|
from django.utils.text import format_lazy
|
2017-01-26 20:58:33 +01:00
|
|
|
from django.utils.translation import gettext_lazy, override
|
2014-09-23 19:45:59 +07:00
|
|
|
|
2014-02-18 18:07:07 +01:00
|
|
|
# True when the astral-plane character U+1F4A9 counts as a single code point
# (len() == 1) instead of being reported as two units.
IS_WIDE_BUILD = len('\U0001F4A9') == 1
|
|
|
|
|
2013-11-02 16:34:05 -05:00
|
|
|
|
2012-05-03 15:27:01 +02:00
|
|
|
class TestUtilsText(SimpleTestCase):
|
|
|
|
|
2014-09-23 19:45:59 +07:00
|
|
|
def test_get_text_list(self):
    """get_text_list() joins items with commas and a final conjunction."""
    # (positional arguments, expected joined text) under the default locale.
    default_locale_cases = (
        ((['a', 'b', 'c', 'd'],), 'a, b, c or d'),
        ((['a', 'b', 'c'], 'and'), 'a, b and c'),
        ((['a', 'b'], 'and'), 'a and b'),
        ((['a'],), 'a'),
        (([],), ''),
    )
    for call_args, joined in default_locale_cases:
        self.assertEqual(text.get_text_list(*call_args), joined)
    # With Arabic active, both the separator and the conjunction change.
    with override('ar'):
        arabic_joined = text.get_text_list(['a', 'b', 'c'])
        self.assertEqual(arabic_joined, "a، b أو c")
|
|
|
|
|
|
|
|
def test_smart_split(self):
    """smart_split() splits on whitespace but keeps quoted runs together."""
    # Each entry pairs an input with the exact list of tokens expected.
    cases = (
        ('This is "a person" test.', ['This', 'is', '"a person"', 'test.']),
        ('This is "a person\'s" test.', ['This', 'is', '"a person\'s"', 'test.']),
        ('This is "a person\\"s" test.', ['This', 'is', '"a person\\"s"', 'test.']),
        ('"a \'one', ['"a', "'one"]),
        ('all friends\' tests', ['all', 'friends\'', 'tests']),
        (
            'url search_page words="something else"',
            ['url', 'search_page', 'words="something else"'],
        ),
        (
            "url search_page words='something else'",
            ['url', 'search_page', "words='something else'"],
        ),
        (
            'url search_page words "something else"',
            ['url', 'search_page', 'words', '"something else"'],
        ),
        (
            'url search_page words-"something else"',
            ['url', 'search_page', 'words-"something else"'],
        ),
        ('url search_page words=hello', ['url', 'search_page', 'words=hello']),
        # An unterminated quote does not group the words that follow it.
        (
            'url search_page words="something else',
            ['url', 'search_page', 'words="something', 'else'],
        ),
        ("cut:','|cut:' '", ["cut:','|cut:' '"]),
        # Test for #20231
        (lazystr("a b c d"), ['a', 'b', 'c', 'd']),
    )
    for source, tokens in cases:
        with self.subTest(value=source):
            produced = list(text.smart_split(source))
            self.assertEqual(produced, tokens)
|
2014-09-23 19:45:59 +07:00
|
|
|
|
2011-07-14 13:47:10 +00:00
|
|
|
def test_truncate_chars(self):
    """Truncator.chars() truncates by character count, ellipsis included.

    Covers plain text, a custom truncation suffix, combining characters
    (with and without a precomposed form), a zero length, and lazy strings.
    """
    truncator = text.Truncator('The quick brown fox jumped over the lazy dog.')
    # The first four assertions used to end in a stray trailing comma, which
    # turned each statement into a discarded 1-tuple; the commas are removed.
    self.assertEqual('The quick brown fox jumped over the lazy dog.', truncator.chars(100))
    self.assertEqual('The quick brown fox …', truncator.chars(21))
    self.assertEqual('The quick brown fo.....', truncator.chars(23, '.....'))
    self.assertEqual('.....', truncator.chars(4, '.....'))

    # The same text in precomposed (NFC) and decomposed (NFD) spelling must
    # truncate to the same result.
    nfc = text.Truncator('o\xfco\xfco\xfco\xfc')
    nfd = text.Truncator('ou\u0308ou\u0308ou\u0308ou\u0308')
    self.assertEqual('oüoüoüoü', nfc.chars(8))
    self.assertEqual('oüoüoüoü', nfd.chars(8))
    self.assertEqual('oü…', nfc.chars(3))
    self.assertEqual('oü…', nfd.chars(3))

    # Ensure the final length is calculated correctly when there are
    # combining characters with no precomposed form, and that combining
    # characters are not split up.
    truncator = text.Truncator('-B\u030AB\u030A----8')
    self.assertEqual('-B\u030A…', truncator.chars(3))
    self.assertEqual('-B\u030AB\u030A-…', truncator.chars(5))
    self.assertEqual('-B\u030AB\u030A----8', truncator.chars(8))

    # Ensure the length of the end text is correctly calculated when it
    # contains combining characters with no precomposed form.
    truncator = text.Truncator('-----')
    self.assertEqual('---B\u030A', truncator.chars(4, 'B\u030A'))
    self.assertEqual('-----', truncator.chars(5, 'B\u030A'))

    # Make a best effort to shorten to the desired length, but requesting
    # a length shorter than the ellipsis shouldn't break
    self.assertEqual('…', text.Truncator('asdf').chars(0))
    # lazy strings are handled correctly
    self.assertEqual(text.Truncator(lazystr('The quick brown fox')).chars(10), 'The quick…')
|
2011-07-14 13:47:10 +00:00
|
|
|
|
2019-07-15 11:46:09 +02:00
|
|
|
def test_truncate_chars_html(self):
    """Truncator.chars(html=True) copes with long, malformed markup."""
    # (input, expected); None means the input must come back unchanged.
    awkward_inputs = [
        (('</a' + '\t' * 50000) + '//>', None),
        ('&' * 50000, '&' * 9 + '…'),
        ('_X<<<<<<<<<<<>', None),
    ]
    for markup, truncated in awkward_inputs:
        with self.subTest(value=markup):
            wanted = truncated or markup
            produced = text.Truncator(markup).chars(10, html=True)
            self.assertEqual(wanted, produced)
|
|
|
|
|
2010-09-27 15:15:04 +00:00
|
|
|
def test_truncate_words(self):
    """Truncator.words() keeps the first N words and appends a suffix."""
    sentence = 'The quick brown fox jumped over the lazy dog.'
    plain = text.Truncator(sentence)
    # Asking for more words than exist returns the text untouched.
    self.assertEqual('The quick brown fox jumped over the lazy dog.', plain.words(10))
    self.assertEqual('The quick brown fox…', plain.words(4))
    self.assertEqual('The quick brown fox[snip]', plain.words(4, '[snip]'))
    # lazy strings are handled correctly
    lazy = text.Truncator(lazystr(sentence))
    self.assertEqual('The quick brown fox…', lazy.words(4))
|
2011-07-14 13:47:10 +00:00
|
|
|
|
|
|
|
def test_truncate_html_words(self):
    """Truncator.words(html=True) counts words only and closes open tags.

    Covers nested tags, custom and empty suffixes, a newline inside a tag,
    self-closing tags, HTML entities, and long malformed inputs.
    """
    truncator = text.Truncator(
        '<p id="par"><strong><em>The quick brown fox jumped over the lazy dog.</em></strong></p>'
    )
    self.assertEqual(
        '<p id="par"><strong><em>The quick brown fox jumped over the lazy dog.</em></strong></p>',
        truncator.words(10, html=True)
    )
    self.assertEqual(
        '<p id="par"><strong><em>The quick brown fox…</em></strong></p>',
        truncator.words(4, html=True)
    )
    self.assertEqual(
        '<p id="par"><strong><em>The quick brown fox....</em></strong></p>',
        truncator.words(4, '....', html=True)
    )
    self.assertEqual(
        '<p id="par"><strong><em>The quick brown fox</em></strong></p>',
        truncator.words(4, '', html=True)
    )

    # Test with new line inside tag
    truncator = text.Truncator(
        '<p>The quick <a href="xyz.html"\n id="mylink">brown fox</a> jumped over the lazy dog.</p>'
    )
    self.assertEqual(
        '<p>The quick <a href="xyz.html"\n id="mylink">brown…</a></p>',
        truncator.words(3, html=True)
    )

    # Test self-closing tags
    truncator = text.Truncator('<br/>The <hr />quick brown fox jumped over the lazy dog.')
    self.assertEqual('<br/>The <hr />quick brown…', truncator.words(3, html=True))
    truncator = text.Truncator('<br>The <hr/>quick <em>brown fox</em> jumped over the lazy dog.')
    self.assertEqual('<br>The <hr/>quick <em>brown…</em>', truncator.words(3, html=True))

    # Test html entities
    # The entities are written out explicitly. With the characters already
    # decoded, this section exercises no entity handling at all, and a raw
    # "<3" would be read as the start of a tag rather than as a word.
    truncator = text.Truncator('<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo est&aacute;?</i>')
    self.assertEqual('<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo…</i>', truncator.words(3, html=True))
    truncator = text.Truncator('<p>I &lt;3 python, what about you?</p>')
    self.assertEqual('<p>I &lt;3 python,…</p>', truncator.words(3, html=True))

    # Long or malformed inputs with fewer than 50 words must be returned
    # unchanged.
    perf_test_values = [
        ('</a' + '\t' * 50000) + '//>',
        '&' * 50000,
        '_X<<<<<<<<<<<>',
    ]
    for value in perf_test_values:
        with self.subTest(value=value):
            truncator = text.Truncator(value)
            self.assertEqual(value, truncator.words(50, html=True))
|
2018-02-24 16:22:43 -05:00
|
|
|
|
2010-11-30 21:21:37 +00:00
|
|
|
def test_wrap(self):
    """wrap() breaks lines at word boundaries and never splits a word."""
    digits = '1234 67 9'
    # (width, expected) for the same short input.
    for width, wrapped in ((100, '1234 67 9'), (9, '1234 67 9'), (8, '1234 67\n9')):
        self.assertEqual(text.wrap(digits, width), wrapped)

    # Existing newlines are kept; over-long words stay on their own line.
    self.assertEqual(text.wrap('short\na long line', 7), 'short\na long\nline')
    self.assertEqual(
        text.wrap('do-not-break-long-words please? ok', 8),
        'do-not-break-long-words\nplease?\nok',
    )

    long_word = 'l%sng' % ('o' * 20)
    self.assertEqual(text.wrap(long_word, 20), long_word)
    sentence = 'a %s word' % long_word
    self.assertEqual(text.wrap(sentence, 10), 'a\n%s\nword' % long_word)
    # Lazy input is accepted as well.
    self.assertEqual(text.wrap(lazystr(digits), 100), '1234 67 9')
|
2012-08-18 13:53:22 +01:00
|
|
|
|
2013-12-07 16:28:22 +08:00
|
|
|
def test_normalize_newlines(self):
    """normalize_newlines() turns every CR and CRLF into a single LF."""
    cases = (
        ("abc\ndef\rghi\r\n", "abc\ndef\nghi\n"),
        ("\n\r\r\n\r", "\n\n\n\n"),
        ("abcdefghi", "abcdefghi"),
        ("", ""),
        # Lazy input is normalized the same way.
        (lazystr("abc\ndef\rghi\r\n"), "abc\ndef\nghi\n"),
    )
    for raw, normalized in cases:
        self.assertEqual(text.normalize_newlines(raw), normalized)
|
2013-12-07 16:28:22 +08:00
|
|
|
|
2015-11-07 14:30:20 +01:00
|
|
|
def test_phone2numeric(self):
    """phone2numeric() maps keypad letters to digits for str and lazy input."""
    numeric = text.phone2numeric('0800 flowers')
    self.assertEqual(numeric, '0800 3569377')
    # Original check: a lazy wrapper around the already-converted result.
    lazy_numeric = lazystr(text.phone2numeric('0800 flowers'))
    self.assertEqual(lazy_numeric, '0800 3569377')
    # The check above never passes a lazy value *into* phone2numeric(), so
    # lazy input went untested; cover it explicitly.
    self.assertEqual(text.phone2numeric(lazystr('0800 flowers')), '0800 3569377')
|
|
|
|
|
2012-08-18 13:53:22 +01:00
|
|
|
def test_slugify(self):
    """slugify() lowercases, hyphenates, and strips edge dashes/underscores."""
    # Columns: given - expected - Unicode?
    cases = [
        ('Hello, World!', 'hello-world', False),
        ('spam & eggs', 'spam-eggs', False),
        (' multiple---dash and space ', 'multiple-dash-and-space', False),
        ('\t whitespace-in-value \n', 'whitespace-in-value', False),
        ('underscore_in-value', 'underscore_in-value', False),
        ('__strip__underscore-value___', 'strip__underscore-value', False),
        ('--strip-dash-value---', 'strip-dash-value', False),
        ('__strip-mixed-value---', 'strip-mixed-value', False),
        ('_ -strip-mixed-value _-', 'strip-mixed-value', False),
        ('spam & ıçüş', 'spam-ıçüş', True),
        ('foo ıç bar', 'foo-ıç-bar', True),
        (' foo ıç bar', 'foo-ıç-bar', True),
        ('你好', '你好', True),
        ('İstanbul', 'istanbul', True),
    ]
    for raw, slug, unicode_allowed in cases:
        with self.subTest(value=raw):
            produced = text.slugify(raw, allow_unicode=unicode_allowed)
            self.assertEqual(produced, slug)
    # Interning the result may be useful, e.g. when fed to Path.
    with self.subTest('intern'):
        interned = sys.intern(text.slugify('a'))
        self.assertEqual(interned, 'a')
|
2013-09-27 17:00:42 +02:00
|
|
|
|
2015-11-07 14:30:20 +01:00
|
|
|
def test_unescape_string_literal(self):
    """unescape_string_literal() strips the quotes and unescapes the body.

    Every case is checked with both a plain str and a lazy string.
    """
    items = [
        ('"abc"', 'abc'),
        ("'abc'", 'abc'),
        # In these two non-raw literals Python itself consumes the
        # backslashes, so the values contain bare inner quotes only.
        ('"a \"bc\""', 'a "bc"'),
        ("'\'ab\' c'", "'ab' c"),
        # Raw literals keep the backslashes, so these cases really pass
        # backslash-escaped quotes to the function.
        (r'"a \"bc\""', 'a "bc"'),
        (r"'\'ab\' c'", "'ab' c"),
    ]
    for value, output in items:
        with self.subTest(value=value):
            self.assertEqual(text.unescape_string_literal(value), output)
            self.assertEqual(text.unescape_string_literal(lazystr(value)), output)
|
2013-11-24 16:10:21 +08:00
|
|
|
|
|
|
|
def test_get_valid_filename(self):
    """get_valid_filename() strips unsafe characters or raises if none remain."""
    raw_name = "^&'@{}[],$=!-#()%+~_123.txt"
    # Plain and lazy input must sanitize to the same name.
    for candidate in (raw_name, lazystr(raw_name)):
        self.assertEqual(text.get_valid_filename(candidate), "-_123.txt")
    # Inputs with no usable file name, paired with the exact error message.
    rejected = (
        ('???', "Could not derive file name from '???'"),
        # After sanitizing this would yield '..'.
        ('$.$.$', "Could not derive file name from '$.$.$'"),
    )
    for bad_name, msg in rejected:
        with self.assertRaisesMessage(SuspiciousFileOperation, msg):
            text.get_valid_filename(bad_name)
|
2015-01-29 07:59:41 +00:00
|
|
|
|
|
|
|
def test_compress_sequence(self):
    """compress_sequence() yields fewer bytes in total than it was given."""
    records = [{'key': i} for i in range(10)]
    # Encode the JSON fragment by fragment to get a sequence of byte chunks.
    chunks = [fragment.encode() for fragment in json.JSONEncoder().iterencode(records)]
    raw_size = sum(len(chunk) for chunk in chunks)
    compressed_size = sum(len(part) for part in text.compress_sequence(chunks))
    self.assertLess(compressed_size, raw_size)
|
2016-08-24 18:18:17 +02:00
|
|
|
|
|
|
|
def test_format_lazy(self):
    """format_lazy() formats lazily and accepts lazy format strings."""
    wanted = 'django/test'
    # Positional, indexed, keyword and item-access placeholders.
    self.assertEqual(wanted, format_lazy('{}/{}', 'django', lazystr('test')))
    self.assertEqual(wanted, format_lazy('{0}/{1}', 'django', 'test'))
    self.assertEqual(wanted, format_lazy('{a}/{b}', a='django', b='test'))
    self.assertEqual(wanted, format_lazy('{a[0]}/{a[1]}', a=('django', 'test')))

    # The result is evaluated at comparison time, so later changes to the
    # mapping are reflected.
    shared = {}
    deferred = format_lazy('{0[a]}-{p[a]}', shared, p=shared)
    shared['a'] = lazystr('django')
    self.assertEqual('django-django', deferred)
    shared['a'] = 'update'
    self.assertEqual('update-update', deferred)

    # The format string can be lazy. (string comes from contrib.admin)
    message = format_lazy(
        gettext_lazy('Added {name} “{object}”.'),
        name='article',
        object='My first try',
    )
    with override('fr'):
        self.assertEqual('Ajout de article «\xa0My first try\xa0».', message)
|