diff --git a/django/utils/text.py b/django/utils/text.py index 979775be77..c41c35151b 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -59,7 +59,7 @@ def truncate_html_words(s, num): return u'' html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input') # Set up regular expressions - re_words = re.compile(r'&.*?;|<.*?>|([A-Za-z0-9][\w-]*)') + re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U) re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>') # Count non-HTML words and keep note of open tags pos = 0 diff --git a/tests/regressiontests/defaultfilters/tests.py b/tests/regressiontests/defaultfilters/tests.py index 53a92edaed..dc3693f0d6 100644 --- a/tests/regressiontests/defaultfilters/tests.py +++ b/tests/regressiontests/defaultfilters/tests.py @@ -104,6 +104,9 @@ u'

one two - three
four
five

' >>> truncatewords_html(u'

one two - three
four
five

', 100) u'

one two - three
four
five

' +>>> truncatewords_html(u'\xc5ngstr\xf6m was here', 1) +u'\xc5ngstr\xf6m ...' + >>> upper(u'Mixed case input') u'MIXED CASE INPUT'