From daaa3a1dde1d400d57206b258ae62282521461c3 Mon Sep 17 00:00:00 2001 From: Malcolm Tredinnick Date: Mon, 25 Jun 2007 13:11:10 +0000 Subject: [PATCH] unicode: Fixed #4662 -- Fixed a remaining ASCII assumption in truncatewords_html(). Thanks, Ivan Sagalaev. git-svn-id: http://code.djangoproject.com/svn/django/branches/unicode@5533 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/utils/text.py | 2 +- tests/regressiontests/defaultfilters/tests.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/django/utils/text.py b/django/utils/text.py index 979775be77..c41c35151b 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -59,7 +59,7 @@ def truncate_html_words(s, num): return u'' html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input') # Set up regular expressions - re_words = re.compile(r'&.*?;|<.*?>|([A-Za-z0-9][\w-]*)') + re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U) re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>') # Count non-HTML words and keep note of open tags pos = 0 diff --git a/tests/regressiontests/defaultfilters/tests.py b/tests/regressiontests/defaultfilters/tests.py index 53a92edaed..dc3693f0d6 100644 --- a/tests/regressiontests/defaultfilters/tests.py +++ b/tests/regressiontests/defaultfilters/tests.py @@ -104,6 +104,9 @@ u'

<p>one <a href="#">two - three <br>four</a> five</p>

' >>> truncatewords_html(u'

<p>one <a href="#">two - three <br>four</a> five</p>

', 100) u'

<p>one <a href="#">two - three <br>four</a> five</p>

' +>>> truncatewords_html(u'\xc5ngstr\xf6m was here', 1) +u'\xc5ngstr\xf6m ...' + >>> upper(u'Mixed case input') u'MIXED CASE INPUT'