From daaa3a1dde1d400d57206b258ae62282521461c3 Mon Sep 17 00:00:00 2001
From: Malcolm Tredinnick <malcolm.tredinnick@gmail.com>
Date: Mon, 25 Jun 2007 13:11:10 +0000
Subject: [PATCH] unicode: Fixed #4662 -- Fixed a remaining ASCII assumption in
 truncatewords_html(). Thanks, Ivan Sagalaev.

git-svn-id: http://code.djangoproject.com/svn/django/branches/unicode@5533 bcc190cf-cafb-0310-a4f2-bffc1f526a37
---
 django/utils/text.py                          | 2 +-
 tests/regressiontests/defaultfilters/tests.py | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/django/utils/text.py b/django/utils/text.py
index 979775be77..c41c35151b 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -59,7 +59,7 @@ def truncate_html_words(s, num):
         return u''
     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
     # Set up regular expressions
-    re_words = re.compile(r'&.*?;|<.*?>|([A-Za-z0-9][\w-]*)')
+    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
     re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
     # Count non-HTML words and keep note of open tags
     pos = 0
diff --git a/tests/regressiontests/defaultfilters/tests.py b/tests/regressiontests/defaultfilters/tests.py
index 53a92edaed..dc3693f0d6 100644
--- a/tests/regressiontests/defaultfilters/tests.py
+++ b/tests/regressiontests/defaultfilters/tests.py
@@ -104,6 +104,9 @@ u'<p>one <a href="#">two - three <br>four</a> five</p>'
 >>> truncatewords_html(u'<p>one <a href="#">two - three <br>four</a> five</p>', 100)
 u'<p>one <a href="#">two - three <br>four</a> five</p>'
 
+>>> truncatewords_html(u'\xc5ngstr\xf6m was here', 1)
+u'\xc5ngstr\xf6m ...'
+
 >>> upper(u'Mixed case input')
 u'MIXED CASE INPUT'