From b30e5b54346abacdd91b8ef7ec439c00f34a5ad6 Mon Sep 17 00:00:00 2001
From: Malcolm Tredinnick <malcolm.tredinnick@gmail.com>
Date: Sat, 21 Apr 2007 14:34:43 +0000
Subject: [PATCH] Changed default template filters to always return unicode
 (and to handle unicode input better). Also changed all django.utils.*
 functions that are used (or likely to be used) by filters to return unicode
 strings.

Most of the filter porting was done by Ivan Sagalaev. Fixed #3977.


git-svn-id: http://code.djangoproject.com/svn/django/branches/unicode@5056 bcc190cf-cafb-0310-a4f2-bffc1f526a37
---
 django/template/defaultfilters.py             | 128 +++---
 django/utils/dateformat.py                    |  45 +-
 django/utils/dates.py                         |   2 +-
 django/utils/html.py                          |   8 +-
 django/utils/stopwords.py                     |   2 +-
 django/utils/text.py                          |  37 +-
 django/utils/timesince.py                     |  18 +-
 django/utils/tzinfo.py                        |   6 +-
 tests/regressiontests/dateformat/tests.py     |  46 +-
 tests/regressiontests/defaultfilters/tests.py | 430 +++++++++---------
 tests/regressiontests/text/tests.py           |   4 +-
 11 files changed, 356 insertions(+), 370 deletions(-)

diff --git a/django/template/defaultfilters.py b/django/template/defaultfilters.py
index a025365c90..c82ce8edc7 100644
--- a/django/template/defaultfilters.py
+++ b/django/template/defaultfilters.py
@@ -2,7 +2,8 @@
 
 from django.template import resolve_variable, Library
 from django.conf import settings
-from django.utils.translation import gettext
+from django.utils.translation import ugettext
+from django.utils.encoding import smart_unicode, smart_str
 import re
 import random as random_module
 
@@ -12,29 +13,17 @@ register = Library()
 # STRING DECORATOR    #
 #######################
 
-def smart_string(obj):
-    # FUTURE: Unicode strings should probably be normalized to a specific
-    # encoding and non-unicode strings should be converted to unicode too.
-#    if isinstance(obj, unicode):
-#        obj = obj.encode(settings.DEFAULT_CHARSET)
-#    else:
-#        obj = unicode(obj, settings.DEFAULT_CHARSET)
-    # FUTURE: Replace dumb string logic below with cool unicode logic above.
-    if not isinstance(obj, basestring):
-        obj = str(obj)
-    return obj
-
 def stringfilter(func):
     """
-    Decorator for filters which should only receive strings. The object passed
-    as the first positional argument will be converted to a string.
+    Decorator for filters which should only receive unicode objects. The object passed
+    as the first positional argument will be converted to a unicode object.
     """
     def _dec(*args, **kwargs):
         if args:
             args = list(args)
-            args[0] = smart_string(args[0])
+            args[0] = smart_unicode(args[0])
         return func(*args, **kwargs)
-        
+
     # Include a reference to the real function (used to check original
     # arguments by the template parser).
     _dec._decorated_function = getattr(func, '_decorated_function', func)
@@ -54,7 +43,7 @@ def capfirst(value):
     "Capitalizes the first character of the value"
     return value and value[0].upper() + value[1:]
 capfirst = stringfilter(capfirst)
- 
+
 def fix_ampersands(value):
     "Replaces ampersands with ``&amp;`` entities"
     from django.utils.html import fix_ampersands
@@ -83,27 +72,27 @@ def floatformat(text, arg=-1):
     try:
         f = float(text)
     except ValueError:
-        return ''
+        return u''
     try:
         d = int(arg)
     except ValueError:
-        return smart_string(f)
+        return smart_unicode(f)
     m = f - int(f)
     if not m and d < 0:
-        return '%d' % int(f)
+        return u'%d' % int(f)
     else:
-        formatstr = '%%.%df' % abs(d)
+        formatstr = u'%%.%df' % abs(d)
         return formatstr % f
 
 def linenumbers(value):
     "Displays text with line numbers"
     from django.utils.html import escape
-    lines = value.split('\n')
+    lines = value.split(u'\n')
     # Find the maximum width of the line count, for use with zero padding string format command
-    width = str(len(str(len(lines))))
+    width = unicode(len(unicode(len(lines))))
     for i, line in enumerate(lines):
-        lines[i] = ("%0" + width  + "d. %s") % (i + 1, escape(line))
-    return '\n'.join(lines)
+        lines[i] = (u"%0" + width  + u"d. %s") % (i + 1, escape(line))
+    return u'\n'.join(lines)
 linenumbers = stringfilter(linenumbers)
 
 def lower(value):
@@ -121,6 +110,7 @@ make_list = stringfilter(make_list)
 
 def slugify(value):
     "Converts to lowercase, removes non-alpha chars and converts spaces to hyphens"
+    # Don't compile the patterns as unicode, because \w would then match any letter; slugify is effectively a conversion to ASCII.
     value = re.sub('[^\w\s-]', '', value).strip().lower()
     return re.sub('[-\s]+', '-', value)
 slugify = stringfilter(slugify)
@@ -135,9 +125,9 @@ def stringformat(value, arg):
     of Python string formatting
     """
     try:
-        return ("%" + str(arg)) % value
+        return (u"%" + unicode(arg)) % value
     except (ValueError, TypeError):
-        return ""
+        return u""
 
 def title(value):
     "Converts a string into titlecase"
@@ -155,8 +145,6 @@ def truncatewords(value, arg):
         length = int(arg)
     except ValueError: # invalid literal for int()
         return value # Fail silently.
-    if not isinstance(value, basestring):
-        value = str(value)
     return truncate_words(value, length)
 truncatewords = stringfilter(truncatewords)
 
@@ -171,8 +159,6 @@ def truncatewords_html(value, arg):
         length = int(arg)
     except ValueError: # invalid literal for int()
         return value # Fail silently.
-    if not isinstance(value, basestring):
-        value = str(value)
     return truncate_html_words(value, length)
 truncatewords_html = stringfilter(truncatewords_html)
 
@@ -184,9 +170,7 @@ upper = stringfilter(upper)
 def urlencode(value):
     "Escapes a value for use in a URL"
     import urllib
-    if not isinstance(value, basestring):
-        value = str(value)
-    return urllib.quote(value)
+    return urllib.quote(smart_str(value)).decode('utf-8')
 urlencode = stringfilter(urlencode)
 
 def urlize(value):
@@ -246,7 +230,7 @@ center = stringfilter(center)
 
 def cut(value, arg):
     "Removes all values of arg from the given string"
-    return value.replace(arg, '')
+    return value.replace(arg, u'')
 cut = stringfilter(cut)
 
 ###################
@@ -273,11 +257,11 @@ linebreaksbr = stringfilter(linebreaksbr)
 def removetags(value, tags):
     "Removes a space separated list of [X]HTML tags from the output"
     tags = [re.escape(tag) for tag in tags.split()]
-    tags_re = '(%s)' % '|'.join(tags)
-    starttag_re = re.compile(r'<%s(/?>|(\s+[^>]*>))' % tags_re)
-    endtag_re = re.compile('</%s>' % tags_re)
-    value = starttag_re.sub('', value)
-    value = endtag_re.sub('', value)
+    tags_re = u'(%s)' % u'|'.join(tags)
+    starttag_re = re.compile(ur'<%s(/?>|(\s+[^>]*>))' % tags_re, re.U)
+    endtag_re = re.compile(u'</%s>' % tags_re)
+    value = starttag_re.sub(u'', value)
+    value = endtag_re.sub(u'', value)
     return value
 removetags = stringfilter(removetags)
 
@@ -296,7 +280,7 @@ def dictsort(value, arg):
     Takes a list of dicts, returns that list sorted by the property given in
     the argument.
     """
-    decorated = [(resolve_variable('var.' + arg, {'var' : item}), item) for item in value]
+    decorated = [(resolve_variable(u'var.' + arg, {u'var' : item}), item) for item in value]
     decorated.sort()
     return [item[1] for item in decorated]
 
@@ -305,7 +289,7 @@ def dictsortreversed(value, arg):
     Takes a list of dicts, returns that list sorted in reverse order by the
     property given in the argument.
     """
-    decorated = [(resolve_variable('var.' + arg, {'var' : item}), item) for item in value]
+    decorated = [(resolve_variable(u'var.' + arg, {u'var' : item}), item) for item in value]
     decorated.sort()
     decorated.reverse()
     return [item[1] for item in decorated]
@@ -315,12 +299,12 @@ def first(value):
     try:
         return value[0]
     except IndexError:
-        return ''
+        return u''
 
 def join(value, arg):
     "Joins a list with a string, like Python's ``str.join(list)``"
     try:
-        return arg.join(map(smart_string, value))
+        return arg.join(map(smart_unicode, value))
     except AttributeError: # fail silently but nicely
         return value
 
@@ -346,7 +330,7 @@ def slice_(value, arg):
     """
     try:
         bits = []
-        for x in arg.split(':'):
+        for x in arg.split(u':'):
             if len(x) == 0:
                 bits.append(None)
             else:
@@ -378,12 +362,12 @@ def unordered_list(value):
         </li>
     """
     def _helper(value, tabs):
-        indent = '\t' * tabs
+        indent = u'\t' * tabs
         if value[1]:
-            return '%s<li>%s\n%s<ul>\n%s\n%s</ul>\n%s</li>' % (indent, value[0], indent,
-                '\n'.join([_helper(v, tabs+1) for v in value[1]]), indent, indent)
+            return u'%s<li>%s\n%s<ul>\n%s\n%s</ul>\n%s</li>' % (indent, value[0], indent,
+                u'\n'.join([_helper(v, tabs+1) for v in value[1]]), indent, indent)
         else:
-            return '%s<li>%s</li>' % (indent, value[0])
+            return u'%s<li>%s</li>' % (indent, value[0])
     return _helper(value, 1)
 
 ###################
@@ -421,7 +405,7 @@ def date(value, arg=None):
     "Formats a date according to the given format"
     from django.utils.dateformat import format
     if not value:
-        return ''
+        return u''
     if arg is None:
         arg = settings.DATE_FORMAT
     return format(value, arg)
@@ -429,8 +413,8 @@ def date(value, arg=None):
 def time(value, arg=None):
     "Formats a time according to the given format"
     from django.utils.dateformat import time_format
-    if value in (None, ''):
-        return ''
+    if value in (None, u''):
+        return u''
     if arg is None:
         arg = settings.TIME_FORMAT
     return time_format(value, arg)
@@ -439,7 +423,7 @@ def timesince(value, arg=None):
     'Formats a date as the time since that date (i.e. "4 days, 6 hours")'
     from django.utils.timesince import timesince
     if not value:
-        return ''
+        return u''
     if arg:
         return timesince(arg, value)
     return timesince(value)
@@ -449,7 +433,7 @@ def timeuntil(value, arg=None):
     from django.utils.timesince import timesince
     from datetime import datetime
     if not value:
-        return ''
+        return u''
     if arg:
         return timesince(arg, value)
     return timesince(datetime.now(), value)
@@ -488,8 +472,8 @@ def yesno(value, arg=None):
     ==========  ======================  ==================================
     """
     if arg is None:
-        arg = gettext('yes,no,maybe')
-    bits = arg.split(',')
+        arg = ugettext('yes,no,maybe')
+    bits = arg.split(u',')
     if len(bits) < 2:
         return value # Invalid arg.
     try:
@@ -514,28 +498,28 @@ def filesizeformat(bytes):
     try:
         bytes = float(bytes)
     except TypeError:
-        return "0 bytes"
-        
-    if bytes < 1024:
-        return "%d byte%s" % (bytes, bytes != 1 and 's' or '')
-    if bytes < 1024 * 1024:
-        return "%.1f KB" % (bytes / 1024)
-    if bytes < 1024 * 1024 * 1024:
-        return "%.1f MB" % (bytes / (1024 * 1024))
-    return "%.1f GB" % (bytes / (1024 * 1024 * 1024))
+        return u"0 bytes"
 
-def pluralize(value, arg='s'):
+    if bytes < 1024:
+        return u"%d byte%s" % (bytes, bytes != 1 and u's' or u'')
+    if bytes < 1024 * 1024:
+        return u"%.1f KB" % (bytes / 1024)
+    if bytes < 1024 * 1024 * 1024:
+        return u"%.1f MB" % (bytes / (1024 * 1024))
+    return u"%.1f GB" % (bytes / (1024 * 1024 * 1024))
+
+def pluralize(value, arg=u's'):
     """
     Returns a plural suffix if the value is not 1, for '1 vote' vs. '2 votes'
     By default, 's' is used as a suffix; if an argument is provided, that string
     is used instead. If the provided argument contains a comma, the text before
     the comma is used for the singular case.
     """
-    if not ',' in arg:
-        arg = ',' + arg
-    bits = arg.split(',')
+    if not u',' in arg:
+        arg = u',' + arg
+    bits = arg.split(u',')
     if len(bits) > 2:
-        return ''
+        return u''
     singular_suffix, plural_suffix = bits[:2]
 
     try:
@@ -562,7 +546,7 @@ def pprint(value):
     try:
         return pformat(value)
     except Exception, e:
-        return "Error in formatting:%s" % e
+        return u"Error in formatting:%s" % e
 
 # Syntax: register.filter(name of filter, callback)
 register.filter(add)
diff --git a/django/utils/dateformat.py b/django/utils/dateformat.py
index a558e3a69f..8413e1280d 100644
--- a/django/utils/dateformat.py
+++ b/django/utils/dateformat.py
@@ -13,7 +13,7 @@ Usage:
 
 from django.utils.dates import MONTHS, MONTHS_3, MONTHS_AP, WEEKDAYS
 from django.utils.tzinfo import LocalTimezone
-from django.utils.translation import gettext as _
+from django.utils.translation import ugettext as _
 from calendar import isleap, monthrange
 import re, time
 
@@ -28,7 +28,7 @@ class Formatter(object):
                 pieces.append(str(getattr(self, piece)()))
             elif piece:
                 pieces.append(re_escaped.sub(r'\1', piece))
-        return ''.join(pieces)
+        return u''.join(pieces)
 
 class TimeFormat(Formatter):
     def __init__(self, t):
@@ -52,13 +52,14 @@ class TimeFormat(Formatter):
 
     def f(self):
         """
-        Time, in 12-hour hours and minutes, with minutes left off if they're zero.
+        Time, in 12-hour hours and minutes, with minutes left off if they're
+        zero.
         Examples: '1', '1:30', '2:05', '2'
         Proprietary extension.
         """
         if self.data.minute == 0:
             return self.g()
-        return '%s:%s' % (self.g(), self.i())
+        return u'%s:%s' % (self.g(), self.i())
 
     def g(self):
         "Hour, 12-hour format without leading zeros; i.e. '1' to '12'"
@@ -74,15 +75,15 @@ class TimeFormat(Formatter):
 
     def h(self):
         "Hour, 12-hour format; i.e. '01' to '12'"
-        return '%02d' % self.g()
+        return u'%02d' % self.g()
 
     def H(self):
         "Hour, 24-hour format; i.e. '00' to '23'"
-        return '%02d' % self.G()
+        return u'%02d' % self.G()
 
     def i(self):
         "Minutes; i.e. '00' to '59'"
-        return '%02d' % self.data.minute
+        return u'%02d' % self.data.minute
 
     def P(self):
         """
@@ -95,11 +96,11 @@ class TimeFormat(Formatter):
             return _('midnight')
         if self.data.minute == 0 and self.data.hour == 12:
             return _('noon')
-        return '%s %s' % (self.f(), self.a())
+        return u'%s %s' % (self.f(), self.a())
 
     def s(self):
         "Seconds; i.e. '00' to '59'"
-        return '%02d' % self.data.second
+        return u'%02d' % self.data.second
 
 class DateFormat(TimeFormat):
     year_days = [None, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]
@@ -117,7 +118,7 @@ class DateFormat(TimeFormat):
 
     def d(self):
         "Day of the month, 2 digits with leading zeros; i.e. '01' to '31'"
-        return '%02d' % self.data.day
+        return u'%02d' % self.data.day
 
     def D(self):
         "Day of the week, textual, 3 letters; e.g. 'Fri'"
@@ -130,9 +131,9 @@ class DateFormat(TimeFormat):
     def I(self):
         "'1' if Daylight Savings Time, '0' otherwise."
         if self.timezone.dst(self.data):
-            return '1'
+            return u'1'
         else:
-            return '0'
+            return u'0'
 
     def j(self):
         "Day of the month without leading zeros; i.e. '1' to '31'"
@@ -148,7 +149,7 @@ class DateFormat(TimeFormat):
 
     def m(self):
         "Month; i.e. '01' to '12'"
-        return '%02d' % self.data.month
+        return u'%02d' % self.data.month
 
     def M(self):
         "Month, textual, 3 letters; e.g. 'Jan'"
@@ -165,7 +166,7 @@ class DateFormat(TimeFormat):
     def O(self):
         "Difference to Greenwich time in hours; e.g. '+0200'"
         tz = self.timezone.utcoffset(self.data)
-        return "%+03d%02d" % (tz.seconds // 3600, (tz.seconds // 60) % 60)
+        return u"%+03d%02d" % (tz.seconds // 3600, (tz.seconds // 60) % 60)
 
     def r(self):
         "RFC 822 formatted date; e.g. 'Thu, 21 Dec 2000 16:01:07 +0200'"
@@ -174,26 +175,26 @@ class DateFormat(TimeFormat):
     def S(self):
         "English ordinal suffix for the day of the month, 2 characters; i.e. 'st', 'nd', 'rd' or 'th'"
         if self.data.day in (11, 12, 13): # Special case
-            return 'th'
+            return u'th'
         last = self.data.day % 10
         if last == 1:
-            return 'st'
+            return u'st'
         if last == 2:
-            return 'nd'
+            return u'nd'
         if last == 3:
-            return 'rd'
-        return 'th'
+            return u'rd'
+        return u'th'
 
     def t(self):
         "Number of days in the given month; i.e. '28' to '31'"
-        return '%02d' % monthrange(self.data.year, self.data.month)[1]
+        return u'%02d' % monthrange(self.data.year, self.data.month)[1]
 
     def T(self):
         "Time zone of this machine; e.g. 'EST' or 'MDT'"
         name = self.timezone.tzname(self.data)
         if name is None:
             name = self.format('O')
-        return name
+        return unicode(name)
 
     def U(self):
         "Seconds since the Unix epoch (January 1 1970 00:00:00 GMT)"
@@ -232,7 +233,7 @@ class DateFormat(TimeFormat):
 
     def y(self):
         "Year, 2 digits; e.g. '99'"
-        return str(self.data.year)[2:]
+        return unicode(self.data.year)[2:]
 
     def Y(self):
         "Year, 4 digits; e.g. '1999'"
diff --git a/django/utils/dates.py b/django/utils/dates.py
index 111f32e4fc..b2dc48843c 100644
--- a/django/utils/dates.py
+++ b/django/utils/dates.py
@@ -1,6 +1,6 @@
 "Commonly-used date structures"
 
-from django.utils.translation import gettext_lazy as _
+from django.utils.translation import ugettext_lazy as _
 
 WEEKDAYS = {
     0:_('Monday'), 1:_('Tuesday'), 2:_('Wednesday'), 3:_('Thursday'), 4:_('Friday'),
diff --git a/django/utils/html.py b/django/utils/html.py
index 607362817b..efb8b5a154 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -25,7 +25,7 @@ del x # Temporary variable
 def escape(html):
     "Returns the given HTML with ampersands, quotes and carets encoded"
     if not isinstance(html, basestring):
-        html = str(html)
+        html = smart_unicode(html)
     return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
 
 def linebreaks(value):
@@ -33,7 +33,7 @@ def linebreaks(value):
     value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
     paras = re.split('\n{2,}', value)
     paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
-    return '\n\n'.join(paras)
+    return u'\n\n'.join(paras)
 
 def strip_tags(value):
     "Returns the given HTML with all tags stripped"
@@ -80,7 +80,7 @@ def urlize(text, trim_url_limit=None, nofollow=False):
                 middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
             if lead + middle + trail != word:
                 words[i] = lead + middle + trail
-    return ''.join(words)
+    return u''.join(words)
 
 def clean_html(text):
     """
@@ -108,7 +108,7 @@ def clean_html(text):
         s = match.group().replace('</p>', '</li>')
         for d in DOTS:
             s = s.replace('<p>%s' % d, '<li>')
-        return '<ul>\n%s\n</ul>' % s
+        return u'<ul>\n%s\n</ul>' % s
     text = hard_coded_bullets_re.sub(replace_p_tags, text)
     # Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the bottom of the text.
     text = trailing_empty_content_re.sub('', text)
diff --git a/django/utils/stopwords.py b/django/utils/stopwords.py
index dea5660413..18aeb7f5d3 100644
--- a/django/utils/stopwords.py
+++ b/django/utils/stopwords.py
@@ -38,5 +38,5 @@ def strip_stopwords(sentence):
     for word in words:
         if word.lower() not in stopwords:
             sentence.append(word)
-    return ' '.join(sentence)
+    return u' '.join(sentence)
 
diff --git a/django/utils/text.py b/django/utils/text.py
index c73ab908f3..a355744743 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -1,6 +1,6 @@
 import re
-
 from django.conf import settings
+from django.utils.encoding import smart_unicode
 
 # Capitalizes the first letter of a string.
 capfirst = lambda x: x and x[0].upper() + x[1:]
@@ -10,6 +10,7 @@ def wrap(text, width):
     A word-wrap function that preserves existing line breaks and most spaces in
     the text. Expects that existing line breaks are posix newlines.
     """
+    text = smart_unicode(text)
     def _generator():
         it = iter(text.split(' '))
         word = it.next()
@@ -29,26 +30,29 @@ def wrap(text, width):
                 if len(lines) > 1:
                     pos = len(lines[-1])
             yield word
-    return "".join(_generator())
+    return u''.join(_generator())
 
 def truncate_words(s, num):
     "Truncates a string after a certain number of words."
+    s = smart_unicode(s)
     length = int(num)
     words = s.split()
     if len(words) > length:
         words = words[:length]
         if not words[-1].endswith('...'):
             words.append('...')
-    return ' '.join(words)
+    return u' '.join(words)
 
 def truncate_html_words(s, num):
     """
-    Truncates html to a certain number of words (not counting tags and comments).
-    Closes opened tags if they were correctly closed in the given html.
+    Truncates html to a certain number of words (not counting tags and
+    comments). Closes opened tags if they were correctly closed in the given
+    html.
     """
+    s = smart_unicode(s)
     length = int(num)
     if length <= 0:
-        return ''
+        return u''
     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
     # Set up regular expressions
     re_words = re.compile(r'&.*?;|<.*?>|([A-Za-z0-9][\w-]*)')
@@ -110,10 +114,10 @@ def get_valid_filename(s):
     >>> get_valid_filename("john's portrait in 2004.jpg")
     'johns_portrait_in_2004.jpg'
     """
-    s = s.strip().replace(' ', '_')
+    s = smart_unicode(s).strip().replace(' ', '_')
     return re.sub(r'[^-A-Za-z0-9_.]', '', s)
 
-def get_text_list(list_, last_word='or'):
+def get_text_list(list_, last_word=u'or'):
     """
     >>> get_text_list(['a', 'b', 'c', 'd'])
     'a, b, c or d'
@@ -126,22 +130,18 @@ def get_text_list(list_, last_word='or'):
     >>> get_text_list([])
     ''
     """
-    if len(list_) == 0: return ''
-    if len(list_) == 1: return list_[0]
-    return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1])
+    if len(list_) == 0: return u''
+    if len(list_) == 1: return smart_unicode(list_[0])
+    return u'%s %s %s' % (', '.join([smart_unicode(i) for i in list_][:-1]), smart_unicode(last_word), smart_unicode(list_[-1]))
 
 def normalize_newlines(text):
-    return re.sub(r'\r\n|\r|\n', '\n', text)
+    return smart_unicode(re.sub(r'\r\n|\r|\n', '\n', text))
 
 def recapitalize(text):
     "Recapitalizes text, placing caps after end-of-sentence punctuation."
-#     capwords = ()
-    text = text.lower()
+    text = smart_unicode(text).lower()
     capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
     text = capsRE.sub(lambda x: x.group(1).upper(), text)
-#     for capword in capwords:
-#         capwordRE = re.compile(r'\b%s\b' % capword, re.I)
-#         text = capwordRE.sub(capword, text)
     return text
 
 def phone2numeric(phone):
@@ -172,7 +172,7 @@ def javascript_quote(s, quote_double_quotes=False):
         return r"\u%04x" % ord(match.group(1))
 
     if type(s) == str:
-        s = s.decode(settings.DEFAULT_CHARSET)
+        s = s.decode('utf-8')
     elif type(s) != unicode:
         raise TypeError, s
     s = s.replace('\\', '\\\\')
@@ -195,6 +195,7 @@ def smart_split(text):
     >>> list(smart_split('This is "a person\'s" test.'))
     ['This', 'is', '"a person\'s"', 'test.']
     """
+    text = smart_unicode(text)
     for bit in smart_split_re.finditer(text):
         bit = bit.group(0)
         if bit[0] == '"' and bit[-1] == '"':
diff --git a/django/utils/timesince.py b/django/utils/timesince.py
index e69c45c8c1..dd1d20ac94 100644
--- a/django/utils/timesince.py
+++ b/django/utils/timesince.py
@@ -1,6 +1,6 @@
 import datetime, math, time
 from django.utils.tzinfo import LocalTimezone
-from django.utils.translation import ngettext
+from django.utils.translation import ungettext
 
 def timesince(d, now=None):
     """
@@ -9,12 +9,12 @@ def timesince(d, now=None):
     Adapted from http://blog.natbat.co.uk/archive/2003/Jun/14/time_since
     """
     chunks = (
-      (60 * 60 * 24 * 365, lambda n: ngettext('year', 'years', n)),
-      (60 * 60 * 24 * 30, lambda n: ngettext('month', 'months', n)),
-      (60 * 60 * 24 * 7, lambda n : ngettext('week', 'weeks', n)),
-      (60 * 60 * 24, lambda n : ngettext('day', 'days', n)),
-      (60 * 60, lambda n: ngettext('hour', 'hours', n)),
-      (60, lambda n: ngettext('minute', 'minutes', n))
+      (60 * 60 * 24 * 365, lambda n: ungettext('year', 'years', n)),
+      (60 * 60 * 24 * 30, lambda n: ungettext('month', 'months', n)),
+      (60 * 60 * 24 * 7, lambda n : ungettext('week', 'weeks', n)),
+      (60 * 60 * 24, lambda n : ungettext('day', 'days', n)),
+      (60 * 60, lambda n: ungettext('hour', 'hours', n)),
+      (60, lambda n: ungettext('minute', 'minutes', n))
     )
     # Convert datetime.date to datetime.datetime for comparison
     if d.__class__ is not datetime.datetime:
@@ -37,8 +37,8 @@ def timesince(d, now=None):
         if count != 0:
             break
     if count < 0:
-        return '%d milliseconds' % math.floor((now - d).microseconds / 1000)
-    s = '%d %s' % (count, name(count))
+        return u'%d milliseconds' % math.floor((now - d).microseconds / 1000)
+    s = u'%d %s' % (count, name(count))
     if i + 1 < len(chunks):
         # Now get the second item
         seconds2, name2 = chunks[i + 1]
diff --git a/django/utils/tzinfo.py b/django/utils/tzinfo.py
index cc9f028e91..b486db4f33 100644
--- a/django/utils/tzinfo.py
+++ b/django/utils/tzinfo.py
@@ -7,7 +7,7 @@ class FixedOffset(tzinfo):
     "Fixed offset in minutes east from UTC."
     def __init__(self, offset):
         self.__offset = timedelta(minutes=offset)
-        self.__name = "%+03d%02d" % (offset // 60, offset % 60)
+        self.__name = u"%+03d%02d" % (offset // 60, offset % 60)
 
     def __repr__(self):
         return self.__name
@@ -25,7 +25,7 @@ class LocalTimezone(tzinfo):
     "Proxy timezone information from time module."
     def __init__(self, dt):
         tzinfo.__init__(self, dt)
-        self._tzname = time.tzname[self._isdst(dt)]
+        self._tzname = unicode(time.tzname[self._isdst(dt)])
 
     def __repr__(self):
         return self._tzname
@@ -43,7 +43,7 @@ class LocalTimezone(tzinfo):
             return timedelta(0)
 
     def tzname(self, dt):
-        return time.tzname[self._isdst(dt)]
+        return unicode(time.tzname[self._isdst(dt)])
 
     def _isdst(self, dt):
         tt = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.weekday(), 0, -1)
diff --git a/tests/regressiontests/dateformat/tests.py b/tests/regressiontests/dateformat/tests.py
index f9f84145c5..30c9a4e6dd 100644
--- a/tests/regressiontests/dateformat/tests.py
+++ b/tests/regressiontests/dateformat/tests.py
@@ -1,54 +1,54 @@
 r"""
 >>> format(my_birthday, '')
-''
+u''
 >>> format(my_birthday, 'a')
-'p.m.'
+u'p.m.'
 >>> format(my_birthday, 'A')
-'PM'
+u'PM'
 >>> format(my_birthday, 'd')
-'08'
+u'08'
 >>> format(my_birthday, 'j')
-'8'
+u'8'
 >>> format(my_birthday, 'l')
-'Sunday'
+u'Sunday'
 >>> format(my_birthday, 'L')
-'False'
+u'False'
 >>> format(my_birthday, 'm')
-'07'
+u'07'
 >>> format(my_birthday, 'M')
-'Jul'
+u'Jul'
 >>> format(my_birthday, 'b')
-'jul'
+u'jul'
 >>> format(my_birthday, 'n')
-'7'
+u'7'
 >>> format(my_birthday, 'N')
-'July'
+u'July'
 >>> no_tz or format(my_birthday, 'O') == '+0100'
 True
 >>> format(my_birthday, 'P')
-'10 p.m.'
+u'10 p.m.'
 >>> no_tz or format(my_birthday, 'r') == 'Sun, 8 Jul 1979 22:00:00 +0100'
 True
 >>> format(my_birthday, 's')
-'00'
+u'00'
 >>> format(my_birthday, 'S')
-'th'
+u'th'
 >>> format(my_birthday, 't')
-'31'
+u'31'
 >>> no_tz or format(my_birthday, 'T') == 'CET'
 True
 >>> no_tz or format(my_birthday, 'U') == '300531600'
 True
 >>> format(my_birthday, 'w')
-'0'
+u'0'
 >>> format(my_birthday, 'W')
-'27'
+u'27'
 >>> format(my_birthday, 'y')
-'79'
+u'79'
 >>> format(my_birthday, 'Y')
-'1979'
+u'1979'
 >>> format(my_birthday, 'z')
-'189'
+u'189'
 >>> no_tz or format(my_birthday, 'Z') == '3600'
 True
 
@@ -62,10 +62,10 @@ True
 True
 
 >>> format(my_birthday, r'Y z \C\E\T')
-'1979 189 CET'
+u'1979 189 CET'
 
 >>> format(my_birthday, r'jS o\f F')
-'8th of July'
+u'8th of July'
 """
 
 from django.utils import dateformat, translation
diff --git a/tests/regressiontests/defaultfilters/tests.py b/tests/regressiontests/defaultfilters/tests.py
index 4a2e9432b0..f1af7b7bfb 100644
--- a/tests/regressiontests/defaultfilters/tests.py
+++ b/tests/regressiontests/defaultfilters/tests.py
@@ -2,186 +2,186 @@
 
 r"""
 >>> floatformat(7.7)
-'7.7'
+u'7.7'
 >>> floatformat(7.0)
-'7'
+u'7'
 >>> floatformat(0.7)
-'0.7'
+u'0.7'
 >>> floatformat(0.07)
-'0.1'
+u'0.1'
 >>> floatformat(0.007)
-'0.0'
+u'0.0'
 >>> floatformat(0.0)
-'0'
+u'0'
 >>> floatformat(7.7,3)
-'7.700'
+u'7.700'
 >>> floatformat(6.000000,3)
-'6.000'
+u'6.000'
 >>> floatformat(13.1031,-3)
-'13.103'
+u'13.103'
 >>> floatformat(11.1197, -2)
-'11.12'
+u'11.12'
 >>> floatformat(11.0000, -2)
-'11'
+u'11'
 >>> floatformat(11.000001, -2)
-'11.00'
+u'11.00'
 >>> floatformat(8.2798, 3)
-'8.280'
->>> floatformat('foo')
-''
->>> floatformat(13.1031, 'bar')
-'13.1031'
->>> floatformat('foo', 'bar')
-''
+u'8.280'
+>>> floatformat(u'foo')
+u''
+>>> floatformat(13.1031, u'bar')
+u'13.1031'
+>>> floatformat(u'foo', u'bar')
+u''
 
->>> addslashes('"double quotes" and \'single quotes\'')
-'\\"double quotes\\" and \\\'single quotes\\\''
+>>> addslashes(u'"double quotes" and \'single quotes\'')
+u'\\"double quotes\\" and \\\'single quotes\\\''
 
->>> addslashes(r'\ : backslashes, too')
-'\\\\ : backslashes, too'
+>>> addslashes(ur'\ : backslashes, too')
+u'\\\\ : backslashes, too'
 
->>> capfirst('hello world')
-'Hello world'
+>>> capfirst(u'hello world')
+u'Hello world'
 
->>> fix_ampersands('Jack & Jill & Jeroboam')
-'Jack &amp; Jill &amp; Jeroboam'
+>>> fix_ampersands(u'Jack & Jill & Jeroboam')
+u'Jack &amp; Jill &amp; Jeroboam'
 
->>> linenumbers('line 1\nline 2')
-'1. line 1\n2. line 2'
+>>> linenumbers(u'line 1\nline 2')
+u'1. line 1\n2. line 2'
 
->>> linenumbers('\n'.join(['x'] * 10))
-'01. x\n02. x\n03. x\n04. x\n05. x\n06. x\n07. x\n08. x\n09. x\n10. x'
+>>> linenumbers(u'\n'.join([u'x'] * 10))
+u'01. x\n02. x\n03. x\n04. x\n05. x\n06. x\n07. x\n08. x\n09. x\n10. x'
 
 >>> lower('TEST')
-'test'
+u'test'
 
 >>> lower(u'\xcb') # uppercase E umlaut
 u'\xeb'
 
 >>> make_list('abc')
-['a', 'b', 'c']
+[u'a', u'b', u'c']
 
 >>> make_list(1234)
-['1', '2', '3', '4']
+[u'1', u'2', u'3', u'4']
 
 >>> slugify(' Jack & Jill like numbers 1,2,3 and 4 and silly characters ?%.$!/')
-'jack-jill-like-numbers-123-and-4-and-silly-characters'
+u'jack-jill-like-numbers-123-and-4-and-silly-characters'
 
->>> stringformat(1, '03d')
-'001'
+>>> stringformat(1, u'03d')
+u'001'
 
->>> stringformat(1, 'z')
-''
+>>> stringformat(1, u'z')
+u''
 
 >>> title('a nice title, isn\'t it?')
-"A Nice Title, Isn't It?"
+u"A Nice Title, Isn't It?"
 
 
->>> truncatewords('A sentence with a few words in it', 1)
-'A ...'
+>>> truncatewords(u'A sentence with a few words in it', 1)
+u'A ...'
 
->>> truncatewords('A sentence with a few words in it', 5)
-'A sentence with a few ...'
+>>> truncatewords(u'A sentence with a few words in it', 5)
+u'A sentence with a few ...'
 
->>> truncatewords('A sentence with a few words in it', 100)
-'A sentence with a few words in it'
+>>> truncatewords(u'A sentence with a few words in it', 100)
+u'A sentence with a few words in it'
 
->>> truncatewords('A sentence with a few words in it', 'not a number')
-'A sentence with a few words in it'
+>>> truncatewords(u'A sentence with a few words in it', 'not a number')
+u'A sentence with a few words in it'
 
->>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 0)
-''
+>>> truncatewords_html(u'<p>one <a href="#">two - three <br>four</a> five</p>', 0)
+u''
 
->>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 2)
-'<p>one <a href="#">two ...</a></p>'
+>>> truncatewords_html(u'<p>one <a href="#">two - three <br>four</a> five</p>', 2)
+u'<p>one <a href="#">two ...</a></p>'
 
->>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 4)
-'<p>one <a href="#">two - three <br>four ...</a></p>'
+>>> truncatewords_html(u'<p>one <a href="#">two - three <br>four</a> five</p>', 4)
+u'<p>one <a href="#">two - three <br>four ...</a></p>'
 
->>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 5)
-'<p>one <a href="#">two - three <br>four</a> five</p>'
+>>> truncatewords_html(u'<p>one <a href="#">two - three <br>four</a> five</p>', 5)
+u'<p>one <a href="#">two - three <br>four</a> five</p>'
 
->>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 100)
-'<p>one <a href="#">two - three <br>four</a> five</p>'
+>>> truncatewords_html(u'<p>one <a href="#">two - three <br>four</a> five</p>', 100)
+u'<p>one <a href="#">two - three <br>four</a> five</p>'
 
->>> upper('Mixed case input')
-'MIXED CASE INPUT'
+>>> upper(u'Mixed case input')
+u'MIXED CASE INPUT'
 
 >>> upper(u'\xeb') # lowercase e umlaut
 u'\xcb'
 
 
->>> urlencode('jack & jill')
-'jack%20%26%20jill'
+>>> urlencode(u'jack & jill')
+u'jack%20%26%20jill'
 >>> urlencode(1)
-'1'
+u'1'
 
 
->>> urlizetrunc('http://short.com/', 20)
-'<a href="http://short.com/" rel="nofollow">http://short.com/</a>'
+>>> urlizetrunc(u'http://short.com/', 20)
+u'<a href="http://short.com/" rel="nofollow">http://short.com/</a>'
 
->>> urlizetrunc('http://www.google.co.uk/search?hl=en&q=some+long+url&btnG=Search&meta=', 20)
-'<a href="http://www.google.co.uk/search?hl=en&q=some+long+url&btnG=Search&meta=" rel="nofollow">http://www.google.co...</a>'
+>>> urlizetrunc(u'http://www.google.co.uk/search?hl=en&q=some+long+url&btnG=Search&meta=', 20)
+u'<a href="http://www.google.co.uk/search?hl=en&q=some+long+url&btnG=Search&meta=" rel="nofollow">http://www.google.co...</a>'
 
->>> wordcount('')
+>>> wordcount(u'')
 0
 
->>> wordcount('oneword')
+>>> wordcount(u'oneword')
 1
 
->>> wordcount('lots of words')
+>>> wordcount(u'lots of words')
 3
 
->>> wordwrap('this is a long paragraph of text that really needs to be wrapped I\'m afraid', 14)
-"this is a long\nparagraph of\ntext that\nreally needs\nto be wrapped\nI'm afraid"
+>>> wordwrap(u'this is a long paragraph of text that really needs to be wrapped I\'m afraid', 14)
+u"this is a long\nparagraph of\ntext that\nreally needs\nto be wrapped\nI'm afraid"
 
->>> wordwrap('this is a short paragraph of text.\n  But this line should be indented',14)
-'this is a\nshort\nparagraph of\ntext.\n  But this\nline should be\nindented'
+>>> wordwrap(u'this is a short paragraph of text.\n  But this line should be indented',14)
+u'this is a\nshort\nparagraph of\ntext.\n  But this\nline should be\nindented'
 
->>> wordwrap('this is a short paragraph of text.\n  But this line should be indented',15)
-'this is a short\nparagraph of\ntext.\n  But this line\nshould be\nindented'
+>>> wordwrap(u'this is a short paragraph of text.\n  But this line should be indented',15)
+u'this is a short\nparagraph of\ntext.\n  But this line\nshould be\nindented'
 
->>> ljust('test', 10)
-'test      '
+>>> ljust(u'test', 10)
+u'test      '
 
->>> ljust('test', 3)
-'test'
+>>> ljust(u'test', 3)
+u'test'
 
->>> rjust('test', 10)
-'      test'
+>>> rjust(u'test', 10)
+u'      test'
 
->>> rjust('test', 3)
-'test'
+>>> rjust(u'test', 3)
+u'test'
 
->>> center('test', 6)
-' test '
+>>> center(u'test', 6)
+u' test '
 
->>> cut('a string to be mangled', 'a')
-' string to be mngled'
+>>> cut(u'a string to be mangled', 'a')
+u' string to be mngled'
 
->>> cut('a string to be mangled', 'ng')
-'a stri to be maled'
+>>> cut(u'a string to be mangled', 'ng')
+u'a stri to be maled'
 
->>> cut('a string to be mangled', 'strings')
-'a string to be mangled'
+>>> cut(u'a string to be mangled', 'strings')
+u'a string to be mangled'
 
->>> escape('<some html & special characters > here')
-'&lt;some html &amp; special characters &gt; here'
+>>> escape(u'<some html & special characters > here')
+u'&lt;some html &amp; special characters &gt; here'
 
 >>> escape(u'<some html & special characters > here ĐÅ€£')
 u'&lt;some html &amp; special characters &gt; here \xc4\x90\xc3\x85\xe2\x82\xac\xc2\xa3'
 
->>> linebreaks('line 1')
-'<p>line 1</p>'
+>>> linebreaks(u'line 1')
+u'<p>line 1</p>'
 
->>> linebreaks('line 1\nline 2')
-'<p>line 1<br />line 2</p>'
+>>> linebreaks(u'line 1\nline 2')
+u'<p>line 1<br />line 2</p>'
 
->>> removetags('some <b>html</b> with <script>alert("You smell")</script> disallowed <img /> tags', 'script img')
-'some <b>html</b> with alert("You smell") disallowed  tags'
+>>> removetags(u'some <b>html</b> with <script>alert("You smell")</script> disallowed <img /> tags', 'script img')
+u'some <b>html</b> with alert("You smell") disallowed  tags'
 
->>> striptags('some <b>html</b> with <script>alert("You smell")</script> disallowed <img /> tags')
-'some html with alert("You smell") disallowed  tags'
+>>> striptags(u'some <b>html</b> with <script>alert("You smell")</script> disallowed <img /> tags')
+u'some html with alert("You smell") disallowed  tags'
 
 >>> dictsort([{'age': 23, 'name': 'Barbara-Ann'},
 ...           {'age': 63, 'name': 'Ra Ra Rasputin'},
@@ -196,16 +196,16 @@ u'&lt;some html &amp; special characters &gt; here \xc4\x90\xc3\x85\xe2\x82\xac\
 >>> first([0,1,2])
 0
 
->>> first('')
-''
+>>> first(u'')
+u''
 
->>> first('test')
-'t'
+>>> first(u'test')
+u't'
 
->>> join([0,1,2], 'glue')
-'0glue1glue2'
+>>> join([0,1,2], u'glue')
+u'0glue1glue2'
 
->>> length('1234')
+>>> length(u'1234')
 4
 
 >>> length([1,2,3,4])
@@ -220,37 +220,37 @@ False
 >>> length_is('a', 1)
 True
 
->>> length_is('a', 10)
+>>> length_is(u'a', 10)
 False
 
->>> slice_('abcdefg', '0')
-''
+>>> slice_(u'abcdefg', u'0')
+u''
 
->>> slice_('abcdefg', '1')
-'a'
+>>> slice_(u'abcdefg', u'1')
+u'a'
 
->>> slice_('abcdefg', '-1')
-'abcdef'
+>>> slice_(u'abcdefg', u'-1')
+u'abcdef'
 
->>> slice_('abcdefg', '1:2')
-'b'
+>>> slice_(u'abcdefg', u'1:2')
+u'b'
 
->>> slice_('abcdefg', '1:3')
-'bc'
+>>> slice_(u'abcdefg', u'1:3')
+u'bc'
 
->>> slice_('abcdefg', '0::2')
-'aceg'
+>>> slice_(u'abcdefg', u'0::2')
+u'aceg'
 
->>> unordered_list(['item 1', []])
-'\t<li>item 1</li>'
+>>> unordered_list([u'item 1', []])
+u'\t<li>item 1</li>'
 
->>> unordered_list(['item 1', [['item 1.1', []]]])
-'\t<li>item 1\n\t<ul>\n\t\t<li>item 1.1</li>\n\t</ul>\n\t</li>'
+>>> unordered_list([u'item 1', [[u'item 1.1', []]]])
+u'\t<li>item 1\n\t<ul>\n\t\t<li>item 1.1</li>\n\t</ul>\n\t</li>'
 
->>> unordered_list(['item 1', [['item 1.1', []], ['item 1.2', []]]])
-'\t<li>item 1\n\t<ul>\n\t\t<li>item 1.1</li>\n\t\t<li>item 1.2</li>\n\t</ul>\n\t</li>'
+>>> unordered_list([u'item 1', [[u'item 1.1', []], [u'item 1.2', []]]])
+u'\t<li>item 1\n\t<ul>\n\t\t<li>item 1.1</li>\n\t\t<li>item 1.2</li>\n\t</ul>\n\t</li>'
 
->>> add('1', '2')
+>>> add(u'1', u'2')
 3
 
 >>> get_digit(123, 1)
@@ -268,43 +268,43 @@ False
 >>> get_digit(123, 0)
 123
 
->>> get_digit('xyz', 0)
-'xyz'
+>>> get_digit(u'xyz', 0)
+u'xyz'
 
 # real testing of date() is in dateformat.py
->>> date(datetime.datetime(2005, 12, 29), "d F Y")
-'29 December 2005'
->>> date(datetime.datetime(2005, 12, 29), r'jS o\f F')
-'29th of December'
+>>> date(datetime.datetime(2005, 12, 29), u"d F Y")
+u'29 December 2005'
+>>> date(datetime.datetime(2005, 12, 29), ur'jS o\f F')
+u'29th of December'
 
 # real testing of time() is done in dateformat.py
->>> time(datetime.time(13), "h")
-'01'
+>>> time(datetime.time(13), u"h")
+u'01'
 
->>> time(datetime.time(0), "h")
-'12'
+>>> time(datetime.time(0), u"h")
+u'12'
 
 # real testing is done in timesince.py, where we can provide our own 'now'
 >>> timesince(datetime.datetime.now() - datetime.timedelta(1))
-'1 day'
+u'1 day'
 
->>> default("val", "default")
-'val'
+>>> default(u"val", u"default")
+u'val'
 
->>> default(None, "default")
-'default'
+>>> default(None, u"default")
+u'default'
 
->>> default('', "default")
-'default'
+>>> default(u'', u"default")
+u'default'
 
->>> default_if_none("val", "default")
-'val'
+>>> default_if_none(u"val", u"default")
+u'val'
 
->>> default_if_none(None, "default")
-'default'
+>>> default_if_none(None, u"default")
+u'default'
 
->>> default_if_none('', "default")
-''
+>>> default_if_none(u'', u"default")
+u''
 
 >>> divisibleby(4, 2)
 True
@@ -313,139 +313,139 @@ True
 False
 
 >>> yesno(True)
-'yes'
+u'yes'
 
 >>> yesno(False)
-'no'
+u'no'
 
 >>> yesno(None)
-'maybe'
+u'maybe'
 
->>> yesno(True, 'certainly,get out of town,perhaps')
-'certainly'
+>>> yesno(True, u'certainly,get out of town,perhaps')
+u'certainly'
 
->>> yesno(False, 'certainly,get out of town,perhaps')
-'get out of town'
+>>> yesno(False, u'certainly,get out of town,perhaps')
+u'get out of town'
 
->>> yesno(None, 'certainly,get out of town,perhaps')
-'perhaps'
+>>> yesno(None, u'certainly,get out of town,perhaps')
+u'perhaps'
 
->>> yesno(None, 'certainly,get out of town')
-'get out of town'
+>>> yesno(None, u'certainly,get out of town')
+u'get out of town'
 
 >>> filesizeformat(1023)
-'1023 bytes'
+u'1023 bytes'
 
 >>> filesizeformat(1024)
-'1.0 KB'
+u'1.0 KB'
 
 >>> filesizeformat(10*1024)
-'10.0 KB'
+u'10.0 KB'
 
 >>> filesizeformat(1024*1024-1)
-'1024.0 KB'
+u'1024.0 KB'
 
 >>> filesizeformat(1024*1024)
-'1.0 MB'
+u'1.0 MB'
 
 >>> filesizeformat(1024*1024*50)
-'50.0 MB'
+u'50.0 MB'
 
 >>> filesizeformat(1024*1024*1024-1)
-'1024.0 MB'
+u'1024.0 MB'
 
 >>> filesizeformat(1024*1024*1024)
-'1.0 GB'
+u'1.0 GB'
 
 >>> pluralize(1)
-''
+u''
 
 >>> pluralize(0)
-'s'
+u's'
 
 >>> pluralize(2)
-'s'
+u's'
 
 >>> pluralize([1])
-''
+u''
 
 >>> pluralize([])
-'s'
+u's'
 
 >>> pluralize([1,2,3])
-'s'
+u's'
 
->>> pluralize(1,'es')
-''
+>>> pluralize(1,u'es')
+u''
 
->>> pluralize(0,'es')
-'es'
+>>> pluralize(0,u'es')
+u'es'
 
->>> pluralize(2,'es')
-'es'
+>>> pluralize(2,u'es')
+u'es'
 
->>> pluralize(1,'y,ies')
-'y'
+>>> pluralize(1,u'y,ies')
+u'y'
 
->>> pluralize(0,'y,ies')
-'ies'
+>>> pluralize(0,u'y,ies')
+u'ies'
 
->>> pluralize(2,'y,ies')
-'ies'
+>>> pluralize(2,u'y,ies')
+u'ies'
 
->>> pluralize(0,'y,ies,error')
-''
+>>> pluralize(0,u'y,ies,error')
+u''
 
->>> phone2numeric('0800 flowers')
-'0800 3569377'
+>>> phone2numeric(u'0800 flowers')
+u'0800 3569377'
 
 # Filters shouldn't break if passed non-strings
 >>> addslashes(123)
-'123'
+u'123'
 >>> linenumbers(123)
-'1. 123'
+u'1. 123'
 >>> lower(123)
-'123'
+u'123'
 >>> make_list(123)
-['1', '2', '3']
+[u'1', u'2', u'3']
 >>> slugify(123)
-'123'
+u'123'
 >>> title(123)
-'123'
+u'123'
 >>> truncatewords(123, 2)
-'123'
+u'123'
 >>> upper(123)
-'123'
+u'123'
 >>> urlencode(123)
-'123'
+u'123'
 >>> urlize(123)
-'123'
+u'123'
 >>> urlizetrunc(123, 1)
-'123'
+u'123'
 >>> wordcount(123)
 1
 >>> wordwrap(123, 2)
-'123'
+u'123'
 >>> ljust('123', 4)
-'123 '
+u'123 '
 >>> rjust('123', 4)
-' 123'
+u' 123'
 >>> center('123', 5)
-' 123 '
+u' 123 '
 >>> center('123', 6)
-' 123  '
+u' 123  '
 >>> cut(123, '2')
-'13'
+u'13'
 >>> escape(123)
-'123'
+u'123'
 >>> linebreaks(123)
-'<p>123</p>'
+u'<p>123</p>'
 >>> linebreaksbr(123)
-'123'
+u'123'
 >>> removetags(123, 'a')
-'123'
+u'123'
 >>> striptags(123)
-'123'
+u'123'
 
 """
 
diff --git a/tests/regressiontests/text/tests.py b/tests/regressiontests/text/tests.py
index f758ecaf90..e88cadba46 100644
--- a/tests/regressiontests/text/tests.py
+++ b/tests/regressiontests/text/tests.py
@@ -5,13 +5,13 @@
 
 ### smart_split ###########################################################
 >>> list(smart_split(r'''This is "a person" test.'''))
-['This', 'is', '"a person"', 'test.']
+[u'This', u'is', u'"a person"', u'test.']
 >>> print list(smart_split(r'''This is "a person's" test.'''))[2]
 "a person's"
 >>> print list(smart_split(r'''This is "a person\\"s" test.'''))[2]
 "a person"s"
 >>> list(smart_split('''"a 'one'''))
-['"a', "'one"]
+[u'"a', u"'one"]
 >>> print list(smart_split(r'''all friends' tests'''))[1]
 friends'
 """