diff --git a/django/template/defaultfilters.py b/django/template/defaultfilters.py index a025365c90..c82ce8edc7 100644 --- a/django/template/defaultfilters.py +++ b/django/template/defaultfilters.py @@ -2,7 +2,8 @@ from django.template import resolve_variable, Library from django.conf import settings -from django.utils.translation import gettext +from django.utils.translation import ugettext +from django.utils.encoding import smart_unicode, smart_str import re import random as random_module @@ -12,29 +13,17 @@ register = Library() # STRING DECORATOR # ####################### -def smart_string(obj): - # FUTURE: Unicode strings should probably be normalized to a specific - # encoding and non-unicode strings should be converted to unicode too. -# if isinstance(obj, unicode): -# obj = obj.encode(settings.DEFAULT_CHARSET) -# else: -# obj = unicode(obj, settings.DEFAULT_CHARSET) - # FUTURE: Replace dumb string logic below with cool unicode logic above. - if not isinstance(obj, basestring): - obj = str(obj) - return obj - def stringfilter(func): """ - Decorator for filters which should only receive strings. The object passed - as the first positional argument will be converted to a string. + Decorator for filters which should only receive unicode objects. The object passed + as the first positional argument will be converted to a unicode object. """ def _dec(*args, **kwargs): if args: args = list(args) - args[0] = smart_string(args[0]) + args[0] = smart_unicode(args[0]) return func(*args, **kwargs) - + # Include a reference to the real function (used to check original # arguments by the template parser). _dec._decorated_function = getattr(func, '_decorated_function', func) @@ -54,7 +43,7 @@ def capfirst(value): "Capitalizes the first character of the value" return value and value[0].upper() + value[1:] capfirst = stringfilter(capfirst) - + def fix_ampersands(value): "Replaces ampersands with ``&`` entities" from django.utils.html import fix_ampersands @@ -83,27 +72,27 @@ def floatformat(text, arg=-1): try: f = float(text) except ValueError: - return '' + return u'' try: d = int(arg) except ValueError: - return smart_string(f) + return smart_unicode(f) m = f - int(f) if not m and d < 0: - return '%d' % int(f) + return u'%d' % int(f) else: - formatstr = '%%.%df' % abs(d) + formatstr = u'%%.%df' % abs(d) return formatstr % f def linenumbers(value): "Displays text with line numbers" from django.utils.html import escape - lines = value.split('\n') + lines = value.split(u'\n') # Find the maximum width of the line count, for use with zero padding string format command - width = str(len(str(len(lines)))) + width = unicode(len(unicode(len(lines)))) for i, line in enumerate(lines): - lines[i] = ("%0" + width + "d. %s") % (i + 1, escape(line)) - return '\n'.join(lines) + lines[i] = (u"%0" + width + u"d. %s") % (i + 1, escape(line)) + return u'\n'.join(lines) linenumbers = stringfilter(linenumbers) def lower(value): @@ -121,6 +110,7 @@ make_list = stringfilter(make_list) def slugify(value): "Converts to lowercase, removes non-alpha chars and converts spaces to hyphens" + # Don't compile patterns as unicode because \w then would mean any letter. Slugify is effectively an asciiization. value = re.sub('[^\w\s-]', '', value).strip().lower() return re.sub('[-\s]+', '-', value) slugify = stringfilter(slugify) @@ -135,9 +125,9 @@ def stringformat(value, arg): of Python string formatting """ try: - return ("%" + str(arg)) % value + return (u"%" + unicode(arg)) % value except (ValueError, TypeError): - return "" + return u"" def title(value): "Converts a string into titlecase" @@ -155,8 +145,6 @@ def truncatewords(value, arg): length = int(arg) except ValueError: # invalid literal for int() return value # Fail silently. - if not isinstance(value, basestring): - value = str(value) return truncate_words(value, length) truncatewords = stringfilter(truncatewords) @@ -171,8 +159,6 @@ def truncatewords_html(value, arg): length = int(arg) except ValueError: # invalid literal for int() return value # Fail silently. - if not isinstance(value, basestring): - value = str(value) return truncate_html_words(value, length) truncatewords_html = stringfilter(truncatewords_html) @@ -184,9 +170,7 @@ upper = stringfilter(upper) def urlencode(value): "Escapes a value for use in a URL" import urllib - if not isinstance(value, basestring): - value = str(value) - return urllib.quote(value) + return urllib.quote(value).decode('utf-8') urlencode = stringfilter(urlencode) def urlize(value): @@ -246,7 +230,7 @@ center = stringfilter(center) def cut(value, arg): "Removes all values of arg from the given string" - return value.replace(arg, '') + return value.replace(arg, u'') cut = stringfilter(cut) ################### @@ -273,11 +257,11 @@ linebreaksbr = stringfilter(linebreaksbr) def removetags(value, tags): "Removes a space separated list of [X]HTML tags from the output" tags = [re.escape(tag) for tag in tags.split()] - tags_re = '(%s)' % '|'.join(tags) - starttag_re = re.compile(r'<%s(/?>|(\s+[^>]*>))' % tags_re) - endtag_re = re.compile('%s>' % tags_re) - value = starttag_re.sub('', value) - value = endtag_re.sub('', value) + tags_re = u'(%s)' % u'|'.join(tags) + starttag_re = re.compile(ur'<%s(/?>|(\s+[^>]*>))' % tags_re, re.U) + endtag_re = re.compile(u'%s>' % tags_re) + value = starttag_re.sub(u'', value) + value = endtag_re.sub(u'', value) return value removetags = stringfilter(removetags) @@ -296,7 +280,7 @@ def dictsort(value, arg): Takes a list of dicts, returns that list sorted by the property given in the argument. """ - decorated = [(resolve_variable('var.' + arg, {'var' : item}), item) for item in value] + decorated = [(resolve_variable(u'var.' + arg, {u'var' : item}), item) for item in value] decorated.sort() return [item[1] for item in decorated] @@ -305,7 +289,7 @@ def dictsortreversed(value, arg): Takes a list of dicts, returns that list sorted in reverse order by the property given in the argument. """ - decorated = [(resolve_variable('var.' + arg, {'var' : item}), item) for item in value] + decorated = [(resolve_variable(u'var.' + arg, {u'var' : item}), item) for item in value] decorated.sort() decorated.reverse() return [item[1] for item in decorated] @@ -315,12 +299,12 @@ def first(value): try: return value[0] except IndexError: - return '' + return u'' def join(value, arg): "Joins a list with a string, like Python's ``str.join(list)``" try: - return arg.join(map(smart_string, value)) + return arg.join(map(smart_unicode, value)) except AttributeError: # fail silently but nicely return value @@ -346,7 +330,7 @@ def slice_(value, arg): """ try: bits = [] - for x in arg.split(':'): + for x in arg.split(u':'): if len(x) == 0: bits.append(None) else: @@ -378,12 +362,12 @@ def unordered_list(value): """ def _helper(value, tabs): - indent = '\t' * tabs + indent = u'\t' * tabs if value[1]: - return '%s
%s
' % p.strip().replace('\n', '%s' % d, '
", but only if it's at the bottom of the text. text = trailing_empty_content_re.sub('', text) diff --git a/django/utils/stopwords.py b/django/utils/stopwords.py index dea5660413..18aeb7f5d3 100644 --- a/django/utils/stopwords.py +++ b/django/utils/stopwords.py @@ -38,5 +38,5 @@ def strip_stopwords(sentence): for word in words: if word.lower() not in stopwords: sentence.append(word) - return ' '.join(sentence) + return u' '.join(sentence) diff --git a/django/utils/text.py b/django/utils/text.py index c73ab908f3..a355744743 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -1,6 +1,6 @@ import re - from django.conf import settings +from django.utils.encoding import smart_unicode # Capitalizes the first letter of a string. capfirst = lambda x: x and x[0].upper() + x[1:] @@ -10,6 +10,7 @@ def wrap(text, width): A word-wrap function that preserves existing line breaks and most spaces in the text. Expects that existing line breaks are posix newlines. """ + text = smart_unicode(text) def _generator(): it = iter(text.split(' ')) word = it.next() @@ -29,26 +30,29 @@ def wrap(text, width): if len(lines) > 1: pos = len(lines[-1]) yield word - return "".join(_generator()) + return u''.join(_generator()) def truncate_words(s, num): "Truncates a string after a certain number of words." + s = smart_unicode(s) length = int(num) words = s.split() if len(words) > length: words = words[:length] if not words[-1].endswith('...'): words.append('...') - return ' '.join(words) + return u' '.join(words) def truncate_html_words(s, num): """ - Truncates html to a certain number of words (not counting tags and comments). - Closes opened tags if they were correctly closed in the given html. + Truncates html to a certain number of words (not counting tags and + comments). Closes opened tags if they were correctly closed in the given + html. """ + s = smart_unicode(s) length = int(num) if length <= 0: - return '' + return u'' html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input') # Set up regular expressions re_words = re.compile(r'&.*?;|<.*?>|([A-Za-z0-9][\w-]*)') @@ -110,10 +114,10 @@ def get_valid_filename(s): >>> get_valid_filename("john's portrait in 2004.jpg") 'johns_portrait_in_2004.jpg' """ - s = s.strip().replace(' ', '_') + s = smart_unicode(s).strip().replace(' ', '_') return re.sub(r'[^-A-Za-z0-9_.]', '', s) -def get_text_list(list_, last_word='or'): +def get_text_list(list_, last_word=u'or'): """ >>> get_text_list(['a', 'b', 'c', 'd']) 'a, b, c or d' @@ -126,22 +130,18 @@ def get_text_list(list_, last_word='or'): >>> get_text_list([]) '' """ - if len(list_) == 0: return '' - if len(list_) == 1: return list_[0] - return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1]) + if len(list_) == 0: return u'' + if len(list_) == 1: return smart_unicode(list_[0]) + return u'%s %s %s' % (', '.join([smart_unicode(i) for i in list_][:-1]), smart_unicode(last_word), smart_unicode(list_[-1])) def normalize_newlines(text): - return re.sub(r'\r\n|\r|\n', '\n', text) + return smart_unicode(re.sub(r'\r\n|\r|\n', '\n', text)) def recapitalize(text): "Recapitalizes text, placing caps after end-of-sentence punctuation." -# capwords = () - text = text.lower() + text = smart_unicode(text).lower() capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])') text = capsRE.sub(lambda x: x.group(1).upper(), text) -# for capword in capwords: -# capwordRE = re.compile(r'\b%s\b' % capword, re.I) -# text = capwordRE.sub(capword, text) return text def phone2numeric(phone): @@ -172,7 +172,7 @@ def javascript_quote(s, quote_double_quotes=False): return r"\u%04x" % ord(match.group(1)) if type(s) == str: - s = s.decode(settings.DEFAULT_CHARSET) + s = s.decode('utf-8') elif type(s) != unicode: raise TypeError, s s = s.replace('\\', '\\\\') @@ -195,6 +195,7 @@ def smart_split(text): >>> list(smart_split('This is "a person\'s" test.')) ['This', 'is', '"a person\'s"', 'test.'] """ + text = smart_unicode(text) for bit in smart_split_re.finditer(text): bit = bit.group(0) if bit[0] == '"' and bit[-1] == '"': diff --git a/django/utils/timesince.py b/django/utils/timesince.py index e69c45c8c1..dd1d20ac94 100644 --- a/django/utils/timesince.py +++ b/django/utils/timesince.py @@ -1,6 +1,6 @@ import datetime, math, time from django.utils.tzinfo import LocalTimezone -from django.utils.translation import ngettext +from django.utils.translation import ungettext def timesince(d, now=None): """ @@ -9,12 +9,12 @@ def timesince(d, now=None): Adapted from http://blog.natbat.co.uk/archive/2003/Jun/14/time_since """ chunks = ( - (60 * 60 * 24 * 365, lambda n: ngettext('year', 'years', n)), - (60 * 60 * 24 * 30, lambda n: ngettext('month', 'months', n)), - (60 * 60 * 24 * 7, lambda n : ngettext('week', 'weeks', n)), - (60 * 60 * 24, lambda n : ngettext('day', 'days', n)), - (60 * 60, lambda n: ngettext('hour', 'hours', n)), - (60, lambda n: ngettext('minute', 'minutes', n)) + (60 * 60 * 24 * 365, lambda n: ungettext('year', 'years', n)), + (60 * 60 * 24 * 30, lambda n: ungettext('month', 'months', n)), + (60 * 60 * 24 * 7, lambda n : ungettext('week', 'weeks', n)), + (60 * 60 * 24, lambda n : ungettext('day', 'days', n)), + (60 * 60, lambda n: ungettext('hour', 'hours', n)), + (60, lambda n: ungettext('minute', 'minutes', n)) ) # Convert datetime.date to datetime.datetime for comparison if d.__class__ is not datetime.datetime: @@ -37,8 +37,8 @@ def timesince(d, now=None): if count != 0: break if count < 0: - return '%d milliseconds' % math.floor((now - d).microseconds / 1000) - s = '%d %s' % (count, name(count)) + return u'%d milliseconds' % math.floor((now - d).microseconds / 1000) + s = u'%d %s' % (count, name(count)) if i + 1 < len(chunks): # Now get the second item seconds2, name2 = chunks[i + 1] diff --git a/django/utils/tzinfo.py b/django/utils/tzinfo.py index cc9f028e91..b486db4f33 100644 --- a/django/utils/tzinfo.py +++ b/django/utils/tzinfo.py @@ -7,7 +7,7 @@ class FixedOffset(tzinfo): "Fixed offset in minutes east from UTC." def __init__(self, offset): self.__offset = timedelta(minutes=offset) - self.__name = "%+03d%02d" % (offset // 60, offset % 60) + self.__name = u"%+03d%02d" % (offset // 60, offset % 60) def __repr__(self): return self.__name @@ -25,7 +25,7 @@ class LocalTimezone(tzinfo): "Proxy timezone information from time module." def __init__(self, dt): tzinfo.__init__(self, dt) - self._tzname = time.tzname[self._isdst(dt)] + self._tzname = unicode(time.tzname[self._isdst(dt)]) def __repr__(self): return self._tzname @@ -43,7 +43,7 @@ class LocalTimezone(tzinfo): return timedelta(0) def tzname(self, dt): - return time.tzname[self._isdst(dt)] + return unicode(time.tzname[self._isdst(dt)]) def _isdst(self, dt): tt = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.weekday(), 0, -1) diff --git a/tests/regressiontests/dateformat/tests.py b/tests/regressiontests/dateformat/tests.py index f9f84145c5..30c9a4e6dd 100644 --- a/tests/regressiontests/dateformat/tests.py +++ b/tests/regressiontests/dateformat/tests.py @@ -1,54 +1,54 @@ r""" >>> format(my_birthday, '') -'' +u'' >>> format(my_birthday, 'a') -'p.m.' +u'p.m.' >>> format(my_birthday, 'A') -'PM' +u'PM' >>> format(my_birthday, 'd') -'08' +u'08' >>> format(my_birthday, 'j') -'8' +u'8' >>> format(my_birthday, 'l') -'Sunday' +u'Sunday' >>> format(my_birthday, 'L') -'False' +u'False' >>> format(my_birthday, 'm') -'07' +u'07' >>> format(my_birthday, 'M') -'Jul' +u'Jul' >>> format(my_birthday, 'b') -'jul' +u'jul' >>> format(my_birthday, 'n') -'7' +u'7' >>> format(my_birthday, 'N') -'July' +u'July' >>> no_tz or format(my_birthday, 'O') == '+0100' True >>> format(my_birthday, 'P') -'10 p.m.' +u'10 p.m.' >>> no_tz or format(my_birthday, 'r') == 'Sun, 8 Jul 1979 22:00:00 +0100' True >>> format(my_birthday, 's') -'00' +u'00' >>> format(my_birthday, 'S') -'th' +u'th' >>> format(my_birthday, 't') -'31' +u'31' >>> no_tz or format(my_birthday, 'T') == 'CET' True >>> no_tz or format(my_birthday, 'U') == '300531600' True >>> format(my_birthday, 'w') -'0' +u'0' >>> format(my_birthday, 'W') -'27' +u'27' >>> format(my_birthday, 'y') -'79' +u'79' >>> format(my_birthday, 'Y') -'1979' +u'1979' >>> format(my_birthday, 'z') -'189' +u'189' >>> no_tz or format(my_birthday, 'Z') == '3600' True @@ -62,10 +62,10 @@ True True >>> format(my_birthday, r'Y z \C\E\T') -'1979 189 CET' +u'1979 189 CET' >>> format(my_birthday, r'jS o\f F') -'8th of July' +u'8th of July' """ from django.utils import dateformat, translation diff --git a/tests/regressiontests/defaultfilters/tests.py b/tests/regressiontests/defaultfilters/tests.py index 4a2e9432b0..f1af7b7bfb 100644 --- a/tests/regressiontests/defaultfilters/tests.py +++ b/tests/regressiontests/defaultfilters/tests.py @@ -2,186 +2,186 @@ r""" >>> floatformat(7.7) -'7.7' +u'7.7' >>> floatformat(7.0) -'7' +u'7' >>> floatformat(0.7) -'0.7' +u'0.7' >>> floatformat(0.07) -'0.1' +u'0.1' >>> floatformat(0.007) -'0.0' +u'0.0' >>> floatformat(0.0) -'0' +u'0' >>> floatformat(7.7,3) -'7.700' +u'7.700' >>> floatformat(6.000000,3) -'6.000' +u'6.000' >>> floatformat(13.1031,-3) -'13.103' +u'13.103' >>> floatformat(11.1197, -2) -'11.12' +u'11.12' >>> floatformat(11.0000, -2) -'11' +u'11' >>> floatformat(11.000001, -2) -'11.00' +u'11.00' >>> floatformat(8.2798, 3) -'8.280' ->>> floatformat('foo') -'' ->>> floatformat(13.1031, 'bar') -'13.1031' ->>> floatformat('foo', 'bar') -'' +u'8.280' +>>> floatformat(u'foo') +u'' +>>> floatformat(13.1031, u'bar') +u'13.1031' +>>> floatformat(u'foo', u'bar') +u'' ->>> addslashes('"double quotes" and \'single quotes\'') -'\\"double quotes\\" and \\\'single quotes\\\'' +>>> addslashes(u'"double quotes" and \'single quotes\'') +u'\\"double quotes\\" and \\\'single quotes\\\'' ->>> addslashes(r'\ : backslashes, too') -'\\\\ : backslashes, too' +>>> addslashes(ur'\ : backslashes, too') +u'\\\\ : backslashes, too' ->>> capfirst('hello world') -'Hello world' +>>> capfirst(u'hello world') +u'Hello world' ->>> fix_ampersands('Jack & Jill & Jeroboam') -'Jack & Jill & Jeroboam' +>>> fix_ampersands(u'Jack & Jill & Jeroboam') +u'Jack & Jill & Jeroboam' ->>> linenumbers('line 1\nline 2') -'1. line 1\n2. line 2' +>>> linenumbers(u'line 1\nline 2') +u'1. line 1\n2. line 2' ->>> linenumbers('\n'.join(['x'] * 10)) -'01. x\n02. x\n03. x\n04. x\n05. x\n06. x\n07. x\n08. x\n09. x\n10. x' +>>> linenumbers(u'\n'.join([u'x'] * 10)) +u'01. x\n02. x\n03. x\n04. x\n05. x\n06. x\n07. x\n08. x\n09. x\n10. x' >>> lower('TEST') -'test' +u'test' >>> lower(u'\xcb') # uppercase E umlaut u'\xeb' >>> make_list('abc') -['a', 'b', 'c'] +[u'a', u'b', u'c'] >>> make_list(1234) -['1', '2', '3', '4'] +[u'1', u'2', u'3', u'4'] >>> slugify(' Jack & Jill like numbers 1,2,3 and 4 and silly characters ?%.$!/') -'jack-jill-like-numbers-123-and-4-and-silly-characters' +u'jack-jill-like-numbers-123-and-4-and-silly-characters' ->>> stringformat(1, '03d') -'001' +>>> stringformat(1, u'03d') +u'001' ->>> stringformat(1, 'z') -'' +>>> stringformat(1, u'z') +u'' >>> title('a nice title, isn\'t it?') -"A Nice Title, Isn't It?" +u"A Nice Title, Isn't It?" ->>> truncatewords('A sentence with a few words in it', 1) -'A ...' +>>> truncatewords(u'A sentence with a few words in it', 1) +u'A ...' ->>> truncatewords('A sentence with a few words in it', 5) -'A sentence with a few ...' +>>> truncatewords(u'A sentence with a few words in it', 5) +u'A sentence with a few ...' ->>> truncatewords('A sentence with a few words in it', 100) -'A sentence with a few words in it' +>>> truncatewords(u'A sentence with a few words in it', 100) +u'A sentence with a few words in it' ->>> truncatewords('A sentence with a few words in it', 'not a number') -'A sentence with a few words in it' +>>> truncatewords(u'A sentence with a few words in it', 'not a number') +u'A sentence with a few words in it' ->>> truncatewords_html('
one two - three
four five
one two - three
four five
one two - three
four five
one two ...
' +>>> truncatewords_html(u'one two - three
four five
one two ...
' ->>> truncatewords_html('one two - three
four five
one two - three
four five
one two - three
four five
one two - three
four five
one two - three
four five
one two - three
four five
one two - three
four five
one two - three
four five
one two - three
four five
one two - three
four five
line 1
' +>>> linebreaks(u'line 1') +u'line 1
' ->>> linebreaks('line 1\nline 2') -'line 1
line 2
line 1
line 2
123
' +u'123
' >>> linebreaksbr(123) -'123' +u'123' >>> removetags(123, 'a') -'123' +u'123' >>> striptags(123) -'123' +u'123' """ diff --git a/tests/regressiontests/text/tests.py b/tests/regressiontests/text/tests.py index f758ecaf90..e88cadba46 100644 --- a/tests/regressiontests/text/tests.py +++ b/tests/regressiontests/text/tests.py @@ -5,13 +5,13 @@ ### smart_split ########################################################### >>> list(smart_split(r'''This is "a person" test.''')) -['This', 'is', '"a person"', 'test.'] +[u'This', u'is', u'"a person"', u'test.'] >>> print list(smart_split(r'''This is "a person's" test.'''))[2] "a person's" >>> print list(smart_split(r'''This is "a person\\"s" test.'''))[2] "a person"s" >>> list(smart_split('''"a 'one''')) -['"a', "'one"] +[u'"a', u"'one"] >>> print list(smart_split(r'''all friends' tests'''))[1] friends' """