1
0
mirror of https://github.com/django/django.git synced 2025-07-04 09:49:12 +00:00

unicode: Made lazy translation objects work properly. Fixed #4295, #4320.

git-svn-id: http://code.djangoproject.com/svn/django/branches/unicode@5314 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Malcolm Tredinnick 2007-05-22 08:06:57 +00:00
parent c2c585f0f0
commit dd52eed2fb
17 changed files with 128 additions and 82 deletions

View File

@ -7,7 +7,7 @@ from django.utils import dateformat
from django.utils.html import escape
from django.utils.text import capfirst
from django.utils.translation import get_date_formats, get_partial_date_formats, ugettext as _
from django.utils.encoding import smart_unicode, smart_str
from django.utils.encoding import smart_unicode, smart_str, force_unicode
from django.template import Library
import datetime
@ -181,7 +181,7 @@ def items_for_result(cl, result):
result_repr = dict(f.choices).get(field_val, EMPTY_CHANGELIST_VALUE)
else:
result_repr = escape(field_val)
if result_repr == '':
if force_unicode(result_repr) == '':
result_repr = ' '
# If list_display_links not defined, add the link tag to the first field
if (first and not cl.lookup_opts.admin.list_display_links) or field_name in cl.lookup_opts.admin.list_display_links:

View File

@ -1,5 +1,5 @@
from django.utils.translation import ungettext, ugettext_lazy as _
from django.utils.encoding import smart_unicode
from django.utils.translation import ungettext, ugettext as _
from django.utils.encoding import force_unicode
from django import template
import re
@ -25,8 +25,8 @@ def intcomma(value):
Converts an integer to a string containing commas every three digits.
For example, 3000 becomes '3,000' and 45000 becomes '45,000'.
"""
orig = smart_unicode(value)
new = re.sub("^(-?\d+)(\d{3})", '\g<1>,\g<2>', smart_unicode(value))
orig = force_unicode(value)
new = re.sub("^(-?\d+)(\d{3})", '\g<1>,\g<2>', orig)
if orig == new:
return new
else:

View File

@ -12,6 +12,7 @@ import urllib2
from django.conf import settings
from django.utils.translation import ugettext as _, ugettext_lazy, ungettext
from django.utils.functional import Promise, lazy
from django.utils.encoding import force_unicode
import re
_datere = r'\d{4}-\d{1,2}-\d{1,2}'
@ -38,10 +39,11 @@ class ValidationError(Exception):
def __init__(self, message):
"ValidationError can be passed a string or a list."
if isinstance(message, list):
self.messages = message
self.messages = [force_unicode(msg) for msg in message]
else:
assert isinstance(message, (basestring, Promise)), ("%s should be a string" % repr(message))
self.messages = [message]
self.messages = [force_unicode(message)]
def __str__(self):
# This is needed because, without a __str__(), printing an exception
# instance would result in this:
@ -53,10 +55,11 @@ class CriticalValidationError(Exception):
def __init__(self, message):
"ValidationError can be passed a string or a list."
if isinstance(message, list):
self.messages = message
self.messages = [force_unicode(msg) for msg in message]
else:
assert isinstance(message, (basestring, Promise)), ("'%s' should be a string" % message)
self.messages = [message]
self.messages = [force_unicode(message)]
def __str__(self):
return str(self.messages)

View File

@ -119,7 +119,7 @@ class Field(object):
Subclasses should implement validate(), not validate_full().
"""
if not self.blank and not field_data:
return [ugettext_lazy('This field is required.')]
return [_('This field is required.')]
try:
self.validate(field_data, all_data)
except validators.ValidationError, e:

View File

@ -5,7 +5,7 @@ from django.db.models.fields import AutoField, FieldDoesNotExist
from django.db.models.loading import get_models
from django.db.models.query import orderlist2sql
from django.db.models import Manager
from django.utils.translation import activate, deactivate_all, get_language
from django.utils.translation import activate, deactivate_all, get_language, string_concat
from bisect import bisect
import re
@ -60,12 +60,12 @@ class Options(object):
setattr(self, attr_name, meta_attrs.pop(attr_name, getattr(self, attr_name)))
# verbose_name_plural is a special case because it uses a 's'
# by default.
setattr(self, 'verbose_name_plural', meta_attrs.pop('verbose_name_plural', self.verbose_name + 's'))
setattr(self, 'verbose_name_plural', meta_attrs.pop('verbose_name_plural', string_concat(self.verbose_name, 's')))
# Any leftover attributes must be invalid.
if meta_attrs != {}:
raise TypeError, "'class Meta' got invalid attribute(s): %s" % ','.join(meta_attrs.keys())
else:
self.verbose_name_plural = self.verbose_name + 's'
self.verbose_name_plural = string_concat(self.verbose_name, 's')
del self.meta
def _prepare(self, model):

View File

@ -6,7 +6,7 @@ import copy
from django.utils.datastructures import SortedDict
from django.utils.html import escape
from django.utils.encoding import StrAndUnicode, smart_unicode
from django.utils.encoding import StrAndUnicode, smart_unicode, force_unicode
from fields import Field
from widgets import TextInput, Textarea
@ -125,7 +125,7 @@ class BaseForm(StrAndUnicode):
if errors_on_separate_row and bf_errors:
output.append(error_row % bf_errors)
if bf.label:
label = escape(bf.label)
label = escape(force_unicode(bf.label))
# Only add a colon if the label does not end in punctuation.
if label[-1] not in ':?.!':
label += ':'

View File

@ -3,7 +3,7 @@
from django.template import resolve_variable, Library
from django.conf import settings
from django.utils.translation import ugettext, ungettext
from django.utils.encoding import smart_unicode, smart_str, iri_to_uri
from django.utils.encoding import force_unicode, smart_str, iri_to_uri
import re
import random as random_module
@ -15,13 +15,14 @@ register = Library()
def stringfilter(func):
"""
Decorator for filters which should only receive unicode objects. The object passed
as the first positional argument will be converted to a unicode object.
Decorator for filters which should only receive unicode objects. The object
passed as the first positional argument will be converted to a unicode
object.
"""
def _dec(*args, **kwargs):
if args:
args = list(args)
args[0] = smart_unicode(args[0])
args[0] = force_unicode(args[0])
return func(*args, **kwargs)
# Include a reference to the real function (used to check original
@ -76,7 +77,7 @@ def floatformat(text, arg=-1):
try:
d = int(arg)
except ValueError:
return smart_unicode(f)
return force_unicode(f)
m = f - int(f)
if not m and d < 0:
return u'%d' % int(f)
@ -86,7 +87,7 @@ def floatformat(text, arg=-1):
def iriencode(value):
"Escapes an IRI value for use in a URL"
return smart_unicode(iri_to_uri(value))
return force_unicode(iri_to_uri(value))
iriencode = stringfilter(iriencode)
def linenumbers(value):
@ -175,7 +176,7 @@ upper = stringfilter(upper)
def urlencode(value):
"Escapes a value for use in a URL"
import urllib
return smart_unicode(urllib.quote(value))
return force_unicode(urllib.quote(value))
urlencode = stringfilter(urlencode)
def urlize(value):
@ -309,7 +310,7 @@ def first(value):
def join(value, arg):
"Joins a list with a string, like Python's ``str.join(list)``"
try:
return arg.join(map(smart_unicode, value))
return arg.join(map(force_unicode, value))
except AttributeError: # fail silently but nicely
return value
@ -369,10 +370,10 @@ def unordered_list(value):
def _helper(value, tabs):
indent = u'\t' * tabs
if value[1]:
return u'%s<li>%s\n%s<ul>\n%s\n%s</ul>\n%s</li>' % (indent, smart_unicode(value[0]), indent,
return u'%s<li>%s\n%s<ul>\n%s\n%s</ul>\n%s</li>' % (indent, force_unicode(value[0]), indent,
u'\n'.join([_helper(v, tabs+1) for v in value[1]]), indent, indent)
else:
return u'%s<li>%s</li>' % (indent, smart_unicode(value[0]))
return u'%s<li>%s</li>' % (indent, force_unicode(value[0]))
return _helper(value, 1)
###################
@ -551,7 +552,7 @@ def pprint(value):
try:
return pformat(value)
except Exception, e:
return u"Error in formatting:%s" % smart_unicode(e)
return u"Error in formatting:%s" % force_unicode(e)
# Syntax: register.filter(name of filter, callback)
register.filter(add)

View File

@ -11,10 +11,10 @@ Usage:
>>>
"""
from django.utils.dates import MONTHS, MONTHS_3, MONTHS_AP, WEEKDAYS
from django.utils.dates import MONTHS, MONTHS_3, MONTHS_AP, WEEKDAYS, WEEKDAYS_ABBR
from django.utils.tzinfo import LocalTimezone
from django.utils.translation import ugettext as _
from django.utils.encoding import smart_unicode
from django.utils.translation import string_concat, ugettext as _
from django.utils.encoding import force_unicode
from calendar import isleap, monthrange
import re, time
@ -24,9 +24,9 @@ re_escaped = re.compile(r'\\(.)')
class Formatter(object):
def format(self, formatstr):
pieces = []
for i, piece in enumerate(re_formatchars.split(formatstr)):
for i, piece in enumerate(re_formatchars.split(force_unicode(formatstr))):
if i % 2:
pieces.append(smart_unicode(getattr(self, piece)()))
pieces.append(force_unicode(getattr(self, piece)()))
elif piece:
pieces.append(re_escaped.sub(r'\1', piece))
return u''.join(pieces)
@ -123,7 +123,7 @@ class DateFormat(TimeFormat):
def D(self):
"Day of the week, textual, 3 letters; e.g. 'Fri'"
return WEEKDAYS[self.data.weekday()][0:3]
return WEEKDAYS_ABBR[self.data.weekday()]
def F(self):
"Month, textual, long; e.g. 'January'"

View File

@ -6,6 +6,10 @@ WEEKDAYS = {
0:_('Monday'), 1:_('Tuesday'), 2:_('Wednesday'), 3:_('Thursday'), 4:_('Friday'),
5:_('Saturday'), 6:_('Sunday')
}
# Abbreviated weekday names keyed by weekday number (0 = Monday through
# 6 = Sunday). Each name is passed through _() individually.
WEEKDAYS_ABBR = {
    0: _('Mon'),
    1: _('Tue'),
    2: _('Wed'),
    3: _('Thu'),
    4: _('Fri'),
    5: _('Sat'),
    6: _('Sun'),
}
WEEKDAYS_REV = {
'monday':0, 'tuesday':1, 'wednesday':2, 'thursday':3, 'friday':4,
'saturday':5, 'sunday':6

View File

@ -17,15 +17,16 @@ def smart_unicode(s, encoding='utf-8', errors='strict'):
Returns a unicode object representing 's'. Treats bytestrings using the
'encoding' codec.
"""
#if isinstance(s, Promise):
# # The input is the result of a gettext_lazy() call, or similar. It will
# # already be encoded in DEFAULT_CHARSET on evaluation and we don't want
# # to evaluate it until render time.
# # FIXME: This isn't totally consistent, because it eventually returns a
# # bytestring rather than a unicode object. It works wherever we use
# # smart_unicode() at the moment. Fixing this requires work in the
# # i18n internals.
# return s
if isinstance(s, Promise):
# The input is the result of a gettext_lazy() call.
return s
return force_unicode(s, encoding, errors)
def force_unicode(s, encoding='utf-8', errors='strict'):
"""
Similar to smart_unicode, except that lazy instances are resolved to
strings, rather than kept as lazy objects.
"""
if not isinstance(s, basestring,):
if hasattr(s, '__unicode__'):
s = unicode(s)

View File

@ -3,7 +3,7 @@ def curry(_curried_func, *args, **kwargs):
return _curried_func(*(args+moreargs), **dict(kwargs, **morekwargs))
return _curried
class Promise:
class Promise(object):
"""
This is just a base class for the proxy class created in
the closure of the lazy function. It can be used to recognize
@ -57,3 +57,19 @@ def lazy(func, *resultclasses):
return __proxy__(args, kw)
return __wrapper__
def allow_lazy(func, *resultclasses):
    """
    Wraps ``func`` so that it can be called with lazy (Promise) arguments.

    On each call, every positional argument and every keyword argument value
    is inspected. As soon as one of them turns out to be a Promise instance,
    the call is handed to lazy(func, *resultclasses) and the resulting proxy
    is returned, so the real function runs only when the proxy is evaluated.
    If no argument is a Promise, ``func`` is called immediately and its
    result is returned unchanged.
    """
    def wrapper(*args, **kwargs):
        for candidate in list(args) + kwargs.values():
            if isinstance(candidate, Promise):
                # At least one lazy argument: return a lazy proxy instead of
                # evaluating func now.
                return lazy(func, *resultclasses)(*args, **kwargs)
        # Nothing lazy was passed in, so evaluate straight away.
        return func(*args, **kwargs)
    return wrapper

View File

@ -1,7 +1,8 @@
"HTML utilities suitable for global use."
import re, string
from django.utils.encoding import smart_unicode
from django.utils.encoding import force_unicode
from django.utils.functional import allow_lazy
# Configuration for urlize() function
LEADING_PUNCTUATION = ['(', '<', '&lt;']
@ -25,31 +26,37 @@ del x # Temporary variable
def escape(html):
"Returns the given HTML with ampersands, quotes and carets encoded"
if not isinstance(html, basestring):
html = smart_unicode(html)
return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
html = force_unicode(html)
return force_unicode(html).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
escape = allow_lazy(escape, unicode)
def linebreaks(value):
"Converts newlines into <p> and <br />s"
value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
value = re.sub(r'\r\n|\r|\n', '\n', force_unicode(value)) # normalize newlines
paras = re.split('\n{2,}', value)
paras = [u'<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
return u'\n\n'.join(paras)
linebreaks = allow_lazy(linebreaks, unicode)
def strip_tags(value):
"Returns the given HTML with all tags stripped"
return re.sub(r'<[^>]*?>', '', value)
return re.sub(r'<[^>]*?>', '', force_unicode(value))
strip_tags = allow_lazy(strip_tags)
def strip_spaces_between_tags(value):
"Returns the given HTML with spaces between tags removed"
return re.sub(r'>\s+<', '><', value)
return re.sub(r'>\s+<', '><', force_unicode(value))
strip_spaces_between_tags = allow_lazy(strip_spaces_between_tags, unicode)
def strip_entities(value):
"Returns the given HTML with all entities (&something;) stripped"
return re.sub(r'&(?:\w+|#\d);', '', value)
return re.sub(r'&(?:\w+|#\d);', '', force_unicode(value))
strip_entities = allow_lazy(strip_entities, unicode)
def fix_ampersands(value):
"Returns the given HTML with all unencoded ampersands encoded correctly"
return unencoded_ampersands_re.sub('&amp;', value)
return unencoded_ampersands_re.sub('&amp;', force_unicode(value))
fix_ampersands = allow_lazy(fix_ampersands, unicode)
def urlize(text, trim_url_limit=None, nofollow=False):
"""
@ -65,7 +72,7 @@ def urlize(text, trim_url_limit=None, nofollow=False):
attribute.
"""
trim_url = lambda x, limit=trim_url_limit: limit is not None and (x[:limit] + (len(x) >=limit and '...' or '')) or x
words = word_split_re.split(text)
words = word_split_re.split(force_unicode(text))
nofollow_attr = nofollow and ' rel="nofollow"' or ''
for i, word in enumerate(words):
match = punctuation_re.match(word)
@ -83,6 +90,7 @@ def urlize(text, trim_url_limit=None, nofollow=False):
if lead + middle + trail != word:
words[i] = lead + middle + trail
return u''.join(words)
urlize = allow_lazy(urlize, unicode)
def clean_html(text):
"""
@ -97,7 +105,7 @@ def clean_html(text):
bottom of the text.
"""
from django.utils.text import normalize_newlines
text = normalize_newlines(text)
text = normalize_newlines(force_unicode(text))
text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
text = fix_ampersands(text)
@ -115,4 +123,5 @@ def clean_html(text):
# Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the bottom of the text.
text = trailing_empty_content_re.sub('', text)
return text
clean_html = allow_lazy(clean_html, unicode)

View File

@ -1,16 +1,18 @@
import re
from django.conf import settings
from django.utils.encoding import smart_unicode
from django.utils.encoding import force_unicode
from django.utils.functional import allow_lazy
# Capitalizes the first letter of a string.
capfirst = lambda x: x and x[0].upper() + x[1:]
capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:]
capfirst = allow_lazy(capfirst, unicode)
def wrap(text, width):
"""
A word-wrap function that preserves existing line breaks and most spaces in
the text. Expects that existing line breaks are posix newlines.
"""
text = smart_unicode(text)
text = force_unicode(text)
def _generator():
it = iter(text.split(' '))
word = it.next()
@ -31,10 +33,11 @@ def wrap(text, width):
pos = len(lines[-1])
yield word
return u''.join(_generator())
wrap = allow_lazy(wrap, unicode)
def truncate_words(s, num):
"Truncates a string after a certain number of words."
s = smart_unicode(s)
s = force_unicode(s)
length = int(num)
words = s.split()
if len(words) > length:
@ -42,6 +45,7 @@ def truncate_words(s, num):
if not words[-1].endswith('...'):
words.append('...')
return u' '.join(words)
truncate_words = allow_lazy(truncate_words, unicode)
def truncate_html_words(s, num):
"""
@ -49,7 +53,7 @@ def truncate_html_words(s, num):
comments). Closes opened tags if they were correctly closed in the given
html.
"""
s = smart_unicode(s)
s = force_unicode(s)
length = int(num)
if length <= 0:
return u''
@ -104,6 +108,7 @@ def truncate_html_words(s, num):
out += '</%s>' % tag
# Return string
return out
truncate_html_words = allow_lazy(truncate_html_words, unicode)
def get_valid_filename(s):
"""
@ -114,8 +119,9 @@ def get_valid_filename(s):
>>> get_valid_filename("john's portrait in 2004.jpg")
'johns_portrait_in_2004.jpg'
"""
s = smart_unicode(s).strip().replace(' ', '_')
s = force_unicode(s).strip().replace(' ', '_')
return re.sub(r'[^-A-Za-z0-9_.]', '', s)
get_valid_filename = allow_lazy(get_valid_filename, unicode)
def get_text_list(list_, last_word=u'or'):
"""
@ -131,18 +137,21 @@ def get_text_list(list_, last_word=u'or'):
''
"""
if len(list_) == 0: return u''
if len(list_) == 1: return smart_unicode(list_[0])
return u'%s %s %s' % (', '.join([smart_unicode(i) for i in list_][:-1]), smart_unicode(last_word), smart_unicode(list_[-1]))
if len(list_) == 1: return force_unicode(list_[0])
return u'%s %s %s' % (', '.join([force_unicode(i) for i in list_][:-1]), force_unicode(last_word), force_unicode(list_[-1]))
get_text_list = allow_lazy(get_text_list, unicode)
def normalize_newlines(text):
return smart_unicode(re.sub(r'\r\n|\r|\n', '\n', text))
return force_unicode(re.sub(r'\r\n|\r|\n', '\n', text))
normalize_newlines = allow_lazy(normalize_newlines, unicode)
def recapitalize(text):
"Recapitalizes text, placing caps after end-of-sentence punctuation."
text = smart_unicode(text).lower()
text = force_unicode(text).lower()
capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
text = capsRE.sub(lambda x: x.group(1).upper(), text)
return text
recapitalize = allow_lazy(recapitalize)
def phone2numeric(phone):
"Converts a phone number with letters into its numeric equivalent."
@ -153,6 +162,7 @@ def phone2numeric(phone):
's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8',
'y': '9', 'x': '9'}.get(m.group(0).lower())
return letters.sub(char2number, phone)
phone2numeric = allow_lazy(phone2numeric)
# From http://www.xhaus.com/alan/python/httpcomp.html#gzip
# Used with permission.
@ -183,6 +193,7 @@ def javascript_quote(s, quote_double_quotes=False):
if quote_double_quotes:
s = s.replace('"', '&quot;')
return str(ustring_re.sub(fix, s))
javascript_quote = allow_lazy(javascript_quote, unicode)
smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
def smart_split(text):
@ -195,7 +206,7 @@ def smart_split(text):
>>> list(smart_split('This is "a person\'s" test.'))
['This', 'is', '"a person\'s"', 'test.']
"""
text = smart_unicode(text)
text = force_unicode(text)
for bit in smart_split_re.finditer(text):
bit = bit.group(0)
if bit[0] == '"' and bit[-1] == '"':
@ -204,3 +215,5 @@ def smart_split(text):
yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"
else:
yield bit
smart_split = allow_lazy(smart_split, unicode)

View File

@ -13,7 +13,7 @@ ngettext_lazy = ngettext
def ungettext(singular, plural, number):
    """
    Returns the result of ngettext(singular, plural, number) after passing it
    through smart_unicode().
    """
    chosen = ngettext(singular, plural, number)
    return smart_unicode(chosen)
string_concat = lambda *strings: ''.join([str(el) for el in strings])
string_concat = lambda *strings: u''.join([smart_unicode(el) for el in strings])
activate = lambda x: None
deactivate = deactivate_all = install = lambda: None
get_language = lambda: settings.LANGUAGE_CODE

View File

@ -3,7 +3,7 @@
import os, re, sys
import gettext as gettext_module
from cStringIO import StringIO
from django.utils.encoding import smart_str, smart_unicode
from django.utils.encoding import force_unicode
try:
import threading
@ -516,8 +516,7 @@ def templatize(src):
def string_concat(*strings):
"""
lazy variant of string concatenation, needed for translations that are
constructed from multiple parts. Handles lazy strings and non-strings by
first turning all arguments to unicode, before joining them.
Lazy variant of string concatenation, needed for translations that are
constructed from multiple parts.
"""
return u''.join([smart_unicode(el) for el in strings])
return u''.join([force_unicode(s) for s in strings])

View File

@ -27,13 +27,13 @@ Translations are done at rendering time, so multi-lingual apps can define forms
early and still send back the right translation.
# XFAIL
# >>> activate('de')
# >>> print f.as_p()
# <p><label for="id_username">Benutzername:</label> <input id="id_username" type="text" name="username" maxlength="10" /></p>
# >>> activate('pl')
# >>> f.as_p()
# u'<p><label for="id_username">Nazwa u\u017cytkownika:</label> <input id="id_username" type="text" name="username" maxlength="10" /></p>'
# >>> deactivate()
>>> activate('de')
>>> print f.as_p()
<p><label for="id_username">Benutzername:</label> <input id="id_username" type="text" name="username" maxlength="10" /></p>
>>> activate('pl')
>>> f.as_p()
u'<p><label for="id_username">Nazwa u\u017cytkownika:</label> <input id="id_username" type="text" name="username" maxlength="10" /></p>'
>>> deactivate()
Unicode decoding problems...
>>> GENDERS = ((u'\xc5', u'En tied\xe4'), (u'\xf8', u'Mies'), (u'\xdf', u'Nainen'))

View File

@ -15,7 +15,7 @@ class HumanizeTests(unittest.TestCase):
self.assertEqual(rendered, result_list[index],
msg="""%s test failed, produced %s,
should've produced %s""" % (method, rendered, result_list[index]))
def test_ordinal(self):
test_list = ('1','2','3','4','11','12',
'13','101','102','103','111',
@ -43,12 +43,12 @@ should've produced %s""" % (method, rendered, result_list[index]))
self.humanize_tester(test_list, result_list, 'intword')
def test_apnumber(self):
test_list = [str(x) for x in xrange(1,11)]
result_list = ('one', 'two', 'three', 'four', 'five', 'six',
'seven', 'eight', 'nine', '10')
test_list = [str(x) for x in range(1, 11)]
result_list = (u'one', u'two', u'three', u'four', u'five', u'six',
u'seven', u'eight', u'nine', u'10')
self.humanize_tester(test_list, result_list, 'apnumber')
if __name__ == '__main__':
unittest.main()