From 7c45aad6735b98106a5167532674c5182164c816 Mon Sep 17 00:00:00 2001 From: Malcolm Tredinnick Date: Sat, 12 May 2007 06:54:41 +0000 Subject: [PATCH] unicode: Added some bullet-proofing to the output encoding path. Passing badly encoded data to template rendering shouldn't crash the framework. git-svn-id: http://code.djangoproject.com/svn/django/branches/unicode@5199 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/template/__init__.py | 8 +++++++- django/utils/encoding.py | 14 +++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/django/template/__init__.py b/django/template/__init__.py index cfeab16c22..ae2243b47e 100644 --- a/django/template/__init__.py +++ b/django/template/__init__.py @@ -724,6 +724,11 @@ class Node(object): return nodes class NodeList(list): + # How invalid encoding sequences are handled. The default 'strict' is not + # appropriate, because the framework is not in control of all the string + # data. + codec_errors = 'replace' + def render(self, context): bits = [] for node in self: @@ -731,7 +736,8 @@ class NodeList(list): bits.append(self.render_node(node, context)) else: bits.append(node) - return ''.join([smart_str(b, settings.DEFAULT_CHARSET) for b in bits]) + encoding = settings.DEFAULT_CHARSET + return ''.join([smart_str(b, encoding, errors=self.codec_errors) for b in bits]) def get_nodes_by_type(self, nodetype): "Return a list of all nodes of the given type" diff --git a/django/utils/encoding.py b/django/utils/encoding.py index 402b946e65..d77269bde8 100644 --- a/django/utils/encoding.py +++ b/django/utils/encoding.py @@ -2,7 +2,7 @@ import types from django.conf import settings from django.utils.functional import Promise -def smart_unicode(s, encoding='utf-8'): +def smart_unicode(s, encoding='utf-8', errors='strict'): """ Returns a unicode object representing 's'. Treats bytestrings using the 'encoding' codec. 
@@ -20,12 +20,12 @@ def smart_unicode(s, encoding='utf-8'): if hasattr(s, '__unicode__'): s = unicode(s) else: - s = unicode(str(s), encoding) + s = unicode(str(s), encoding, errors) elif not isinstance(s, unicode): - s = unicode(s, encoding) + s = unicode(s, encoding, errors) return s -def smart_str(s, encoding='utf-8', strings_only=False): +def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): """ Returns a bytestring version of 's', encoded as specified in 'encoding'. @@ -37,11 +37,11 @@ def smart_str(s, encoding='utf-8', strings_only=False): try: return str(s) except UnicodeEncodeError: - return unicode(s).encode(encoding) + return unicode(s).encode(encoding, errors) elif isinstance(s, unicode): - return s.encode(encoding) + return s.encode(encoding, errors) elif s and encoding != 'utf-8': - return s.decode('utf-8').encode(encoding) + return s.decode('utf-8', errors).encode(encoding, errors) else: return s