From 5e9aead902c57d3c00d661695697314d352dee0e Mon Sep 17 00:00:00 2001 From: Malcolm Tredinnick Date: Sat, 12 May 2007 05:29:10 +0000 Subject: [PATCH] unicode: Added handling for illegally encoded form input. git-svn-id: http://code.djangoproject.com/svn/django/branches/unicode@5197 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/http/__init__.py | 6 ++++-- tests/regressiontests/httpwrappers/tests.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/django/http/__init__.py b/django/http/__init__.py index f3c6fac9fc..4361796dcd 100644 --- a/django/http/__init__.py +++ b/django/http/__init__.py @@ -360,12 +360,14 @@ def get_host(request): # this slightly more restricted function. def str_to_unicode(s, encoding): """ - Convert basestring objects to unicode, using the given encoding. + Convert basestring objects to unicode, using the given encoding. Illegally + encoded input characters are replaced with Unicode "unknown" codepoint + (\ufffd). Returns any non-basestring objects without change. """ if isinstance(s, str): - return unicode(s, encoding) + return unicode(s, encoding, 'replace') else: return s diff --git a/tests/regressiontests/httpwrappers/tests.py b/tests/regressiontests/httpwrappers/tests.py index bdd9a7d190..f0a7ba5fef 100644 --- a/tests/regressiontests/httpwrappers/tests.py +++ b/tests/regressiontests/httpwrappers/tests.py @@ -367,6 +367,16 @@ AttributeError: This QueryDict instance is immutable >>> q.urlencode() 'vote=yes&vote=no' +# QueryDicts must be able to handle invalid input encoding (in this case, bad +# UTF-8 encoding). +>>> q = QueryDict('foo=bar&foo=\xff') + +>>> q['foo'] +u'\ufffd' + +>>> q.getlist('foo') +[u'bar', u'\ufffd'] + """ from django.http import QueryDict