diff --git a/django/http/__init__.py b/django/http/__init__.py index bca89f6133..12ef104c39 100644 --- a/django/http/__init__.py +++ b/django/http/__init__.py @@ -13,7 +13,7 @@ except ImportError: from django.utils.datastructures import MultiValueDict, ImmutableList from django.utils.encoding import smart_str, iri_to_uri, force_unicode from django.http.multipartparser import MultiPartParser -from django.http.charsets import determine_charset, get_codec +from django.http.charsets import get_response_encoding, get_codec from django.conf import settings from django.core.files import uploadhandler from utils import * @@ -270,24 +270,21 @@ class BadHeaderError(ValueError): class HttpResponse(object): """A basic HTTP response, with content and dictionary-accessed headers.""" - status_code = 200 + _status_code = 200 + _codec = None + _charset = settings.DEFAULT_CHARSET def __init__(self, content='', mimetype=None, status=None, content_type=None, request=None): from django.conf import settings - self._charset = settings.DEFAULT_CHARSET - self._codec = None accept_charset = None if mimetype: - content_type = mimetype # Mimetype is an alias for content-type + content_type = mimetype # Mimetype arg is an alias for content-type if request: accept_charset = request.META.get("ACCEPT_CHARSET") if accept_charset or content_type: - charset, codec = determine_charset(content_type, accept_charset) - if charset: - self._charset = charset - if codec: - self._codec = codec + encoding = get_response_encoding(content_type, accept_charset) + (self._charset, self._codec) = encoding if not content_type: content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE, self._charset) @@ -370,12 +367,27 @@ class HttpResponse(object): self.set_cookie(key, max_age=0, path=path, domain=domain, expires='Thu, 01-Jan-1970 00:00:00 GMT') + def _get_status_code(self): + if not self._valid_codec(): + self._status_code = 406 + self._container = [''] + return self._status_code + + def 
_set_status_code(self, value): + self._status_code = value + + status_code = property(_get_status_code, _set_status_code) + + def _valid_codec(self): + if not self._codec: + self._codec = get_codec(self._charset) + if not self._codec: + return False + return True + def _get_content(self): if self.has_header('Content-Encoding'): return ''.join(self._container) - - if not self._codec: - self._codec = get_codec(self._charset) return smart_str(''.join(self._container), self._codec.name) def _set_content(self, value): @@ -390,8 +402,6 @@ class HttpResponse(object): def next(self): chunk = self._iterator.next() - if not self._codec: - self._codec = get_codec(self._charset) if isinstance(chunk, unicode): chunk = chunk.encode(self._codec.name) return str(chunk) @@ -432,57 +442,57 @@ class HttpResponseSendFile(HttpResponse): self[settings.HTTPRESPONSE_SENDFILE_HEADER] = path_to_file def _get_content(self): - return open(self.sendfile_filename) + return open(self.sendfile_filename).read() content = property(_get_content) class HttpResponseRedirect(HttpResponse): - status_code = 302 + _status_code = 302 def __init__(self, redirect_to): HttpResponse.__init__(self) self['Location'] = redirect_to class HttpResponsePermanentRedirect(HttpResponse): - status_code = 301 + _status_code = 301 def __init__(self, redirect_to): HttpResponse.__init__(self) self['Location'] = redirect_to class HttpResponseNotModified(HttpResponse): - status_code = 304 + _status_code = 304 class HttpResponseBadRequest(HttpResponse): - status_code = 400 + _status_code = 400 class HttpResponseNotFound(HttpResponse): - status_code = 404 + _status_code = 404 class HttpResponseForbidden(HttpResponse): - status_code = 403 + _status_code = 403 class HttpResponseNotAllowed(HttpResponse): - status_code = 405 + _status_code = 405 def __init__(self, permitted_methods): HttpResponse.__init__(self) self['Allow'] = ', '.join(permitted_methods) class HttpResponseNotAcceptable(HttpResponse): - status_code = 406 + 
_status_code = 406 # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html # if we want to make this more verbose (compliant, actually) class HttpResponseGone(HttpResponse): - status_code = 410 + _status_code = 410 def __init__(self, *args, **kwargs): HttpResponse.__init__(self, *args, **kwargs) class HttpResponseServerError(HttpResponse): - status_code = 500 + _status_code = 500 def __init__(self, *args, **kwargs): HttpResponse.__init__(self, *args, **kwargs) diff --git a/django/http/charsets.py b/django/http/charsets.py index 8b2e582320..5ac333ed58 100644 --- a/django/http/charsets.py +++ b/django/http/charsets.py @@ -235,16 +235,14 @@ def get_codec(charset): CODEC_CHARSETS above has the codecs that correspond to character sets. """ - try: - codec_name = CHARSET_CODECS[charset.strip().lower()] - codec = codecs.lookup(codec_name) - except KeyError: - #print "The charset %s is not supported by Django." % charset - codec = None - except LookupError: - #print "The encoding '%s' is not supported in this version of Python." % codec_name - codec = None - + codec = None + if charset: + try: + codec_name = CHARSET_CODECS[charset.strip().lower()] + codec = codecs.lookup(codec_name) + except LookupError: + # The encoding is not supported in this version of Python. + pass return codec # Returns the key for the maximum value in a dictionary @@ -252,7 +250,7 @@ max_dict_key = lambda l:sorted(l.iteritems(), key=itemgetter(1), reverse=True)[0 CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?') ACCEPT_CHARSET_RE = re.compile('(?P<charset>([\w\d-]+)|(\*))(;q=(?P<q>[01](\.\d{1,3})?))?,?') -def determine_charset(content_type, accept_charset_header): +def get_response_encoding(content_type, accept_charset_header): """ Searches request headers from clients and mimetype settings (which may be set by users) for indicators of which charset and encoding the response should use. 
@@ -268,56 +266,54 @@ def determine_charset(content_type, accept_charset_header): 406 error """ - codec = None + used_content_type = False charset = None - # Attempt to get the codec from a content-type, and verify that the charset is valid. + codec = None + # Try to get the codec from a content-type, verify that the charset is valid. if content_type: match = CONTENT_TYPE_RE.match(content_type) if match: charset = match.group(1) codec = get_codec(charset) if not codec: # Unsupported charset - # we should throw an exception here - # print "No CODEC ON MIMETYPE" - pass - # If we don't match a content-type header WITH charset, we give the default + raise Exception("Unsupported charset in Content-Type header.") else: charset = settings.DEFAULT_CHARSET - codec = get_codec(settings.DEFAULT_CHARSET) - - # Handle Accept-Charset (which we only do if we do not deal with content_type). - else: - if accept_charset_header: - # Get list of matches for Accepted-Charsets. - # [{ charset : q }, { charset : q }] - match_iterator = ACCEPT_CHARSET_RE.finditer(accept_charset_header) - accept_charset = [m.groupdict() for m in match_iterator] - else: - accept_charset = [] # use settings.DEFAULT_CHARSET - charset = settings.DEFAULT_CHARSET - + used_content_type = True + + # Handle Accept-Charset (only if we have not gotten one with content_type). + if not used_content_type: + if not accept_charset_header: # No information to find a charset with. + return None, None + # Get list of matches for Accepted-Charsets. + # [{ charset : q }, { charset : q }] + match_iterator = ACCEPT_CHARSET_RE.finditer(accept_charset_header) + accept_charset = [m.groupdict() for m in match_iterator] + # Remove charsets we cannot encode and whose q values are 0 charsets = _process_accept_charset(accept_charset) - # If we did not get a charset from the content type, we get it from accept_charset. 
- if not charset: - default_charset = settings.DEFAULT_CHARSET - fallback_charset = "ISO-8859-1" - # Prefer default_charset if its q value is 1 or we have no valid acceptable charsets. - max_q_charset = max_dict_key(charsets) - max_q_value = charsets[max_q_charset] - if max_q_value == 0 and fallback_charset not in charsets: + # Establish the prioritized charsets (ones we know about beforehand) + default_charset = settings.DEFAULT_CHARSET + fallback_charset = "ISO-8859-1" + + # Prefer default_charset if its q value is 1 or we have no valid acceptable charsets. + max_q_charset = max_dict_key(charsets) + max_q_value = charsets[max_q_charset] + if max_q_value == 0: + if fallback_charset not in charsets or charsets[fallback_charset] > 0: charset = fallback_charset - elif charsets[default_charset] == 1 or charsets[default_charset] == max_q_value: - charset = default_charset - # Get the highest valued acceptable charset (if we aren't going to the fallback - # or defaulting) - else: - charset = max_q_charset + elif charsets[default_charset] == 1 or charsets[default_charset] == max_q_value: + charset = default_charset + # Get the highest valued acceptable charset (if we aren't going to the fallback + # or defaulting) + else: + charset = max_q_charset - codec = get_codec(charset) + codec = get_codec(charset) # We may reach here with no codec or no charset. We will change the status # code in the HttpResponse. 
+ #print charset, codec return charset, codec # NOTE -- make sure we are not duping the processing of q values @@ -352,4 +348,4 @@ def _process_accept_charset(accept_charset): accepted_charsets["ISO-8859-1"] = default_value - return accepted_charsets \ No newline at end of file + return accepted_charsets diff --git a/tests/regressiontests/charsets/tests.py b/tests/regressiontests/charsets/tests.py index c97b91bb70..580c71357a 100644 --- a/tests/regressiontests/charsets/tests.py +++ b/tests/regressiontests/charsets/tests.py @@ -2,12 +2,12 @@ import re from django.test import Client, TestCase from django.conf import settings -from django.http.charsets import determine_charset, get_codec +from django.http.charsets import get_response_encoding, get_codec -CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?') +CHARSET_RE = re.compile('.*; charset=([\w\d-]+);?') def get_charset(response): - match = CONTENT_TYPE_RE.match(response.get("content-type","")) + match = CHARSET_RE.match(response.get("content-type","")) if match: charset = match.group(1) else: @@ -18,7 +18,7 @@ class ClientTest(TestCase): urls = 'regressiontests.charsets.urls' def test_good_accept_charset(self): - "Use Accept-Charset" + "Use Accept-Charset, with a quality value that throws away default_charset" # The data is ignored, but let's check it doesn't crash the system # anyway. 
@@ -27,61 +27,67 @@ class ClientTest(TestCase): self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), "ascii") - def test_good_accept_charset2(self): - # us is an alias for ascii + def test_quality_sorting_wildcard_wins(self): response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.9") self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET) - def test_good_accept_charset3(self): + def test_quality_sorting_wildcard_loses_alias_wins(self): + # us is an alias for ascii response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.7") self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), "us") - def test_good_accept_charset4(self): + def test_quality_sorting(self): response = self.client.post('/accept_charset/', ACCEPT_CHARSET="ascii;q=0.89,utf-8;q=.9") self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET) - def test_good_accept_charset5(self): + def test_fallback_charset(self): response = self.client.post('/accept_charset/', ACCEPT_CHARSET="utf-8;q=0") self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), "ISO-8859-1") def test_bad_accept_charset(self): - "Do not use a malformed Accept-Charset" - # The data is ignored, but let's check it doesn't crash the system - # anyway. 
- - response = self.client.post('/accept_charset/', ACCEPT_CHARSET="this_is_junk") + "Do not use a charset that Python does not support" + + response = self.client.post('/accept_charset/', ACCEPT_CHARSET="Huttese") self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), "utf-8") - + + def test_force_no_charset(self): + "If we have no accepted charsets that we have codecs for, 406" + response = self.client.post('/accept_charset/', ACCEPT_CHARSET="utf-8;q=0,*;q=0") + + self.assertEqual(response.status_code, 406) + def test_good_content_type(self): "Use good content-type" - # The data is ignored, but let's check it doesn't crash the system - # anyway. - + response = self.client.post('/good_content_type/') self.assertEqual(response.status_code, 200) - + self.assertEqual(get_charset(response), "us") + def test_bad_content_type(self): "Use bad content-type" - - response = self.client.post('/bad_content_type/') - self.assertEqual(response.status_code, 200) - self.assertEqual(get_codec(get_charset(response)), None) - + self.assertRaises(Exception, self.client.get, "/bad_content_type/") + def test_content_type_no_charset(self): response = self.client.post('/content_type_no_charset/') self.assertEqual(get_charset(response), None) - + def test_determine_charset(self): - content_type, codec = determine_charset("", "utf-8;q=0.8,*;q=0.9") + content_type, codec = get_response_encoding("", "utf-8;q=0.8,*;q=0.9") self.assertEqual(codec, get_codec("ISO-8859-1")) - \ No newline at end of file + + def test_basic_response(self): + "Make sure a normal request gets the default charset, with a 200 response." 
+ response = self.client.post('/basic_response/') + self.assertEqual(response.status_code, 200) + self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET) + diff --git a/tests/regressiontests/charsets/urls.py b/tests/regressiontests/charsets/urls.py index 730d6a2fbe..a54a11f62d 100644 --- a/tests/regressiontests/charsets/urls.py +++ b/tests/regressiontests/charsets/urls.py @@ -19,4 +19,5 @@ urlpatterns = patterns('', (r'^good_content_type/', views.good_content_type), (r'^bad_content_type/', views.bad_content_type), (r'^content_type_no_charset/', views.content_type_no_charset), + (r'^basic_response/', views.basic_response), ) diff --git a/tests/regressiontests/charsets/views.py b/tests/regressiontests/charsets/views.py index 7005e8c0d7..ea5258be4d 100644 --- a/tests/regressiontests/charsets/views.py +++ b/tests/regressiontests/charsets/views.py @@ -14,4 +14,7 @@ def content_type_no_charset(request): return HttpResponse("UTF-8", content_type="text/html") def encode_response(request): - return HttpResponse(u"\ue863", content_type="text/html; charset=GBK") \ No newline at end of file + return HttpResponse(u"\ue863", content_type="text/html; charset=GBK") + +def basic_response(request): + return HttpResponse("ASCII.")