1
0
mirror of https://github.com/django/django.git synced 2025-07-04 01:39:20 +00:00

[soc2009/http-wsgi-improvements] Added more tests for #10190 and changed the logic so that they pass. http.charsets.determine_charset now takes the Accept-Charset header value (accept_charset_header) instead of the request object.

This change passes the test suite, including the extensive tests of HttpResponse's Accept-Charset detection and of finding the codec from content_type. However, the tests do not yet verify that the chosen codec encodes the content properly.

git-svn-id: http://code.djangoproject.com/svn/django/branches/soc2009/http-wsgi-improvements@11030 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Chris Cahoon 2009-06-17 20:42:15 +00:00
parent 1896d531cb
commit bab5ab348e
6 changed files with 98 additions and 33 deletions

View File

@ -13,7 +13,7 @@ except ImportError:
from django.utils.datastructures import MultiValueDict, ImmutableList from django.utils.datastructures import MultiValueDict, ImmutableList
from django.utils.encoding import smart_str, iri_to_uri, force_unicode from django.utils.encoding import smart_str, iri_to_uri, force_unicode
from django.http.multipartparser import MultiPartParser from django.http.multipartparser import MultiPartParser
from django.http.charsets import determine_charset from django.http.charsets import determine_charset, get_codec
from django.conf import settings from django.conf import settings
from django.core.files import uploadhandler from django.core.files import uploadhandler
from utils import * from utils import *
@ -273,13 +273,20 @@ class HttpResponse(object):
status_code = 200 status_code = 200
def __init__(self, content='', mimetype=None, status=None, def __init__(self, content='', mimetype=None, status=None,
content_type=None, origin_request=None): content_type=None, request=None):
from django.conf import settings from django.conf import settings
self._charset = settings.DEFAULT_CHARSET self._charset = settings.DEFAULT_CHARSET
accept_charset = None
if mimetype: if mimetype:
content_type = mimetype # Mimetype is an alias for content-type content_type = mimetype # Mimetype is an alias for content-type
if origin_request or content_type: if request:
self._charset, self._codec = determine_charset(content_type, origin_request) accept_charset = request.META.get("ACCEPT_CHARSET")
if accept_charset or content_type:
charset, codec = determine_charset(content_type, accept_charset)
if charset:
self._charset = charset
if codec:
self._codec = codec
if not content_type: if not content_type:
content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE, content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE,
self._charset) self._charset)
@ -365,7 +372,10 @@ class HttpResponse(object):
def _get_content(self): def _get_content(self):
if self.has_header('Content-Encoding'): if self.has_header('Content-Encoding'):
return ''.join(self._container) return ''.join(self._container)
return smart_str(''.join(self._container), self._charset)
if not self._codec:
self._codec = get_codec(self._charset)
return smart_str(''.join(self._container), self._codec.name)
def _set_content(self, value): def _set_content(self, value):
self._container = [value] self._container = [value]
@ -379,8 +389,10 @@ class HttpResponse(object):
def next(self): def next(self):
chunk = self._iterator.next() chunk = self._iterator.next()
if not self._codec:
self._codec = get_codec(self._charset)
if isinstance(chunk, unicode): if isinstance(chunk, unicode):
chunk = chunk.encode(self._charset) chunk = chunk.encode(self._codec.name)
return str(chunk) return str(chunk)
def close(self): def close(self):

View File

@ -252,7 +252,7 @@ max_dict_key = lambda l:sorted(l.iteritems(), key=itemgetter(1), reverse=True)[0
CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?') CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?')
ACCEPT_CHARSET_RE = re.compile('(?P<charset>([\w\d-]+)|(\*))(;q=(?P<q>[01](\.\d{1,3})?))?,?') ACCEPT_CHARSET_RE = re.compile('(?P<charset>([\w\d-]+)|(\*))(;q=(?P<q>[01](\.\d{1,3})?))?,?')
def determine_charset(content_type, request): def determine_charset(content_type, accept_charset_header):
""" """
Searches request headers from clients and mimetype settings (which may be set Searches request headers from clients and mimetype settings (which may be set
by users) for indicators of which charset and encoding the response should use. by users) for indicators of which charset and encoding the response should use.
@ -270,7 +270,6 @@ def determine_charset(content_type, request):
""" """
codec = None codec = None
charset = None charset = None
# Attempt to get the codec from a content-type, and verify that the charset is valid. # Attempt to get the codec from a content-type, and verify that the charset is valid.
if content_type: if content_type:
match = CONTENT_TYPE_RE.match(content_type) match = CONTENT_TYPE_RE.match(content_type)
@ -279,14 +278,19 @@ def determine_charset(content_type, request):
codec = get_codec(charset) codec = get_codec(charset)
if not codec: # Unsupported charset if not codec: # Unsupported charset
# we should throw an exception here # we should throw an exception here
print "No CODEC ON MIMETYPE" # print "No CODEC ON MIMETYPE"
pass
# If we don't match a content-type header WITH charset, we give the default
else:
charset = settings.DEFAULT_CHARSET
codec = get_codec(settings.DEFAULT_CHARSET)
# Handle Accept-Charset (which we only do if we do not deal with content_type). # Handle Accept-Charset (which we only do if we do not deal with content_type).
else: else:
if request and "ACCEPT_CHARSET" in request.META: if accept_charset_header:
# Get list of matches for Accepted-Charsets. # Get list of matches for Accepted-Charsets.
# [{ charset : q }, { charset : q }] # [{ charset : q }, { charset : q }]
match_iterator = ACCEPT_CHARSET_RE.finditer(request.META["ACCEPT_CHARSET"]) match_iterator = ACCEPT_CHARSET_RE.finditer(accept_charset_header)
accept_charset = [m.groupdict() for m in match_iterator] accept_charset = [m.groupdict() for m in match_iterator]
else: else:
accept_charset = [] # use settings.DEFAULT_CHARSET accept_charset = [] # use settings.DEFAULT_CHARSET

View File

@ -0,0 +1,3 @@
from django.db import models
# Create your models here.

View File

@ -1,10 +1,11 @@
from django.test import Client, TestCase
import re import re
from django.test import Client, TestCase
from django.conf import settings from django.conf import settings
from django.http.charsets import determine_charset, get_codec
CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?') CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?')
def get_charset(response): def get_charset(response):
match = CONTENT_TYPE_RE.match(response.get("content-type","")) match = CONTENT_TYPE_RE.match(response.get("content-type",""))
if match: if match:
@ -14,6 +15,7 @@ def get_charset(response):
return charset return charset
class ClientTest(TestCase): class ClientTest(TestCase):
urls = 'regressiontests.charsets.urls'
def test_good_accept_charset(self): def test_good_accept_charset(self):
"Use Accept-Charset" "Use Accept-Charset"
@ -21,40 +23,65 @@ class ClientTest(TestCase):
# anyway. # anyway.
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="ascii,utf-8;q=0") response = self.client.post('/accept_charset/', ACCEPT_CHARSET="ascii,utf-8;q=0")
self.assertEqual(response.status_code, 200)
self.assertEqual(get_charset(response), "ascii") self.assertEqual(get_charset(response), "ascii")
def test_good_accept_charset2(self):
# us is an alias for ascii # us is an alias for ascii
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.9") response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.9")
self.assertEqual(response.status_code, 200)
self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET) self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET)
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.7")
self.assertEqual(get_charset(response), "us")
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="ascii;q=0.89,utf-8;q=.9")
self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET)
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="utf-8;q=0")
self.assertEqual(get_charset(response), "ISO-8859-1")
def test_good_accept_charset3(self):
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.7")
self.assertEqual(response.status_code, 200)
self.assertEqual(get_charset(response), "us")
def test_good_accept_charset4(self):
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="ascii;q=0.89,utf-8;q=.9")
self.assertEqual(response.status_code, 200)
self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET)
def test_good_accept_charset5(self):
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="utf-8;q=0")
self.assertEqual(response.status_code, 200)
self.assertEqual(get_charset(response), "ISO-8859-1")
def test_bad_accept_charset(self): def test_bad_accept_charset(self):
"Do not use a malformed Accept-Charset" "Do not use a malformed Accept-Charset"
# The data is ignored, but let's check it doesn't crash the system # The data is ignored, but let's check it doesn't crash the system
# anyway. # anyway.
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="this_is_junk") response = self.client.post('/accept_charset/', ACCEPT_CHARSET="this_is_junk")
self.assertEqual(response.status_code, 200)
self.assertEqual(get_charset(response), "utf-8") self.assertEqual(get_charset(response), "utf-8")
def test_good_content_type(self): def test_good_content_type(self):
"Use content-type" "Use good content-type"
# The data is ignored, but let's check it doesn't crash the system # The data is ignored, but let's check it doesn't crash the system
# anyway. # anyway.
response = self.client.post('/good_content_type/') response = self.client.post('/good_content_type/')
self.assertEqual(get_charset(response), "us") self.assertEqual(response.status_code, 200)
def test_bad_content_type(self): def test_bad_content_type(self):
"Use content-type" "Use bad content-type"
# The data is ignored, but let's check it doesn't crash the system
# anyway.
response = self.client.post('/bad_content_type/') response = self.client.post('/bad_content_type/')
self.assertEqual(response.status_code, 200)
self.assertEqual(get_codec(get_charset(response)), None)
def test_content_type_no_charset(self):
response = self.client.post('/content_type_no_charset/')
self.assertEqual(get_charset(response), None)
def test_determine_charset(self):
content_type, codec = determine_charset("", "utf-8;q=0.8,*;q=0.9")
self.assertEqual(codec, get_codec("ISO-8859-1"))

View File

@ -1,9 +1,22 @@
from django.conf.urls.defaults import * from django.conf.urls.defaults import *
import views import views
# Uncomment the next two lines to enable the admin:
# from django.contrib import admin
# admin.autodiscover()
urlpatterns = patterns('', urlpatterns = patterns('',
# Example:
# (r'^tutu/', include('tutu.foo.urls')),
# Uncomment the admin/doc line below and add 'django.contrib.admindocs'
# to INSTALLED_APPS to enable admin documentation:
# (r'^admin/doc/', include('django.contrib.admindocs.urls')),
# Uncomment the next line to enable the admin:
# (r'^admin/', include(admin.site.urls)),
(r'^accept_charset/', views.accept_charset), (r'^accept_charset/', views.accept_charset),
(r'^good_content_type/', views.good_content_type), (r'^good_content_type/', views.good_content_type),
(r'^bad_content_type/', views.bad_content_type), (r'^bad_content_type/', views.bad_content_type),
(r'^content_type_no_charset/', views.content_type_no_charset),
) )

View File

@ -2,10 +2,16 @@ from django.http import HttpResponse
from django.shortcuts import render_to_response from django.shortcuts import render_to_response
def accept_charset(request): def accept_charset(request):
return HttpResponse("ASCII.", origin_request=request) return HttpResponse("ASCII.", request=request)
def good_content_type(request): def good_content_type(request):
return HttpResponse("ASCII.", content_type="text/html; charset=us") return HttpResponse("ASCII.", content_type="text/html; charset=us")
def bad_content_type(request): def bad_content_type(request):
return HttpResponse("ASCII.", content_type="text/html; charset=this_should_be_junk") return HttpResponse("UTF-8", content_type="text/html; charset=this_should_be_junk")
def content_type_no_charset(request):
return HttpResponse("UTF-8", content_type="text/html")
def encode_response(request):
return HttpResponse(u"\ue863", content_type="text/html; charset=GBK")