diff --git a/django/middleware/http.py b/django/middleware/http.py index 13c1b89c41..e98858f772 100644 --- a/django/middleware/http.py +++ b/django/middleware/http.py @@ -1,5 +1,5 @@ from django.core.exceptions import MiddlewareNotUsed -from django.utils.http import http_date +from django.utils.http import http_date, parse_http_date_safe class ConditionalGetMiddleware(object): """ @@ -15,7 +15,7 @@ class ConditionalGetMiddleware(object): response['Content-Length'] = str(len(response.content)) if response.has_header('ETag'): - if_none_match = request.META.get('HTTP_IF_NONE_MATCH', None) + if_none_match = request.META.get('HTTP_IF_NONE_MATCH') if if_none_match == response['ETag']: # Setting the status is enough here. The response handling path # automatically removes content for this status code (in @@ -23,10 +23,14 @@ class ConditionalGetMiddleware(object): response.status_code = 304 if response.has_header('Last-Modified'): - if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE', None) - if if_modified_since == response['Last-Modified']: - # Setting the status code is enough here (same reasons as - # above). - response.status_code = 304 + if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE') + if if_modified_since is not None: + if_modified_since = parse_http_date_safe(if_modified_since) + if if_modified_since is not None: + last_modified = parse_http_date_safe(response['Last-Modified']) + if last_modified is not None and last_modified <= if_modified_since: + # Setting the status code is enough here (same reasons as + # above). 
+ response.status_code = 304 return response diff --git a/django/utils/http.py b/django/utils/http.py index 1384b4294c..bdc367c8f7 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -1,3 +1,5 @@ +import calendar +import datetime import re import sys import urllib @@ -8,6 +10,17 @@ from django.utils.functional import allow_lazy ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"') +MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split() +__D = r'(?P<day>\d{2})' +__D2 = r'(?P<day>[ \d]\d)' +__M = r'(?P<mon>\w{3})' +__Y = r'(?P<year>\d{4})' +__Y2 = r'(?P<year>\d{2})' +__T = r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})' +RFC1123_DATE = re.compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T)) +RFC850_DATE = re.compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T)) +ASCTIME_DATE = re.compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y)) + def urlquote(url, safe='/'): """ A version of Python's urllib.quote() function that can operate on unicode @@ -70,6 +83,48 @@ def http_date(epoch_seconds=None): rfcdate = formatdate(epoch_seconds) return '%s GMT' % rfcdate[:25] +def parse_http_date(date): + """ + Parses a date format as specified by HTTP RFC2616 section 3.3.1. + + The three formats allowed by the RFC are accepted, even if only the first + one is still in widespread use. + + Returns an integer expressed in seconds since the epoch, in + UTC. + """ + # email.Utils.parsedate does the job for RFC1123 dates; unfortunately + # RFC2616 makes it mandatory to support RFC850 dates too. So we roll + # our own RFC-compliant parsing.
+ for regex in RFC1123_DATE, RFC850_DATE, ASCTIME_DATE: + m = regex.match(date) + if m is not None: + break + else: + raise ValueError("%r is not in a valid HTTP date format" % date) + try: + year = int(m.group('year')) + if year < 100: + year += 2000 if year < 70 else 1900 + month = MONTHS.index(m.group('mon').lower()) + 1 + day = int(m.group('day')) + hour = int(m.group('hour')) + min = int(m.group('min')) + sec = int(m.group('sec')) + result = datetime.datetime(year, month, day, hour, min, sec) + return calendar.timegm(result.utctimetuple()) + except Exception: + raise ValueError("%r is not a valid date" % date) + +def parse_http_date_safe(date): + """ + Same as parse_http_date, but returns None if the input is invalid. + """ + try: + return parse_http_date(date) + except Exception: + pass + # Base 36 functions: useful for generating compact URLs def base36_to_int(s): diff --git a/django/views/decorators/http.py b/django/views/decorators/http.py index b763d6ee92..fb3181e10e 100644 --- a/django/views/decorators/http.py +++ b/django/views/decorators/http.py @@ -9,10 +9,9 @@ except ImportError: from calendar import timegm from datetime import timedelta -from email.Utils import formatdate from django.utils.decorators import decorator_from_middleware, available_attrs -from django.utils.http import parse_etags, quote_etag +from django.utils.http import http_date, parse_http_date_safe, parse_etags, quote_etag from django.utils.log import getLogger from django.middleware.http import ConditionalGetMiddleware from django.http import HttpResponseNotAllowed, HttpResponseNotModified, HttpResponse @@ -79,6 +78,8 @@ def condition(etag_func=None, last_modified_func=None): def inner(request, *args, **kwargs): # Get HTTP request headers if_modified_since = request.META.get("HTTP_IF_MODIFIED_SINCE") + if if_modified_since: + if_modified_since = parse_http_date_safe(if_modified_since) if_none_match = request.META.get("HTTP_IF_NONE_MATCH") if_match = 
request.META.get("HTTP_IF_MATCH") if if_none_match or if_match: @@ -102,7 +103,7 @@ def condition(etag_func=None, last_modified_func=None): if last_modified_func: dt = last_modified_func(request, *args, **kwargs) if dt: - res_last_modified = formatdate(timegm(dt.utctimetuple()))[:26] + 'GMT' + res_last_modified = timegm(dt.utctimetuple()) else: res_last_modified = None else: @@ -116,7 +117,8 @@ def condition(etag_func=None, last_modified_func=None): if ((if_none_match and (res_etag in etags or "*" in etags and res_etag)) and (not if_modified_since or - res_last_modified == if_modified_since)): + (res_last_modified and if_modified_since and + res_last_modified <= if_modified_since))): if request.method in ("GET", "HEAD"): response = HttpResponseNotModified() else: @@ -136,9 +138,9 @@ def condition(etag_func=None, last_modified_func=None): } ) response = HttpResponse(status=412) - elif (not if_none_match and if_modified_since and - request.method == "GET" and - res_last_modified == if_modified_since): + elif (not if_none_match and request.method == "GET" and + res_last_modified and if_modified_since and + res_last_modified <= if_modified_since): response = HttpResponseNotModified() if response is None: @@ -146,7 +148,7 @@ def condition(etag_func=None, last_modified_func=None): # Set relevant headers on the response if they don't already exist. 
if res_last_modified and not response.has_header('Last-Modified'): - response['Last-Modified'] = res_last_modified + response['Last-Modified'] = http_date(res_last_modified) if res_etag and not response.has_header('ETag'): response['ETag'] = quote_etag(res_etag) diff --git a/django/views/static.py b/django/views/static.py index da1158d9d2..3aeb2ed98e 100644 --- a/django/views/static.py +++ b/django/views/static.py @@ -9,12 +9,11 @@ import posixpath import re import stat import urllib -from email.Utils import parsedate_tz, mktime_tz from django.template import loader from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseNotModified from django.template import Template, Context, TemplateDoesNotExist -from django.utils.http import http_date +from django.utils.http import http_date, parse_http_date def serve(request, path, document_root=None, show_indexes=False): """ @@ -128,10 +127,7 @@ def was_modified_since(header=None, mtime=0, size=0): raise ValueError matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", header, re.IGNORECASE) - header_date = parsedate_tz(matches.group(1)) - if header_date is None: - raise ValueError - header_mtime = mktime_tz(header_date) + header_mtime = parse_http_date(matches.group(1)) header_len = matches.group(3) if header_len and int(header_len) != size: raise ValueError diff --git a/tests/regressiontests/conditional_processing/models.py b/tests/regressiontests/conditional_processing/models.py index b291aed337..129d11b07f 100644 --- a/tests/regressiontests/conditional_processing/models.py +++ b/tests/regressiontests/conditional_processing/models.py @@ -1,17 +1,20 @@ # -*- coding:utf-8 -*- -from datetime import datetime, timedelta -from calendar import timegm +from datetime import datetime from django.test import TestCase -from django.utils.http import parse_etags, quote_etag +from django.utils import unittest +from django.utils.http import parse_etags, quote_etag, parse_http_date FULL_RESPONSE = 'Test conditional 
get response' LAST_MODIFIED = datetime(2007, 10, 21, 23, 21, 47) LAST_MODIFIED_STR = 'Sun, 21 Oct 2007 23:21:47 GMT' +LAST_MODIFIED_NEWER_STR = 'Mon, 18 Oct 2010 16:56:23 GMT' +LAST_MODIFIED_INVALID_STR = 'Mon, 32 Oct 2010 16:56:23 GMT' EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT' ETAG = 'b4246ffc4f62314ca13147c9d4f76974' EXPIRED_ETAG = '7fae4cd4b0f81e7d2914700043aa8ed6' + class ConditionalGet(TestCase): def assertFullResponse(self, response, check_last_modified=True, check_etag=True): self.assertEquals(response.status_code, 200) @@ -33,6 +36,12 @@ class ConditionalGet(TestCase): self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR response = self.client.get('/condition/') self.assertNotModified(response) + self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_NEWER_STR + response = self.client.get('/condition/') + self.assertNotModified(response) + self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_INVALID_STR + response = self.client.get('/condition/') + self.assertFullResponse(response) self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR response = self.client.get('/condition/') self.assertFullResponse(response) @@ -118,7 +127,7 @@ class ConditionalGet(TestCase): self.assertFullResponse(response, check_last_modified=False) -class ETagProcesing(TestCase): +class ETagProcessing(unittest.TestCase): def testParsing(self): etags = parse_etags(r'"", "etag", "e\"t\"ag", "e\\tag", W/"weak"') self.assertEquals(etags, ['', 'etag', 'e"t"ag', r'e\tag', 'weak']) @@ -126,3 +135,20 @@ class ETagProcesing(TestCase): def testQuoting(self): quoted_etag = quote_etag(r'e\t"ag') self.assertEquals(quoted_etag, r'"e\\t\"ag"') + + +class HttpDateProcessing(unittest.TestCase): + def testParsingRfc1123(self): + parsed = parse_http_date('Sun, 06 Nov 1994 08:49:37 GMT') + self.assertEqual(datetime.utcfromtimestamp(parsed), + datetime(1994, 11, 06, 8, 49, 37)) + + def testParsingRfc850(self): + parsed = 
parse_http_date('Sunday, 06-Nov-94 08:49:37 GMT') + self.assertEqual(datetime.utcfromtimestamp(parsed), + datetime(1994, 11, 06, 8, 49, 37)) + + def testParsingAsctime(self): + parsed = parse_http_date('Sun Nov  6 08:49:37 1994') + self.assertEqual(datetime.utcfromtimestamp(parsed), + datetime(1994, 11, 06, 8, 49, 37)) diff --git a/tests/regressiontests/middleware/tests.py b/tests/regressiontests/middleware/tests.py index b77a2a3813..5d90ffc5dc 100644 --- a/tests/regressiontests/middleware/tests.py +++ b/tests/regressiontests/middleware/tests.py @@ -3,6 +3,7 @@ from django.conf import settings from django.http import HttpRequest from django.middleware.common import CommonMiddleware +from django.middleware.http import ConditionalGetMiddleware from django.test import TestCase @@ -247,3 +248,89 @@ class CommonMiddlewareTest(TestCase): self.assertEquals(r.status_code, 301) self.assertEquals(r['Location'], 'http://www.testserver/middleware/customurlconf/slash/') + +class ConditionalGetMiddlewareTest(TestCase): + urls = 'regressiontests.middleware.cond_get_urls' + def setUp(self): + self.req = HttpRequest() + self.req.META = { + 'SERVER_NAME': 'testserver', + 'SERVER_PORT': 80, + } + self.req.path = self.req.path_info = "/" + self.resp = self.client.get(self.req.path) + + # Tests for the Date header + + def test_date_header_added(self): + self.assertFalse('Date' in self.resp) + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertTrue('Date' in self.resp) + + # Tests for the Content-Length header + + def test_content_length_header_added(self): + content_length = len(self.resp.content) + self.assertFalse('Content-Length' in self.resp) + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertTrue('Content-Length' in self.resp) + self.assertEqual(int(self.resp['Content-Length']), content_length) + + def test_content_length_header_not_changed(self): + bad_content_length = len(self.resp.content) + 10 +
self.resp['Content-Length'] = bad_content_length + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEqual(int(self.resp['Content-Length']), bad_content_length) + + # Tests for the ETag header + + def test_if_none_match_and_no_etag(self): + self.req.META['HTTP_IF_NONE_MATCH'] = 'spam' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 200) + + def test_no_if_none_match_and_etag(self): + self.resp['ETag'] = 'eggs' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 200) + + def test_if_none_match_and_same_etag(self): + self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = 'spam' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 304) + + def test_if_none_match_and_different_etag(self): + self.req.META['HTTP_IF_NONE_MATCH'] = 'spam' + self.resp['ETag'] = 'eggs' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 200) + + # Tests for the Last-Modified header + + def test_if_modified_since_and_no_last_modified(self): + self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 200) + + def test_no_if_modified_since_and_last_modified(self): + self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:38:44 GMT' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 200) + + def test_if_modified_since_and_same_last_modified(self): + self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT' + self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:38:44 GMT' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + 
self.assertEquals(self.resp.status_code, 304) + + def test_if_modified_since_and_last_modified_in_the_past(self): + self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT' + self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:35:44 GMT' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 304) + + def test_if_modified_since_and_last_modified_in_the_future(self): + self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT' + self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:41:44 GMT' + self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp) + self.assertEquals(self.resp.status_code, 200) diff --git a/tests/regressiontests/views/tests/static.py b/tests/regressiontests/views/tests/static.py index c0565a17ee..e3bc1643c5 100644 --- a/tests/regressiontests/views/tests/static.py +++ b/tests/regressiontests/views/tests/static.py @@ -51,7 +51,7 @@ class StaticTests(TestCase): file_name = 'file.txt' response = self.client.get( '/views/%s/%s' % (self.prefix, file_name), - HTTP_IF_MODIFIED_SINCE='Mon, 18 Jan 2038 05:14:07 UTC' + HTTP_IF_MODIFIED_SINCE='Mon, 18 Jan 2038 05:14:07 GMT' # This is 24h before max Unix time. Remember to fix Django and # update this test well before 2038 :) )