diff --git a/AUTHORS b/AUTHORS index d31e36069f..164ec50404 100644 --- a/AUTHORS +++ b/AUTHORS @@ -59,7 +59,7 @@ answer newbie questions, and generally made Django that much better: Arthur av0000@mail.ru David Avsajanishvili - axiak@mit.edu + Mike Axiak Niran Babalola Morten Bagai Mikaƫl Barbero @@ -141,7 +141,9 @@ answer newbie questions, and generally made Django that much better: Marc Fargas Szilveszter Farkas favo@exoweb.net + fdr Dmitri Fedortchenko + Jonathan Feignberg Liang Feng Bill Fenner Stefane Fermgier diff --git a/django/conf/global_settings.py b/django/conf/global_settings.py index 006ab421dd..2c9720da10 100644 --- a/django/conf/global_settings.py +++ b/django/conf/global_settings.py @@ -231,6 +231,21 @@ MEDIA_ROOT = '' # Example: "http://media.lawrence.com" MEDIA_URL = '' +# List of upload handler classes to be applied in order. +FILE_UPLOAD_HANDLERS = ( + 'django.core.files.uploadhandler.MemoryFileUploadHandler', + 'django.core.files.uploadhandler.TemporaryFileUploadHandler', +) + +# Maximum size, in bytes, of a request before it will be streamed to the +# file system instead of into memory. +FILE_UPLOAD_MAX_MEMORY_SIZE = 2621440 # i.e. 2.5 MB + +# Directory in which upload streamed files will be temporarily saved. A value of +# `None` will make Django use the operating system's default temporary directory +# (i.e. "/tmp" on *nix systems). +FILE_UPLOAD_TEMP_DIR = None + # Default formatting for date objects. See all available format strings here: # http://www.djangoproject.com/documentation/templates/#now DATE_FORMAT = 'N j, Y' diff --git a/django/core/files/__init__.py b/django/core/files/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/django/core/files/locks.py b/django/core/files/locks.py new file mode 100644 index 0000000000..212b51a73d --- /dev/null +++ b/django/core/files/locks.py @@ -0,0 +1,66 @@ +""" +Portable file locking utilities. + +Based partially on example by Jonathan Feignberg in the Python +Cookbook, licensed under the Python Software License. + + http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65203 + +Example Usage:: + + >>> from django.core.files import locks + >>> f = open('./file', 'wb') + >>> locks.lock(f, locks.LOCK_EX) + >>> f.write('Django') + >>> f.close() +""" + +__all__ = ('LOCK_EX','LOCK_SH','LOCK_NB','lock','unlock') + +system_type = None + +try: + import win32con + import win32file + import pywintypes + LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK + LOCK_SH = 0 + LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY + __overlapped = pywintypes.OVERLAPPED() + system_type = 'nt' +except (ImportError, AttributeError): + pass + +try: + import fcntl + LOCK_EX = fcntl.LOCK_EX + LOCK_SH = fcntl.LOCK_SH + LOCK_NB = fcntl.LOCK_NB + system_type = 'posix' +except (ImportError, AttributeError): + pass + +if system_type == 'nt': + def lock(file, flags): + hfile = win32file._get_osfhandle(file.fileno()) + win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped) + + def unlock(file): + hfile = win32file._get_osfhandle(file.fileno()) + win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped) +elif system_type == 'posix': + def lock(file, flags): + fcntl.flock(file.fileno(), flags) + + def unlock(file): + fcntl.flock(file.fileno(), fcntl.LOCK_UN) +else: + # File locking is not supported. + LOCK_EX = LOCK_SH = LOCK_NB = None + + # Dummy functions that don't do anything. 
+ def lock(file, flags): + pass + + def unlock(file): + pass diff --git a/django/core/files/move.py b/django/core/files/move.py new file mode 100644 index 0000000000..66873d450c --- /dev/null +++ b/django/core/files/move.py @@ -0,0 +1,59 @@ +""" +Move a file in the safest way possible:: + + >>> from django.core.files.move import file_move_save + >>> file_move_save("/tmp/old_file", "/tmp/new_file") +""" + +import os +from django.core.files import locks + +__all__ = ['file_move_safe'] + +try: + import shutil + file_move = shutil.move +except ImportError: + file_move = os.rename + +def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False): + """ + Moves a file from one location to another in the safest way possible. + + First, try using ``shutils.move``, which is OS-dependent but doesn't break + if moving across filesystems. Then, try ``os.rename``, which will break + across filesystems. Finally, streams manually from one file to another in + pure Python. + + If the destination file exists and ``allow_overwrite`` is ``False``, this + function will throw an ``IOError``. + """ + + # There's no reason to move if we don't have to. + if old_file_name == new_file_name: + return + + if not allow_overwrite and os.path.exists(new_file_name): + raise IOError("Cannot overwrite existing file '%s'." % new_file_name) + + try: + file_move(old_file_name, new_file_name) + return + except OSError: + # This will happen with os.rename if moving to another filesystem + pass + + # If the built-in didn't work, do it the hard way. + new_file = open(new_file_name, 'wb') + locks.lock(new_file, locks.LOCK_EX) + old_file = open(old_file_name, 'rb') + current_chunk = None + + while current_chunk != '': + current_chunk = old_file.read(chunk_size) + new_file.write(current_chunk) + + new_file.close() + old_file.close() + + os.remove(old_file_name) diff --git a/django/core/files/uploadedfile.py b/django/core/files/uploadedfile.py new file mode 100644 index 0000000000..51cec172d4 --- /dev/null +++ b/django/core/files/uploadedfile.py @@ -0,0 +1,190 @@ +""" +Classes representing uploaded files. +""" + +import os +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +__all__ = ('UploadedFile', 'TemporaryUploadedFile', 'InMemoryUploadedFile') + +class UploadedFile(object): + """ + A abstract uploadded file (``TemporaryUploadedFile`` and + ``InMemoryUploadedFile`` are the built-in concrete subclasses). + + An ``UploadedFile`` object behaves somewhat like a file object and + represents some file data that the user submitted with a form. + """ + DEFAULT_CHUNK_SIZE = 64 * 2**10 + + def __init__(self, file_name=None, content_type=None, file_size=None, charset=None): + self.file_name = file_name + self.file_size = file_size + self.content_type = content_type + self.charset = charset + + def __repr__(self): + return "<%s: %s (%s)>" % (self.__class__.__name__, self.file_name, self.content_type) + + def _set_file_name(self, name): + # Sanitize the file name so that it can't be dangerous. + if name is not None: + # Just use the basename of the file -- anything else is dangerous. + name = os.path.basename(name) + + # File names longer than 255 characters can cause problems on older OSes. 
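For illustration only, a minimal sketch of how the new ``file_move_safe`` helper above might be called (the paths are hypothetical; note that ``__all__`` exports ``file_move_safe``, while the module docstring's example spells it ``file_move_save``)::

    from django.core.files.move import file_move_safe

    # Move an upload out of the temporary directory. With allow_overwrite=False
    # (the default) an existing destination raises IOError instead of being
    # replaced.
    file_move_safe('/tmp/tmpzfp6I6.upload', '/srv/media/report.pdf',
                   allow_overwrite=False)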
+ if len(name) > 255: + name, ext = os.path.splitext(name) + name = name[:255 - len(ext)] + ext + + self._file_name = name + + def _get_file_name(self): + return self._file_name + + file_name = property(_get_file_name, _set_file_name) + + def chunk(self, chunk_size=None): + """ + Read the file and yield chucks of ``chunk_size`` bytes (defaults to + ``UploadedFile.DEFAULT_CHUNK_SIZE``). + """ + if not chunk_size: + chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE + + if hasattr(self, 'seek'): + self.seek(0) + # Assume the pointer is at zero... + counter = self.file_size + + while counter > 0: + yield self.read(chunk_size) + counter -= chunk_size + + def multiple_chunks(self, chunk_size=None): + """ + Returns ``True`` if you can expect multiple chunks. + + NB: If a particular file representation is in memory, subclasses should + always return ``False`` -- there's no good reason to read from memory in + chunks. + """ + if not chunk_size: + chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE + return self.file_size < chunk_size + + # Abstract methods; subclasses *must* default read() and probably should + # define open/close. + def read(self, num_bytes=None): + raise NotImplementedError() + + def open(self): + pass + + def close(self): + pass + + # Backwards-compatible support for uploaded-files-as-dictionaries. + def __getitem__(self, key): + import warnings + warnings.warn( + message = "The dictionary access of uploaded file objects is deprecated. Use the new object interface instead.", + category = DeprecationWarning, + stacklevel = 2 + ) + backwards_translate = { + 'filename': 'file_name', + 'content-type': 'content_type', + } + + if key == 'content': + return self.read() + elif key == 'filename': + return self.file_name + elif key == 'content-type': + return self.content_type + else: + return getattr(self, key) + +class TemporaryUploadedFile(UploadedFile): + """ + A file uploaded to a temporary location (i.e. stream-to-disk). + """ + + def __init__(self, file, file_name, content_type, file_size, charset): + super(TemporaryUploadedFile, self).__init__(file_name, content_type, file_size, charset) + self.file = file + self.path = file.name + self.file.seek(0) + + def temporary_file_path(self): + """ + Returns the full path of this file. + """ + return self.path + + def read(self, *args, **kwargs): + return self.file.read(*args, **kwargs) + + def open(self): + self.seek(0) + + def seek(self, *args, **kwargs): + self.file.seek(*args, **kwargs) + +class InMemoryUploadedFile(UploadedFile): + """ + A file uploaded into memory (i.e. stream-to-memory). + """ + def __init__(self, file, field_name, file_name, content_type, charset, file_size): + super(InMemoryUploadedFile, self).__init__(file_name, content_type, charset, file_size) + self.file = file + self.field_name = field_name + self.file.seek(0) + + def seek(self, *args, **kwargs): + self.file.seek(*args, **kwargs) + + def open(self): + self.seek(0) + + def read(self, *args, **kwargs): + return self.file.read(*args, **kwargs) + + def chunk(self, chunk_size=None): + self.file.seek(0) + yield self.read() + + def multiple_chunks(self, chunk_size=None): + # Since it's in memory, we'll never have multiple chunks. + return False + +class SimpleUploadedFile(InMemoryUploadedFile): + """ + A simple representation of a file, which just has content, size, and a name. 
+ """ + def __init__(self, name, content, content_type='text/plain'): + self.file = StringIO(content or '') + self.file_name = name + self.field_name = None + self.file_size = len(content or '') + self.content_type = content_type + self.charset = None + self.file.seek(0) + + def from_dict(cls, file_dict): + """ + Creates a SimpleUploadedFile object from + a dictionary object with the following keys: + - filename + - content-type + - content + """ + return cls(file_dict['filename'], + file_dict['content'], + file_dict.get('content-type', 'text/plain')) + + from_dict = classmethod(from_dict) diff --git a/django/core/files/uploadhandler.py b/django/core/files/uploadhandler.py new file mode 100644 index 0000000000..034953972a --- /dev/null +++ b/django/core/files/uploadhandler.py @@ -0,0 +1,235 @@ +""" +Base file upload handler classes, and the built-in concrete subclasses +""" +import os +import tempfile +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +from django.conf import settings +from django.core.exceptions import ImproperlyConfigured +from django.core.files.uploadedfile import TemporaryUploadedFile, InMemoryUploadedFile + +__all__ = ['UploadFileException','StopUpload', 'SkipFile', 'FileUploadHandler', + 'TemporaryFileUploadHandler', 'MemoryFileUploadHandler', + 'load_handler'] + +class UploadFileException(Exception): + """ + Any error having to do with uploading files. + """ + pass + +class StopUpload(UploadFileException): + """ + This exception is raised when an upload must abort. + """ + def __init__(self, connection_reset=False): + """ + If ``connection_reset`` is ``True``, Django knows will halt the upload + without consuming the rest of the upload. This will cause the browser to + show a "connection reset" error. + """ + self.connection_reset = connection_reset + + def __unicode__(self): + if self.connection_reset: + return u'StopUpload: Halt current upload.' + else: + return u'StopUpload: Consume request data, then halt.' + +class SkipFile(UploadFileException): + """ + This exception is raised by an upload handler that wants to skip a given file. + """ + pass + +class StopFutureHandlers(UploadFileException): + """ + Upload handers that have handled a file and do not want future handlers to + run should raise this exception instead of returning None. + """ + pass + +class FileUploadHandler(object): + """ + Base class for streaming upload handlers. + """ + chunk_size = 64 * 2 ** 10 #: The default chunk size is 64 KB. + + def __init__(self, request=None): + self.file_name = None + self.content_type = None + self.content_length = None + self.charset = None + self.request = request + + def handle_raw_input(self, input_data, META, content_length, boundary, encoding=None): + """ + Handle the raw input from the client. + + Parameters: + + :input_data: + An object that supports reading via .read(). + :META: + ``request.META``. + :content_length: + The (integer) value of the Content-Length header from the + client. + :boundary: The boundary from the Content-Type header. Be sure to + prepend two '--'. + """ + pass + + def new_file(self, field_name, file_name, content_type, content_length, charset=None): + """ + Signal that a new file has been started. + + Warning: As with any data from the client, you should not trust + content_length (and sometimes won't even get it). 
+ """ + self.field_name = field_name + self.file_name = file_name + self.content_type = content_type + self.content_length = content_length + self.charset = charset + + def receive_data_chunk(self, raw_data, start): + """ + Receive data from the streamed upload parser. ``start`` is the position + in the file of the chunk. + """ + raise NotImplementedError() + + def file_complete(self, file_size): + """ + Signal that a file has completed. File size corresponds to the actual + size accumulated by all the chunks. + + Subclasses must should return a valid ``UploadedFile`` object. + """ + raise NotImplementedError() + + def upload_complete(self): + """ + Signal that the upload is complete. Subclasses should perform cleanup + that is necessary for this handler. + """ + pass + +class TemporaryFileUploadHandler(FileUploadHandler): + """ + Upload handler that streams data into a temporary file. + """ + def __init__(self, *args, **kwargs): + super(TemporaryFileUploadHandler, self).__init__(*args, **kwargs) + + def new_file(self, file_name, *args, **kwargs): + """ + Create the file object to append to as data is coming in. + """ + super(TemporaryFileUploadHandler, self).new_file(file_name, *args, **kwargs) + self.file = TemporaryFile(settings.FILE_UPLOAD_TEMP_DIR) + self.write = self.file.write + + def receive_data_chunk(self, raw_data, start): + self.write(raw_data) + + def file_complete(self, file_size): + self.file.seek(0) + return TemporaryUploadedFile(self.file, self.file_name, + self.content_type, file_size, + self.charset) + +class MemoryFileUploadHandler(FileUploadHandler): + """ + File upload handler to stream uploads into memory (used for small files). + """ + + def handle_raw_input(self, input_data, META, content_length, boundary, encoding=None): + """ + Use the content_length to signal whether or not this handler should be in use. + """ + # Check the content-length header to see if we should + # If the the post is too large, we cannot use the Memory handler. + if content_length > settings.FILE_UPLOAD_MAX_MEMORY_SIZE: + self.activated = False + else: + self.activated = True + + def new_file(self, *args, **kwargs): + super(MemoryFileUploadHandler, self).new_file(*args, **kwargs) + if self.activated: + self.file = StringIO() + raise StopFutureHandlers() + + def receive_data_chunk(self, raw_data, start): + """ + Add the data to the StringIO file. + """ + if self.activated: + self.file.write(raw_data) + else: + return raw_data + + def file_complete(self, file_size): + """ + Return a file object if we're activated. + """ + if not self.activated: + return + + return InMemoryUploadedFile(self.file, self.field_name, self.file_name, + self.content_type, self.charset, file_size) + +class TemporaryFile(object): + """ + A temporary file that tries to delete itself when garbage collected. + """ + def __init__(self, dir): + if not dir: + dir = tempfile.gettempdir() + try: + (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir) + self.file = os.fdopen(fd, 'w+b') + except (OSError, IOError): + raise OSError("Could not create temporary file for uploading, have you set settings.FILE_UPLOAD_TEMP_DIR correctly?") + self.name = name + + def __getattr__(self, name): + a = getattr(self.__dict__['file'], name) + if type(a) != type(0): + setattr(self, name, a) + return a + + def __del__(self): + try: + os.unlink(self.name) + except OSError: + pass + +def load_handler(path, *args, **kwargs): + """ + Given a path to a handler, return an instance of that handler. 
+ + E.g.:: + >>> load_handler('django.core.files.uploadhandler.TemporaryFileUploadHandler', request) + + + """ + i = path.rfind('.') + module, attr = path[:i], path[i+1:] + try: + mod = __import__(module, {}, {}, [attr]) + except ImportError, e: + raise ImproperlyConfigured('Error importing upload handler module %s: "%s"' % (module, e)) + except ValueError, e: + raise ImproperlyConfigured('Error importing upload handler module. Is FILE_UPLOAD_HANDLERS a correctly defined list or tuple?') + try: + cls = getattr(mod, attr) + except AttributeError: + raise ImproperlyConfigured('Module "%s" does not define a "%s" upload handler backend' % (module, attr)) + return cls(*args, **kwargs) diff --git a/django/core/handlers/modpython.py b/django/core/handlers/modpython.py index abab399009..332df6f54c 100644 --- a/django/core/handlers/modpython.py +++ b/django/core/handlers/modpython.py @@ -53,7 +53,8 @@ class ModPythonRequest(http.HttpRequest): def _load_post_and_files(self): "Populates self._post and self._files" if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'): - self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data) + self._raw_post_data = '' + self._post, self._files = self.parse_file_upload(self.META, self._req) else: self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict() diff --git a/django/core/handlers/wsgi.py b/django/core/handlers/wsgi.py index df2ba19b65..795f139042 100644 --- a/django/core/handlers/wsgi.py +++ b/django/core/handlers/wsgi.py @@ -112,9 +112,8 @@ class WSGIRequest(http.HttpRequest): # Populates self._post and self._files if self.method == 'POST': if self.environ.get('CONTENT_TYPE', '').startswith('multipart'): - header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')]) - header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '') - self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data) + self._raw_post_data = '' + self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input']) else: self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict() else: diff --git a/django/db/models/base.py b/django/db/models/base.py index 659c67c0d3..5669694a1b 100644 --- a/django/db/models/base.py +++ b/django/db/models/base.py @@ -19,6 +19,8 @@ from django.dispatch import dispatcher from django.utils.datastructures import SortedDict from django.utils.functional import curry from django.utils.encoding import smart_str, force_unicode, smart_unicode +from django.core.files.move import file_move_safe +from django.core.files import locks from django.conf import settings try: @@ -469,16 +471,51 @@ class Model(object): def _get_FIELD_size(self, field): return os.path.getsize(self._get_FIELD_filename(field)) - def _save_FIELD_file(self, field, filename, raw_contents, save=True): + def _save_FIELD_file(self, field, filename, raw_field, save=True): directory = field.get_directory_name() try: # Create the date-based directory if it doesn't exist. os.makedirs(os.path.join(settings.MEDIA_ROOT, directory)) except OSError: # Directory probably already exists. pass + + # + # Check for old-style usage (files-as-dictionaries). Warn here first + # since there are multiple locations where we need to support both new + # and old usage. 
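Custom handlers plug into the ``FileUploadHandler`` contract defined above. As a sketch under stated assumptions (the class name and quota are invented, not part of this patch), a handler that aborts oversized uploads might look like::

    from django.core.files.uploadhandler import FileUploadHandler, StopUpload

    class QuotaUploadHandler(FileUploadHandler):
        """Hypothetical handler: abort any upload larger than 10 MB."""
        QUOTA = 10 * 2 ** 20

        def __init__(self, request=None):
            super(QuotaUploadHandler, self).__init__(request)
            self.total_upload = 0

        def receive_data_chunk(self, raw_data, start):
            self.total_upload += len(raw_data)
            if self.total_upload >= self.QUOTA:
                raise StopUpload(connection_reset=True)
            # Returning the chunk passes it on to the next handler in the chain.
            return raw_data

        def file_complete(self, file_size):
            # Returning None means this handler does not produce the file object.
            return None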
+ # + if isinstance(raw_field, dict): + import warnings + warnings.warn( + message = "Representing uploaded files as dictionaries is"\ + " deprected. Use django.core.files.SimpleUploadedFile"\ + " instead.", + category = DeprecationWarning, + stacklevel = 2 + ) + from django.core.files.uploadedfile import SimpleUploadedFile + raw_field = SimpleUploadedFile.from_dict(raw_field) + + elif isinstance(raw_field, basestring): + import warnings + warnings.warn( + message = "Representing uploaded files as strings is "\ + " deprecated. Use django.core.files.SimpleUploadedFile "\ + " instead.", + category = DeprecationWarning, + stacklevel = 2 + ) + from django.core.files.uploadedfile import SimpleUploadedFile + raw_field = SimpleUploadedFile(filename, raw_field) + + if filename is None: + filename = raw_field.file_name + filename = field.get_filename(filename) + # # If the filename already exists, keep adding an underscore to the name of # the file until the filename doesn't exist. + # while os.path.exists(os.path.join(settings.MEDIA_ROOT, filename)): try: dot_index = filename.rindex('.') @@ -486,14 +523,27 @@ class Model(object): filename += '_' else: filename = filename[:dot_index] + '_' + filename[dot_index:] + # + # Save the file name on the object and write the file to disk + # - # Write the file to disk. setattr(self, field.attname, filename) full_filename = self._get_FIELD_filename(field) - fp = open(full_filename, 'wb') - fp.write(raw_contents) - fp.close() + + if hasattr(raw_field, 'temporary_file_path'): + # This file has a file path that we can move. + raw_field.close() + file_move_safe(raw_field.temporary_file_path(), full_filename) + + else: + # This is a normal uploadedfile that we can stream. + fp = open(full_filename, 'wb') + locks.lock(fp, locks.LOCK_EX) + for chunk in raw_field.chunk(): + fp.write(chunk) + locks.unlock(fp) + fp.close() # Save the width and/or height, if applicable. if isinstance(field, ImageField) and (field.width_field or field.height_field): diff --git a/django/db/models/fields/__init__.py b/django/db/models/fields/__init__.py index 189f0ba4ad..1eefed641c 100644 --- a/django/db/models/fields/__init__.py +++ b/django/db/models/fields/__init__.py @@ -811,7 +811,7 @@ class FileField(Field): setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self)) setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self)) setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self)) - setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save)) + setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save)) dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls) def delete_file(self, instance): @@ -834,9 +834,19 @@ class FileField(Field): if new_data.get(upload_field_name, False): func = getattr(new_object, 'save_%s_file' % self.name) if rel: - func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save) + file = new_data[upload_field_name][0] else: - func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save) + file = new_data[upload_field_name] + + # Backwards-compatible support for files-as-dictionaries. + # We don't need to raise a warning because Model._save_FIELD_file will + # do so for us. 
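To see the effect of the ``_save_FIELD_file`` changes above from the model side, a small sketch; the ``Document`` model and its ``attachment`` field are hypothetical (and ``save=True`` assumes a configured database), but the generated ``save_<field>_file`` helper and ``SimpleUploadedFile`` are the interfaces this patch defines::

    from django.db import models
    from django.core.files.uploadedfile import SimpleUploadedFile

    class Document(models.Model):           # hypothetical example model
        attachment = models.FileField(upload_to='attachments')

    doc = Document()
    doc.save_attachment_file('notes.txt',
                             SimpleUploadedFile('notes.txt', 'some text'),
                             save=True)
    # Passing a {'filename': ..., 'content': ...} dict or a raw string still
    # works, but now emits the DeprecationWarning shown above.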
+ try: + file_name = file.file_name + except AttributeError: + file_name = file['filename'] + + func(file_name, file, save) def get_directory_name(self): return os.path.normpath(force_unicode(datetime.datetime.now().strftime(smart_str(self.upload_to)))) @@ -849,7 +859,7 @@ class FileField(Field): def save_form_data(self, instance, data): from django.newforms.fields import UploadedFile if data and isinstance(data, UploadedFile): - getattr(instance, "save_%s_file" % self.name)(data.filename, data.content, save=False) + getattr(instance, "save_%s_file" % self.name)(data.filename, data.data, save=False) def formfield(self, **kwargs): defaults = {'form_class': forms.FileField} diff --git a/django/http/__init__.py b/django/http/__init__.py index 7faa3c875e..ef15479983 100644 --- a/django/http/__init__.py +++ b/django/http/__init__.py @@ -9,14 +9,15 @@ try: except ImportError: from cgi import parse_qsl -from django.utils.datastructures import MultiValueDict, FileDict +from django.utils.datastructures import MultiValueDict, ImmutableList from django.utils.encoding import smart_str, iri_to_uri, force_unicode - +from django.http.multipartparser import MultiPartParser +from django.conf import settings +from django.core.files import uploadhandler from utils import * RESERVED_CHARS="!*'();:@&=+$,/?%#[]" - class Http404(Exception): pass @@ -25,6 +26,7 @@ class HttpRequest(object): # The encoding used in GET/POST dicts. None means use default setting. _encoding = None + _upload_handlers = [] def __init__(self): self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {} @@ -102,39 +104,31 @@ class HttpRequest(object): encoding = property(_get_encoding, _set_encoding) -def parse_file_upload(header_dict, post_data): - """Returns a tuple of (POST QueryDict, FILES MultiValueDict).""" - import email, email.Message - from cgi import parse_header - raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()]) - raw_message += '\r\n\r\n' + post_data - msg = email.message_from_string(raw_message) - POST = QueryDict('', mutable=True) - FILES = MultiValueDict() - for submessage in msg.get_payload(): - if submessage and isinstance(submessage, email.Message.Message): - name_dict = parse_header(submessage['Content-Disposition'])[1] - # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads - # or {'name': 'blah'} for POST fields - # We assume all uploaded files have a 'filename' set. - if 'filename' in name_dict: - assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported" - if not name_dict['filename'].strip(): - continue - # IE submits the full path, so trim everything but the basename. - # (We can't use os.path.basename because that uses the server's - # directory separator, which may not be the same as the - # client's one.) 
- filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:] - FILES.appendlist(name_dict['name'], FileDict({ - 'filename': filename, - 'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None, - 'content': submessage.get_payload(), - })) - else: - POST.appendlist(name_dict['name'], submessage.get_payload()) - return POST, FILES + def _initialize_handlers(self): + self._upload_handlers = [uploadhandler.load_handler(handler, self) + for handler in settings.FILE_UPLOAD_HANDLERS] + def _set_upload_handlers(self, upload_handlers): + if hasattr(self, '_files'): + raise AttributeError("You cannot set the upload handlers after the upload has been processed.") + self._upload_handlers = upload_handlers + + def _get_upload_handlers(self): + if not self._upload_handlers: + # If thre are no upload handlers defined, initialize them from settings. + self._initialize_handlers() + return self._upload_handlers + + upload_handlers = property(_get_upload_handlers, _set_upload_handlers) + + def parse_file_upload(self, META, post_data): + """Returns a tuple of (POST QueryDict, FILES MultiValueDict).""" + self.upload_handlers = ImmutableList( + self.upload_handlers, + warning = "You cannot alter upload handlers after the upload has been processed." + ) + parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding) + return parser.parse() class QueryDict(MultiValueDict): """ diff --git a/django/http/multipartparser.py b/django/http/multipartparser.py new file mode 100644 index 0000000000..8bed5681cf --- /dev/null +++ b/django/http/multipartparser.py @@ -0,0 +1,658 @@ +""" +Multi-part parsing for file uploads. + +Exposes one class, ``MultiPartParser``, which feeds chunks of uploaded data to +file upload handlers for processing. +""" +import cgi +from django.conf import settings +from django.core.exceptions import SuspiciousOperation +from django.utils.datastructures import MultiValueDict +from django.utils.encoding import force_unicode +from django.utils.text import unescape_entities +from django.core.files.uploadhandler import StopUpload, SkipFile, StopFutureHandlers + +__all__ = ('MultiPartParser','MultiPartParserError','InputStreamExhausted') + +class MultiPartParserError(Exception): + pass + +class InputStreamExhausted(Exception): + """ + No more reads are allowed from this device. + """ + pass + +RAW = "raw" +FILE = "file" +FIELD = "field" + +class MultiPartParser(object): + """ + A rfc2388 multipart/form-data parser. + + ``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks + and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. If + ``file_upload_dir`` is defined files will be streamed to temporary files in + that directory. + """ + def __init__(self, META, input_data, upload_handlers, encoding=None): + """ + Initialize the MultiPartParser object. + + :META: + The standard ``META`` dictionary in Django request objects. + :input_data: + The raw post data, as a bytestring. + :upload_handler: + An UploadHandler instance that performs operations on the uploaded + data. + :encoding: + The encoding with which to treat the incoming data. + """ + + # + # Content-Type should containt multipart and the boundary information. + # + + content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', '')) + if not content_type.startswith('multipart/'): + raise MultiPartParserError('Invalid Content-Type: %s' % content_type) + + # Parse the header to get the boundary to split the parts. 
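Because ``parse_file_upload`` above freezes ``request.upload_handlers`` into an ``ImmutableList``, any per-view customization has to happen before ``request.POST`` or ``request.FILES`` is first touched. A hedged sketch, reusing the hypothetical ``QuotaUploadHandler`` sketched earlier::

    from django.http import HttpResponse

    def upload_view(request):
        # Must run before the upload has been parsed; afterwards any attempt
        # to modify the handler list raises AttributeError.
        request.upload_handlers.insert(0, QuotaUploadHandler(request))

        if request.method == 'POST':
            names = [f.file_name for f in request.FILES.values()]
            return HttpResponse('Received: %s' % ', '.join(names))
        return HttpResponse('POST a multipart/form-data request here.')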
+ ctypes, opts = parse_header(content_type) + boundary = opts.get('boundary') + if not boundary or not cgi.valid_boundary(boundary): + raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary) + + + # + # Content-Length should contain the length of the body we are about + # to receive. + # + try: + content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0))) + except (ValueError, TypeError): + # For now set it to 0; we'll try again later on down. + content_length = 0 + + if content_length <= 0: + # This means we shouldn't continue...raise an error. + raise MultiPartParserError("Invalid content length: %r" % content_length) + + self._boundary = boundary + self._input_data = input_data + + # For compatibility with low-level network APIs (with 32-bit integers), + # the chunk size should be < 2^31, but still divisible by 4. + self._chunk_size = min(2**31-4, *[x.chunk_size for x in upload_handlers if x.chunk_size]) + + self._meta = META + self._encoding = encoding or settings.DEFAULT_CHARSET + self._content_length = content_length + self._upload_handlers = upload_handlers + + def parse(self): + """ + Parse the POST data and break it into a FILES MultiValueDict and a POST + MultiValueDict. + + Returns a tuple containing the POST and FILES dictionary, respectively. + """ + # We have to import QueryDict down here to avoid a circular import. + from django.http import QueryDict + + encoding = self._encoding + handlers = self._upload_handlers + + limited_input_data = LimitBytes(self._input_data, self._content_length) + + # See if the handler will want to take care of the parsing. + # This allows overriding everything if somebody wants it. + for handler in handlers: + result = handler.handle_raw_input(limited_input_data, + self._meta, + self._content_length, + self._boundary, + encoding) + if result is not None: + return result[0], result[1] + + # Create the data structures to be used later. + self._post = QueryDict('', mutable=True) + self._files = MultiValueDict() + + # Instantiate the parser and stream: + stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size)) + + # Whether or not to signal a file-completion at the beginning of the loop. + old_field_name = None + counters = [0] * len(handlers) + + try: + for item_type, meta_data, field_stream in Parser(stream, self._boundary): + if old_field_name: + # We run this at the beginning of the next loop + # since we cannot be sure a file is complete until + # we hit the next boundary/part of the multipart content. + self.handle_file_complete(old_field_name, counters) + + try: + disposition = meta_data['content-disposition'][1] + field_name = disposition['name'].strip() + except (KeyError, IndexError, AttributeError): + continue + + transfer_encoding = meta_data.get('content-transfer-encoding') + field_name = force_unicode(field_name, encoding, errors='replace') + + if item_type == FIELD: + # This is a post field, we can just set it in the post + if transfer_encoding == 'base64': + raw_data = field_stream.read() + try: + data = str(raw_data).decode('base64') + except: + data = raw_data + else: + data = field_stream.read() + + self._post.appendlist(field_name, + force_unicode(data, encoding, errors='replace')) + elif item_type == FILE: + # This is a file, use the handler... 
+ file_successful = True + file_name = disposition.get('filename') + if not file_name: + continue + file_name = force_unicode(file_name, encoding, errors='replace') + file_name = self.IE_sanitize(unescape_entities(file_name)) + + content_type = meta_data.get('content-type', ('',))[0].strip() + try: + charset = meta_data.get('content-type', (0,{}))[1].get('charset', None) + except: + charset = None + + try: + content_length = int(meta_data.get('content-length')[0]) + except (IndexError, TypeError, ValueError): + content_length = None + + counters = [0] * len(handlers) + try: + for handler in handlers: + try: + handler.new_file(field_name, file_name, + content_type, content_length, + charset) + except StopFutureHandlers: + break + + for chunk in field_stream: + if transfer_encoding == 'base64': + # We only special-case base64 transfer encoding + try: + chunk = str(chunk).decode('base64') + except Exception, e: + # Since this is only a chunk, any error is an unfixable error. + raise MultiPartParserError("Could not decode base64 data: %r" % e) + + for i, handler in enumerate(handlers): + chunk_length = len(chunk) + chunk = handler.receive_data_chunk(chunk, + counters[i]) + counters[i] += chunk_length + if chunk is None: + # If the chunk received by the handler is None, then don't continue. + break + + except SkipFile, e: + file_successful = False + # Just use up the rest of this file... + exhaust(field_stream) + else: + # Handle file upload completions on next iteration. + old_field_name = field_name + else: + # If this is neither a FIELD or a FILE, just exhaust the stream. + exhaust(stream) + except StopUpload, e: + if not e.connection_reset: + exhaust(limited_input_data) + else: + # Make sure that the request data is all fed + exhaust(limited_input_data) + + # Signal that the upload has completed. + for handler in handlers: + retval = handler.upload_complete() + if retval: + break + + return self._post, self._files + + def handle_file_complete(self, old_field_name, counters): + """ + Handle all the signalling that takes place when a file is complete. + """ + for i, handler in enumerate(self._upload_handlers): + file_obj = handler.file_complete(counters[i]) + if file_obj: + # If it returns a file object, then set the files dict. + self._files.appendlist(force_unicode(old_field_name, + self._encoding, + errors='replace'), + file_obj) + break + + def IE_sanitize(self, filename): + """Cleanup filename from Internet Explorer full paths.""" + return filename and filename[filename.rfind("\\")+1:].strip() + +class LazyStream(object): + """ + The LazyStream wrapper allows one to get and "unget" bytes from a stream. + + Given a producer object (an iterator that yields bytestrings), the + LazyStream object will support iteration, reading, and keeping a "look-back" + variable in case you need to "unget" some bytes. + """ + def __init__(self, producer, length=None): + """ + Every LazyStream must have a producer when instantiated. + + A producer is an iterable that returns a string each time it + is called. + """ + self._producer = producer + self._empty = False + self._leftover = '' + self.length = length + self._position = 0 + self._remaining = length + + # These fields are to do sanity checking to make sure we don't + # have infinite loops getting/ungetting from the stream. The + # purpose overall is to raise an exception if we perform lots + # of stream get/unget gymnastics without getting + # anywhere. Naturally this is not sound, but most probably + # would indicate a bug if the exception is raised. 
+ + # largest position tell us how far this lazystream has ever + # been advanced + self._largest_position = 0 + + # "modifications since" will start at zero and increment every + # time the position is modified but a new largest position is + # not achieved. + self._modifications_since = 0 + + def tell(self): + return self.position + + def read(self, size=None): + def parts(): + remaining = (size is not None and [size] or [self._remaining])[0] + # do the whole thing in one shot if no limit was provided. + if remaining is None: + yield ''.join(self) + return + + # otherwise do some bookkeeping to return exactly enough + # of the stream and stashing any extra content we get from + # the producer + while remaining != 0: + assert remaining > 0, 'remaining bytes to read should never go negative' + + chunk = self.next() + + emitting = chunk[:remaining] + self.unget(chunk[remaining:]) + remaining -= len(emitting) + yield emitting + + out = ''.join(parts()) + return out + + def next(self): + """ + Used when the exact number of bytes to read is unimportant. + + This procedure just returns whatever is chunk is conveniently returned + from the iterator instead. Useful to avoid unnecessary bookkeeping if + performance is an issue. + """ + if self._leftover: + output = self._leftover + self._leftover = '' + else: + output = self._producer.next() + self.position += len(output) + return output + + def close(self): + """ + Used to invalidate/disable this lazy stream. + + Replaces the producer with an empty list. Any leftover bytes that have + already been read will still be reported upon read() and/or next(). + """ + self._producer = [] + + def __iter__(self): + return self + + def unget(self, bytes): + """ + Places bytes back onto the front of the lazy stream. + + Future calls to read() will return those bytes first. The + stream position and thus tell() will be rewound. + """ + self.position -= len(bytes) + self._leftover = ''.join([bytes, self._leftover]) + + def _set_position(self, value): + if value > self._largest_position: + self._modifications_since = 0 + self._largest_position = value + else: + self._modifications_since += 1 + if self._modifications_since > 500: + raise SuspiciousOperation( + "The multipart parser got stuck, which shouldn't happen with" + " normal uploaded files. Check for malicious upload activity;" + " if there is none, report this to the Django developers." + ) + + self._position = value + + position = property(lambda self: self._position, _set_position) + +class ChunkIter(object): + """ + An iterable that will yield chunks of data. Given a file-like object as the + constructor, this object will yield chunks of read operations from that + object. + """ + def __init__(self, flo, chunk_size=64 * 1024): + self.flo = flo + self.chunk_size = chunk_size + + def next(self): + try: + data = self.flo.read(self.chunk_size) + except InputStreamExhausted: + raise StopIteration() + if data: + return data + else: + raise StopIteration() + + def __iter__(self): + return self + +class LimitBytes(object): + """ Limit bytes for a file object. """ + def __init__(self, fileobject, length): + self._file = fileobject + self.remaining = length + + def read(self, num_bytes=None): + """ + Read data from the underlying file. + If you ask for too much or there isn't anything left, + this will raise an InputStreamExhausted error. 
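The ``read()``/``unget()`` contract described above can be exercised on its own. A small illustrative sketch, traced against the code in this patch rather than guaranteed behavior::

    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    from django.http.multipartparser import ChunkIter, LazyStream

    stream = LazyStream(ChunkIter(StringIO('abcdef'), chunk_size=2))
    assert stream.read(3) == 'abc'    # spans two 2-byte producer chunks
    stream.unget('abc')               # push the bytes back onto the stream...
    assert stream.read() == 'abcdef'  # ...and everything can be read again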
+ """ + if self.remaining <= 0: + raise InputStreamExhausted() + if num_bytes is None: + num_bytes = self.remaining + else: + num_bytes = min(num_bytes, self.remaining) + self.remaining -= num_bytes + return self._file.read(num_bytes) + +class InterBoundaryIter(object): + """ + A Producer that will iterate over boundaries. + """ + def __init__(self, stream, boundary): + self._stream = stream + self._boundary = boundary + + def __iter__(self): + return self + + def next(self): + try: + return LazyStream(BoundaryIter(self._stream, self._boundary)) + except InputStreamExhausted: + raise StopIteration() + +class BoundaryIter(object): + """ + A Producer that is sensitive to boundaries. + + Will happily yield bytes until a boundary is found. Will yield the bytes + before the boundary, throw away the boundary bytes themselves, and push the + post-boundary bytes back on the stream. + + The future calls to .next() after locating the boundary will raise a + StopIteration exception. + """ + + def __init__(self, stream, boundary): + self._stream = stream + self._boundary = boundary + self._done = False + # rollback an additional six bytes because the format is like + # this: CRLF[--CRLF] + self._rollback = len(boundary) + 6 + + # Try to use mx fast string search if available. Otherwise + # use Python find. Wrap the latter for consistency. + unused_char = self._stream.read(1) + if not unused_char: + raise InputStreamExhausted() + self._stream.unget(unused_char) + try: + from mx.TextTools import FS + self._fs = FS(boundary).find + except ImportError: + self._fs = lambda data: data.find(boundary) + + def __iter__(self): + return self + + def next(self): + if self._done: + raise StopIteration() + + stream = self._stream + rollback = self._rollback + + bytes_read = 0 + chunks = [] + for bytes in stream: + bytes_read += len(bytes) + chunks.append(bytes) + if bytes_read > rollback: + break + if not bytes: + break + else: + self._done = True + + if not chunks: + raise StopIteration() + + chunk = ''.join(chunks) + boundary = self._find_boundary(chunk, len(chunk) < self._rollback) + + if boundary: + end, next = boundary + stream.unget(chunk[next:]) + self._done = True + return chunk[:end] + else: + # make sure we dont treat a partial boundary (and + # its separators) as data + if not chunk[:-rollback]:# and len(chunk) >= (len(self._boundary) + 6): + # There's nothing left, we should just return and mark as done. + self._done = True + return chunk + else: + stream.unget(chunk[-rollback:]) + return chunk[:-rollback] + + def _find_boundary(self, data, eof = False): + """ + Finds a multipart boundary in data. + + Should no boundry exist in the data None is returned instead. Otherwise + a tuple containing the indices of the following are returned: + + * the end of current encapsulation + * the start of the next encapsulation + """ + index = self._fs(data) + if index < 0: + return None + else: + end = index + next = index + len(self._boundary) + data_len = len(data) - 1 + # backup over CRLF + if data[max(0,end-1)] == '\n': + end -= 1 + if data[max(0,end-1)] == '\r': + end -= 1 + # skip over --CRLF + #if data[min(data_len,next)] == '-': + # next += 1 + #if data[min(data_len,next)] == '-': + # next += 1 + #if data[min(data_len,next)] == '\r': + # next += 1 + #if data[min(data_len,next)] == '\n': + # next += 1 + return end, next + +def exhaust(stream_or_iterable): + """ + Completely exhausts an iterator or stream. + + Raise a MultiPartParserError if the argument is not a stream or an iterable. 
+ """ + iterator = None + try: + iterator = iter(stream_or_iterable) + except TypeError: + iterator = ChunkIter(stream_or_iterable, 16384) + + if iterator is None: + raise MultiPartParserError('multipartparser.exhaust() was passed a non-iterable or stream parameter') + + for __ in iterator: + pass + +def parse_boundary_stream(stream, max_header_size): + """ + Parses one and exactly one stream that encapsulates a boundary. + """ + # Stream at beginning of header, look for end of header + # and parse it if found. The header must fit within one + # chunk. + chunk = stream.read(max_header_size) + + # 'find' returns the top of these four bytes, so we'll + # need to munch them later to prevent them from polluting + # the payload. + header_end = chunk.find('\r\n\r\n') + + def _parse_header(line): + main_value_pair, params = parse_header(line) + try: + name, value = main_value_pair.split(':', 1) + except: + raise ValueError("Invalid header: %r" % line) + return name, (value, params) + + if header_end == -1: + # we find no header, so we just mark this fact and pass on + # the stream verbatim + stream.unget(chunk) + return (RAW, {}, stream) + + header = chunk[:header_end] + + # here we place any excess chunk back onto the stream, as + # well as throwing away the CRLFCRLF bytes from above. + stream.unget(chunk[header_end + 4:]) + + TYPE = RAW + outdict = {} + + # Eliminate blank lines + for line in header.split('\r\n'): + # This terminology ("main value" and "dictionary of + # parameters") is from the Python docs. + try: + name, (value, params) = _parse_header(line) + except: + continue + + if name == 'content-disposition': + TYPE = FIELD + if params.get('filename'): + TYPE = FILE + + outdict[name] = value, params + + if TYPE == RAW: + stream.unget(chunk) + + return (TYPE, outdict, stream) + +class Parser(object): + def __init__(self, stream, boundary): + self._stream = stream + self._separator = '--' + boundary + + def __iter__(self): + boundarystream = InterBoundaryIter(self._stream, self._separator) + for sub_stream in boundarystream: + # Iterate over each part + yield parse_boundary_stream(sub_stream, 1024) + +def parse_header(line): + """ Parse the header into a key-value. 
""" + plist = _parse_header_params(';' + line) + key = plist.pop(0).lower() + pdict = {} + for p in plist: + i = p.find('=') + if i >= 0: + name = p[:i].strip().lower() + value = p[i+1:].strip() + if len(value) >= 2 and value[0] == value[-1] == '"': + value = value[1:-1] + value = value.replace('\\\\', '\\').replace('\\"', '"') + pdict[name] = value + return key, pdict + +def _parse_header_params(s): + plist = [] + while s[:1] == ';': + s = s[1:] + end = s.find(';') + while end > 0 and s.count('"', 0, end) % 2: + end = s.find(';', end + 1) + if end < 0: + end = len(s) + f = s[:end] + plist.append(f.strip()) + s = s[end:] + return plist diff --git a/django/newforms/fields.py b/django/newforms/fields.py index 6734c5450e..1feef31ee0 100644 --- a/django/newforms/fields.py +++ b/django/newforms/fields.py @@ -7,6 +7,11 @@ import datetime import os import re import time +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + # Python 2.3 fallbacks try: from decimal import Decimal, DecimalException @@ -416,9 +421,9 @@ except ImportError: class UploadedFile(StrAndUnicode): "A wrapper for files uploaded in a FileField" - def __init__(self, filename, content): + def __init__(self, filename, data): self.filename = filename - self.content = content + self.data = data def __unicode__(self): """ @@ -444,15 +449,34 @@ class FileField(Field): return None elif not data and initial: return initial + + if isinstance(data, dict): + # We warn once, then support both ways below. + import warnings + warnings.warn( + message = "Representing uploaded files as dictionaries is"\ + " deprecated. Use django.core.files.SimpleUploadedFile "\ + " instead.", + category = DeprecationWarning, + stacklevel = 2 + ) + try: - f = UploadedFile(data['filename'], data['content']) - except TypeError: + file_name = data.file_name + file_size = data.file_size + except AttributeError: + try: + file_name = data.get('filename') + file_size = bool(data['content']) + except (AttributeError, KeyError): + raise ValidationError(self.error_messages['invalid']) + + if not file_name: raise ValidationError(self.error_messages['invalid']) - except KeyError: - raise ValidationError(self.error_messages['missing']) - if not f.content: + if not file_size: raise ValidationError(self.error_messages['empty']) - return f + + return UploadedFile(file_name, data) class ImageField(FileField): default_error_messages = { @@ -470,15 +494,31 @@ class ImageField(FileField): elif not data and initial: return initial from PIL import Image - from cStringIO import StringIO + + # We need to get a file object for PIL. We might have a path or we might + # have to read the data into memory. + if hasattr(data, 'temporary_file_path'): + file = data.temporary_file_path() + else: + if hasattr(data, 'read'): + file = StringIO(data.read()) + else: + file = StringIO(data['content']) + try: # load() is the only method that can spot a truncated JPEG, # but it cannot be called sanely after verify() - trial_image = Image.open(StringIO(f.content)) + trial_image = Image.open(file) trial_image.load() + + # Since we're about to use the file again we have to reset the + # file object if possible. 
+ if hasattr(file, 'reset'): + file.reset() + # verify() is the only method that can spot a corrupt PNG, # but it must be called immediately after the constructor - trial_image = Image.open(StringIO(f.content)) + trial_image = Image.open(file) trial_image.verify() except Exception: # Python Imaging Library doesn't recognize it as an image raise ValidationError(self.error_messages['invalid_image']) diff --git a/django/oldforms/__init__.py b/django/oldforms/__init__.py index fc8727185f..ee838d234a 100644 --- a/django/oldforms/__init__.py +++ b/django/oldforms/__init__.py @@ -680,18 +680,27 @@ class FileUploadField(FormField): self.field_name, self.is_required = field_name, is_required self.validator_list = [self.isNonEmptyFile] + validator_list - def isNonEmptyFile(self, field_data, all_data): + def isNonEmptyFile(self, new_data, all_data): + if hasattr(new_data, 'upload_errors'): + upload_errors = new_data.upload_errors() + if upload_errors: + raise validators.CriticalValidationError, upload_errors try: - content = field_data['content'] - except TypeError: - raise validators.CriticalValidationError, ugettext("No file was submitted. Check the encoding type on the form.") - if not content: + file_size = new_data.file_size + except AttributeError: + file_size = len(new_data['content']) + if not file_size: raise validators.CriticalValidationError, ugettext("The submitted file is empty.") def render(self, data): return mark_safe(u'' % \ (self.get_id(), self.__class__.__name__, self.field_name)) + def prepare(self, new_data): + if hasattr(new_data, 'upload_errors'): + upload_errors = new_data.upload_errors() + new_data[self.field_name] = { '_file_upload_error': upload_errors } + def html2python(data): if data is None: raise EmptyValue diff --git a/django/test/client.py b/django/test/client.py index a15876e6f9..6313181d61 100644 --- a/django/test/client.py +++ b/django/test/client.py @@ -1,7 +1,10 @@ import urllib import sys import os -from cStringIO import StringIO +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO from django.conf import settings from django.contrib.auth import authenticate, login from django.core.handlers.base import BaseHandler @@ -19,6 +22,25 @@ from django.utils.itercompat import is_iterable BOUNDARY = 'BoUnDaRyStRiNg' MULTIPART_CONTENT = 'multipart/form-data; boundary=%s' % BOUNDARY +class FakePayload(object): + """ + A wrapper around StringIO that restricts what can be read since data from + the network can't be seeked and cannot be read outside of its content + length. This makes sure that views can't do anything under the test client + that wouldn't work in Real Life. + """ + def __init__(self, content): + self.__content = StringIO(content) + self.__len = len(content) + + def read(self, num_bytes=None): + if num_bytes is None: + num_bytes = self.__len or 1 + assert self.__len >= num_bytes, "Cannot read more than the available bytes from the HTTP incoming data." + content = self.__content.read(num_bytes) + self.__len -= num_bytes + return content + class ClientHandler(BaseHandler): """ A HTTP Handler that can be used for testing purposes. 
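Tying the ``newforms`` changes above together: a bound form now receives ``UploadedFile`` objects rather than dictionaries. A hedged sketch using the ``SimpleUploadedFile`` helper (the form and field names are invented)::

    from django import newforms as forms
    from django.core.files.uploadedfile import SimpleUploadedFile

    class UploadForm(forms.Form):
        attachment = forms.FileField()

    form = UploadForm({}, {'attachment': SimpleUploadedFile('a.txt', 'hello')})
    if form.is_valid():
        wrapper = form.cleaned_data['attachment']    # newforms UploadedFile
        print wrapper.filename, wrapper.data.read()  # 'a.txt hello'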
@@ -236,7 +258,7 @@ class Client: 'CONTENT_TYPE': content_type, 'PATH_INFO': urllib.unquote(path), 'REQUEST_METHOD': 'POST', - 'wsgi.input': StringIO(post_data), + 'wsgi.input': FakePayload(post_data), } r.update(extra) diff --git a/django/utils/datastructures.py b/django/utils/datastructures.py index 21a72f2d1e..f27bc1cfff 100644 --- a/django/utils/datastructures.py +++ b/django/utils/datastructures.py @@ -332,17 +332,49 @@ class DotExpandedDict(dict): except TypeError: # Special-case if current isn't a dict. current = {bits[-1]: v} -class FileDict(dict): +class ImmutableList(tuple): """ - A dictionary used to hold uploaded file contents. The only special feature - here is that repr() of this object won't dump the entire contents of the - file to the output. A handy safeguard for a large file upload. + A tuple-like object that raises useful errors when it is asked to mutate. + + Example:: + + >>> a = ImmutableList(range(5), warning="You cannot mutate this.") + >>> a[3] = '4' + Traceback (most recent call last): + ... + AttributeError: You cannot mutate this. """ - def __repr__(self): - if 'content' in self: - d = dict(self, content='') - return dict.__repr__(d) - return dict.__repr__(self) + + def __new__(cls, *args, **kwargs): + if 'warning' in kwargs: + warning = kwargs['warning'] + del kwargs['warning'] + else: + warning = 'ImmutableList object is immutable.' + self = tuple.__new__(cls, *args, **kwargs) + self.warning = warning + return self + + def complain(self, *wargs, **kwargs): + if isinstance(self.warning, Exception): + raise self.warning + else: + raise AttributeError, self.warning + + # All list mutation functions complain. + __delitem__ = complain + __delslice__ = complain + __iadd__ = complain + __imul__ = complain + __setitem__ = complain + __setslice__ = complain + append = complain + extend = complain + insert = complain + pop = complain + remove = complain + sort = complain + reverse = complain class DictWrapper(dict): """ diff --git a/django/utils/text.py b/django/utils/text.py index aa190c8c4f..3686a454a8 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -3,6 +3,7 @@ from django.conf import settings from django.utils.encoding import force_unicode from django.utils.functional import allow_lazy from django.utils.translation import ugettext_lazy +from htmlentitydefs import name2codepoint # Capitalizes the first letter of a string. capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:] @@ -222,3 +223,26 @@ def smart_split(text): yield bit smart_split = allow_lazy(smart_split, unicode) +def _replace_entity(match): + text = match.group(1) + if text[0] == u'#': + text = text[1:] + try: + if text[0] in u'xX': + c = int(text[1:], 16) + else: + c = int(text) + return unichr(c) + except ValueError: + return match.group(0) + else: + try: + return unichr(name2codepoint[text]) + except (ValueError, KeyError): + return match.group(0) + +_entity_re = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));") + +def unescape_entities(text): + return _entity_re.sub(_replace_entity, text) +unescape_entities = allow_lazy(unescape_entities, unicode) diff --git a/docs/newforms.txt b/docs/newforms.txt index 04e4c1aaa0..296fc04c85 100644 --- a/docs/newforms.txt +++ b/docs/newforms.txt @@ -805,12 +805,12 @@ ContactForm to include an ``ImageField`` called ``mugshot``, we need to bind the file data containing the mugshot image:: # Bound form with an image field + >>> from django.core.files.uploadedfile import SimpleUploadedFile >>> data = {'subject': 'hello', ... 
'message': 'Hi there',
 ...         'sender': 'foo@example.com',
 ...         'cc_myself': True}
->>> file_data = {'mugshot': {'filename':'face.jpg'
-...              'content': <file data>}}
+>>> file_data = {'mugshot': SimpleUploadedFile('face.jpg', <file data>)}
 >>> f = ContactFormWithMugshot(data, file_data)

 In practice, you will usually specify ``request.FILES`` as the source
diff --git a/docs/request_response.txt b/docs/request_response.txt
index 866a697e31..54fc24df9e 100644
--- a/docs/request_response.txt
+++ b/docs/request_response.txt
@@ -80,19 +80,36 @@ All attributes except ``session`` should be considered read-only.
     strings.

 ``FILES``
+
+    .. admonition:: Changed in Django development version
+
+        In previous versions of Django, ``request.FILES`` contained
+        simple ``dict`` objects representing uploaded files. This is
+        no longer true -- files are represented by ``UploadedFile``
+        objects as described below.
+
+        These ``UploadedFile`` objects will emulate the old-style ``dict``
+        interface, but this is deprecated and will be removed in the next
+        release of Django.
+
     A dictionary-like object containing all uploaded files. Each key in
     ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each
-    value in ``FILES`` is a standard Python dictionary with the following three
-    keys:
+    value in ``FILES`` is an ``UploadedFile`` object containing the following
+    attributes:

-        * ``filename`` -- The name of the uploaded file, as a Python string.
-        * ``content-type`` -- The content type of the uploaded file.
-        * ``content`` -- The raw content of the uploaded file.
+        * ``read(num_bytes=None)`` -- Read a number of bytes from the file.
+        * ``file_name`` -- The name of the uploaded file.
+        * ``file_size`` -- The size, in bytes, of the uploaded file.
+        * ``chunk()`` -- A generator that yields sequential chunks of data.
+
+    See `File Uploads`_ for more information.
+
     Note that ``FILES`` will only contain data if the request method was POST
     and the ``<form>
`` that posted to the request had ``enctype="multipart/form-data"``. Otherwise,
    ``FILES`` will be a blank dictionary-like object.
+
+    .. _File Uploads: ../upload_handling/
``META``
    A standard Python dictionary containing all available HTTP headers.
diff --git a/docs/settings.txt b/docs/settings.txt
index 3fe999d0d2..a68d2ff92f 100644
--- a/docs/settings.txt
+++ b/docs/settings.txt
@@ -279,7 +279,7 @@ Default: ``''`` (Empty string)
The database backend to use. The built-in database backends are
``'postgresql_psycopg2'``, ``'postgresql'``, ``'mysql'``, ``'mysql_old'``,
-``'sqlite3'`` and ``'oracle'``.
+``'sqlite3'``, and ``'oracle'``.
In the Django development version, you can use a database backend that doesn't
ship with Django by setting ``DATABASE_ENGINE`` to a fully-qualified path (i.e.
@@ -530,6 +530,43 @@ Default: ``'utf-8'``
The character encoding used to decode any files read from disk. This includes
template files and initial SQL data files.
+FILE_UPLOAD_HANDLERS
+--------------------
+
+**New in Django development version**
+
+Default::
+
+    ("django.core.files.uploadhandler.MemoryFileUploadHandler",
+     "django.core.files.uploadhandler.TemporaryFileUploadHandler",)
+
+A tuple of handlers to use for uploading. See `file uploads`_ for details.
+
+.. _file uploads: ../upload_handling/
+
+FILE_UPLOAD_MAX_MEMORY_SIZE
+---------------------------
+
+**New in Django development version**
+
+Default: ``2621440`` (i.e. 2.5 MB).
+
+The maximum size (in bytes) that an upload will be before it gets streamed to
+the file system. See `file uploads`_ for details.
+
+FILE_UPLOAD_TEMP_DIR
+--------------------
+
+**New in Django development version**
+
+Default: ``None``
+
+The directory to store data temporarily while uploading files. If ``None``,
+Django will use the standard temporary directory for the operating system. For
+example, this will default to ``'/tmp'`` on *nix-style operating systems.
+
+See `file uploads`_ for details.
+
FIXTURE_DIRS
-------------
diff --git a/docs/upload_handling.txt b/docs/upload_handling.txt
new file mode 100644
index 0000000000..068acf3a42
--- /dev/null
+++ b/docs/upload_handling.txt
@@ -0,0 +1,346 @@
+============
+File Uploads
+============
+
+**New in Django development version**
+
+Most Web sites wouldn't be complete without a way to upload files. When Django
+handles a file upload, the file data ends up placed in ``request.FILES`` (for
+more on the ``request`` object see the documentation for `request and response
+objects`_). This document explains how files are stored on disk and in memory,
+and how to customize the default behavior.
+
+.. _request and response objects: ../request_response/#attributes
+
+Basic file uploads
+==================
+
+Consider a simple form containing a ``FileField``::
+
+    from django import newforms as forms
+
+    class UploadFileForm(forms.Form):
+        title = forms.CharField(max_length=50)
+        file = forms.FileField()
+
+A view handling this form will receive the file data in ``request.FILES``, which
+is a dictionary containing a key for each ``FileField`` (or ``ImageField``, or
+other ``FileField`` subclass) in the form. So the data from the above form would
+be accessible as ``request.FILES['file']``.
+
+Most of the time, you'll simply pass the file data from ``request`` into the
+form as described in `binding uploaded files to a form`_.
This would look
+something like::
+
+    from django.http import HttpResponseRedirect
+    from django.shortcuts import render_to_response
+
+    # Imaginary function to handle an uploaded file.
+    from somewhere import handle_uploaded_file
+
+    def upload_file(request):
+        if request.method == 'POST':
+            form = UploadFileForm(request.POST, request.FILES)
+            if form.is_valid():
+                handle_uploaded_file(request.FILES['file'])
+                return HttpResponseRedirect('/success/url/')
+        else:
+            form = UploadFileForm()
+        return render_to_response('upload.html', {'form': form})
+
+.. _binding uploaded files to a form: ../newforms/#binding-uploaded-files-to-a-form
+
+Notice that we have to pass ``request.FILES`` into the form's constructor; this
+is how file data gets bound into a form.
+
+Handling uploaded files
+-----------------------
+
+The final piece of the puzzle is handling the actual file data from
+``request.FILES``. Each entry in this dictionary is an ``UploadedFile`` object
+-- a simple wrapper around an uploaded file. You'll usually use one of these
+methods to access the uploaded content:
+
+    ``UploadedFile.read()``
+        Read the entire uploaded data from the file. Be careful with this
+        method: if the uploaded file is huge it can overwhelm your system if you
+        try to read it into memory. You'll probably want to use ``chunks()``
+        instead; see below.
+
+    ``UploadedFile.multiple_chunks()``
+        Returns ``True`` if the uploaded file is big enough to require
+        reading in multiple chunks. By default this will be any file
+        larger than 2.5 megabytes, but that's configurable; see below.
+
+    ``UploadedFile.chunks()``
+        A generator returning chunks of the file. If ``multiple_chunks()`` is
+        ``True``, you should use this method in a loop instead of ``read()``.
+
+        In practice, it's often easiest simply to use ``chunks()`` all the time;
+        see the example below.
+
+    ``UploadedFile.file_name``
+        The name of the uploaded file (e.g. ``my_file.txt``).
+
+    ``UploadedFile.file_size``
+        The size, in bytes, of the uploaded file.
+
+There are a few other methods and attributes available on ``UploadedFile``
+objects; see `UploadedFile objects`_ for a complete reference.
+
+Putting it all together, here's a common way you might handle an uploaded file::
+
+    def handle_uploaded_file(f):
+        destination = open('some/file/name.txt', 'wb')
+        for chunk in f.chunks():
+            destination.write(chunk)
+
+Looping over ``UploadedFile.chunks()`` instead of using ``read()`` ensures that
+large files don't overwhelm your system's memory.
+
+Where uploaded data is stored
+-----------------------------
+
+Before you save uploaded files, the data needs to be stored somewhere.
+
+By default, if an uploaded file is smaller than 2.5 megabytes, Django will hold
+the entire contents of the upload in memory. This means that saving the file
+involves only a read from memory and a write to disk and thus is very fast.
+
+However, if an uploaded file is too large, Django will write the uploaded file
+to a temporary file stored in your system's temporary directory. On a Unix-like
+platform this means you can expect Django to generate a file called something
+like ``/tmp/tmpzfp6I6.upload``. If an upload is large enough, you can watch this
+file grow in size as Django streams the data onto disk.
+
+These specifics -- 2.5 megabytes; ``/tmp``; etc. -- are simply "reasonable
+defaults". Read on for details on how you can customize or completely replace
+upload behavior.
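As a rough illustration of the memory-versus-disk behavior described above, a view could inspect an
``UploadedFile`` to see where its data ended up. The helper below is only a sketch (``describe_upload``
is not part of Django or of this patch); it relies on the documented ``file_name`` and ``file_size``
attributes, and on the fact that ``temporary_file_path()`` exists only on files that were streamed to
disk::

    def describe_upload(f):
        # temporary_file_path() is only present on uploads that were spooled to
        # a temporary file, so its presence tells us where the data lives.
        if hasattr(f, 'temporary_file_path'):
            return "%s (%d bytes) was streamed to %s" % (
                f.file_name, f.file_size, f.temporary_file_path())
        return "%s (%d bytes) was held in memory" % (f.file_name, f.file_size)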
+
+Changing upload handler behavior
+--------------------------------
+
+Three `settings`_ control Django's file upload behavior:
+
+    ``FILE_UPLOAD_MAX_MEMORY_SIZE``
+        The maximum size, in bytes, for files that will be uploaded
+        into memory. Files larger than ``FILE_UPLOAD_MAX_MEMORY_SIZE``
+        will be streamed to disk.
+
+        Defaults to 2.5 megabytes.
+
+    ``FILE_UPLOAD_TEMP_DIR``
+        The directory where uploaded files larger than
+        ``FILE_UPLOAD_MAX_MEMORY_SIZE`` will be stored.
+
+        Defaults to your system's standard temporary directory (i.e. ``/tmp`` on
+        most Unix-like systems).
+
+    ``FILE_UPLOAD_HANDLERS``
+        The actual handlers for uploaded files. Changing this setting
+        allows complete customization -- even replacement -- of
+        Django's upload process. See `upload handlers`_, below,
+        for details.
+
+        Defaults to::
+
+            ("django.core.files.uploadhandler.MemoryFileUploadHandler",
+             "django.core.files.uploadhandler.TemporaryFileUploadHandler",)
+
+        This means "try to upload to memory first, then fall back to temporary
+        files."
+
+.. _settings: ../settings/
+
+``UploadedFile`` objects
+========================
+
+All ``UploadedFile`` objects define the following methods/attributes:
+
+    ``UploadedFile.read(self, num_bytes=None)``
+        Returns a byte string of length ``num_bytes``, or the complete file if
+        ``num_bytes`` is ``None``.
+
+    ``UploadedFile.chunks(self, chunk_size=None)``
+        A generator yielding small chunks from the file. If ``chunk_size`` isn't
+        given, chunks will be 64 KB.
+
+    ``UploadedFile.multiple_chunks(self, chunk_size=None)``
+        Returns ``True`` if you can expect more than one chunk when calling
+        ``UploadedFile.chunks(self, chunk_size)``.
+
+    ``UploadedFile.file_size``
+        The size, in bytes, of the uploaded file.
+
+    ``UploadedFile.file_name``
+        The name of the uploaded file as provided by the user.
+
+    ``UploadedFile.content_type``
+        The content-type header uploaded with the file (e.g. ``text/plain`` or
+        ``application/pdf``). Like any data supplied by the user, you shouldn't
+        trust that the uploaded file is actually this type. You'll still need to
+        validate that the file contains the content that the content-type header
+        claims -- "trust but verify."
+
+    ``UploadedFile.charset``
+        For ``text/*`` content-types, the character set (e.g. ``utf8``) supplied
+        by the browser. Again, "trust but verify" is the best policy here.
+
+    ``UploadedFile.temporary_file_path()``
+        Only files uploaded onto disk will have this method; it returns the full
+        path to the temporary uploaded file.
+
+Upload Handlers
+===============
+
+When a user uploads a file, Django passes off the file data to an *upload
+handler* -- a small class that handles file data as it gets uploaded. Upload
+handlers are initially defined in the ``FILE_UPLOAD_HANDLERS`` setting, which
+defaults to::
+
+    ("django.core.files.uploadhandler.MemoryFileUploadHandler",
+     "django.core.files.uploadhandler.TemporaryFileUploadHandler",)
+
+Together the ``MemoryFileUploadHandler`` and ``TemporaryFileUploadHandler``
+provide Django's default file upload behavior of reading small files into memory
+and large ones onto disk.
+
+You can write custom handlers that customize how Django handles files. You
+could, for example, use custom handlers to enforce user-level quotas, compress
+data on the fly, render progress bars, and even send data to another storage
+location directly without storing it locally.
+
+Modifying upload handlers on the fly
+------------------------------------
+
+Sometimes particular views require different upload behavior.
In these cases,
+you can override upload handlers on a per-request basis by modifying
+``request.upload_handlers``. By default, this list will contain the upload
+handlers given by ``FILE_UPLOAD_HANDLERS``, but you can modify the list as you
+would any other list.
+
+For instance, suppose you've written a ``ProgressBarUploadHandler`` that
+provides feedback on upload progress to some sort of AJAX widget. You'd add this
+handler to your upload handlers like this::
+
+    request.upload_handlers.insert(0, ProgressBarUploadHandler())
+
+You'd probably want to use ``list.insert()`` in this case (instead of
+``append()``) because a progress bar handler would need to run *before* any
+other handlers. Remember, the upload handlers are processed in order.
+
+If you want to replace the upload handlers completely, you can just assign a new
+list::
+
+    request.upload_handlers = [ProgressBarUploadHandler()]
+
+.. note::
+
+    You can only modify upload handlers *before* accessing ``request.FILES`` --
+    it doesn't make sense to change upload handlers after upload handling has
+    already started. If you try to modify ``request.upload_handlers`` after
+    reading from ``request.FILES``, Django will throw an error.
+
+    Thus, you should always modify upload handlers as early in your view as
+    possible.
+
+Writing custom upload handlers
+------------------------------
+
+All file upload handlers should be subclasses of
+``django.core.files.uploadhandler.FileUploadHandler``. You can define upload
+handlers wherever you wish.
+
+Required methods
+~~~~~~~~~~~~~~~~
+
+Custom file upload handlers **must** define the following methods:
+
+    ``FileUploadHandler.receive_data_chunk(self, raw_data, start)``
+        Receives a "chunk" of data from the file upload.
+
+        ``raw_data`` is a byte string containing the uploaded data.
+
+        ``start`` is the position in the file where this ``raw_data`` chunk
+        begins.
+
+        The data you return will get fed into the subsequent upload handlers'
+        ``receive_data_chunk`` methods. In this way, one handler can be a
+        "filter" for other handlers.
+
+        Return ``None`` from ``receive_data_chunk`` to short-circuit remaining
+        upload handlers from getting this chunk. This is useful if you're
+        storing the uploaded data yourself and don't want future handlers to
+        store a copy of the data.
+
+        If you raise a ``StopUpload`` or a ``SkipFile`` exception, the upload
+        will abort or the file will be completely skipped.
+
+    ``FileUploadHandler.file_complete(self, file_size)``
+        Called when a file has finished uploading.
+
+        The handler should return an ``UploadedFile`` object that will be stored
+        in ``request.FILES``. Handlers may also return ``None`` to indicate that
+        the ``UploadedFile`` object should come from subsequent upload handlers.
+
+Optional methods
+~~~~~~~~~~~~~~~~
+
+Custom upload handlers may also define any of the following optional methods or
+attributes:
+
+    ``FileUploadHandler.chunk_size``
+        Size, in bytes, of the "chunks" Django should store into memory and feed
+        into the handler. That is, this attribute controls the size of chunks
+        fed into ``FileUploadHandler.receive_data_chunk``.
+
+        For maximum performance the chunk sizes should be divisible by ``4`` and
+        should not exceed 2 GB (2\ :sup:`31` bytes) in size. When there are
+        multiple chunk sizes provided by multiple handlers, Django will use the
+        smallest chunk size defined by any handler.
+
+        The default is 64*2\ :sup:`10` bytes, or 64 KB.
+
+    ``FileUploadHandler.new_file(self, field_name, file_name, content_type, content_length, charset)``
+        Callback signaling that a new file upload is starting. This is called
+        before any data has been fed to any upload handlers.
+
+        ``field_name`` is a string name of the file ```` field.
+
+        ``file_name`` is the unicode filename that was provided by the browser.
+
+        ``content_type`` is the MIME type provided by the browser -- e.g.
+        ``'image/jpeg'``.
+
+        ``content_length`` is the length of the image given by the browser.
+        Sometimes this won't be provided and will be ``None``.
+
+        ``charset`` is the character set (e.g. ``utf8``) given by the browser.
+        Like ``content_length``, this sometimes won't be provided.
+
+        This method may raise a ``StopFutureHandlers`` exception to prevent
+        future handlers from handling this file.
+
+    ``FileUploadHandler.upload_complete(self)``
+        Callback signaling that the entire upload (all files) has completed.
+
+    ``FileUploadHandler.handle_raw_input(self, input_data, META, content_length, boundary, encoding)``
+        Allows the handler to completely override the parsing of the raw
+        HTTP input.
+
+        ``input_data`` is a file-like object that supports ``read()``-ing.
+
+        ``META`` is the same object as ``request.META``.
+
+        ``content_length`` is the length of the data in ``input_data``. Don't
+        read more than ``content_length`` bytes from ``input_data``.
+
+        ``boundary`` is the MIME boundary for this request.
+
+        ``encoding`` is the encoding of the request.
+
+        Return ``None`` if you want upload handling to continue, or a tuple of
+        ``(POST, FILES)`` if you want to return the new data structures suitable
+        for the request directly.
+
diff --git a/tests/modeltests/model_forms/models.py b/tests/modeltests/model_forms/models.py
index 470312f5ca..c856720a74 100644
--- a/tests/modeltests/model_forms/models.py
+++ b/tests/modeltests/model_forms/models.py
@@ -67,7 +67,13 @@ class TextFile(models.Model):
class ImageFile(models.Model):
    description = models.CharField(max_length=20)
-    image = models.FileField(upload_to=tempfile.gettempdir())
+    try:
+        # If PIL is available, try testing PIL.
+        # Otherwise, it's equivalent to TextFile above.
+        import Image
+        image = models.ImageField(upload_to=tempfile.gettempdir())
+    except ImportError:
+        image = models.FileField(upload_to=tempfile.gettempdir())
    def __unicode__(self):
        return self.description
@@ -75,6 +81,7 @@ class ImageFile(models.Model):
__test__ = {'API_TESTS': """
>>> from django import newforms as forms
>>> from django.newforms.models import ModelForm
+>>> from django.core.files.uploadedfile import SimpleUploadedFile
The bare bones, absolutely nothing custom, basic case.
@@ -792,7 +799,18 @@ False
# Upload a file and ensure it all works as expected.
->>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': {'filename': 'test1.txt', 'content': 'hello world'}})
+>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': SimpleUploadedFile('test1.txt', 'hello world')})
+>>> f.is_valid()
+True
+>>> type(f.cleaned_data['file'])
+
+>>> instance = f.save()
+>>> instance.file
+u'...test1.txt'
+
+>>> os.unlink(instance.get_file_filename())
+
+>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': SimpleUploadedFile('test1.txt', 'hello world')})
>>> f.is_valid()
True
>>> type(f.cleaned_data['file'])
@@ -814,18 +832,30 @@ u'...test1.txt'
u'...test1.txt'
# Delete the current file since this is not done by Django.
- >>> os.unlink(instance.get_file_filename()) # Override the file by uploading a new one. ->>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': {'filename': 'test2.txt', 'content': 'hello world'}}, instance=instance) +>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': SimpleUploadedFile('test2.txt', 'hello world')}, instance=instance) >>> f.is_valid() True >>> instance = f.save() >>> instance.file u'...test2.txt' +# Delete the current file since this is not done by Django. +>>> os.unlink(instance.get_file_filename()) + +>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': SimpleUploadedFile('test2.txt', 'hello world')}) +>>> f.is_valid() +True +>>> instance = f.save() +>>> instance.file +u'...test2.txt' + +# Delete the current file since this is not done by Django. +>>> os.unlink(instance.get_file_filename()) + >>> instance.delete() # Test the non-required FileField @@ -838,12 +868,26 @@ True >>> instance.file '' ->>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': {'filename': 'test3.txt', 'content': 'hello world'}}, instance=instance) +>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': SimpleUploadedFile('test3.txt', 'hello world')}, instance=instance) >>> f.is_valid() True >>> instance = f.save() >>> instance.file u'...test3.txt' + +# Delete the current file since this is not done by Django. +>>> os.unlink(instance.get_file_filename()) +>>> instance.delete() + +>>> f = TextFileForm(data={'description': u'Assistance'}, files={'file': SimpleUploadedFile('test3.txt', 'hello world')}) +>>> f.is_valid() +True +>>> instance = f.save() +>>> instance.file +u'...test3.txt' + +# Delete the current file since this is not done by Django. +>>> os.unlink(instance.get_file_filename()) >>> instance.delete() # ImageField ################################################################### @@ -858,7 +902,19 @@ u'...test3.txt' >>> image_data = open(os.path.join(os.path.dirname(__file__), "test.png")).read() ->>> f = ImageFileForm(data={'description': u'An image'}, files={'image': {'filename': 'test.png', 'content': image_data}}) +>>> f = ImageFileForm(data={'description': u'An image'}, files={'image': SimpleUploadedFile('test.png', image_data)}) +>>> f.is_valid() +True +>>> type(f.cleaned_data['image']) + +>>> instance = f.save() +>>> instance.image +u'...test.png' + +# Delete the current file since this is not done by Django. +>>> os.unlink(instance.get_image_filename()) + +>>> f = ImageFileForm(data={'description': u'An image'}, files={'image': SimpleUploadedFile('test.png', image_data)}) >>> f.is_valid() True >>> type(f.cleaned_data['image']) @@ -885,13 +941,26 @@ u'...test.png' # Override the file by uploading a new one. ->>> f = ImageFileForm(data={'description': u'Changed it'}, files={'image': {'filename': 'test2.png', 'content': image_data}}, instance=instance) +>>> f = ImageFileForm(data={'description': u'Changed it'}, files={'image': SimpleUploadedFile('test2.png', image_data)}, instance=instance) >>> f.is_valid() True >>> instance = f.save() >>> instance.image u'...test2.png' +# Delete the current file since this is not done by Django. +>>> os.unlink(instance.get_image_filename()) +>>> instance.delete() + +>>> f = ImageFileForm(data={'description': u'Changed it'}, files={'image': SimpleUploadedFile('test2.png', image_data)}) +>>> f.is_valid() +True +>>> instance = f.save() +>>> instance.image +u'...test2.png' + +# Delete the current file since this is not done by Django. 
+>>> os.unlink(instance.get_image_filename()) >>> instance.delete() # Test the non-required ImageField @@ -904,7 +973,18 @@ True >>> instance.image '' ->>> f = ImageFileForm(data={'description': u'And a final one'}, files={'image': {'filename': 'test3.png', 'content': image_data}}, instance=instance) +>>> f = ImageFileForm(data={'description': u'And a final one'}, files={'image': SimpleUploadedFile('test3.png', image_data)}, instance=instance) +>>> f.is_valid() +True +>>> instance = f.save() +>>> instance.image +u'...test3.png' + +# Delete the current file since this is not done by Django. +>>> os.unlink(instance.get_image_filename()) +>>> instance.delete() + +>>> f = ImageFileForm(data={'description': u'And a final one'}, files={'image': SimpleUploadedFile('test3.png', image_data)}) >>> f.is_valid() True >>> instance = f.save() diff --git a/tests/regressiontests/bug639/tests.py b/tests/regressiontests/bug639/tests.py index f9596d06cb..2726dec897 100644 --- a/tests/regressiontests/bug639/tests.py +++ b/tests/regressiontests/bug639/tests.py @@ -9,6 +9,7 @@ import unittest from regressiontests.bug639.models import Photo from django.http import QueryDict from django.utils.datastructures import MultiValueDict +from django.core.files.uploadedfile import SimpleUploadedFile class Bug639Test(unittest.TestCase): @@ -21,12 +22,8 @@ class Bug639Test(unittest.TestCase): # Fake a request query dict with the file qd = QueryDict("title=Testing&image=", mutable=True) - qd["image_file"] = { - "filename" : "test.jpg", - "content-type" : "image/jpeg", - "content" : img - } - + qd["image_file"] = SimpleUploadedFile('test.jpg', img, 'image/jpeg') + manip = Photo.AddManipulator() manip.do_html2python(qd) p = manip.save(qd) @@ -39,4 +36,4 @@ class Bug639Test(unittest.TestCase): Make sure to delete the "uploaded" file to avoid clogging /tmp. """ p = Photo.objects.get() - os.unlink(p.get_image_filename()) \ No newline at end of file + os.unlink(p.get_image_filename()) diff --git a/tests/regressiontests/datastructures/tests.py b/tests/regressiontests/datastructures/tests.py index d6141b09ce..62c57bc019 100644 --- a/tests/regressiontests/datastructures/tests.py +++ b/tests/regressiontests/datastructures/tests.py @@ -117,14 +117,25 @@ Init from sequence of tuples >>> d['person']['2']['firstname'] ['Adrian'] -### FileDict ################################################################ - ->>> d = FileDict({'content': 'once upon a time...'}) +### ImmutableList ################################################################ +>>> d = ImmutableList(range(10)) +>>> d.sort() +Traceback (most recent call last): + File "", line 1, in + File "/var/lib/python-support/python2.5/django/utils/datastructures.py", line 359, in complain + raise AttributeError, self.warning +AttributeError: ImmutableList object is immutable. >>> repr(d) -"{'content': ''}" ->>> d = FileDict({'other-key': 'once upon a time...'}) ->>> repr(d) -"{'other-key': 'once upon a time...'}" +'(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)' +>>> d = ImmutableList(range(10), warning="Object is immutable!") +>>> d[1] +1 +>>> d[1] = 'test' +Traceback (most recent call last): + File "", line 1, in + File "/var/lib/python-support/python2.5/django/utils/datastructures.py", line 359, in complain + raise AttributeError, self.warning +AttributeError: Object is immutable! 
### DictWrapper ############################################################# diff --git a/tests/regressiontests/file_uploads/__init__.py b/tests/regressiontests/file_uploads/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/regressiontests/file_uploads/models.py b/tests/regressiontests/file_uploads/models.py new file mode 100644 index 0000000000..2d5607b2a7 --- /dev/null +++ b/tests/regressiontests/file_uploads/models.py @@ -0,0 +1,2 @@ +# This file unintentionally left blank. +# Oops. \ No newline at end of file diff --git a/tests/regressiontests/file_uploads/tests.py b/tests/regressiontests/file_uploads/tests.py new file mode 100644 index 0000000000..8992298470 --- /dev/null +++ b/tests/regressiontests/file_uploads/tests.py @@ -0,0 +1,158 @@ +import os +import sha +import tempfile +from django.test import TestCase, client +from django.utils import simplejson + +class FileUploadTests(TestCase): + def test_simple_upload(self): + post_data = { + 'name': 'Ringo', + 'file_field': open(__file__), + } + response = self.client.post('/file_uploads/upload/', post_data) + self.assertEqual(response.status_code, 200) + + def test_large_upload(self): + tdir = tempfile.gettempdir() + + file1 = tempfile.NamedTemporaryFile(suffix=".file1", dir=tdir) + file1.write('a' * (2 ** 21)) + file1.seek(0) + + file2 = tempfile.NamedTemporaryFile(suffix=".file2", dir=tdir) + file2.write('a' * (10 * 2 ** 20)) + file2.seek(0) + + # This file contains chinese symbols for a name. + file3 = open(os.path.join(tdir, u'test_中文_Orl\u00e9ans.jpg'), 'w+b') + file3.write('b' * (2 ** 10)) + file3.seek(0) + + post_data = { + 'name': 'Ringo', + 'file_field1': open(file1.name), + 'file_field2': open(file2.name), + 'file_unicode': file3, + } + + for key in post_data.keys(): + try: + post_data[key + '_hash'] = sha.new(post_data[key].read()).hexdigest() + post_data[key].seek(0) + except AttributeError: + post_data[key + '_hash'] = sha.new(post_data[key]).hexdigest() + + response = self.client.post('/file_uploads/verify/', post_data) + + try: + os.unlink(file3.name) + except: + pass + + self.assertEqual(response.status_code, 200) + + def test_dangerous_file_names(self): + """Uploaded file names should be sanitized before ever reaching the view.""" + # This test simulates possible directory traversal attacks by a + # malicious uploader We have to do some monkeybusiness here to construct + # a malicious payload with an invalid file name (containing os.sep or + # os.pardir). This similar to what an attacker would need to do when + # trying such an attack. + scary_file_names = [ + "/tmp/hax0rd.txt", # Absolute path, *nix-style. + "C:\\Windows\\hax0rd.txt", # Absolute path, win-syle. + "C:/Windows/hax0rd.txt", # Absolute path, broken-style. + "\\tmp\\hax0rd.txt", # Absolute path, broken in a different way. + "/tmp\\hax0rd.txt", # Absolute path, broken by mixing. + "subdir/hax0rd.txt", # Descendant path, *nix-style. + "subdir\\hax0rd.txt", # Descendant path, win-style. + "sub/dir\\hax0rd.txt", # Descendant path, mixed. + "../../hax0rd.txt", # Relative path, *nix-style. + "..\\..\\hax0rd.txt", # Relative path, win-style. + "../..\\hax0rd.txt" # Relative path, mixed. + ] + + payload = [] + for i, name in enumerate(scary_file_names): + payload.extend([ + '--' + client.BOUNDARY, + 'Content-Disposition: form-data; name="file%s"; filename="%s"' % (i, name), + 'Content-Type: application/octet-stream', + '', + 'You got pwnd.' 
+ ]) + payload.extend([ + '--' + client.BOUNDARY + '--', + '', + ]) + + payload = "\r\n".join(payload) + r = { + 'CONTENT_LENGTH': len(payload), + 'CONTENT_TYPE': client.MULTIPART_CONTENT, + 'PATH_INFO': "/file_uploads/echo/", + 'REQUEST_METHOD': 'POST', + 'wsgi.input': client.FakePayload(payload), + } + response = self.client.request(**r) + + # The filenames should have been sanitized by the time it got to the view. + recieved = simplejson.loads(response.content) + for i, name in enumerate(scary_file_names): + got = recieved["file%s" % i] + self.assertEqual(got, "hax0rd.txt") + + def test_filename_overflow(self): + """File names over 256 characters (dangerous on some platforms) get fixed up.""" + name = "%s.txt" % ("f"*500) + payload = "\r\n".join([ + '--' + client.BOUNDARY, + 'Content-Disposition: form-data; name="file"; filename="%s"' % name, + 'Content-Type: application/octet-stream', + '', + 'Oops.' + '--' + client.BOUNDARY + '--', + '', + ]) + r = { + 'CONTENT_LENGTH': len(payload), + 'CONTENT_TYPE': client.MULTIPART_CONTENT, + 'PATH_INFO': "/file_uploads/echo/", + 'REQUEST_METHOD': 'POST', + 'wsgi.input': client.FakePayload(payload), + } + got = simplejson.loads(self.client.request(**r).content) + self.assert_(len(got['file']) < 256, "Got a long file name (%s characters)." % len(got['file'])) + + def test_custom_upload_handler(self): + # A small file (under the 5M quota) + smallfile = tempfile.NamedTemporaryFile() + smallfile.write('a' * (2 ** 21)) + + # A big file (over the quota) + bigfile = tempfile.NamedTemporaryFile() + bigfile.write('a' * (10 * 2 ** 20)) + + # Small file posting should work. + response = self.client.post('/file_uploads/quota/', {'f': open(smallfile.name)}) + got = simplejson.loads(response.content) + self.assert_('f' in got) + + # Large files don't go through. + response = self.client.post("/file_uploads/quota/", {'f': open(bigfile.name)}) + got = simplejson.loads(response.content) + self.assert_('f' not in got) + + def test_broken_custom_upload_handler(self): + f = tempfile.NamedTemporaryFile() + f.write('a' * (2 ** 21)) + + # AttributeError: You cannot alter upload handlers after the upload has been processed. + self.assertRaises( + AttributeError, + self.client.post, + '/file_uploads/quota/broken/', + {'f': open(f.name)} + ) + \ No newline at end of file diff --git a/tests/regressiontests/file_uploads/uploadhandler.py b/tests/regressiontests/file_uploads/uploadhandler.py new file mode 100644 index 0000000000..54f82f626c --- /dev/null +++ b/tests/regressiontests/file_uploads/uploadhandler.py @@ -0,0 +1,26 @@ +""" +Upload handlers to test the upload API. +""" + +from django.core.files.uploadhandler import FileUploadHandler, StopUpload + +class QuotaUploadHandler(FileUploadHandler): + """ + This test upload handler terminates the connection if more than a quota + (5MB) is uploaded. 
+ """ + + QUOTA = 5 * 2**20 # 5 MB + + def __init__(self, request=None): + super(QuotaUploadHandler, self).__init__(request) + self.total_upload = 0 + + def receive_data_chunk(self, raw_data, start): + self.total_upload += len(raw_data) + if self.total_upload >= self.QUOTA: + raise StopUpload(connection_reset=True) + return raw_data + + def file_complete(self, file_size): + return None \ No newline at end of file diff --git a/tests/regressiontests/file_uploads/urls.py b/tests/regressiontests/file_uploads/urls.py new file mode 100644 index 0000000000..529bee312d --- /dev/null +++ b/tests/regressiontests/file_uploads/urls.py @@ -0,0 +1,10 @@ +from django.conf.urls.defaults import * +import views + +urlpatterns = patterns('', + (r'^upload/$', views.file_upload_view), + (r'^verify/$', views.file_upload_view_verify), + (r'^echo/$', views.file_upload_echo), + (r'^quota/$', views.file_upload_quota), + (r'^quota/broken/$', views.file_upload_quota_broken), +) diff --git a/tests/regressiontests/file_uploads/views.py b/tests/regressiontests/file_uploads/views.py new file mode 100644 index 0000000000..833cf90531 --- /dev/null +++ b/tests/regressiontests/file_uploads/views.py @@ -0,0 +1,70 @@ +import os +import sha +from django.core.files.uploadedfile import UploadedFile +from django.http import HttpResponse, HttpResponseServerError +from django.utils import simplejson +from uploadhandler import QuotaUploadHandler + +def file_upload_view(request): + """ + Check that a file upload can be updated into the POST dictionary without + going pear-shaped. + """ + form_data = request.POST.copy() + form_data.update(request.FILES) + if isinstance(form_data.get('file_field'), UploadedFile) and isinstance(form_data['name'], unicode): + # If a file is posted, the dummy client should only post the file name, + # not the full path. + if os.path.dirname(form_data['file_field'].file_name) != '': + return HttpResponseServerError() + return HttpResponse('') + else: + return HttpResponseServerError() + +def file_upload_view_verify(request): + """ + Use the sha digest hash to verify the uploaded contents. + """ + form_data = request.POST.copy() + form_data.update(request.FILES) + + # Check to see if unicode names worked out. + if not request.FILES['file_unicode'].file_name.endswith(u'test_\u4e2d\u6587_Orl\xe9ans.jpg'): + return HttpResponseServerError() + + for key, value in form_data.items(): + if key.endswith('_hash'): + continue + if key + '_hash' not in form_data: + continue + submitted_hash = form_data[key + '_hash'] + if isinstance(value, UploadedFile): + new_hash = sha.new(value.read()).hexdigest() + else: + new_hash = sha.new(value).hexdigest() + if new_hash != submitted_hash: + return HttpResponseServerError() + + return HttpResponse('') + +def file_upload_echo(request): + """ + Simple view to echo back info about uploaded files for tests. + """ + r = dict([(k, f.file_name) for k, f in request.FILES.items()]) + return HttpResponse(simplejson.dumps(r)) + +def file_upload_quota(request): + """ + Dynamically add in an upload handler. + """ + request.upload_handlers.insert(0, QuotaUploadHandler()) + return file_upload_echo(request) + +def file_upload_quota_broken(request): + """ + You can't change handlers after reading FILES; this view shouldn't work. 
+ """ + response = file_upload_echo(request) + request.upload_handlers.insert(0, QuotaUploadHandler()) + return response \ No newline at end of file diff --git a/tests/regressiontests/forms/error_messages.py b/tests/regressiontests/forms/error_messages.py index 381282f121..580326f894 100644 --- a/tests/regressiontests/forms/error_messages.py +++ b/tests/regressiontests/forms/error_messages.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- tests = r""" >>> from django.newforms import * +>>> from django.core.files.uploadedfile import SimpleUploadedFile # CharField ################################################################### @@ -214,11 +215,11 @@ ValidationError: [u'REQUIRED'] Traceback (most recent call last): ... ValidationError: [u'INVALID'] ->>> f.clean({}) +>>> f.clean(SimpleUploadedFile('name', None)) Traceback (most recent call last): ... -ValidationError: [u'MISSING'] ->>> f.clean({'filename': 'name', 'content':''}) +ValidationError: [u'EMPTY FILE'] +>>> f.clean(SimpleUploadedFile('name', '')) Traceback (most recent call last): ... ValidationError: [u'EMPTY FILE'] diff --git a/tests/regressiontests/forms/fields.py b/tests/regressiontests/forms/fields.py index c9f3efdbda..4725c3ecf3 100644 --- a/tests/regressiontests/forms/fields.py +++ b/tests/regressiontests/forms/fields.py @@ -2,6 +2,7 @@ tests = r""" >>> from django.newforms import * >>> from django.newforms.widgets import RadioFieldRenderer +>>> from django.core.files.uploadedfile import SimpleUploadedFile >>> import datetime >>> import time >>> import re @@ -770,17 +771,17 @@ ValidationError: [u'This field is required.'] >>> f.clean(None, 'files/test2.pdf') 'files/test2.pdf' ->>> f.clean({}) +>>> f.clean(SimpleUploadedFile('', '')) Traceback (most recent call last): ... -ValidationError: [u'No file was submitted.'] +ValidationError: [u'No file was submitted. Check the encoding type on the form.'] ->>> f.clean({}, '') +>>> f.clean(SimpleUploadedFile('', ''), '') Traceback (most recent call last): ... -ValidationError: [u'No file was submitted.'] +ValidationError: [u'No file was submitted. Check the encoding type on the form.'] ->>> f.clean({}, 'files/test3.pdf') +>>> f.clean(None, 'files/test3.pdf') 'files/test3.pdf' >>> f.clean('some content that is not a file') @@ -788,20 +789,20 @@ Traceback (most recent call last): ... ValidationError: [u'No file was submitted. Check the encoding type on the form.'] ->>> f.clean({'filename': 'name', 'content': None}) +>>> f.clean(SimpleUploadedFile('name', None)) Traceback (most recent call last): ... ValidationError: [u'The submitted file is empty.'] ->>> f.clean({'filename': 'name', 'content': ''}) +>>> f.clean(SimpleUploadedFile('name', '')) Traceback (most recent call last): ... 
ValidationError: [u'The submitted file is empty.'] ->>> type(f.clean({'filename': 'name', 'content': 'Some File Content'})) +>>> type(f.clean(SimpleUploadedFile('name', 'Some File Content'))) ->>> type(f.clean({'filename': 'name', 'content': 'Some File Content'}, 'files/test4.pdf')) +>>> type(f.clean(SimpleUploadedFile('name', 'Some File Content'), 'files/test4.pdf')) # URLField ################################################################## diff --git a/tests/regressiontests/forms/forms.py b/tests/regressiontests/forms/forms.py index 7fc206de4c..041fa4054c 100644 --- a/tests/regressiontests/forms/forms.py +++ b/tests/regressiontests/forms/forms.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- tests = r""" >>> from django.newforms import * +>>> from django.core.files.uploadedfile import SimpleUploadedFile >>> import datetime >>> import time >>> import re @@ -1465,7 +1466,7 @@ not request.POST. >>> print f File1:
  • This field is required.
->>> f = FileForm(data={}, files={'file1': {'filename': 'name', 'content':''}}, auto_id=False) +>>> f = FileForm(data={}, files={'file1': SimpleUploadedFile('name', '')}, auto_id=False) >>> print f File1:
  • The submitted file is empty.
@@ -1473,7 +1474,7 @@ not request.POST. >>> print f File1:
  • No file was submitted. Check the encoding type on the form.
->>> f = FileForm(data={}, files={'file1': {'filename': 'name', 'content':'some content'}}, auto_id=False) +>>> f = FileForm(data={}, files={'file1': SimpleUploadedFile('name', 'some content')}, auto_id=False) >>> print f File1: >>> f.is_valid() diff --git a/tests/regressiontests/test_client_regress/models.py b/tests/regressiontests/test_client_regress/models.py index 37e81668b6..1eb55e312e 100644 --- a/tests/regressiontests/test_client_regress/models.py +++ b/tests/regressiontests/test_client_regress/models.py @@ -6,6 +6,7 @@ from django.test import Client, TestCase from django.core.urlresolvers import reverse from django.core.exceptions import SuspiciousOperation import os +import sha class AssertContainsTests(TestCase): def test_contains(self): @@ -240,16 +241,6 @@ class AssertFormErrorTests(TestCase): except AssertionError, e: self.assertEqual(str(e), "The form 'form' in context 0 does not contain the non-field error 'Some error.' (actual errors: )") -class FileUploadTests(TestCase): - def test_simple_upload(self): - fd = open(os.path.join(os.path.dirname(__file__), "views.py")) - post_data = { - 'name': 'Ringo', - 'file_field': fd, - } - response = self.client.post('/test_client_regress/file_upload/', post_data) - self.assertEqual(response.status_code, 200) - class LoginTests(TestCase): fixtures = ['testdata'] @@ -269,7 +260,6 @@ class LoginTests(TestCase): # default client. self.assertRedirects(response, "http://testserver/test_client_regress/get_view/") - class URLEscapingTests(TestCase): def test_simple_argument_get(self): "Get a view that has a simple string argument" diff --git a/tests/regressiontests/test_client_regress/urls.py b/tests/regressiontests/test_client_regress/urls.py index dc26d1260a..12f6afacf3 100644 --- a/tests/regressiontests/test_client_regress/urls.py +++ b/tests/regressiontests/test_client_regress/urls.py @@ -3,7 +3,6 @@ import views urlpatterns = patterns('', (r'^no_template_view/$', views.no_template_view), - (r'^file_upload/$', views.file_upload_view), (r'^staff_only/$', views.staff_only_view), (r'^get_view/$', views.get_view), url(r'^arg_view/(?P.+)/$', views.view_with_argument, name='arg_view'), diff --git a/tests/regressiontests/test_client_regress/views.py b/tests/regressiontests/test_client_regress/views.py index 9632c17284..d703c82124 100644 --- a/tests/regressiontests/test_client_regress/views.py +++ b/tests/regressiontests/test_client_regress/views.py @@ -1,36 +1,18 @@ -import os - from django.contrib.auth.decorators import login_required -from django.http import HttpResponse, HttpResponseRedirect, HttpResponseServerError +from django.http import HttpResponse, HttpResponseRedirect from django.core.exceptions import SuspiciousOperation def no_template_view(request): "A simple view that expects a GET request, and returns a rendered template" return HttpResponse("No template used. Sample content: twice once twice. Content ends.") -def file_upload_view(request): - """ - Check that a file upload can be updated into the POST dictionary without - going pear-shaped. - """ - form_data = request.POST.copy() - form_data.update(request.FILES) - if isinstance(form_data['file_field'], dict) and isinstance(form_data['name'], unicode): - # If a file is posted, the dummy client should only post the file name, - # not the full path. 
- if os.path.dirname(form_data['file_field']['filename']) != '': - return HttpResponseServerError() - return HttpResponse('') - else: - return HttpResponseServerError() - def staff_only_view(request): "A view that can only be visited by staff. Non staff members get an exception" if request.user.is_staff: return HttpResponse('') else: raise SuspiciousOperation() - + def get_view(request): "A simple login protected view" return HttpResponse("Hello world") @@ -51,4 +33,4 @@ def view_with_argument(request, name): def login_protected_redirect_view(request): "A view that redirects all requests to the GET view" return HttpResponseRedirect('/test_client_regress/get_view/') -login_protected_redirect_view = login_required(login_protected_redirect_view) \ No newline at end of file +login_protected_redirect_view = login_required(login_protected_redirect_view) diff --git a/tests/urls.py b/tests/urls.py index dbdf9a8064..cea453ef37 100644 --- a/tests/urls.py +++ b/tests/urls.py @@ -5,6 +5,9 @@ urlpatterns = patterns('', (r'^test_client/', include('modeltests.test_client.urls')), (r'^test_client_regress/', include('regressiontests.test_client_regress.urls')), + # File upload test views + (r'^file_uploads/', include('regressiontests.file_uploads.urls')), + # Always provide the auth system login and logout views (r'^accounts/login/$', 'django.contrib.auth.views.login', {'template_name': 'login.html'}), (r'^accounts/logout/$', 'django.contrib.auth.views.logout'),
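As a closing illustration of the handler API documented in ``docs/upload_handling.txt`` above, here is a
sketch of a custom pass-through handler. The class name and the hashing behavior are illustrative only
(they are not part of this patch); it uses the same Python 2-era ``sha`` module as the tests above and
follows the pattern of the ``QuotaUploadHandler`` test helper::

    import sha

    from django.core.files.uploadhandler import FileUploadHandler

    class DigestUploadHandler(FileUploadHandler):
        """
        Hypothetical handler: hash each uploaded file as it streams through,
        leaving storage to whichever handlers run after it.
        """
        def __init__(self, request=None):
            super(DigestUploadHandler, self).__init__(request)
            self.digests = {}

        def new_file(self, field_name, file_name, content_type, content_length, charset):
            # Start a fresh digest for each file field.
            self.digests[field_name] = sha.new()
            self._current = self.digests[field_name]

        def receive_data_chunk(self, raw_data, start):
            self._current.update(raw_data)
            # Return the chunk unchanged so later handlers still receive it.
            return raw_data

        def file_complete(self, file_size):
            # Returning None lets a subsequent handler build the UploadedFile.
            return None

A view would enable it per request just as the documentation shows for ``ProgressBarUploadHandler``:
keep a reference (``handler = DigestUploadHandler()``), call ``request.upload_handlers.insert(0, handler)``
before touching ``request.FILES``, and read ``handler.digests`` afterwards.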