From a28b75b0ba9650ae3bd46e38d12d95d48f5c5664 Mon Sep 17 00:00:00 2001 From: Jacob Kaplan-Moss Date: Mon, 7 Jul 2008 23:16:00 +0000 Subject: [PATCH] Fixed #7614: the quickening has come, and there now is only one UploadedFile. On top of that, UploadedFile's interface has been improved: * The API now more closely matches a proper file API. This unfortunately means a few backwards-incompatible renamings; see BackwardsIncompatibleChanges. This refs #7593. * While we were at it, renamed chunk() to chunks() to clarify that it's an iterator. * Temporary uploaded files now property use the tempfile library behind the scenes which should ensure better cleanup of tempfiles (refs #7593 again). Thanks to Mike Axiak for the bulk of this patch. git-svn-id: http://code.djangoproject.com/svn/django/trunk@7859 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/core/files/uploadedfile.py | 148 ++++++++++++++------ django/core/files/uploadhandler.py | 43 +----- django/db/models/base.py | 2 +- django/db/models/fields/__init__.py | 13 +- django/newforms/fields.py | 27 +--- django/oldforms/__init__.py | 2 +- docs/newforms.txt | 15 +- docs/settings.txt | 2 +- docs/upload_handling.txt | 48 ++++--- tests/modeltests/model_forms/models.py | 8 +- tests/regressiontests/file_uploads/views.py | 6 +- tests/regressiontests/forms/fields.py | 4 +- 12 files changed, 165 insertions(+), 153 deletions(-) diff --git a/django/core/files/uploadedfile.py b/django/core/files/uploadedfile.py index 637609d085..c0c9473529 100644 --- a/django/core/files/uploadedfile.py +++ b/django/core/files/uploadedfile.py @@ -3,12 +3,40 @@ Classes representing uploaded files. """ import os +import tempfile +import warnings try: from cStringIO import StringIO except ImportError: from StringIO import StringIO -__all__ = ('UploadedFile', 'TemporaryUploadedFile', 'InMemoryUploadedFile') +from django.conf import settings + +__all__ = ('UploadedFile', 'TemporaryUploadedFile', 'InMemoryUploadedFile', 'SimpleUploadedFile') + +# Because we fooled around with it a bunch, UploadedFile has a bunch +# of deprecated properties. This little shortcut helps define 'em +# without too much code duplication. +def deprecated_property(old, new, readonly=False): + def issue_warning(): + warnings.warn( + message = "UploadedFile.%s is deprecated; use UploadedFile.%s instead." % (old, new), + category = DeprecationWarning, + stacklevel = 3 + ) + + def getter(self): + issue_warning() + return getattr(self, new) + + def setter(self, value): + issue_warning() + setattr(self, new, value) + + if readonly: + return property(getter) + else: + return property(getter, setter) class UploadedFile(object): """ @@ -20,34 +48,34 @@ class UploadedFile(object): """ DEFAULT_CHUNK_SIZE = 64 * 2**10 - def __init__(self, file_name=None, content_type=None, file_size=None, charset=None): - self.file_name = file_name - self.file_size = file_size + def __init__(self, name=None, content_type=None, size=None, charset=None): + self.name = name + self.size = size self.content_type = content_type self.charset = charset def __repr__(self): - return "<%s: %s (%s)>" % (self.__class__.__name__, self.file_name, self.content_type) + return "<%s: %s (%s)>" % (self.__class__.__name__, self.name, self.content_type) - def _set_file_name(self, name): + def _get_name(self): + return self._name + + def _set_name(self, name): # Sanitize the file name so that it can't be dangerous. if name is not None: # Just use the basename of the file -- anything else is dangerous. name = os.path.basename(name) - + # File names longer than 255 characters can cause problems on older OSes. if len(name) > 255: name, ext = os.path.splitext(name) name = name[:255 - len(ext)] + ext - - self._file_name = name - - def _get_file_name(self): - return self._file_name - - file_name = property(_get_file_name, _set_file_name) - def chunk(self, chunk_size=None): + self._name = name + + name = property(_get_name, _set_name) + + def chunks(self, chunk_size=None): """ Read the file and yield chucks of ``chunk_size`` bytes (defaults to ``UploadedFile.DEFAULT_CHUNK_SIZE``). @@ -58,12 +86,18 @@ class UploadedFile(object): if hasattr(self, 'seek'): self.seek(0) # Assume the pointer is at zero... - counter = self.file_size + counter = self.size while counter > 0: yield self.read(chunk_size) counter -= chunk_size + # Deprecated properties + file_name = deprecated_property(old="file_name", new="name") + file_size = deprecated_property(old="file_size", new="size") + data = deprecated_property(old="data", new="read", readonly=True) + chunk = deprecated_property(old="chunk", new="chunks", readonly=True) + def multiple_chunks(self, chunk_size=None): """ Returns ``True`` if you can expect multiple chunks. @@ -74,9 +108,9 @@ class UploadedFile(object): """ if not chunk_size: chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE - return self.file_size < chunk_size + return self.size > chunk_size - # Abstract methods; subclasses *must* default read() and probably should + # Abstract methods; subclasses *must* define read() and probably should # define open/close. def read(self, num_bytes=None): raise NotImplementedError() @@ -87,23 +121,49 @@ class UploadedFile(object): def close(self): pass + def xreadlines(self): + return self + + def readlines(self): + return list(self.xreadlines()) + + def __iter__(self): + # Iterate over this file-like object by newlines + buffer_ = None + for chunk in self.chunks(): + chunk_buffer = StringIO(chunk) + + for line in chunk_buffer: + if buffer_: + line = buffer_ + line + buffer_ = None + + # If this is the end of a line, yield + # otherwise, wait for the next round + if line[-1] in ('\n', '\r'): + yield line + else: + buffer_ = line + + if buffer_ is not None: + yield buffer_ + # Backwards-compatible support for uploaded-files-as-dictionaries. def __getitem__(self, key): - import warnings warnings.warn( message = "The dictionary access of uploaded file objects is deprecated. Use the new object interface instead.", category = DeprecationWarning, stacklevel = 2 ) backwards_translate = { - 'filename': 'file_name', + 'filename': 'name', 'content-type': 'content_type', - } + } if key == 'content': return self.read() elif key == 'filename': - return self.file_name + return self.name elif key == 'content-type': return self.content_type else: @@ -113,34 +173,36 @@ class TemporaryUploadedFile(UploadedFile): """ A file uploaded to a temporary location (i.e. stream-to-disk). """ - - def __init__(self, file, file_name, content_type, file_size, charset): - super(TemporaryUploadedFile, self).__init__(file_name, content_type, file_size, charset) - self.file = file - self.path = file.name - self.file.seek(0) + def __init__(self, name, content_type, size, charset): + super(TemporaryUploadedFile, self).__init__(name, content_type, size, charset) + if settings.FILE_UPLOAD_TEMP_DIR: + self._file = tempfile.NamedTemporaryFile(suffix='.upload', dir=settings.FILE_UPLOAD_TEMP_DIR) + else: + self._file = tempfile.NamedTemporaryFile(suffix='.upload') def temporary_file_path(self): """ Returns the full path of this file. """ - return self.path - - def read(self, *args, **kwargs): - return self.file.read(*args, **kwargs) - - def open(self): - self.seek(0) - - def seek(self, *args, **kwargs): - self.file.seek(*args, **kwargs) + return self.name + + # Most methods on this object get proxied to NamedTemporaryFile. + # We can't directly subclass because NamedTemporaryFile is actually a + # factory function + def read(self, *args): return self._file.read(*args) + def seek(self, offset): return self._file.seek(offset) + def write(self, s): return self._file.write(s) + def close(self): return self._file.close() + def __iter__(self): return iter(self._file) + def readlines(self, size=None): return self._file.readlines(size) + def xreadlines(self): return self._file.xreadlines() class InMemoryUploadedFile(UploadedFile): """ A file uploaded into memory (i.e. stream-to-memory). """ - def __init__(self, file, field_name, file_name, content_type, file_size, charset): - super(InMemoryUploadedFile, self).__init__(file_name, content_type, file_size, charset) + def __init__(self, file, field_name, name, content_type, size, charset): + super(InMemoryUploadedFile, self).__init__(name, content_type, size, charset) self.file = file self.field_name = field_name self.file.seek(0) @@ -154,7 +216,7 @@ class InMemoryUploadedFile(UploadedFile): def read(self, *args, **kwargs): return self.file.read(*args, **kwargs) - def chunk(self, chunk_size=None): + def chunks(self, chunk_size=None): self.file.seek(0) yield self.read() @@ -168,9 +230,9 @@ class SimpleUploadedFile(InMemoryUploadedFile): """ def __init__(self, name, content, content_type='text/plain'): self.file = StringIO(content or '') - self.file_name = name + self.name = name self.field_name = None - self.file_size = len(content or '') + self.size = len(content or '') self.content_type = content_type self.charset = None self.file.seek(0) diff --git a/django/core/files/uploadhandler.py b/django/core/files/uploadhandler.py index ab587769f7..008a05a148 100644 --- a/django/core/files/uploadhandler.py +++ b/django/core/files/uploadhandler.py @@ -132,21 +132,15 @@ class TemporaryFileUploadHandler(FileUploadHandler): Create the file object to append to as data is coming in. """ super(TemporaryFileUploadHandler, self).new_file(file_name, *args, **kwargs) - self.file = TemporaryFile(settings.FILE_UPLOAD_TEMP_DIR) - self.write = self.file.write + self.file = TemporaryUploadedFile(self.file_name, self.content_type, 0, self.charset) def receive_data_chunk(self, raw_data, start): - self.write(raw_data) + self.file.write(raw_data) def file_complete(self, file_size): self.file.seek(0) - return TemporaryUploadedFile( - file = self.file, - file_name = self.file_name, - content_type = self.content_type, - file_size = file_size, - charset = self.charset - ) + self.file.size = file_size + return self.file class MemoryFileUploadHandler(FileUploadHandler): """ @@ -189,37 +183,12 @@ class MemoryFileUploadHandler(FileUploadHandler): return InMemoryUploadedFile( file = self.file, field_name = self.field_name, - file_name = self.file_name, + name = self.file_name, content_type = self.content_type, - file_size = file_size, + size = file_size, charset = self.charset ) -class TemporaryFile(object): - """ - A temporary file that tries to delete itself when garbage collected. - """ - def __init__(self, dir): - if not dir: - dir = tempfile.gettempdir() - try: - (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir) - self.file = os.fdopen(fd, 'w+b') - except (OSError, IOError): - raise OSError("Could not create temporary file for uploading, have you set settings.FILE_UPLOAD_TEMP_DIR correctly?") - self.name = name - - def __getattr__(self, name): - a = getattr(self.__dict__['file'], name) - if type(a) != type(0): - setattr(self, name, a) - return a - - def __del__(self): - try: - os.unlink(self.name) - except OSError: - pass def load_handler(path, *args, **kwargs): """ diff --git a/django/db/models/base.py b/django/db/models/base.py index 5e92a5adc1..e881c32ebc 100644 --- a/django/db/models/base.py +++ b/django/db/models/base.py @@ -536,7 +536,7 @@ class Model(object): # This is a normal uploadedfile that we can stream. fp = open(full_filename, 'wb') locks.lock(fp, locks.LOCK_EX) - for chunk in raw_field.chunk(): + for chunk in raw_field.chunks(): fp.write(chunk) locks.unlock(fp) fp.close() diff --git a/django/db/models/fields/__init__.py b/django/db/models/fields/__init__.py index 1eefed641c..a69cc5ac8c 100644 --- a/django/db/models/fields/__init__.py +++ b/django/db/models/fields/__init__.py @@ -766,9 +766,12 @@ class FileField(Field): def get_db_prep_save(self, value): "Returns field's value prepared for saving into a database." # Need to convert UploadedFile objects provided via a form to unicode for database insertion - if value is None: + if hasattr(value, 'name'): + return value.name + elif value is None: return None - return unicode(value) + else: + return unicode(value) def get_manipulator_fields(self, opts, manipulator, change, name_prefix='', rel=False, follow=True): field_list = Field.get_manipulator_fields(self, opts, manipulator, change, name_prefix, rel, follow) @@ -842,7 +845,7 @@ class FileField(Field): # We don't need to raise a warning because Model._save_FIELD_file will # do so for us. try: - file_name = file.file_name + file_name = file.name except AttributeError: file_name = file['filename'] @@ -857,9 +860,9 @@ class FileField(Field): return os.path.normpath(f) def save_form_data(self, instance, data): - from django.newforms.fields import UploadedFile + from django.core.files.uploadedfile import UploadedFile if data and isinstance(data, UploadedFile): - getattr(instance, "save_%s_file" % self.name)(data.filename, data.data, save=False) + getattr(instance, "save_%s_file" % self.name)(data.name, data, save=False) def formfield(self, **kwargs): defaults = {'form_class': forms.FileField} diff --git a/django/newforms/fields.py b/django/newforms/fields.py index 1feef31ee0..ad46d78859 100644 --- a/django/newforms/fields.py +++ b/django/newforms/fields.py @@ -27,7 +27,7 @@ from django.utils.encoding import StrAndUnicode, smart_unicode, smart_str from util import ErrorList, ValidationError from widgets import TextInput, PasswordInput, HiddenInput, MultipleHiddenInput, FileInput, CheckboxInput, Select, NullBooleanSelect, SelectMultiple, DateTimeInput - +from django.core.files.uploadedfile import SimpleUploadedFile as UploadedFile __all__ = ( 'Field', 'CharField', 'IntegerField', @@ -419,18 +419,6 @@ except ImportError: # It's OK if Django settings aren't configured. URL_VALIDATOR_USER_AGENT = 'Django (http://www.djangoproject.com/)' -class UploadedFile(StrAndUnicode): - "A wrapper for files uploaded in a FileField" - def __init__(self, filename, data): - self.filename = filename - self.data = data - - def __unicode__(self): - """ - The unicode representation is the filename, so that the pre-database-insertion - logic can use UploadedFile objects - """ - return self.filename class FileField(Field): widget = FileInput @@ -460,23 +448,20 @@ class FileField(Field): category = DeprecationWarning, stacklevel = 2 ) + data = UploadedFile(data['filename'], data['content']) try: - file_name = data.file_name - file_size = data.file_size + file_name = data.name + file_size = data.size except AttributeError: - try: - file_name = data.get('filename') - file_size = bool(data['content']) - except (AttributeError, KeyError): - raise ValidationError(self.error_messages['invalid']) + raise ValidationError(self.error_messages['invalid']) if not file_name: raise ValidationError(self.error_messages['invalid']) if not file_size: raise ValidationError(self.error_messages['empty']) - return UploadedFile(file_name, data) + return data class ImageField(FileField): default_error_messages = { diff --git a/django/oldforms/__init__.py b/django/oldforms/__init__.py index ee838d234a..2a300df0bd 100644 --- a/django/oldforms/__init__.py +++ b/django/oldforms/__init__.py @@ -686,7 +686,7 @@ class FileUploadField(FormField): if upload_errors: raise validators.CriticalValidationError, upload_errors try: - file_size = new_data.file_size + file_size = new_data.size except AttributeError: file_size = len(new_data['content']) if not file_size: diff --git a/docs/newforms.txt b/docs/newforms.txt index f0a5e1ff9f..4240fe9985 100644 --- a/docs/newforms.txt +++ b/docs/newforms.txt @@ -1331,23 +1331,12 @@ given length. * Validates that non-empty file data has been bound to the form. * Error message keys: ``required``, ``invalid``, ``missing``, ``empty`` -An ``UploadedFile`` object has two attributes: - - ====================== ==================================================== - Attribute Description - ====================== ==================================================== - ``filename`` The name of the file, provided by the uploading - client. - - ``content`` The array of bytes comprising the file content. - ====================== ==================================================== - -The string representation of an ``UploadedFile`` is the same as the filename -attribute. +To learn more about the ``UploadedFile`` object, see the `file uploads documentation`_. When you use a ``FileField`` in a form, you must also remember to `bind the file data to the form`_. +.. _file uploads documentation: ../upload_handling/ .. _`bind the file data to the form`: `Binding uploaded files to a form`_ ``FilePathField`` diff --git a/docs/settings.txt b/docs/settings.txt index 4566eea1f9..d7715714d3 100644 --- a/docs/settings.txt +++ b/docs/settings.txt @@ -279,7 +279,7 @@ Default: ``''`` (Empty string) The database backend to use. The build-in database backends are ``'postgresql_psycopg2'``, ``'postgresql'``, ``'mysql'``, ``'mysql_old'``, -``'sqlite3'``, ``'oracle'``, and ``'oracle'``. +``'sqlite3'``, and ``'oracle'``. In the Django development version, you can use a database backend that doesn't ship with Django by setting ``DATABASE_ENGINE`` to a fully-qualified path (i.e. diff --git a/docs/upload_handling.txt b/docs/upload_handling.txt index 34cd085ac9..897c7817f6 100644 --- a/docs/upload_handling.txt +++ b/docs/upload_handling.txt @@ -22,7 +22,7 @@ Consider a simple form containing a ``FileField``:: class UploadFileForm(forms.Form): title = forms.CharField(max_length=50) file = forms.FileField() - + A view handling this form will receive the file data in ``request.FILES``, which is a dictionary containing a key for each ``FileField`` (or ``ImageField``, or other ``FileField`` subclass) in the form. So the data from the above form would @@ -64,32 +64,32 @@ methods to access the uploaded content: ``UploadedFile.read()`` Read the entire uploaded data from the file. Be careful with this method: if the uploaded file is huge it can overwhelm your system if you - try to read it into memory. You'll probably want to use ``chunk()`` + try to read it into memory. You'll probably want to use ``chunks()`` instead; see below. - + ``UploadedFile.multiple_chunks()`` Returns ``True`` if the uploaded file is big enough to require reading in multiple chunks. By default this will be any file larger than 2.5 megabytes, but that's configurable; see below. - + ``UploadedFile.chunk()`` A generator returning chunks of the file. If ``multiple_chunks()`` is ``True``, you should use this method in a loop instead of ``read()``. - + In practice, it's often easiest simply to use ``chunks()`` all the time; see the example below. - + ``UploadedFile.file_name`` The name of the uploaded file (e.g. ``my_file.txt``). - + ``UploadedFile.file_size`` The size, in bytes, of the uploaded file. - + There are a few other methods and attributes available on ``UploadedFile`` objects; see `UploadedFile objects`_ for a complete reference. Putting it all together, here's a common way you might handle an uploaded file:: - + def handle_uploaded_file(f): destination = open('some/file/name.txt', 'wb') for chunk in f.chunks(): @@ -126,27 +126,27 @@ Three `settings`_ control Django's file upload behavior: The maximum size, in bytes, for files that will be uploaded into memory. Files larger than ``FILE_UPLOAD_MAX_MEMORY_SIZE`` will be streamed to disk. - + Defaults to 2.5 megabytes. - + ``FILE_UPLOAD_TEMP_DIR`` The directory where uploaded files larger than ``FILE_UPLOAD_TEMP_DIR`` will be stored. - + Defaults to your system's standard temporary directory (i.e. ``/tmp`` on most Unix-like systems). - + ``FILE_UPLOAD_HANDLERS`` The actual handlers for uploaded files. Changing this setting allows complete customization -- even replacement -- of Django's upload process. See `upload handlers`_, below, for details. - + Defaults to:: - + ("django.core.files.uploadhandler.MemoryFileUploadHandler", "django.core.files.uploadhandler.TemporaryFileUploadHandler",) - + Which means "try to upload to memory first, then fall back to temporary files." @@ -161,35 +161,39 @@ All ``UploadedFile`` objects define the following methods/attributes: Returns a byte string of length ``num_bytes``, or the complete file if ``num_bytes`` is ``None``. - ``UploadedFile.chunk(self, chunk_size=None)`` + ``UploadedFile.chunks(self, chunk_size=None)`` A generator yielding small chunks from the file. If ``chunk_size`` isn't - given, chunks will be 64 kb. + given, chunks will be 64 KB. ``UploadedFile.multiple_chunks(self, chunk_size=None)`` Returns ``True`` if you can expect more than one chunk when calling - ``UploadedFile.chunk(self, chunk_size)``. + ``UploadedFile.chunks(self, chunk_size)``. ``UploadedFile.file_size`` The size, in bytes, of the uploaded file. - + ``UploadedFile.file_name`` The name of the uploaded file as provided by the user. - + ``UploadedFile.content_type`` The content-type header uploaded with the file (e.g. ``text/plain`` or ``application/pdf``). Like any data supplied by the user, you shouldn't trust that the uploaded file is actually this type. You'll still need to validate that the file contains the content that the content-type header claims -- "trust but verify." - + ``UploadedFile.charset`` For ``text/*`` content-types, the character set (i.e. ``utf8``) supplied by the browser. Again, "trust but verify" is the best policy here. + ``UploadedFile.__iter__()`` + Iterates over the lines in the file. + ``UploadedFile.temporary_file_path()`` Only files uploaded onto disk will have this method; it returns the full path to the temporary uploaded file. + Upload Handlers =============== diff --git a/tests/modeltests/model_forms/models.py b/tests/modeltests/model_forms/models.py index c856720a74..f2b0e05cc4 100644 --- a/tests/modeltests/model_forms/models.py +++ b/tests/modeltests/model_forms/models.py @@ -803,7 +803,7 @@ False >>> f.is_valid() True >>> type(f.cleaned_data['file']) - + >>> instance = f.save() >>> instance.file u'...test1.txt' @@ -814,7 +814,7 @@ u'...test1.txt' >>> f.is_valid() True >>> type(f.cleaned_data['file']) - + >>> instance = f.save() >>> instance.file u'...test1.txt' @@ -906,7 +906,7 @@ u'...test3.txt' >>> f.is_valid() True >>> type(f.cleaned_data['image']) - + >>> instance = f.save() >>> instance.image u'...test.png' @@ -918,7 +918,7 @@ u'...test.png' >>> f.is_valid() True >>> type(f.cleaned_data['image']) - + >>> instance = f.save() >>> instance.image u'...test.png' diff --git a/tests/regressiontests/file_uploads/views.py b/tests/regressiontests/file_uploads/views.py index dfa877da3a..c45f6a609f 100644 --- a/tests/regressiontests/file_uploads/views.py +++ b/tests/regressiontests/file_uploads/views.py @@ -15,7 +15,7 @@ def file_upload_view(request): if isinstance(form_data.get('file_field'), UploadedFile) and isinstance(form_data['name'], unicode): # If a file is posted, the dummy client should only post the file name, # not the full path. - if os.path.dirname(form_data['file_field'].file_name) != '': + if os.path.dirname(form_data['file_field'].name) != '': return HttpResponseServerError() return HttpResponse('') else: @@ -29,7 +29,7 @@ def file_upload_view_verify(request): form_data.update(request.FILES) # Check to see if unicode names worked out. - if not request.FILES['file_unicode'].file_name.endswith(u'test_\u4e2d\u6587_Orl\xe9ans.jpg'): + if not request.FILES['file_unicode'].name.endswith(u'test_\u4e2d\u6587_Orl\xe9ans.jpg'): return HttpResponseServerError() for key, value in form_data.items(): @@ -51,7 +51,7 @@ def file_upload_echo(request): """ Simple view to echo back info about uploaded files for tests. """ - r = dict([(k, f.file_name) for k, f in request.FILES.items()]) + r = dict([(k, f.name) for k, f in request.FILES.items()]) return HttpResponse(simplejson.dumps(r)) def file_upload_quota(request): diff --git a/tests/regressiontests/forms/fields.py b/tests/regressiontests/forms/fields.py index 4725c3ecf3..f266e7bc50 100644 --- a/tests/regressiontests/forms/fields.py +++ b/tests/regressiontests/forms/fields.py @@ -800,10 +800,10 @@ Traceback (most recent call last): ValidationError: [u'The submitted file is empty.'] >>> type(f.clean(SimpleUploadedFile('name', 'Some File Content'))) - + >>> type(f.clean(SimpleUploadedFile('name', 'Some File Content'), 'files/test4.pdf')) - + # URLField ##################################################################