From 055f9a0ebf50ecc02e540304f81cc2e2d95613e3 Mon Sep 17 00:00:00 2001 From: Karen Tracey Date: Sat, 4 Apr 2009 17:42:43 +0000 Subject: [PATCH] [1.0.X] Fixed #10254: Changed the regex in get_valid_filename to allow unicode alphanumerics (thanks gulliver). Also updated the file_uploads test for this case to check the name after saving the uploaded file. As it was, the test ensured that files with unicode characters in their names could be uploaded, but it wasn't actually ensuring that the unicode characters were preserved through save. Backport of [10388] git-svn-id: http://code.djangoproject.com/svn/django/branches/releases/1.0.X@10389 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/utils/text.py | 6 ++-- tests/regressiontests/file_uploads/tests.py | 27 ++++++++++++----- tests/regressiontests/file_uploads/urls.py | 1 + tests/regressiontests/file_uploads/views.py | 32 ++++++++++++++++++--- 4 files changed, 52 insertions(+), 14 deletions(-) diff --git a/django/utils/text.py b/django/utils/text.py index 1548cfa77e..cd631983d7 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -116,13 +116,13 @@ def get_valid_filename(s): """ Returns the given string converted to a string that can be used for a clean filename. Specifically, leading and trailing spaces are removed; other - spaces are converted to underscores; and all non-filename-safe characters - are removed. + spaces are converted to underscores; and anything that is not a unicode + alphanumeric, dash, underscore, or dot, is removed. 
>>> get_valid_filename("john's portrait in 2004.jpg") u'johns_portrait_in_2004.jpg' """ s = force_unicode(s).strip().replace(' ', '_') - return re.sub(r'[^-A-Za-z0-9_.]', '', s) + return re.sub(r'(?u)[^-\w.]', '', s) get_valid_filename = allow_lazy(get_valid_filename, unicode) def get_text_list(list_, last_word=ugettext_lazy(u'or')): diff --git a/tests/regressiontests/file_uploads/tests.py b/tests/regressiontests/file_uploads/tests.py index 6fcd8a99aa..cc498b97f6 100644 --- a/tests/regressiontests/file_uploads/tests.py +++ b/tests/regressiontests/file_uploads/tests.py @@ -1,3 +1,4 @@ +#! -*- coding: utf-8 -*- import os import errno import shutil @@ -12,6 +13,8 @@ from django.utils.hashcompat import sha_constructor from models import FileModel, temp_storage, UPLOAD_TO import uploadhandler +UNICODE_FILENAME = u'test-0123456789_中文_Orléans.jpg' + class FileUploadTests(TestCase): def test_simple_upload(self): post_data = { @@ -32,16 +35,10 @@ class FileUploadTests(TestCase): file2.write('a' * (10 * 2 ** 20)) file2.seek(0) - # This file contains chinese symbols for a name. - file3 = open(os.path.join(tdir, u'test_中文_Orl\u00e9ans.jpg'.encode('utf-8')), 'w+b') - file3.write('b' * (2 ** 10)) - file3.seek(0) - post_data = { 'name': 'Ringo', 'file_field1': open(file1.name), 'file_field2': open(file2.name), - 'file_unicode': file3, } for key in post_data.keys(): @@ -53,8 +50,24 @@ class FileUploadTests(TestCase): response = self.client.post('/file_uploads/verify/', post_data) + self.assertEqual(response.status_code, 200) + + def test_unicode_file_name(self): + tdir = tempfile.gettempdir() + + # This file contains chinese symbols and an accented char in the name. 
+ file1 = open(os.path.join(tdir, UNICODE_FILENAME.encode('utf-8')), 'w+b') + file1.write('b' * (2 ** 10)) + file1.seek(0) + + post_data = { + 'file_unicode': file1, + } + + response = self.client.post('/file_uploads/unicode_name/', post_data) + try: - os.unlink(file3.name) + os.unlink(file1.name) except: pass diff --git a/tests/regressiontests/file_uploads/urls.py b/tests/regressiontests/file_uploads/urls.py index 607e1d1034..413080eb4f 100644 --- a/tests/regressiontests/file_uploads/urls.py +++ b/tests/regressiontests/file_uploads/urls.py @@ -4,6 +4,7 @@ import views urlpatterns = patterns('', (r'^upload/$', views.file_upload_view), (r'^verify/$', views.file_upload_view_verify), + (r'^unicode_name/$', views.file_upload_unicode_name), (r'^echo/$', views.file_upload_echo), (r'^quota/$', views.file_upload_quota), (r'^quota/broken/$', views.file_upload_quota_broken), diff --git a/tests/regressiontests/file_uploads/views.py b/tests/regressiontests/file_uploads/views.py index a989069a30..5dcbcd3c64 100644 --- a/tests/regressiontests/file_uploads/views.py +++ b/tests/regressiontests/file_uploads/views.py @@ -5,6 +5,7 @@ from django.utils import simplejson from models import FileModel from uploadhandler import QuotaUploadHandler, ErroringUploadHandler from django.utils.hashcompat import sha_constructor +from tests import UNICODE_FILENAME def file_upload_view(request): """ @@ -29,10 +30,6 @@ def file_upload_view_verify(request): form_data = request.POST.copy() form_data.update(request.FILES) - # Check to see if unicode names worked out. - if not request.FILES['file_unicode'].name.endswith(u'test_\u4e2d\u6587_Orl\xe9ans.jpg'): - return HttpResponseServerError() - for key, value in form_data.items(): if key.endswith('_hash'): continue @@ -53,6 +50,33 @@ def file_upload_view_verify(request): return HttpResponse('') +def file_upload_unicode_name(request): + + # Check to see if unicode name came through properly. 
+ if not request.FILES['file_unicode'].name.endswith(UNICODE_FILENAME): + return HttpResponseServerError() + + response = None + + # Check to make sure the exotic characters are preserved even + # through file save. + uni_named_file = request.FILES['file_unicode'] + obj = FileModel.objects.create(testfile=uni_named_file) + obj = FileModel.objects.get(pk=obj.pk) + if not obj.testfile.name.endswith(uni_named_file.name): + response = HttpResponseServerError() + + # Cleanup the object with its exotic file name immediately. + # (shutil.rmtree used elsewhere in the tests to clean up the + # upload directory has been seen to choke on unicode + # filenames on Windows.) + obj.delete() + + if response: + return response + else: + return HttpResponse('') + def file_upload_echo(request): """ Simple view to echo back info about uploaded files for tests.