From 72d541b61cd7b0a14f70242e2207fdb3f600c4d5 Mon Sep 17 00:00:00 2001 From: Michael Schwarz Date: Wed, 3 Aug 2016 15:53:06 +0200 Subject: [PATCH] Fixed #27007 -- Handled non-UTF-8 bytes objects for text/* attachments. The fallback logic which allows non-UTF-8 encoded files to be passed to attach_file() even when a `text/*` mime type has been specified is moved to attach(). Both functions now fall back to a content type of `application/octet-stream`. A side effect is that a file's content is decoded in memory instead of opening it in text mode and reading it into a string. Some mimetype-related logic in _create_attachment() has become obsolete as the code moved from attach_file() to attach() already handles this. --- AUTHORS | 1 + django/core/mail/message.py | 57 +++++++++++++++++++------------------ docs/releases/1.11.txt | 4 +++ docs/topics/email.txt | 13 +++++++++ tests/mail/tests.py | 25 ++++++++++++++++ 5 files changed, 72 insertions(+), 28 deletions(-) diff --git a/AUTHORS b/AUTHORS index d0c9deffeb..4a35b84176 100644 --- a/AUTHORS +++ b/AUTHORS @@ -519,6 +519,7 @@ answer newbie questions, and generally made Django that much better: michael.mcewan@gmail.com Michael Placentra II Michael Radziej + Michael Schwarz Michael Thornhill Michal Chruszcz michal@plovarna.cz diff --git a/django/core/mail/message.py b/django/core/mail/message.py index 29012201ef..44178d2447 100644 --- a/django/core/mail/message.py +++ b/django/core/mail/message.py @@ -356,6 +356,11 @@ class EmailMessage(object): If the first parameter is a MIMEBase subclass it is inserted directly into the resulting message attachments. + + For a text/* mimetype (guessed or specified), when a bytes object is + specified as content, it will be decoded as UTF-8. If that fails, + the mimetype will be set to DEFAULT_ATTACHMENT_MIME_TYPE and the + content is not decoded. """ if isinstance(filename, MIMEBase): assert content is None @@ -363,6 +368,22 @@ class EmailMessage(object): self.attachments.append(filename) else: assert content is not None + + if not mimetype: + mimetype, _ = mimetypes.guess_type(filename) + if not mimetype: + mimetype = DEFAULT_ATTACHMENT_MIME_TYPE + basetype, subtype = mimetype.split('/', 1) + + if basetype == 'text': + if isinstance(content, six.binary_type): + try: + content = content.decode('utf-8') + except UnicodeDecodeError: + # If mimetype suggests the file is text but it's actually + # binary, read() will raise a UnicodeDecodeError on Python 3. + mimetype = DEFAULT_ATTACHMENT_MIME_TYPE + self.attachments.append((filename, content, mimetype)) def attach_file(self, path, mimetype=None): @@ -370,33 +391,17 @@ class EmailMessage(object): Attaches a file from the filesystem. The mimetype will be set to the DEFAULT_ATTACHMENT_MIME_TYPE if it is - not specified and cannot be guessed or (PY3 only) if it suggests - text/* for a binary file. + not specified and cannot be guessed. + + For a text/* mimetype (guessed or specified), the file's content + will be decoded as UTF-8. If that fails, the mimetype will be set to + DEFAULT_ATTACHMENT_MIME_TYPE and the content is not decoded. """ filename = os.path.basename(path) - if not mimetype: - mimetype, _ = mimetypes.guess_type(filename) - if not mimetype: - mimetype = DEFAULT_ATTACHMENT_MIME_TYPE - basetype, subtype = mimetype.split('/', 1) - read_mode = 'r' if basetype == 'text' else 'rb' - content = None - with open(path, read_mode) as f: - try: - content = f.read() - except UnicodeDecodeError: - # If mimetype suggests the file is text but it's actually - # binary, read() will raise a UnicodeDecodeError on Python 3. - pass - - # If the previous read in text mode failed, try binary mode. - if content is None: - with open(path, 'rb') as f: - content = f.read() - mimetype = DEFAULT_ATTACHMENT_MIME_TYPE - - self.attach(filename, content, mimetype) + with open(path, 'rb') as file: + content = file.read() + self.attach(filename, content, mimetype) def _create_message(self, msg): return self._create_attachments(msg) @@ -450,10 +455,6 @@ class EmailMessage(object): Converts the filename, content, mimetype triple into a MIME attachment object. """ - if mimetype is None: - mimetype, _ = mimetypes.guess_type(filename) - if mimetype is None: - mimetype = DEFAULT_ATTACHMENT_MIME_TYPE attachment = self._create_mime_attachment(content, mimetype) if filename: try: diff --git a/docs/releases/1.11.txt b/docs/releases/1.11.txt index a23d5f3663..46c780eda8 100644 --- a/docs/releases/1.11.txt +++ b/docs/releases/1.11.txt @@ -162,6 +162,10 @@ Email * Added the :setting:`EMAIL_USE_LOCALTIME` setting to allow sending SMTP date headers in the local time zone rather than in UTC. +* ``EmailMessage.attach()`` and ``attach_file()`` now fall back to MIME type + ``application/octet-stream`` when binary content that can't be decoded as + UTF-8 is specified for a ``text/*`` attachment. + File Storage ~~~~~~~~~~~~ diff --git a/docs/topics/email.txt b/docs/topics/email.txt index 2b880e0ba9..7c8f5e6112 100644 --- a/docs/topics/email.txt +++ b/docs/topics/email.txt @@ -345,6 +345,11 @@ The class has the following methods: If you specify a ``mimetype`` of ``message/rfc822``, it will also accept :class:`django.core.mail.EmailMessage` and :py:class:`email.message.Message`. + For a ``mimetype`` starting with ``text/``, content is expected to be a + string. Binary data will be decoded using UTF-8, and if that fails, the + MIME type will be changed to ``application/octet-stream`` and the data will + be attached unchanged. + In addition, ``message/rfc822`` attachments will no longer be base64-encoded in violation of :rfc:`2046#section-5.2.1`, which can cause issues with displaying the attachments in `Evolution`__ and `Thunderbird`__. @@ -359,6 +364,14 @@ The class has the following methods: message.attach_file('/images/weather_map.png') + For MIME types starting with ``text/``, binary data is handled as in + ``attach()``. + +.. versionchanged:: 1.11 + + Added the fallback to MIME type ``application/octet-stream`` when binary + data for a ``text/*`` attachment cannot be decoded. + Sending alternative content types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/tests/mail/tests.py b/tests/mail/tests.py index 0f0eb5bdf1..3d1a74518f 100644 --- a/tests/mail/tests.py +++ b/tests/mail/tests.py @@ -422,6 +422,31 @@ class MailTests(HeadersCheckMixin, SimpleTestCase): self.assertEqual(content, b'file content') self.assertEqual(mimetype, 'text/plain') + def test_attach_utf8_text_as_bytes(self): + """ + Non-ASCII characters encoded as valid UTF-8 are correctly transported + and decoded. + """ + msg = EmailMessage('subject', 'body', 'from@example.com', ['to@example.com']) + msg.attach('file.txt', b'\xc3\xa4') # UTF-8 encoded a umlaut. + filename, content, mimetype = self.get_decoded_attachments(msg)[0] + self.assertEqual(filename, 'file.txt') + self.assertEqual(content, b'\xc3\xa4') + self.assertEqual(mimetype, 'text/plain') + + def test_attach_non_utf8_text_as_bytes(self): + """ + Binary data that can't be decoded as UTF-8 overrides the MIME type + instead of decoding the data. + """ + msg = EmailMessage('subject', 'body', 'from@example.com', ['to@example.com']) + msg.attach('file.txt', b'\xff') # Invalid UTF-8. + filename, content, mimetype = self.get_decoded_attachments(msg)[0] + self.assertEqual(filename, 'file.txt') + # Content should be passed through unmodified. + self.assertEqual(content, b'\xff') + self.assertEqual(mimetype, 'application/octet-stream') + def test_dummy_backend(self): """ Make sure that dummy backends returns correct number of sent messages