Fixed #27007 -- Handled non-UTF-8 bytes objects for text/* attachments.

The fallback logic which allows non-UTF-8 encoded files to be passed to
attach_file() even when a `text/*` mime type has been specified is
moved to attach(). Both functions now fall back to a content type of
`application/octet-stream`.

A side effect is that a file's content is now read as bytes and decoded
in memory instead of being read into a string from a file opened in
text mode.

Some mimetype-related logic in _create_attachment() has become
obsolete because the code moved from attach_file() to attach() already
handles it.
This commit is contained in:
Michael Schwarz 2016-08-03 15:53:06 +02:00 committed by Tim Graham
parent 311a8e8d50
commit 72d541b61c
5 changed files with 72 additions and 28 deletions

View File

@ -519,6 +519,7 @@ answer newbie questions, and generally made Django that much better:
michael.mcewan@gmail.com
Michael Placentra II <someone@michaelplacentra2.net>
Michael Radziej <mir@noris.de>
Michael Schwarz <michi.schwarz@gmail.com>
Michael Thornhill <michael.thornhill@gmail.com>
Michal Chruszcz <troll@pld-linux.org>
michal@plovarna.cz

View File

@ -356,6 +356,11 @@ class EmailMessage(object):
If the first parameter is a MIMEBase subclass it is inserted directly
into the resulting message attachments.
For a text/* mimetype (guessed or specified), when a bytes object is
specified as content, it will be decoded as UTF-8. If that fails,
the mimetype will be set to DEFAULT_ATTACHMENT_MIME_TYPE and the
content is not decoded.
"""
if isinstance(filename, MIMEBase):
assert content is None
@ -363,6 +368,22 @@ class EmailMessage(object):
self.attachments.append(filename)
else:
assert content is not None
if not mimetype:
mimetype, _ = mimetypes.guess_type(filename)
if not mimetype:
mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
basetype, subtype = mimetype.split('/', 1)
if basetype == 'text':
if isinstance(content, six.binary_type):
try:
content = content.decode('utf-8')
except UnicodeDecodeError:
# If mimetype suggests the content is text but it's actually
# binary, decode() raises a UnicodeDecodeError. Keep the bytes
# undecoded and fall back to DEFAULT_ATTACHMENT_MIME_TYPE.
mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
self.attachments.append((filename, content, mimetype))
def attach_file(self, path, mimetype=None):
@ -370,33 +391,17 @@ class EmailMessage(object):
Attaches a file from the filesystem.
The mimetype will be set to the DEFAULT_ATTACHMENT_MIME_TYPE if it is
not specified and cannot be guessed or (PY3 only) if it suggests
text/* for a binary file.
not specified and cannot be guessed.
For a text/* mimetype (guessed or specified), the file's content
will be decoded as UTF-8. If that fails, the mimetype will be set to
DEFAULT_ATTACHMENT_MIME_TYPE and the content is not decoded.
"""
filename = os.path.basename(path)
if not mimetype:
mimetype, _ = mimetypes.guess_type(filename)
if not mimetype:
mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
basetype, subtype = mimetype.split('/', 1)
read_mode = 'r' if basetype == 'text' else 'rb'
content = None
with open(path, read_mode) as f:
try:
content = f.read()
except UnicodeDecodeError:
# If mimetype suggests the file is text but it's actually
# binary, read() will raise a UnicodeDecodeError on Python 3.
pass
# If the previous read in text mode failed, try binary mode.
if content is None:
with open(path, 'rb') as f:
content = f.read()
mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
self.attach(filename, content, mimetype)
with open(path, 'rb') as file:
content = file.read()
self.attach(filename, content, mimetype)
def _create_message(self, msg):
return self._create_attachments(msg)
@ -450,10 +455,6 @@ class EmailMessage(object):
Converts the filename, content, mimetype triple into a MIME attachment
object.
"""
if mimetype is None:
mimetype, _ = mimetypes.guess_type(filename)
if mimetype is None:
mimetype = DEFAULT_ATTACHMENT_MIME_TYPE
attachment = self._create_mime_attachment(content, mimetype)
if filename:
try:

View File

@ -162,6 +162,10 @@ Email
* Added the :setting:`EMAIL_USE_LOCALTIME` setting to allow sending SMTP date
headers in the local time zone rather than in UTC.
* ``EmailMessage.attach()`` and ``attach_file()`` now fall back to MIME type
``application/octet-stream`` when binary content that can't be decoded as
UTF-8 is specified for a ``text/*`` attachment.
File Storage
~~~~~~~~~~~~

View File

@ -345,6 +345,11 @@ The class has the following methods:
If you specify a ``mimetype`` of ``message/rfc822``, it will also accept
:class:`django.core.mail.EmailMessage` and :py:class:`email.message.Message`.
For a ``mimetype`` starting with ``text/``, content is expected to be a
string. Binary data will be decoded using UTF-8, and if that fails, the
MIME type will be changed to ``application/octet-stream`` and the data will
be attached unchanged.
In addition, ``message/rfc822`` attachments will no longer be
base64-encoded in violation of :rfc:`2046#section-5.2.1`, which can cause
issues with displaying the attachments in `Evolution`__ and `Thunderbird`__.
@ -359,6 +364,14 @@ The class has the following methods:
message.attach_file('/images/weather_map.png')
For MIME types starting with ``text/``, binary data is handled as in
``attach()``.
.. versionchanged:: 1.11
Added the fallback to MIME type ``application/octet-stream`` when binary
data for a ``text/*`` attachment cannot be decoded.
Sending alternative content types
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -422,6 +422,31 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
self.assertEqual(content, b'file content')
self.assertEqual(mimetype, 'text/plain')
def test_attach_utf8_text_as_bytes(self):
"""
Non-ASCII characters encoded as valid UTF-8 are correctly transported
and decoded.
"""
msg = EmailMessage('subject', 'body', 'from@example.com', ['to@example.com'])
msg.attach('file.txt', b'\xc3\xa4') # UTF-8 encoded a umlaut.
filename, content, mimetype = self.get_decoded_attachments(msg)[0]
self.assertEqual(filename, 'file.txt')
self.assertEqual(content, b'\xc3\xa4')
self.assertEqual(mimetype, 'text/plain')
def test_attach_non_utf8_text_as_bytes(self):
"""
Binary data that can't be decoded as UTF-8 overrides the MIME type
instead of decoding the data.
"""
msg = EmailMessage('subject', 'body', 'from@example.com', ['to@example.com'])
msg.attach('file.txt', b'\xff') # Invalid UTF-8.
filename, content, mimetype = self.get_decoded_attachments(msg)[0]
self.assertEqual(filename, 'file.txt')
# Content should be passed through unmodified.
self.assertEqual(content, b'\xff')
self.assertEqual(mimetype, 'application/octet-stream')
def test_dummy_backend(self):
"""
Make sure that the dummy backend returns the correct number of sent messages