Fixed #30512 -- Used email.headerregistry.parser for parsing emails in sanitize_address().

This commit is contained in:
Joachim Jablon 2019-04-29 18:48:20 +02:00 committed by Carlton Gibson
parent 0c2ffdd526
commit 2628ea9515
2 changed files with 53 additions and 45 deletions

View File

@@ -2,15 +2,15 @@ import mimetypes
from email import ( from email import (
charset as Charset, encoders as Encoders, generator, message_from_string, charset as Charset, encoders as Encoders, generator, message_from_string,
) )
from email.errors import InvalidHeaderDefect, NonASCIILocalPartDefect from email.errors import HeaderParseError
from email.header import Header from email.header import Header
from email.headerregistry import Address from email.headerregistry import Address, parser
from email.message import Message from email.message import Message
from email.mime.base import MIMEBase from email.mime.base import MIMEBase
from email.mime.message import MIMEMessage from email.mime.message import MIMEMessage
from email.mime.multipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText from email.mime.text import MIMEText
from email.utils import formatdate, getaddresses, make_msgid, parseaddr from email.utils import formatdate, getaddresses, make_msgid
from io import BytesIO, StringIO from io import BytesIO, StringIO
from pathlib import Path from pathlib import Path
@@ -71,56 +71,44 @@ def forbid_multi_line_headers(name, val, encoding):
return name, val return name, val
def split_addr(addr, encoding):
"""
Split the address into local part and domain and encode them.
When non-ascii characters are present in the local part, it must be
MIME-word encoded. The domain name must be idna-encoded if it contains
non-ascii characters.
"""
if '@' in addr:
localpart, domain = addr.split('@', 1)
# Try to get the simplest encoding - ascii if possible so that
# to@example.com doesn't become =?utf-8?q?to?=@example.com. This
# makes unit testing a bit easier and more readable.
try:
localpart.encode('ascii')
except UnicodeEncodeError:
localpart = Header(localpart, encoding).encode()
domain = domain.encode('idna').decode('ascii')
else:
localpart = Header(addr, encoding).encode()
domain = ''
return (localpart, domain)
def sanitize_address(addr, encoding): def sanitize_address(addr, encoding):
""" """
Format a pair of (name, address) or an email address string. Format a pair of (name, address) or an email address string.
""" """
address = None
if not isinstance(addr, tuple): if not isinstance(addr, tuple):
addr = parseaddr(addr) addr = force_str(addr)
nm, addr = addr try:
localpart, domain = None, None token, rest = parser.get_mailbox(addr)
except (HeaderParseError, ValueError, IndexError):
raise ValueError('Invalid address "%s"' % addr)
else:
if rest:
# The entire email address must be parsed.
raise ValueError(
'Invalid adddress; only %s could be parsed from "%s"'
% (token, addr)
)
nm = token.display_name or ''
localpart = token.local_part
domain = token.domain or ''
else:
nm, address = addr
localpart, domain = address.rsplit('@', 1)
nm = Header(nm, encoding).encode() nm = Header(nm, encoding).encode()
# Avoid UTF-8 encode, if it's possible.
try: try:
addr.encode('ascii') localpart.encode('ascii')
except UnicodeEncodeError: # IDN or non-ascii in the local part except UnicodeEncodeError:
localpart, domain = split_addr(addr, encoding) localpart = Header(localpart, encoding).encode()
# An `email.headerregistry.Address` object is used since
# email.utils.formataddr() naively encodes the name as ascii (see #25986).
if localpart and domain:
address = Address(nm, username=localpart, domain=domain)
return str(address)
try: try:
address = Address(nm, addr_spec=addr) domain.encode('ascii')
except (InvalidHeaderDefect, NonASCIILocalPartDefect): except UnicodeEncodeError:
localpart, domain = split_addr(addr, encoding) domain = domain.encode('idna').decode('ascii')
address = Address(nm, username=localpart, domain=domain)
return str(address) parsed_address = Address(nm, username=localpart, domain=domain)
return str(parsed_address)
class MIMEMixin: class MIMEMixin:

View File

@@ -748,10 +748,30 @@ class MailTests(HeadersCheckMixin, SimpleTestCase):
'utf-8', 'utf-8',
'=?utf-8?q?to=40other=2Ecom?= <to@example.com>', '=?utf-8?q?to=40other=2Ecom?= <to@example.com>',
), ),
(
('To Example', 'to@other.com@example.com'),
'utf-8',
'=?utf-8?q?To_Example?= <"to@other.com"@example.com>',
),
): ):
with self.subTest(email_address=email_address, encoding=encoding): with self.subTest(email_address=email_address, encoding=encoding):
self.assertEqual(sanitize_address(email_address, encoding), expected_result) self.assertEqual(sanitize_address(email_address, encoding), expected_result)
def test_sanitize_address_invalid(self):
for email_address in (
# Invalid address with two @ signs.
'to@other.com@example.com',
# Invalid address without the quotes.
'to@other.com <to@example.com>',
# Other invalid addresses.
'@',
'to@',
'@example.com',
):
with self.subTest(email_address=email_address):
with self.assertRaises(ValueError):
sanitize_address(email_address, encoding='utf-8')
@requires_tz_support @requires_tz_support
class MailTimeZoneTests(SimpleTestCase): class MailTimeZoneTests(SimpleTestCase):