Skip to content

Commit

Permalink
[3.12] pythongh-106669: Revert "pythongh-102988: Detect email address…
Browse files Browse the repository at this point in the history
… parsing errors ... (pythonGH-105127)" (pythonGH-106733) (python#106941)

This reverts commit 18dfbd0.
Adds a regression test from the issue.

See python#106669..
(cherry picked from commit a31dea1)
  • Loading branch information
gpshead authored and mgorny committed Jul 23, 2023
1 parent 1c4c1b0 commit 401e398
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 167 deletions.
26 changes: 1 addition & 25 deletions Doc/library/email.utils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,6 @@ of the new API.
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.

.. versionchanged:: 3.12
For security reasons, addresses that were ambiguous and could parse into
multiple different addresses now cause ``('', '')`` to be returned
instead of only one of the *potential* addresses.


.. function:: formataddr(pair, charset='utf-8')

Expand All @@ -92,7 +87,7 @@ of the new API.
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
example that gets all the recipients of a message:
example that gets all the recipients of a message::

from email.utils import getaddresses

Expand All @@ -102,25 +97,6 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)

When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
is returned in its place. Other errors in parsing the list of
addresses such as a fieldvalue seemingly parsing into multiple
addresses may result in a list containing a single empty 2-tuple
``[('', '')]`` being returned rather than returning potentially
invalid output.

Example malformed input parsing:

.. doctest::

>>> from email.utils import getaddresses
>>> getaddresses(['[email protected] <[email protected]>', '[email protected]'])
[('', '')]

.. versionchanged:: 3.12
The 2-tuple of ``('', '')`` in the returned values when parsing
fails were added as to address a security issue.


.. function:: parsedate(date)

Expand Down
8 changes: 0 additions & 8 deletions Doc/whatsnew/3.12.rst
Original file line number Diff line number Diff line change
Expand Up @@ -570,14 +570,6 @@ dis
:data:`~dis.hasarg` collection instead.
(Contributed by Irit Katriel in :gh:`94216`.)

email
-----

* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
``('', '')`` 2-tuples in more situations where invalid email addresses are
encountered instead of potentially inaccurate values.
(Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043.)

fractions
---------

Expand Down
63 changes: 6 additions & 57 deletions Lib/email/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,54 +106,12 @@ def formataddr(pair, charset='utf-8'):
return address


def _pre_parse_validation(email_header_fields):
accepted_values = []
for v in email_header_fields:
s = v.replace('\\(', '').replace('\\)', '')
if s.count('(') != s.count(')'):
v = "('', '')"
accepted_values.append(v)

return accepted_values


def _post_parse_validation(parsed_email_header_tuples):
accepted_values = []
# The parser would have parsed a correctly formatted domain-literal
# The existence of an [ after parsing indicates a parsing failure
for v in parsed_email_header_tuples:
if '[' in v[1]:
v = ('', '')
accepted_values.append(v)

return accepted_values


def getaddresses(fieldvalues):
"""Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
its place.
If the resulting list of parsed address is not the same as the number of
fieldvalues in the input list a parsing error has occurred. A list
containing a single empty 2-tuple [('', '')] is returned in its place.
This is done to avoid invalid output.
"""
fieldvalues = [str(v) for v in fieldvalues]
fieldvalues = _pre_parse_validation(fieldvalues)
all = COMMASPACE.join(v for v in fieldvalues)
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
all = COMMASPACE.join(str(v) for v in fieldvalues)
a = _AddressList(all)
result = _post_parse_validation(a.addresslist)

n = 0
for v in fieldvalues:
n += v.count(',') + 1

if len(result) != n:
return [('', '')]

return result
return a.addresslist


def _format_timetuple_and_zone(timetuple, zone):
Expand Down Expand Up @@ -254,18 +212,9 @@ def parseaddr(addr):
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
"""
if isinstance(addr, list):
addr = addr[0]

if not isinstance(addr, str):
return ('', '')

addr = _pre_parse_validation([addr])[0]
addrs = _post_parse_validation(_AddressList(addr).addresslist)

if not addrs or len(addrs) > 1:
return ('', '')

addrs = _AddressList(addr).addresslist
if not addrs:
return '', ''
return addrs[0]


Expand Down
96 changes: 19 additions & 77 deletions Lib/test/test_email/test_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -3319,90 +3319,32 @@ def test_getaddresses(self):
[('Al Person', '[email protected]'),
('Bud Person', '[email protected]')])

def test_getaddresses_parsing_errors(self):
"""Test for parsing errors from CVE-2023-27043"""
eq = self.assertEqual
eq(utils.getaddresses(['[email protected](<[email protected]>']),
[('', '')])
eq(utils.getaddresses(['[email protected])<[email protected]>']),
[('', '')])
eq(utils.getaddresses(['[email protected]<<[email protected]>']),
[('', '')])
eq(utils.getaddresses(['[email protected]><[email protected]>']),
[('', '')])
eq(utils.getaddresses(['[email protected]@<[email protected]>']),
[('', '')])
eq(utils.getaddresses(['[email protected],<[email protected]>']),
[('', '[email protected]'), ('', '[email protected]')])
eq(utils.getaddresses(['[email protected];<[email protected]>']),
[('', '')])
eq(utils.getaddresses(['[email protected]:<[email protected]>']),
[('', '')])
eq(utils.getaddresses(['[email protected].<[email protected]>']),
[('', '')])
eq(utils.getaddresses(['[email protected]"<[email protected]>']),
[('', '')])
eq(utils.getaddresses(['[email protected][<[email protected]>']),
[('', '')])
eq(utils.getaddresses(['[email protected]]<[email protected]>']),
[('', '')])

def test_parseaddr_parsing_errors(self):
"""Test for parsing errors from CVE-2023-27043"""
eq = self.assertEqual
eq(utils.parseaddr(['[email protected](<[email protected]>']),
('', ''))
eq(utils.parseaddr(['[email protected])<[email protected]>']),
('', ''))
eq(utils.parseaddr(['[email protected]<<[email protected]>']),
('', ''))
eq(utils.parseaddr(['[email protected]><[email protected]>']),
('', ''))
eq(utils.parseaddr(['[email protected]@<[email protected]>']),
('', ''))
eq(utils.parseaddr(['[email protected],<[email protected]>']),
('', ''))
eq(utils.parseaddr(['[email protected];<[email protected]>']),
('', ''))
eq(utils.parseaddr(['[email protected]:<[email protected]>']),
('', ''))
eq(utils.parseaddr(['[email protected].<[email protected]>']),
('', ''))
eq(utils.parseaddr(['[email protected]"<[email protected]>']),
('', ''))
eq(utils.parseaddr(['[email protected][<[email protected]>']),
('', ''))
eq(utils.parseaddr(['[email protected]]<[email protected]>']),
('', ''))
def test_getaddresses_comma_in_name(self):
"""GH-106669 regression test."""
self.assertEqual(
utils.getaddresses(
[
'"Bud, Person" <[email protected]>',
'[email protected] (Al Person)',
'"Mariusz Felisiak" <[email protected]>',
]
),
[
('Bud, Person', '[email protected]'),
('Al Person', '[email protected]'),
('Mariusz Felisiak', '[email protected]'),
],
)

def test_getaddresses_nasty(self):
eq = self.assertEqual
eq(utils.getaddresses(['foo: ;']), [('', '')])
eq(utils.getaddresses(['[]*-- =~$']), [('', '')])
eq(utils.getaddresses(
['[]*-- =~$']),
[('', ''), ('', ''), ('', '*--')])
eq(utils.getaddresses(
['foo: ;', '"Jason R. Mastaler" <[email protected]>']),
[('', ''), ('Jason R. Mastaler', '[email protected]')])
eq(utils.getaddresses(
[r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']),
[('Pete (A nice ) chap his account his host)', '[email protected]')])
eq(utils.getaddresses(
['(Empty list)(start)Undisclosed recipients :(nobody(I know))']),
[('', '')])
eq(utils.getaddresses(
['Mary <@machine.tld:[email protected]>, , jdoe@test . example']),
[('Mary', '[email protected]'), ('', ''), ('', '[email protected]')])
eq(utils.getaddresses(
['John Doe <jdoe@machine(comment). example>']),
[('John Doe (comment)', '[email protected]')])
eq(utils.getaddresses(
['"Mary Smith: Personal Account" <[email protected]>']),
[('Mary Smith: Personal Account', '[email protected]')])
eq(utils.getaddresses(
['Undisclosed recipients:;']),
[('', '')])
eq(utils.getaddresses(
[r'<[email protected]>, "Giant; \"Big\" Box" <[email protected]>']),
[('', '[email protected]'), ('Giant; "Big" Box', '[email protected]')])

def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Reverted the :mod:`email.utils` security improvement change released in
3.12beta4 that unintentionally caused :mod:`email.utils.getaddresses` to fail
to parse email addresses with a comma in the quoted name field.
See :gh:`106669`.

0 comments on commit 401e398

Please sign in to comment.