Skip to content

Commit

Permalink
Ensure UTF-8 surrogates are escaped on save - fix coddingtonbear#159
Browse files Browse the repository at this point in the history
  • Loading branch information
ad-m committed Jan 3, 2018
1 parent 7252295 commit 38d2ed1
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 5 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ include/*
dummy_project/*
.cache/
.tox/
messages
/messages

9 changes: 6 additions & 3 deletions django_mailbox/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,6 @@ def _get_dehydrated_message(self, msg, record):
def _process_message(self, message):
msg = Message()
settings = utils.get_settings()

if settings['store_original_message']:
self._process_save_original_message(message, msg)
msg.mailbox = self
Expand Down Expand Up @@ -386,10 +385,14 @@ def _process_message(self, message):

def _process_save_original_message(self, message, msg):
settings = utils.get_settings()
if six.PY3:
content = message.as_string().encode('ascii', 'surrogateescape')
else:
content = message.as_string()
if settings['compress_original_message']:
with NamedTemporaryFile(suffix=".eml.gz") as fp_tmp:
with gzip.GzipFile(fileobj=fp_tmp, mode="w") as fp:
fp.write(message.as_string().encode('utf-8'))
fp.write(content)
msg.eml.save(
"%s.eml.gz" % (uuid.uuid4(), ),
File(fp_tmp),
Expand All @@ -399,7 +402,7 @@ def _process_save_original_message(self, message, msg):
else:
msg.eml.save(
'%s.eml' % uuid.uuid4(),
ContentFile(message.as_string()),
ContentFile(content),
save=False
)

Expand Down
46 changes: 46 additions & 0 deletions django_mailbox/tests/messages/message_with_utf8_surrogates.eml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
Return-path: <SRS0=HZOvtU=DM=[email protected]>
Envelope-to: [email protected]
Delivery-date: Sat, 16 Dec 2017 16:22:42 +0100
Received: from mx1.wp.pl ([212.77.101.6])
by s50.hekko.net.pl with esmtps (TLSv1.2:ECDHE-RSA-AES256-GCM-SHA384:256)
(Exim 4.89) (envelope-from <[email protected]>) id 1eQEII-0005Fu-Cs
for [email protected]; Sat, 16 Dec 2017 16:22:42 +0100
Received: (wp-smtpd smtp.wp.pl 33592 invoked from network);
16 Dec 2017 16:22:11 +0100
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=wp.pl; s=1024a;
t=1513437731; bh=6Ox0FVna7vMxBu5CbF0s6HvpkNalENwSSgDxNJ/Rsdc=;
h=From:To:Subject;
b=QnrdNDBDNLuENN9QS0Pvn85/bDE1Fc6jJrvKUdnApFrykwzbHxXxCG4qX7g3sS3Qj
xxHGNf8UXZh3zyCln2EZpUD03LgkppMpTbv3tLKA4HSnaT7txr6AWHq2y8A/YQo7EY
2806CYWtFCKYoVolzDN9lctM2nEoZpD5jOVZqYsM=
Received: from public-gprs394416.centertel.pl (HELO REDACTED)
([email protected]@[37.47.171.241]) (envelope-sender <[email protected]>)
by smtp.wp.pl (WP-SMTPD) with SMTP
for <[email protected]>; 16 Dec 2017 16:22:11 +0100
Message-ID: <BD6D8EECA3A74E9A9ABD7A1EEA593F76@REDACTED>
From: <[email protected]>
To: <[email protected]>
Subject: =?windows-1250?Q?Do_czego_te=BF_s=B9_zdolni_Polscy_s=EAdziowie_..._?=
Date: Sat, 16 Dec 2017 16:21:04 +0100
MIME-Version: 1.0
Content-Type: multipart/mixed;
boundary="----=_NextPart_000_0018_01D37689.E058CEF0"
X-Spam-Status: No, message_size=7331926 larger than 200K

To jest wielocz�ciowa wiadomo�� w formacie MIME.

------=_NextPart_000_0018_01D37689.E058CEF0
Content-Type: multipart/alternative;
boundary="----=_NextPart_001_0019_01D37689.E058CEF0"
------=_NextPart_001_0019_01D37689.E058CEF0
Content-Type: text/plain;
charset="windows-1250"
Content-Transfer-Encoding: quoted-printable
REDACTED
------=_NextPart_001_0019_01D37689.E058CEF0--

------=_NextPart_000_0018_01D37689.E058CEF0--
36 changes: 35 additions & 1 deletion django_mailbox/tests/test_process_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,40 @@ def test_message_with_utf8_attachment_header(self):
u'odpowied\u017a Burmistrza.jpg'
)

def test_message_with_utf8_surrogates(self):
"""Ensure that a message containing UTF-8 surrogates is processed and stored.
The problem was observed in Python 3.5. This test guards against a
regression of #159.
"""

# Load the fixture message that reproduces the issue.
email_object = self._get_email_object(
'message_with_utf8_surrogates.eml',
)
mailbox = Mailbox.objects.create()
default_settings = utils.get_settings()
with mock.patch('django_mailbox.utils.get_settings') as get_settings:
# Work on a deep copy so the default settings are not mutated.
altered = copy.deepcopy(default_settings)
# Force storing of the original message for this test.
altered['store_original_message'] = True

get_settings.return_value = altered

# Before the fix for #159 this call raised UnicodeEncodeError.
msg = mailbox.process_incoming_message(email_object)

# The subject is expected to be decoded to the proper Unicode text.
self.assertEqual(
msg.subject,
u'Do czego te\u017c s\u0105 zdolni Polscy s\u0119dziowie ... '
)

# No attachments are expected for this message.
self.assertEqual(
msg.attachments.count(),
0
)

# The stored original must equal what _get_email_as_text returns
# for the same fixture.
with open(msg.eml.name, 'rb') as f:
self.assertEqual(f.read(),
self._get_email_as_text('message_with_utf8_surrogates.eml'))

def test_message_get_text_body(self):
message = self._get_email_object('multipart_text.eml')

Expand Down Expand Up @@ -468,4 +502,4 @@ def test_message_compressed(self):

with gzip.open(msg.eml.name, 'rb') as f:
self.assertEqual(f.read(),
self._get_email_as_text('generic_message.eml'))
self._get_email_as_text('generic_message.eml'))

0 comments on commit 38d2ed1

Please sign in to comment.