[tor-commits] [bridgedb/develop] Only process text/plain part of emails.

phw at torproject.org phw at torproject.org
Tue Jul 14 02:22:57 UTC 2020


commit 61e63879157343571f89ec02e0e21dd19e270e64
Author: Philipp Winter <phw at nymity.ch>
Date:   Mon Jun 1 15:12:34 2020 -0700

    Only process text/plain part of emails.
    
    When our autoresponder receives a multipart email from a user, it may
    get confused by the parts that are not text/plain.  Instead of bending
    over backwards to parse all sorts of email encodings, this patch discards
    the parts of a multipart email that aren't text/plain.
    
    This fixes tpo/anti-censorship/bridgedb#33835.
---
 CHANGELOG                             |  4 +++
 bridgedb/distributors/email/server.py | 18 +++++++++-
 bridgedb/test/test_email_server.py    | 63 +++++++++++++++++++++++++++++++++++
 3 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index 860cbda..ad20b91 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,7 @@
+    * FIXES https://gitlab.torproject.org/tpo/anti-censorship/bridgedb/-/issues/33835
+    If a user sends a multipart email to our autoresponder, we now only parse
+    the part whose content type is text/plain.
+
     * FIXES https://bugs.torproject.org/33647
     Fix broken link in the README and fix our Makefile's pylint target.
 
diff --git a/bridgedb/distributors/email/server.py b/bridgedb/distributors/email/server.py
index 54a8231..b2baf86 100644
--- a/bridgedb/distributors/email/server.py
+++ b/bridgedb/distributors/email/server.py
@@ -50,6 +50,7 @@ Servers which interface with clients and distribute bridges over SMTP.
 from __future__ import unicode_literals
 
 import email.message
+import email.policy
 import logging
 import io
 import socket
@@ -228,6 +229,20 @@ class SMTPMessage(object):
         if not self.ignoring:
             self.message = self.getIncomingMessage()
             self.responder.reply()
+
+        # If a user sends a multipart email, we only consider the part whose
+        # content type is text/plain.
+        if self.message.is_multipart():
+            has_plaintext = False
+            for part in self.message.get_payload():
+                if part.get_content_type() != "text/plain":
+                    continue
+                self.lines = part.get_payload().split("\n")
+                has_plaintext = True
+
+            if not has_plaintext:
+                logging.warning("User email had no text/plain content type.")
+
         return defer.succeed(None)
 
     def connectionLost(self):
@@ -242,7 +257,8 @@ class SMTPMessage(object):
         :returns: A ``Message`` comprised of all lines received thus far.
         """
 
-        return email.message_from_string('\n'.join(self.lines))
+        return email.message_from_string('\n'.join(self.lines),
+                                         policy=email.policy.compat32)
 
 
 @implementer(smtp.IMessageDelivery)
diff --git a/bridgedb/test/test_email_server.py b/bridgedb/test/test_email_server.py
index fb82cc0..e0aee18 100644
--- a/bridgedb/test/test_email_server.py
+++ b/bridgedb/test/test_email_server.py
@@ -100,6 +100,69 @@ class SMTPMessageTests(unittest.TestCase):
         self.assertIsInstance(self.message.getIncomingMessage(),
                               email.message.Message)
 
+    def test_SMTPMessage_multipart1(self):
+        """`eomReceived` should get rid of HTML multiparts."""
+
+        # Gmail's web interface would send a message like this when replying to
+        # one of BridgeDB's emails.
+        multipartEmail = [
+        'MIME-Version: 1.0',
+        'Date: Mon, 1 Jun 2020 15:55:33 -0700',
+        'Subject: Re: test',
+        'From: Foo Bar <foo at bar.com>',
+        'To: bridges at bridges.torproject.org',
+        'Content-Type: multipart/alternative; boundary="00000000000041b34105a70db186"',
+        '',
+        '--00000000000041b34105a70db186',
+        'Content-Type: text/plain; charset="UTF-8"',
+        '',
+        'This is plaintext.',
+        '',
+        '--00000000000041b34105a70db186',
+        'Content-Type: text/html; charset="UTF-8"',
+        'Content-Transfer-Encoding: quoted-printable',
+        '',
+        'This is HTML.',
+        '',
+        '--00000000000041b34105a70db186--']
+
+        for line in multipartEmail:
+            self.message.lineReceived(line)
+        self.message.eomReceived()
+        content = "".join(self.message.lines)
+        self.assertTrue("This is plaintext." in content)
+        self.assertFalse("This is HTML." in content)
+
+    def test_SMTPMessage_multipart2(self):
+        """`eomReceived` should get rid of HTML multiparts."""
+
+        # Outlook would send a message like this when replying to one of
+        # BridgeDB's emails.
+        multipartEmail = [
+        'Content-Type: multipart/alternative;',
+        '              boundary="_000_VI1PR08MB4351A21D6C4A31C2B0FDA34F8CC10VI1PR08MB4351eurp_"',
+        '',
+        '--_000_VI1PR08MB4351A21D6C4A31C2B0FDA34F8CC10VI1PR08MB4351eurp_',
+        'Content-Type: text/plain; charset="us-ascii"',
+        'Content-Transfer-Encoding: quoted-printable',
+        '',
+        'This is plaintext.',
+        '',
+        '--_000_VI1PR08MB4351A21D6C4A31C2B0FDA34F8CC10VI1PR08MB4351eurp_',
+        'Content-Type: text/html; charset="us-ascii"',
+        'Content-Transfer-Encoding: quoted-printable',
+        '',
+        'This is HTML.',
+        '',
+        '--_000_VI1PR08MB4351A21D6C4A31C2B0FDA34F8CC10VI1PR08MB4351eurp_--']
+
+        for line in multipartEmail:
+            self.message.lineReceived(line)
+        self.message.eomReceived()
+        content = "".join(self.message.lines)
+        self.assertTrue("This is plaintext." in content)
+        self.assertFalse("This is HTML." in content)
+
 
 class SMTPIncomingDeliveryTests(unittest.TestCase):
     """Unittests for :class:`email.server.SMTPIncomingDelivery`."""





More information about the tor-commits mailing list