[tor-commits] [gettor/master] Only parse email subject and body

cohosh at torproject.org cohosh at torproject.org
Tue Jan 12 19:01:02 UTC 2021


commit 6a95ed9e0b9cc99cc77567164c117876aeecf376
Author: Cecylia Bocovich <cohosh at torproject.org>
Date:   Mon Jan 11 16:41:26 2021 -0500

    Only parse email subject and body
    
    Closes issue #75, where the headers were being parsed for the locale
    instead of the message body.
---
 gettor/parse/email.py       | 12 +++++-
 tests/test_email_service.py | 90 +++++++++++++++++++++++++++++++--------------
 2 files changed, 72 insertions(+), 30 deletions(-)

diff --git a/gettor/parse/email.py b/gettor/parse/email.py
index fa945ba..1f70ea3 100644
--- a/gettor/parse/email.py
+++ b/gettor/parse/email.py
@@ -145,9 +145,15 @@ class EmailParser(object):
 
     def build_request(self, msg_str, norm_addr):
         # Search for commands keywords
-        subject_re = re.compile(r"Subject: (.*)\r\n")
+        subject_re = re.compile("Subject: (.*)\n")
         subject = subject_re.search(msg_str)
 
+        # the body of a message is "a sequence of characters that follows the header
+        # section and is separated from the header section by an empty line"
+        # https://tools.ietf.org/html/rfc5322#section-2.1
+        body_re = re.compile("\r?\n\r?\n(.*)$", re.DOTALL)
+        body = body_re.search(msg_str)
+
         request = {
             "id": norm_addr,
             "command": None,
@@ -161,7 +167,9 @@ class EmailParser(object):
             request = self.parse_keywords(subject, request)
 
         # Always parse the body too, to see if there's more specific information
-        request = self.parse_keywords(msg_str, request)
+        if body:
+            body = body.group(1)
+            request = self.parse_keywords(body, request)
 
         if not request["language"]:
             request["language"] = "en-US"
diff --git a/tests/test_email_service.py b/tests/test_email_service.py
index 47d7e5f..de421a6 100644
--- a/tests/test_email_service.py
+++ b/tests/test_email_service.py
@@ -57,7 +57,7 @@ class EmailServiceTests(unittest.TestCase):
 
     def test_build_request(self):
         ep = conftests.EmailParser(self.settings, "gettor at torproject.org")
-        msg_str = "From: \"silvia [hiro]\" <hiro at torproject.org>\n Subject: \r\n Reply-To: hiro at torproject.org \nTo: gettor at torproject.org\r\n osx es"
+        msg_str = "From: \"silvia [hiro]\" <hiro at torproject.org>\n Subject: \r\n Reply-To: hiro at torproject.org \nTo: gettor at torproject.org\r\n\r\n osx es"
         msg = conftests.message_from_string(msg_str)
         ep.locales = ["es", "en"]
         request = ep.build_request(msg_str, "hiro at torproject.org")
@@ -81,32 +81,32 @@ class EmailServiceTests(unittest.TestCase):
         ep.locales = ["en-US", "es-ES", "es-AR", "pt-BR", "fa"]
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n\n")
+                "gettor at torproject.org\r\n\r\n")
         self.assertEqual(request["language"], "en-US")
         self.assertEqual(request["command"], "help")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n\n please send me tor\n")
+                "gettor at torproject.org\r\n\r\n please send me tor\n")
         self.assertEqual(request["language"], "en-US")
         self.assertEqual(request["command"], "help")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n\nwindows\n")
+                "gettor at torproject.org\r\n\r\nwindows\n")
         self.assertEqual(request["language"], "en-US")
         self.assertEqual(request["platform"], "windows")
         self.assertEqual(request["command"], "links")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n\n fa\n")
+                "gettor at torproject.org\r\n\r\n fa\n")
         self.assertEqual(request["language"], "fa")
         self.assertEqual(request["command"], "help")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n\n please help me get tor for windows\n")
+                "gettor at torproject.org\r\n\r\n please help me get tor for windows\n")
         self.assertEqual(request["language"], "en-US")
         self.assertEqual(request["command"], "links")
         self.assertEqual(request["platform"], "windows")
@@ -116,60 +116,93 @@ class EmailServiceTests(unittest.TestCase):
         ep.locales = ["en-US", "es-ES", "es-AR", "pt-BR", "fa"]
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n osx en")
+                "gettor at torproject.org\r\n\r\n osx en")
         self.assertEqual(request["language"], "en-US")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n osx ES")
+                "gettor at torproject.org\r\n\r\n osx ES")
         self.assertEqual(request["language"], "es-ES")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n osx en-US")
+                "gettor at torproject.org\r\n\r\n osx en-US")
         self.assertEqual(request["language"], "en-US")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n linux fa")
+                "gettor at torproject.org\r\n\r\n linux fa")
         self.assertEqual(request["language"], "fa")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n osx es")
+                "gettor at torproject.org\r\n\r\n osx es")
         self.assertEqual(request["language"], "es-ES")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n linux zz")
+                "gettor at torproject.org\r\n\r\n linux zz")
         self.assertEqual(request["language"], "en-US")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n linux pt-PT")
+                "gettor at torproject.org\r\n\r\n linux pt-PT")
         self.assertEqual(request["language"], "pt-BR")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: \r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n linux es-AR")
+                "gettor at torproject.org\r\n\r\n linux es-AR")
         self.assertEqual(request["language"], "es-AR")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: linux es\r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n linux es-AR")
+                "gettor at torproject.org\r\n\r\n linux es-AR")
         self.assertEqual(request["language"], "es-AR")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: linux es\r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n linux")
+                "gettor at torproject.org\r\n\r\n linux")
         self.assertEqual(request["language"], "es-ES")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: linux es-AR\r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n linux es")
+                "gettor at torproject.org\r\n\r\n linux es")
         self.assertEqual(request["language"], "es-AR")
         del ep
 
+    def test_body_subject_parser(self):
+        ep = conftests.EmailParser(self.settings, "gettor at torproject.org")
+        ep.locales = ["en-US", "es-ES", "es-AR", "pt-BR", "fa"]
+        request = ep.parse(
+            "To: gettor at torproject.org\r\n"
+            "From: Cecylia Bocovich <cohosh at torproject.org>\r\n"
+            "Subject: windows es\r\n"
+            "Message-ID: <0befc58c-c94d-c262-9597-4365122c04b4 at torproject.org>\r\n"
+            "Date: Mon, 11 Jan 2021 11:28:37 -0500\r\n"
+            "MIME-Version: 1.0\r\n"
+            "Content-Type: text/plain; charset=utf-8\r\n"
+            "Content-Language: en-US\r\n"
+            "Content-Transfer-Encoding: 7bit\r\n"
+            "\r\n"
+            "windows es\r\n"
+        )
+        self.assertEqual(request["language"], "es-ES")
+        request = ep.parse(
+            "To: gettor at torproject.org\n"
+            "From: Cecylia Bocovich <cohosh at torproject.org>\n"
+            "Subject: linux fa\n"
+            "Message-ID: <0befc58c-c94d-c262-9597-4365122c04b4 at torproject.org>\n"
+            "Date: Mon, 11 Jan 2021 11:28:37 -0500\n"
+            "MIME-Version: 1.0\n"
+            "Content-Type: text/plain; charset=utf-8\n"
+            "Content-Language: en-US\n"
+            "Content-Transfer-Encoding: 7bit\n"
+            "\n"
+            "linux fa\n"
+        )
+        self.assertEqual(request["language"], "fa")
+        del ep
+
     @pytest_twisted.inlineCallbacks
     def test_sent_links_message(self):
         ep = self.sm_client
@@ -216,19 +249,19 @@ class EmailServiceTests(unittest.TestCase):
         ep.locales = ["en-US", "es-ES", "es-AR", "pt-BR", "fa", "fr"]
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: Re: [GetTor] Help Email\r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n osx en\n")
+                "gettor at torproject.org\r\n\r\n osx en\n")
         self.assertEqual(request["command"], "links")
         self.assertEqual(request["language"], "en-US")
         self.assertEqual(request["platform"], "osx")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: Re: [GetTor] Help Email\r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n i like french fries\n")
+                "gettor at torproject.org\r\n\r\n i like french fries\n")
         self.assertEqual(request["command"], "help")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: Re: [GetTor] Help Email\r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\nlinux fa\n\n"
+                "gettor at torproject.org\r\n\r\nlinux fa\n"
                 "On 2020-02-10 11:54 a.m., gettor at torproject.org wrote:\n"
                 "> This is how you can request a tor browser bundle link.\n"
                 ">\n"
@@ -237,14 +270,15 @@ class EmailServiceTests(unittest.TestCase):
                 "> In the body of the email only write: <operating system> <language>.\n"
                 ">\n"
                 "> We only support windows, osx and linux as operating systems.\n"
-                ">\n")
+                ">\n"
+        )
         self.assertEqual(request["command"], "links")
         self.assertEqual(request["language"], "fa")
         self.assertEqual(request["platform"], "linux")
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: Re: [GetTor] Help Email\r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\nlinux fa\n\n"
+                "gettor at torproject.org\r\n\r\nlinux fa\n"
                 "On 2020-02-10 11:54 a.m., gettor at torproject.org wrote:\n"
                 "This is how you can request a tor browser bundle link.\n"
                 "\n"
@@ -260,7 +294,7 @@ class EmailServiceTests(unittest.TestCase):
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: Re: [GetTor] Help Email\r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n"
+                "gettor at torproject.org\r\n\r\n"
                 "On 2020-02-10 11:54 a.m., gettor at torproject.org wrote:\n"
                 "> This is how you can request a tor browser bundle link.\n"
                 ">\n"
@@ -277,7 +311,7 @@ class EmailServiceTests(unittest.TestCase):
 
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n"
                 "Subject: Re: [GetTor] Help Email\r\n Reply-To: hiro at torproject.org \nTo:"
-                "gettor at torproject.org\n"
+                "gettor at torproject.org\r\n\r\n"
                 "On 2020-02-10 11:54 a.m., gettor at torproject.org wrote:\n"
                 "> This is how you can request a tor browser bundle link.\n"
                 ">\n"
@@ -293,22 +327,22 @@ class EmailServiceTests(unittest.TestCase):
         ep = conftests.EmailParser(self.settings, "gettor at torproject.org")
         request = ep.parse("From: MAILER-DAEMON at mx1.riseup.net\n"
                 "Subject: Undelivered Mail Returned to Sender\r\n"
-                "To: gettor at torproject.org\n osx en\n")
+                "To: gettor at torproject.org\r\n\r\n osx en\n")
 
         self.assertEqual(request, {})
         request = ep.parse("From: postmaster at example.sk\n"
                 "Subject: Undelivered Mail Returned to Sender\r\n"
-                "To: gettor at torproject.org\n\n osx en\n")
+                "To: gettor at torproject.org\r\n\r\n osx en\n")
 
         self.assertEqual(request, {})
         request = ep.parse("From: gettor at torproject.org\n"
                 "Subject: links\r\n"
-                "To: gettor at torproject.org\n\n osx en\n")
+                "To: gettor at torproject.org\r\n\r\n osx en\n")
 
         self.assertEqual(request, {})
         request = ep.parse("From: gettor+en at torproject.org\n"
                 "Subject: links\r\n"
-                "To: gettor at torproject.org\n\n osx en\n")
+                "To: gettor at torproject.org\r\n\r\n osx en\n")
 
         self.assertEqual(request, {})
 



More information about the tor-commits mailing list