[tor-commits] [gettor/master] Modified email parser to get locales from db

cohosh at torproject.org cohosh at torproject.org
Fri Jan 31 14:27:36 UTC 2020


commit 187e27da768d9fda5033da03b6c922a0baaa5d0c
Author: Cecylia Bocovich <cohosh at torproject.org>
Date:   Fri Jan 24 17:26:55 2020 -0500

    Modified email parser to get locales from db
    
    Previously, we conflated the locales available for the Tor Browser
    binaries with the locales available for GetTor's email body
    localizations. This patch makes the email parser check our links
    database for the locales in which binaries are actually available.
---
 gettor/parse/email.py       | 19 ++++++++++++++-----
 gettor/utils/db.py          |  8 ++++++++
 scripts/process_email       |  1 +
 tests/test.conf.json        |  2 +-
 tests/test_email_service.py | 11 +++++++++--
 5 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/gettor/parse/email.py b/gettor/parse/email.py
index 4629b7c..56b91bf 100644
--- a/gettor/parse/email.py
+++ b/gettor/parse/email.py
@@ -56,6 +56,7 @@ class EmailParser(object):
         self.settings = settings
         self.dkim = dkim
         self.to_addr = to_addr
+        self.locales = []
 
     def normalize(self, msg):
         # Normalization will convert <Alice Wonderland> alice at wonderland.net
@@ -112,7 +113,7 @@ class EmailParser(object):
             return True
 
 
-    def build_request(self, msg_str, norm_addr, languages, platforms):
+    def build_request(self, msg_str, norm_addr, platforms):
         # Search for commands keywords
         subject_re = re.compile(r"Subject: (.*)\r\n")
         subject = subject_re.search(msg_str)
@@ -128,7 +129,7 @@ class EmailParser(object):
         if subject:
             subject = subject.group(1)
             for word in re.split(r"\s+", subject.strip()):
-                if word.lower() in languages:
+                if word.lower() in self.locales:
                     request["language"] = word.lower()
                 if word.lower() in platforms:
                     request["command"] = "links"
@@ -139,7 +140,7 @@ class EmailParser(object):
 
         if not request["command"] or not request["language"]:
             for word in re.split(r"\s+", msg_str.strip()):
-                if word.lower() in languages:
+                if word.lower() in self.locales:
                     request["language"] = word.lower()
                 if word.lower() in platforms:
                     request["command"] = "links"
@@ -159,6 +160,15 @@ class EmailParser(object):
         else:
             return True
 
+    @defer.inlineCallbacks
+    def get_locales(self):
+        dbname = self.settings.get("dbname")
+        conn = SQLite3(dbname)
+
+        locales = yield conn.get_locales()
+        for l in locales:
+            self.locales.append(l[0])
+
 
     def parse(self, msg_str):
         """
@@ -177,7 +187,6 @@ class EmailParser(object):
         log.msg("Building email message from string.", system="email parser")
 
         platforms = self.settings.get("platforms")
-        languages = [*strings.get_locales().keys()]
         msg = message_from_string(msg_str)
 
         name, norm_addr, to_name, norm_to_addr = self.normalize(msg)
@@ -203,7 +212,7 @@ class EmailParser(object):
         except ValueError as e:
             log.msg("DKIM error: {}".format(e.args))
 
-        request = self.build_request(msg_str, norm_addr, languages, platforms)
+        request = self.build_request(msg_str, norm_addr, platforms)
 
         return request
 
diff --git a/gettor/utils/db.py b/gettor/utils/db.py
index 525287b..1ccdf8e 100644
--- a/gettor/utils/db.py
+++ b/gettor/utils/db.py
@@ -104,3 +104,11 @@ class SQLite3(object):
 		return self.dbpool.runQuery(
 			query, (platform, language, status)
 		).addCallback(self.query_callback).addErrback(self.query_errback)
+
+	def get_locales(self):
+		"""
+		Get a list of the supported tor browser binary locales
+		"""
+		query = "SELECT DISTINCT language FROM links"
+		return self.dbpool.runQuery(query
+		).addCallback(self.query_callback).addErrback(self.query_errback)
diff --git a/scripts/process_email b/scripts/process_email
index 37c4e0b..cce7bcc 100755
--- a/scripts/process_email
+++ b/scripts/process_email
@@ -28,6 +28,7 @@ def process_email(message):
 
     try:
         ep = EmailParser(settings, "gettor at torproject.org")
+        yield ep.get_locales().addErrback(ep.parse_errback)
         yield defer.maybeDeferred(
             ep.parse, message
         ).addCallback(ep.parse_callback).addErrback(ep.parse_errback)
diff --git a/tests/test.conf.json b/tests/test.conf.json
index 8f296cc..03bbaf6 100644
--- a/tests/test.conf.json
+++ b/tests/test.conf.json
@@ -1,6 +1,6 @@
 {
   "platforms": ["linux", "osx", "windows"],
-  "dbname": "gettor.db",
+  "dbname": "tests/gettor.db",
   "email_parser_logfile": "email_parser.log",
   "email_requests_limit": 30,
   "twitter_requests_limit": 1,
diff --git a/tests/test_email_service.py b/tests/test_email_service.py
index 187711e..5fa87fc 100644
--- a/tests/test_email_service.py
+++ b/tests/test_email_service.py
@@ -65,9 +65,9 @@ class EmailServiceTests(unittest.TestCase):
         ep = conftests.EmailParser(self.settings, "gettor at torproject.org")
         msg_str = "From: \"silvia [hiro]\" <hiro at torproject.org>\n Subject: \r\n Reply-To: hiro at torproject.org \nTo: gettor at torproject.org\r\n osx es"
         msg = conftests.message_from_string(msg_str)
-        languages = [*self.locales.keys()]
         platforms = self.settings.get('platforms')
-        request = ep.build_request(msg_str, "hiro at torproject.org", languages, platforms)
+        ep.locales = ["es", "en"]
+        request = ep.build_request(msg_str, "hiro at torproject.org", platforms)
         self.assertEqual(request["command"], "links")
         self.assertEqual(request["platform"], "osx")
         self.assertEqual(request["language"], "es")
@@ -83,11 +83,18 @@ class EmailServiceTests(unittest.TestCase):
 
     def test_language_email_parser(self):
         ep = conftests.EmailParser(self.settings, "gettor at torproject.org")
+        ep.locales = ["en", "ru"]
         request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n Subject: \r\n Reply-To: hiro at torproject.org \nTo: gettor at torproject.org\n osx en")
         self.assertEqual(request["command"], "links")
         self.assertEqual(request["platform"], "osx")
         self.assertEqual(request["language"], "en")
 
+        request = ep.parse("From: \"silvia [hiro]\" <hiro at torproject.org>\n Subject: \r\n Reply-To: hiro at torproject.org \nTo: gettor at torproject.org\n linux ru")
+        self.assertEqual(request["command"], "links")
+        self.assertEqual(request["platform"], "linux")
+        self.assertEqual(request["language"], "ru")
+
+
     def test_sent_links_message(self):
         ep = self.sm_client
         links = self.links





More information about the tor-commits mailing list