commit 187e27da768d9fda5033da03b6c922a0baaa5d0c Author: Cecylia Bocovich <cohosh@torproject.org> Date: Fri Jan 24 17:26:55 2020 -0500
Modified email parser to get locales from db
Previously, we were conflating the locales of the Tor Browser binaries with the locales available for GetTor email body localizations. This patch checks our links database for the available locales. --- gettor/parse/email.py | 19 ++++++++++++++----- gettor/utils/db.py | 8 ++++++++ scripts/process_email | 1 + tests/test.conf.json | 2 +- tests/test_email_service.py | 11 +++++++++-- 5 files changed, 33 insertions(+), 8 deletions(-)
diff --git a/gettor/parse/email.py b/gettor/parse/email.py index 4629b7c..56b91bf 100644 --- a/gettor/parse/email.py +++ b/gettor/parse/email.py @@ -56,6 +56,7 @@ class EmailParser(object): self.settings = settings self.dkim = dkim self.to_addr = to_addr + self.locales = []
def normalize(self, msg): # Normalization will convert <Alice Wonderland> alice@wonderland.net @@ -112,7 +113,7 @@ class EmailParser(object): return True
- def build_request(self, msg_str, norm_addr, languages, platforms): + def build_request(self, msg_str, norm_addr, platforms): # Search for commands keywords subject_re = re.compile(r"Subject: (.*)\r\n") subject = subject_re.search(msg_str) @@ -128,7 +129,7 @@ class EmailParser(object): if subject: subject = subject.group(1) for word in re.split(r"\s+", subject.strip()): - if word.lower() in languages: + if word.lower() in self.locales: request["language"] = word.lower() if word.lower() in platforms: request["command"] = "links" @@ -139,7 +140,7 @@ class EmailParser(object):
if not request["command"] or not request["language"]: for word in re.split(r"\s+", msg_str.strip()): - if word.lower() in languages: + if word.lower() in self.locales: request["language"] = word.lower() if word.lower() in platforms: request["command"] = "links" @@ -159,6 +160,15 @@ class EmailParser(object): else: return True
+ @defer.inlineCallbacks + def get_locales(self): + dbname = self.settings.get("dbname") + conn = SQLite3(dbname) + + locales = yield conn.get_locales() + for l in locales: + self.locales.append(l[0]) +
def parse(self, msg_str): """ @@ -177,7 +187,6 @@ class EmailParser(object): log.msg("Building email message from string.", system="email parser")
platforms = self.settings.get("platforms") - languages = [*strings.get_locales().keys()] msg = message_from_string(msg_str)
name, norm_addr, to_name, norm_to_addr = self.normalize(msg) @@ -203,7 +212,7 @@ class EmailParser(object): except ValueError as e: log.msg("DKIM error: {}".format(e.args))
- request = self.build_request(msg_str, norm_addr, languages, platforms) + request = self.build_request(msg_str, norm_addr, platforms)
return request
diff --git a/gettor/utils/db.py b/gettor/utils/db.py index 525287b..1ccdf8e 100644 --- a/gettor/utils/db.py +++ b/gettor/utils/db.py @@ -104,3 +104,11 @@ class SQLite3(object): return self.dbpool.runQuery( query, (platform, language, status) ).addCallback(self.query_callback).addErrback(self.query_errback) + + def get_locales(self): + """ + Get a list of the supported tor browser binary locales + """ + query = "SELECT DISTINCT language FROM links" + return self.dbpool.runQuery(query + ).addCallback(self.query_callback).addErrback(self.query_errback) diff --git a/scripts/process_email b/scripts/process_email index 37c4e0b..cce7bcc 100755 --- a/scripts/process_email +++ b/scripts/process_email @@ -28,6 +28,7 @@ def process_email(message):
try: ep = EmailParser(settings, "gettor@torproject.org") + yield ep.get_locales().addErrback(ep.parse_errback) yield defer.maybeDeferred( ep.parse, message ).addCallback(ep.parse_callback).addErrback(ep.parse_errback) diff --git a/tests/test.conf.json b/tests/test.conf.json index 8f296cc..03bbaf6 100644 --- a/tests/test.conf.json +++ b/tests/test.conf.json @@ -1,6 +1,6 @@ { "platforms": ["linux", "osx", "windows"], - "dbname": "gettor.db", + "dbname": "tests/gettor.db", "email_parser_logfile": "email_parser.log", "email_requests_limit": 30, "twitter_requests_limit": 1, diff --git a/tests/test_email_service.py b/tests/test_email_service.py index 187711e..5fa87fc 100644 --- a/tests/test_email_service.py +++ b/tests/test_email_service.py @@ -65,9 +65,9 @@ class EmailServiceTests(unittest.TestCase): ep = conftests.EmailParser(self.settings, "gettor@torproject.org") msg_str = "From: "silvia [hiro]" hiro@torproject.org\n Subject: \r\n Reply-To: hiro@torproject.org \nTo: gettor@torproject.org\r\n osx es" msg = conftests.message_from_string(msg_str) - languages = [*self.locales.keys()] platforms = self.settings.get('platforms') - request = ep.build_request(msg_str, "hiro@torproject.org", languages, platforms) + ep.locales = ["es", "en"] + request = ep.build_request(msg_str, "hiro@torproject.org", platforms) self.assertEqual(request["command"], "links") self.assertEqual(request["platform"], "osx") self.assertEqual(request["language"], "es") @@ -83,11 +83,18 @@ class EmailServiceTests(unittest.TestCase):
def test_language_email_parser(self): ep = conftests.EmailParser(self.settings, "gettor@torproject.org") + ep.locales = ["en", "ru"] request = ep.parse("From: "silvia [hiro]" hiro@torproject.org\n Subject: \r\n Reply-To: hiro@torproject.org \nTo: gettor@torproject.org\n osx en") self.assertEqual(request["command"], "links") self.assertEqual(request["platform"], "osx") self.assertEqual(request["language"], "en")
+ request = ep.parse("From: "silvia [hiro]" hiro@torproject.org\n Subject: \r\n Reply-To: hiro@torproject.org \nTo: gettor@torproject.org\n linux ru") + self.assertEqual(request["command"], "links") + self.assertEqual(request["platform"], "linux") + self.assertEqual(request["language"], "ru") + + def test_sent_links_message(self): ep = self.sm_client links = self.links
tor-commits@lists.torproject.org