[tor-commits] [bridgedb/develop] Fuzzy match incoming email addresses.

isis at torproject.org isis at torproject.org
Thu Jul 24 04:37:11 UTC 2014


commit feea15673d1690ceb1050c9068eaa85de855b637
Author: Isis Lovecruft <isis at torproject.org>
Date:   Sun Jul 6 20:37:05 2014 +0000

    Fuzzy match incoming email addresses.
    
     * ADD a branch to `bridgedb.email.autoresponder.SMTPAutoresponder.runChecks()`
       which calculates the Levenshtein Distance of an incoming email address in
       order to fuzzy match it against those in the EMAIL_BLACKLIST setting.
    
     * FIXES #9385:
       https://trac.torproject.org/projects/tor/ticket/9385
---
 lib/bridgedb/email/autoresponder.py |   14 ++++++++++++++
 lib/bridgedb/email/server.py        |    7 +++++++
 2 files changed, 21 insertions(+)

diff --git a/lib/bridgedb/email/autoresponder.py b/lib/bridgedb/email/autoresponder.py
index 3674702..8aa004e 100644
--- a/lib/bridgedb/email/autoresponder.py
+++ b/lib/bridgedb/email/autoresponder.py
@@ -38,6 +38,7 @@ from bridgedb.email import request
 from bridgedb.email import templates
 from bridgedb.parse import addr
 from bridgedb.parse.addr import canonicalizeEmailDomain
+from bridgedb.util import levenshteinDistance
 from bridgedb import translations
 
 
@@ -646,6 +647,19 @@ class SMTPAutoresponder(smtp.SMTPClient):
         if not dkim.checkDKIM(self.incoming.message, self.incoming.domainRules):
             return False
 
+        # If fuzzy matching is enabled (EMAIL_FUZZY_MATCH is non-zero), drop
+        # the email if its sender address is within that Levenshtein Distance
+        # of any blacklisted one (:func:`~bridgedb.util.levenshteinDistance`):
+        if self.incoming.context.fuzzyMatch != 0:
+            for blacklistedAddress in self.incoming.context.blacklist:
+                distance = levenshteinDistance(self.incoming.canonicalFromEmail,
+                                               blacklistedAddress)
+                if distance <= self.incoming.context.fuzzyMatch:
+                    logging.info("Fuzzy-matched %s to blacklisted address %s!"
+                                 % (self.incoming.canonicalFromEmail,
+                                    blacklistedAddress))
+                    return False
+
         return True
 
     def send(self, response, retries=0, timeout=30, reaktor=reactor):
diff --git a/lib/bridgedb/email/server.py b/lib/bridgedb/email/server.py
index 4bcfed9..194e74a 100644
--- a/lib/bridgedb/email/server.py
+++ b/lib/bridgedb/email/server.py
@@ -57,6 +57,11 @@ class MailServerContext(object):
     :ivar str fromAddr: Use this address in the email :header:`From:`
         line for outgoing mail. (default: ``bridges at torproject.org``)
     :ivar int nBridges: The number of bridges to send for each email.
+    :ivar list blacklist: A list of blacklisted email addresses, taken from
+        the ``EMAIL_BLACKLIST`` config setting.
+    :ivar int fuzzyMatch: The maximum Levenshtein Distance between an incoming
+        email address and a blacklisted one for the incoming email to be dropped.
+        Set from ``EMAIL_FUZZY_MATCH``; ``0`` (the default) disables the check.
     :ivar gpgContext: A ``gpgme.GpgmeContext`` (as created by
         :func:`bridgedb.crypto.getGPGContext`), or None if we couldn't create
         a proper GPGME context for some reason.
@@ -92,6 +97,8 @@ class MailServerContext(object):
         self.domainMap = config.EMAIL_DOMAIN_MAP or {}
         self.canon = self.buildCanonicalDomainMap()
         self.whitelist = config.EMAIL_WHITELIST or {}
+        self.blacklist = config.EMAIL_BLACKLIST or []
+        self.fuzzyMatch = config.EMAIL_FUZZY_MATCH or 0
 
         self.gpgContext = getGPGContext(config)
 





More information about the tor-commits mailing list