[tor-commits] [gettor/master] Enhance the blacklisting mechanism:

kaner at torproject.org kaner at torproject.org
Sat Sep 3 12:35:54 UTC 2011


commit 6a240005e764defa6269392ded78857e13426ba4
Author: Christian Fromme <kaner at strace.org>
Date:   Tue Aug 30 22:41:59 2011 +0200

    Enhance the blacklisting mechanism:
    - Users can request a configurable number of packages until they're
      blacklisted, instead of only one
    - After copying the email normalization code from BridgeDB (thanks,
      rransom) abusing GetTor just got a bit harder
    
    Closes #3381
---
 gettor.conf             |    3 +
 lib/gettor/blacklist.py |   42 ++++++++++++++++----
 lib/gettor/config.py    |    3 +
 lib/gettor/requests.py  |    4 +-
 lib/gettor/responses.py |   27 +++++++------
 lib/gettor/utils.py     |   99 ++++++++++++++++++++++++++++++++++++++++-------
 6 files changed, 142 insertions(+), 36 deletions(-)

diff --git a/gettor.conf b/gettor.conf
index 93b8d68..29a2494 100644
--- a/gettor.conf
+++ b/gettor.conf
@@ -27,6 +27,9 @@ DUMPFILE = "gettor.dump"
 # for it?
 BLACKLIST_BY_TYPE = True
 
+# How many packages per type do we send to a user before we blacklist him?
+BLACKLIST_THRES = 3
+
 # Which mirror to sync packages from
 RSYNC_MIRROR = "rsync.torproject.org"
 
diff --git a/lib/gettor/blacklist.py b/lib/gettor/blacklist.py
index 83cf188..1ddcfd2 100644
--- a/lib/gettor/blacklist.py
+++ b/lib/gettor/blacklist.py
@@ -5,15 +5,17 @@
 import os
 import re
 import glob
+import struct
 import logging
 import gettor.utils
 
 class BWList:
-    def __init__(self, blacklistDir):
+    def __init__(self, blacklistDir, blacklistThres):
         """A blacklist lives as hash files inside a directory and is simply a 
            number of files that represent hashed email addresses.
         """
         self.blacklistDir = blacklistDir
+        self.blacklistThres = blacklistThres
         # "general" is the main blacklist
         self.createSublist("general")
 
@@ -29,17 +31,39 @@ class BWList:
                 # XXX Change this to something more appropriate
                 raise IOError("Bad dir: %s" % fullDir)
 
-    def lookupListEntry(self, address, blacklistName="*"):
+    def entryExists(self, address, blacklistName="general"):
+        """Look up if a certain address is already blacklisted
+        """
+        hashString = self.getHash(address)
+        globPath = os.path.join(self.blacklistDir, blacklistName)
+        hashVec = glob.glob(os.path.join(globPath, hashString))
+        if len(hashVec) > 0:
+           if os.path.isfile(hashVec[0]):
+               return True
+
+        return False
+
+    def checkAndUpdate(self, address, blacklistName="*", update=False):
         """Check to see if we have a list entry for the given address.
         """
-        if address is None:
-           logging.error("Argument 'address' is None")
-           return False
         hashString = self.getHash(address)
         globPath = os.path.join(self.blacklistDir, blacklistName)
         hashVec = glob.glob(os.path.join(globPath, hashString))
         if len(hashVec) > 0:
-            return True
+            count = ""
+            with open(hashVec[0], 'r') as fd:
+                count = fd.read()
+
+            i_count = int(count)
+            i_count += 1
+            count = str(i_count)
+
+            if update == True:
+                with open(hashVec[0], 'w+') as fd:
+                    fd.write("%s\n" % count)
+
+            if i_count >= self.blacklistThres:
+                return True
         return False
 
     def createListEntry(self, address, blacklistName="general"):
@@ -48,12 +72,12 @@ class BWList:
         if address is None:
            logging.error("Bad args in createListEntry()")
            return False
-        if self.lookupListEntry(address, blacklistName) == False:
+        if self.entryExists(address, blacklistName) == False:
             hashString = self.getHash(address)
             entry = os.path.join(self.blacklistDir, blacklistName, hashString)
             try:
-                fd = open(entry, 'w')
-                fd.close
+                with open(entry, 'w+') as fd:
+                    fd.write("0\n")
                 return True
             except:
                 logging.error("Creating list entry %s failed." % entry)
diff --git a/lib/gettor/config.py b/lib/gettor/config.py
index 16fb8b8..1f1eb88 100644
--- a/lib/gettor/config.py
+++ b/lib/gettor/config.py
@@ -18,6 +18,8 @@
  DUMPFILE:      Where failed mails get stored
  BLACKLIST_BY_TYPE:  Do we send every mail type to every user only once before 
                      we blacklist them for it?
+ BLACKLIST_THRES: How many packages per type do we send to a user before we
+                  blacklist him/her?
  RSYNC_MIRROR:  Which rsync server to sync packages from
  DEFAULT_LOCALE: Default locale
  SUPP_LANGS:    Supported languages by GetTor
@@ -42,6 +44,7 @@ CONFIG_DEFAULTS = {
    'PASSFILE': "gettor.pass",
    'DUMPFILE': "./gettor.dump",
    'BLACKLIST_BY_TYPE': True,
+   'BLACKLIST_THRES': 3,
    'RSYNC_MIRROR': "rsync.torproject.org",
    'DEFAULT_LOCALE': "en",
    'SUPP_LANGS': { 'en': ("english", ), },
diff --git a/lib/gettor/requests.py b/lib/gettor/requests.py
index b29e335..2ee275e 100644
--- a/lib/gettor/requests.py
+++ b/lib/gettor/requests.py
@@ -22,7 +22,9 @@ class requestMail:
         self.config = config
         self.request = {}
         self.request['user'] = self.parsedMessage["Return-Path"]
-        self.request['hashed_user'] = gettor.utils.getHash(self.request['user'])
+        # Normalize address before hashing
+        normalized_addr = gettor.utils.normalizeAddress(self.request['user'])
+        self.request['hashed_user'] = gettor.utils.getHash(normalized_addr)
         self.request['ouraddr'] = self.getRealTo(self.parsedMessage["to"])
         self.request['locale'] = self.getLocaleInTo(self.request['ouraddr'])
         self.request['package'] = None
diff --git a/lib/gettor/responses.py b/lib/gettor/responses.py
index 0f3d2f8..e8dcc17 100644
--- a/lib/gettor/responses.py
+++ b/lib/gettor/responses.py
@@ -16,6 +16,7 @@ from email.mime.base import MIMEBase
 from email.mime.text import MIMEText
 
 import gettor.blacklist
+import gettor.utils
 import gettor.i18n as i18n
 
 def getGreetingText(t):
@@ -189,8 +190,8 @@ class Response:
         # Init black & whitelists
         wlStateDir = os.path.join(self.config.BASEDIR, "wl")
         blStateDir = os.path.join(self.config.BASEDIR, "bl")
-        self.wList = gettor.blacklist.BWList(wlStateDir)
-        self.bList = gettor.blacklist.BWList(blStateDir)
+        self.wList = gettor.blacklist.BWList(wlStateDir, config.BLACKLIST_THRES)
+        self.bList = gettor.blacklist.BWList(blStateDir, config.BLACKLIST_THRES)
 
     def sendReply(self):
         """All routing decisions take place here. Sending of mails takes place
@@ -222,21 +223,22 @@ class Response:
            type name we're looking for
         """
         # First of all, check if user is whitelisted: Whitelist beats Blacklist
-        if self.wList.lookupListEntry(self.reqInfo['user'], "general"):
+        normalized_addr = gettor.utils.normalizeAddress(self.reqInfo['user'])
+        if self.wList.entryExists(normalized_addr, "general"):
             logging.info("Whitelisted user " + self.reqInfo['hashed_user'])
             return False
         # Now check general and specific blacklists, in that order
-        if self.bList.lookupListEntry(self.reqInfo['user'], "general"):
+        if self.bList.entryExists(normalized_addr, "general"):
             logging.info("Blacklisted user " + self.reqInfo['hashed_user'])
             return True
         # Create a unique dir name for the requested routine
         self.bList.createSublist(fname)
-        if self.bList.lookupListEntry(self.reqInfo['user'], fname):
+        if self.bList.checkAndUpdate(normalized_addr, fname, True):
             logging.info("User %s is blacklisted for %s" \
                                    % (self.reqInfo['hashed_user'], fname))
             return True
         else:
-            self.bList.createListEntry(self.reqInfo['user'], fname)
+            self.bList.createListEntry(normalized_addr, fname)
             return False
 
     def sendPackage(self):
@@ -317,15 +319,16 @@ class Response:
         splitDir = os.path.join(self.config.BASEDIR, "packages", splitpack)
         fileList = os.listdir(splitDir)
 
-        # Be a polite bot and send message that mail is on the way
-        if self.config.DELAY_ALERT:
-	    if not self.sendDelayAlert():
-	        logging.error("Failed to sent delay alert.")
-
         # Sort the files, so we can send 01 before 02 and so on..
         fileList.sort()
         nFiles = len(fileList)
         num = 0
+
+        # Be a polite bot and send message that mail is on the way
+        if self.config.DELAY_ALERT:
+            if not self.sendDelayAlert(nFiles):
+                logging.error("Failed to sent delay alert.")
+
         # For each available split file, send a mail
         for filename in fileList:
             path = os.path.join(splitDir, filename)
@@ -360,7 +363,7 @@ class Response:
             packageInfo = self.reqInfo['package']
 
         logging.info("Sending delay alert to %s" % self.reqInfo['hashed_user'])
-        return self.sendTextEmail(getDelayAlertMsg(self.t), packageInfo)
+        return self.sendTextEmail(getDelayAlertMsg(self.t, packageInfo))
             
     def sendHelp(self):
         """Send a help mail. This happens when a user sent us a request we 
diff --git a/lib/gettor/utils.py b/lib/gettor/utils.py
index d7f0725..c06e40f 100644
--- a/lib/gettor/utils.py
+++ b/lib/gettor/utils.py
@@ -110,7 +110,7 @@ def addWhitelistEntry(conf, address):
     wlStateDir = conf.BASEDIR + "/wl"
     logging.debug("Adding address to whitelist: %s" % address)
     try:
-        whiteList = gettor.blacklist.BWList(wlStateDir)
+        whiteList = gettor.blacklist.BWList(wlStateDir, conf.BLACKLIST_THRES)
     except IOError, e:
         logging.error("Whitelist error: %s" % e)
         return False
@@ -127,7 +127,7 @@ def addBlacklistEntry(conf, address):
     logging.debug("Adding address to blacklist: %s" % address)
     blStateDir = conf.BASEDIR + "/bl"
     try:
-        blackList = gettor.blacklist.BWList(blStateDir)
+        blackList = gettor.blacklist.BWList(blStateDir, conf.BLACKLIST_THRES)
     except IOError, e:
         logging.error("Blacklist error: %s" % e)
         return False
@@ -146,15 +146,15 @@ def lookupAddress(conf, address):
     wlStateDir = conf.BASEDIR + "/wl"
     blStateDir = conf.BASEDIR + "/bl"
     try:
-        whiteList = gettor.blacklist.BWList(wlStateDir)
-        blackList = gettor.blacklist.BWList(blStateDir)
+        whiteList = gettor.blacklist.BWList(wlStateDir, conf.BLACKLIST_THRES)
+        blackList = gettor.blacklist.BWList(blStateDir, conf.BLACKLIST_THRES)
     except IOError, e:
         logging.error("White/Blacklist error: %s" % e)
         return False
-    if whiteList.lookupListEntry(address, "general"):
+    if whiteList.checkAndUpdate(address, "general"):
         logging.info("Address '%s' is present in the whitelist." % address)
         found = True
-    if blackList.lookupListEntry(address, "general"):
+    if blackList.checkAndUpdate(address, "general"):
         logging.info("Address '%s' is present in the blacklist." % address)
         found = True
     if not found:
@@ -169,7 +169,7 @@ def clearWhitelist(conf):
     """
     wlStateDir = conf.BASEDIR + "/wl"
     try:
-        whiteList = gettor.blacklist.BWList(wlStateDir)
+        whiteList = gettor.blacklist.BWList(wlStateDir, conf.BLACKLIST_THRES)
     except IOError, e:
         logging.error("Whitelist error: %s" % e)
         return False
@@ -188,7 +188,7 @@ def clearBlacklist(conf, olderThanDays):
     logging.debug("Clearing blacklist..")
     blStateDir = conf.BASEDIR + "/bl"
     try:
-        blackList = gettor.blacklist.BWList(blStateDir)
+        blackList = gettor.blacklist.BWList(blStateDir, conf.BLACKLIST_THRES)
     except IOError, e:
         logging.error("Blacklist error: %s" % e)
         return False
@@ -262,13 +262,11 @@ def getCurrentCrontab():
     return savedTab
 
 def normalizeAddress(address):
-    """We need this because we internally store email addresses in this format
-       in the black- and whitelists
+    """This does everything from checking if the address is ok to stripping
+       dots and "+" addresses so abusing GetTor gets harder.
     """
-    if address.startswith("<"):
-        return address
-    else:
-        return "<" + address + ">"
+    address = normalizeEmail(address)
+    return "<" + address + ">"
 
 
 def stripEmail(address):
@@ -300,3 +298,76 @@ def removeFromListByRegex(l, string):
 
     return l
 
+# The following code is more or less taken from BridgeDB
+
+class BadEmail(Exception):
+    """Exception raised when we get a bad email address."""
+    def __init__(self, msg, email):
+        Exception.__init__(self, msg)
+        self.email = email
+
+ASPECIAL = '-_+/=_~'
+
+ACHAR = r'[\w%s]' % "".join("\\%s"%c for c in ASPECIAL)
+DOTATOM = r'%s+(?:\.%s+)*'%(ACHAR,ACHAR)
+DOMAIN = r'\w+(?:\.\w+)*'
+ADDRSPEC = r'(%s)\@(%s)'%(DOTATOM, DOMAIN)
+
+SPACE_PAT = re.compile(r'\s+')
+ADDRSPEC_PAT = re.compile(ADDRSPEC)
+
+def extractAddrSpec(addr):
+    """Given an email From line, try to extract and parse the addrspec
+       portion.  Returns localpart,domain on success; raises BadEmail
+       on failure.
+    """
+    orig_addr = addr
+    addr = SPACE_PAT.sub(' ', addr)
+    addr = addr.strip()
+    # Only works on usual-form addresses; raises BadEmail on weird
+    # address form.  That's okay, since we'll only get those when
+    # people are trying to fool us.
+    if '<' in addr:
+        # Take the _last_ index of <, so that we don't need to bother
+        # with quoting tricks.
+        idx = addr.rindex('<')
+        addr = addr[idx:]
+        m = re.search(r'<([^>]*)>', addr)
+        if m is None:
+            raise BadEmail("Couldn't extract address spec", orig_addr)
+        addr = m.group(1)
+
+    # At this point, addr holds a putative addr-spec.  We only allow the
+    # following form:
+    #   addr-spec = local-part "@" domain
+    #   local-part = dot-atom
+    #   domain = dot-atom
+    #
+    # In particular, we are disallowing: obs-local-part, obs-domain,
+    # comment, obs-FWS,
+    #
+    # Other forms exist, but none of the incoming services we recognize
+    # support them.
+    addr = addr.replace(" ", "")
+    m = ADDRSPEC_PAT.match(addr)
+    if not m:
+        raise BadEmail("Bad address spec format", orig_addr)
+    localpart, domain = m.groups()
+    return localpart, domain
+
+def normalizeEmail(addr):
+    """Given the contents of a from line, raise BadEmail or return a
+       normalized email address: lowercased, with any "+" suffix and all
+       dots stripped from the local part.
+    """
+    addr = addr.lower()
+    localpart, domain = extractAddrSpec(addr)
+
+    # addr+foo@ is an alias for addr@
+    idx = localpart.find('+')
+    if idx >= 0:
+        localpart = localpart[:idx]
+    localpart = localpart.replace(".", "")
+
+    return "%s@%s"%(localpart, domain)
+





More information about the tor-commits mailing list