commit 6a240005e764defa6269392ded78857e13426ba4 Author: Christian Fromme kaner@strace.org Date: Tue Aug 30 22:41:59 2011 +0200
Enhance the blacklisting mechanism: - Users can request a configurable number of packages until they're blacklisted, instead of only one - After copying the email normalization code from BridgeDB (thanks, rransom) abusing GetTor just got a bit harder
Closes #3381 --- gettor.conf | 3 + lib/gettor/blacklist.py | 42 ++++++++++++++++---- lib/gettor/config.py | 3 + lib/gettor/requests.py | 4 +- lib/gettor/responses.py | 27 +++++++------ lib/gettor/utils.py | 99 ++++++++++++++++++++++++++++++++++++++++------- 6 files changed, 142 insertions(+), 36 deletions(-)
diff --git a/gettor.conf b/gettor.conf index 93b8d68..29a2494 100644 --- a/gettor.conf +++ b/gettor.conf @@ -27,6 +27,9 @@ DUMPFILE = "gettor.dump" # for it? BLACKLIST_BY_TYPE = True
+# How many packages per type do we send to a user before we blacklist him? +BLACKLIST_THRES = 3 + # Which mirror to sync packages from RSYNC_MIRROR = "rsync.torproject.org"
diff --git a/lib/gettor/blacklist.py b/lib/gettor/blacklist.py index 83cf188..1ddcfd2 100644 --- a/lib/gettor/blacklist.py +++ b/lib/gettor/blacklist.py @@ -5,15 +5,17 @@ import os import re import glob +import struct import logging import gettor.utils
class BWList: - def __init__(self, blacklistDir): + def __init__(self, blacklistDir, blacklistThres): """A blacklist lives as hash files inside a directory and is simply a number of files that represent hashed email addresses. """ self.blacklistDir = blacklistDir + self.blacklistThres = blacklistThres # "general" is the main blacklist self.createSublist("general")
@@ -29,17 +31,39 @@ class BWList: # XXX Change this to something more appropriate raise IOError("Bad dir: %s" % fullDir)
- def lookupListEntry(self, address, blacklistName="*"): + def entryExists(self, address, blacklistName="general"): + """Look up if a certain address is already blacklisted + """ + hashString = self.getHash(address) + globPath = os.path.join(self.blacklistDir, blacklistName) + hashVec = glob.glob(os.path.join(globPath, hashString)) + if len(hashVec) > 0: + if os.path.isfile(hashVec[0]): + return True + + return False + + def checkAndUpdate(self, address, blacklistName="*", update=False): """Check to see if we have a list entry for the given address. """ - if address is None: - logging.error("Argument 'address' is None") - return False hashString = self.getHash(address) globPath = os.path.join(self.blacklistDir, blacklistName) hashVec = glob.glob(os.path.join(globPath, hashString)) if len(hashVec) > 0: - return True + count = "" + with open(hashVec[0], 'r') as fd: + count = fd.read() + + i_count = int(count) + i_count += 1 + count = str(i_count) + + if update == True: + with open(hashVec[0], 'w+') as fd: + fd.write("%s\n" % count) + + if i_count >= self.blacklistThres: + return True return False
def createListEntry(self, address, blacklistName="general"): @@ -48,12 +72,12 @@ class BWList: if address is None: logging.error("Bad args in createListEntry()") return False - if self.lookupListEntry(address, blacklistName) == False: + if self.entryExists(address, blacklistName) == False: hashString = self.getHash(address) entry = os.path.join(self.blacklistDir, blacklistName, hashString) try: - fd = open(entry, 'w') - fd.close + with open(entry, 'w+') as fd: + fd.write("0\n") return True except: logging.error("Creating list entry %s failed." % entry) diff --git a/lib/gettor/config.py b/lib/gettor/config.py index 16fb8b8..1f1eb88 100644 --- a/lib/gettor/config.py +++ b/lib/gettor/config.py @@ -18,6 +18,8 @@ DUMPFILE: Where failed mails get stored BLACKLIST_BY_TYPE: Do we send every mail type to every user only once before we blacklist them for it? + BLACKLIST_THRES: How many packages per type do we sent to a user before we + blacklist him/her? RSYNC_MIRROR: Which rsync server to sync packages from DEFAULT_LOCALE: Default locale SUPP_LANGS: Supported languages by GetTor @@ -42,6 +44,7 @@ CONFIG_DEFAULTS = { 'PASSFILE': "gettor.pass", 'DUMPFILE': "./gettor.dump", 'BLACKLIST_BY_TYPE': True, + 'BLACKLIST_THRES': 3, 'RSYNC_MIRROR': "rsync.torproject.org", 'DEFAULT_LOCALE': "en", 'SUPP_LANGS': { 'en': ("english", ), }, diff --git a/lib/gettor/requests.py b/lib/gettor/requests.py index b29e335..2ee275e 100644 --- a/lib/gettor/requests.py +++ b/lib/gettor/requests.py @@ -22,7 +22,9 @@ class requestMail: self.config = config self.request = {} self.request['user'] = self.parsedMessage["Return-Path"] - self.request['hashed_user'] = gettor.utils.getHash(self.request['user']) + # Normalize address before hashing + normalized_addr = gettor.utils.normalizeAddress(self.request['user']) + self.request['hashed_user'] = gettor.utils.getHash(normalized_addr) self.request['ouraddr'] = self.getRealTo(self.parsedMessage["to"]) self.request['locale'] = 
self.getLocaleInTo(self.request['ouraddr']) self.request['package'] = None diff --git a/lib/gettor/responses.py b/lib/gettor/responses.py index 0f3d2f8..e8dcc17 100644 --- a/lib/gettor/responses.py +++ b/lib/gettor/responses.py @@ -16,6 +16,7 @@ from email.mime.base import MIMEBase from email.mime.text import MIMEText
import gettor.blacklist +import gettor.utils import gettor.i18n as i18n
def getGreetingText(t): @@ -189,8 +190,8 @@ class Response: # Init black & whitelists wlStateDir = os.path.join(self.config.BASEDIR, "wl") blStateDir = os.path.join(self.config.BASEDIR, "bl") - self.wList = gettor.blacklist.BWList(wlStateDir) - self.bList = gettor.blacklist.BWList(blStateDir) + self.wList = gettor.blacklist.BWList(wlStateDir, config.BLACKLIST_THRES) + self.bList = gettor.blacklist.BWList(blStateDir, config.BLACKLIST_THRES)
def sendReply(self): """All routing decisions take place here. Sending of mails takes place @@ -222,21 +223,22 @@ class Response: type name we're looking for """ # First of all, check if user is whitelisted: Whitelist beats Blacklist - if self.wList.lookupListEntry(self.reqInfo['user'], "general"): + normalized_addr = gettor.utils.normalizeAddress(self.reqInfo['user']) + if self.wList.entryExists(normalized_addr, "general"): logging.info("Whitelisted user " + self.reqInfo['hashed_user']) return False # Now check general and specific blacklists, in that order - if self.bList.lookupListEntry(self.reqInfo['user'], "general"): + if self.bList.entryExists(normalized_addr, "general"): logging.info("Blacklisted user " + self.reqInfo['hashed_user']) return True # Create a unique dir name for the requested routine self.bList.createSublist(fname) - if self.bList.lookupListEntry(self.reqInfo['user'], fname): + if self.bList.checkAndUpdate(normalized_addr, fname, True): logging.info("User %s is blacklisted for %s" \ % (self.reqInfo['hashed_user'], fname)) return True else: - self.bList.createListEntry(self.reqInfo['user'], fname) + self.bList.createListEntry(normalized_addr, fname) return False
def sendPackage(self): @@ -317,15 +319,16 @@ class Response: splitDir = os.path.join(self.config.BASEDIR, "packages", splitpack) fileList = os.listdir(splitDir)
- # Be a polite bot and send message that mail is on the way - if self.config.DELAY_ALERT: - if not self.sendDelayAlert(): - logging.error("Failed to sent delay alert.") - # Sort the files, so we can send 01 before 02 and so on.. fileList.sort() nFiles = len(fileList) num = 0 + + # Be a polite bot and send message that mail is on the way + if self.config.DELAY_ALERT: + if not self.sendDelayAlert(nFiles): + logging.error("Failed to sent delay alert.") + # For each available split file, send a mail for filename in fileList: path = os.path.join(splitDir, filename) @@ -360,7 +363,7 @@ class Response: packageInfo = self.reqInfo['package']
logging.info("Sending delay alert to %s" % self.reqInfo['hashed_user']) - return self.sendTextEmail(getDelayAlertMsg(self.t), packageInfo) + return self.sendTextEmail(getDelayAlertMsg(self.t, packageInfo))
def sendHelp(self): """Send a help mail. This happens when a user sent us a request we diff --git a/lib/gettor/utils.py b/lib/gettor/utils.py index d7f0725..c06e40f 100644 --- a/lib/gettor/utils.py +++ b/lib/gettor/utils.py @@ -110,7 +110,7 @@ def addWhitelistEntry(conf, address): wlStateDir = conf.BASEDIR + "/wl" logging.debug("Adding address to whitelist: %s" % address) try: - whiteList = gettor.blacklist.BWList(wlStateDir) + whiteList = gettor.blacklist.BWList(wlStateDir, conf.BLACKLIST_THRES) except IOError, e: logging.error("Whitelist error: %s" % e) return False @@ -127,7 +127,7 @@ def addBlacklistEntry(conf, address): logging.debug("Adding address to blacklist: %s" % address) blStateDir = conf.BASEDIR + "/bl" try: - blackList = gettor.blacklist.BWList(blStateDir) + blackList = gettor.blacklist.BWList(blStateDir, conf.BLACKLIST_THRES) except IOError, e: logging.error("Blacklist error: %s" % e) return False @@ -146,15 +146,15 @@ def lookupAddress(conf, address): wlStateDir = conf.BASEDIR + "/wl" blStateDir = conf.BASEDIR + "/bl" try: - whiteList = gettor.blacklist.BWList(wlStateDir) - blackList = gettor.blacklist.BWList(blStateDir) + whiteList = gettor.blacklist.BWList(wlStateDir, conf.BLACKLIST_THRES) + blackList = gettor.blacklist.BWList(blStateDir, conf.BLACKLIST_THRES) except IOError, e: logging.error("White/Blacklist error: %s" % e) return False - if whiteList.lookupListEntry(address, "general"): + if whiteList.checkAndUpdate(address, "general"): logging.info("Address '%s' is present in the whitelist." % address) found = True - if blackList.lookupListEntry(address, "general"): + if blackList.checkAndUpdate(address, "general"): logging.info("Address '%s' is present in the blacklist." 
% address) found = True if not found: @@ -169,7 +169,7 @@ def clearWhitelist(conf): """ wlStateDir = conf.BASEDIR + "/wl" try: - whiteList = gettor.blacklist.BWList(wlStateDir) + whiteList = gettor.blacklist.BWList(wlStateDir, conf.BLACKLIST_THRES) except IOError, e: logging.error("Whitelist error: %s" % e) return False @@ -188,7 +188,7 @@ def clearBlacklist(conf, olderThanDays): logging.debug("Clearing blacklist..") blStateDir = conf.BASEDIR + "/bl" try: - blackList = gettor.blacklist.BWList(blStateDir) + blackList = gettor.blacklist.BWList(blStateDir, conf.BLACKLIST_THRES) except IOError, e: logging.error("Blacklist error: %s" % e) return False @@ -262,13 +262,11 @@ def getCurrentCrontab(): return savedTab
def normalizeAddress(address): - """We need this because we internally store email addresses in this format - in the black- and whitelists + """This does everything from checking if the address is ok to stripping + dots and "+" addresses so abusing GetTor gets harder. """ - if address.startswith("<"): - return address - else: - return "<" + address + ">" + address = normalizeEmail(address) + return "<" + address + ">"
def stripEmail(address): @@ -300,3 +298,76 @@ def removeFromListByRegex(l, string):
return l
# The following code is more or less taken from BridgeDB

class BadEmail(Exception):
    """Exception raised when we get a bad email address.

    The offending input is kept in the `email` attribute so callers can
    log or report it.
    """
    def __init__(self, msg, email):
        Exception.__init__(self, msg)
        self.email = email

# Non-alphanumeric characters we accept inside a local-part atom.
ASPECIAL = '-_+/=_~'

# One atom character: a word character or one of ASPECIAL (each escaped so
# that e.g. "-" cannot form a range inside the character class).
ACHAR = r'[\w%s]' % "".join(r"\%s" % c for c in ASPECIAL)
# dot-atom: one or more atoms separated by *literal* dots.  The dot has to
# be escaped; a bare "." would accept any character as a separator.
DOTATOM = r'%s+(?:\.%s+)*' % (ACHAR, ACHAR)
# Domain: dot-separated labels made of word characters and hyphens.
DOMAIN = r'[\w-]+(?:\.[\w-]+)*'
ADDRSPEC = r'(%s)@(%s)' % (DOTATOM, DOMAIN)

SPACE_PAT = re.compile(r'\s+')
# Anchored at the end so that trailing junk makes the address invalid
# instead of being silently dropped.
ADDRSPEC_PAT = re.compile(ADDRSPEC + r'\Z')

def extractAddrSpec(addr):
    """Given an email From line, try to extract and parse the addrspec
       portion.  Returns localpart,domain on success; raises BadEmail
       on failure (including when `addr` is None).
    """
    if addr is None:
        raise BadEmail("No address given", addr)
    orig_addr = addr
    addr = SPACE_PAT.sub(' ', addr)
    addr = addr.strip()
    # Only works on usual-form addresses; raises BadEmail on weird
    # address form.  That's okay, since we'll only get those when
    # people are trying to fool us.
    if '<' in addr:
        # Take the _last_ index of <, so that we don't need to bother
        # with quoting tricks.
        idx = addr.rindex('<')
        addr = addr[idx:]
        m = re.search(r'<([^>]*)>', addr)
        if m is None:
            raise BadEmail("Couldn't extract address spec", orig_addr)
        addr = m.group(1)

    # At this point, addr holds a putative addr-spec.  We only allow the
    # following form:
    #   addr-spec = local-part "@" domain
    #   local-part = dot-atom
    #   domain = dot-atom
    #
    # In particular, we are disallowing: obs-local-part, obs-domain,
    # comment, obs-FWS,
    #
    # Other forms exist, but none of the incoming services we recognize
    # support them.
    addr = addr.replace(" ", "")
    m = ADDRSPEC_PAT.match(addr)
    if not m:
        raise BadEmail("Bad address spec format", orig_addr)
    localpart, domain = m.groups()
    return localpart, domain

def normalizeEmail(addr):
    """Given the contents of a from line, raise BadEmail or return a
       normalized email address.

       Normalization lowercases the address, drops a "+suffix" from the
       local part and removes all dots from the local part, so that
       trivially different spellings of one mailbox compare equal.
    """
    if addr is None:
        raise BadEmail("No address given", addr)
    addr = addr.lower()
    localpart, domain = extractAddrSpec(addr)

    # addr+foo@ is an alias for addr@
    idx = localpart.find('+')
    if idx >= 0:
        localpart = localpart[:idx]
    # Dots in the local part are ignored as well
    localpart = localpart.replace(".", "")

    return "%s@%s" % (localpart, domain)