commit 0fc5f539b31f61b7206a9825ca589b62530e3d8f
Author: aagbsn <aagbsn@extc.org>
Date:   Mon Jul 25 16:32:15 2011 -0700
1608 - don't give out known blocked bridges
This set of changes allows bridgedb to filter responses by country; if a bridge is known to be blocked in a country it can be filtered from the set of returned bridges.
---
 README | 16 +++++++++++
 bridgedb.conf | 3 ++
 lib/bridgedb/Bridges.py | 66 +++++++++++++++++++++++++++++++++++++++++++++-
 lib/bridgedb/Dist.py | 8 +++---
 lib/bridgedb/I18n.py | 4 ++-
 lib/bridgedb/Main.py | 11 ++++++++
 lib/bridgedb/Server.py | 61 +++++++++++++++++++++++++++++++++----------
 lib/bridgedb/Storage.py | 64 ++++++++++++++++++++++++++++++++++++++++++---
 lib/bridgedb/Tests.py | 18 +++++++++++++
 lib/bridgedb/Time.py | 6 ++--
 10 files changed, 228 insertions(+), 29 deletions(-)
diff --git a/README b/README index 64b402a..c05317a 100644 --- a/README +++ b/README @@ -11,6 +11,9 @@ To set up: - You can make a self-signed certificate with openssl req -x509 -new -nodes > cert - Set up PYTHONPATH to include "~/lib/python2.4/site-packages/". + - To install Maxmind GeoIP + - Debian: apt-get install python-geoip + - Others: http://www.maxmind.com/app/python
To re-generate and update the i18n files (in case translated strings have changed in BridgeDB): @@ -47,5 +50,18 @@ To use with email: - Any mail sent to the email port with a subject or a single line _exactly_ equal to "get bridges" will get answered, assuming the domain is okay.
+To indicate which bridges are blocked: + - Uncomment or add COUNTRY_BLOCK_FILE to your bridgedb.conf + - The syntax of the COUNTRY_BLOCK_FILE, 1 entry per line: + fingerprint <bridge fingerprint> country-code <country code> + - If this file is present, bridgedb will filter blocked bridges from responses + - For GeoIP support make sure to install Maxmind GeoIP + +To update the SQL schema: + - CREATE TABLE BlockedBridges ( id INTEGER PRIMARY KEY NOT NULL, hex_key, blocking_country); + - CREATE INDEX BlockedBridgesBlockingCountry on BlockedBridges(hex_key); + - REPLACE INTO Config VALUES ( 'schema-version', 2 ); + + Support - Send your questions to nickm. diff --git a/bridgedb.conf b/bridgedb.conf index e486e0a..11afc02 100644 --- a/bridgedb.conf +++ b/bridgedb.conf @@ -21,6 +21,9 @@ BRIDGE_FILES = [ "./bridge-descriptors" ] # current bridges are Running. STATUS_FILE = "networkstatus-bridges"
+# Either a file that contains blocked bridges list or None +#COUNTRY_BLOCK_FILE = "./blocked-bridges" + # Only consider routers whose purpose matches this string. BRIDGE_PURPOSE = "bridge" # File to store persistent info in. diff --git a/lib/bridgedb/Bridges.py b/lib/bridgedb/Bridges.py index 229cc5a..358e362 100644 --- a/lib/bridgedb/Bridges.py +++ b/lib/bridgedb/Bridges.py @@ -104,6 +104,7 @@ class Bridge: ## fingerprint -- The bridge's identity digest, in lowercase hex, with ## no spaces. ## running,stable -- DOCDOC + ## blockingCountries -- list of country codes blocking this bridge def __init__(self, nickname, ip, orport, fingerprint=None, id_digest=None): """Create a new Bridge. One of fingerprint and id_digest must be set.""" @@ -111,6 +112,7 @@ class Bridge: self.ip = ip self.orport = orport self.running = self.stable = None + self.blockingCountries = None if id_digest is not None: assert fingerprint is None if len(id_digest) != DIGEST_LEN: @@ -151,6 +153,15 @@ class Bridge: if stable is not None: self.stable = stable
+ def setBlockingCountries(self, blockingCountries): + if blockingCountries is not None: + self.blockingCountries = blockingCountries + + def isBlocked(self, countryCode): + if self.blockingCountries is not None and countryCode is not None: + if countryCode in self.blockingCountries: + return True + return False
def parseDescFile(f, bridge_purpose='bridge'): """Generator. Parses a cached-descriptors file 'f', and yields a Bridge @@ -200,10 +211,24 @@ def parseStatusFile(f): flags = line.split() yield ID, ("Running" in flags), ("Stable" in flags)
+def parseCountryBlockFile(f): + """Generator. Parses a blocked-bridges file 'f', and yields a + fingerprint, countryCode tuple for every entry""" + fingerprint = countryCode = None + for line in f: + line = line.strip() + m = re.match(r"fingerprint\s+(?P<fingerprint>\w+?)\s+country-code\s+(?P<countryCode>\w+)$", line) + try: + fingerprint = m.group('fingerprint').lower() + countryCode = m.group('countryCode').lower() + yield fingerprint, countryCode + except AttributeError, IndexError: + logging.warn("Unparseable line in blocked-bridges file: %s", line) + class BridgeHolder: """Abstract base class for all classes that hold bridges.""" def insert(self, bridge): - raise NotImplemented() + raise NotImplementedError
def clear(self): pass @@ -326,7 +351,7 @@ class BridgeRing(BridgeHolder): assert len(r) == N return r
- def getBridges(self, pos, N=1): + def getBridges(self, pos, N=1, countryCode=None): """Return the N bridges appearing in the ring after position pos""" forced = [] for _,_,count,subring in self.subrings: @@ -523,3 +548,40 @@ class BridgeSplitter(BridgeHolder): def dumpAssignments(self, f, description=""): for name,ring in self.ringsByName.iteritems(): ring.dumpAssignments(f, "%s %s" % (description, name)) + +class BridgeBlock: + """Base class that abstracts bridge blocking""" + def __init__(self): + pass + + def insert(self, fingerprint, blockingRule): + raise NotImplementedError + + def clear(self): + pass + + def assignmentsArePersistent(self): + return True + +class CountryBlock(BridgeBlock): + """Countrywide bridge blocking""" + def __init__(self): + self.db = bridgedb.Storage.getDB() + + def clear(self): + assert self.db + self.db.cleanBridgeBlocks() + self.db.commit() + + def insert(self, fingerprint, blockingRule): + """ insert a country based blocking rule """ + assert self.db + countryCode = blockingRule + self.db.addBridgeBlock(fingerprint, countryCode) + self.db.commit() + + def getBlockingCountries(self, fingerprint): + """ returns a list of country codes where this fingerprint is blocked""" + assert self.db + if fingerprint is not None: + return self.db.getBlockingCountries(fingerprint) diff --git a/lib/bridgedb/Dist.py b/lib/bridgedb/Dist.py index f2274c7..5394cfa 100644 --- a/lib/bridgedb/Dist.py +++ b/lib/bridgedb/Dist.py @@ -75,7 +75,7 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder): """Assign a bridge to this distributor.""" self.splitter.insert(bridge)
- def getBridgesForIP(self, ip, epoch, N=1): + def getBridgesForIP(self, ip, epoch, N=1, countryCode=None): """Return a list of bridges to give to a user. ip -- the user's IP address, as a dotted quad. epoch -- the time period when we got this request. This can @@ -94,7 +94,7 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder): if category.contains(ip): logging.info("category<%s>%s"%(epoch,area)) pos = self.areaOrderHmac("category<%s>%s"%(epoch,area)) - return ring.getBridges(pos, N) + return ring.getBridges(pos, N, countryCode)
# Which bridge cluster should we look at? h = int( self.areaClusterHmac(area)[:8], 16) @@ -240,7 +240,7 @@ class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder): """Assign a bridge to this distributor.""" self.ring.insert(bridge)
- def getBridgesForEmail(self, emailaddress, epoch, N=1, parameters=None): + def getBridgesForEmail(self, emailaddress, epoch, N=1, parameters=None, countryCode=None): """Return a list of bridges to give to a user. emailaddress -- the user's email address, as given in a from line. epoch -- the time period when we got this request. This can @@ -266,7 +266,7 @@ class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder): raise TooSoonEmail("Too many emails; wait till later", emailaddress)
pos = self.emailHmac("<%s>%s" % (epoch, emailaddress)) - result = self.ring.getBridges(pos, N) + result = self.ring.getBridges(pos, N, countryCode)
db.setEmailTime(emailaddress, now) db.commit() diff --git a/lib/bridgedb/I18n.py b/lib/bridgedb/I18n.py index fc023f7..44adb8c 100644 --- a/lib/bridgedb/I18n.py +++ b/lib/bridgedb/I18n.py @@ -49,5 +49,7 @@ bridge addresses."""), # BRIDGEDB_TEXT[7] _("""(no bridges currently available)"""), # BRIDGEDB_TEXT[8] - _("""(e-mail requests not currently supported)""") + _("""(e-mail requests not currently supported)"""), + # BRIDGEDB_TEXT[9] + _("""(Might be blocked)""") ] diff --git a/lib/bridgedb/Main.py b/lib/bridgedb/Main.py index 99d43b6..42f78c3 100644 --- a/lib/bridgedb/Main.py +++ b/lib/bridgedb/Main.py @@ -146,15 +146,24 @@ def load(cfg, splitter, clear=False): """Read all the bridge files from cfg, and pass them into a splitter object. """ + countryblock = Bridges.CountryBlock() if clear: logging.info("Clearing old bridges") splitter.clear() + logging.info("Clearing old blocked bridges") + countryblock.clear() logging.info("Loading bridges") status = {} if hasattr(cfg, "STATUS_FILE"): f = open(cfg.STATUS_FILE, 'r') for ID, running, stable in Bridges.parseStatusFile(f): status[ID] = running, stable + f.close() + if hasattr(cfg, "COUNTRY_BLOCK_FILE"): + f = open(cfg.COUNTRY_BLOCK_FILE, 'r') + for fingerprint, countryCode in Bridges.parseCountryBlockFile(f): + countryblock.insert(fingerprint, countryCode) + f.close() for fname in cfg.BRIDGE_FILES: f = open(fname, 'r') for bridge in Bridges.parseDescFile(f, cfg.BRIDGE_PURPOSE): @@ -162,6 +171,8 @@ def load(cfg, splitter, clear=False): if s is not None: running, stable = s bridge.setStatus(running=running, stable=stable) + bridge.setBlockingCountries( + countryblock.getBlockingCountries(bridge.fingerprint)) splitter.insert(bridge) f.close()
diff --git a/lib/bridgedb/Server.py b/lib/bridgedb/Server.py index e2e0222..8ec659e 100644 --- a/lib/bridgedb/Server.py +++ b/lib/bridgedb/Server.py @@ -26,6 +26,16 @@ import twisted.mail.smtp import bridgedb.Dist import bridgedb.I18n as I18n
+try: + import GeoIP + # GeoIP data object: choose database here + # This is the same geoip implementation that pytorctl uses + geoip = GeoIP.new(GeoIP.GEOIP_STANDARD) + logging.info("GeoIP database loaded") +except: + geoip = None + logging.warn("GeoIP database not found") + class WebResource(twisted.web.resource.Resource): """This resource is used by Twisted Web to give a web page with some bridges in response to a request.""" @@ -51,6 +61,7 @@ class WebResource(twisted.web.resource.Resource): interval = self.schedule.getInterval(time.time()) bridges = ( ) ip = None + countryCode = None if self.useForwardedHeader: h = request.getHeader("X-Forwarded-For") if h: @@ -61,26 +72,29 @@ class WebResource(twisted.web.resource.Resource): else: ip = request.getClientIP()
- # See if we did get a request for a certain locale, otherwise fall back - # to 'en': - # Try evaluating the path /foo first, then check if we got a ?lang=foo - default_lang = lang = "en" - if len(request.path) > 1: - lang = request.path[1:] - if lang == default_lang: - lang = request.args.get("lang", [default_lang]) - lang = lang[0] - t = I18n.getLang(lang) + if geoip: + countryCode = geoip.country_code_by_addr(ip) + + # allow client to specify a country + forcecc = getCCFromRequest(request) + if forcecc != None: + countryCode = forcecc + + # get locale + t = getLocaleFromRequest(request)
format = request.args.get("format", None) if format and len(format): format = format[0] # choose the first arg
if ip: bridges = self.distributor.getBridgesForIP(ip, interval, - self.nBridgesToGive) + self.nBridgesToGive, + countryCode) if bridges: - answer = "".join("%s\n" % b.getConfigLine(self.includeFingerprints) - for b in bridges) + answer = "".join("%s %s\n" % ( + b.getConfigLine(self.includeFingerprints), + (I18n.BRIDGEDB_TEXT[9] if b.isBlocked(countryCode) else "") + ) for b in bridges) else: answer = t.gettext(I18n.BRIDGEDB_TEXT[7])
@@ -253,7 +267,8 @@ def getMailResponse(lines, ctx): try: interval = ctx.schedule.getInterval(time.time()) bridges = ctx.distributor.getBridgesForEmail(clientAddr, - interval, ctx.N) + interval, ctx.N, + countryCode=None) except bridgedb.Dist.BadEmail, e: logging.info("Got a mail from a bad email address %r: %s.", clientAddr, e) @@ -329,6 +344,7 @@ def getLocaleFromPlusAddr(address):
return replyLocale
+ class MailContext: """Helper object that holds information used by email subsystem.""" def __init__(self, cfg, dist, sched): @@ -451,3 +467,20 @@ def runServers(): """Start all the servers that we've configured. Exits when they do.""" reactor.run()
+def getLocaleFromRequest(request): + # See if we did get a request for a certain locale, otherwise fall back + # to 'en': + # Try evaluating the path /foo first, then check if we got a ?lang=foo + default_lang = lang = "en" + if len(request.path) > 1: + lang = request.path[1:] + if lang == default_lang: + lang = request.args.get("lang", [default_lang]) + lang = lang[0] + return I18n.getLang(lang) + +def getCCFromRequest(request): + path = re.sub(r'[^a-zA-Z]', '', request.path) + if len(path) == 2: + return path.lower() + return None diff --git a/lib/bridgedb/Storage.py b/lib/bridgedb/Storage.py index 30b4c57..5d42930 100644 --- a/lib/bridgedb/Storage.py +++ b/lib/bridgedb/Storage.py @@ -111,7 +111,7 @@ class SqliteDict:
# Here is the SQL schema.
-SCHEMA1_SCRIPT = """ +SCHEMA2_SCRIPT = """ CREATE TABLE Config ( key PRIMARY KEY NOT NULL, value @@ -136,7 +136,15 @@ SCHEMA1_SCRIPT = """
CREATE INDEX EmailedBridgesWhenMailed on EmailedBridges ( email );
- INSERT INTO Config VALUES ( 'schema-version', 1 ); + CREATE TABLE BlockedBridges ( + id INTEGER PRIMARY KEY NOT NULL, + hex_key, + blocking_country + ); + + CREATE INDEX BlockedBridgesBlockingCountry on BlockedBridges(hex_key); + + INSERT INTO Config VALUES ( 'schema-version', 2 ); """
class BridgeData: @@ -267,6 +275,52 @@ class Database: cur.execute("UPDATE Bridges SET distributor = ? WHERE hex_key = ?", (distributor, hex_key))
+ def addBridgeBlock(self, fingerprint, countryCode): + cur = self._cur + cur.execute("INSERT OR REPLACE INTO BlockedBridges " + "(hex_key,blocking_country) VALUES (?,?)", + (fingerprint, countryCode)) + + def delBridgeBlock(self, fingerprint, countryCode): + cur = self._cur + cur.execute("DELETE FROM BlockedBridges WHERE hex_key = ? " + "AND blocking_country = ?", (fingerprint, countryCode)) + + def cleanBridgeBlocks(self): + cur = self._cur + cur.execute("DELETE FROM BlockedBridges") + + def getBlockingCountries(self, fingerprint): + cur = self._cur + cur.execute("SELECT hex_key, blocking_country FROM BlockedBridges WHERE hex_key = ? ", + (fingerprint,)) + v = cur.fetchall() + if v is None: + return None + + # return list of country-codes + return [ str(result[1]) for (result) in v ] + + def getBlockedBridges(self, countryCode): + cur = self._cur + cur.execute("SELECT hex_key, blocking_country FROM BlockedBridges WHERE blocking_country = ? ", + (countryCode,)) + v = cur.fetchall() + if v is None: + return None + # return list of fingerprints + return [ str(result[0]) for (result) in v ] + + def isBlocked(self, fingerprint, countryCode): + cur = self._cur + cur.execute("SELECT hex_key, blocking_country FROM BlockedBridges WHERE " + "hex_key = ? AND blocking_country = ?", + (fingerprint, countryCode)) + v = cur.fetchone() + if v is None: + return False + return True + def openDatabase(sqlite_file): conn = sqlite3.Connection(sqlite_file) cur = conn.cursor() @@ -274,11 +328,11 @@ def openDatabase(sqlite_file): try: cur.execute("SELECT value FROM Config WHERE key = 'schema-version'") val, = cur.fetchone() - if val != 1: + if val != 2: logging.warn("Unknown schema version %s in database.", val) except sqlite3.OperationalError: logging.warn("No Config table found in DB; creating tables") - cur.executescript(SCHEMA1_SCRIPT) + cur.executescript(SCHEMA2_SCRIPT) conn.commit() finally: cur.close() @@ -292,7 +346,7 @@ def openOrConvertDatabase(sqlite_file, db_file):
conn = sqlite3.Connection(sqlite_file) cur = conn.cursor() - cur.executescript(SCHEMA1_SCRIPT) + cur.executescript(SCHEMA2_SCRIPT) conn.commit()
import anydbm diff --git a/lib/bridgedb/Tests.py b/lib/bridgedb/Tests.py index 9dea4b4..558fedf 100644 --- a/lib/bridgedb/Tests.py +++ b/lib/bridgedb/Tests.py @@ -233,6 +233,24 @@ class SQLStorageTests(unittest.TestCase): cur.execute("SELECT * FROM EmailedBridges") self.assertEquals(len(cur.fetchall()), 1)
+ db.addBridgeBlock(b2.fingerprint, 'us') + self.assertEquals(db.isBlocked(b2.fingerprint, 'us'), True) + db.delBridgeBlock(b2.fingerprint, 'us') + self.assertEquals(db.isBlocked(b2.fingerprint, 'us'), False) + db.addBridgeBlock(b2.fingerprint, 'uk') + db.addBridgeBlock(b3.fingerprint, 'uk') + self.assertEquals(set([b2.fingerprint, b3.fingerprint]), + set(db.getBlockedBridges('uk'))) + + db.addBridgeBlock(b2.fingerprint, 'cn') + db.addBridgeBlock(b2.fingerprint, 'de') + db.addBridgeBlock(b2.fingerprint, 'jp') + db.addBridgeBlock(b2.fingerprint, 'se') + db.addBridgeBlock(b2.fingerprint, 'kr') + + self.assertEquals(set(db.getBlockingCountries(b2.fingerprint)), + set(['uk', 'cn', 'de', 'jp', 'se', 'kr'])) + def testSuite(): suite = unittest.TestSuite() loader = unittest.TestLoader() diff --git a/lib/bridgedb/Time.py b/lib/bridgedb/Time.py index 63c50d5..8d7c65a 100644 --- a/lib/bridgedb/Time.py +++ b/lib/bridgedb/Time.py @@ -13,11 +13,11 @@ KNOWN_INTERVALS = [ "hour", "day", "week", "month" ]
class Schedule: def intervalStart(self, when): - raise NotImplemented() + raise NotImplementedError def getInterval(self, when): - raise NotImplemented() + raise NotImplementedError def nextIntervalStarts(self, when): - raise NotImplemented() + raise NotImplementedError
class IntervalSchedule(Schedule): """An IntervalSchedule splits time into somewhat natural periods,
tor-commits@lists.torproject.org