[tor-commits] [bridgedb/master] 1608 - don't give out known blocked bridges

arma at torproject.org arma at torproject.org
Mon Sep 19 01:10:01 UTC 2011


commit 0fc5f539b31f61b7206a9825ca589b62530e3d8f
Author: aagbsn <aagbsn at extc.org>
Date:   Mon Jul 25 16:32:15 2011 -0700

    1608 - don't give out known blocked bridges
    
    This set of changes allows bridgedb to filter responses
    by country; if a bridge is known to be blocked in a country
    it can be filtered from the set of returned bridges.
---
 README                  |   16 +++++++++++
 bridgedb.conf           |    3 ++
 lib/bridgedb/Bridges.py |   66 +++++++++++++++++++++++++++++++++++++++++++++-
 lib/bridgedb/Dist.py    |    8 +++---
 lib/bridgedb/I18n.py    |    4 ++-
 lib/bridgedb/Main.py    |   11 ++++++++
 lib/bridgedb/Server.py  |   61 +++++++++++++++++++++++++++++++++----------
 lib/bridgedb/Storage.py |   64 ++++++++++++++++++++++++++++++++++++++++++---
 lib/bridgedb/Tests.py   |   18 +++++++++++++
 lib/bridgedb/Time.py    |    6 ++--
 10 files changed, 228 insertions(+), 29 deletions(-)

diff --git a/README b/README
index 64b402a..c05317a 100644
--- a/README
+++ b/README
@@ -11,6 +11,9 @@ To set up:
    - You can make a self-signed certificate with
      openssl req -x509 -new -nodes > cert
  - Set up PYTHONPATH to include "~/lib/python2.4/site-packages/".
+ - To install the Maxmind GeoIP Python bindings:
+   - Debian: apt-get install python-geoip
+   - Others: http://www.maxmind.com/app/python 
 
 To re-generate and update the i18n files (in case translated strings
 have changed in BridgeDB):
@@ -47,5 +50,18 @@ To use with email:
  - Any mail sent to the email port with a subject or a single line _exactly_
    equal to "get bridges" will get answered, assuming the domain is okay.
 
+To indicate which bridges are blocked:
+ - Uncomment or add COUNTRY_BLOCK_FILE to your bridgedb.conf
+ - The syntax of the COUNTRY_BLOCK_FILE, 1 entry per line:
+   fingerprint <bridge fingerprint> country-code <country code>
+ - If this file is present, bridgedb will filter blocked bridges from responses
+ - For GeoIP support make sure to install Maxmind GeoIP
+
+To update the SQL schema:
+ - CREATE TABLE BlockedBridges ( id INTEGER PRIMARY KEY NOT NULL, hex_key, blocking_country);
+ - CREATE INDEX BlockedBridgesBlockingCountry on BlockedBridges(hex_key);
+ - REPLACE INTO Config VALUES ( 'schema-version', 2 );
+ 
+
 Support
  - Send your questions to nickm.
diff --git a/bridgedb.conf b/bridgedb.conf
index e486e0a..11afc02 100644
--- a/bridgedb.conf
+++ b/bridgedb.conf
@@ -21,6 +21,9 @@ BRIDGE_FILES = [ "./bridge-descriptors" ]
 # current bridges are Running.
 STATUS_FILE = "networkstatus-bridges"
 
+# Either the path to a file that lists blocked bridges, or None
+#COUNTRY_BLOCK_FILE = "./blocked-bridges" 
+
 # Only consider routers whose purpose matches this string.
 BRIDGE_PURPOSE = "bridge"
 # File to store persistent info in.
diff --git a/lib/bridgedb/Bridges.py b/lib/bridgedb/Bridges.py
index 229cc5a..358e362 100644
--- a/lib/bridgedb/Bridges.py
+++ b/lib/bridgedb/Bridges.py
@@ -104,6 +104,7 @@ class Bridge:
     ##   fingerprint -- The bridge's identity digest, in lowercase hex, with
     ##       no spaces.
     ##   running,stable -- DOCDOC
+    ##   blockingCountries -- list of country codes blocking this bridge
     def __init__(self, nickname, ip, orport, fingerprint=None, id_digest=None):
         """Create a new Bridge.  One of fingerprint and id_digest must be
            set."""
@@ -111,6 +112,7 @@ class Bridge:
         self.ip = ip
         self.orport = orport
         self.running = self.stable = None
+        self.blockingCountries = None
         if id_digest is not None:
             assert fingerprint is None
             if len(id_digest) != DIGEST_LEN:
@@ -151,6 +153,15 @@ class Bridge:
         if stable is not None:
             self.stable = stable
 
+    def setBlockingCountries(self, blockingCountries):
+        if blockingCountries is not None:
+            self.blockingCountries = blockingCountries
+
+    def isBlocked(self, countryCode):
+        if self.blockingCountries is not None and countryCode is not None:
+            if countryCode in self.blockingCountries:
+                return True
+        return False 
 
 def parseDescFile(f, bridge_purpose='bridge'):
     """Generator. Parses a cached-descriptors file 'f', and yields a Bridge
@@ -200,10 +211,24 @@ def parseStatusFile(f):
             flags = line.split()
             yield ID, ("Running" in flags), ("Stable" in flags)
 
+def parseCountryBlockFile(f):
+    """Generator. Parses a blocked-bridges file 'f', and yields a
+       fingerprint, countryCode tuple for every entry"""
+    fingerprint = countryCode = None
+    for line in f:
+        line = line.strip()
+        m = re.match(r"fingerprint\s+(?P<fingerprint>\w+?)\s+country-code\s+(?P<countryCode>\w+)$", line)
+        try:
+            fingerprint = m.group('fingerprint').lower()
+            countryCode = m.group('countryCode').lower()
+            yield fingerprint, countryCode
+        except AttributeError, IndexError:
+            logging.warn("Unparseable line in blocked-bridges file: %s", line) 
+
 class BridgeHolder:
     """Abstract base class for all classes that hold bridges."""
     def insert(self, bridge):
-        raise NotImplemented()
+        raise NotImplementedError
 
     def clear(self):
         pass
@@ -326,7 +351,7 @@ class BridgeRing(BridgeHolder):
         assert len(r) == N
         return r
 
-    def getBridges(self, pos, N=1):
+    def getBridges(self, pos, N=1, countryCode=None):
         """Return the N bridges appearing in the ring after position pos"""
         forced = []
         for _,_,count,subring in self.subrings:
@@ -523,3 +548,40 @@ class BridgeSplitter(BridgeHolder):
     def dumpAssignments(self, f, description=""):
         for name,ring in self.ringsByName.iteritems():
             ring.dumpAssignments(f, "%s %s" % (description, name))
+
+class BridgeBlock:
+    """Base class that abstracts bridge blocking"""
+    def __init__(self):
+        pass
+
+    def insert(self, fingerprint, blockingRule):
+        raise NotImplementedError
+
+    def clear(self):
+        pass
+
+    def assignmentsArePersistent(self):
+        return True
+
+class CountryBlock(BridgeBlock):
+    """Countrywide bridge blocking"""
+    def __init__(self):
+        self.db = bridgedb.Storage.getDB()
+
+    def clear(self):
+        assert self.db
+        self.db.cleanBridgeBlocks()
+        self.db.commit()
+
+    def insert(self, fingerprint, blockingRule):
+        """ insert a country based blocking rule """
+        assert self.db
+        countryCode = blockingRule
+        self.db.addBridgeBlock(fingerprint, countryCode)
+        self.db.commit()
+
+    def getBlockingCountries(self, fingerprint):
+        """ returns a list of country codes where this fingerprint is blocked"""
+        assert self.db
+        if fingerprint is not None:
+            return self.db.getBlockingCountries(fingerprint) 
diff --git a/lib/bridgedb/Dist.py b/lib/bridgedb/Dist.py
index f2274c7..5394cfa 100644
--- a/lib/bridgedb/Dist.py
+++ b/lib/bridgedb/Dist.py
@@ -75,7 +75,7 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder):
         """Assign a bridge to this distributor."""
         self.splitter.insert(bridge)
 
-    def getBridgesForIP(self, ip, epoch, N=1):
+    def getBridgesForIP(self, ip, epoch, N=1, countryCode=None):
         """Return a list of bridges to give to a user.
            ip -- the user's IP address, as a dotted quad.
            epoch -- the time period when we got this request.  This can
@@ -94,7 +94,7 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder):
             if category.contains(ip):
                 logging.info("category<%s>%s"%(epoch,area))
                 pos = self.areaOrderHmac("category<%s>%s"%(epoch,area))
-                return ring.getBridges(pos, N)
+                return ring.getBridges(pos, N, countryCode)
 
         # Which bridge cluster should we look at?
         h = int( self.areaClusterHmac(area)[:8], 16)
@@ -240,7 +240,7 @@ class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder):
         """Assign a bridge to this distributor."""
         self.ring.insert(bridge)
 
-    def getBridgesForEmail(self, emailaddress, epoch, N=1, parameters=None):
+    def getBridgesForEmail(self, emailaddress, epoch, N=1, parameters=None, countryCode=None):
         """Return a list of bridges to give to a user.
            emailaddress -- the user's email address, as given in a from line.
            epoch -- the time period when we got this request.  This can
@@ -266,7 +266,7 @@ class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder):
             raise TooSoonEmail("Too many emails; wait till later", emailaddress)
 
         pos = self.emailHmac("<%s>%s" % (epoch, emailaddress))
-        result = self.ring.getBridges(pos, N)
+        result = self.ring.getBridges(pos, N, countryCode)
 
         db.setEmailTime(emailaddress, now)
         db.commit()
diff --git a/lib/bridgedb/I18n.py b/lib/bridgedb/I18n.py
index fc023f7..44adb8c 100644
--- a/lib/bridgedb/I18n.py
+++ b/lib/bridgedb/I18n.py
@@ -49,5 +49,7 @@ bridge addresses."""),
  # BRIDGEDB_TEXT[7]
  _("""(no bridges currently available)"""),
  # BRIDGEDB_TEXT[8]
- _("""(e-mail requests not currently supported)""")
+ _("""(e-mail requests not currently supported)"""),
+ # BRIDGEDB_TEXT[9]
+ _("""(Might be blocked)""") 
 ]
diff --git a/lib/bridgedb/Main.py b/lib/bridgedb/Main.py
index 99d43b6..42f78c3 100644
--- a/lib/bridgedb/Main.py
+++ b/lib/bridgedb/Main.py
@@ -146,15 +146,24 @@ def load(cfg, splitter, clear=False):
     """Read all the bridge files from cfg, and pass them into a splitter
        object.
     """
+    countryblock = Bridges.CountryBlock()
     if clear:
         logging.info("Clearing old bridges")
         splitter.clear()
+        logging.info("Clearing old blocked bridges")
+        countryblock.clear() 
     logging.info("Loading bridges")
     status = {}
     if hasattr(cfg, "STATUS_FILE"):
         f = open(cfg.STATUS_FILE, 'r')
         for ID, running, stable in Bridges.parseStatusFile(f):
             status[ID] = running, stable
+        f.close()
+    if hasattr(cfg, "COUNTRY_BLOCK_FILE"):
+        f = open(cfg.COUNTRY_BLOCK_FILE, 'r')
+        for fingerprint, countryCode in Bridges.parseCountryBlockFile(f):
+            countryblock.insert(fingerprint, countryCode)
+        f.close() 
     for fname in cfg.BRIDGE_FILES:
         f = open(fname, 'r')
         for bridge in Bridges.parseDescFile(f, cfg.BRIDGE_PURPOSE):
@@ -162,6 +171,8 @@ def load(cfg, splitter, clear=False):
             if s is not None:
                 running, stable = s
                 bridge.setStatus(running=running, stable=stable)
+            bridge.setBlockingCountries(
+                    countryblock.getBlockingCountries(bridge.fingerprint)) 
             splitter.insert(bridge)
         f.close()
 
diff --git a/lib/bridgedb/Server.py b/lib/bridgedb/Server.py
index e2e0222..8ec659e 100644
--- a/lib/bridgedb/Server.py
+++ b/lib/bridgedb/Server.py
@@ -26,6 +26,16 @@ import twisted.mail.smtp
 import bridgedb.Dist
 import bridgedb.I18n as I18n
 
+try:
+    import GeoIP
+    # GeoIP data object: choose database here
+    # This is the same geoip implementation that pytorctl uses
+    geoip = GeoIP.new(GeoIP.GEOIP_STANDARD)
+    logging.info("GeoIP database loaded")
+except:
+    geoip = None
+    logging.warn("GeoIP database not found") 
+
 class WebResource(twisted.web.resource.Resource):
     """This resource is used by Twisted Web to give a web page with some
        bridges in response to a request."""
@@ -51,6 +61,7 @@ class WebResource(twisted.web.resource.Resource):
         interval = self.schedule.getInterval(time.time())
         bridges = ( )
         ip = None
+        countryCode = None
         if self.useForwardedHeader:
             h = request.getHeader("X-Forwarded-For")
             if h:
@@ -61,26 +72,29 @@ class WebResource(twisted.web.resource.Resource):
         else:
             ip = request.getClientIP()
 
-        # See if we did get a request for a certain locale, otherwise fall back
-        # to 'en':
-        # Try evaluating the path /foo first, then check if we got a ?lang=foo
-        default_lang = lang = "en"
-        if len(request.path) > 1:
-            lang = request.path[1:]
-        if lang == default_lang:
-            lang = request.args.get("lang", [default_lang])
-            lang = lang[0]
-        t = I18n.getLang(lang)
+        if geoip:
+            countryCode = geoip.country_code_by_addr(ip)
+
+        # allow client to specify a country
+        forcecc = getCCFromRequest(request)
+        if forcecc != None:
+            countryCode = forcecc
+
+        # get locale
+        t = getLocaleFromRequest(request) 
 
         format = request.args.get("format", None)
         if format and len(format): format = format[0] # choose the first arg
 
         if ip:
             bridges = self.distributor.getBridgesForIP(ip, interval,
-                                                       self.nBridgesToGive)
+                                                       self.nBridgesToGive,
+                                                       countryCode)
         if bridges:
-            answer = "".join("%s\n" % b.getConfigLine(self.includeFingerprints)
-                             for b in bridges)
+            answer = "".join("%s %s\n" % (
+                b.getConfigLine(self.includeFingerprints),
+                (I18n.BRIDGEDB_TEXT[9] if b.isBlocked(countryCode) else "")
+                ) for b in bridges) 
         else:
             answer = t.gettext(I18n.BRIDGEDB_TEXT[7])
 
@@ -253,7 +267,8 @@ def getMailResponse(lines, ctx):
     try:
         interval = ctx.schedule.getInterval(time.time())
         bridges = ctx.distributor.getBridgesForEmail(clientAddr,
-                                                     interval, ctx.N)
+                                                     interval, ctx.N,
+                                                     countryCode=None)
     except bridgedb.Dist.BadEmail, e:
         logging.info("Got a mail from a bad email address %r: %s.",
                      clientAddr, e)
@@ -329,6 +344,7 @@ def getLocaleFromPlusAddr(address):
 
     return replyLocale
 
+
 class MailContext:
     """Helper object that holds information used by email subsystem."""
     def __init__(self, cfg, dist, sched):
@@ -451,3 +467,20 @@ def runServers():
     """Start all the servers that we've configured. Exits when they do."""
     reactor.run()
 
+def getLocaleFromRequest(request):
+    # See if we did get a request for a certain locale, otherwise fall back
+    # to 'en':
+    # Try evaluating the path /foo first, then check if we got a ?lang=foo
+    default_lang = lang = "en"
+    if len(request.path) > 1:
+        lang = request.path[1:]
+    if lang == default_lang:
+        lang = request.args.get("lang", [default_lang])
+        lang = lang[0]
+    return I18n.getLang(lang)
+
+def getCCFromRequest(request):
+    path = re.sub(r'[^a-zA-Z]', '', request.path)
+    if len(path) ==  2:
+        return path.lower()
+    return None 
diff --git a/lib/bridgedb/Storage.py b/lib/bridgedb/Storage.py
index 30b4c57..5d42930 100644
--- a/lib/bridgedb/Storage.py
+++ b/lib/bridgedb/Storage.py
@@ -111,7 +111,7 @@ class SqliteDict:
 
 # Here is the SQL schema.
 
-SCHEMA1_SCRIPT = """
+SCHEMA2_SCRIPT = """
  CREATE TABLE Config (
      key PRIMARY KEY NOT NULL,
      value
@@ -136,7 +136,15 @@ SCHEMA1_SCRIPT = """
 
  CREATE INDEX EmailedBridgesWhenMailed on EmailedBridges ( email );
 
- INSERT INTO Config VALUES ( 'schema-version', 1 );
+ CREATE TABLE BlockedBridges (
+     id INTEGER PRIMARY KEY NOT NULL,
+     hex_key,
+     blocking_country
+ );
+
+ CREATE INDEX BlockedBridgesBlockingCountry on BlockedBridges(hex_key);
+
+ INSERT INTO Config VALUES ( 'schema-version', 2 ); 
 """
 
 class BridgeData:
@@ -267,6 +275,52 @@ class Database:
         cur.execute("UPDATE Bridges SET distributor = ? WHERE hex_key = ?",
                     (distributor, hex_key))
 
+    def addBridgeBlock(self, fingerprint, countryCode):
+        cur = self._cur
+        cur.execute("INSERT OR REPLACE INTO BlockedBridges "
+                    "(hex_key,blocking_country) VALUES (?,?)",
+                    (fingerprint, countryCode))
+
+    def delBridgeBlock(self, fingerprint, countryCode):
+        cur = self._cur
+        cur.execute("DELETE FROM BlockedBridges WHERE hex_key = ? "
+                    "AND blocking_country = ?", (fingerprint, countryCode))
+
+    def cleanBridgeBlocks(self):
+        cur = self._cur
+        cur.execute("DELETE FROM BlockedBridges")
+
+    def getBlockingCountries(self, fingerprint):
+        cur = self._cur
+        cur.execute("SELECT hex_key, blocking_country FROM BlockedBridges WHERE hex_key = ? ",
+                    (fingerprint,))
+        v = cur.fetchall()
+        if v is None:
+            return None
+
+        # return list of country-codes
+        return [ str(result[1]) for (result) in v ]
+
+    def getBlockedBridges(self, countryCode):
+        cur = self._cur
+        cur.execute("SELECT hex_key, blocking_country FROM BlockedBridges WHERE blocking_country = ? ",
+                    (countryCode,))
+        v = cur.fetchall()
+        if v is None:
+            return None
+        # return list of fingerprints
+        return [ str(result[0]) for (result) in v ]
+
+    def isBlocked(self, fingerprint, countryCode):
+        cur = self._cur
+        cur.execute("SELECT hex_key, blocking_country FROM BlockedBridges WHERE "
+                    "hex_key = ? AND blocking_country = ?",
+                    (fingerprint, countryCode))
+        v = cur.fetchone()
+        if v is None:
+            return False
+        return True 
+
 def openDatabase(sqlite_file):
     conn = sqlite3.Connection(sqlite_file)
     cur = conn.cursor()
@@ -274,11 +328,11 @@ def openDatabase(sqlite_file):
         try:
             cur.execute("SELECT value FROM Config WHERE key = 'schema-version'")
             val, = cur.fetchone()
-            if val != 1:
+            if val != 2:
                 logging.warn("Unknown schema version %s in database.", val)
         except sqlite3.OperationalError:
             logging.warn("No Config table found in DB; creating tables")
-            cur.executescript(SCHEMA1_SCRIPT)
+            cur.executescript(SCHEMA2_SCRIPT)
             conn.commit()
     finally:
         cur.close()
@@ -292,7 +346,7 @@ def openOrConvertDatabase(sqlite_file, db_file):
 
     conn = sqlite3.Connection(sqlite_file)
     cur = conn.cursor()
-    cur.executescript(SCHEMA1_SCRIPT)
+    cur.executescript(SCHEMA2_SCRIPT)
     conn.commit()
 
     import anydbm
diff --git a/lib/bridgedb/Tests.py b/lib/bridgedb/Tests.py
index 9dea4b4..558fedf 100644
--- a/lib/bridgedb/Tests.py
+++ b/lib/bridgedb/Tests.py
@@ -233,6 +233,24 @@ class SQLStorageTests(unittest.TestCase):
         cur.execute("SELECT * FROM EmailedBridges")
         self.assertEquals(len(cur.fetchall()), 1)
 
+        db.addBridgeBlock(b2.fingerprint, 'us')
+        self.assertEquals(db.isBlocked(b2.fingerprint, 'us'), True)
+        db.delBridgeBlock(b2.fingerprint, 'us')
+        self.assertEquals(db.isBlocked(b2.fingerprint, 'us'), False)
+        db.addBridgeBlock(b2.fingerprint, 'uk')
+        db.addBridgeBlock(b3.fingerprint, 'uk')
+        self.assertEquals(set([b2.fingerprint, b3.fingerprint]),
+                set(db.getBlockedBridges('uk')))
+
+        db.addBridgeBlock(b2.fingerprint, 'cn')
+        db.addBridgeBlock(b2.fingerprint, 'de')
+        db.addBridgeBlock(b2.fingerprint, 'jp')
+        db.addBridgeBlock(b2.fingerprint, 'se')
+        db.addBridgeBlock(b2.fingerprint, 'kr')
+
+        self.assertEquals(set(db.getBlockingCountries(b2.fingerprint)),
+                set(['uk', 'cn', 'de', 'jp', 'se', 'kr']))
+
 def testSuite():
     suite = unittest.TestSuite()
     loader = unittest.TestLoader()
diff --git a/lib/bridgedb/Time.py b/lib/bridgedb/Time.py
index 63c50d5..8d7c65a 100644
--- a/lib/bridgedb/Time.py
+++ b/lib/bridgedb/Time.py
@@ -13,11 +13,11 @@ KNOWN_INTERVALS = [ "hour", "day", "week", "month" ]
 
 class Schedule:
     def intervalStart(self, when):
-        raise NotImplemented()
+        raise NotImplementedError
     def getInterval(self, when):
-        raise NotImplemented()
+        raise NotImplementedError
     def nextIntervalStarts(self, when):
-        raise NotImplemented()
+        raise NotImplementedError
 
 class IntervalSchedule(Schedule):
     """An IntervalSchedule splits time into somewhat natural periods,





More information about the tor-commits mailing list