[or-cvs] [bridgedb/master 1/4] Adding the code that Nick reviewed to the bug_1612 branch

nickm at torproject.org nickm at torproject.org
Mon Sep 27 20:10:51 UTC 2010


Author: Christian Fromme <kaner at strace.org>
Date: Sun, 4 Jul 2010 11:55:58 +0200
Subject: Adding the code that Nick reviewed to the bug_1612 branch
Commit: 9ba4b0e7fe3ff8bb8678fa39db6d8de56bf032dc

---
 bridgedb.conf           |    1 +
 lib/bridgedb/Dist.py    |  165 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/bridgedb/Main.py    |   29 +++++++--
 lib/bridgedb/Storage.py |   16 +++++
 4 files changed, 205 insertions(+), 6 deletions(-)

diff --git a/bridgedb.conf b/bridgedb.conf
index d28775a..0af8bbb 100644
--- a/bridgedb.conf
+++ b/bridgedb.conf
@@ -140,3 +140,4 @@ EMAIL_INCLUDE_FINGERPRINTS=False
 #   HTTPS_SHARE : EMAIL_SHARE : RESERVED_SHARE
 RESERVED_SHARE=2
 
+FILE_DISTRIBUTORS = {}
diff --git a/lib/bridgedb/Dist.py b/lib/bridgedb/Dist.py
index 81f6f21..99ab3b4 100644
--- a/lib/bridgedb/Dist.py
+++ b/lib/bridgedb/Dist.py
@@ -49,6 +49,7 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder):
             self.rings[-1].setName("IP ring %s"%len(self.rings))
         n = nClusters
         for c in ipCategories:
+            logging.info("Building ring: Order-Bridges-In-Ring-%d"%n)
             key1 = bridgedb.Bridges.get_hmac(key, "Order-Bridges-In-Ring-%d"%n)
             ring = bridgedb.Bridges.BridgeRing(key1, answerParameters)
             self.categoryRings.append( ring )
@@ -86,8 +87,12 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder):
 
         area = self.areaMapper(ip)
 
+        logging.info("area is %s" % area)
+
         for category, ring in self.categories:
+            logging.info("---------------------------------")
             if category.contains(ip):
+                logging.info("category<%s>%s"%(epoch,area))
                 pos = self.areaOrderHmac("category<%s>%s"%(epoch,area))
                 return ring.getBridges(pos, N)
 
@@ -276,3 +281,163 @@ class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder):
             raise
         else:
             db.commit()
+
+
+class FileDistributorBean:
+    """Bookkeeping for one file distributor: name, quota and usage so far"""
+    def __init__(self, name, needed):
+        self.name = name
+        self.allocated = 0
+        if needed == "*":
+            # Wildcard: ask for a ridiculously high number of bridges
+            self.needed = 1000000
+        else:
+            self.needed = int(needed)
+
+class FileDistributor:
+    """FileDistributor reads a number of file distributors from the config.
+       They're expected to be in the following format:
+
+       FILE_DISTRIBUTORS = { "name1": 10, "name2": 15, "foobar": 3 }
+
+       This syntax means that certain distributors ("name1", "name2" and so on)
+       are given a number of bridges (10, 15 and so on). Names can be anything.
+       The name will later be the prefix of the file that is written with the
+       assigned number of bridges in it. Instead of a number, a wildcard item
+       ("*") is allowed, too. This means that that file distributor will get
+       maximum bridges (as many as are left in the unallocated bucket).
+
+       The files get a greeting line, then one bridge per line as ip:port.
+
+       The way this works internally is as follows:
+
+       First of all, the assignBridgesToDistributors() routine runs through
+       the database of bridges and looks up the 'distributor' field of each
+       bridge. Unallocated bridges are sent to a pool for later assignment.
+       Already allocated bridges for file distributors are sorted and checked.
+       They're checked for whether the distributor still exists in the current
+       config and also whether the number of assigned bridges is still valid.
+       If either the distributor is not existing anymore or too many bridges
+       are currently assigned to her, bridges will go to the unassigned pool.
+
+       In the second step, after bridges are sorted and the unassigned pool is
+       ready, the assignBridgesToDistributors() routine assigns one bridge
+       from the unassigned pool to a known distributor at a time until it
+       either runs out of bridges in the unallocated pool or the number of
+       needed bridges for that distributor is fulfilled.
+
+       When all bridges are assigned in this way, they can then be dumped
+       into files by calling the dumpBridges() routine.
+    """
+
+    def __init__(self, cfg):
+        """Remember the config and open the bridge database it names."""
+        self.cfg = cfg
+        self.distributorList = []
+        self.unallocatedList = []
+        self.unallocated_available = False
+        self.db = bridgedb.Storage.Database(self.cfg.DB_FILE+".sqlite",
+                                            self.cfg.DB_FILE)
+
+    def __del__(self):
+        # __init__ may have failed before self.db was set; don't raise here
+        db = getattr(self, "db", None)
+        if db is not None:
+            db.close()
+
+    def _setDistributor(self, distributor, id):
+        """Store a bridge's distributor; roll back and re-raise on error."""
+        try:
+            self.db.updateDistributorForId(distributor, id)
+        except:
+            self.db.rollback()
+            raise
+        else:
+            self.db.commit()
+
+    def addToUnallocatedList(self, id):
+        """Add a bridge by database id into the unallocated pool."""
+        self._setDistributor("unallocated", id)
+        self.unallocatedList.append(id)
+        self.unallocated_available = True
+
+    def knownFileDistributor(self, distributor):
+        """Return the bean for this distributor name, or None if unknown."""
+        for d in self.distributorList:
+            if d.name == distributor:
+                return d
+        return None
+
+    def assignUnallocatedBridge(self, distributor):
+        """Assign an unallocated bridge to a certain distributor. Returns
+           True on success, False if the unallocated pool is empty.
+        """
+        if not self.unallocatedList:
+            self.unallocated_available = False
+            return False
+        self._setDistributor(distributor.name, self.unallocatedList.pop())
+        # Only count the bridge once the database update went through
+        distributor.allocated += 1
+        self.unallocated_available = len(self.unallocatedList) > 0
+        return True
+
+    def assignBridgesToDistributors(self):
+        """Read file distributors from the configuration, sort them and write
+           necessary changes to the database
+        """
+        # Build distributor list
+        for k, v in self.cfg.FILE_DISTRIBUTORS.items():
+            self.distributorList.append(FileDistributorBean(k, v))
+
+        # Loop through all bridges and sort out our distributors
+        allBridges = self.db.getAllBridges()
+        for bridge in allBridges:
+            distributor = bridge[4]
+            if distributor == "unallocated":
+                self.addToUnallocatedList(bridge[0])
+                continue
+
+            # Check if we know this distributor
+            d = self.knownFileDistributor(distributor)
+            if d is not None:
+                # Does this distributor need another one?
+                # We assume that d.allocated is 0 in the beginning
+                if d.allocated < d.needed:
+                    d.allocated += 1
+                else:
+                    self.addToUnallocatedList(bridge[0])
+            # We don't know it. Maybe an old entry. Free it, but DON'T free
+            # https or email allocations!
+            elif distributor not in ("https", "email"):
+                self.addToUnallocatedList(bridge[0])
+
+        # Loop through distributorList while we have and need unallocated
+        # bridges, assign one bridge at a time
+        while self.unallocated_available and len(self.distributorList) > 0:
+            # Walk a copy: satisfied distributors are removed inside the loop
+            for d in self.distributorList[:]:
+                if not self.unallocated_available:
+                    break
+                if d.allocated >= d.needed:
+                    # When we have enough bridges, remove from list
+                    self.distributorList.remove(d)
+                elif not self.assignUnallocatedBridge(d):
+                    print("Couldn't assign unallocated bridge to %s" % d.name)
+
+    def dumpBridges(self):
+        """Dump all known file distributors to files, one file for each."""
+        # Dump https, email and unallocated, too. Work on a copy of the names
+        # so that the configuration itself is left untouched.
+        names = list(self.cfg.FILE_DISTRIBUTORS.keys())
+        names += [n for n in ("https", "email", "unallocated") if n not in names]
+        for distributor in names:
+            fileName = distributor + "-" + time.strftime("%Y-%m-%d") + ".brdgs"
+            bForDistributor = self.db.getBridgesForDistributor(distributor)
+            print("Dumping %d bridges for %s to %s" % (len(bForDistributor), distributor, fileName))
+            f = open(fileName, 'w')
+            try:
+                f.write("Here are your bridges, %s:\n" % distributor)
+                for bridge in bForDistributor:
+                    f.write("%s:%s\n" % (bridge[2], bridge[3]))
+            finally:
+                f.close()
diff --git a/lib/bridgedb/Main.py b/lib/bridgedb/Main.py
index b13710b..726137f 100644
--- a/lib/bridgedb/Main.py
+++ b/lib/bridgedb/Main.py
@@ -18,6 +18,7 @@ import bridgedb.Dist as Dist
 import bridgedb.Time as Time
 import bridgedb.Server as Server
 import bridgedb.Storage
+import bridgedb.Opt as Opt
 
 class Conf:
     """A configuration object.  Holds unvalidated attributes.
@@ -88,6 +89,8 @@ CONFIG = Conf(
     EMAIL_INCLUDE_FINGERPRINTS = False,
 
     RESERVED_SHARE=2,
+
+    FILE_DISTRIBUTORS = {}
   )
 
 def configureLogging(cfg):
@@ -287,6 +290,12 @@ def startup(cfg):
             logging.info("%d for web:", len(ipDistributor.splitter))
             logging.info("  by location set: %s",
                          " ".join(str(len(r)) for r in ipDistributor.rings))
+            logging.info("  by category set: %s",
+                         " ".join(str(len(r)) for r in ipDistributor.categoryRings))
+            logging.info("Here are all known bridges in the category section:")
+            for r in ipDistributor.categoryRings:
+                for name, b in r.bridges.items():
+                    logging.info("%s" % b.getConfigLine(True))
 
     global _reloadFn
     _reloadFn = reload
@@ -316,18 +325,26 @@ def run():
     """Parse the command line to determine where the configuration is.
        Parse the configuration, and start the servers.
     """
-    if len(sys.argv) != 2:
+    options, arguments = Opt.parseOpts()
+
+    configuration = {}
+    if options.testing:
+        configuration = CONFIG
+    elif not options.configfile:
         print "Syntax: %s [config file]" % sys.argv[0]
         sys.exit(1)
-    if sys.argv[1] == "TESTING":
-        configuration = CONFIG
     else:
-        configuration = {}
-        execfile(sys.argv[1], configuration)
+        configFile = options.configfile
+        execfile(configFile, configuration)
         C = Conf(**configuration)
         configuration = C
 
-    startup(configuration)
+    if options.dumpbridges:
+        fileDistributor = Dist.FileDistributor(configuration)
+        fileDistributor.assignBridgesToDistributors()
+        fileDistributor.dumpBridges()
+    else:
+        startup(configuration)
 
 if __name__ == '__main__':
     run()
diff --git a/lib/bridgedb/Storage.py b/lib/bridgedb/Storage.py
index cc40fc4..d7346b5 100644
--- a/lib/bridgedb/Storage.py
+++ b/lib/bridgedb/Storage.py
@@ -204,6 +204,22 @@ class Database:
         cur.execute("INSERT OR REPLACE INTO EmailedBridges "
                     "(email,when_mailed) VALUES (?,?)", (addr, t))
 
+    def getAllBridges(self):
+        """Return every row of the Bridges table."""
+        self._cur.execute("SELECT * FROM Bridges")
+        return self._cur.fetchall()
+
+    def getBridgesForDistributor(self, distributor):
+        """Return all Bridges rows whose distributor column matches."""
+        query = "SELECT * FROM Bridges WHERE distributor = ?"
+        self._cur.execute(query, (distributor, ))
+        return self._cur.fetchall()
+
+    def updateDistributorForId(self, distributor, id):
+        """Set the distributor column of the Bridges row with this id."""
+        sql = "UPDATE Bridges SET distributor = ? WHERE id = ?"
+        self._cur.execute(sql, (distributor, id))
+
 def openDatabase(sqlite_file):
     conn = sqlite3.Connection(sqlite_file)
     cur = conn.cursor()
-- 
1.7.1




More information about the tor-commits mailing list