[or-cvs] [bridgedb/master 2/4] Changed and cleaned up the unallocated-bridges-to-bucket-files feature a bit, according to the review comments by Nick.

nickm at torproject.org nickm at torproject.org
Mon Sep 27 20:10:51 UTC 2010


Author: Christian Fromme <kaner at strace.org>
Date: Sat, 17 Jul 2010 17:01:32 +0200
Subject: Changed and cleaned up the unallocated-bridges-to-bucket-files feature a bit,
Commit: 5fbe1a5cca5e526890c8a31dbdad08d7385e980a

---
 bridgedb.conf           |    2 +-
 lib/bridgedb/Bucket.py  |  196 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/bridgedb/Dist.py    |  159 --------------------------------------
 lib/bridgedb/Main.py    |   11 ++-
 lib/bridgedb/Opt.py     |   17 ++++
 lib/bridgedb/Storage.py |   53 +++++++++++--
 6 files changed, 266 insertions(+), 172 deletions(-)
 create mode 100644 lib/bridgedb/Bucket.py
 create mode 100644 lib/bridgedb/Opt.py

diff --git a/bridgedb.conf b/bridgedb.conf
index 0af8bbb..44422a7 100644
--- a/bridgedb.conf
+++ b/bridgedb.conf
@@ -140,4 +140,4 @@ EMAIL_INCLUDE_FINGERPRINTS=False
 #   HTTPS_SHARE : EMAIL_SHARE : RESERVED_SHARE
 RESERVED_SHARE=2
 
-FILE_DISTRIBUTORS = {}
+FILE_BUCKETS = {}
diff --git a/lib/bridgedb/Bucket.py b/lib/bridgedb/Bucket.py
new file mode 100644
index 0000000..ab6d83b
--- /dev/null
+++ b/lib/bridgedb/Bucket.py
@@ -0,0 +1,196 @@
+"""
+This module is responsible for everything concerning file bucket bridge 
+distribution. File bucket bridge distribution means that unallocated bridges 
+are allocated to a certain pseudo-distributor and later written to a file.
+
+For example, the following is a dict of pseudo-distributors (also called 
+'bucket identifiers') with numbers of bridges assigned to them:
+
+        FILE_BUCKETS = { "name1": 10, "name2": 15, "foobar": 3 }
+
+This configuration for buckets would result in 3 files being created for bridge
+distribution: name1-2010-07-17.brdgs, name2-2010-07-17.brdgs and 
+foobar-2010-07-17.brdgs. The first file would contain 10 bridges from BridgeDB's
+'unallocated' pool. The second file would contain 15 bridges from the same pool
+and the third one similarly 3 bridges. These files can then be handed out to 
+trusted parties via mail or fed to other distribution mechanisms such as 
+twitter.
+
+Note that in BridgeDB slang, the _distributor_ would still be 'unallocated',
+even though in the database, there would now be 'name1', 'name2' or 'foobar'
+instead of 'unallocated'. This is why they are called pseudo-distributors.
+"""
+
+import time
+
+import bridgedb.Storage
+
+class BucketData:
+    """A file bucket value class.
+       name      - Name of the bucket (From config)
+       needed    - Needed number of bridges for that bucket (From config)
+       allocated - Number of already allocated bridges for that bucket
+    """
+    def __init__(self, name, needed):
+        self.name = name
+        if needed == "*":
+            # Set to ridiculously high number
+            needed = 1000000
+        self.needed = int(needed)
+        self.allocated = 0
+
+class BucketManager:
+    """BucketManager reads a number of file bucket identifiers from the config.
+       They're expected to be in the following format:
+
+       FILE_BUCKETS = { "name1": 10, "name2": 15, "foobar": 3 }
+
+       This syntax means that certain buckets ("name1", "name2" and so on)
+       are given a number of bridges (10, 15 and so on). Names can be anything.
+       The name will later be the prefix of the file that is written with the
+       assigned number of bridges in it. Instead of a number, a wildcard item
+       ("*") is allowed, too. This means that the corresponding bucket file 
+       will get the maximum number of possible bridges (as many as are left in 
+       the unallocated bucket).
+
+       The files will be written in ip:port format, one bridge per line.
+
+       The way this works internally is as follows:
+
+       First of all, the assignBridgesToBuckets() routine runs through
+       the database of bridges and looks up the 'distributor' field of each 
+       bridge. Unallocated bridges are sent to a pool for later assignment.
+       Already allocated bridges for file bucket distribution are sorted and 
+       checked.
+       They're checked for whether their bucket identifier still exists in the 
+       current config and also whether the number of assigned bridges is still 
+       valid. If either the bucket identifier is not existing anymore or too 
+       many bridges are currently assigned to it, bridges will go to the 
+       unassigned pool.
+
+       In the second step, after bridges are sorted and the unassigned pool is
+       ready, the assignBridgesToBuckets() routine assigns one bridge
+       from the unassigned pool to a known bucket identifier at a time until it
+       either runs out of bridges in the unallocated pool or the number of
+       needed bridges for that bucket is reached.
+
+       When all bridges are assigned in this way, they can then be dumped into
+       files by calling the dumpBridges() routine.
+    """
+
+    def __init__(self, cfg):
+        self.cfg = cfg
+        self.bucketList = []
+        self.unallocatedList = []
+        self.unallocated_available = False
+        self.db = bridgedb.Storage.Database(self.cfg.DB_FILE+".sqlite",
+                                            self.cfg.DB_FILE)
+
+    def __del__(self):
+        self.db.close()
+
+    def addToUnallocatedList(self, hex_key):
+        """Add a bridge by hex_key into the unallocated pool
+        """
+        try:
+            self.db.updateDistributorForHexKey("unallocated", hex_key)
+        except:
+            self.db.rollback()
+            raise
+        else:
+            self.db.commit()
+        self.unallocatedList.append(hex_key)
+        self.unallocated_available = True
+
+    def isBucketIdentKnown(self, bucketIdent):
+        """Do we know this bucket identifier?
+        """
+        for d in self.bucketList:
+            if d.name == bucketIdent:
+                return d
+        return None
+
+    def assignUnallocatedBridge(self, bucket):
+        """Assign an unallocated bridge to a certain bucket
+        """
+        bucket.allocated += 1
+        hex_key = self.unallocatedList.pop()
+        #print "KEY: %d NAME: %s" % (hex_key, bucket.name)
+        try:
+            self.db.updateDistributorForHexKey(bucket.name, hex_key)
+        except:
+            self.db.rollback()
+            raise
+        else:
+            self.db.commit()
+        if len(self.unallocatedList) < 1:
+            self.unallocated_available = False
+        return True
+
+    def assignBridgesToBuckets(self):
+        """Read file bucket identifiers from the configuration, sort them and 
+           write necessary changes to the database
+        """
+        # Build distributor list
+        for k, v in self.cfg.FILE_BUCKETS.items():
+            d = BucketData(k, v)
+            self.bucketList.append(d)
+
+        # Loop through all bridges and sort out our distributors
+        allBridges = self.db.getAllBridges()
+        for bridge in allBridges:
+            if bridge.distributor == "unallocated":
+                self.addToUnallocatedList(bridge.hex_key)
+                continue
+
+            # Check if we know this distributor
+            d = self.isBucketIdentKnown(bridge.distributor)
+            if d is not None:
+                # Does this distributor need another one?
+                # We assume that d.allocated is 0 in the beginning
+                if d.allocated < d.needed:
+                    d.allocated += 1
+                else:
+                    self.addToUnallocatedList(bridge.hex_key)
+            # We don't know it. Maybe an old entry. Free it.
+            else:
+                # DON'T free https or email allocations!
+                if bridge.distributor != "https" and \
+                   bridge.distributor != "email":
+                    self.addToUnallocatedList(bridge.hex_key)
+
+        # Loop through bucketList while we have and need unallocated
+        # bridges, assign one bridge at a time
+        while self.unallocated_available and len(self.bucketList) > 0:
+            # Iterate over a copy, since full buckets get removed from the list
+            for d in self.bucketList[:]:
+                if not self.unallocated_available:
+                    break  # pool ran dry mid-round; pop() would fail
+                if d.allocated < d.needed:
+                    self.assignUnallocatedBridge(d)
+                else:
+                    self.bucketList.remove(d)
+         
+
+    def dumpBridges(self):
+        """Dump the bridges of every bucket, plus the https, email and
+           unallocated pools, to one file per (pseudo-)distributor.
+        """
+        # Work on a copy so the configured FILE_BUCKETS dict is not mutated
+        buckets = dict(self.cfg.FILE_BUCKETS)
+        buckets["https"] = 0
+        buckets["email"] = 0
+        buckets["unallocated"] = 0
+        for bucketId in buckets:
+            bForBucket = self.db.getBridgesForDistributor(bucketId)
+            # Skip empty (pseudo-)distributors before creating any file
+            if len(bForBucket) < 1:
+                continue
+            fileName = bucketId + "-" + time.strftime("%Y-%m-%d") + ".brdgs"
+            print "Dumping %d bridges for %s to %s" % (len(bForBucket), bucketId, fileName)
+            f = open(fileName, 'w')
+            try:
+                for bridge in bForBucket:
+                    f.write("%s:%s\n" % (bridge.address, bridge.or_port))
+            finally:
+                f.close()
diff --git a/lib/bridgedb/Dist.py b/lib/bridgedb/Dist.py
index 99ab3b4..53fdda0 100644
--- a/lib/bridgedb/Dist.py
+++ b/lib/bridgedb/Dist.py
@@ -282,162 +282,3 @@ class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder):
         else:
             db.commit()
 
-
-class FileDistributorBean:
-    """A file distributor bean
-    """
-    def __init__(self, name, needed):
-        self.name = name
-        if needed == "*":
-            # Set to rediculously high number
-            needed = 1000000
-        self.needed = int(needed)
-        self.allocated = 0
-
-class FileDistributor:
-    """FileDistributor reads a number of file distributors from the config.
-       They're expected to be in the following format:
-
-       FILE_DISTRIBUTORS = { "name1": 10, "name2": 15, "foobar": 3 }
-
-       This syntax means that certain distributors ("name1", "name2" and so on)
-       are given a number of bridges (10, 15 and so on). Names can be anything.
-       The name will later be the prefix of the file that is written with the
-       assigned number of bridges in it. Instead of a number, a wildcard item
-       ("*") is allowed, too. This means that that file distributor will get
-       maximum bridges (as many as are left in the unallocated bucket).
-
-       The files will be written in ip:port format, one bridge per line.
-
-       The way this works internally is as follows:
-
-       First of all, the assignBridgesToDistributors() routine runs through
-       the database of bridges and looks up the 'distributor' field of each 
-       bridge. Unallocated bridges are sent to a pool for later assignement.
-       Already allocated bridges for file distributors are sorted and checked.
-       They're checked for whether the distributor still exists in the current
-       config and also whether the number of assigned bridges is still valid.
-       If either the distributor is not existing anymore or too many bridges
-       are currently assigned to her, bridges will go to the unassigned pool.
-
-       In the second step, after bridges are sorted and the unassigned pool is
-       ready, the assignBridgesToDistributors() routine assigns one bridge
-       from the unassigned pool to a known distributor at a time until it
-       either runs out of bridges in the unallocated pool or the number of
-       needed bridges for that distributor is fullfilled.
-
-       When all bridges are assigned in this way, they then can then be dumped
-       into files by calling the dumpBridges() routine.
-    """
-
-    def __init__(self, cfg):
-        self.cfg = cfg
-        self.distributorList = []
-        self.unallocatedList = []
-        self.unallocated_available = False
-        self.db = bridgedb.Storage.Database(self.cfg.DB_FILE+".sqlite",
-                                            self.cfg.DB_FILE)
-
-    def __del__(self):
-        self.db.close()
-
-    def addToUnallocatedList(self, id):
-        """Add a bridge by database id into the unallocated pool
-        """
-        try:
-            self.db.updateDistributorForId("unallocated", id)
-        except:
-            self.db.rollback()
-            raise
-        else:
-            self.db.commit()
-        self.unallocatedList.append(id)
-        self.unallocated_available = True
-
-    def knownFileDistributor(self, distributor):
-        """Do we know this distributor?
-        """
-        for d in self.distributorList:
-            if d.name == distributor:
-                return d
-        return None
-
-    def assignUnallocatedBridge(self, distributor):
-        """Assign an unallocated bridge to a certain distributor
-        """
-        distributor.allocated += 1
-        id = self.unallocatedList.pop()
-        #print "ID: %d NAME: %s" % (id, distributor.name)
-        try:
-            self.db.updateDistributorForId(distributor.name, id)
-        except:
-            self.db.rollback()
-            raise
-        else:
-            self.db.commit()
-        if len(self.unallocatedList) < 1:
-            self.unallocated_available = False
-        return True
-
-    def assignBridgesToDistributors(self):
-        """Read file distributors from the configuration, sort them and write
-           necessary changes to the database
-        """
-        # Build distributor list
-        for k, v in self.cfg.FILE_DISTRIBUTORS.items():
-            d = FileDistributorBean(k, v)
-            self.distributorList.append(d)
-
-        # Loop through all bridges and sort out our distributors
-        allBridges = self.db.getAllBridges()
-        for bridge in allBridges:
-            distributor =  bridge[4]
-            if distributor == "unallocated":
-                self.addToUnallocatedList(bridge[0])
-                continue
-
-            # Check if we know this distributor
-            d = self.knownFileDistributor(distributor)
-            if d is not None:
-                # Does this distributor need another one?
-                # We assume that d.allocated is 0 in the beginning
-                if d.allocated < d.needed:
-                    d.allocated += 1
-                else:
-                    self.addToUnallocatedList(bridge[0])
-            # We don't know it. Maybe an old entry. Free it.
-            else:
-                # DON'T free https or email allocations!
-                if distributor != "https" and distributor != "email":
-                    self.addToUnallocatedList(bridge[0])
-
-        # Loop though distributorList while we have and need unallocated 
-        # bridges, assign one bridge at a time
-        while self.unallocated_available and len(self.distributorList) > 0:
-            for d in self.distributorList:
-                if d.allocated < d.needed:
-                    if not self.assignUnallocatedBridge(d):
-                        print "Couldn't assign unallocated bridge to %s" % d.name
-                else:
-                    # When we have enough bridges, remove from list
-                    self.distributorList.remove(d)
-         
-
-    def dumpBridges(self):
-        """Dump all known file distributors to files
-        """
-        # Dump https, email and unreserved, too
-        self.cfg.FILE_DISTRIBUTORS["https"] = 0
-        self.cfg.FILE_DISTRIBUTORS["email"] = 0
-        self.cfg.FILE_DISTRIBUTORS["unallocated"] = 0
-        # Loop through all distributors and dump their bridges to files
-        for distributor, _ in self.cfg.FILE_DISTRIBUTORS.items():
-            fileName = distributor + "-" + time.strftime("%Y-%m-%d") + ".brdgs"
-            f = open(fileName, 'w')
-            f.write("Here are your bridges, %s:\n" % distributor)
-            bForDistributor = self.db.getBridgesForDistributor(distributor) 
-            print "Dumping %d bridges for %s to %s" % (len(bForDistributor), distributor, fileName)
-            for bridge in bForDistributor:
-                line = "%s:%s" % (bridge[2], bridge[3])
-                f.write(line + '\n')
-            f.close
diff --git a/lib/bridgedb/Main.py b/lib/bridgedb/Main.py
index 726137f..7bf8f98 100644
--- a/lib/bridgedb/Main.py
+++ b/lib/bridgedb/Main.py
@@ -19,6 +19,7 @@ import bridgedb.Time as Time
 import bridgedb.Server as Server
 import bridgedb.Storage
 import bridgedb.Opt as Opt
+import bridgedb.Bucket as Bucket
 
 class Conf:
     """A configuration object.  Holds unvalidated attributes.
@@ -90,7 +91,7 @@ CONFIG = Conf(
 
     RESERVED_SHARE=2,
 
-    FILE_DISTRIBUTORS = {}
+    FILE_BUCKETS = {}
   )
 
 def configureLogging(cfg):
@@ -326,8 +327,8 @@ def run():
        Parse the configuration, and start the servers.
     """
     options, arguments = Opt.parseOpts()
-
     configuration = {}
+
     if options.testing:
         configuration = CONFIG
     elif not options.configfile:
@@ -340,9 +341,9 @@ def run():
         configuration = C
 
     if options.dumpbridges:
-        fileDistributor = Dist.FileDistributor(configuration)
-        fileDistributor.assignBridgesToDistributors()
-        fileDistributor.dumpBridges()
+        bucketManager = Bucket.BucketManager(configuration)
+        bucketManager.assignBridgesToBuckets()
+        bucketManager.dumpBridges()
     else:
         startup(configuration)
 
diff --git a/lib/bridgedb/Opt.py b/lib/bridgedb/Opt.py
new file mode 100644
index 0000000..3649213
--- /dev/null
+++ b/lib/bridgedb/Opt.py
@@ -0,0 +1,17 @@
+# Parse command line args
+
+import optparse
+
+def parseOpts():
+    cmdParser = optparse.OptionParser()
+    cmdParser.add_option("-c", "--config", dest="configfile",
+                        default="./bridgedb.conf",
+                        help="set config file to FILE", metavar="FILE")
+    cmdParser.add_option("-d", "--dump-bridges", dest="dumpbridges",
+                        action="store_true", default=False,
+                        help="dump reserved bridges into files")
+    cmdParser.add_option("-t", "--testing", dest="testing",
+                        action="store_true", default=False,
+                        help="do some sanity tests")
+    
+    return cmdParser.parse_args()
diff --git a/lib/bridgedb/Storage.py b/lib/bridgedb/Storage.py
index d7346b5..2c64e83 100644
--- a/lib/bridgedb/Storage.py
+++ b/lib/bridgedb/Storage.py
@@ -139,6 +139,24 @@ SCHEMA1_SCRIPT = """
  INSERT INTO Config VALUES ( 'schema-version', 1 );
 """
 
+class BridgeData:
+    """Value class carrying bridge information:
+       hex_key      - The unique hex key of the given bridge
+       address      - Bridge IP address
+       or_port      - Bridge TCP port
+       distributor  - The distributor (or pseudo-distributor) through which 
+                      this bridge is being announced
+       first_seen   - When did we first see this bridge online?
+       last_seen    - When was the last time we saw this bridge online?
+    """
+    def __init__(self, hex_key, address, or_port, distributor="unallocated", 
+                 first_seen="", last_seen=""):
+        self.hex_key = hex_key
+        self.address = address
+        self.or_port = or_port
+        self.distributor = distributor
+        self.first_seen = first_seen
+        self.last_seen = last_seen
 
 class Database:
     def __init__(self, sqlite_fname, db_fname=None):
@@ -151,6 +169,9 @@ class Database:
     def commit(self):
         self._conn.commit()
 
+    def rollback(self):
+        self._conn.rollback()
+
     def close(self):
         self._cur.close()
         self._conn.close()
@@ -205,20 +226,38 @@ class Database:
                     "(email,when_mailed) VALUES (?,?)", (addr, t))
 
     def getAllBridges(self):
+        """Return a list of BridgeData value classes of all bridges in the
+           database
+        """
+        retBridges = []
         cur = self._cur
-        cur.execute("SELECT * FROM Bridges")
-        return cur.fetchall()
+        cur.execute("SELECT hex_key, address, or_port, distributor, "
+                    "first_seen, last_seen  FROM Bridges")
+        for b in cur.fetchall():
+            bridge = BridgeData(b[0], b[1], b[2], b[3], b[4], b[5])
+            retBridges.append(bridge)
+
+        return retBridges
 
     def getBridgesForDistributor(self, distributor):
+        """Return a list of BridgeData value classes of all bridges in the
+           database that are allocated to distributor 'distributor'
+        """
+        retBridges = []
         cur = self._cur
-        cur.execute("SELECT * FROM Bridges WHERE "
+        cur.execute("SELECT hex_key, address, or_port, distributor, "
+                    "first_seen, last_seen FROM Bridges WHERE "
                     "distributor = ?", (distributor, ))
-        return cur.fetchall()
+        for b in cur.fetchall():
+            bridge = BridgeData(b[0], b[1], b[2], b[3], b[4], b[5])
+            retBridges.append(bridge)
+
+        return retBridges
 
-    def updateDistributorForId(self, distributor, id):
+    def updateDistributorForHexKey(self, distributor, hex_key):
         cur = self._cur
-        cur.execute("UPDATE Bridges SET distributor = ? WHERE id = ?",
-                    (distributor, id))
+        cur.execute("UPDATE Bridges SET distributor = ? WHERE hex_key = ?",
+                    (distributor, hex_key))
 
 def openDatabase(sqlite_file):
     conn = sqlite3.Connection(sqlite_file)
-- 
1.7.1




More information about the tor-commits mailing list