[tor-commits] [bridgedb/master] Dump bridge pool assignments to a file for statistics.

karsten at torproject.org karsten at torproject.org
Sun Mar 13 10:36:34 UTC 2011


commit 4d00328af81cfe6606c57cf01241c770bd5559d5
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Mar 9 15:17:45 2011 +0100

    Dump bridge pool assignments to a file for statistics.
    
    With this patch we dump the list of currently running bridges with
    information about their assigned rings, subrings, and file buckets to a
    local file.  The idea is to publish sanitized versions of these assignment
    files and analyze how the assignment affects a bridge's usage.
    
    The assignments file is written on startup, and again after receiving a
    HUP signal and parsing new bridge descriptors.  Note that the assignments
    file is not updated when bridges are dumped to file buckets; in that case
    the changed assignments to file buckets will be reflected in the
    assignments file after the next HUP.
    
    Also note that the assignments file only contains bridges that are
    believed to be running, based on the most recently parsed network status.
    As a result, bridges that are contained in file buckets, but that are not
    believed to be running, won't be contained in the assignments file.
---
 bridgedb.conf           |    3 +++
 lib/bridgedb/Bridges.py |   43 ++++++++++++++++++++++++++++++++++++++++++-
 lib/bridgedb/Dist.py    |    6 ++++++
 lib/bridgedb/Main.py    |   18 ++++++++++++++++--
 4 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/bridgedb.conf b/bridgedb.conf
index 44422a7..e486e0a 100644
--- a/bridgedb.conf
+++ b/bridgedb.conf
@@ -30,6 +30,9 @@ DB_LOG_FILE = "./bridgedist.log"
 # File in which we store our secret HMAC root key.
 MASTER_KEY_FILE = "./secret_key"
 
+# File to which we dump bridge pool assignments for statistics.
+ASSIGNMENTS_FILE = "assignments.log"
+
 # How many clusters do we group IPs in when distributing bridges based on IP?
 # Note that if PROXY_LIST_FILES is set (below), what we actually do here
 # is use one higher than the number here, and the extra cluster is used
diff --git a/lib/bridgedb/Bridges.py b/lib/bridgedb/Bridges.py
index 67db9af..eca6db5 100644
--- a/lib/bridgedb/Bridges.py
+++ b/lib/bridgedb/Bridges.py
@@ -211,6 +211,9 @@ class BridgeHolder:
     def assignmentsArePersistent(self):
         return True
 
+    def dumpAssignments(self, f, description=""):
+        pass
+
 class BridgeRingParameters:
     """DOCDOC"""
     def __init__(self, needPorts=(), needFlags=()):
@@ -349,6 +352,15 @@ class BridgeRing(BridgeHolder):
 
         return self.bridgesByID.get(fp)
 
+    def dumpAssignments(self, f, description=""):
+        for b in self.bridges.itervalues():
+            desc = [ description ]
+            ident = b.getID()
+            for tp,val,_,subring in self.subrings:
+                if subring.getBridgeByID(ident):
+                    desc.append("%s=%s"%(tp,val))
+            f.write("%s %s\n"%( toHex(ident), " ".join(desc).strip()))
+
 class FixedBridgeSplitter(BridgeHolder):
     """A bridgeholder that splits bridges up based on an hmac and assigns
        them to several sub-bridgeholders with equal probability.
@@ -376,19 +388,45 @@ class FixedBridgeSplitter(BridgeHolder):
             n += len(r)
         return n
 
+    def dumpAssignments(self, f, description=""):
+        for i,r in zip(xrange(len(self.rings)), self.rings):
+            r.dumpAssignments(f, "%s ring=%s" % (description, i))
 
 class UnallocatedHolder(BridgeHolder):
     """A pseudo-bridgeholder that ignores its bridges and leaves them
        unassigned.
     """
+    def __init__(self):
+        self.fingerprints = []
+
     def insert(self, bridge):
         logging.debug("Leaving %s unallocated", bridge.getConfigLine(True))
+        if not bridge.fingerprint in self.fingerprints:
+            self.fingerprints.append(bridge.fingerprint)
 
     def assignmentsArePersistent(self):
         return False
 
     def __len__(self):
-        return 0
+        return len(self.fingerprints)
+
+    def clear(self):
+        self.fingerprints = []
+
+    def dumpAssignments(self, f, description=""):
+        db = bridgedb.Storage.getDB()
+        allBridges = db.getAllBridges()
+        for bridge in allBridges:
+            if bridge.hex_key not in self.fingerprints:
+                continue
+            dist = bridge.distributor
+            desc = [ description ]
+            if dist.startswith(bridgedb.Bucket.PSEUDO_DISTRI_PREFIX):
+                dist = dist.replace(bridgedb.Bucket.PSEUDO_DISTRI_PREFIX, "")
+                desc.append("bucket=%s" % dist)
+            elif dist != "unallocated":
+                continue
+            f.write("%s %s\n" % (bridge.hex_key, " ".join(desc).strip()))
 
 class BridgeSplitter(BridgeHolder):
     """A BridgeHolder that splits incoming bridges up based on an hmac,
@@ -470,3 +508,6 @@ class BridgeSplitter(BridgeHolder):
         ring = self.ringsByName.get(ringname)
         ring.insert(bridge)
 
+    def dumpAssignments(self, f, description=""):
+        for name,ring in self.ringsByName.iteritems():
+            ring.dumpAssignments(f, "%s %s" % (description, name))
diff --git a/lib/bridgedb/Dist.py b/lib/bridgedb/Dist.py
index 53fdda0..e11d21e 100644
--- a/lib/bridgedb/Dist.py
+++ b/lib/bridgedb/Dist.py
@@ -112,6 +112,9 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder):
     def __len__(self):
         return sum(len(r) for r in self.rings)
 
+    def dumpAssignments(self, f, description=""):
+        self.splitter.dumpAssignments(f, description)
+
 # These characters are the ones that RFC2822 allows.
 #ASPECIAL = '!#$%&*+-/=?^_`{|}~'
 #ASPECIAL += "\\\'"
@@ -282,3 +285,6 @@ class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder):
         else:
             db.commit()
 
+    def dumpAssignments(self, f, description=""):
+        self.ring.dumpAssignments(f, description)
+
diff --git a/lib/bridgedb/Main.py b/lib/bridgedb/Main.py
index 82181b8..99d43b6 100644
--- a/lib/bridgedb/Main.py
+++ b/lib/bridgedb/Main.py
@@ -9,6 +9,7 @@ This module sets up a bridgedb and starts the servers running.
 import os
 import signal
 import sys
+import time
 import logging
 import gettext
 
@@ -57,6 +58,8 @@ CONFIG = Conf(
     N_IP_CLUSTERS = 4,
     MASTER_KEY_FILE = "./secret_key",
 
+    ASSIGNMENTS_FILE = "assignments.log",
+
     FORCE_PORTS = [(443, 1)],
     FORCE_FLAGS = [("Stable", 1)],
     PROXY_LIST_FILES = [ ],
@@ -197,8 +200,9 @@ def startup(cfg):
     # Expand any ~ characters in paths in the configuration.
     cfg.BRIDGE_FILES = [ os.path.expanduser(fn) for fn in cfg.BRIDGE_FILES ]
     for key in ("RUN_IN_DIR", "DB_FILE", "DB_LOG_FILE", "MASTER_KEY_FILE",
-                "HTTPS_CERT_FILE", "HTTPS_KEY_FILE", "PIDFILE", "LOGFILE",
-                "STATUS_FILE"):
+                "ASSIGNMENTS_FILE", "HTTPS_CERT_FILE", "HTTPS_KEY_FILE",
+                "PIDFILE", "LOGFILE", "STATUS_FILE"):
+
         v = getattr(cfg, key, None)
         if v:
             setattr(cfg, key, os.path.expanduser(v))
@@ -299,6 +303,16 @@ def startup(cfg):
                 for name, b in r.bridges.items():
                     logging.info("%s" % b.getConfigLine(True))
 
+        # Dump bridge pool assignments to disk.
+        try:
+            f = open(cfg.ASSIGNMENTS_FILE, 'a')
+            f.write("bridge-pool-assignment %s\n" %
+                    time.strftime("%Y-%m-%d %H:%M:%S"))
+            splitter.dumpAssignments(f)
+            f.close()
+        except IOError:
+            logging.info("I/O error while writing assignments")
+
     global _reloadFn
     _reloadFn = reload
     signal.signal(signal.SIGHUP, _handleSIGHUP)





More information about the tor-commits mailing list