[tor-commits] [bridgedb/master] 4297 - add IPBasedDistributor IP Clustering Filter

aagbsn at torproject.org aagbsn at torproject.org
Sat Mar 16 23:46:31 UTC 2013


commit f6c7ee872671995eb32dfb790542932f5279206c
Author: aagbsn <aagbsn at extc.org>
Date:   Fri May 25 16:36:25 2012 -0700

    4297 - add IPBasedDistributor IP Clustering Filter
    
    Implements IP clustering as a default filter.
---
 lib/bridgedb/Dist.py |   64 ++++++++++++++++++++++++-------------------------
 1 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/lib/bridgedb/Dist.py b/lib/bridgedb/Dist.py
index c151bbc..15d9653 100644
--- a/lib/bridgedb/Dist.py
+++ b/lib/bridgedb/Dist.py
@@ -42,8 +42,7 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder):
     ##        rings of this distributor.
     ##    areaOrderHmac -- an hmac function used to order areas within rings.
     ##    areaClusterHmac -- an hmac function used to assign areas to rings.
-    def __init__(self, areaMapper, nClusters, key, ipCategories=(),
-                 answerParameters=None):
+    def __init__(self, areaMapper, nClusters, key, ipCategories=(), answerParameters=None):
         self.areaMapper = areaMapper
         self.answerParameters = answerParameters
 
@@ -104,6 +103,7 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder):
                be any string, so long as it changes with every period.
            N -- the number of bridges to try to give back.
         """
+        if not bridgeFilterRules: bridgeFilterRules=[]
         logging.debug("getBridgesForIP(%s, %s, %s, %s" % (ip, epoch, N, bridgeFilterRules))
         if not len(self.splitter):
             logging.debug("bailing without splitter")
@@ -120,44 +120,42 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder):
                 pos = self.areaOrderHmac("category<%s>%s"%(epoch,area))
                 return ring.getBridges(pos, N, countryCode)
 
-        # dynamic filter construction
-        #XXX: may bypass clusters! front-end must supply clustering function
-        # or provide another mechanism (perhaps geoip based) to sort and select
-        # bridges from separate pools.
-        if bridgeFilterRules:
-            logging.debug("bridgeFilterRules: %s" % bridgeFilterRules)
-            #XXX: is there a better way to cache by ruleset signature?
-            ruleset = frozenset(bridgeFilterRules)
-            if ruleset in self.splitter.filterRings.keys():
-                logging.debug("Cache hit %s" % ruleset)
-                _,ring = self.splitter.filterRings[ruleset]
-            else:
-                logging.debug("Cache miss %s" % ruleset)
-                # add new ring 
-                #XXX what key do we use here? does it matter? 
-                key1 = bridgedb.Bridges.get_hmac(self.splitter.key, str(bridgeFilterRules))
-                ring = bridgedb.Bridges.BridgeRing(key1, self.answerParameters)
-                # debug log: cache miss 
-                self.splitter.addRing(ring, ruleset, filterBridgesByRules(bridgeFilterRules),
-                                  populate_from=self.splitter.bridges)
-                
+        # Bridge Filter Ruleset Construction
+
+        # IP clustering; select only bridges from the corresponding cluster.
+        h = int( self.areaClusterHmac(area)[:8], 16)
+        # reduce the hash to a cluster index in the range [0, len(self.rings))
+        clusterNum = h % len(self.rings) 
+ 
+        #XXX: assumes len(self.rings) == nClusters
+        g = filterAssignBridgesToRing(self.splitter.hmac,
+                                      len(self.rings) +
+                                      len(self.categoryRings),
+                                      clusterNum) 
+        bridgeFilterRules.append(g)
+        logging.debug("bridgeFilterRules: %s" % bridgeFilterRules)
+        #XXX: is there a better way to cache by ruleset signature?
+        ruleset = frozenset(bridgeFilterRules)
+
+        # See if we have a cached copy of the ring,
+        # otherwise, add a new ring and populate it
+        if ruleset in self.splitter.filterRings.keys():
+            logging.debug("Cache hit %s" % ruleset)
+            _,ring = self.splitter.filterRings[ruleset]
         else:
-            # Which bridge cluster should we look at?
-            h = int( self.areaClusterHmac(area)[:8], 16)
-            clusterNum = h % len(self.rings)
-            ring = self.rings[clusterNum]
-            # If a ring is empty, consider the next.
-            while not len(ring):
-                clusterNum = (clusterNum + 1) % len(self.rings)
-                ring = self.rings[clusterNum]
+            logging.debug("Cache miss %s" % ruleset)
+            key1 = bridgedb.Bridges.get_hmac(self.splitter.key,
+                                             "Order-Bridges-In-Ring-%d"%clusterNum)
+            ring = bridgedb.Bridges.BridgeRing(key1, self.answerParameters)
+            self.splitter.addRing(ring, ruleset, filterBridgesByRules(bridgeFilterRules),
+                                  populate_from=self.splitter.bridges)
 
         # Now get the bridge.
         pos = self.areaOrderHmac("<%s>%s" % (epoch, area))
         return ring.getBridges(pos, N)
 
     def __len__(self):
-        #XXX does not include ip categories or filtered rings
-        return sum(len(r) for r in self.rings)
+        return len(self.splitter)
 
     def dumpAssignments(self, f, description=""):
         self.splitter.dumpAssignments(f, description)





More information about the tor-commits mailing list