commit 72561ea2b1b30c703efaa8f96c630a13dffd3ed3 Author: aagbsn aagbsn@extc.org Date: Fri Dec 9 19:01:15 2011 -0800
4297 - IPBasedDistributor learns to sort bridges
IPBasedDistributor now uses the FilteredBridgeSplitter to sort bridges into subrings based on the address class of the Bridges' ip(s) (including addresses parsed from the new or-address spec). --- lib/bridgedb/Bridges.py | 17 --------- lib/bridgedb/Dist.py | 91 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 70 insertions(+), 38 deletions(-)
diff --git a/lib/bridgedb/Bridges.py b/lib/bridgedb/Bridges.py index b0e1fc8..281e5f2 100644 --- a/lib/bridgedb/Bridges.py +++ b/lib/bridgedb/Bridges.py @@ -791,23 +791,6 @@ class FilteredBridgeSplitter(BridgeHolder): are passed to the constructor as a list of (filterFn, ringName) """
-#XXX: maybe limit the # of cached rings too? -#XXX: keep the bridges around -#XXX: when the # of rings hits RING_LIMIT drop the oldest -#XXX: LRU is probably OK -#XXX: max_cached_rings should be configurable - - -#XXX: need a default ring (f(x): return True) -#XXX: (don't make this a default, we'll just need to have it) - -#XXX: filterRings['NoFilter'] = [list,of,rings] -#XXX: filterRings['Default'] = [list,of,rings] -#XXX: filterRings['IPv6'] = [list,of,rings] - -#XXX: filterRings['more-complex-query-string?'] - - def __init__(self, key, max_cached_rings=3): self.key = key self.filterRings = {} diff --git a/lib/bridgedb/Dist.py b/lib/bridgedb/Dist.py index bbff9f1..e2d1559 100644 --- a/lib/bridgedb/Dist.py +++ b/lib/bridgedb/Dist.py @@ -38,35 +38,59 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder): def __init__(self, areaMapper, nClusters, key, ipCategories=(), answerParameters=None): self.areaMapper = areaMapper + self.answerParameters = answerParameters
self.rings = [] self.categoryRings = [] #DOCDDOC self.categories = [] #DOCDOC + + key2 = bridgedb.Bridges.get_hmac(key, "Assign-Bridges-To-Rings") + key3 = bridgedb.Bridges.get_hmac(key, "Order-Areas-In-Rings") + self.areaOrderHmac = bridgedb.Bridges.get_hmac_fn(key3, hex=False) + key4 = bridgedb.Bridges.get_hmac(key, "Assign-Areas-To-Rings") + self.areaClusterHmac = bridgedb.Bridges.get_hmac_fn(key4, hex=True) + + # add splitter and cache the default rings + # plus leave room for dynamic filters + ring_cache_size = nClusters + len(ipCategories) + 5 + self.splitter = bridgedb.Bridges.FilteredBridgeSplitter(key2, + max_cached_rings=ring_cache_size) + + # assign bridges using a filter function + def filterAssignBridgesToRing(hmac, numRings, assignedRing): + def f(bridge): + digest = hmac(bridge.getID()) + pos = long( digest[:8], 16 ) + which = pos % numRings + if which == assignedRing: return True + return False + return f + for n in xrange(nClusters): key1 = bridgedb.Bridges.get_hmac(key, "Order-Bridges-In-Ring-%d"%n) - self.rings.append( bridgedb.Bridges.BridgeRing(key1, - answerParameters) ) - self.rings[-1].setName("IP ring %s"%len(self.rings)) + ring = bridgedb.Bridges.BridgeRing(key1, answerParameters) + ring.setName("IP ring %s"%n) #XXX: should be n+1 for consistency? + + g = filterAssignBridgesToRing(self.splitter.hmac, + nClusters + len(ipCategories), n) + self.splitter.addRing(ring, ring.name, g) + self.rings.append(ring) + n = nClusters for c in ipCategories: logging.info("Building ring: Order-Bridges-In-Ring-%d"%n) key1 = bridgedb.Bridges.get_hmac(key, "Order-Bridges-In-Ring-%d"%n) ring = bridgedb.Bridges.BridgeRing(key1, answerParameters) + ring.setName("IP category ring %s"%n) #XXX: should be n+1 for consistency? self.categoryRings.append( ring ) - self.categoryRings[-1].setName( - "IP category ring %s"%len(self.categoryRings)) self.categories.append( (c, ring) ) - n += 1
- key2 = bridgedb.Bridges.get_hmac(key, "Assign-Bridges-To-Rings") - self.splitter = bridgedb.Bridges.FixedBridgeSplitter(key2, - self.rings+self.categoryRings) + g = filterAssignBridgesToRing(self.splitter.hmac, + nClusters + len(ipCategories), n)
- key3 = bridgedb.Bridges.get_hmac(key, "Order-Areas-In-Rings") - self.areaOrderHmac = bridgedb.Bridges.get_hmac_fn(key3, hex=False) + self.splitter.addRing(ring, ring.name, g)
- key4 = bridgedb.Bridges.get_hmac(key, "Assign-Areas-To-Rings") - self.areaClusterHmac = bridgedb.Bridges.get_hmac_fn(key4, hex=True) + n += 1
def clear(self): self.splitter.clear() @@ -75,7 +99,8 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder): """Assign a bridge to this distributor.""" self.splitter.insert(bridge)
- def getBridgesForIP(self, ip, epoch, N=1, countryCode=None): + def getBridgesForIP(self, ip, epoch, N=1, countryCode=None, + bridgeFilterRules=None): """Return a list of bridges to give to a user. ip -- the user's IP address, as a dotted quad. epoch -- the time period when we got this request. This can @@ -96,20 +121,44 @@ class IPBasedDistributor(bridgedb.Bridges.BridgeHolder): pos = self.areaOrderHmac("category<%s>%s"%(epoch,area)) return ring.getBridges(pos, N, countryCode)
- # Which bridge cluster should we look at? - h = int( self.areaClusterHmac(area)[:8], 16) - clusterNum = h % len(self.rings) - ring = self.rings[clusterNum] - # If a ring is empty, consider the next. - while not len(ring): - clusterNum = (clusterNum + 1) % len(self.rings) + # dynamic filter construction + if bridgeFilterRules: + ruleset = frozenset(bridgeFilterRules) + if ruleset in self.splitter.filterRings.keys(): + # cache hit + _,ring = self.splitter.filterRings[ruleset] + else: + # cache miss, add new ring + def filterBridgesByRules(rules): + def g(x): + r = [f(x) for f in rules] + if False in r: return False + return True + return g + # add new ring + #XXX what key do we use here? does it matter? + key1 = bridgedb.Bridges.get_hmac(self.splitter.key, str(bridgeFilterRules)) + ring = bridgedb.Bridges.BridgeRing(key1, self.answerParameters) + # debug log: cache miss + self.splitter.addRing(ring, ruleset, filterBridgesByRules(bridgeFilterRules), + populate_from=self.splitter.bridges) + + else: + # Which bridge cluster should we look at? + h = int( self.areaClusterHmac(area)[:8], 16) + clusterNum = h % len(self.rings) ring = self.rings[clusterNum] + # If a ring is empty, consider the next. + while not len(ring): + clusterNum = (clusterNum + 1) % len(self.rings) + ring = self.rings[clusterNum]
# Now get the bridge. pos = self.areaOrderHmac("<%s>%s" % (epoch, area)) return ring.getBridges(pos, N)
def __len__(self): + #XXX does not include ip categories or filtered rings return sum(len(r) for r in self.rings)
def dumpAssignments(self, f, description=""):
tor-commits@lists.torproject.org