commit 0ec37f81f7102e5078ae38b651513f96189d9f01
Author: Chris Wacek <cwacek(a)cs.georgetown.edu>
Date: Thu Dec 20 16:33:41 2012 -0500
Built improved relay selection functions.
These replace the existing selection functions, which were pretty
inefficient (write to string, parse it back again), with ones that
pass Python objects around.
---
compass.py | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 128 insertions(+), 0 deletions(-)
diff --git a/compass.py b/compass.py
index 520a017..40b564a 100755
--- a/compass.py
+++ b/compass.py
@@ -253,7 +253,135 @@ class RelayStats(object):
WEIGHTS = ['consensus_weight_fraction', 'advertised_bandwidth_fraction', 'guard_probability', 'middle_probability', 'exit_probability']
+ def sort_and_reduce(self, relay_set, options):
+     """
+     Sort an already grouped and filtered relay list and keep the top entries.
+
+     The list is sorted in place on the attribute named by ``options.sort``
+     (reversed when ``options.sort_reverse`` is true), and every kept entry
+     receives a 1-based ``index`` attribute.
+
+     Parameters:
+       relay_set -- list of result objects exposing ``p_guard``, ``p_exit``,
+                    ``p_middle``, ``adv_bw`` and ``cw``; sorted in place.
+       options   -- object providing ``sort``, ``sort_reverse``, ``top``,
+                    ``by_country`` and ``by_as``.  A negative ``top`` means
+                    "keep everything" and is overwritten with
+                    ``len(relay_set)``.
+
+     Returns a dict with three keys:
+       'results'  -- the first ``options.top`` entries of the sorted list.
+       'excluded' -- a util.Result holding the summed weights of the entries
+                     that were cut off, or None when nothing was cut off.
+       'total'    -- a util.Result holding the summed weights of every
+                     entry, or None when nothing was cut off or when the
+                     summed ``cw`` is above 99.9.
+     """
+     summed_fields = ('p_guard', 'p_exit', 'p_middle', 'adv_bw', 'cw')
+
+     relay_set.sort(key=lambda r: getattr(r, options.sort),
+                    reverse=options.sort_reverse)
+
+     # NOTE: the normalised value is written back to the caller's options
+     # object; this side effect is kept because callers may read it later.
+     if options.top < 0:
+         options.top = len(relay_set)
+     top = options.top
+
+     # Number the selected relays and add them to the result set
+     output_relays = []
+     for position, selected_relay in enumerate(relay_set[:top], 1):
+         selected_relay.index = position
+         output_relays.append(selected_relay)
+
+     excluded_relays = None
+     total_relays = None
+
+     # Summary rows are only built when something was cut off
+     if len(relay_set) > top:
+         if options.by_country and options.by_as:
+             filtered = "countries and ASes"
+         elif options.by_country:
+             filtered = "countries"
+         elif options.by_as:
+             filtered = "ASes"
+         else:
+             filtered = "relays"
+
+         # Sum up all the rates
+         excluded_relays = util.Result(zero_probs=True)
+         total_relays = util.Result(zero_probs=True)
+         for i, relay in enumerate(relay_set):
+             for field in summed_fields:
+                 value = getattr(relay, field)
+                 # Entries at position >= top are the ones left out of
+                 # 'results', so only those belong in the "other" row.
+                 if i >= top:
+                     setattr(excluded_relays, field,
+                             getattr(excluded_relays, field) + value)
+                 setattr(total_relays, field,
+                         getattr(total_relays, field) + value)
+
+         excluded_relays.fp = "(%d other %s)" % (
+             len(relay_set) - top,
+             filtered)
+         total_relays.fp = "(total in selection)"
+
+         # Only include the total line if the selection's summed cw is
+         # not above 99.9
+         if total_relays.cw > 99.9:
+             total_relays = None
+
+     return {
+         'results': output_relays,
+         'excluded': excluded_relays,
+         'total': total_relays
+     }
+
+
+ def select_relays(self, grouped_relays, country=None, ases=None, by_country=False, by_as_number=False, links=False):
+     """
+     Build one util.Result per relay group and return them as a list.
+
+     Parameters:
+       grouped_relays -- mapping whose values are iterables of relay dicts.
+                         Each relay must have 'nickname', 'fingerprint' and
+                         'flags'; 'country', 'as_number', 'as_name' and the
+                         RelayStats.WEIGHTS keys are optional.
+       country, ases  -- the active country / AS filters; only their
+                         truthiness is used here.
+       by_country, by_as_number -- whether the groups were built per
+                         country and/or per AS.
+       links          -- stored unchanged on each result as ``link``.
+
+     For an ungrouped result the descriptive fields come from the last relay
+     iterated in the group.  When grouping is active they are replaced by
+     aggregate placeholders ("*", "(N relays)", "(N)").  The summed weights
+     are stored multiplied by 100.
+     """
+     results = []
+     for group in grouped_relays.values():
+         # Per-group accumulators
+         group_weights = dict.fromkeys(RelayStats.WEIGHTS, 0)
+         relays_in_group, exits_in_group, guards_in_group = 0, 0, 0
+         ases_in_group = set()
+         result = util.Result()
+         for relay in group:
+             for weight in RelayStats.WEIGHTS:
+                 group_weights[weight] += relay.get(weight, 0)
+
+             result.nick = relay['nickname']
+             result.link = links
+             result.fp = relay['fingerprint']
+
+             flags = set(relay['flags'])
+             # A relay flagged BadExit is not counted as an exit
+             if 'Exit' in flags and 'BadExit' not in flags:
+                 result.exit = 'Exit'
+                 exits_in_group += 1
+             else:
+                 result.exit = '-'
+             if 'Guard' in flags:
+                 result.guard = 'Guard'
+                 guards_in_group += 1
+             else:
+                 result.guard = '-'
+             result.cc = relay.get('country', '??').upper()
+             result.as_no = relay.get('as_number', '??')
+             result.as_name = relay.get('as_name', '??')
+             result.as_info = "%s %s" % (result.as_no, result.as_name)
+             ases_in_group.add(result.as_info)
+             relays_in_group += 1
+
+         # If we want to group by things, we need to handle some fields
+         # specially
+         if by_country or by_as_number:
+             result.nick = "*"
+             result.fp = "(%d relays)" % relays_in_group
+             result.exit = "(%d)" % exits_in_group
+             result.guard = "(%d)" % guards_in_group
+             if not by_as_number and not ases:
+                 result.as_info = "(%s)" % len(ases_in_group)
+             if not by_country and not country:
+                 # The group may span several countries, so the last
+                 # relay's country code must not be shown for the group.
+                 result.cc = "*"
+
+         # Include our weight values (item assignment on the result object
+         # is kept exactly as the original code used it)
+         result['cw'] = group_weights['consensus_weight_fraction'] * 100.0
+         result['adv_bw'] = group_weights['advertised_bandwidth_fraction'] * 100.0
+         result['p_guard'] = group_weights['guard_probability'] * 100.0
+         result['p_middle'] = group_weights['middle_probability'] * 100.0
+         result['p_exit'] = group_weights['exit_probability'] * 100.0
+
+         results.append(result)
+
+     return results
+
def format_and_sort_groups(self, grouped_relays, country=None, ases=None, by_country=False, by_as_number=False, links=False):
+
formatted_groups = {}
for group in grouped_relays.values():
group_weights = dict.fromkeys(RelayStats.WEIGHTS, 0)