commit 0ec37f81f7102e5078ae38b651513f96189d9f01 Author: Chris Wacek cwacek@cs.georgetown.edu Date: Thu Dec 20 16:33:41 2012 -0500
Built improved relay selection functions.
These replace the existing selection functions, which were pretty inefficient (write to string, parse it back again), with ones that parse Python objects around. --- compass.py | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 128 insertions(+), 0 deletions(-)
diff --git a/compass.py b/compass.py index 520a017..40b564a 100755 --- a/compass.py +++ b/compass.py @@ -253,7 +253,135 @@ class RelayStats(object):
WEIGHTS = ['consensus_weight_fraction', 'advertised_bandwidth_fraction', 'guard_probability', 'middle_probability', 'exit_probability']
+ def sort_and_reduce(self, relay_set, options): + """ + Take a set of relays (has already been grouped and + filtered), sort it and return the ones requested + in the 'top' option. Add index numbers to them as well. + """ + output_relays = list() + excluded_relays = None + total_relays = None + + # We need a simple sorting key function + def sort_fn(r): + return getattr(r,options.sort) + + relay_set.sort(key=sort_fn,reverse=options.sort_reverse) + + if options.top < 0: + options.top = len(relay_set) + + # Add selected relays to the result set + for i,selected_relay in enumerate(relay_set[:options.top]): + selected_relay.index = i + 1 + output_relays.append(selected_relay) + + # Figure out what the 'remainder' numbers are + if len(relay_set) > options.top: + if options.by_country and options.by_as: + filtered = "countries and ASes" + elif options.by_country: + filtered = "countries" + elif options.by_as: + filtered = "ASes" + else: + filtered = "relays" + + # Sum up all the rates + excluded_relays = util.Result(zero_probs=True) + total_relays = util.Result(zero_probs=True) + for i,relay in enumerate(relay_set): + if i < options.top: + excluded_relays.p_guard += relay.p_guard + excluded_relays.p_exit += relay.p_exit + excluded_relays.p_middle += relay.p_middle + excluded_relays.adv_bw += relay.adv_bw + excluded_relays.cw += relay.cw + total_relays.p_guard += relay.p_guard + total_relays.p_exit += relay.p_exit + total_relays.p_middle += relay.p_middle + total_relays.adv_bw += relay.adv_bw + total_relays.cw += relay.cw + + excluded_relays.fp = "(%d other %s)" % ( + len(relay_set) - options.top, + filtered) + total_relays.fp = "(total in selection)" + + # Only include the last line if + if total_relays.cw > 99.9: + total_relays = None + + return { + 'results': output_relays, + 'excluded': excluded_relays, + 'total': total_relays + } + + + def select_relays(self, grouped_relays, country=None, ases=None, by_country=False, by_as_number=False, links=False): + """ + Return a Pythonic representation of the relays result set. Return it as a set of Result objects. + """ + results = [] + for group in grouped_relays.itervalues(): + #Initialize some stuff + group_weights = dict.fromkeys(RelayStats.WEIGHTS, 0) + relays_in_group, exits_in_group, guards_in_group = 0, 0, 0 + ases_in_group = set() + result = util.Result() + for relay in group: + for weight in RelayStats.WEIGHTS: + group_weights[weight] += relay.get(weight, 0) + + result.nick = relay['nickname'] + result.link = links + result.fp = relay['fingerprint'] + + if 'Exit' in set(relay['flags']) and not 'BadExit' in set(relay['flags']): + result.exit = 'Exit' + exits_in_group += 1 + else: + result.exit = '-' + if 'Guard' in set(relay['flags']): + result.guard = 'Guard' + guards_in_group += 1 + else: + result.guard = '-' + result.cc = relay.get('country', '??').upper() + result.as_no = relay.get('as_number', '??') + result.as_name = relay.get('as_name', '??') + result.as_info = "%s %s" %(result.as_no, result.as_name) + ases_in_group.add(result.as_info) + relays_in_group += 1 + + # If we want to group by things, we need to handle some fields + # specially + if by_country or by_as_number: + result.nick = "*" + result.fp = "(%d relays)" % relays_in_group + result.exit = "(%d)" % exits_in_group + result.guard = "(%d)" % guards_in_group + if not by_as_number and not ases: + result.as_info = "(%s)" % len(ases_in_group) + if not by_country and not country: + country = "*" + + #Include our weight values + for weight in group_weights.iterkeys(): + result['cw'] = group_weights['consensus_weight_fraction'] * 100.0 + result['adv_bw'] = group_weights['advertised_bandwidth_fraction'] * 100.0 + result['p_guard'] = group_weights['guard_probability'] * 100.0 + result['p_middle'] = group_weights['middle_probability'] * 100.0 + result['p_exit'] = group_weights['exit_probability'] * 100.0 + + results.append(result) + + return results + def format_and_sort_groups(self, grouped_relays, country=None, ases=None, by_country=False, by_as_number=False, links=False): + formatted_groups = {} for group in grouped_relays.values(): group_weights = dict.fromkeys(RelayStats.WEIGHTS, 0)