[tor-commits] [metrics-tasks/master] Unify code for grouped and non-grouped results (#6329).

karsten at torproject.org karsten at torproject.org
Fri Jul 20 14:00:01 UTC 2012


commit ccf3f1952318cdfce88b378419b4aa4ab9e6117c
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Fri Jul 20 14:10:27 2012 +0200

    Unify code for grouped and non-grouped results (#6329).
---
 task-6329/tor-relays-stats.py |  122 +++++++++++++++++++++++------------------
 1 files changed, 69 insertions(+), 53 deletions(-)

diff --git a/task-6329/tor-relays-stats.py b/task-6329/tor-relays-stats.py
index 0fbe3aa..038a22d 100755
--- a/task-6329/tor-relays-stats.py
+++ b/task-6329/tor-relays-stats.py
@@ -35,78 +35,94 @@ class RelayStats(object):
             relays.append(relay)
         return relays
 
-    def output_countries(self, count='10', flags=''):
-        count = int(count)
-        flags = flags.split()
-        relays = self.get_relays(flags)
-        countries = {}
+    def group_relays(self, relays, by_country=False, by_as_number=False):
+        grouped_relays = {}
         for relay in relays:
-            country = relay.get('country', None)
-            if country not in countries:
-              countries[country] = 0
-            countries[country] += relay['consensus_weight_fraction']
-
-        ranking = sorted(countries.iteritems(), key=operator.itemgetter(1))
-        ranking.reverse()
-        for country, weight in ranking[:count]:
-            print "%8.4f%% %s" % (weight * 100.0, country)
-        if len(ranking) > count:
+            if by_country and by_as_number:
+                key = (relay.get('country', None), relay.get('as_number', None))
+            elif by_country:
+                key = relay.get('country', None)
+            elif by_as_number:
+                key = relay.get('as_number', None)
+            else:
+                key = relay.get('fingerprint')
+            if key not in grouped_relays:
+                grouped_relays[key] = []
+            grouped_relays[key].append(relay)
+        return grouped_relays
+
+    def format_and_sort_groups(self, grouped_relays, by_country=False, by_as_number=False):
+        formatted_groups = {}
+        for group in grouped_relays.viewvalues():
+            group_consensus_weight_fraction = 0
+            relays_in_group = 0
+            for relay in group:
+                group_consensus_weight_fraction += relay.get('consensus_weight_fraction', 0)
+                nickname = relay['nickname']
+                fingerprint = relay['fingerprint']
+                exit = 'Exit' if 'Exit' in set(relay['flags']) else ''
+                guard = 'Guard' if 'Guard' in set(relay['flags']) else ''
+                country = relay.get('country', '')
+                as_number = relay.get('as_number', '')
+                as_name = relay.get('as_name', '')
+                relays_in_group += 1
+            if by_country or by_as_number:
+                nickname = "*"
+                fingerprint = "*            %5d relays" % relays_in_group
+                exit = "*"
+                guard = "*"
+            if by_country and not by_as_number:
+                as_number = "*"
+                as_name = "*"
+            if by_as_number and not by_country:
+                country = "*"
+            formatted_group = "%8.4f%% %-19s %-40s %-4s %-5s %-2s %-9s %s" % (
+                              group_consensus_weight_fraction * 100.0, nickname, fingerprint,
+                              exit, guard, country, as_number, as_name)
+            formatted_groups[formatted_group] = group_consensus_weight_fraction
+        sorted_groups = sorted(formatted_groups.iteritems(), key=operator.itemgetter(1))
+        sorted_groups.reverse()
+        return sorted_groups
+
+    def print_groups(self, sorted_groups, count=10):
+        print "       CW Nickname            Fingerprint                              Exit Guard CC AS_num    AS_name"
+        for formatted_group, _ in sorted_groups[:count]:
+            print formatted_group
+        if len(sorted_groups) > count:
             other_consensus_weight_fraction = 0
-            for as_set, weight in ranking[count:]:
+            for _, weight in sorted_groups[count:]:
                 other_consensus_weight_fraction += weight
-            print "%8.4f%% (%d others)" % (other_consensus_weight_fraction * 100.0, len(ranking) - count)
+            print "%8.4f%% (%d others)" % (other_consensus_weight_fraction * 100.0, len(sorted_groups) - count)
         selection_consensus_weight_fraction = 0
-        for as_set, weight in ranking:
+        for _, weight in sorted_groups:
             selection_consensus_weight_fraction += weight
         if selection_consensus_weight_fraction < 0.999:
             print "%8.4f%% (total in selection)" % (selection_consensus_weight_fraction * 100.0)
 
+    def output_countries(self, count='10', flags=''):
+        count = int(count)
+        flags = flags.split()
+        relays = self.get_relays(flags)
+        grouped_relays = self.group_relays(relays, by_country=True)
+        sorted_groups = self.format_and_sort_groups(grouped_relays, by_country=True)
+        self.print_groups(sorted_groups, count)
+
     def output_as_sets(self, count='10', flags='', countries=''):
         count = int(count)
         flags = flags.split()
         relays = self.get_relays(flags, countries)
-        as_sets = {}
-        for relay in relays:
-            as_set = relay.get('as_name', 'Unknown')
-            if as_set not in as_sets:
-                as_sets[as_set] = 0
-            as_sets[as_set] += relay['consensus_weight_fraction']
-
-        ranking = sorted(as_sets.iteritems(), key=operator.itemgetter(1))
-        ranking.reverse()
-        for as_set, weight in ranking[:count]:
-            print "%8.4f%% %s" % (weight * 100.0, as_set)
-        if len(ranking) > count:
-            other_consensus_weight_fraction = 0
-            for as_set, weight in ranking[count:]:
-                other_consensus_weight_fraction += weight
-            print "%8.4f%% (%d others)" % (other_consensus_weight_fraction * 100.0, len(ranking) - count)
-        selection_consensus_weight_fraction = 0
-        for as_set, weight in ranking:
-            selection_consensus_weight_fraction += weight
-        if selection_consensus_weight_fraction < 0.999:
-            print "%8.4f%% (total in selection)" % (selection_consensus_weight_fraction * 100.0)
+        grouped_relays = self.group_relays(relays, by_as_number=True)
+        sorted_groups = self.format_and_sort_groups(grouped_relays, by_as_number=True)
+        self.print_groups(sorted_groups, count)
 
     def output_relays(self, count='10', flags='', countries='', as_sets=''):
         count = int(count)
         flags = flags.split()
         as_sets = as_sets.split()
         relays = self.get_relays(flags, countries, as_sets)
-
-        ranking = sorted(relays, key=operator.itemgetter('consensus_weight_fraction'))
-        ranking.reverse()
-        selection_consensus_weight_fraction = 0
-        for relay in ranking[:count]:
-            selection_consensus_weight_fraction += relay['consensus_weight_fraction']
-            print "%8.4f%% %-19s %-2s %-4s %-5s %s %-9s %s" % (relay['consensus_weight_fraction'] * 100.0, relay['nickname'], relay['fingerprint'], 'Exit' if 'Exit' in set(relay['flags']) else '', 'Guard' if 'Guard' in set(relay['flags']) else '', relay.get('country', '  '), relay.get('as_number', ''), relay.get('as_name', ''))
-        if len(ranking) > count:
-            other_consensus_weight_fraction = 0
-            for relay in ranking[count:]:
-                other_consensus_weight_fraction += relay['consensus_weight_fraction']
-                selection_consensus_weight_fraction += relay['consensus_weight_fraction']
-            print "%8.4f%% (%d others)" % (other_consensus_weight_fraction * 100.0, len(ranking) - count)
-        if selection_consensus_weight_fraction < 0.999:
-            print "%8.4f%% (total in selection)" % (selection_consensus_weight_fraction * 100.0)
+        grouped_relays = self.group_relays(relays)
+        sorted_groups = self.format_and_sort_groups(grouped_relays)
+        self.print_groups(sorted_groups, count)
 
 OUTPUTS = {
   'countries': 'output_countries',





More information about the tor-commits mailing list