[tor-commits] [metrics-tasks/master] Add entropy code for #1854

karsten at torproject.org karsten at torproject.org
Wed Sep 19 00:07:08 UTC 2012


commit ad848ad07f14ed75b5e464b20a069928076f8e36
Author: Sathyanarayanan Gunasekaran <gsathya.ceg at gmail.com>
Date:   Sat Sep 15 17:49:56 2012 +0530

    Add entropy code for #1854
---
 task-1854/pyentropy.py |  262 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 262 insertions(+), 0 deletions(-)

diff --git a/task-1854/pyentropy.py b/task-1854/pyentropy.py
new file mode 100644
index 0000000..4ee003c
--- /dev/null
+++ b/task-1854/pyentropy.py
@@ -0,0 +1,262 @@
+"""
+Usage - python pyentropy.py -h
+Output - A CSV file of the format (without newlines):
+         <valid-after>,
+         <min consensus weight>,
+         <number of relays>,
+         <entropy for all nodes>,
+         <max entropy for all nodes>,
+         <entropy for exit nodes>,
+         <max entropy for exit nodes>,
+         <entropy for guard nodes>,
+         <max entropy for guard nodes>,
+         <entropy for countries>,
+         <max entropy for countries>,
+         <entropy for AS>,
+         <max entropy for AS>
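+
+Fetch input consensuses with the command below, then pass the resulting
+directory to this script with -c:
+rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in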
+"""
+
+import sys
+import math
+import os
+import pygeoip
+import StringIO
+import stem.descriptor
+
+from optparse import OptionParser
+from binascii import b2a_hex, a2b_base64, a2b_hex
+from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor
+
+class Router:
+    """One relay entry parsed from a network status consensus.
+
+    Attributes are filled in incrementally by the ``add_*`` methods as the
+    relay's "r", "s" and "w" lines are read; anything not seen yet stays
+    ``None``.  The lookups rely on the module-level globals ``gi_db``,
+    ``as_db`` and ``options`` that the script sets up before parsing.
+    """
+
+    def __init__(self):
+        # integer taken from the first key=value pair of the "w" line
+        self.bandwidth = None
+        # min(average, burst, observed) from the server descriptor, or 0
+        self.advertised_bw = None
+        # result of the GeoIP country-code lookup on the relay address
+        self.country = None
+        # first token of the AS database's organisation string, or ""
+        self.as_no = None
+        # True when flagged Exit and not BadExit; otherwise left as None
+        self.is_exit = None
+        # True when flagged Guard; otherwise left as None
+        self.is_guard = None
+        # consensus weight, assigned later by run()
+        self.cw = None
+
+    def add_router_info(self, values):
+        """Record descriptor- and address-derived data from an "r" line.
+
+        ``values`` holds the fields of the line after the "r" keyword:
+        ``values[2]`` is used as the base64 descriptor digest and
+        ``values[5]`` as the relay's IP address.
+        """
+        # the digest is stored without base64 padding; restore it to decode
+        hex_digest = b2a_hex(a2b_base64(values[2] + "="))
+        self.advertised_bw = self.get_advertised_bw(hex_digest)
+        ip = values[5]
+        self.country = gi_db.country_code_by_addr(ip)
+        self.as_no = self.get_as_details(ip)
+
+    def add_weights(self, values):
+        """Store the integer bandwidth from the first field of a "w" line."""
+        self.bandwidth = int(values[0].split('=')[1])
+
+    def add_flags(self, values):
+        """Set the exit/guard markers from the flags of an "s" line."""
+        if "Exit" in values and "BadExit" not in values:
+            self.is_exit = True
+        if "Guard" in values:
+            self.is_guard = True
+
+    def get_as_details(self, ip):
+        """Return the first token of the AS organisation string for ``ip``.
+
+        Returns "" when the lookup fails or yields nothing usable.
+        """
+        try:
+            org = as_db.org_by_addr(str(ip))
+        except Exception:
+            # best-effort lookup: a failed query must not abort the run,
+            # but KeyboardInterrupt/SystemExit still propagate
+            return ""
+        tokens = org.split() if org else []
+        if tokens:
+            return tokens[0]
+        return ""
+
+    def get_advertised_bw(self, hex_digest):
+        """Return the advertised bandwidth from the relay's server descriptor.
+
+        The descriptor is read from ``options.server_desc``/``hex_digest``.
+        The result is the minimum of the descriptor's average, burst and
+        observed bandwidth, or 0 when the descriptor is missing or cannot
+        be parsed.
+        """
+        try:
+            # join instead of '+' so the directory works with or without a
+            # trailing slash
+            path = os.path.join(options.server_desc, hex_digest)
+            with open(path) as f:
+                data = f.read()
+
+            desc_iter = stem.descriptor.server_descriptor.parse_file(StringIO.StringIO(data))
+            desc = list(desc_iter)[0]
+            return min(desc.average_bandwidth, desc.burst_bandwidth, desc.observed_bandwidth)
+        except Exception:
+            # best-effort: a missing or malformed descriptor counts as 0,
+            # but KeyboardInterrupt/SystemExit still propagate
+            return 0
+
+def parse_bw_weights(values):
+    """Parse the fields of a "bandwidth-weights" line.
+
+    ``values`` is a list of "Name=integer" strings.  Returns a dict mapping
+    each name to its value divided by 10000 as a float, or ``None`` if any
+    entry is not a single well-formed "Name=number" pair.
+    """
+    weights = {}
+    try:
+        for item in values:
+            name, raw = item.split("=")
+            weights[name] = float(raw) / 10000
+    except (ValueError, TypeError, AttributeError):
+        # malformed entry or non-string input: signal "no usable weights"
+        return None
+    return weights
+
+def _entropy(weights, total):
+    """Return the Shannon entropy, in bits, of ``weights`` normalised by ``total``.
+
+    Entries with zero probability contribute nothing, and a zero ``total``
+    yields 0.0 instead of raising ZeroDivisionError.
+    """
+    if not total:
+        return 0.0
+    result = 0.0
+    for weight in weights:
+        p = float(weight) / float(total)
+        if p != 0:
+            result += -(p * math.log(p, 2))
+    return result
+
+def run(file_name):
+    """Compute entropy statistics for one consensus file.
+
+    The file is parsed into Router objects, which are sorted by consensus
+    weight.  One CSV row is produced for the current set of relays, then
+    every relay tied for the lowest consensus weight is dropped; this
+    repeats until at most one relay is left or the remaining relays carry
+    no bandwidth.
+
+    Each row holds: valid-after, min consensus weight, number of relays,
+    then (entropy, max entropy) pairs for all relays, exits, guards,
+    countries and ASes.
+
+    Returns the rows joined by newlines (possibly an empty string), or
+    ``None`` when the file has no "r" line or no "valid-after" line.
+    """
+    routers = []
+    router = None
+    valid_after = None
+    result_string = []
+    Wed, Wee, Wgd, Wgg = 1, 1, 1, 1
+    # parse consensus
+    with open(file_name, 'r') as f:
+        for line in f:
+            fields = line.split()
+            if not fields:
+                # blank line: nothing to dispatch on
+                continue
+            key, values = fields[0], fields[1:]
+            if key == 'r':
+                router = Router()
+                routers.append(router)
+                router.add_router_info(values)
+            elif key == 's':
+                if router is not None:
+                    router.add_flags(values)
+            elif key == 'w':
+                if router is not None:
+                    router.add_weights(values)
+            elif key == 'valid-after':
+                valid_after = ' '.join(values)
+            elif key == 'bandwidth-weights':
+                data = parse_bw_weights(values)
+                if data:
+                    # keep the default of 1 for any weight that is absent
+                    Wed = data.get('Wed', Wed)
+                    Wee = data.get('Wee', Wee)
+                    Wgd = data.get('Wgd', Wgd)
+                    Wgg = data.get('Wgg', Wgg)
+
+    if not routers or valid_after is None:
+        return
+
+    # calculate consensus weight for each relay
+    # NOTE(review): relays that are neither Guard nor Exit keep cw = None
+    # and therefore sort first -- confirm this is intended.
+    for router in routers:
+        if not router.bandwidth:
+            # should we consider relays with 'None' bandwidth?
+            continue
+        if router.is_guard and router.is_exit:
+            router.cw = Wgd*Wed*router.bandwidth
+        elif router.is_guard:
+            router.cw = Wgg*router.bandwidth
+        elif router.is_exit:
+            router.cw = Wee*router.bandwidth
+
+    # sort list of routers based on consensus weight; relays without a
+    # weight (None) are explicitly placed first instead of relying on the
+    # interpreter's ordering of None against numbers
+    routers.sort(key=lambda router: (router.cw is not None, router.cw))
+
+    while len(routers) > 1:
+        total_bw, total_exit_bw, total_guard_bw = 0, 0, 0
+        relay_bws, guard_bws, exit_bws = [], [], []
+        bw_countries, bw_as = {}, {}
+        max_entropy_guard, max_entropy_exit = 0.0, 0.0
+        max_entropy_country, max_entropy_as = 0.0, 0.0
+        # first relay has smallest cw
+        min_cw = routers[0].cw
+
+        for router in routers:
+            bw = router.bandwidth
+            if not bw:
+                # no "w" line (None) or zero bandwidth: contributes nothing
+                continue
+            total_bw += bw
+            relay_bws.append(bw)
+            if router.is_guard and router.is_exit:
+                guard_bws.append(Wgd*bw)
+                total_guard_bw += Wgd*bw
+                exit_bws.append(Wed*bw)
+                total_exit_bw += Wed*bw
+            elif router.is_guard:
+                guard_bws.append(Wgg*bw)
+                total_guard_bw += Wgg*bw
+            elif router.is_exit:
+                exit_bws.append(Wee*bw)
+                total_exit_bw += Wee*bw
+            bw_countries[router.country] = bw_countries.get(router.country, 0) + bw
+            bw_as[router.as_no] = bw_as.get(router.as_no, 0) + bw
+
+        if total_bw == 0:
+            # nothing left to measure; keep the rows produced so far
+            break
+
+        entropy = _entropy(relay_bws, total_bw)
+        entropy_guard = _entropy(guard_bws, total_guard_bw)
+        entropy_exit = _entropy(exit_bws, total_exit_bw)
+        entropy_country = _entropy(bw_countries.values(), total_bw)
+        entropy_as = _entropy(bw_as.values(), total_bw)
+
+        # Entropy of uniform distribution of 'n' possible values: log(n)
+        max_entropy = math.log(len(routers), 2)
+        if guard_bws:
+            max_entropy_guard = math.log(len(guard_bws), 2)
+        if exit_bws:
+            max_entropy_exit = math.log(len(exit_bws), 2)
+        if bw_countries:
+            max_entropy_country = math.log(len(bw_countries), 2)
+        if bw_as:
+            max_entropy_as = math.log(len(bw_as), 2)
+
+        result_string.append(','.join([valid_after,
+                                       str(min_cw),
+                                       str(len(routers)),
+                                       str(entropy),
+                                       str(max_entropy),
+                                       str(entropy_exit),
+                                       str(max_entropy_exit),
+                                       str(entropy_guard),
+                                       str(max_entropy_guard),
+                                       str(entropy_country),
+                                       str(max_entropy_country),
+                                       str(entropy_as),
+                                       str(max_entropy_as)]))
+
+        # remove every router tied for the minimum cw.  The list is sorted,
+        # so they form a prefix; slicing it off avoids deleting from the
+        # list while iterating over it, which skipped entries.
+        cutoff = 0
+        while cutoff < len(routers) and routers[cutoff].cw == min_cw:
+            cutoff += 1
+        del routers[:cutoff]
+
+    return '\n'.join(result_string)
+
+def parse_args():
+    """Parse the command line and return the optparse options object.
+
+    Recognised options (each stored under the listed attribute):
+      -g/--geoip        gi_db        GeoIP country database file
+      -a/--as           as_db        GeoIP AS-number database file
+      -s/--server_desc  server_desc  directory of server descriptors
+      -o/--output       output       CSV file to write
+      -c/--consensus    consensus    directory of input consensuses
+    Positional arguments are accepted but ignored.
+    """
+    option_table = (
+        ("-g", "--geoip", "gi_db", "GeoIP.dat",
+         "Input GeoIP database"),
+        ("-a", "--as", "as_db", "GeoIPASNum.dat",
+         "Input AS GeoIP database"),
+        ("-s", "--server_desc", "server_desc",
+         "data/relay-descriptors/server-descriptors/",
+         "Server descriptors directory"),
+        ("-o", "--output", "output", "entropy.csv",
+         "Output filename"),
+        ("-c", "--consensus", "consensus", "in/consensus",
+         "Input consensus dir"),
+    )
+
+    parser = OptionParser("Usage - python pyentropy.py [options]")
+    for short_flag, long_flag, dest, default, help_text in option_table:
+        parser.add_option(short_flag, long_flag, dest=dest, default=default,
+                          help=help_text)
+
+    parsed = parser.parse_args()
+    return parsed[0]
+
+if __name__ == "__main__":
+    # options, gi_db and as_db are module-level globals read by Router
+    options = parse_args()
+    gi_db = pygeoip.GeoIP(options.gi_db)
+    as_db = pygeoip.GeoIP(options.as_db)
+
+    with open(options.output, 'w') as f:
+        # os.listdir() returns entries in arbitrary order; sort them so the
+        # CSV rows come out in a reproducible (file-name) order
+        for file_name in sorted(os.listdir(options.consensus)):
+            path = os.path.join(options.consensus, file_name)
+            if not os.path.isfile(path):
+                # sub-directories and other non-files cannot be parsed
+                continue
+            rows = run(path)
+            if rows:
+                f.write("%s\n" % (rows))





More information about the tor-commits mailing list