[metrics-tasks/master] Find entropy based on AS No

commit 687d1e8b90e5a413aa56cec17abab097745bde8e Author: Sathyanarayanan Gunasekaran <gsathya.ceg@gmail.com> Date: Sat Jul 21 13:26:31 2012 +0530 Find entropy based on AS No Use -as to specify the AS db --- task-6232/pyentropy.py | 55 +++++++++++++++++++++++++++++++++++++---------- 1 files changed, 43 insertions(+), 12 deletions(-) diff --git a/task-6232/pyentropy.py b/task-6232/pyentropy.py index 1a981fe..fa50406 100644 --- a/task-6232/pyentropy.py +++ b/task-6232/pyentropy.py @@ -9,7 +9,9 @@ Output - A CSV file of the format (without newlines): <entropy for guard nodes>, <max entropy for guard nodes>, <entropy for countries>, - <max entropy for countries> + <max entropy for countries>, + <entropy for AS>, + <max entropy for AS> rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in """ @@ -17,6 +19,10 @@ import sys import math import os import pygeoip +import StringIO +import stem.descriptor +from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor +from binascii import b2a_hex, a2b_base64, a2b_hex from optparse import OptionParser KEYS = ['r','s','v','w','p','m'] @@ -30,6 +36,8 @@ class Router: self.probability = None self.ip = None self.country = None + self.as_no = None + self.as_name = None self.is_exit = None self.is_guard = None @@ -37,7 +45,8 @@ class Router: if key == 'r': self.nick = values[0] self.ip = values[5] - self.country = gi.country_name_by_addr(self.ip) + self.country = gi_db.country_name_by_addr(self.ip) + self.as_no, self.as_name = self.get_as_details() if key == 'w': self.bandwidth = int(values[0].split('=')[1]) if key == 's': @@ -47,6 +56,13 @@ class Router: if "Guard" in self.flags: self.is_guard = True + def get_as_details(self): + try: + value = as_db.org_by_addr(str(self.ip)).split() + return value[0], value[1] + except: + return None, None + def run(file_name): routers = [] # parse consensus @@ -67,7 +83,7 @@ def run(file_name): totalBW, totalExitBW, totalGuardBW = 0, 0, 0 guards_n, exits_n = 0, 0 - bw_countries = {} + bw_countries, bw_as = {}, {} for router in routers: totalBW += router.bandwidth if router.is_guard: @@ -79,12 +95,17 @@ def run(file_name): if bw_countries.has_key(router.country): bw_countries[router.country] += router.bandwidth else: - bw_countries[router.country] = router.bandwidth + bw_countries[router.country] = router.bandwidth + if router.as_no: + if bw_as.has_key(router.as_no): + bw_as[router.as_no] += router.bandwidth + else: + bw_as[router.as_no] = router.bandwidth if len(routers) <= 0: return - - entropy, entropy_exit, entropy_guard, entropy_country = 0.0, 0.0, 0.0, 0.0 + + entropy, entropy_exit, entropy_guard, entropy_country, entropy_as = 0.0, 0.0, 0.0, 0.0, 0.0 for router in routers: p = float(router.bandwidth) / float(totalBW) if p != 0: @@ -97,18 +118,24 @@ def run(file_name): p = float(router.bandwidth) / float(totalExitBW) if p != 0: entropy_exit += -(p * math.log(p, 2)) - + for country in bw_countries.iterkeys(): p = float(bw_countries[country]) / float(totalBW) if p != 0: entropy_country += -(p * math.log(p, 2)) - + + for as_no in bw_as.iterkeys(): + p = float(bw_as[as_no]) / float(totalBW) + if p !=0: + entropy_as += -(p * math.log(p, 2)) + # Entropy of uniform distribution of 'n' possible values: log(n) max_entropy = math.log(len(routers), 2) max_entropy_guard = math.log(guards_n, 2) max_entropy_exit = math.log(exits_n, 2) max_entropy_country = math.log(len(bw_countries), 2) - + max_entropy_as = math.log(len(bw_as), 2) + return ",".join([valid_after, str(entropy), str(max_entropy), @@ -117,13 +144,16 @@ def run(file_name): str(entropy_guard), str(max_entropy_guard), str(entropy_country), - str(max_entropy_country)]) + str(max_entropy_country), + str(entropy_as), + str(max_entropy_as)]) def parse_args(): usage = "Usage - python pyentropy.py [options]" parser = OptionParser(usage) - parser.add_option("-g", "--geoip", dest="geoip", default="GeoIP.dat", help="Input GeoIP database") + parser.add_option("-g", "--geoip", dest="gi_db", default="GeoIP.dat", help="Input GeoIP database") + parser.add_option("-a", "--as", dest="as_db", default="GeoIPASNum.dat", help="Input AS GeoIP database") parser.add_option("-o", "--output", dest="output", default="entropy.csv", help="Output filename") parser.add_option("-c", "--consensus", dest="consensus", default="in/consensus", help="Input consensus dir") @@ -134,7 +164,8 @@ def parse_args(): if __name__ == "__main__": options = parse_args() - gi = pygeoip.GeoIP(options.geoip) + gi_db = pygeoip.GeoIP(options.gi_db) + as_db = pygeoip.GeoIP(options.as_db) with open(options.output, 'w') as f: for file_name in os.listdir(options.consensus):
participants (1)
-
karsten@torproject.org