commit edef675675c9c88b12c53c546995da956d2932aa Author: Sathyanarayanan Gunasekaran gsathya.ceg@gmail.com Date: Tue Oct 23 18:51:14 2012 +0530
Remove pyentropy.py --- task-1854/pyentropy.py | 246 ------------------------------------------------ 1 files changed, 0 insertions(+), 246 deletions(-)
diff --git a/task-1854/pyentropy.py b/task-1854/pyentropy.py deleted file mode 100644 index 3899810..0000000 --- a/task-1854/pyentropy.py +++ /dev/null @@ -1,246 +0,0 @@ -""" -Usage - python pyentropy.py -h -Output - A CSV file of the format (without newlines): - <valid-after>, - <min consensus weight>, - <number of relays>, - <entropy for all nodes>, - <max entropy for all nodes>, - <entropy for exit nodes>, - <max entropy for exit nodes>, - <entropy for guard nodes>, - <max entropy for guard nodes>, - <entropy for countries>, - <max entropy for countries>, - <entropy for AS>, - <max entropy for AS> -rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in -""" - -import sys -import math -import os -import pygeoip -import StringIO -import stem.descriptor - -from optparse import OptionParser -from binascii import b2a_hex, a2b_base64, a2b_hex -from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor - -class Router: - def __init__(self): - self.bandwidth = None - self.advertised_bw = None - self.country = None - self.as_no = None - self.is_exit = None - self.is_guard = None - - def add_router_info(self, values): - hex_digest = b2a_hex(a2b_base64(values[2]+"=")) - self.advertised_bw = self.get_advertised_bw(hex_digest) - ip = values[5] - self.country = gi_db.country_code_by_addr(ip) - self.as_no = self.get_as_details(ip) - - def add_weights(self, values): - self.bandwidth = int(values[0].split('=')[1]) - - def add_flags(self, values): - if "Exit" in values and not "BadExit" in values: - self.is_exit = True - if "Guard" in values: - self.is_guard = True - - def get_as_details(self, ip): - try: - value = as_db.org_by_addr(str(ip)).split() - return value[0] - except: - return "" - - def get_advertised_bw(self, hex_digest): - try: - with open(options.server_desc+hex_digest) as f: - data = f.read() - - desc_iter = stem.descriptor.server_descriptor.parse_file(StringIO.StringIO(data)) - desc_entries = list(desc_iter) - desc = desc_entries[0] - return min(desc.average_bandwidth, desc.burst_bandwidth, desc.observed_bandwidth) - except: - return 0 - -def parse_bw_weights(values): - data = {} - try: - for value in values: - key, value = value.split("=") - data[key] = float(value) / 10000 - return data - except: - return None - -def run(file_name): - routers = [] - router = None - result_string = [] - Wed, Wee, Wgd, Wgg = 1, 1, 1, 1 - # parse consensus - with open(file_name, 'r') as f: - for line in f.readlines(): - key = line.split()[0] - values = line.split()[1:] - if key =='r': - router = Router() - routers.append(router) - router.add_router_info(values) - elif key == 's': - router.add_flags(values) - elif key == 'w': - router.add_weights(values) - elif key == 'valid-after': - valid_after = ' '.join(values) - elif key == 'bandwidth-weights': - data = parse_bw_weights(values) - try: - Wed = data['Wed'] - Wee = data['Wee'] - Wgd = data['Wgd'] - Wgg = data['Wgg'] - except: - pass - - if len(routers) <= 0: - return - - # sort list of routers based on consensus weight - routers.sort(key=lambda router: router.bandwidth) - - while(len(routers)>1): - total_bw, total_exit_bw, total_guard_bw = 0, 0, 0 - guards_no, exits_no = 0, 0 - bw_countries, bw_as = {}, {} - max_entropy, max_entropy_as, max_entropy_guard, max_entropy_country, max_entropy_exit = 0.0, 0.0, 0.0, 0.0, 0.0 - # first relay has smallest cw - min_cw = routers[0].bandwidth - - for router in routers: - if not router.bandwidth: - continue - total_bw += router.bandwidth - if router.is_guard and router.is_exit: - total_guard_bw += Wgd*router.bandwidth - total_exit_bw += Wed*router.bandwidth - guards_no += 1 - exits_no += 1 - elif router.is_guard: - total_guard_bw += Wgg*router.bandwidth - guards_no += 1 - elif router.is_exit: - total_exit_bw += Wee*router.bandwidth - exits_no += 1 - if bw_countries.has_key(router.country): - bw_countries[router.country] += router.bandwidth - else: - bw_countries[router.country] = router.bandwidth - if bw_as.has_key(router.as_no): - bw_as[router.as_no] += router.bandwidth - else: - bw_as[router.as_no] = router.bandwidth - - if total_bw == 0: - return - - entropy, entropy_exit, entropy_guard, entropy_country, entropy_as = 0.0, 0.0, 0.0, 0.0, 0.0 - for router in routers: - p = float(router.bandwidth) / float(total_bw) - if p != 0: - entropy += -(p * math.log(p, 2)) - if router.is_guard and router.is_exit: - p = float(Wgd*router.bandwidth) / float(total_guard_bw) - if p != 0: - entropy_guard += -(p * math.log(p, 2)) - p = float(Wed*router.bandwidth) / float(total_exit_bw) - if p != 0: - entropy_exit += -(p * math.log(p, 2)) - elif router.is_guard: - p = float(Wgg*router.bandwidth) / float(total_guard_bw) - if p != 0: - entropy_guard += -(p * math.log(p, 2)) - elif router.is_exit: - p = float(Wee*router.bandwidth) / float(total_exit_bw) - if p != 0: - entropy_exit += -(p * math.log(p, 2)) - - for country in bw_countries.iterkeys(): - p = float(bw_countries[country]) / float(total_bw) - if p != 0: - entropy_country += -(p * math.log(p, 2)) - - for as_no in bw_as.iterkeys(): - p = float(bw_as[as_no]) / float(total_bw) - if p !=0: - entropy_as += -(p * math.log(p, 2)) - - # Entropy of uniform distribution of 'n' possible values: log(n) - max_entropy = math.log(len(routers), 2) - if guards_no: - max_entropy_guard = math.log(guards_no, 2) - if exits_no: - max_entropy_exit = math.log(exits_no, 2) - if bw_countries: - max_entropy_country = math.log(len(bw_countries), 2) - if bw_as: - max_entropy_as = math.log(len(bw_as), 2) - - result_string.append(','.join([valid_after, - str(min_cw), - str(len(routers)), - str(entropy), - str(max_entropy), - str(entropy_exit), - str(max_entropy_exit), - str(entropy_guard), - str(max_entropy_guard), - str(entropy_country), - str(max_entropy_country), - str(entropy_as), - str(max_entropy_as)])) - - # remove routers with min cw - while len(routers) > 0 and routers[0].bandwidth == min_cw: - del routers[0] - - return '\n'.join(result_string) - -def parse_args(): - usage = "Usage - python pyentropy.py [options]" - parser = OptionParser(usage) - - parser.add_option("-g", "--geoip", dest="gi_db", default="GeoIP.dat", - help="Input GeoIP database") - parser.add_option("-a", "--as", dest="as_db", default="GeoIPASNum.dat", - help="Input AS GeoIP database") - parser.add_option("-s", "--server_desc", dest="server_desc", - default="data/relay-descriptors/server-descriptors/", help="Server descriptors directory") - parser.add_option("-o", "--output", dest="output", default="entropy.csv", - help="Output filename") - parser.add_option("-c", "--consensus", dest="consensus", default="in/consensus", - help="Input consensus dir") - - (options, args) = parser.parse_args() - - return options - -if __name__ == "__main__": - options = parse_args() - gi_db = pygeoip.GeoIP(options.gi_db) - as_db = pygeoip.GeoIP(options.as_db) - - with open(options.output, 'w') as f: - for file_name in os.listdir(options.consensus): - string = run(os.path.join(options.consensus, file_name)) - if string: - f.write("%s\n" % (string))