commit edef675675c9c88b12c53c546995da956d2932aa
Author: Sathyanarayanan Gunasekaran <gsathya.ceg(a)gmail.com>
Date: Tue Oct 23 18:51:14 2012 +0530
Remove pyentropy.py
---
task-1854/pyentropy.py | 246 ------------------------------------------------
1 files changed, 0 insertions(+), 246 deletions(-)
diff --git a/task-1854/pyentropy.py b/task-1854/pyentropy.py
deleted file mode 100644
index 3899810..0000000
--- a/task-1854/pyentropy.py
+++ /dev/null
@@ -1,246 +0,0 @@
-"""
-Usage - python pyentropy.py -h
-Output - A CSV file of the format (without newlines):
- <valid-after>,
- <min consensus weight>,
- <number of relays>,
- <entropy for all nodes>,
- <max entropy for all nodes>,
- <entropy for exit nodes>,
- <max entropy for exit nodes>,
- <entropy for guard nodes>,
- <max entropy for guard nodes>,
- <entropy for countries>,
- <max entropy for countries>,
- <entropy for AS>,
- <max entropy for AS>
-rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in
-"""
-
-import sys
-import math
-import os
-import pygeoip
-import StringIO
-import stem.descriptor
-
-from optparse import OptionParser
-from binascii import b2a_hex, a2b_base64, a2b_hex
-from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor
-
-class Router:
- def __init__(self):
- self.bandwidth = None
- self.advertised_bw = None
- self.country = None
- self.as_no = None
- self.is_exit = None
- self.is_guard = None
-
- def add_router_info(self, values):
- hex_digest = b2a_hex(a2b_base64(values[2]+"="))
- self.advertised_bw = self.get_advertised_bw(hex_digest)
- ip = values[5]
- self.country = gi_db.country_code_by_addr(ip)
- self.as_no = self.get_as_details(ip)
-
- def add_weights(self, values):
- self.bandwidth = int(values[0].split('=')[1])
-
- def add_flags(self, values):
- if "Exit" in values and not "BadExit" in values:
- self.is_exit = True
- if "Guard" in values:
- self.is_guard = True
-
- def get_as_details(self, ip):
- try:
- value = as_db.org_by_addr(str(ip)).split()
- return value[0]
- except:
- return ""
-
- def get_advertised_bw(self, hex_digest):
- try:
- with open(options.server_desc+hex_digest) as f:
- data = f.read()
-
- desc_iter = stem.descriptor.server_descriptor.parse_file(StringIO.StringIO(data))
- desc_entries = list(desc_iter)
- desc = desc_entries[0]
- return min(desc.average_bandwidth, desc.burst_bandwidth, desc.observed_bandwidth)
- except:
- return 0
-
-def parse_bw_weights(values):
- data = {}
- try:
- for value in values:
- key, value = value.split("=")
- data[key] = float(value) / 10000
- return data
- except:
- return None
-
-def run(file_name):
- routers = []
- router = None
- result_string = []
- Wed, Wee, Wgd, Wgg = 1, 1, 1, 1
- # parse consensus
- with open(file_name, 'r') as f:
- for line in f.readlines():
- key = line.split()[0]
- values = line.split()[1:]
- if key =='r':
- router = Router()
- routers.append(router)
- router.add_router_info(values)
- elif key == 's':
- router.add_flags(values)
- elif key == 'w':
- router.add_weights(values)
- elif key == 'valid-after':
- valid_after = ' '.join(values)
- elif key == 'bandwidth-weights':
- data = parse_bw_weights(values)
- try:
- Wed = data['Wed']
- Wee = data['Wee']
- Wgd = data['Wgd']
- Wgg = data['Wgg']
- except:
- pass
-
- if len(routers) <= 0:
- return
-
- # sort list of routers based on consensus weight
- routers.sort(key=lambda router: router.bandwidth)
-
- while(len(routers)>1):
- total_bw, total_exit_bw, total_guard_bw = 0, 0, 0
- guards_no, exits_no = 0, 0
- bw_countries, bw_as = {}, {}
- max_entropy, max_entropy_as, max_entropy_guard, max_entropy_country, max_entropy_exit = 0.0, 0.0, 0.0, 0.0, 0.0
- # first relay has smallest cw
- min_cw = routers[0].bandwidth
-
- for router in routers:
- if not router.bandwidth:
- continue
- total_bw += router.bandwidth
- if router.is_guard and router.is_exit:
- total_guard_bw += Wgd*router.bandwidth
- total_exit_bw += Wed*router.bandwidth
- guards_no += 1
- exits_no += 1
- elif router.is_guard:
- total_guard_bw += Wgg*router.bandwidth
- guards_no += 1
- elif router.is_exit:
- total_exit_bw += Wee*router.bandwidth
- exits_no += 1
- if bw_countries.has_key(router.country):
- bw_countries[router.country] += router.bandwidth
- else:
- bw_countries[router.country] = router.bandwidth
- if bw_as.has_key(router.as_no):
- bw_as[router.as_no] += router.bandwidth
- else:
- bw_as[router.as_no] = router.bandwidth
-
- if total_bw == 0:
- return
-
- entropy, entropy_exit, entropy_guard, entropy_country, entropy_as = 0.0, 0.0, 0.0, 0.0, 0.0
- for router in routers:
- p = float(router.bandwidth) / float(total_bw)
- if p != 0:
- entropy += -(p * math.log(p, 2))
- if router.is_guard and router.is_exit:
- p = float(Wgd*router.bandwidth) / float(total_guard_bw)
- if p != 0:
- entropy_guard += -(p * math.log(p, 2))
- p = float(Wed*router.bandwidth) / float(total_exit_bw)
- if p != 0:
- entropy_exit += -(p * math.log(p, 2))
- elif router.is_guard:
- p = float(Wgg*router.bandwidth) / float(total_guard_bw)
- if p != 0:
- entropy_guard += -(p * math.log(p, 2))
- elif router.is_exit:
- p = float(Wee*router.bandwidth) / float(total_exit_bw)
- if p != 0:
- entropy_exit += -(p * math.log(p, 2))
-
- for country in bw_countries.iterkeys():
- p = float(bw_countries[country]) / float(total_bw)
- if p != 0:
- entropy_country += -(p * math.log(p, 2))
-
- for as_no in bw_as.iterkeys():
- p = float(bw_as[as_no]) / float(total_bw)
- if p !=0:
- entropy_as += -(p * math.log(p, 2))
-
- # Entropy of uniform distribution of 'n' possible values: log(n)
- max_entropy = math.log(len(routers), 2)
- if guards_no:
- max_entropy_guard = math.log(guards_no, 2)
- if exits_no:
- max_entropy_exit = math.log(exits_no, 2)
- if bw_countries:
- max_entropy_country = math.log(len(bw_countries), 2)
- if bw_as:
- max_entropy_as = math.log(len(bw_as), 2)
-
- result_string.append(','.join([valid_after,
- str(min_cw),
- str(len(routers)),
- str(entropy),
- str(max_entropy),
- str(entropy_exit),
- str(max_entropy_exit),
- str(entropy_guard),
- str(max_entropy_guard),
- str(entropy_country),
- str(max_entropy_country),
- str(entropy_as),
- str(max_entropy_as)]))
-
- # remove routers with min cw
- while len(routers) > 0 and routers[0].bandwidth == min_cw:
- del routers[0]
-
- return '\n'.join(result_string)
-
-def parse_args():
- usage = "Usage - python pyentropy.py [options]"
- parser = OptionParser(usage)
-
- parser.add_option("-g", "--geoip", dest="gi_db", default="GeoIP.dat",
- help="Input GeoIP database")
- parser.add_option("-a", "--as", dest="as_db", default="GeoIPASNum.dat",
- help="Input AS GeoIP database")
- parser.add_option("-s", "--server_desc", dest="server_desc",
- default="data/relay-descriptors/server-descriptors/", help="Server descriptors directory")
- parser.add_option("-o", "--output", dest="output", default="entropy.csv",
- help="Output filename")
- parser.add_option("-c", "--consensus", dest="consensus", default="in/consensus",
- help="Input consensus dir")
-
- (options, args) = parser.parse_args()
-
- return options
-
-if __name__ == "__main__":
- options = parse_args()
- gi_db = pygeoip.GeoIP(options.gi_db)
- as_db = pygeoip.GeoIP(options.as_db)
-
- with open(options.output, 'w') as f:
- for file_name in os.listdir(options.consensus):
- string = run(os.path.join(options.consensus, file_name))
- if string:
- f.write("%s\n" % (string))