[tor-commits] [metrics-tasks/master] Find entropy based on AS No

karsten at torproject.org karsten at torproject.org
Tue Jul 24 10:11:20 UTC 2012


commit 687d1e8b90e5a413aa56cec17abab097745bde8e
Author: Sathyanarayanan Gunasekaran <gsathya.ceg at gmail.com>
Date:   Sat Jul 21 13:26:31 2012 +0530

    Find entropy based on AS No
    
    Use -a/--as to specify the AS database (default: GeoIPASNum.dat)
---
 task-6232/pyentropy.py |   55 +++++++++++++++++++++++++++++++++++++----------
 1 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/task-6232/pyentropy.py b/task-6232/pyentropy.py
index 1a981fe..fa50406 100644
--- a/task-6232/pyentropy.py
+++ b/task-6232/pyentropy.py
@@ -9,7 +9,9 @@ Output - A CSV file of the format (without newlines):
          <entropy for guard nodes>,
          <max entropy for guard nodes>,
          <entropy for countries>,
-         <max entropy for countries>
+         <max entropy for countries>,
+         <entropy for AS>,
+         <max entropy for AS>
 rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in
 """
 
@@ -17,6 +19,10 @@ import sys
 import math
 import os
 import pygeoip
+import StringIO
+import stem.descriptor
+from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor
+from binascii import b2a_hex, a2b_base64, a2b_hex
 from optparse import OptionParser
 
 KEYS = ['r','s','v','w','p','m']
@@ -30,6 +36,8 @@ class Router:
         self.probability = None
         self.ip = None
         self.country = None
+        self.as_no = None
+        self.as_name = None
         self.is_exit = None
         self.is_guard = None
 
@@ -37,7 +45,8 @@ class Router:
         if key == 'r':
            self.nick = values[0]
            self.ip = values[5]
-           self.country = gi.country_name_by_addr(self.ip)
+           self.country = gi_db.country_name_by_addr(self.ip)
+           self.as_no, self.as_name = self.get_as_details()
         if key == 'w':
            self.bandwidth = int(values[0].split('=')[1])
         if key == 's':
@@ -47,6 +56,13 @@ class Router:
            if "Guard" in self.flags:
                self.is_guard = True
 
+    def get_as_details(self):
+        try:
+            value = as_db.org_by_addr(str(self.ip)).split()
+            return value[0], value[1]
+        except:
+            return None, None
+        
 def run(file_name):
     routers = []
     # parse consensus
@@ -67,7 +83,7 @@ def run(file_name):
 
     totalBW, totalExitBW, totalGuardBW = 0, 0, 0
     guards_n, exits_n = 0, 0
-    bw_countries = {}
+    bw_countries, bw_as = {}, {}
     for router in routers:
         totalBW += router.bandwidth
         if router.is_guard:
@@ -79,12 +95,17 @@ def run(file_name):
         if bw_countries.has_key(router.country):
             bw_countries[router.country] += router.bandwidth
         else:
-            bw_countries[router.country] = router.bandwidth
+            bw_countries[router.country] = router.bandwidth        
+        if router.as_no:
+            if bw_as.has_key(router.as_no):
+                bw_as[router.as_no] += router.bandwidth
+            else:
+                bw_as[router.as_no] = router.bandwidth
 
     if len(routers) <= 0:
         return
-
-    entropy, entropy_exit, entropy_guard, entropy_country = 0.0, 0.0, 0.0, 0.0
+    
+    entropy, entropy_exit, entropy_guard, entropy_country, entropy_as = 0.0, 0.0, 0.0, 0.0, 0.0
     for router in routers:
         p = float(router.bandwidth) / float(totalBW)
         if p != 0:
@@ -97,18 +118,24 @@ def run(file_name):
             p = float(router.bandwidth) / float(totalExitBW)
             if p != 0:
                 entropy_exit += -(p * math.log(p, 2))
-
+    
     for country in bw_countries.iterkeys():
         p = float(bw_countries[country]) / float(totalBW)
         if p != 0:
             entropy_country += -(p * math.log(p, 2))
-
+    
+    for as_no in bw_as.iterkeys():
+        p = float(bw_as[as_no]) / float(totalBW)
+        if p !=0:
+            entropy_as += -(p * math.log(p, 2))
+    
     # Entropy of uniform distribution of 'n' possible values: log(n)
     max_entropy = math.log(len(routers), 2)
     max_entropy_guard = math.log(guards_n, 2)
     max_entropy_exit = math.log(exits_n, 2)
     max_entropy_country = math.log(len(bw_countries), 2)
-
+    max_entropy_as = math.log(len(bw_as), 2)
+    
     return ",".join([valid_after,
                      str(entropy),
                      str(max_entropy),
@@ -117,13 +144,16 @@ def run(file_name):
                      str(entropy_guard),
                      str(max_entropy_guard),
                      str(entropy_country),
-                     str(max_entropy_country)])
+                     str(max_entropy_country),
+                     str(entropy_as),
+                     str(max_entropy_as)])
 
 def parse_args():
     usage = "Usage - python pyentropy.py [options]"
     parser = OptionParser(usage)
 
-    parser.add_option("-g", "--geoip", dest="geoip", default="GeoIP.dat", help="Input GeoIP database")
+    parser.add_option("-g", "--geoip", dest="gi_db", default="GeoIP.dat", help="Input GeoIP database")
+    parser.add_option("-a", "--as", dest="as_db", default="GeoIPASNum.dat", help="Input AS GeoIP database")
     parser.add_option("-o", "--output", dest="output", default="entropy.csv", help="Output filename")
     parser.add_option("-c", "--consensus", dest="consensus", default="in/consensus", help="Input consensus dir")
 
@@ -134,7 +164,8 @@ def parse_args():
 if __name__ == "__main__":
 
     options = parse_args()
-    gi = pygeoip.GeoIP(options.geoip)
+    gi_db = pygeoip.GeoIP(options.gi_db)
+    as_db = pygeoip.GeoIP(options.as_db)
 
     with open(options.output, 'w') as f:
         for file_name in os.listdir(options.consensus):





More information about the tor-commits mailing list