[tor-commits] [metrics-tasks/master] Revert back to original algorithm

karsten at torproject.org karsten at torproject.org
Mon Jul 9 10:20:09 UTC 2012


commit 9fb1eaeb8c6104d99ed3d4d9f1748fa9269c6ac8
Author: Sathyanarayanan Gunasekaran <gsathya.ceg at gmail.com>
Date:   Thu Jul 5 12:49:37 2012 +0200

    Revert back to original algorithm
    
    Phw realised that the original algorithm made more sense
    in the current scenario, where we need the probability of
    each node being selected and not the probability of one
    particular bandwidth value being selected/present.
    
    Patched by phw
---
 task-6232/pyentropy.py |   64 ++++++++++++++++++-----------------------------
 1 files changed, 25 insertions(+), 39 deletions(-)

diff --git a/task-6232/pyentropy.py b/task-6232/pyentropy.py
index 6fa5b86..0ae0f25 100644
--- a/task-6232/pyentropy.py
+++ b/task-6232/pyentropy.py
@@ -12,7 +12,6 @@ from decimal import *
 RESULTS = []
 KEYS = ['r','s','v','w','p','m']
 
-
 class Router:
     def __init__(self):
         self.lines = []
@@ -22,7 +21,7 @@ class Router:
         self.probability = None
         self.is_exit = None
         self.is_guard = None
-        
+
     def add(self, key, values):
         if key == 'r':
            self.nick = values[0]
@@ -34,7 +33,7 @@ class Router:
                self.is_exit = True
            if "Guard" in self.flags:
                self.is_guard = True
-        
+
 def run(file_name):
     routers = []
         # parse consensus
@@ -52,48 +51,35 @@ def run(file_name):
                 valid_after = ' '.join(values)
             elif key in KEYS:
                 router.add(key, values)
-                
-    # build hash table with freq. distribution
-    # key: bandwidth
-    # value: number of bandwidth's observations
-    
-    bw_dist, bw_dist_exit, bw_dist_guard = {}, {}, {}
+
+    totalBW, totalExitBW, totalGuardBW = 0, 0, 0
     for router in routers:
-        if router.is_exit:
-            if bw_dist_exit.has_key(router.bandwidth):
-                bw_dist_exit[router.bandwidth] += 1
-            else:
-                bw_dist_exit[router.bandwidth] = 1
+        totalBW += router.bandwidth
         if router.is_guard:
-            if bw_dist_guard.has_key(router.bandwidth):
-                bw_dist_guard[router.bandwidth] += 1
-            else:
-                bw_dist_guard[router.bandwidth] = 1
-        if bw_dist.has_key(router.bandwidth):
-            bw_dist[router.bandwidth] += 1
-        else:
-            bw_dist[router.bandwidth] = 1
-    
+            totalGuardBW += router.bandwidth
+        if router.is_exit:
+            totalExitBW += router.bandwidth
+
     if len(routers) <= 0:
         print "Error: amount of routers must be > 0."
         return;
-    
+
     entropy, entropy_exit, entropy_guard = 0.0, 0.0, 0.0
-    for bw in bw_dist.iterkeys():
-        # p = probability of one particular bandwidth
-        p = float(bw_dist[bw]) / len(routers)
-        entropy += -(p * math.log(p, 2))
-        
-    for bw in bw_dist_exit.iterkeys():
-        # p = probability of one particular bandwidth
-        p = float(bw_dist[bw]) / len(routers)
-        entropy_exit += -(p * math.log(p, 2))
-        
-    for bw in bw_dist_guard.iterkeys():
-        # p = probability of one particular bandwidth
-        p = float(bw_dist[bw]) / len(routers)
-        entropy_guard += -(p * math.log(p, 2))
-    
+    for router in routers:
+        p = float(router.bandwidth) / float(totalBW)
+        if p != 0:
+            entropy += -(p * math.log(p, 2))
+
+        if router.is_guard:
+            p = float(router.bandwidth) / float(totalGuardBW)
+            if p != 0:
+                entropy_exit += -(p * math.log(p, 2))
+
+        if router.is_exit:
+            p = float(router.bandwidth) / float(totalExitBW)
+            if p != 0:
+                entropy_guard += -(p * math.log(p, 2))
+
     return ",".join([valid_after, str(entropy), str(entropy_exit), str(entropy_guard)])
 
 def usage():





More information about the tor-commits mailing list