[tor-commits] [metrics-tasks/master] Add #6232 code written by gsathya and others.

karsten at torproject.org karsten at torproject.org
Wed Jul 4 21:18:54 UTC 2012


commit e456caea6134de661d24be8a253394e323ac025a
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Jul 4 23:11:06 2012 +0200

    Add #6232 code written by gsathya and others.
    
    Extract consensus weights from consensuses and calculate the Shannon
    Entropy for them.  Most of this code was written by gsathya and later
    refined by asn and phw.
---
 task-6232/.gitignore       |    3 ++
 task-6232/plot-entropy.R   |   10 +++++
 task-6232/pyentropy.py     |   79 ++++++++++++++++++++++++++++++++++++++++++++
 task-6232/run-pyentropy.py |    5 +++
 4 files changed, 97 insertions(+), 0 deletions(-)

diff --git a/task-6232/.gitignore b/task-6232/.gitignore
new file mode 100644
index 0000000..8a0c627
--- /dev/null
+++ b/task-6232/.gitignore
@@ -0,0 +1,3 @@
+in/
+entropy.csv
+
diff --git a/task-6232/plot-entropy.R b/task-6232/plot-entropy.R
new file mode 100644
index 0000000..1334b88
--- /dev/null
+++ b/task-6232/plot-entropy.R
@@ -0,0 +1,10 @@
+# Plot entropy over time from entropy.csv (no header row: validafter,entropy).
+library(ggplot2)
+entropy_data <- read.csv("entropy.csv", header = FALSE,
+  col.names = c("validafter", "entropy"))
+p <- ggplot(entropy_data, aes(x = as.POSIXct(validafter), y = entropy)) +
+  geom_line() + scale_x_datetime(name = "\nDate") +
+  scale_y_continuous(name = "Entropy\n")
+ggsave("entropy.png", plot = p, width = 8, height = 6, dpi = 100)
+
+
diff --git a/task-6232/pyentropy.py b/task-6232/pyentropy.py
new file mode 100644
index 0000000..f13f709
--- /dev/null
+++ b/task-6232/pyentropy.py
@@ -0,0 +1,79 @@
+"""
+Usage - python pyentropy.py <consensus-dir> <output-file>
+Output - A CSV file of the format <valid-after>,<entropy>
+Fetch input - rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in
+"""
+
+import sys
+import math
+import os
+from decimal import *
+
+RESULTS = []
+KEYS = ['r','s','v','w','p','m']
+
+
+class Router:
+	"""One router entry of a consensus; fields are filled line by line via add()."""
+	def __init__(self):
+		self.lines = []
+		self.nick = self.bandwidth = self.flags = self.probability = None
+
+	def add(self, key, values):
+		"""Store the tokens of an 'r', 's' or 'w' line; other keys are ignored."""
+		if key == 'r':
+			self.nick = values[0]
+		elif key == 's':
+			self.flags = values
+		elif key == 'w':
+			# first token is split on '=' and its right-hand side parsed as int
+			self.bandwidth = int(values[0].split('=')[1])
+
+
+def run(file_name):
+	"""Return '<valid-after>,<entropy>' for one consensus file, or None on error.
+	Entropy: base-2 Shannon entropy of the distribution of router bandwidths."""
+	routers = []
+	router = valid_after = None
+	with open(file_name, 'r') as f:
+		for line in f:
+			parts = line.split()
+			if not parts:
+				continue  # blank line, nothing to parse
+			key, values = parts[0], parts[1:]
+			if key == 'r':
+				router = Router()  # an 'r' line starts a new router entry
+			if key == 'valid-after':
+				valid_after = ' '.join(values)
+			elif key in KEYS and router is not None:
+				# entry lines seen before the first 'r' line are skipped
+				router.add(key, values)
+				if key == 'p':
+					routers.append(router)  # only entries with a 'p' line count
+
+	if not routers:
+		print("Error: amount of routers must be > 0.")
+		return None
+	if valid_after is None:
+		print("Error: no valid-after line found.")
+		return None
+
+	# frequency distribution: bandwidth value -> number of routers having it
+	bw_dist = {}
+	for router in routers:
+		bw_dist[router.bandwidth] = bw_dist.get(router.bandwidth, 0) + 1
+
+	print("calculating entropy")
+	entropy = 0.0
+	for count in bw_dist.values():
+		p = float(count) / len(routers)  # probability of this bandwidth
+		entropy += -(p * math.log(p, 2))
+
+	return ",".join([valid_after, str(entropy)])
+
+
+if __name__ == "__main__":
+	paths = [os.path.join(sys.argv[1], n) for n in sorted(os.listdir(sys.argv[1]))]
+	with open(sys.argv[2], 'w') as f:
+		# run() returns None for files it cannot use; keep those out of the CSV
+		f.writelines("%s\n" % row for row in map(run, paths) if row is not None)
diff --git a/task-6232/run-pyentropy.py b/task-6232/run-pyentropy.py
new file mode 100755
index 0000000..a94a7d6
--- /dev/null
+++ b/task-6232/run-pyentropy.py
@@ -0,0 +1,5 @@
+#!/bin/bash
+#### Uncomment the next line to rsync the most recent data into in/ instead of using extracted tarballs
+###rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in
+python pyentropy.py in/consensuses/ entropy.csv
+



More information about the tor-commits mailing list