[tor-commits] [metrics-tasks/master] Plot entropy, max entropy, and degree of anonymity (#6232).

karsten at torproject.org karsten at torproject.org
Wed Jul 18 08:49:14 UTC 2012


commit 2d3fa7220f355cfb389f55100f65306b9055ba82
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Jul 18 10:40:54 2012 +0200

    Plot entropy, max entropy, and degree of anonymity (#6232).
---
 task-6232/plot-entropy.R |   51 +++++++++++++++++++++++++++++++++++++---------
 task-6232/pyentropy.py   |   23 ++++++++++++++++----
 2 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/task-6232/plot-entropy.R b/task-6232/plot-entropy.R
index 87035b9..ab57678 100644
--- a/task-6232/plot-entropy.R
+++ b/task-6232/plot-entropy.R
@@ -1,19 +1,50 @@
 library(ggplot2)
 library(reshape)
+library(scales)
+
 d <- read.csv("entropy.csv", header = FALSE,
-  col.names = c("validafter", "all", "exit", "guard", "country"))
-d <- aggregate(
-  list(all = d$all, exit = d$exit, guard = d$guard, country = d$country),
+  col.names = c("validafter", "all", "max_all", "exit", "max_exit",
+  "guard", "max_guard", "country", "max_country"))
+
+e <- aggregate(
+  list(all = d$all / d$max_all, exit = d$exit / d$max_exit,
+  guard = d$guard / d$max_guard, country = d$country / d$max_country),
   by = list(date = as.Date(d$validafter, origin = "1970-01-01 00:00:00")),
   FUN = median)
-d <- melt(d, "date")
-ggplot(d, aes(x = date, y = value, colour = variable)) +
+e <- melt(e, "date")
+e <- data.frame(date = e$date, variable = ifelse(e$variable == "all",
+  "All relays", ifelse(e$variable == "exit", "All exits",
+  ifelse(e$variable == "guard", "All guards", "All countries"))),
+  value = e$value)
+ggplot(e, aes(x = date, y = value)) +
 geom_line() +
+facet_wrap(~ variable) +
 scale_x_date(name = "\nDate") +
-scale_y_continuous(name = "Degree of Anonymity\n") +
-scale_colour_hue(name = "Relays",
-  breaks = c("all", "exit", "guard", "country"),
-  labels = c("All relays", "All exits", "All guards", "All countries"))
-ggsave("entropy.png", width = 8, height = 6, dpi = 100)
+scale_y_continuous(name = "Degree of anonymity\n", limits = c(0, 1),
+  labels = percent)
+ggsave("degree-of-anonymity.png", width = 8, height = 6, dpi = 100)
 
+f <- aggregate(list(all = d$all, max_all = d$max_all, exit = d$exit,
+  max_exit = d$max_exit, guard = d$guard, max_guard = d$max_guard,
+  country = d$country, max_country = d$max_country),
+  by = list(date = as.Date(d$validafter, origin = "1970-01-01 00:00:00")),
+  FUN = median)
+f <- rbind(
+  data.frame(date = f$date, entropy = f$all, max = f$max_all,
+    type = "All relays"),
+  data.frame(date = f$date, entropy = f$exit, max = f$max_exit,
+    type = "All exits"),
+  data.frame(date = f$date, entropy = f$guard, max = f$max_guard,
+    type = "All guards"),
+  data.frame(date = f$date, entropy = f$country, max = f$max_country,
+    type = "All countries"))
+f <- melt(f, c("date", "type"))
+ggplot(f, aes(x = date, y = value, colour = variable)) +
+geom_line() +
+facet_wrap(~ type) +
+scale_x_date(name = "\nDate") +
+scale_y_continuous(name = "Entropy and maximum entropy\n",
+  limits = c(0, max(f$value))) +
+opts(legend.position = "none")
+ggsave("entropy.png", width = 8, height = 6, dpi = 100)
 
diff --git a/task-6232/pyentropy.py b/task-6232/pyentropy.py
index abb716a..ae28f05 100644
--- a/task-6232/pyentropy.py
+++ b/task-6232/pyentropy.py
@@ -1,6 +1,15 @@
 """
 Usage - python pyentropy.py <consensus-dir> <output-file>
-Output - A CSV file of the format <valid-after>,<entropy for all nodes>,<entropy for exitnodes>,<entropy for guardnodes>,<entropy for countries>
+Output - A CSV file of the format (without newlines):
+         <valid-after>,
+         <entropy for all nodes>,
+         <max entropy for all nodes>,
+         <entropy for exit nodes>,
+         <max entropy for exit nodes>,
+         <entropy for guard nodes>,
+         <max entropy for guard nodes>,
+         <entropy for countries>,
+         <max entropy for countries>
 rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in
 """
 
@@ -101,10 +110,14 @@ def run(file_name):
     max_entropy_country = math.log(len(bw_countries), 2)
 
     return ",".join([valid_after,
-                     str(entropy/max_entropy),
-                     str(entropy_exit/max_entropy_exit),
-                     str(entropy_guard/max_entropy_guard),
-                     str(entropy_country/max_entropy_country)])
+                     str(entropy),
+                     str(max_entropy),
+                     str(entropy_exit),
+                     str(max_entropy_exit),
+                     str(entropy_guard),
+                     str(max_entropy_guard),
+                     str(entropy_country),
+                     str(max_entropy_country)])
 
 def usage():
     print "Usage - python pyentropy.py <consensus-dir> <output-file>"



More information about the tor-commits mailing list