commit 2d3fa7220f355cfb389f55100f65306b9055ba82
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Jul 18 10:40:54 2012 +0200
Plot entropy, max entropy, and degree of anonymity (#6232).
---
task-6232/plot-entropy.R | 51 +++++++++++++++++++++++++++++++++++++---------
task-6232/pyentropy.py | 23 ++++++++++++++++----
2 files changed, 59 insertions(+), 15 deletions(-)
diff --git a/task-6232/plot-entropy.R b/task-6232/plot-entropy.R
index 87035b9..ab57678 100644
--- a/task-6232/plot-entropy.R
+++ b/task-6232/plot-entropy.R
@@ -1,19 +1,50 @@
library(ggplot2)
library(reshape)
+library(scales)
+
d <- read.csv("entropy.csv", header = FALSE,
- col.names = c("validafter", "all", "exit", "guard", "country"))
-d <- aggregate(
- list(all = d$all, exit = d$exit, guard = d$guard, country = d$country),
+ col.names = c("validafter", "all", "max_all", "exit", "max_exit",
+ "guard", "max_guard", "country", "max_country"))
+
+e <- aggregate(
+ list(all = d$all / d$max_all, exit = d$exit / d$max_exit,
+ guard = d$guard / d$max_guard, country = d$country / d$max_country),
by = list(date = as.Date(d$validafter, origin = "1970-01-01 00:00:00")),
FUN = median)
-d <- melt(d, "date")
-ggplot(d, aes(x = date, y = value, colour = variable)) +
+e <- melt(e, "date")
+e <- data.frame(date = e$date, variable = ifelse(e$variable == "all",
+ "All relays", ifelse(e$variable == "exit", "All exits",
+ ifelse(e$variable == "guard", "All guards", "All countries"))),
+ value = e$value)
+ggplot(e, aes(x = date, y = value)) +
geom_line() +
+facet_wrap(~ variable) +
scale_x_date(name = "\nDate") +
-scale_y_continuous(name = "Degree of Anonymity\n") +
-scale_colour_hue(name = "Relays",
- breaks = c("all", "exit", "guard", "country"),
- labels = c("All relays", "All exits", "All guards", "All countries"))
-ggsave("entropy.png", width = 8, height = 6, dpi = 100)
+scale_y_continuous(name = "Degree of anonymity\n", limits = c(0, 1),
+ labels = percent)
+ggsave("degree-of-anonymity.png", width = 8, height = 6, dpi = 100)
+f <- aggregate(list(all = d$all, max_all = d$max_all, exit = d$exit,
+ max_exit = d$max_exit, guard = d$guard, max_guard = d$max_guard,
+ country = d$country, max_country = d$max_country),
+ by = list(date = as.Date(d$validafter, origin = "1970-01-01 00:00:00")),
+ FUN = median)
+f <- rbind(
+ data.frame(date = f$date, entropy = f$all, max = f$max_all,
+ type = "All relays"),
+ data.frame(date = f$date, entropy = f$exit, max = f$max_exit,
+ type = "All exits"),
+ data.frame(date = f$date, entropy = f$guard, max = f$max_guard,
+ type = "All guards"),
+ data.frame(date = f$date, entropy = f$country, max = f$max_country,
+ type = "All countries"))
+f <- melt(f, c("date", "type"))
+ggplot(f, aes(x = date, y = value, colour = variable)) +
+geom_line() +
+facet_wrap(~ type) +
+scale_x_date(name = "\nDate") +
+scale_y_continuous(name = "Entropy and maximum entropy\n",
+ limits = c(0, max(f$value))) +
+opts(legend.position = "none")
+ggsave("entropy.png", width = 8, height = 6, dpi = 100)
diff --git a/task-6232/pyentropy.py b/task-6232/pyentropy.py
index abb716a..ae28f05 100644
--- a/task-6232/pyentropy.py
+++ b/task-6232/pyentropy.py
@@ -1,6 +1,15 @@
"""
Usage - python pyentropy.py <consensus-dir> <output-file>
-Output - A CSV file of the format <valid-after>,<entropy for all nodes>,<entropy for exitnodes>,<entropy for guardnodes>,<entropy for countries>
+Output - A CSV file of the format (without newlines):
+ <valid-after>,
+ <entropy for all nodes>,
+ <max entropy for all nodes>,
+ <entropy for exit nodes>,
+ <max entropy for exit nodes>,
+ <entropy for guard nodes>,
+ <max entropy for guard nodes>,
+ <entropy for countries>,
+ <max entropy for countries>
rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in
"""
@@ -101,10 +110,14 @@ def run(file_name):
max_entropy_country = math.log(len(bw_countries), 2)
return ",".join([valid_after,
- str(entropy/max_entropy),
- str(entropy_exit/max_entropy_exit),
- str(entropy_guard/max_entropy_guard),
- str(entropy_country/max_entropy_country)])
+ str(entropy),
+ str(max_entropy),
+ str(entropy_exit),
+ str(max_entropy_exit),
+ str(entropy_guard),
+ str(max_entropy_guard),
+ str(entropy_country),
+ str(max_entropy_country)])
def usage():
print "Usage - python pyentropy.py <consensus-dir> <output-file>"