commit 2d3fa7220f355cfb389f55100f65306b9055ba82 Author: Karsten Loesing karsten.loesing@gmx.net Date: Wed Jul 18 10:40:54 2012 +0200
Plot entropy, max entropy, and degree of anonymity (#6232). --- task-6232/plot-entropy.R | 51 +++++++++++++++++++++++++++++++++++++--------- task-6232/pyentropy.py | 23 ++++++++++++++++---- 2 files changed, 59 insertions(+), 15 deletions(-)
diff --git a/task-6232/plot-entropy.R b/task-6232/plot-entropy.R index 87035b9..ab57678 100644 --- a/task-6232/plot-entropy.R +++ b/task-6232/plot-entropy.R @@ -1,19 +1,50 @@ library(ggplot2) library(reshape) +library(scales) + d <- read.csv("entropy.csv", header = FALSE, - col.names = c("validafter", "all", "exit", "guard", "country")) -d <- aggregate( - list(all = d$all, exit = d$exit, guard = d$guard, country = d$country), + col.names = c("validafter", "all", "max_all", "exit", "max_exit", + "guard", "max_guard", "country", "max_country")) + +e <- aggregate( + list(all = d$all / d$max_all, exit = d$exit / d$max_exit, + guard = d$guard / d$max_guard, country = d$country / d$max_country), by = list(date = as.Date(d$validafter, origin = "1970-01-01 00:00:00")), FUN = median) -d <- melt(d, "date") -ggplot(d, aes(x = date, y = value, colour = variable)) + +e <- melt(e, "date") +e <- data.frame(date = e$date, variable = ifelse(e$variable == "all", + "All relays", ifelse(e$variable == "exit", "All exits", + ifelse(e$variable == "guard", "All guards", "All countries"))), + value = e$value) +ggplot(e, aes(x = date, y = value)) + geom_line() + +facet_wrap(~ variable) + scale_x_date(name = "\nDate") + -scale_y_continuous(name = "Degree of Anonymity\n") + -scale_colour_hue(name = "Relays", - breaks = c("all", "exit", "guard", "country"), - labels = c("All relays", "All exits", "All guards", "All countries")) -ggsave("entropy.png", width = 8, height = 6, dpi = 100) +scale_y_continuous(name = "Degree of anonymity\n", limits = c(0, 1), + labels = percent) +ggsave("degree-of-anonymity.png", width = 8, height = 6, dpi = 100)
+f <- aggregate(list(all = d$all, max_all = d$max_all, exit = d$exit, + max_exit = d$max_exit, guard = d$guard, max_guard = d$max_guard, + country = d$country, max_country = d$max_country), + by = list(date = as.Date(d$validafter, origin = "1970-01-01 00:00:00")), + FUN = median) +f <- rbind( + data.frame(date = f$date, entropy = f$all, max = f$max_all, + type = "All relays"), + data.frame(date = f$date, entropy = f$exit, max = f$max_exit, + type = "All exits"), + data.frame(date = f$date, entropy = f$guard, max = f$max_guard, + type = "All guards"), + data.frame(date = f$date, entropy = f$country, max = f$max_country, + type = "All countries")) +f <- melt(f, c("date", "type")) +ggplot(f, aes(x = date, y = value, colour = variable)) + +geom_line() + +facet_wrap(~ type) + +scale_x_date(name = "\nDate") + +scale_y_continuous(name = "Entropy and maximum entropy\n", + limits = c(0, max(f$value))) + +opts(legend.position = "none") +ggsave("entropy.png", width = 8, height = 6, dpi = 100)
diff --git a/task-6232/pyentropy.py b/task-6232/pyentropy.py index abb716a..ae28f05 100644 --- a/task-6232/pyentropy.py +++ b/task-6232/pyentropy.py @@ -1,6 +1,15 @@ """ Usage - python pyentropy.py <consensus-dir> <output-file> -Output - A CSV file of the format <valid-after>,<entropy for all nodes>,<entropy for exitnodes>,<entropy for guardnodes>,<entropy for countries> +Output - A CSV file of the format (without newlines): + <valid-after>, + <entropy for all nodes>, + <max entropy for all nodes>, + <entropy for exit nodes>, + <max entropy for exit nodes>, + <entropy for guard nodes>, + <max entropy for guard nodes>, + <entropy for countries>, + <max entropy for countries> rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in """
@@ -101,10 +110,14 @@ def run(file_name): max_entropy_country = math.log(len(bw_countries), 2)
return ",".join([valid_after, - str(entropy/max_entropy), - str(entropy_exit/max_entropy_exit), - str(entropy_guard/max_entropy_guard), - str(entropy_country/max_entropy_country)]) + str(entropy), + str(max_entropy), + str(entropy_exit), + str(max_entropy_exit), + str(entropy_guard), + str(max_entropy_guard), + str(entropy_country), + str(max_entropy_country)])
def usage(): print "Usage - python pyentropy.py <consensus-dir> <output-file>"