commit eaae6a2bb25b122866e84549faa7c54cba7d53c8 Author: Karsten Loesing karsten.loesing@gmx.net Date: Mon Nov 26 11:09:29 2012 -0500
Add graphing code for new linf data (#1854). --- task-1854/plot-entropy.R | 133 ++++++++++++++++++++++++++++++---------------- task-1854/pyextract.py | 29 ++++++++++ 2 files changed, 116 insertions(+), 46 deletions(-)
diff --git a/task-1854/plot-entropy.R b/task-1854/plot-entropy.R index e28193f..735e65f 100644 --- a/task-1854/plot-entropy.R +++ b/task-1854/plot-entropy.R @@ -2,52 +2,93 @@ library(ggplot2) library(reshape) library(scales)
-d <- read.csv("entropy.csv", header = FALSE, - col.names = c("validafter", "min_cw", "relays", "all", "max_all", - "exit", "max_exit", "guard", "max_guard", "country", "max_country", - "as", "max_as"), stringsAsFactor = FALSE) -max_validafter <- max(d$validafter) -d <- d[d$validafter == max_validafter, ] - -e <- d[d$min_cw <= 10000, ] -e <- data.frame(min_cw = e$min_cw, all = e$all / e$max_all, - exit = e$exit / e$max_exit, guard = e$guard / e$max_guard, - country = e$country / e$max_country, as = e$as / e$max_as) -e <- melt(e, "min_cw") -e <- data.frame(min_cw = e$min_cw, variable = - ifelse(e$variable == "all", "All relays", - ifelse(e$variable == "exit", "All exits", - ifelse(e$variable == "guard", "All guards", - ifelse(e$variable == "country", "All countries", - "All ASes")))), value = e$value) -ggplot(e, aes(x = min_cw, y = value)) + +e <- read.csv("extracted.csv", header = FALSE, + col.names = c("validafter", "min_adv_bw", "relays", "linf", "graph"), + stringsAsFactor = FALSE) + +l <- e[e$graph == 'last', ] +l <- data.frame(x = l$min_adv_bw, relays = l$relays, linf = l$linf) +l <- melt(l, "x") +ggplot(l, aes(x = x, y = value)) + +geom_line() + +facet_grid(variable ~ ., scales = "free_y") + +scale_x_log10(name = "\nAdvertised bandwidth cutoff in B/s (log scale)") + +scale_y_continuous(name = "") + +opts(title = paste("Consensus with valid-after time ", max(e$validafter), + "\n", sep = "")) + +l <- e[e$graph == 'last' & e$min_adv_bw >= 10000 & e$min_adv_bw <= 100000, ] +l <- data.frame(x = l$min_adv_bw, relays = l$relays, linf = l$linf) +l <- melt(l, "x") +ggplot(l, aes(x = x, y = value)) + +geom_line() + +facet_grid(variable ~ ., scales = "free_y") + +scale_x_log10(name = "\nAdvertised bandwidth cutoff in B/s (log scale)") + +scale_y_continuous(name = "") + +opts(title = paste("Consensus with valid-after time ", max(e$validafter), + "\n", sep = "")) + +h <- e[e$graph == 'history' & e$min_adv_bw == 10000, ] +h <- data.frame(validafter = h$validafter, relays = 
h$relays, linf = h$linf) +h <- aggregate(h[, 2:length(h)], by = list(x = as.Date(h$validafter)), FUN = mean) +h <- melt(h, "x") +ggplot(h, aes(x = as.POSIXct(x), y = value)) + +geom_line() + +facet_grid(variable ~ ., scales = "free_y") + +scale_x_datetime(name = "") + +scale_y_continuous(name = "") + +opts(title = "Advertised bandwidth cutoff 10000 B/s\n") + +h <- e[e$graph == 'history' & e$min_adv_bw == 50000, ] +h <- data.frame(validafter = h$validafter, relays = h$relays, linf = h$linf) +h <- aggregate(h[, 2:length(h)], by = list(x = as.Date(h$validafter)), FUN = mean) +h <- melt(h, "x") +ggplot(h, aes(x = as.POSIXct(x), y = value)) + +geom_line() + +facet_grid(variable ~ ., scales = "free_y") + +scale_x_datetime(name = "") + +scale_y_continuous(name = "") + +opts(title = "Advertised bandwidth cutoff 50000 B/s\n") + +h <- e[e$graph == 'history' & e$min_adv_bw == 100000, ] +h <- data.frame(validafter = h$validafter, relays = h$relays, linf = h$linf) +h <- aggregate(h[, 2:length(h)], by = list(x = as.Date(h$validafter)), FUN = mean) +h <- melt(h, "x") +ggplot(h, aes(x = as.POSIXct(x), y = value)) + geom_line() + -facet_wrap(~ variable) + -scale_x_continuous(name = "\nMinimum consensus weight") + -scale_y_continuous(name = "", limits = c(0, 1), labels = percent) + -opts(title = paste("Degree of anonymity based on consensus from ", - max_validafter, "\n", sep = "")) -ggsave("degree-of-anonymity-min-cw.png", width = 8, height = 6, dpi = 100) - -f <- d[d$min_cw <= 10000, ] -f <- rbind( - data.frame(min_cw = f$min_cw, entropy = f$all, max = f$max_all, - type = "All relays"), - data.frame(min_cw = f$min_cw, entropy = f$exit, max = f$max_exit, - type = "All exits"), - data.frame(min_cw = f$min_cw, entropy = f$guard, max = f$max_guard, - type = "All guards"), - data.frame(min_cw = f$min_cw, entropy = f$country, max = f$max_country, - type = "All countries"), - data.frame(min_cw = f$min_cw, entropy = f$as, max = f$max_as, - type = "All ASes")) -f <- melt(f, c("min_cw", 
"type")) -ggplot(f, aes(x = min_cw, y = value, colour = variable)) + +facet_grid(variable ~ ., scales = "free_y") + +scale_x_datetime(name = "") + +scale_y_continuous(name = "") + +opts(title = "Advertised bandwidth cutoff 100000 B/s\n") + +h <- e[e$graph == 'history' & e$min_adv_bw %in% c(10000, 50000, 100000), ] +h <- data.frame(validafter = h$validafter, min_adv_bw = h$min_adv_bw, relays = h$relays, linf = h$linf) +h <- aggregate(h[, 2:length(h)], + by = list(x = as.Date(h$validafter), min_adv_bw = h$min_adv_bw), FUN = mean) +h <- melt(h, c("x", "min_adv_bw")) +ggplot(h, aes(x = as.POSIXct(x), y = value, colour = as.factor(min_adv_bw))) + +geom_line() + +facet_grid(variable ~ ., scales = "free_y") + +scale_x_datetime(name = "") + +scale_y_continuous(name = "") + +scale_colour_hue(name = "Advertised bandwidth cutoff in B/s") + +opts(legend.position = "bottom") + +h <- e[e$graph == 'history' & e$min_adv_bw == 10000, ] +m10000 <- data.frame(linf = sort(h$linf), + frac_cons = (1:length(h$linf))/length(h$linf), min_adv_bw = "10000") +h <- e[e$graph == 'history' & e$min_adv_bw == 50000, ] +m50000 <- data.frame(linf = sort(h$linf), + frac_cons = (1:length(h$linf))/length(h$linf), min_adv_bw = "50000") +h <- e[e$graph == 'history' & e$min_adv_bw == 100000, ] +m100000 <- data.frame(linf = sort(h$linf), + frac_cons = (1:length(h$linf))/length(h$linf), min_adv_bw = "100000") +h <- rbind(m10000, m50000, m100000) +ggplot(h, aes(x = linf, y = frac_cons, colour = as.factor(min_adv_bw))) + geom_line() + -facet_wrap(~ type) + -scale_x_continuous(name = "\nMinimum consensus weight") + -scale_y_continuous(name = "", limits = c(0, max(f$value))) + -opts(title = paste("Entropy and maximum entropy based on consensus from ", - max_validafter, "\n", sep = ""), legend.position = "none") -ggsave("entropy-min-cw.png", width = 8, height = 6, dpi = 100) +scale_x_continuous(name = "") + +scale_y_continuous( + name = "Fraction of consensuses from 2011-11-01 to 2012-10-31\n") + 
+scale_colour_hue(name = "Advertised bandwidth cutoff in B/s") + +opts(legend.position = "bottom")
# diff --git a/task-1854/pyextract.py b/task-1854/pyextract.py
# (new file mode 100644, index 0000000..4220a2a)
"""Extract plotting input from entropy.csv into extracted.csv.

Each input row is read as ``validafter,min_adv_bw,relays,linf``.  Output rows
carry a fifth column that is either ``history`` or ``last``.
"""
import os
import sys


def main():
    """Read ``entropy.csv`` and write ``extracted.csv`` in the working dir.

    For every valid-after value, one ``history`` row is written per cutoff
    in a fixed list: the row's bandwidth column is replaced by the cutoff
    and the relays/linf values come from the first input row whose
    ``min_adv_bw`` reaches that cutoff.  Afterwards, every row belonging to
    the final valid-after value in the file is copied unchanged with
    ``,last`` appended.

    NOTE(review): this presumes rows are grouped by valid-after time and
    ordered by ascending ``min_adv_bw`` inside each group -- confirm against
    whatever produces entropy.csv.

    Raises:
        IOError/OSError: if either file cannot be opened.
        ValueError: if the second column of a row is not an integer.
        IndexError: if a non-blank row has fewer than four columns.
    """
    # The final, very large entry is a sentinel that no bandwidth reaches,
    # so the ``while`` loop below never runs off the end of the list.
    cutoffs = (0, 10000, 20000, 30000, 40000, 50000, 75000,
               100000, 100000000000000000000)

    last_validafter = ''
    last_lines = []
    next_cutoffs = list(cutoffs)

    # Context managers make sure both files are flushed and closed even if
    # a malformed row raises part-way through.  The output is opened first,
    # as before, so it is truncated even when the input is missing.
    with open('extracted.csv', 'w') as out_file:
        with open('entropy.csv') as in_file:
            for line in in_file:
                stripped = line.strip()
                if not stripped:
                    # Tolerate blank lines (e.g. a trailing newline)
                    # instead of failing on the column lookup.
                    continue
                parts = stripped.split(',')
                validafter = parts[0]
                min_adv_bw = int(parts[1])
                relays = parts[2]
                linf = parts[3]

                if last_validafter != validafter:
                    # New valid-after group: forget the previous group's
                    # rows and start over with the full cutoff list.
                    last_lines = []
                    next_cutoffs = list(cutoffs)

                # Emit every not-yet-written cutoff this row has reached.
                while min_adv_bw >= next_cutoffs[0]:
                    out_file.write("%s,%d,%s,%s,history\n" % (
                        validafter, next_cutoffs[0], relays, linf))
                    next_cutoffs.pop(0)

                last_lines.append(stripped)
                last_validafter = validafter

        # Only the rows of the final valid-after group remain here.
        for stripped in last_lines:
            out_file.write(stripped + ",last\n")


if __name__ == '__main__':
    main()