commit eaae6a2bb25b122866e84549faa7c54cba7d53c8
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Mon Nov 26 11:09:29 2012 -0500
Add graphing code for new linf data (#1854).
---
task-1854/plot-entropy.R | 133 ++++++++++++++++++++++++++++++----------------
task-1854/pyextract.py | 29 ++++++++++
2 files changed, 116 insertions(+), 46 deletions(-)
diff --git a/task-1854/plot-entropy.R b/task-1854/plot-entropy.R
index e28193f..735e65f 100644
--- a/task-1854/plot-entropy.R
+++ b/task-1854/plot-entropy.R
@@ -2,52 +2,93 @@ library(ggplot2)
library(reshape)
library(scales)
-d <- read.csv("entropy.csv", header = FALSE,
- col.names = c("validafter", "min_cw", "relays", "all", "max_all",
- "exit", "max_exit", "guard", "max_guard", "country", "max_country",
- "as", "max_as"), stringsAsFactor = FALSE)
-max_validafter <- max(d$validafter)
-d <- d[d$validafter == max_validafter, ]
-
-e <- d[d$min_cw <= 10000, ]
-e <- data.frame(min_cw = e$min_cw, all = e$all / e$max_all,
- exit = e$exit / e$max_exit, guard = e$guard / e$max_guard,
- country = e$country / e$max_country, as = e$as / e$max_as)
-e <- melt(e, "min_cw")
-e <- data.frame(min_cw = e$min_cw, variable =
- ifelse(e$variable == "all", "All relays",
- ifelse(e$variable == "exit", "All exits",
- ifelse(e$variable == "guard", "All guards",
- ifelse(e$variable == "country", "All countries",
- "All ASes")))), value = e$value)
-ggplot(e, aes(x = min_cw, y = value)) +
+e <- read.csv("extracted.csv", header = FALSE,  # no header row in the file; column names are assigned below
+ col.names = c("validafter", "min_adv_bw", "relays", "linf", "graph"),
+ stringsAsFactor = FALSE)  # NOTE(review): presumably resolves to stringsAsFactors via partial matching - confirm
+
+l <- e[e$graph == 'last', ]  # plot 1: rows tagged 'last', over the full range of cutoffs
+l <- data.frame(x = l$min_adv_bw, relays = l$relays, linf = l$linf)
+l <- melt(l, "x")  # long format: one row per (x, variable), so each measure becomes a facet
+ggplot(l, aes(x = x, y = value)) +
+geom_line() +
+facet_grid(variable ~ ., scales = "free_y") +  # one stacked panel per measure, each with its own y range
+scale_x_log10(name = "\nAdvertised bandwidth cutoff in B/s (log scale)") +
+scale_y_continuous(name = "") +
+opts(title = paste("Consensus with valid-after time ", max(e$validafter),  # max over all rows of e, not only the 'last' subset
+ "\n", sep = ""))  # NOTE(review): opts() is deprecated in newer ggplot2 releases - confirm target version
+
+l <- e[e$graph == 'last' & e$min_adv_bw >= 10000 & e$min_adv_bw <= 100000, ]  # plot 2: 'last' rows restricted to cutoffs in [10000, 100000]
+l <- data.frame(x = l$min_adv_bw, relays = l$relays, linf = l$linf)
+l <- melt(l, "x")  # long format: one row per (x, variable)
+ggplot(l, aes(x = x, y = value)) +
+geom_line() +
+facet_grid(variable ~ ., scales = "free_y") +  # one stacked panel per measure, each with its own y range
+scale_x_log10(name = "\nAdvertised bandwidth cutoff in B/s (log scale)") +
+scale_y_continuous(name = "") +
+opts(title = paste("Consensus with valid-after time ", max(e$validafter),  # max over all rows of e, not only the plotted subset
+ "\n", sep = ""))
+
+h <- e[e$graph == 'history' & e$min_adv_bw == 10000, ]  # history plot: rows tagged 'history' at cutoff 10000
+h <- data.frame(validafter = h$validafter, relays = h$relays, linf = h$linf)
+h <- aggregate(h[, 2:length(h)], by = list(x = as.Date(h$validafter)), FUN = mean)  # mean of columns 2..last (relays, linf) per calendar date
+h <- melt(h, "x")  # long format: one row per (date, variable)
+ggplot(h, aes(x = as.POSIXct(x), y = value)) +  # Date converted to POSIXct for the datetime x scale below
+geom_line() +
+facet_grid(variable ~ ., scales = "free_y") +  # one stacked panel per measure, each with its own y range
+scale_x_datetime(name = "") +
+scale_y_continuous(name = "") +
+opts(title = "Advertised bandwidth cutoff 10000 B/s\n")
+
+h <- e[e$graph == 'history' & e$min_adv_bw == 50000, ]  # history plot: rows tagged 'history' at cutoff 50000
+h <- data.frame(validafter = h$validafter, relays = h$relays, linf = h$linf)
+h <- aggregate(h[, 2:length(h)], by = list(x = as.Date(h$validafter)), FUN = mean)  # mean of columns 2..last (relays, linf) per calendar date
+h <- melt(h, "x")  # long format: one row per (date, variable)
+ggplot(h, aes(x = as.POSIXct(x), y = value)) +  # Date converted to POSIXct for the datetime x scale below
+geom_line() +
+facet_grid(variable ~ ., scales = "free_y") +  # one stacked panel per measure, each with its own y range
+scale_x_datetime(name = "") +
+scale_y_continuous(name = "") +
+opts(title = "Advertised bandwidth cutoff 50000 B/s\n")
+
+h <- e[e$graph == 'history' & e$min_adv_bw == 100000, ]  # history plot: rows tagged 'history' at cutoff 100000
+h <- data.frame(validafter = h$validafter, relays = h$relays, linf = h$linf)
+h <- aggregate(h[, 2:length(h)], by = list(x = as.Date(h$validafter)), FUN = mean)  # mean of columns 2..last (relays, linf) per calendar date
+h <- melt(h, "x")  # long format: one row per (date, variable)
+ggplot(h, aes(x = as.POSIXct(x), y = value)) +  # Date converted to POSIXct for the datetime x scale below
geom_line() +
-facet_wrap(~ variable) +
-scale_x_continuous(name = "\nMinimum consensus weight") +
-scale_y_continuous(name = "", limits = c(0, 1), labels = percent) +
-opts(title = paste("Degree of anonymity based on consensus from ",
- max_validafter, "\n", sep = ""))
-ggsave("degree-of-anonymity-min-cw.png", width = 8, height = 6, dpi = 100)
-
-f <- d[d$min_cw <= 10000, ]
-f <- rbind(
- data.frame(min_cw = f$min_cw, entropy = f$all, max = f$max_all,
- type = "All relays"),
- data.frame(min_cw = f$min_cw, entropy = f$exit, max = f$max_exit,
- type = "All exits"),
- data.frame(min_cw = f$min_cw, entropy = f$guard, max = f$max_guard,
- type = "All guards"),
- data.frame(min_cw = f$min_cw, entropy = f$country, max = f$max_country,
- type = "All countries"),
- data.frame(min_cw = f$min_cw, entropy = f$as, max = f$max_as,
- type = "All ASes"))
-f <- melt(f, c("min_cw", "type"))
-ggplot(f, aes(x = min_cw, y = value, colour = variable)) +
+facet_grid(variable ~ ., scales = "free_y") +  # one stacked panel per measure, each with its own y range
+scale_x_datetime(name = "") +
+scale_y_continuous(name = "") +
+opts(title = "Advertised bandwidth cutoff 100000 B/s\n")
+
+h <- e[e$graph == 'history' & e$min_adv_bw %in% c(10000, 50000, 100000), ]  # combined history plot: the three cutoffs in one figure
+h <- data.frame(validafter = h$validafter, min_adv_bw = h$min_adv_bw, relays = h$relays, linf = h$linf)
+h <- aggregate(h[, 2:length(h)],  # NOTE(review): columns 2..last include min_adv_bw, which is also a grouping key below - confirm the duplicate column is harmless
+ by = list(x = as.Date(h$validafter), min_adv_bw = h$min_adv_bw), FUN = mean)  # mean per (calendar date, cutoff)
+h <- melt(h, c("x", "min_adv_bw"))  # long format keyed by date and cutoff
+ggplot(h, aes(x = as.POSIXct(x), y = value, colour = as.factor(min_adv_bw))) +  # one coloured line per cutoff
+geom_line() +
+facet_grid(variable ~ ., scales = "free_y") +  # one stacked panel per measure, each with its own y range
+scale_x_datetime(name = "") +
+scale_y_continuous(name = "") +
+scale_colour_hue(name = "Advertised bandwidth cutoff in B/s") +
+opts(legend.position = "bottom")
+
+h <- e[e$graph == 'history' & e$min_adv_bw == 10000, ]  # CDF plot: build one sorted-linf table per cutoff, then stack them
+m10000 <- data.frame(linf = sort(h$linf),  # linf values in ascending order
+ frac_cons = (1:length(h$linf))/length(h$linf), min_adv_bw = "10000")  # rank / n; NOTE(review): 1:length() is c(1, 0) when no rows match - confirm input is never empty
+h <- e[e$graph == 'history' & e$min_adv_bw == 50000, ]
+m50000 <- data.frame(linf = sort(h$linf),  # same construction for cutoff 50000
+ frac_cons = (1:length(h$linf))/length(h$linf), min_adv_bw = "50000")
+h <- e[e$graph == 'history' & e$min_adv_bw == 100000, ]
+m100000 <- data.frame(linf = sort(h$linf),  # same construction for cutoff 100000
+ frac_cons = (1:length(h$linf))/length(h$linf), min_adv_bw = "100000")
+h <- rbind(m10000, m50000, m100000)  # stack the three tables; min_adv_bw labels the source cutoff
+ggplot(h, aes(x = linf, y = frac_cons, colour = as.factor(min_adv_bw))) +  # one coloured curve per cutoff
geom_line() +
-facet_wrap(~ type) +
-scale_x_continuous(name = "\nMinimum consensus weight") +
-scale_y_continuous(name = "", limits = c(0, max(f$value))) +
-opts(title = paste("Entropy and maximum entropy based on consensus from ",
- max_validafter, "\n", sep = ""), legend.position = "none")
-ggsave("entropy-min-cw.png", width = 8, height = 6, dpi = 100)
+scale_x_continuous(name = "") +
+scale_y_continuous(
+ name = "Fraction of consensuses from 2011-11-01 to 2012-10-31\n") +  # date range is a literal in the label, not derived from the data
+scale_colour_hue(name = "Advertised bandwidth cutoff in B/s") +
+opts(legend.position = "bottom")
diff --git a/task-1854/pyextract.py b/task-1854/pyextract.py
new file mode 100644
index 0000000..4220a2a
--- /dev/null
+++ b/task-1854/pyextract.py
@@ -0,0 +1,29 @@
+import os
+import sys
+
+def main():
+    """Split entropy.csv into per-cutoff 'history' rows and 'last' rows."""
+    last_validafter = ''
+    last_lines = []
+    with open('entropy.csv') as src, open('extracted.csv', 'w') as out:
+        for line in src:
+            line = line.strip()
+            if not line:
+                continue  # skip blank lines instead of crashing on them
+            parts = line.split(',')
+            validafter, relays, linf = parts[0], parts[2], parts[3]
+            min_adv_bw = int(parts[1])
+            if last_validafter != validafter:
+                last_lines = []  # new consensus: restart buffer and cutoffs
+                next_cutoffs = [0, 10000, 20000, 30000, 40000, 50000, 75000,
+                        100000, 100000000000000000000]  # last is a sentinel
+            while min_adv_bw >= next_cutoffs[0]:  # one row per cutoff reached
+                out.write("%s,%d,%s,%s,history\n" % (validafter,
+                        next_cutoffs.pop(0), relays, linf))
+            last_lines.append(line)
+            last_validafter = validafter
+        for line in last_lines:  # rows of the final consensus in the file
+            out.write(line + ",last\n")
+
+if __name__ == '__main__':
+    main()