[tor-commits] [metrics-tasks/master] Add graphing code for new linf data (#1854).

karsten at torproject.org karsten at torproject.org
Mon Nov 26 16:10:13 UTC 2012


commit eaae6a2bb25b122866e84549faa7c54cba7d53c8
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Nov 26 11:09:29 2012 -0500

    Add graphing code for new linf data (#1854).
---
 task-1854/plot-entropy.R |  133 ++++++++++++++++++++++++++++++----------------
 task-1854/pyextract.py   |   29 ++++++++++
 2 files changed, 116 insertions(+), 46 deletions(-)

diff --git a/task-1854/plot-entropy.R b/task-1854/plot-entropy.R
index e28193f..735e65f 100644
--- a/task-1854/plot-entropy.R
+++ b/task-1854/plot-entropy.R
@@ -2,52 +2,93 @@ library(ggplot2)
 library(reshape)
 library(scales)
 
-d <- read.csv("entropy.csv", header = FALSE,
-  col.names = c("validafter", "min_cw", "relays", "all", "max_all",
-  "exit", "max_exit", "guard", "max_guard", "country", "max_country",
-  "as", "max_as"), stringsAsFactor = FALSE)
-max_validafter <- max(d$validafter)
-d <- d[d$validafter == max_validafter, ]
-
-e <- d[d$min_cw <= 10000, ]
-e <- data.frame(min_cw = e$min_cw, all = e$all / e$max_all,
-  exit = e$exit / e$max_exit, guard = e$guard / e$max_guard,
-  country = e$country / e$max_country, as = e$as / e$max_as)
-e <- melt(e, "min_cw")
-e <- data.frame(min_cw = e$min_cw, variable =
-  ifelse(e$variable == "all", "All relays",
-  ifelse(e$variable == "exit", "All exits",
-  ifelse(e$variable == "guard", "All guards",
-  ifelse(e$variable == "country", "All countries",
-  "All ASes")))), value = e$value)
-ggplot(e, aes(x = min_cw, y = value)) +
+e <- read.csv("extracted.csv", header = FALSE,  # headerless CSV; column names are supplied on the next line
+  col.names = c("validafter", "min_adv_bw", "relays", "linf", "graph"),
+  stringsAsFactor = FALSE)
+
+l <- e[e$graph == 'last', ]  # keep only rows whose graph column is 'last'
+l <- data.frame(x = l$min_adv_bw, relays = l$relays, linf = l$linf)
+l <- melt(l, "x")  # long format keyed by x: one row per (x, variable, value)
+ggplot(l, aes(x = x, y = value)) +
+geom_line() +
+facet_grid(variable ~ ., scales = "free_y") +  # one panel row per variable, each with its own y range
+scale_x_log10(name = "\nAdvertised bandwidth cutoff in B/s (log scale)") +
+scale_y_continuous(name = "") +
+opts(title = paste("Consensus with valid-after time ", max(e$validafter),  # title uses the largest validafter in the whole file
+  "\n", sep = ""))
+
+l <- e[e$graph == 'last' & e$min_adv_bw >= 10000 & e$min_adv_bw <= 100000, ]  # same plot as above, restricted to cutoffs in [10000, 100000]
+l <- data.frame(x = l$min_adv_bw, relays = l$relays, linf = l$linf)
+l <- melt(l, "x")  # long format keyed by x
+ggplot(l, aes(x = x, y = value)) +
+geom_line() +
+facet_grid(variable ~ ., scales = "free_y") +  # one panel row per variable, each with its own y range
+scale_x_log10(name = "\nAdvertised bandwidth cutoff in B/s (log scale)") +
+scale_y_continuous(name = "") +
+opts(title = paste("Consensus with valid-after time ", max(e$validafter),  # title uses the largest validafter in the whole file
+  "\n", sep = ""))
+
+h <- e[e$graph == 'history' & e$min_adv_bw == 10000, ]  # 'history' rows for the 10000 cutoff only
+h <- data.frame(validafter = h$validafter, relays = h$relays, linf = h$linf)
+h <- aggregate(h[, 2:length(h)], by = list(x = as.Date(h$validafter)), FUN = mean)  # mean of relays and linf per calendar date
+h <- melt(h, "x")  # long format keyed by the date column x
+ggplot(h, aes(x = as.POSIXct(x), y = value)) +  # x converted to POSIXct for the datetime scale below
+geom_line() +
+facet_grid(variable ~ ., scales = "free_y") +  # one panel row per variable, each with its own y range
+scale_x_datetime(name = "") +
+scale_y_continuous(name = "") +
+opts(title = "Advertised bandwidth cutoff 10000 B/s\n")
+
+h <- e[e$graph == 'history' & e$min_adv_bw == 50000, ]  # 'history' rows for the 50000 cutoff only
+h <- data.frame(validafter = h$validafter, relays = h$relays, linf = h$linf)
+h <- aggregate(h[, 2:length(h)], by = list(x = as.Date(h$validafter)), FUN = mean)  # mean of relays and linf per calendar date
+h <- melt(h, "x")  # long format keyed by the date column x
+ggplot(h, aes(x = as.POSIXct(x), y = value)) +  # x converted to POSIXct for the datetime scale below
+geom_line() +
+facet_grid(variable ~ ., scales = "free_y") +  # one panel row per variable, each with its own y range
+scale_x_datetime(name = "") +
+scale_y_continuous(name = "") +
+opts(title = "Advertised bandwidth cutoff 50000 B/s\n")
+
+h <- e[e$graph == 'history' & e$min_adv_bw == 100000, ]  # 'history' rows for the 100000 cutoff only
+h <- data.frame(validafter = h$validafter, relays = h$relays, linf = h$linf)
+h <- aggregate(h[, 2:length(h)], by = list(x = as.Date(h$validafter)), FUN = mean)  # mean of relays and linf per calendar date
+h <- melt(h, "x")  # long format keyed by the date column x
+ggplot(h, aes(x = as.POSIXct(x), y = value)) +  # x converted to POSIXct for the datetime scale below
 geom_line() +
-facet_wrap(~ variable) +
-scale_x_continuous(name = "\nMinimum consensus weight") +
-scale_y_continuous(name = "", limits = c(0, 1), labels = percent) +
-opts(title = paste("Degree of anonymity based on consensus from ",
-  max_validafter, "\n", sep = ""))
-ggsave("degree-of-anonymity-min-cw.png", width = 8, height = 6, dpi = 100)
-
-f <- d[d$min_cw <= 10000, ]
-f <- rbind(
-  data.frame(min_cw = f$min_cw, entropy = f$all, max = f$max_all,
-    type = "All relays"),
-  data.frame(min_cw = f$min_cw, entropy = f$exit, max = f$max_exit,
-    type = "All exits"),
-  data.frame(min_cw = f$min_cw, entropy = f$guard, max = f$max_guard,
-    type = "All guards"),
-  data.frame(min_cw = f$min_cw, entropy = f$country, max = f$max_country,
-    type = "All countries"),
-  data.frame(min_cw = f$min_cw, entropy = f$as, max = f$max_as,
-    type = "All ASes"))
-f <- melt(f, c("min_cw", "type"))
-ggplot(f, aes(x = min_cw, y = value, colour = variable)) +
+facet_grid(variable ~ ., scales = "free_y") +  # one panel row per variable, each with its own y range
+scale_x_datetime(name = "") +
+scale_y_continuous(name = "") +
+opts(title = "Advertised bandwidth cutoff 100000 B/s\n")
+
+h <- e[e$graph == 'history' & e$min_adv_bw %in% c(10000, 50000, 100000), ]  # 'history' rows for all three cutoffs at once
+h <- data.frame(validafter = h$validafter, min_adv_bw = h$min_adv_bw, relays = h$relays, linf = h$linf)
+h <- aggregate(h[, 2:length(h)],  # NOTE(review): columns 2:length(h) include min_adv_bw, which is also a grouping key below - confirm intended
+  by = list(x = as.Date(h$validafter), min_adv_bw = h$min_adv_bw), FUN = mean)  # means per (calendar date, cutoff) pair
+h <- melt(h, c("x", "min_adv_bw"))  # long format keyed by date and cutoff
+ggplot(h, aes(x = as.POSIXct(x), y = value, colour = as.factor(min_adv_bw))) +  # one coloured line per cutoff
+geom_line() +
+facet_grid(variable ~ ., scales = "free_y") +  # one panel row per variable, each with its own y range
+scale_x_datetime(name = "") +
+scale_y_continuous(name = "") +
+scale_colour_hue(name = "Advertised bandwidth cutoff in B/s") +
+opts(legend.position = "bottom")
+
+h <- e[e$graph == 'history' & e$min_adv_bw == 10000, ]  # 'history' rows for the 10000 cutoff
+m10000 <- data.frame(linf = sort(h$linf),  # linf values in ascending order
+  frac_cons = (1:length(h$linf))/length(h$linf), min_adv_bw = "10000")  # rank / count: fraction of rows with linf at or below this value
+h <- e[e$graph == 'history' & e$min_adv_bw == 50000, ]  # same construction for the 50000 cutoff
+m50000 <- data.frame(linf = sort(h$linf),
+  frac_cons = (1:length(h$linf))/length(h$linf), min_adv_bw = "50000")
+h <- e[e$graph == 'history' & e$min_adv_bw == 100000, ]  # same construction for the 100000 cutoff
+m100000 <- data.frame(linf = sort(h$linf),
+  frac_cons = (1:length(h$linf))/length(h$linf), min_adv_bw = "100000")
+h <- rbind(m10000, m50000, m100000)  # stack the three tables; min_adv_bw labels the source cutoff
+ggplot(h, aes(x = linf, y = frac_cons, colour = as.factor(min_adv_bw))) +  # one coloured line per cutoff
 geom_line() +
-facet_wrap(~ type) +
-scale_x_continuous(name = "\nMinimum consensus weight") +
-scale_y_continuous(name = "", limits = c(0, max(f$value))) +
-opts(title = paste("Entropy and maximum entropy based on consensus from ",
-  max_validafter, "\n", sep = ""), legend.position = "none")
-ggsave("entropy-min-cw.png", width = 8, height = 6, dpi = 100)
+scale_x_continuous(name = "") +
+scale_y_continuous(
+  name = "Fraction of consensuses from 2011-11-01 to 2012-10-31\n") +  # NOTE(review): date range is hard-coded in the label, not derived from the data
+scale_colour_hue(name = "Advertised bandwidth cutoff in B/s") +
+opts(legend.position = "bottom")
 
diff --git a/task-1854/pyextract.py b/task-1854/pyextract.py
new file mode 100644
index 0000000..4220a2a
--- /dev/null
+++ b/task-1854/pyextract.py
@@ -0,0 +1,29 @@
+import os
+import sys
+
+def main():  # Read entropy.csv and write tagged 'history' and 'last' rows to extracted.csv.
+    out_file = open('extracted.csv', 'w')  # NOTE(review): never explicitly closed in this function
+    last_validafter = ''
+    last_lines = []  # stripped input lines of the validafter group currently being read
+    for line in open('entropy.csv'):
+        parts = line.strip().split(',')  # fields used: validafter, min_adv_bw, relays, linf
+        validafter = parts[0]
+        min_adv_bw = int(parts[1])
+        relays = parts[2]
+        linf = parts[3]
+        if last_validafter != validafter:  # validafter changed: start a fresh group
+            last_lines = []
+            next_cutoffs = [0, 10000, 20000, 30000, 40000, 50000, 75000,  # cutoffs not yet written for this group
+                    100000, 100000000000000000000]  # huge final value; presumably a sentinel no real row reaches
+        while min_adv_bw >= next_cutoffs[0]:  # assumes rows in a group ascend by min_adv_bw - TODO confirm
+            out_file.write("%s,%d,%s,%s,history\n" % (validafter,  # row carries the cutoff, not the row's own min_adv_bw
+                    next_cutoffs[0], relays, linf, ))
+            next_cutoffs.pop(0)  # each cutoff is written at most once per group
+        last_lines.append(line.strip())
+        last_validafter = validafter
+    for line in last_lines:  # only the final validafter group is still held here
+        out_file.write(line + ",last\n")  # re-emit its lines unchanged, tagged 'last'
+
+if __name__ == '__main__':
+    main()
+



More information about the tor-commits mailing list