commit 66474d34ad189d7ab6e6f0ad6359452bbed77e89
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Tue Apr 19 21:21:05 2011 +0200
Add code to generate the #1991 graphs.
---
task-1991/.gitignore | 6 ++
task-1991/Aggregate.java | 131 ++++++++++++++++++++++++++++
task-1991/Merge.java | 104 ++++++++++++++++++++++
task-1991/README | 46 ++++++++++
task-1991/torperf-guard-bandwidths-ranks.R | 40 +++++++++
task-1991/torperf-guard-quantiles.R | 57 ++++++++++++
6 files changed, 384 insertions(+), 0 deletions(-)
diff --git a/task-1991/.gitignore b/task-1991/.gitignore
new file mode 100644
index 0000000..a0fc089
--- /dev/null
+++ b/task-1991/.gitignore
@@ -0,0 +1,6 @@
+*.mergedata
+*.class
+*.png
+*.csv
+Rplots.pdf
+
diff --git a/task-1991/Aggregate.java b/task-1991/Aggregate.java
new file mode 100644
index 0000000..682a553
--- /dev/null
+++ b/task-1991/Aggregate.java
@@ -0,0 +1,170 @@
+import java.io.*;
+import java.util.*;
+
+/**
+ * Aggregates torperf-guard-bandwidths-ranks.csv (as written by Merge)
+ * into percentiles of Torperf completion times per guard rank interval
+ * and per guard bandwidth interval.
+ *
+ * Input columns: bandwidth,rank,completiontime,guards,filesize.
+ * Output files: torperf-guard-rank-quantiles.csv and
+ * torperf-guard-bandwidth-quantiles.csv, each with one line per file
+ * size and interval containing percentiles p1 to p99 and the number of
+ * measurements ("len").
+ */
+public class Aggregate {
+
+  /** File sizes to aggregate, matched against the filesize column. */
+  private static final String[] FILESIZES = { "50kb", "1mb", "5mb" };
+
+  /** Ranks are handled as integer millionths so that interval
+   * boundaries are exact and do not drift by repeated addition. */
+  private static final long RANK_SCALE = 1000000L;
+
+  /** Width of a rank interval: 0.02, in millionths. */
+  private static final long RANK_INTERVAL = 20000L;
+
+  /** Width of a bandwidth interval, in units of the bandwidth column. */
+  private static final long BANDWIDTH_INTERVAL = 2500L;
+
+  /** One parsed line of the input file. */
+  private static final class Row {
+    final long bandwidth;
+    final long rank;
+    final long completionTime;
+    final String filesize;
+
+    Row(String line) {
+      String[] parts = line.split(",");
+      if (parts.length < 5) {
+        throw new IllegalArgumentException("Malformed input line: "
+            + line);
+      }
+      this.bandwidth = Long.parseLong(parts[0]);
+      this.rank = Math.round(Double.parseDouble(parts[1]) * RANK_SCALE);
+      this.completionTime = Long.parseLong(parts[2]);
+      this.filesize = parts[4];
+    }
+
+    /** Returns the value this row is sorted and binned by. */
+    long key(boolean byRank) {
+      return byRank ? this.rank : this.bandwidth;
+    }
+  }
+
+  /**
+   * Reads the merged measurements and writes both quantile files to the
+   * current working directory.
+   *
+   * @param args ignored
+   * @throws Exception if reading or writing a file fails or the input
+   *     contains a malformed line
+   */
+  public static void main(String[] args) throws Exception {
+    List<Row> rows = readRows("torperf-guard-bandwidths-ranks.csv");
+    writeQuantiles("torperf-guard-rank-quantiles.csv", rows, true);
+    writeQuantiles("torperf-guard-bandwidth-quantiles.csv", rows, false);
+  }
+
+  /** Parses all non-empty lines after the header line of the file. */
+  private static List<Row> readRows(String fileName)
+      throws IOException {
+    List<Row> rows = new ArrayList<Row>();
+    BufferedReader br = new BufferedReader(new FileReader(fileName));
+    try {
+      br.readLine(); /* Skip the column headers. */
+      String line;
+      while ((line = br.readLine()) != null) {
+        if (line.length() > 0) {
+          rows.add(new Row(line));
+        }
+      }
+    } finally {
+      br.close();
+    }
+    return rows;
+  }
+
+  /**
+   * Writes one line per file size and interval with the percentiles of
+   * the completion times that fall into that interval. Intervals are
+   * (0, w], (w, 2w], ... and are labelled with their midpoint; keys of
+   * 0 go into the first interval.
+   */
+  private static void writeQuantiles(String fileName, List<Row> rows,
+      final boolean byRank) throws IOException {
+    List<Row> sorted = new ArrayList<Row>(rows);
+    /* Compare instead of subtracting: a difference cast to int drops
+     * rank differences below 0.01 and can overflow, which leaves the
+     * list only roughly sorted and rows in the wrong interval. */
+    Collections.sort(sorted, new Comparator<Row>() {
+      public int compare(Row a, Row b) {
+        long x = a.key(byRank), y = b.key(byRank);
+        return x < y ? -1 : (x > y ? 1 : 0);
+      }
+    });
+    long interval = byRank ? RANK_INTERVAL : BANDWIDTH_INTERVAL;
+    BufferedWriter bw = new BufferedWriter(new FileWriter(fileName));
+    try {
+      bw.write(byRank ? "filesize,rank" : "filesize,bandwidth");
+      for (int percentile = 1; percentile < 100; percentile++) {
+        bw.write(",p" + percentile);
+      }
+      bw.write(",len\n");
+      for (String filesize : FILESIZES) {
+        long upper = interval;
+        List<Long> times = new ArrayList<Long>();
+        for (Row row : sorted) {
+          if (!row.filesize.equals(filesize)) {
+            continue;
+          }
+          /* Close every interval below this row's key, including
+           * empty ones, so that a row after a gap in the data is not
+           * counted in an interval it does not belong to. */
+          while (row.key(byRank) > upper) {
+            writeInterval(bw, filesize, upper, interval, byRank, times);
+            times.clear();
+            upper += interval;
+          }
+          times.add(row.completionTime);
+        }
+        writeInterval(bw, filesize, upper, interval, byRank, times);
+      }
+    } finally {
+      bw.close();
+    }
+  }
+
+  /**
+   * Writes a single output line: file size, interval midpoint, p1 to
+   * p99 (or NA if there are fewer than two measurements), and the
+   * number of measurements. Sorts the given list in place.
+   */
+  private static void writeInterval(BufferedWriter bw, String filesize,
+      long upper, long interval, boolean byRank, List<Long> times)
+      throws IOException {
+    long midpoint = upper - interval / 2L;
+    /* Locale.US keeps the decimal separator a "." in every locale; a
+     * "," would add a column to the CSV line. */
+    String label = byRank
+        ? String.format(Locale.US, "%.3f",
+            (double) midpoint / (double) RANK_SCALE)
+        : String.valueOf(midpoint);
+    StringBuilder sb = new StringBuilder();
+    sb.append(filesize).append(',').append(label);
+    if (times.size() > 1) {
+      Collections.sort(times);
+      for (int percentile = 1; percentile < 100; percentile++) {
+        int index = times.size() * percentile / 100;
+        sb.append(',').append(times.get(index));
+      }
+    } else {
+      for (int percentile = 1; percentile < 100; percentile++) {
+        sb.append(",NA");
+      }
+    }
+    sb.append(',').append(times.size()).append('\n');
+    bw.write(sb.toString());
+  }
+}
+
diff --git a/task-1991/Merge.java b/task-1991/Merge.java
new file mode 100644
index 0000000..622cbf6
--- /dev/null
+++ b/task-1991/Merge.java
@@ -0,0 +1,234 @@
+import java.io.*;
+import java.text.*;
+import java.util.*;
+
+/**
+ * Merges Torperf measurements from the .mergedata files in the current
+ * directory with the consensus bandwidths in bandwidths-sql.csv and
+ * writes torperf-guard-bandwidths-ranks.csv with one line per
+ * measurement: bandwidth,rank,completiontime,guards,filesize.
+ */
+public class Merge {
+
+  /** A relay's fingerprint and bandwidth in one consensus. */
+  private static final class Relay {
+    final String fingerprint;
+    final long bandwidth;
+
+    Relay(String fingerprint, long bandwidth) {
+      this.fingerprint = fingerprint;
+      this.bandwidth = bandwidth;
+    }
+  }
+
+  /**
+   * Runs the merge; reads and writes files in the current directory.
+   *
+   * @param args ignored
+   * @throws Exception if a file cannot be read or written or a
+   *     measurement contains a non-numeric timestamp
+   */
+  public static void main(String[] args) throws Exception {
+    System.out.println("Reading guard node bandwidths...");
+    SortedMap<String, String> bandwidthRanks =
+        readBandwidthRanks("bandwidths-sql.csv");
+
+    System.out.println("Reading .mergedata file and writing completion "
+        + "time, guard bandwidth, and rank to disk...");
+    File[] files = new File(".").listFiles();
+    if (files == null) {
+      throw new IOException("Cannot list the current directory.");
+    }
+    /* listFiles() guarantees no particular order; sort the files to
+     * make the output reproducible. */
+    Arrays.sort(files);
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        "torperf-guard-bandwidths-ranks.csv"));
+    try {
+      bw.write("bandwidth,rank,completiontime,guards,filesize\n");
+      for (File mergedataFile : files) {
+        if (mergedataFile.getName().endsWith(".mergedata")) {
+          mergeFile(mergedataFile, bandwidthRanks, bw);
+        }
+      }
+    } finally {
+      bw.close();
+    }
+  }
+
+  /**
+   * Reads lines of fingerprint,validafter,bandwidth that are grouped by
+   * validafter and returns a map from "fingerprint,validafter" to
+   * "bandwidth,rank".  Ranks go from 0 (lowest bandwidth with that
+   * validafter value) to 1 (highest).
+   */
+  private static SortedMap<String, String> readBandwidthRanks(
+      String fileName) throws IOException {
+    SortedMap<String, String> bandwidthRanks =
+        new TreeMap<String, String>();
+    List<Relay> currentRelays = new ArrayList<Relay>();
+    String lastDateTime = null;
+    int skippedLines = 0;
+    BufferedReader br = new BufferedReader(new FileReader(fileName));
+    try {
+      String line;
+      /* Do not drop the first line unconditionally: psql's tuples-only
+       * mode writes no header, so the first line may be a relay.
+       * Header and footer lines are filtered below. */
+      while ((line = br.readLine()) != null) {
+        if (line.length() == 0 || line.startsWith("fingerprint")
+            || line.startsWith("(")) {
+          continue;
+        }
+        String[] parts = line.split(",");
+        long bandwidth = parts.length < 3 ? -1L
+            : parseBandwidth(parts[2]);
+        if (bandwidth < 0L) {
+          skippedLines++;
+          continue;
+        }
+        String dateTime = parts[1];
+        if (lastDateTime != null && !dateTime.equals(lastDateTime)) {
+          addRanks(bandwidthRanks, currentRelays, lastDateTime);
+          currentRelays.clear();
+        }
+        lastDateTime = dateTime;
+        currentRelays.add(new Relay(parts[0], bandwidth));
+      }
+      /* The last group is not followed by a line with a different
+       * validafter value, so it has to be ranked here. */
+      if (lastDateTime != null) {
+        addRanks(bandwidthRanks, currentRelays, lastDateTime);
+      }
+    } finally {
+      br.close();
+    }
+    if (skippedLines > 0) {
+      System.err.println("Skipped " + skippedLines + " line(s) without "
+          + "a numeric bandwidth in " + fileName + ".");
+    }
+    return bandwidthRanks;
+  }
+
+  /** Returns the parsed bandwidth, or -1 if it is not a number >= 0. */
+  private static long parseBandwidth(String bandwidth) {
+    try {
+      long parsed = Long.parseLong(bandwidth);
+      return parsed < 0L ? -1L : parsed;
+    } catch (NumberFormatException e) {
+      return -1L;
+    }
+  }
+
+  /**
+   * Sorts the relays of one group by bandwidth and stores their
+   * bandwidth and rank under the key "fingerprint,dateTime".
+   */
+  private static void addRanks(SortedMap<String, String> bandwidthRanks,
+      List<Relay> relays, String dateTime) {
+    Collections.sort(relays, new Comparator<Relay>() {
+      public int compare(Relay a, Relay b) {
+        return a.bandwidth < b.bandwidth ? -1
+            : (a.bandwidth > b.bandwidth ? 1 : 0);
+      }
+    });
+    for (int i = 0; i < relays.size(); i++) {
+      /* A single relay would be ranked 0 / 0 = NaN; rank it 0. */
+      double rank = relays.size() < 2 ? 0.0
+          : (double) i / (double) (relays.size() - 1);
+      Relay relay = relays.get(i);
+      /* Locale.US keeps the decimal separator a "."; a "," would add
+       * a column to the CSV output. */
+      bandwidthRanks.put(relay.fingerprint + "," + dateTime,
+          String.format(Locale.US, "%d,%.6f", relay.bandwidth, rank));
+    }
+  }
+
+  /**
+   * Appends one line per completed measurement in the given .mergedata
+   * file whose guard is found in bandwidthRanks.  Files that are not
+   * named like regular80cbt-50kb.mergedata are skipped.
+   */
+  private static void mergeFile(File mergedataFile,
+      SortedMap<String, String> bandwidthRanks, BufferedWriter bw)
+      throws IOException {
+    String filename = mergedataFile.getName();
+    int guardsEnd = filename.indexOf("80cbt");
+    String[] dashParts = filename.split("-");
+    if (guardsEnd < 0 || dashParts.length < 2) {
+      System.err.println("Skipping " + filename + ": name does not "
+          + "look like <guards>80cbt-<filesize>.mergedata.");
+      return;
+    }
+    String guards = filename.substring(0, guardsEnd);
+    String filesize = dashParts[1].split("\\.")[0];
+    SimpleDateFormat dateTimeFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US);
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    /* Every file gets its own reader that is closed as soon as the
+     * file is done, so that no file handles are leaked. */
+    BufferedReader br = new BufferedReader(new FileReader(
+        mergedataFile));
+    try {
+      String line;
+      while ((line = br.readLine()) != null) {
+        String path = null;
+        long started = 0L, completed = 0L;
+        boolean didTimeout = false;
+        for (String part : line.split(" ")) {
+          int equalsSign = part.indexOf('=');
+          if (equalsSign < 0) {
+            continue;
+          }
+          String key = part.substring(0, equalsSign);
+          String value = part.substring(equalsSign + 1);
+          if (key.equals("PATH")) {
+            path = value;
+          } else if (key.equals("STARTSEC")) {
+            started += Long.parseLong(value) * 1000L;
+          } else if (key.equals("STARTUSEC")) {
+            started += Long.parseLong(value) / 1000L;
+          } else if (key.equals("DATACOMPLETESEC")) {
+            completed += Long.parseLong(value) * 1000L;
+          } else if (key.equals("DATACOMPLETEUSEC")) {
+            completed += Long.parseLong(value) / 1000L;
+          } else if (key.equals("DIDTIMEOUT") && value.equals("1")) {
+            /* A "continue" here would only move on to the next
+             * key=value pair; remember the timeout instead and skip
+             * the whole measurement below. */
+            didTimeout = true;
+          }
+        }
+        if (didTimeout || path == null || started == 0L
+            || completed == 0L) {
+          continue;
+        }
+        int comma = path.indexOf(',');
+        String guard = comma < 0 ? path : path.substring(0, comma);
+        if (guard.length() < 2) {
+          continue;
+        }
+        /* Drop the first character of the first hop in PATH. */
+        String fingerprint = guard.substring(1).toLowerCase(Locale.US);
+        String dateTime = dateTimeFormat.format(new Date(started));
+        String guardKey = fingerprint + "," + dateTime;
+        /* headMap() is empty if no key sorts before guardKey, and
+         * lastKey() would throw NoSuchElementException then. */
+        SortedMap<String, String> earlier =
+            bandwidthRanks.headMap(guardKey);
+        if (earlier.isEmpty()) {
+          continue;
+        }
+        String previousGuardKey = earlier.lastKey();
+        if (previousGuardKey.startsWith(fingerprint + ",")) {
+          long completionTime = completed - started;
+          bw.write(bandwidthRanks.get(previousGuardKey) + ","
+              + completionTime + "," + guards + "," + filesize + "\n");
+        }
+      }
+    } finally {
+      br.close();
+    }
+  }
+}
+
diff --git a/task-1991/README b/task-1991/README
new file mode 100644
index 0000000..8cf5548
--- /dev/null
+++ b/task-1991/README
@@ -0,0 +1,46 @@
+Visualize influence of guard bandwidth on Torperf completion time
+
+ - Generate one or more .mergedata files, e.g.,
+
+ $ ./consolidate_stats.py 50kb.data 50kb.extradata
+ regular80cbt-50kb.mergedata
+
+ - Extract consensus bandwidths of all relays and write them to a CSV
+ file. The easiest way to do this is to run an SQL query on the metrics
+ database:
+
+ => \f ','
+ => \a
+ => \t
+ => \o bandwidths-sql.csv
+ => SELECT fingerprint, validafter, bandwidth FROM statusentry
+ WHERE validafter >= '2011-02-23' ORDER BY validafter, fingerprint;
+ => \o
+ => \t
+ => \a
+
+ - Merge the .mergedata files with the consensus bandwidth file to have a
+ single CSV file that contains Torperf completion times and Guard node
+ consensus bandwidths and ranks:
+
+ $ javac Merge.java && java -Xmx2048m Merge
+
+ The result is a file torperf-guard-bandwidths-ranks.csv.
+
+ - Aggregate the results to obtain percentiles:
+
+ $ javac Aggregate.java && java Aggregate
+
+ The results are two files torperf-guard-bandwidth-quantiles.csv and
+ torperf-guard-rank-quantiles.csv.
+
+ - Plot Torperf results by guard consensus bandwidth and ranks for each
+ guard selection strategy separately:
+
+ $ R --slave -f torperf-guard-bandwidths-ranks.R
+
+ - Plot Torperf results by guard consensus bandwidth and ranks for all
+ guard selection strategies and including quantile lines:
+
+ $ R --slave -f torperf-guard-quantiles.R
+
diff --git a/task-1991/torperf-guard-bandwidths-ranks.R b/task-1991/torperf-guard-bandwidths-ranks.R
new file mode 100644
index 0000000..8e7eb0a
--- /dev/null
+++ b/task-1991/torperf-guard-bandwidths-ranks.R
@@ -0,0 +1,40 @@
+library(ggplot2)
+# Scatter plots of completiontime against guard bandwidth and guard rank.
+# Read data; the columns used below are bandwidth, rank, completiontime, guards, filesize
+data <- read.csv("torperf-guard-bandwidths-ranks.csv",
+ stringsAsFactors = FALSE)
+# Keep only rows whose completiontime is below a cut-off chosen per filesize.
+data <- data[(data$filesize == "50kb" & data$completiontime < 60000) |
+ (data$filesize == "1mb" & data$completiontime < 120000) |
+ (data$filesize == "5mb" & data$completiontime < 300000), ]
+# Replace the raw guards and filesize values with the labels used as facet titles.
+data[data$guards == "slow", "guards"] <- "a) slowest overall"
+data[data$guards == "slowratio", "guards"] <- "b) slowest ratio"
+data[data$guards == "regular", "guards"] <- "c) default"
+data[data$guards == "fastratio", "guards"] <- "d) fastest ratio"
+data[data$guards == "fast", "guards"] <- "e) fastest overall"
+data[data$filesize == "50kb", "filesize"] <- "a) 50 KB"
+data[data$filesize == "1mb", "filesize"] <- "b) 1 MB"
+data[data$filesize == "5mb", "filesize"] <- "c) 5 MB"
+# Plot 1: completiontime by bandwidth, one panel per filesize (rows) and guards (columns).
+ggplot(data, aes(x = bandwidth / 1000, y = completiontime / 1000)) +  # both axes are divided by 1000 to match the axis titles
+geom_point(alpha = 0.05) +  # mostly transparent points so that dense regions show up darker
+scale_x_continuous("\nGuard consensus bandwidth in MB/s") +
+scale_y_continuous("Torperf completion time in seconds\n") +
+facet_grid(filesize ~ guards, scale = "free_y") +  # NOTE(review): "scale" presumably partial-matches the "scales" argument - confirm
+opts(legend.position = "none")  # NOTE(review): opts() was replaced by theme() in later ggplot2 releases - confirm target version
+ggsave(filename = "torperf-guard-bandwidths.png",  # no plot is passed, so ggsave() saves the last plot
+ width = 8, height = 5, dpi = 150)
+# Plot 2: same layout as plot 1, with the rank column on the x axis.
+ggplot(data, aes(x = as.numeric(rank), y = completiontime / 1000)) +
+geom_point(alpha = 0.05) +
+scale_x_continuous(paste("\nGuard rank by consensus bandwidth from",
+ "slowest (0) to fastest (1)"), limits = c(0, 1),
+ breaks = c(0.25, 0.5, 0.75)) +
+scale_y_continuous("Torperf completion time in seconds\n") +
+facet_grid(filesize ~ guards, scale = "free_y") +
+opts(legend.position = "none")
+ggsave(filename = "torperf-guard-ranks.png",
+ width = 8, height = 5, dpi = 150)
+
+
diff --git a/task-1991/torperf-guard-quantiles.R b/task-1991/torperf-guard-quantiles.R
new file mode 100644
index 0000000..2554c34
--- /dev/null
+++ b/task-1991/torperf-guard-quantiles.R
@@ -0,0 +1,57 @@
+library(ggplot2)
+# Scatter plots of completiontime by guard rank and bandwidth, overlaid with quantile lines.
+# Read data; the columns used below are bandwidth, rank, completiontime, filesize
+data <- read.csv("torperf-guard-bandwidths-ranks.csv",
+ stringsAsFactors = FALSE)
+data <- data[(data$filesize == "50kb" & data$completiontime < 60000) |  # keep rows below a per-filesize cut-off
+ (data$filesize == "1mb" & data$completiontime < 600000) |  # NOTE(review): torperf-guard-bandwidths-ranks.R uses 120000 here - confirm intended
+ (data$filesize == "5mb" & data$completiontime < 1500000), ]  # NOTE(review): torperf-guard-bandwidths-ranks.R uses 300000 here - confirm intended
+data[data$filesize == "50kb", "filesize"] <- "a) 50 KB"  # facet titles
+data[data$filesize == "1mb", "filesize"] <- "b) 1 MB"
+data[data$filesize == "5mb", "filesize"] <- "c) 5 MB"
+# Names of the quantile columns to draw as lines.
+percentiles <- paste("p", seq(90, 90, 1), sep = "")  # evaluates to "p90" only; widen seq() to draw more lines
+# Rank quantiles: one row per filesize and rank value, columns named in percentiles, and len.
+rq <- read.csv("torperf-guard-rank-quantiles.csv",
+ stringsAsFactors = FALSE)
+rq[(rq$len < 30 & rq$filesize == "50kb") |  # blank out quantiles of rows whose len is below a per-filesize minimum
+ (rq$len < 10 & rq$filesize == "1mb") |
+ (rq$len < 5 & rq$filesize == "5mb"), percentiles] <- NA
+rq <- rq[, c("filesize", "rank", percentiles)]
+rq <- melt(rq, id = c("filesize", "rank"))  # NOTE(review): melt() is not defined here and only ggplot2 is loaded - confirm it is attached (reshape?)
+rq[rq$filesize == "50kb", "filesize"] <- "a) 50 KB"  # same facet titles as in data so that the lines land in the matching panels
+rq[rq$filesize == "1mb", "filesize"] <- "b) 1 MB"
+rq[rq$filesize == "5mb", "filesize"] <- "c) 5 MB"
+ggplot(data, aes(x = as.numeric(rank), y = completiontime / 1000)) +
+geom_point(alpha = 0.05) +
+scale_x_continuous(paste("\nGuard rank by consensus bandwidth from",
+ "slowest (0) to fastest (1)"), limits = c(0, 1)) +
+scale_y_continuous("Torperf completion time in seconds\n") +
+geom_line(data = rq, aes(x = as.numeric(rank), y = value / 1000,  # quantile lines come from rq, not from data
+ colour = variable)) +
+facet_grid(filesize ~ ., scale = "free_y") +  # one row of panels per filesize
+opts(legend.position = "none")  # NOTE(review): opts() was replaced by theme() in later ggplot2 releases - confirm target version
+ggsave(filename = "torperf-guard-rank-quantiles.png",  # no plot is passed, so ggsave() saves the last plot
+ width = 8, height = 5, dpi = 150)
+# Bandwidth quantiles: same steps as for rq, keyed by the bandwidth column instead of rank.
+bq <- read.csv("torperf-guard-bandwidth-quantiles.csv",
+ stringsAsFactors = FALSE)
+bq[(bq$len < 30 & bq$filesize == "50kb") |
+ (bq$len < 10 & bq$filesize == "1mb") |
+ (bq$len < 5 & bq$filesize == "5mb"), percentiles] <- NA
+bq <- bq[, c("filesize", "bandwidth", percentiles)]
+bq <- melt(bq, id = c("filesize", "bandwidth"))
+bq[bq$filesize == "50kb", "filesize"] <- "a) 50 KB"
+bq[bq$filesize == "1mb", "filesize"] <- "b) 1 MB"
+bq[bq$filesize == "5mb", "filesize"] <- "c) 5 MB"
+ggplot(data, aes(x = bandwidth / 1000, y = completiontime / 1000)) +
+geom_point(alpha = 0.05) +
+scale_x_continuous("\nGuard consensus bandwidth in MB/s") +
+scale_y_continuous("Torperf completion time in seconds\n") +
+geom_line(data = bq, aes(x = bandwidth / 1000, y = value / 1000,
+ colour = variable)) +
+facet_grid(filesize ~ ., scale = "free_y") +
+opts(legend.position = "none")
+ggsave(filename = "torperf-guard-bandwidth-quantiles.png",
+ width = 8, height = 5, dpi = 150)
+