commit 66474d34ad189d7ab6e6f0ad6359452bbed77e89 Author: Karsten Loesing karsten.loesing@gmx.net Date: Tue Apr 19 21:21:05 2011 +0200
Add code to generate the #1991 graphs. --- task-1991/.gitignore | 6 ++ task-1991/Aggregate.java | 131 ++++++++++++++++++++++++++++ task-1991/Merge.java | 104 ++++++++++++++++++++++ task-1991/README | 46 ++++++++++ task-1991/torperf-guard-bandwidths-ranks.R | 40 +++++++++ task-1991/torperf-guard-quantiles.R | 57 ++++++++++++ 6 files changed, 384 insertions(+), 0 deletions(-)
diff --git a/task-1991/.gitignore b/task-1991/.gitignore
new file mode 100644
index 0000000..a0fc089
--- /dev/null
+++ b/task-1991/.gitignore
@@ -0,0 +1,6 @@
+*.mergedata
+*.class
+*.png
+*.csv
+Rplots.pdf
+
diff --git a/task-1991/Aggregate.java b/task-1991/Aggregate.java
new file mode 100644
index 0000000..682a553
--- /dev/null
+++ b/task-1991/Aggregate.java
@@ -0,0 +1,151 @@
+import java.io.*;
+import java.util.*;
+
+/**
+ * Aggregates the Torperf measurements in
+ * torperf-guard-bandwidths-ranks.csv into percentiles of completion
+ * times per file size and per bin of guard rank
+ * (torperf-guard-rank-quantiles.csv) or guard consensus bandwidth
+ * (torperf-guard-bandwidth-quantiles.csv).
+ */
+public class Aggregate {
+  public static void main(String[] args) throws Exception {
+
+    /* Read all measurements, skipping the header line.  Columns are
+     * bandwidth, rank, completiontime, guards, filesize. */
+    List<String> sortedByBandwidth = new ArrayList<String>(),
+        sortedByRank = new ArrayList<String>();
+    BufferedReader br = new BufferedReader(new FileReader(
+        "torperf-guard-bandwidths-ranks.csv"));
+    try {
+      String line = br.readLine();
+      while ((line = br.readLine()) != null) {
+        sortedByBandwidth.add(line);
+        sortedByRank.add(line);
+      }
+    } finally {
+      br.close();
+    }
+
+    /* Compare values instead of returning a (truncated) difference:
+     * the truncation treated ranks less than 0.01 apart as equal,
+     * which left the list only roughly sorted and broke the
+     * transitivity that Comparator requires. */
+    Collections.sort(sortedByBandwidth, new Comparator<String>() {
+      public int compare(String a, String b) {
+        int first = Integer.parseInt(a.split(",")[0]);
+        int second = Integer.parseInt(b.split(",")[0]);
+        return first < second ? -1 : (first > second ? 1 : 0);
+      }
+    });
+    Collections.sort(sortedByRank, new Comparator<String>() {
+      public int compare(String a, String b) {
+        return Double.compare(Double.parseDouble(a.split(",")[1]),
+            Double.parseDouble(b.split(",")[1]));
+      }
+    });
+
+    List<Integer> percentiles = new ArrayList<Integer>();
+    for (int percentile = 1; percentile < 100; percentile += 1) {
+      percentiles.add(percentile);
+    }
+
+    /* Write percentiles per guard rank bin of width 0.02. */
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        "torperf-guard-rank-quantiles.csv"));
+    try {
+      writeHeader(bw, "rank", percentiles);
+      for (String filesize : Arrays.asList("50kb,1mb,5mb".split(","))) {
+        double rankPercentInterval = 0.02;
+        double rankPercent = rankPercentInterval;
+        List<Integer> times = new ArrayList<Integer>();
+        for (String measurement : sortedByRank) {
+          String[] parts = measurement.split(",");
+          if (!parts[4].equals(filesize)) {
+            continue;
+          }
+          double rank = Double.parseDouble(parts[1]);
+          /* Close every bin that ends below this rank, including
+           * empty ones, so that bin labels match bin contents. */
+          while (rank > rankPercent) {
+            writeBin(bw, filesize, String.format(Locale.US, "%.3f",
+                rankPercent - 0.5 * rankPercentInterval), times,
+                percentiles);
+            rankPercent += rankPercentInterval;
+          }
+          times.add(Integer.parseInt(parts[2]));
+        }
+        writeBin(bw, filesize, String.format(Locale.US, "%.3f",
+            rankPercent - 0.5 * rankPercentInterval), times,
+            percentiles);
+      }
+    } finally {
+      bw.close();
+    }
+
+    /* Write percentiles per guard bandwidth bin of width 2500. */
+    bw = new BufferedWriter(new FileWriter(
+        "torperf-guard-bandwidth-quantiles.csv"));
+    try {
+      writeHeader(bw, "bandwidth", percentiles);
+      for (String filesize : Arrays.asList("50kb,1mb,5mb".split(","))) {
+        int bandwidthInterval = 2500;
+        int curBandwidth = bandwidthInterval;
+        List<Integer> times = new ArrayList<Integer>();
+        for (String measurement : sortedByBandwidth) {
+          String[] parts = measurement.split(",");
+          if (!parts[4].equals(filesize)) {
+            continue;
+          }
+          int bandwidth = Integer.parseInt(parts[0]);
+          while (bandwidth > curBandwidth) {
+            writeBin(bw, filesize, String.valueOf(
+                curBandwidth - (bandwidthInterval / 2)), times,
+                percentiles);
+            curBandwidth += bandwidthInterval;
+          }
+          times.add(Integer.parseInt(parts[2]));
+        }
+        writeBin(bw, filesize, String.valueOf(
+            curBandwidth - (bandwidthInterval / 2)), times,
+            percentiles);
+      }
+    } finally {
+      bw.close();
+    }
+  }
+
+  /** Writes the CSV header line with one column per percentile. */
+  private static void writeHeader(BufferedWriter bw, String binColumn,
+      List<Integer> percentiles) throws IOException {
+    bw.write("filesize," + binColumn);
+    for (int percentile : percentiles) {
+      bw.write(",p" + percentile);
+    }
+    bw.write(",len\n");
+  }
+
+  /**
+   * Writes one CSV line for a finished bin and empties the bin: file
+   * size, bin label, the requested percentiles of the completion
+   * times (NA if there are fewer than two), and their number.
+   */
+  private static void writeBin(BufferedWriter bw, String filesize,
+      String label, List<Integer> times, List<Integer> percentiles)
+      throws IOException {
+    bw.write(filesize + "," + label);
+    if (times.size() > 1) {
+      Collections.sort(times);
+      for (int percentile : percentiles) {
+        bw.write("," + times.get(times.size() * percentile / 100));
+      }
+    } else {
+      for (int i = 0; i < percentiles.size(); i++) {
+        bw.write(",NA");
+      }
+    }
+    bw.write("," + times.size() + "\n");
+    times.clear();
+  }
+}
+
diff --git a/task-1991/Merge.java b/task-1991/Merge.java
new file mode 100644
index 0000000..622cbf6
--- /dev/null
+++ b/task-1991/Merge.java
@@ -0,0 +1,158 @@
+import java.io.*;
+import java.text.*;
+import java.util.*;
+
+/**
+ * Merges the Torperf measurements in all .mergedata files in the current
+ * directory with the relay bandwidths in bandwidths-sql.csv and writes
+ * guard bandwidth, guard rank, completion time, guard selection strategy,
+ * and file size of each measurement to torperf-guard-bandwidths-ranks.csv.
+ */
+public class Merge {
+  public static void main(String[] args) throws Exception {
+
+    System.out.println("Reading guard node bandwidths...");
+    /* Maps "fingerprint,validafter" to "bandwidth,rank". */
+    SortedMap<String, String> bandwidthRanks =
+        new TreeMap<String, String>();
+    BufferedReader br = new BufferedReader(new FileReader(
+        "bandwidths-sql.csv"));
+    try {
+      String line = br.readLine(), lastDateTime = null;
+      List<String> currentRelays = new ArrayList<String>();
+      while ((line = br.readLine()) != null) {
+        if (line.startsWith("fingerprint") || line.startsWith("(")) {
+          continue;
+        }
+        String dateTime = line.split(",")[1];
+        if (lastDateTime != null && !dateTime.equals(lastDateTime)) {
+          addBandwidthRanks(bandwidthRanks, currentRelays, lastDateTime);
+          currentRelays.clear();
+        }
+        lastDateTime = dateTime;
+        currentRelays.add(line);
+      }
+      /* The relays of the last date-time have not been ranked yet. */
+      if (lastDateTime != null) {
+        addBandwidthRanks(bandwidthRanks, currentRelays, lastDateTime);
+      }
+    } finally {
+      br.close();
+    }
+
+    System.out.println("Reading .mergedata file and writing completion "
+        + "time, guard bandwidth, and rank to disk...");
+    SimpleDateFormat dateTimeFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        "torperf-guard-bandwidths-ranks.csv"));
+    try {
+      bw.write("bandwidth,rank,completiontime,guards,filesize\n");
+      for (File mergedataFile : new File(".").listFiles()) {
+        String filename = mergedataFile.getName();
+        if (!filename.endsWith(".mergedata")) {
+          continue;
+        }
+        String guards = filename.substring(0, filename.indexOf("80cbt"));
+        String filesize = filename.split("-")[1].split("\\.")[0];
+        /* Close each file's reader before opening the next one. */
+        br = new BufferedReader(new FileReader(mergedataFile));
+        try {
+          String line;
+          while ((line = br.readLine()) != null) {
+            String merged = mergeMeasurement(line, bandwidthRanks,
+                dateTimeFormat);
+            if (merged != null) {
+              bw.write(merged + "," + guards + "," + filesize + "\n");
+            }
+          }
+        } finally {
+          br.close();
+        }
+      }
+    } finally {
+      bw.close();
+    }
+  }
+
+  /**
+   * Ranks the given bandwidths-sql.csv lines of a single date-time by
+   * bandwidth from 0 (slowest) to 1 (fastest) and stores
+   * "bandwidth,rank" under the key "fingerprint,dateTime".  A single
+   * relay gets rank NaN (0 / 0).
+   */
+  private static void addBandwidthRanks(
+      SortedMap<String, String> bandwidthRanks, List<String> relays,
+      String dateTime) {
+    Collections.sort(relays, new Comparator<String>() {
+      public int compare(String a, String b) {
+        int first = Integer.parseInt(a.split(",")[2]);
+        int second = Integer.parseInt(b.split(",")[2]);
+        return first < second ? -1 : (first > second ? 1 : 0);
+      }
+    });
+    for (int i = 0; i < relays.size(); i++) {
+      String[] relayParts = relays.get(i).split(",");
+      /* Locale.US keeps the decimal point from becoming a comma. */
+      bandwidthRanks.put(relayParts[0] + "," + dateTime,
+          String.format(Locale.US, "%s,%.6f", relayParts[2],
+          (double) i / (double) (relays.size() - 1)));
+    }
+  }
+
+  /**
+   * Parses one .mergedata line and returns "bandwidth,rank,completiontime"
+   * for it, or null if the measurement timed out, lacks a path, start,
+   * or completion time, or if no bandwidth entry of its first relay
+   * sorts before the measurement start.
+   */
+  private static String mergeMeasurement(String line,
+      SortedMap<String, String> bandwidthRanks,
+      SimpleDateFormat dateTimeFormat) {
+    String path = null;
+    long started = 0L, completed = 0L;
+    boolean didTimeout = false;
+    for (String part : line.split(" ")) {
+      int separator = part.indexOf("=");
+      if (separator < 0) {
+        continue;
+      }
+      String key = part.substring(0, separator);
+      String value = part.substring(separator + 1);
+      if (key.equals("PATH")) {
+        path = value;
+      } else if (key.equals("STARTSEC")) {
+        started += Long.parseLong(value) * 1000L;
+      } else if (key.equals("STARTUSEC")) {
+        started += Long.parseLong(value) / 1000L;
+      } else if (key.equals("DATACOMPLETESEC")) {
+        completed += Long.parseLong(value) * 1000L;
+      } else if (key.equals("DATACOMPLETEUSEC")) {
+        completed += Long.parseLong(value) / 1000L;
+      } else if (key.equals("DIDTIMEOUT")) {
+        /* A "continue" here would only skip to the next key=value
+         * pair, so remember the timeout and drop the line below. */
+        didTimeout = value.equals("1");
+      }
+    }
+    if (didTimeout || path == null || started == 0L ||
+        completed == 0L) {
+      return null;
+    }
+    String dateTime = dateTimeFormat.format(started);
+    String fingerprint = path.split(",")[0].substring(1).toLowerCase();
+    String guardKey = fingerprint + "," + dateTime;
+    SortedMap<String, String> earlier = bandwidthRanks.headMap(guardKey);
+    if (earlier.isEmpty()) {
+      return null;
+    }
+    String previousGuardKey = earlier.lastKey();
+    if (!previousGuardKey.startsWith(fingerprint)) {
+      return null;
+    }
+    return bandwidthRanks.get(previousGuardKey) + ","
+        + (completed - started);
+  }
+}
+
diff --git a/task-1991/README b/task-1991/README
new file mode 100644
index 0000000..8cf5548
--- /dev/null
+++ b/task-1991/README
@@ -0,0 +1,46 @@
+Visualize influence of guard bandwidth on Torperf completion time
+
+ - Generate one or more .mergedata files, e.g.,
+
+   $ ./consolidate_stats.py 50kb.data 50kb.extradata
+     regular80cbt-50kb.mergedata
+
+ - Extract consensus bandwidths of all relays and write them to a CSV
+   file. The easiest way to do this is to run an SQL query on the metrics
+   database:
+
+   => \f ','
+   => \a
+   => \t
+   => \o bandwidths-sql.csv
+   => SELECT fingerprint, validafter, bandwidth FROM statusentry
+      WHERE validafter >= '2011-02-23' ORDER BY validafter, fingerprint;
+   => \o
+   => \t
+   => \a
+
+ - Merge the .mergedata file with the consensus bandwidth file to have a
+   single CSV file that contains Torperf completion times and Guard node
+   consensus bandwidths and ranks:
+
+   $ javac Merge.java && java -Xmx2048m Merge
+
+   The result is a file torperf-guard-bandwidths-ranks.csv.
+
+ - Aggregate the results to obtain percentiles:
+
+   $ javac Aggregate.java && java Aggregate
+
+   The results are two files torperf-guard-bandwidth-quantiles.csv and
+   torperf-guard-rank-quantiles.csv.
+
+ - Plot Torperf results by guard consensus bandwidth and ranks for each
+   guard selection strategy separately:
+
+   $ R --slave -f torperf-guard-bandwidths-ranks.R
+
+ - Plot Torperf results by guard consensus bandwidth and ranks for all
+   guard selection strategies and including quantile lines:
+
+   $ R --slave -f torperf-guard-quantiles.R
+
diff --git a/task-1991/torperf-guard-bandwidths-ranks.R b/task-1991/torperf-guard-bandwidths-ranks.R
new file mode 100644
index 0000000..8e7eb0a
--- /dev/null
+++ b/task-1991/torperf-guard-bandwidths-ranks.R
@@ -0,0 +1,40 @@
+library(ggplot2)
+
+# Read the per-measurement CSV written by Merge.java (bandwidth, rank, completiontime, guards, filesize)
+data <- read.csv("torperf-guard-bandwidths-ranks.csv",
+  stringsAsFactors = FALSE)
+
+data <- data[(data$filesize == "50kb" & data$completiontime < 60000) |
+  (data$filesize == "1mb" & data$completiontime < 120000) |
+  (data$filesize == "5mb" & data$completiontime < 300000), ]  # keep only rows below a per-file-size completion time limit
+
+data[data$guards == "slow", "guards"] <- "a) slowest overall"  # relabel for display; letter prefixes presumably fix the facet order
+data[data$guards == "slowratio", "guards"] <- "b) slowest ratio"
+data[data$guards == "regular", "guards"] <- "c) default"
+data[data$guards == "fastratio", "guards"] <- "d) fastest ratio"
+data[data$guards == "fast", "guards"] <- "e) fastest overall"
+data[data$filesize == "50kb", "filesize"] <- "a) 50 KB"
+data[data$filesize == "1mb", "filesize"] <- "b) 1 MB"
+data[data$filesize == "5mb", "filesize"] <- "c) 5 MB"
+
+ggplot(data, aes(x = bandwidth / 1000, y = completiontime / 1000)) +  # completiontime is in milliseconds
+geom_point(alpha = 0.05) +
+scale_x_continuous("\nGuard consensus bandwidth in MB/s") +
+scale_y_continuous("Torperf completion time in seconds\n") +
+facet_grid(filesize ~ guards, scale = "free_y") +  # one row per file size, one column per guards value
+opts(legend.position = "none")
+ggsave(filename = "torperf-guard-bandwidths.png",
+  width = 8, height = 5, dpi = 150)
+
+ggplot(data, aes(x = as.numeric(rank), y = completiontime / 1000)) +  # same plot with guard rank on the x axis
+geom_point(alpha = 0.05) +
+scale_x_continuous(paste("\nGuard rank by consensus bandwidth from",
+  "slowest (0) to fastest (1)"), limits = c(0, 1),
+  breaks = c(0.25, 0.5, 0.75)) +
+scale_y_continuous("Torperf completion time in seconds\n") +
+facet_grid(filesize ~ guards, scale = "free_y") +
+opts(legend.position = "none")
+ggsave(filename = "torperf-guard-ranks.png",
+  width = 8, height = 5, dpi = 150)
+
+
diff --git a/task-1991/torperf-guard-quantiles.R b/task-1991/torperf-guard-quantiles.R
new file mode 100644
index 0000000..2554c34
--- /dev/null
+++ b/task-1991/torperf-guard-quantiles.R
@@ -0,0 +1,57 @@
+library(ggplot2)
+
+# Read the per-measurement CSV written by Merge.java (bandwidth, rank, completiontime, guards, filesize)
+data <- read.csv("torperf-guard-bandwidths-ranks.csv",
+  stringsAsFactors = FALSE)
+data <- data[(data$filesize == "50kb" & data$completiontime < 60000) |
+  (data$filesize == "1mb" & data$completiontime < 600000) |
+  (data$filesize == "5mb" & data$completiontime < 1500000), ]  # keep only rows below a per-file-size completion time limit
+data[data$filesize == "50kb", "filesize"] <- "a) 50 KB"
+data[data$filesize == "1mb", "filesize"] <- "b) 1 MB"
+data[data$filesize == "5mb", "filesize"] <- "c) 5 MB"
+
+percentiles <- paste("p", seq(90, 90, 1), sep = "")  # evaluates to just "p90"; widen seq() to draw more quantile lines
+
+rq <- read.csv("torperf-guard-rank-quantiles.csv",
+  stringsAsFactors = FALSE)  # per-rank-bin percentiles written by Aggregate.java
+rq[(rq$len < 30 & rq$filesize == "50kb") |
+  (rq$len < 10 & rq$filesize == "1mb") |
+  (rq$len < 5 & rq$filesize == "5mb"), percentiles] <- NA  # blank out bins with fewer measurements than the per-size minimum
+rq <- rq[, c("filesize", "rank", percentiles)]
+rq <- melt(rq, id = c("filesize", "rank"))  # NOTE(review): melt() is not loaded explicitly; presumably attached via ggplot2's reshape dependency -- confirm
+rq[rq$filesize == "50kb", "filesize"] <- "a) 50 KB"
+rq[rq$filesize == "1mb", "filesize"] <- "b) 1 MB"
+rq[rq$filesize == "5mb", "filesize"] <- "c) 5 MB"
+ggplot(data, aes(x = as.numeric(rank), y = completiontime / 1000)) +
+geom_point(alpha = 0.05) +
+scale_x_continuous(paste("\nGuard rank by consensus bandwidth from",
+  "slowest (0) to fastest (1)"), limits = c(0, 1)) +
+scale_y_continuous("Torperf completion time in seconds\n") +
+geom_line(data = rq, aes(x = as.numeric(rank), y = value / 1000,
+  colour = variable)) +  # one line per selected percentile column
+facet_grid(filesize ~ ., scale = "free_y") +
+opts(legend.position = "none")
+ggsave(filename = "torperf-guard-rank-quantiles.png",
+  width = 8, height = 5, dpi = 150)
+
+bq <- read.csv("torperf-guard-bandwidth-quantiles.csv",
+  stringsAsFactors = FALSE)  # per-bandwidth-bin percentiles written by Aggregate.java
+bq[(bq$len < 30 & bq$filesize == "50kb") |
+  (bq$len < 10 & bq$filesize == "1mb") |
+  (bq$len < 5 & bq$filesize == "5mb"), percentiles] <- NA  # blank out bins with fewer measurements than the per-size minimum
+bq <- bq[, c("filesize", "bandwidth", percentiles)]
+bq <- melt(bq, id = c("filesize", "bandwidth"))
+bq[bq$filesize == "50kb", "filesize"] <- "a) 50 KB"
+bq[bq$filesize == "1mb", "filesize"] <- "b) 1 MB"
+bq[bq$filesize == "5mb", "filesize"] <- "c) 5 MB"
+ggplot(data, aes(x = bandwidth / 1000, y = completiontime / 1000)) +
+geom_point(alpha = 0.05) +
+scale_x_continuous("\nGuard consensus bandwidth in MB/s") +
+scale_y_continuous("Torperf completion time in seconds\n") +
+geom_line(data = bq, aes(x = bandwidth / 1000, y = value / 1000,
+  colour = variable)) +
+facet_grid(filesize ~ ., scale = "free_y") +
+opts(legend.position = "none")
+ggsave(filename = "torperf-guard-bandwidth-quantiles.png",
+  width = 8, height = 5, dpi = 150)
+