[tor-commits] [metrics-tasks/master] Add code to generate the #1991 graphs.

karsten at torproject.org karsten at torproject.org
Tue Apr 19 19:21:39 UTC 2011


commit 66474d34ad189d7ab6e6f0ad6359452bbed77e89
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Tue Apr 19 21:21:05 2011 +0200

    Add code to generate the #1991 graphs.
---
 task-1991/.gitignore                       |    6 ++
 task-1991/Aggregate.java                   |  131 ++++++++++++++++++++++++++++
 task-1991/Merge.java                       |  104 ++++++++++++++++++++++
 task-1991/README                           |   46 ++++++++++
 task-1991/torperf-guard-bandwidths-ranks.R |   40 +++++++++
 task-1991/torperf-guard-quantiles.R        |   57 ++++++++++++
 6 files changed, 384 insertions(+), 0 deletions(-)

diff --git a/task-1991/.gitignore b/task-1991/.gitignore
new file mode 100644
index 0000000..a0fc089
--- /dev/null
+++ b/task-1991/.gitignore
@@ -0,0 +1,6 @@
+*.mergedata
+*.class
+*.png
+*.csv
+Rplots.pdf
+
diff --git a/task-1991/Aggregate.java b/task-1991/Aggregate.java
new file mode 100644
index 0000000..682a553
--- /dev/null
+++ b/task-1991/Aggregate.java
@@ -0,0 +1,141 @@
+import java.io.*;
+import java.util.*;
+
+/**
+ * Aggregates Torperf completion times by guard rank and by guard
+ * consensus bandwidth and writes percentiles for each bucket.
+ */
+public class Aggregate {
+
+  /** File sizes for which rows are written, in output order. */
+  private static final String[] FILESIZES = { "50kb", "1mb", "5mb" };
+
+  /**
+   * Reads torperf-guard-bandwidths-ranks.csv from the working directory
+   * and writes torperf-guard-rank-quantiles.csv and
+   * torperf-guard-bandwidth-quantiles.csv.
+   *
+   * @param args ignored
+   * @throws Exception if reading, parsing, or writing fails
+   */
+  public static void main(String[] args) throws Exception {
+    List<String> sortedByBandwidth = new ArrayList<String>(),
+        sortedByRank = new ArrayList<String>();
+    BufferedReader br = new BufferedReader(new FileReader(
+        "torperf-guard-bandwidths-ranks.csv"));
+    try {
+      String line = br.readLine(); /* Skip the header line. */
+      while ((line = br.readLine()) != null) {
+        sortedByBandwidth.add(line);
+        sortedByRank.add(line);
+      }
+    } finally {
+      br.close();
+    }
+    Collections.sort(sortedByBandwidth, new Comparator<String>() {
+      public int compare(String a, String b) {
+        int first = Integer.parseInt(a.split(",")[0]);
+        int second = Integer.parseInt(b.split(",")[0]);
+        return first < second ? -1 : (first > second ? 1 : 0);
+      }
+    });
+    Collections.sort(sortedByRank, new Comparator<String>() {
+      public int compare(String a, String b) {
+        /* Compare exactly.  Scaling the difference by 100 and casting
+         * it to int treated ranks less than 0.01 apart as equal, so
+         * lines near a bucket border could end up out of order. */
+        return Double.compare(Double.parseDouble(a.split(",")[1]),
+            Double.parseDouble(b.split(",")[1]));
+      }
+    });
+
+    List<Integer> percentiles = new ArrayList<Integer>();
+    for (int percentile = 1; percentile < 100; percentile += 1) {
+      percentiles.add(percentile);
+    }
+
+    writeQuantiles("torperf-guard-rank-quantiles.csv", "rank",
+        sortedByRank, 1, 0.02, "%.3f", percentiles);
+    writeQuantiles("torperf-guard-bandwidth-quantiles.csv", "bandwidth",
+        sortedByBandwidth, 0, 2500.0, "%.0f", percentiles);
+  }
+
+  /**
+   * Writes one quantiles file.  For each file size, lines are assigned
+   * to consecutive buckets of the given width by the value in
+   * keyColumn, and one row is written per bucket, labelled with the
+   * bucket's midpoint.
+   *
+   * @param outputFile name of the CSV file to write
+   * @param keyHeader header of the bucket column
+   * @param sortedLines input lines sorted by keyColumn, ascending
+   * @param keyColumn index of the column to bucket by
+   * @param interval bucket width
+   * @param labelFormat format string for the bucket midpoint
+   * @param percentiles percentiles to write, each between 1 and 99
+   * @throws IOException if writing fails
+   */
+  private static void writeQuantiles(String outputFile, String keyHeader,
+      List<String> sortedLines, int keyColumn, double interval,
+      String labelFormat, List<Integer> percentiles)
+      throws IOException {
+    BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile));
+    try {
+      bw.write("filesize," + keyHeader);
+      for (int percentile : percentiles) {
+        bw.write(",p" + percentile);
+      }
+      bw.write(",len\n");
+      for (String filesize : FILESIZES) {
+        double upperBound = interval;
+        List<Integer> times = new ArrayList<Integer>();
+        for (String line : sortedLines) {
+          String[] parts = line.split(",");
+          if (!parts[4].equals(filesize)) {
+            continue;
+          }
+          double key = Double.parseDouble(parts[keyColumn]);
+          /* Close every bucket below this value, not just one, so
+           * that a gap in the data cannot shift later values into
+           * buckets with too low a label. */
+          while (key > upperBound) {
+            writeBucket(bw, filesize, String.format(Locale.US,
+                labelFormat, upperBound - 0.5 * interval), times,
+                percentiles);
+            times.clear();
+            upperBound += interval;
+          }
+          times.add(Integer.parseInt(parts[2]));
+        }
+        writeBucket(bw, filesize, String.format(Locale.US, labelFormat,
+            upperBound - 0.5 * interval), times, percentiles);
+      }
+    } finally {
+      bw.close();
+    }
+  }
+
+  /**
+   * Writes the row for one bucket: file size, label, the requested
+   * percentiles of the completion times (or NA for each of them if
+   * there are fewer than two times), and the number of times.  Sorts
+   * times in place.
+   */
+  private static void writeBucket(BufferedWriter bw, String filesize,
+      String label, List<Integer> times, List<Integer> percentiles)
+      throws IOException {
+    bw.write(filesize + "," + label);
+    if (times.size() > 1) {
+      Collections.sort(times);
+      for (int percentile : percentiles) {
+        bw.write("," + times.get(times.size() * percentile / 100));
+      }
+    } else {
+      for (int i = 0; i < percentiles.size(); i++) {
+        bw.write(",NA");
+      }
+    }
+    bw.write("," + times.size() + "\n");
+  }
+}
+
diff --git a/task-1991/Merge.java b/task-1991/Merge.java
new file mode 100644
index 0000000..622cbf6
--- /dev/null
+++ b/task-1991/Merge.java
@@ -0,0 +1,179 @@
+import java.io.*;
+import java.text.*;
+import java.util.*;
+
+/**
+ * Merges Torperf measurements from .mergedata files with the consensus
+ * bandwidths and bandwidth ranks of the guards they used.
+ */
+public class Merge {
+
+  /**
+   * Reads bandwidths-sql.csv and all *.mergedata files in the working
+   * directory and writes torperf-guard-bandwidths-ranks.csv.
+   *
+   * @param args ignored
+   * @throws Exception if reading, parsing, or writing fails
+   */
+  public static void main(String[] args) throws Exception {
+
+    System.out.println("Reading guard node bandwidths...");
+    SortedMap<String, String> bandwidthRanks =
+        new TreeMap<String, String>();
+    BufferedReader br = new BufferedReader(new FileReader(
+        "bandwidths-sql.csv"));
+    try {
+      String line, lastDateTime = null;
+      List<String> currentRelays = new ArrayList<String>();
+      /* Do not drop the first line unconditionally: header lines are
+       * filtered below, and a tuples-only export as described in the
+       * README starts with a data line. */
+      while ((line = br.readLine()) != null) {
+        if (line.startsWith("fingerprint") || line.startsWith("(")) {
+          continue;
+        }
+        String dateTime = line.split(",")[1];
+        if (lastDateTime != null && !dateTime.equals(lastDateTime)) {
+          addRanks(bandwidthRanks, currentRelays, lastDateTime);
+          currentRelays.clear();
+        }
+        lastDateTime = dateTime;
+        currentRelays.add(line);
+      }
+      /* No line with a later date follows the last group, so its
+       * relays have to be ranked here. */
+      if (lastDateTime != null) {
+        addRanks(bandwidthRanks, currentRelays, lastDateTime);
+      }
+    } finally {
+      br.close();
+    }
+
+    System.out.println("Reading .mergedata file and writing completion "
+       + "time, guard bandwidth, and rank to disk...");
+    SimpleDateFormat dateTimeFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        "torperf-guard-bandwidths-ranks.csv"));
+    try {
+      bw.write("bandwidth,rank,completiontime,guards,filesize\n");
+      for (File mergedataFile : new File(".").listFiles()) {
+        String filename = mergedataFile.getName();
+        if (!filename.endsWith(".mergedata")) {
+          continue;
+        }
+        String guards = filename.substring(0, filename.indexOf("80cbt"));
+        String filesize = filename.split("-")[1].split("\\.")[0];
+        /* Close every file's reader, not only the last one. */
+        BufferedReader mergedata = new BufferedReader(new FileReader(
+            mergedataFile));
+        try {
+          String line;
+          while ((line = mergedata.readLine()) != null) {
+            writeMeasurement(bw, line, guards, filesize, bandwidthRanks,
+                dateTimeFormat);
+          }
+        } finally {
+          mergedata.close();
+        }
+      }
+    } finally {
+      bw.close();
+    }
+  }
+
+  /**
+   * Ranks the relays of one group of input lines by bandwidth and
+   * stores "bandwidth,rank" under the key "fingerprint,dateTime".  The
+   * rank runs from 0 for the lowest to 1 for the highest bandwidth; a
+   * group of one relay gets rank NaN.  Sorts relays in place.
+   *
+   * @param bandwidthRanks map to add the results to
+   * @param relays lines of the form fingerprint,dateTime,bandwidth
+   * @param dateTime date and time shared by all lines in relays
+   */
+  private static void addRanks(SortedMap<String, String> bandwidthRanks,
+      List<String> relays, String dateTime) {
+    Collections.sort(relays, new Comparator<String>() {
+      public int compare(String a, String b) {
+        int first = Integer.parseInt(a.split(",")[2]);
+        int second = Integer.parseInt(b.split(",")[2]);
+        return first < second ? -1 : (first > second ? 1 : 0);
+      }
+    });
+    for (int i = 0; i < relays.size(); i++) {
+      String[] relayParts = relays.get(i).split(",");
+      /* Locale.US keeps the decimal separator a dot on every system,
+       * which the comma-separated output relies on. */
+      bandwidthRanks.put(relayParts[0] + "," + dateTime,
+          String.format(Locale.US, "%s,%.6f", relayParts[2],
+          (double) i / (double) (relays.size() - 1)));
+    }
+  }
+
+  /**
+   * Parses one .mergedata line of space-separated KEY=value pairs and,
+   * if it is a completed measurement whose guard has a known bandwidth
+   * before the start time, writes one output line.
+   *
+   * @param bw writer for the output file
+   * @param line line to parse
+   * @param guards value for the guards column
+   * @param filesize value for the filesize column
+   * @param bandwidthRanks "bandwidth,rank" by "fingerprint,dateTime"
+   * @param dateTimeFormat format of the dateTime part of the map keys
+   * @throws IOException if writing fails
+   */
+  private static void writeMeasurement(BufferedWriter bw, String line,
+      String guards, String filesize,
+      SortedMap<String, String> bandwidthRanks,
+      DateFormat dateTimeFormat) throws IOException {
+    String path = null;
+    long started = 0L, completed = 0L;
+    boolean didTimeout = false;
+    for (String part : line.split(" ")) {
+      int separator = part.indexOf("=");
+      if (separator < 0) {
+        continue; /* Not a KEY=value pair, e.g., an empty line. */
+      }
+      String key = part.substring(0, separator);
+      String value = part.substring(separator + 1);
+      if (key.equals("PATH")) {
+        path = value;
+      } else if (key.equals("STARTSEC")) {
+        started += Long.parseLong(value) * 1000L;
+      } else if (key.equals("STARTUSEC")) {
+        started += Long.parseLong(value) / 1000L;
+      } else if (key.equals("DATACOMPLETESEC")) {
+        completed += Long.parseLong(value) * 1000L;
+      } else if (key.equals("DATACOMPLETEUSEC")) {
+        completed += Long.parseLong(value) / 1000L;
+      } else if (key.equals("DIDTIMEOUT")) {
+        /* Remember the timeout; a continue here would only move on
+         * to the next pair instead of skipping the measurement. */
+        didTimeout = value.equals("1");
+      }
+    }
+    if (didTimeout || path == null || started == 0L
+        || completed == 0L) {
+      return;
+    }
+    String dateTime = dateTimeFormat.format(new Date(started));
+    String fingerprint = path.split(",")[0].substring(1).toLowerCase();
+    /* headMap is empty if no key sorts before this one; lastKey would
+     * throw NoSuchElementException in that case. */
+    SortedMap<String, String> earlier =
+        bandwidthRanks.headMap(fingerprint + "," + dateTime);
+    if (earlier.isEmpty()) {
+      return;
+    }
+    String previousGuardKey = earlier.lastKey();
+    if (previousGuardKey.startsWith(fingerprint)) {
+      bw.write(bandwidthRanks.get(previousGuardKey) + ","
+          + (completed - started) + "," + guards + "," + filesize
+          + "\n");
+    }
+  }
+}
+
diff --git a/task-1991/README b/task-1991/README
new file mode 100644
index 0000000..8cf5548
--- /dev/null
+++ b/task-1991/README
@@ -0,0 +1,46 @@
+Visualize influence of guard bandwidth on Torperf completion time
+
+ - Generate one or more .mergedata files, e.g.,
+
+   $ ./consolidate_stats.py 50kb.data 50kb.extradata \
+     regular80cbt-50kb.mergedata
+
+ - Extract consensus bandwidths of all relays and write them to a CSV
+   file.  The easiest way to do this is to run an SQL query on the metrics
+   database:
+
+   => \f ','
+   => \a
+   => \t
+   => \o bandwidths-sql.csv
+   => SELECT fingerprint, validafter, bandwidth FROM statusentry
+      WHERE validafter >= '2011-02-23' ORDER BY validafter, fingerprint;
+   => \o
+   => \t
+   => \a
+
+ - Merge the .mergedata file with the consensus bandwidth file to have a
+   single CSV file that contains Torperf completion times and Guard node
+   consensus bandwidths and ranks:
+
+   $ javac Merge.java && java -Xmx2048m Merge
+
+   The result is a file torperf-guard-bandwidths-ranks.csv.
+
+ - Aggregate the results to obtain percentiles:
+
+   $ javac Aggregate.java && java Aggregate
+
+   The results are two files, torperf-guard-bandwidth-quantiles.csv and
+   torperf-guard-rank-quantiles.csv.
+
+ - Plot Torperf results by guard consensus bandwidth and ranks for each
+   guard selection strategy separately:
+
+   $ R --slave -f torperf-guard-bandwidths-ranks.R
+
+ - Plot Torperf results by guard consensus bandwidth and ranks for all
+   guard selection strategies and including quantile lines:
+
+   $ R --slave -f torperf-guard-quantiles.R
+
diff --git a/task-1991/torperf-guard-bandwidths-ranks.R b/task-1991/torperf-guard-bandwidths-ranks.R
new file mode 100644
index 0000000..8e7eb0a
--- /dev/null
+++ b/task-1991/torperf-guard-bandwidths-ranks.R
@@ -0,0 +1,40 @@
+library(ggplot2)
+# Plot Torperf completion times against guard bandwidth and guard rank.
+# Read the per-measurement CSV file written by Merge.java.
+data <- read.csv("torperf-guard-bandwidths-ranks.csv",
+  stringsAsFactors = FALSE)
+# Keep rows whose completion time is below a limit chosen per file size.
+data <- data[(data$filesize == "50kb" & data$completiontime < 60000) |
+             (data$filesize == "1mb" & data$completiontime < 120000) |
+             (data$filesize == "5mb" & data$completiontime < 300000), ]
+# Replace guards and filesize codes with labels "a) ...", "b) ...", etc.
+data[data$guards == "slow", "guards"] <- "a) slowest overall"
+data[data$guards == "slowratio", "guards"] <- "b) slowest ratio"
+data[data$guards == "regular", "guards"] <- "c) default"
+data[data$guards == "fastratio", "guards"] <- "d) fastest ratio"
+data[data$guards == "fast", "guards"] <- "e) fastest overall"
+data[data$filesize == "50kb", "filesize"] <- "a) 50 KB"
+data[data$filesize == "1mb", "filesize"] <- "b) 1 MB"
+data[data$filesize == "5mb", "filesize"] <- "c) 5 MB"
+# Scatter plot by bandwidth, one panel per file size and guards value.
+ggplot(data, aes(x = bandwidth / 1000, y = completiontime / 1000)) +
+geom_point(alpha = 0.05) +
+scale_x_continuous("\nGuard consensus bandwidth in MB/s") +
+scale_y_continuous("Torperf completion time in seconds\n") +
+facet_grid(filesize ~ guards, scale = "free_y") +
+opts(legend.position = "none")
+ggsave(filename = "torperf-guard-bandwidths.png",
+  width = 8, height = 5, dpi = 150)
+# Scatter plot by rank, with the same panels.
+ggplot(data, aes(x = as.numeric(rank), y = completiontime / 1000)) +
+geom_point(alpha = 0.05) +
+scale_x_continuous(paste("\nGuard rank by consensus bandwidth from",
+  "slowest (0) to fastest (1)"), limits = c(0, 1),
+  breaks = c(0.25, 0.5, 0.75)) +
+scale_y_continuous("Torperf completion time in seconds\n") +
+facet_grid(filesize ~ guards, scale = "free_y") +
+opts(legend.position = "none")
+ggsave(filename = "torperf-guard-ranks.png",
+  width = 8, height = 5, dpi = 150)
+
+
diff --git a/task-1991/torperf-guard-quantiles.R b/task-1991/torperf-guard-quantiles.R
new file mode 100644
index 0000000..2554c34
--- /dev/null
+++ b/task-1991/torperf-guard-quantiles.R
@@ -0,0 +1,57 @@
+library(ggplot2)
+# Plot Torperf completion times with quantile lines per rank/bandwidth.
+# Read the per-measurement CSV file written by Merge.java and keep rows
+data <- read.csv("torperf-guard-bandwidths-ranks.csv",
+  stringsAsFactors = FALSE)
+data <- data[(data$filesize == "50kb" & data$completiontime < 60000) |
+             (data$filesize == "1mb" & data$completiontime < 600000) |
+             (data$filesize == "5mb" & data$completiontime < 1500000), ]
+data[data$filesize == "50kb", "filesize"] <- "a) 50 KB"
+data[data$filesize == "1mb", "filesize"] <- "b) 1 MB"
+data[data$filesize == "5mb", "filesize"] <- "c) 5 MB"
+# Quantile columns to draw; seq(90, 90, 1) selects only "p90".
+percentiles <- paste("p", seq(90, 90, 1), sep = "")
+# Rank quantiles: set buckets with too few values (len) to NA, then plot.
+rq <- read.csv("torperf-guard-rank-quantiles.csv",
+  stringsAsFactors = FALSE)
+rq[(rq$len < 30 & rq$filesize == "50kb") |
+   (rq$len < 10 & rq$filesize == "1mb") |
+   (rq$len < 5 & rq$filesize == "5mb"), percentiles] <- NA
+rq <- rq[, c("filesize", "rank", percentiles)]
+rq <- melt(rq, id = c("filesize", "rank"))
+rq[rq$filesize == "50kb", "filesize"] <- "a) 50 KB"
+rq[rq$filesize == "1mb", "filesize"] <- "b) 1 MB"
+rq[rq$filesize == "5mb", "filesize"] <- "c) 5 MB"
+ggplot(data, aes(x = as.numeric(rank), y = completiontime / 1000)) +
+geom_point(alpha = 0.05) +
+scale_x_continuous(paste("\nGuard rank by consensus bandwidth from",
+  "slowest (0) to fastest (1)"), limits = c(0, 1)) +
+scale_y_continuous("Torperf completion time in seconds\n") +
+geom_line(data = rq, aes(x = as.numeric(rank), y = value / 1000,
+  colour = variable)) +
+facet_grid(filesize ~ ., scale = "free_y") +
+opts(legend.position = "none")
+ggsave(filename = "torperf-guard-rank-quantiles.png",
+  width = 8, height = 5, dpi = 150)
+# Bandwidth quantiles: same steps as for the rank quantiles.
+bq <- read.csv("torperf-guard-bandwidth-quantiles.csv",
+  stringsAsFactors = FALSE)
+bq[(bq$len < 30 & bq$filesize == "50kb") |
+   (bq$len < 10 & bq$filesize == "1mb") |
+   (bq$len < 5 & bq$filesize == "5mb"), percentiles] <- NA
+bq <- bq[, c("filesize", "bandwidth", percentiles)]
+bq <- melt(bq, id = c("filesize", "bandwidth"))
+bq[bq$filesize == "50kb", "filesize"] <- "a) 50 KB"
+bq[bq$filesize == "1mb", "filesize"] <- "b) 1 MB"
+bq[bq$filesize == "5mb", "filesize"] <- "c) 5 MB"
+ggplot(data, aes(x = bandwidth / 1000, y = completiontime / 1000)) +
+geom_point(alpha = 0.05) +
+scale_x_continuous("\nGuard consensus bandwidth in MB/s") +
+scale_y_continuous("Torperf completion time in seconds\n") +
+geom_line(data = bq, aes(x = bandwidth / 1000, y = value / 1000,
+  colour = variable)) +
+facet_grid(filesize ~ ., scale = "free_y") +
+opts(legend.position = "none")
+ggsave(filename = "torperf-guard-bandwidth-quantiles.png",
+  width = 8, height = 5, dpi = 150)
+



More information about the tor-commits mailing list