[tor-commits] [metrics-tasks/master] Commit graphing code for #3574.

karsten at torproject.org karsten at torproject.org
Wed Apr 4 07:17:13 UTC 2012


commit b7a73f0ce0605e4d0f017cb0696cc924a75c986e
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Apr 4 09:15:30 2012 +0200

    Commit graphing code for #3574.
    
    The ticket has been closed for 7 months now; I just found the code on my disk.
---
 task-3574/.gitignore                |    6 +++
 task-3574/AggregatePerDay.java      |   66 +++++++++++++++++++++++++++++++++++
 task-3574/OneLinePerDescriptor.java |   20 ++++++++++
 task-3574/README                    |   23 ++++++++++++
 task-3574/eval.R                    |   12 ++++++
 5 files changed, 127 insertions(+), 0 deletions(-)

diff --git a/task-3574/.gitignore b/task-3574/.gitignore
new file mode 100644
index 0000000..fae3c24
--- /dev/null
+++ b/task-3574/.gitignore
@@ -0,0 +1,6 @@
+*.txt
+extra-infos/
+*.csv
+*.class
+*.pdf
+
diff --git a/task-3574/AggregatePerDay.java b/task-3574/AggregatePerDay.java
new file mode 100644
index 0000000..cf625a6
--- /dev/null
+++ b/task-3574/AggregatePerDay.java
@@ -0,0 +1,66 @@
+import java.io.*;
+import java.text.*;
+import java.util.*;
+/**
+ * Aggregates bridge bandwidth histories into per-day totals.
+ *
+ * <p>Reads <code>bridge-bandwidth-histories-sorted.txt</code>, taking the
+ * third space-separated field of every line as the bridge fingerprint and
+ * scanning the rest of the line for <code>read-history</code> and
+ * <code>write-history</code> entries.  Writes
+ * <code>bridge-bandwidth-per-day.csv</code> with one line per fingerprint,
+ * UTC date, and direction in the form
+ * <code>fingerprint,date,read|write,bytes,intervals</code>, where
+ * <code>intervals</code> is the number of 15-minute slots of that day for
+ * which a value was seen.</p>
+ */
+public class AggregatePerDay {
+
+  /** Length of one bandwidth history interval in milliseconds. */
+  private static final long INTERVAL_MILLIS = 15L * 60L * 1000L;
+
+  /** Number of history intervals that make up one day. */
+  private static final int INTERVALS_PER_DAY = 96;
+
+  /** Length of one day in milliseconds. */
+  private static final long DAY_MILLIS = INTERVALS_PER_DAY * INTERVAL_MILLIS;
+
+  /**
+   * Reads the sorted history file from the working directory and writes
+   * the per-day CSV file next to it.
+   *
+   * @param args ignored
+   * @throws Exception if a file cannot be read or written, or if a
+   *     timestamp or byte value in the input cannot be parsed
+   */
+  public static void main(String[] args) throws Exception {
+    SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+    /* Maps "fingerprint,date,read|write" to the values of that day's
+     * intervals.  Values are stored incremented by one so that 0 can mean
+     * "no value seen for this interval". */
+    SortedMap<String, long[]> byteHistory = new TreeMap<String, long[]>();
+
+    BufferedReader br = new BufferedReader(new FileReader(
+        "bridge-bandwidth-histories-sorted.txt"));
+    try {
+      String line;
+      while ((line = br.readLine()) != null) {
+        String[] parts = line.split(" ");
+        if (parts.length < 3) {
+          continue;
+        }
+        String fingerprint = parts[2];
+        while (line.contains("-history")) {
+          /* Back up four characters so that the remainder starts with
+           * "read-history" or, for write histories, "rite-history". */
+          line = line.substring(line.indexOf("-history") - 4);
+          boolean isReadHistory = line.startsWith("read");
+          /* Drop "read-" or "rite-" so that the next loop iteration finds
+           * the following history entry, if any. */
+          line = line.substring(5);
+          parts = line.split(" ");
+          /* parts[1] and parts[2] hold date and time of the interval end,
+           * parts[5] the comma-separated values.  Skip entries without
+           * values, where parts[5] is already the next keyword. */
+          if (parts.length >= 6 && parts[5].length() > 0
+              && !parts[5].contains("history")) {
+            String[] bytes = parts[5].split(",");
+            long intervalEnd = dateTimeFormat.parse(
+                parts[1] + " " + parts[2]).getTime();
+            String direction = isReadHistory ? ",read" : ",write";
+            /* The last value belongs to the interval ending at the given
+             * timestamp; every earlier value is one interval older. */
+            for (int i = bytes.length - 1; i >= 0; i--) {
+              String key = fingerprint + ","
+                  + dateFormat.format(new Date(intervalEnd)) + direction;
+              /* Slot of the day (0..95) that the interval end falls into;
+               * the double modulo keeps the result non-negative. */
+              long millisOfDay =
+                  ((intervalEnd % DAY_MILLIS) + DAY_MILLIS) % DAY_MILLIS;
+              int timeIndex = (int) (millisOfDay / INTERVAL_MILLIS);
+              long value = Long.parseLong(bytes[i]);
+              long[] dayValues = byteHistory.get(key);
+              if (dayValues == null) {
+                dayValues = new long[INTERVALS_PER_DAY];
+                byteHistory.put(key, dayValues);
+              }
+              /* A later entry for the same slot overwrites an earlier
+               * one. */
+              dayValues[timeIndex] = value + 1L;
+              intervalEnd -= INTERVAL_MILLIS;
+            }
+          }
+        }
+      }
+    } finally {
+      /* Release the input file even if parsing fails half-way. */
+      br.close();
+    }
+
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        "bridge-bandwidth-per-day.csv"));
+    try {
+      for (Map.Entry<String, long[]> e : byteHistory.entrySet()) {
+        long total = 0L, count = 0L;
+        for (long val : e.getValue()) {
+          if (val > 0L) {
+            /* Undo the +1 offset applied when the value was stored. */
+            total += val - 1L;
+            count += 1L;
+          }
+        }
+        bw.write(e.getKey() + "," + total + "," + count + "\n");
+      }
+    } finally {
+      /* Flush and release the output file on every exit path. */
+      bw.close();
+    }
+  }
+}
+
diff --git a/task-3574/OneLinePerDescriptor.java b/task-3574/OneLinePerDescriptor.java
new file mode 100644
index 0000000..78c83b1
--- /dev/null
+++ b/task-3574/OneLinePerDescriptor.java
@@ -0,0 +1,20 @@
+import java.io.*;
+/**
+ * Joins the lines of each extra-info descriptor onto a single line.
+ *
+ * <p>Reads <code>bridge-bandwidth-histories-raw.txt</code> and writes
+ * <code>bridge-bandwidth-histories-by-fingerprint.txt</code>.  Every input
+ * line starting with <code>"extra-info "</code> begins a new output line;
+ * every other input line is appended to the current output line, preceded
+ * by a single space.  As a consequence the output starts with a line
+ * break and does not end with one.</p>
+ */
+public class OneLinePerDescriptor {
+
+  /**
+   * Converts the raw history file in the working directory.
+   *
+   * @param args ignored
+   * @throws Exception if the input cannot be read or the output cannot be
+   *     written
+   */
+  public static void main(String[] args) throws Exception {
+    BufferedReader br = new BufferedReader(new FileReader(
+        "bridge-bandwidth-histories-raw.txt"));
+    try {
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          "bridge-bandwidth-histories-by-fingerprint.txt"));
+      try {
+        String line;
+        while ((line = br.readLine()) != null) {
+          if (line.startsWith("extra-info ")) {
+            /* Start of a new descriptor: terminate the previous line. */
+            bw.write("\n" + line);
+          } else {
+            bw.write(" " + line);
+          }
+        }
+      } finally {
+        /* Flush and release the output file on every exit path. */
+        bw.close();
+      }
+    } finally {
+      /* Release the input file even if writing fails. */
+      br.close();
+    }
+  }
+}
+
diff --git a/task-3574/README b/task-3574/README
new file mode 100644
index 0000000..0151eb4
--- /dev/null
+++ b/task-3574/README
@@ -0,0 +1,23 @@
+Extract bridge fingerprints and byte histories from extra-info
+descriptors:
+
+  $ grep -hRE "^extra-info|^write-history|^read-history" extra-infos > \
+        bridge-bandwidth-histories-raw.txt
+
+Convert the output into something we can sort by fingerprint:
+
+  $ javac OneLinePerDescriptor.java && java OneLinePerDescriptor
+
+Sort by fingerprint and filter out duplicates:
+
+  $ sort bridge-bandwidth-histories-by-fingerprint.txt | uniq > \
+        bridge-bandwidth-histories-sorted.txt
+
+Aggregate bytes per day:
+
+  $ javac AggregatePerDay.java && java AggregatePerDay
+
+Plot graphs:
+
+  $ R --slave -f eval.R
+
diff --git a/task-3574/eval.R b/task-3574/eval.R
new file mode 100644
index 0000000..ac36880
--- /dev/null
+++ b/task-3574/eval.R
@@ -0,0 +1,12 @@
+library(ggplot2)
+# Read the per-day totals.  The CSV file has no header row, so header = FALSE
+# is required; with read.csv's default (header = TRUE) the first record would
+# be consumed as column names and silently dropped.
+data <- read.csv("bridge-bandwidth-per-day.csv", header = FALSE, stringsAsFactors = FALSE, col.names = c("fingerprint", "date", "operation", "bytes", "intervals"))
+# Sum bytes and reported intervals per bridge over all dates and both
+# operations (read and write).
+d <- aggregate(list(bytes = data$bytes, intervals = data$intervals), by = list(fingerprint = data$fingerprint), sum)
+# NOTE(review): formatter = "percent" below looks like the scale argument of
+# ggplot2 releases before 0.9.0; newer releases expect
+# labels = scales::percent instead -- confirm against the installed version.
+# Cumulative fraction of bridges by total bytes, in GiB.
+ggplot(d, aes(x = sort(bytes) / 2^30, y = (1:length(bytes)) / length(bytes))) +
+geom_line() +
+scale_x_continuous(name = "\nTotal read and written GiB per month", limits = c(0, 100)) +
+scale_y_continuous(name = "Fraction of bridges\n", formatter = "percent", limits = c(0, 1))
+# Cumulative fraction of bridges by mean rate: bytes per reported 15-minute
+# interval, converted to KiB per second.
+ggplot(d, aes(x = sort(bytes / intervals) / (15 * 60 * 2^10), y = (1:length(bytes)) / length(bytes))) +
+geom_line() +
+scale_x_continuous(name = "\nMean read and written KiB per second", limits = c(0, 10)) +
+scale_y_continuous(name = "Fraction of bridges\n", formatter = "percent", limits = c(0, 1))
+



More information about the tor-commits mailing list