[tor-commits] [metrics-tasks/master] Update the how-wrong-are-our-bridge-stats analysis (#3261).

karsten at torproject.org karsten at torproject.org
Tue Nov 1 09:52:26 UTC 2011


commit ccd1a72cdb60f00c50163df9a59f7208ae407820
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Tue Nov 1 10:51:53 2011 +0100

    Update the how-wrong-are-our-bridge-stats analysis (#3261).
---
 task-3261/.gitignore                |    3 +
 task-3261/AnalyzeStatsCoverage.java |   70 ++++++++++++++++++++++++-----------
 task-3261/README                    |    2 +-
 task-3261/stats-coverage.R          |   16 +++++--
 4 files changed, 63 insertions(+), 28 deletions(-)

diff --git a/task-3261/.gitignore b/task-3261/.gitignore
index 2f247e9..1e3dcc5 100644
--- a/task-3261/.gitignore
+++ b/task-3261/.gitignore
@@ -1,4 +1,7 @@
 *.class
 *.png
+*.pdf
 *.csv
+in/
+temp/
 
diff --git a/task-3261/AnalyzeStatsCoverage.java b/task-3261/AnalyzeStatsCoverage.java
index 9d6cc84..21d791e 100644
--- a/task-3261/AnalyzeStatsCoverage.java
+++ b/task-3261/AnalyzeStatsCoverage.java
@@ -18,6 +18,7 @@ public class AnalyzeStatsCoverage {
     if (inDirectory.exists() && inDirectory.isDirectory()) {
       System.out.println("Parsing descriptors in '"
           + inDirectory.getAbsolutePath() + "'.");
+      long started = System.currentTimeMillis();
       tempDirectory.mkdirs();
       Stack<File> dirs = new Stack<File>();
       SortedSet<File> files = new TreeSet<File>();
@@ -36,9 +37,12 @@ public class AnalyzeStatsCoverage {
       for (File file : files) {
         if (++fileNumber % (totalFiles / 1000) == 0) {
           int numberLength = String.valueOf(totalFiles).length();
+          long minutesLeft = (((System.currentTimeMillis() - started)
+              * (totalFiles - fileNumber)) / fileNumber) / (60L * 1000L);
           System.out.printf("Parsed %" + numberLength + "d of %"
-              + numberLength + "d descriptors (%3d %%)%n", fileNumber,
-              totalFiles, (fileNumber * 100) / totalFiles);
+              + numberLength + "d descriptors (%3d %%) %d minutes left%n",
+              fileNumber, totalFiles, (fileNumber * 100) / totalFiles,
+              minutesLeft);
         }
         BufferedReader br = new BufferedReader(new FileReader(file));
         String line, fingerprint = null, publishedLine = null;
@@ -177,15 +181,20 @@ public class AnalyzeStatsCoverage {
 
     /* Parse relevant lines by fingerprint and date.  The result will be
      * how many bytes that relay or bridge read/wrote in total, and how
-     * many bytes were included in the different reported statistics. */
+     * many bytes were included in the different reported statistics.
+     * Another result is the number of seconds for which this relay or
+     * bridge reported byte histories and other statistics. */
     if (tempDirectory.exists() && tempDirectory.isDirectory()) {
       System.out.println("Evaluating previously parsed descriptors in '"
           + tempDirectory.getAbsolutePath() + "'.");
       BufferedWriter bw = new BufferedWriter(new FileWriter(outFile));
-      bw.write("fingerprint,date,totalwritten,totalread,dirreqwritten,"
-          + "dirreqread,entrywritten,entryread,exitwritten,exitread,"
-          + "cellwritten,cellread,connbidirectwritten,connbidirectread,"
-          + "bridgewritten,bridgeread,geoipwritten,geoipread\n");
+      bw.write("fingerprint,date,totalwritten,totalread,totalseconds,"
+          + "dirreqwritten,dirreqread,dirreqseconds,entrywritten,"
+          + "entryread,entryseconds,exitwritten,exitread,exitseconds,"
+          + "cellwritten,cellread,cellseconds,connbidirectwritten,"
+          + "connbidirectread,connbidirectseconds,bridgewritten,"
+          + "bridgeread,bridgeseconds,geoipwritten,geoipread,"
+          + "geoipseconds\n");
       Stack<File> dirs = new Stack<File>();
       SortedSet<File> files = new TreeSet<File>();
       dirs.add(tempDirectory);
@@ -204,15 +213,16 @@ public class AnalyzeStatsCoverage {
         if (++fileNumber % (totalFiles / 1000) == 0) {
           int numberLength = String.valueOf(totalFiles).length();
           System.out.printf("Evaluated %" + numberLength + "d of %"
-              + numberLength + "d descriptors (%3d %%)%n", fileNumber,
-              totalFiles, (fileNumber * 100) / totalFiles);
+              + numberLength + "d descriptors/days (%3d %%)%n",
+              fileNumber, totalFiles, (fileNumber * 100) / totalFiles);
         }
         String fingerprint = file.getName().substring(0, 40);
         String date = file.getName().substring(41);
         long dateStartMillis = dateFormat.parse(date).getTime();
         long dateEndMillis = dateStartMillis + 24L * 60L * 60L * 1000L;
         long[] writeHistory = new long[96], readHistory = new long[96];
-        boolean[] dirreqStats = new boolean[96],
+        boolean[] running = new boolean[96],
+            dirreqStats = new boolean[96],
             entryStats = new boolean[96],
             exitStats = new boolean[96],
             cellStats = new boolean[96],
@@ -245,6 +255,7 @@ public class AnalyzeStatsCoverage {
                   System.exit(1);
                 }
                 history[j] = Long.parseLong(historyValues[i]);
+                running[j] = true;
               }
               currentMillis += 15L * 60L * 1000L;
             }
@@ -300,38 +311,53 @@ public class AnalyzeStatsCoverage {
         }
         br.close();
         bw.write(fingerprint + "," + date + ",");
-        long totalWritten = 0L, totalRead = 0L, dirreqWritten = 0L,
-            dirreqRead = 0L, entryWritten = 0L, entryRead = 0L,
-            exitWritten = 0L, exitRead = 0L, cellWritten = 0L,
-            cellRead = 0L, connBiDirectWritten = 0L,
-            connBiDirectRead = 0L, bridgeWritten = 0L, bridgeRead = 0L,
-            geoipWritten = 0L, geoipRead = 0L;
+        long totalWritten = 0L, totalRead = 0L, totalSeconds = 0L,
+            dirreqWritten = 0L, dirreqRead = 0L, dirreqSeconds = 0L,
+            entryWritten = 0L, entryRead = 0L, entrySeconds = 0L,
+            exitWritten = 0L, exitRead = 0L, exitSeconds = 0L,
+            cellWritten = 0L, cellRead = 0L, cellSeconds = 0L,
+            connBiDirectWritten = 0L, connBiDirectRead = 0L,
+            connBiDirectSeconds = 0L, bridgeWritten = 0L, bridgeRead = 0L,
+            bridgeSeconds = 0L, geoipWritten = 0L, geoipRead = 0L,
+            geoipSeconds = 0L;
         for (int i = 0; i < 96; i++) {
           totalWritten += writeHistory[i];
           totalRead += readHistory[i];
+          totalSeconds += running[i] ? 900L : 0L;
           dirreqWritten += dirreqStats[i] ? writeHistory[i] : 0L;
           dirreqRead += dirreqStats[i] ? readHistory[i] : 0L;
+          dirreqSeconds += dirreqStats[i] && running[i] ? 900L : 0L;
           entryWritten += entryStats[i] ? writeHistory[i] : 0L;
           entryRead += entryStats[i] ? readHistory[i] : 0L;
+          entrySeconds += entryStats[i] && running[i] ? 900L : 0L;
           exitWritten += exitStats[i] ? writeHistory[i] : 0L;
           exitRead += exitStats[i] ? readHistory[i] : 0L;
+          exitSeconds += exitStats[i] && running[i] ? 900L : 0L;
           cellWritten += cellStats[i] ? writeHistory[i] : 0L;
           cellRead += cellStats[i] ? readHistory[i] : 0L;
+          cellSeconds += cellStats[i] && running[i] ? 900L : 0L;
           connBiDirectWritten += connBiDirectStats[i] ? writeHistory[i]
               : 0L;
           connBiDirectRead += connBiDirectStats[i] ? readHistory[i]
               : 0L;
+          connBiDirectSeconds += connBiDirectStats[i] && running[i] ? 900L
+              : 0L;
           bridgeWritten += bridgeStats[i] ? writeHistory[i] : 0L;
           bridgeRead += bridgeStats[i] ? readHistory[i] : 0L;
+          bridgeSeconds += bridgeStats[i] && running[i] ? 900L : 0L;
           geoipWritten += geoipStats[i] ? writeHistory[i] : 0L;
           geoipRead += geoipStats[i] ? readHistory[i] : 0L;
+          geoipSeconds += geoipStats[i] && running[i] ? 900L : 0L;
         }
-        bw.write(totalWritten + "," + totalRead + "," + dirreqWritten
-            + "," + dirreqRead + "," + entryWritten + "," + entryRead
-            + "," + exitWritten + "," + exitRead + "," + cellWritten
-            + "," + cellRead + "," + connBiDirectWritten + ","
-            + connBiDirectRead + "," + bridgeWritten + "," + bridgeRead
-            + "," + geoipWritten + "," + geoipRead + "\n");
+        bw.write(totalWritten + "," + totalRead + "," + totalSeconds + ","
+            + dirreqWritten + "," + dirreqRead + "," + dirreqSeconds + ","
+            + entryWritten + "," + entryRead + "," + entrySeconds + ","
+            + exitWritten + "," + exitRead + "," + exitSeconds + ","
+            + cellWritten + "," + cellRead + "," + cellSeconds + ","
+            + connBiDirectWritten + "," + connBiDirectRead + ","
+            + connBiDirectSeconds + "," + bridgeWritten + ","
+            + bridgeRead + "," + bridgeSeconds + "," + geoipWritten + ","
+            + geoipRead + "," + geoipSeconds + "\n");
       }
       bw.close();
     }
diff --git a/task-3261/README b/task-3261/README
index 1796e15..1b95743 100644
--- a/task-3261/README
+++ b/task-3261/README
@@ -1,4 +1,4 @@
 $ javac AnalyzeStatsCoverage.java
-$ java -Xmx4g AnalyzeStatsCoverage.java
+$ java -Xmx4g AnalyzeStatsCoverage
 $ R --slave -f stats-coverage.R
 
diff --git a/task-3261/stats-coverage.R b/task-3261/stats-coverage.R
index b9c1810..d0794b9 100644
--- a/task-3261/stats-coverage.R
+++ b/task-3261/stats-coverage.R
@@ -1,11 +1,17 @@
 library(ggplot2)
 b <- read.csv("stats-coverage.csv")
-b <- aggregate(list(totalwritten = b$totalwritten,
-  bridgewritten = b$bridgewritten, geoipwritten = b$geoipwritten),
-  by = list(date = b$date), sum)
-b <- data.frame(date = b$date, value = b$bridgewritten / b$totalwritten)
-ggplot(b, aes(x = as.Date(date), y = value)) +
+b <- aggregate(list(
+  totalwritten = b$totalwritten, totalseconds = b$totalseconds,
+  bridgewritten = b$bridgewritten, bridgeseconds = b$bridgeseconds,
+  geoipwritten = b$geoipwritten, geoipseconds = b$geoipseconds),
+  by = list(date = as.Date(b$date)), sum)
+b <- rbind(data.frame(date = b$date, variable = "by written bytes",
+    value = (b$bridgewritten + b$geoipwritten) / b$totalwritten),
+  data.frame(date = b$date, variable = "by uptime seconds",
+    value = (b$bridgeseconds + b$geoipseconds) / b$totalseconds))
+ggplot(b, aes(x = date, y = value)) +
 geom_line() +
+facet_grid(variable ~ .) +
 scale_x_date(name = "", major = "3 months", minor = "1 month",
   format = "%b %Y", limits = as.Date(c("2010-10-01", "2011-09-30"))) +
 scale_y_continuous(name = "", limits = c(0, 1), formatter = "percent") +



More information about the tor-commits mailing list