[tor-commits] [metrics-tasks/master] Update #3261 code.

karsten at torproject.org karsten at torproject.org
Mon Apr 16 13:27:12 UTC 2012


commit a3fd963839d80da97d2e67384bd1c16021a82ac9
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Apr 16 15:20:46 2012 +0200

    Update #3261 code.
    
    We now parse bridge network statuses and append "running " lines to files
    tempDirectory/$date/$fingerprint-$date for later processing by fingerprint
    and date.  We then add *running columns to the .csv file which contain
    fractions by bridges with the Running flag.
---
 task-3261/.gitignore                |    1 +
 task-3261/AnalyzeStatsCoverage.java |  182 ++++++++++++++++++++++++++++-------
 task-3261/README                    |    4 +-
 task-3261/stats-coverage.R          |   22 +++--
 4 files changed, 163 insertions(+), 46 deletions(-)

diff --git a/task-3261/.gitignore b/task-3261/.gitignore
index 1e3dcc5..2bfd23b 100644
--- a/task-3261/.gitignore
+++ b/task-3261/.gitignore
@@ -4,4 +4,5 @@
 *.csv
 in/
 temp/
+*.jar
 
diff --git a/task-3261/AnalyzeStatsCoverage.java b/task-3261/AnalyzeStatsCoverage.java
index 21d791e..4688bde 100644
--- a/task-3261/AnalyzeStatsCoverage.java
+++ b/task-3261/AnalyzeStatsCoverage.java
@@ -1,6 +1,9 @@
 import java.io.*;
 import java.text.*;
 import java.util.*;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
 public class AnalyzeStatsCoverage {
   public static void main(String[] args) throws Exception {
     File inDirectory = new File("in");
@@ -26,6 +29,9 @@ public class AnalyzeStatsCoverage {
       while (!dirs.isEmpty()) {
         File file = dirs.pop();
         if (file.isDirectory()) {
+          if (file.getName().equals("statuses")) {
+            continue;
+          }
           for (File f : file.listFiles()) {
             dirs.add(f);
           }
@@ -179,22 +185,103 @@ public class AnalyzeStatsCoverage {
       }
     }
 
+    /* Parse bridge network statuses and append "running " lines to
+     * files tempDirectory/$date/$fingerprint-$date for later processing
+     * by fingerprint and date. */
+    SimpleDateFormat statusFormat =
+        new SimpleDateFormat("yyyyMMdd-HHmmss");
+    statusFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    if (inDirectory.exists() && inDirectory.isDirectory()) {
+      System.out.println("Parsing statuses in '"
+          + inDirectory.getAbsolutePath() + "'.");
+      long started = System.currentTimeMillis();
+      tempDirectory.mkdirs();
+      Stack<File> dirs = new Stack<File>();
+      SortedSet<File> files = new TreeSet<File>();
+      dirs.add(inDirectory);
+      while (!dirs.isEmpty()) {
+        File file = dirs.pop();
+        if (file.isDirectory()) {
+          if (file.getName().equals("extra-infos")) {
+            continue;
+          }
+          for (File f : file.listFiles()) {
+            dirs.add(f);
+          }
+        } else {
+          files.add(file);
+        }
+      }
+      int totalFiles = files.size(), fileNumber = 0;
+      for (File file : files) {
+        if (++fileNumber % (totalFiles / 1000) == 0) {
+          int numberLength = String.valueOf(totalFiles).length();
+          long minutesLeft = (((System.currentTimeMillis() - started)
+              * (totalFiles - fileNumber)) / fileNumber) / (60L * 1000L);
+          System.out.printf("Parsed %" + numberLength + "d of %"
+              + numberLength + "d statuses (%3d %%) %d minutes left%n",
+              fileNumber, totalFiles, (fileNumber * 100) / totalFiles,
+              minutesLeft);
+        }
+        long statusPublishedMillis = statusFormat.parse(
+            file.getName().substring(0, "YYYYMMdd-HHmmss".length())).
+            getTime();
+        SortedSet<String> statusPublishedDates = new TreeSet<String>();
+        String statusPublishedString = dateTimeFormat.format(
+            statusPublishedMillis);
+        statusPublishedDates.add(dateFormat.format(
+            statusPublishedMillis));
+        statusPublishedDates.add(dateFormat.format(
+            statusPublishedMillis + 15L * 60L * 1000L));
+        BufferedReader br = new BufferedReader(new FileReader(file));
+        String line, rLine = null;
+        while ((line = br.readLine()) != null) {
+          if (line.startsWith("r ")) {
+            rLine = line;
+          } else if (line.startsWith("s ") && line.contains(" Running") &&
+              rLine != null) {
+            String[] parts = rLine.split(" ");
+            if (parts.length != 9) {
+              System.out.println("Illegal line '" + rLine + "' in "
+                  + file.getAbsolutePath() + ".  Skipping this line.");
+              continue;
+            }
+            String fingerprint = Hex.encodeHexString(Base64.decodeBase64(
+                parts[2] + "=="));
+            for (String date : statusPublishedDates) {
+              File outputFile = new File(tempDirectory, date + "/"
+                  + fingerprint.toUpperCase() + "-" + date);
+              outputFile.getParentFile().mkdirs();
+              BufferedWriter bw = new BufferedWriter(new FileWriter(
+                  outputFile, true));
+              bw.write("running " + statusPublishedString + "\n");
+              bw.close();
+            }
+          }
+        }
+      }
+    }
+
     /* Parse relevant lines by fingerprint and date.  The result will be
      * how many bytes that relay or bridge read/wrote in total, and how
      * many bytes were included in the different reported statistics.
-     * Another result is the number of seconds for which this relay or
-     * bridge reported byte histories and other statistics. */
+     * Other results are the number of seconds for which this relay or
+     * bridge reported byte histories and other statistics, either based
+     * on self-reported bandwidth histories or based on the Running flag
+     * in bridge network statuses. */
     if (tempDirectory.exists() && tempDirectory.isDirectory()) {
       System.out.println("Evaluating previously parsed descriptors in '"
           + tempDirectory.getAbsolutePath() + "'.");
       BufferedWriter bw = new BufferedWriter(new FileWriter(outFile));
       bw.write("fingerprint,date,totalwritten,totalread,totalseconds,"
-          + "dirreqwritten,dirreqread,dirreqseconds,entrywritten,"
-          + "entryread,entryseconds,exitwritten,exitread,exitseconds,"
-          + "cellwritten,cellread,cellseconds,connbidirectwritten,"
-          + "connbidirectread,connbidirectseconds,bridgewritten,"
-          + "bridgeread,bridgeseconds,geoipwritten,geoipread,"
-          + "geoipseconds\n");
+          + "totalrunning,dirreqwritten,dirreqread,dirreqseconds,"
+          + "dirreqrunning,entrywritten,entryread,entryseconds,"
+          + "entryrunning,exitwritten,exitread,exitseconds,exitrunning,"
+          + "cellwritten,cellread,cellseconds,cellrunning,"
+          + "connbidirectwritten,connbidirectread,connbidirectseconds,"
+          + "connbidirectrunning,bridgewritten,bridgeread,bridgeseconds,"
+          + "bridgerunning,geoipwritten,geoipread,geoipseconds,"
+          + "geoiprunning\n");
       Stack<File> dirs = new Stack<File>();
       SortedSet<File> files = new TreeSet<File>();
       dirs.add(tempDirectory);
@@ -221,7 +308,8 @@ public class AnalyzeStatsCoverage {
         long dateStartMillis = dateFormat.parse(date).getTime();
         long dateEndMillis = dateStartMillis + 24L * 60L * 60L * 1000L;
         long[] writeHistory = new long[96], readHistory = new long[96];
-        boolean[] running = new boolean[96],
+        boolean[] upBridge = new boolean[96],
+            upStatus = new boolean[96],
             dirreqStats = new boolean[96],
             entryStats = new boolean[96],
             exitStats = new boolean[96],
@@ -232,7 +320,17 @@ public class AnalyzeStatsCoverage {
         BufferedReader br = new BufferedReader(new FileReader(file));
         String line;
         while ((line = br.readLine()) != null) {
-          if (line.startsWith("write-history ") ||
+          if (line.startsWith("running ")) {
+            long statusPublishedMillis = dateTimeFormat.parse(
+                line.substring("running ".length())).getTime();
+            int j = (int) ((statusPublishedMillis - dateStartMillis)
+                / (900L * 1000L));
+            for (int i = 0; i < 2; i++) {
+              if (j + i >= 0 && j + i < 96) {
+                upStatus[j + i] = true;
+              }
+            }
+          } else if (line.startsWith("write-history ") ||
               line.startsWith("read-history ")) {
             long[] history = line.startsWith("write-history ")
                 ? writeHistory : readHistory;
@@ -255,7 +353,7 @@ public class AnalyzeStatsCoverage {
                   System.exit(1);
                 }
                 history[j] = Long.parseLong(historyValues[i]);
-                running[j] = true;
+                upBridge[j] = true;
               }
               currentMillis += 15L * 60L * 1000L;
             }
@@ -312,52 +410,66 @@ public class AnalyzeStatsCoverage {
         br.close();
         bw.write(fingerprint + "," + date + ",");
         long totalWritten = 0L, totalRead = 0L, totalSeconds = 0L,
-            dirreqWritten = 0L, dirreqRead = 0L, dirreqSeconds = 0L,
-            entryWritten = 0L, entryRead = 0L, entrySeconds = 0L,
+            totalRunning = 0L, dirreqWritten = 0L, dirreqRead = 0L,
+            dirreqSeconds = 0L, dirreqRunning = 0L, entryWritten = 0L,
+            entryRead = 0L, entrySeconds = 0L, entryRunning = 0L,
             exitWritten = 0L, exitRead = 0L, exitSeconds = 0L,
-            cellWritten = 0L, cellRead = 0L, cellSeconds = 0L,
-            connBiDirectWritten = 0L, connBiDirectRead = 0L,
-            connBiDirectSeconds = 0L, bridgeWritten = 0L, bridgeRead = 0L,
-            bridgeSeconds = 0L, geoipWritten = 0L, geoipRead = 0L,
-            geoipSeconds = 0L;
+            exitRunning = 0L, cellWritten = 0L, cellRead = 0L,
+            cellSeconds = 0L, cellRunning = 0L, connBiDirectWritten = 0L,
+            connBiDirectRead = 0L, connBiDirectSeconds = 0L,
+            connBiDirectRunning = 0L, bridgeWritten = 0L, bridgeRead = 0L,
+            bridgeSeconds = 0L, bridgeRunning = 0L, geoipWritten = 0L,
+            geoipRead = 0L, geoipSeconds = 0L, geoipRunning = 0L;
         for (int i = 0; i < 96; i++) {
           totalWritten += writeHistory[i];
           totalRead += readHistory[i];
-          totalSeconds += running[i] ? 900L : 0L;
+          totalSeconds += upBridge[i] ? 900L : 0L;
+          totalRunning += upStatus[i] ? 900L : 0L;
           dirreqWritten += dirreqStats[i] ? writeHistory[i] : 0L;
           dirreqRead += dirreqStats[i] ? readHistory[i] : 0L;
-          dirreqSeconds += dirreqStats[i] && running[i] ? 900L : 0L;
+          dirreqSeconds += dirreqStats[i] && upBridge[i] ? 900L : 0L;
+          dirreqRunning += dirreqStats[i] && upStatus[i] ? 900L : 0L;
           entryWritten += entryStats[i] ? writeHistory[i] : 0L;
           entryRead += entryStats[i] ? readHistory[i] : 0L;
-          entrySeconds += entryStats[i] && running[i] ? 900L : 0L;
+          entrySeconds += entryStats[i] && upBridge[i] ? 900L : 0L;
+          entryRunning += entryStats[i] && upStatus[i] ? 900L : 0L;
           exitWritten += exitStats[i] ? writeHistory[i] : 0L;
           exitRead += exitStats[i] ? readHistory[i] : 0L;
-          exitSeconds += exitStats[i] && running[i] ? 900L : 0L;
+          exitSeconds += exitStats[i] && upBridge[i] ? 900L : 0L;
+          exitRunning += exitStats[i] && upStatus[i] ? 900L : 0L;
           cellWritten += cellStats[i] ? writeHistory[i] : 0L;
           cellRead += cellStats[i] ? readHistory[i] : 0L;
-          cellSeconds += cellStats[i] && running[i] ? 900L : 0L;
+          cellSeconds += cellStats[i] && upBridge[i] ? 900L : 0L;
+          cellRunning += cellStats[i] && upStatus[i] ? 900L : 0L;
           connBiDirectWritten += connBiDirectStats[i] ? writeHistory[i]
               : 0L;
           connBiDirectRead += connBiDirectStats[i] ? readHistory[i]
               : 0L;
-          connBiDirectSeconds += connBiDirectStats[i] && running[i] ? 900L
-              : 0L;
+          connBiDirectSeconds += connBiDirectStats[i] && upBridge[i]
+              ? 900L : 0L;
+          connBiDirectRunning += connBiDirectStats[i] && upStatus[i]
+              ? 900L : 0L;
           bridgeWritten += bridgeStats[i] ? writeHistory[i] : 0L;
           bridgeRead += bridgeStats[i] ? readHistory[i] : 0L;
-          bridgeSeconds += bridgeStats[i] && running[i] ? 900L : 0L;
+          bridgeSeconds += bridgeStats[i] && upBridge[i] ? 900L : 0L;
+          bridgeRunning += bridgeStats[i] && upStatus[i] ? 900L : 0L;
           geoipWritten += geoipStats[i] ? writeHistory[i] : 0L;
           geoipRead += geoipStats[i] ? readHistory[i] : 0L;
-          geoipSeconds += geoipStats[i] && running[i] ? 900L : 0L;
+          geoipSeconds += geoipStats[i] && upBridge[i] ? 900L : 0L;
+          geoipRunning += geoipStats[i] && upStatus[i] ? 900L : 0L;
         }
         bw.write(totalWritten + "," + totalRead + "," + totalSeconds + ","
-            + dirreqWritten + "," + dirreqRead + "," + dirreqSeconds + ","
-            + entryWritten + "," + entryRead + "," + entrySeconds + ","
-            + exitWritten + "," + exitRead + "," + exitSeconds + ","
-            + cellWritten + "," + cellRead + "," + cellSeconds + ","
-            + connBiDirectWritten + "," + connBiDirectRead + ","
-            + connBiDirectSeconds + "," + bridgeWritten + ","
-            + bridgeRead + "," + bridgeSeconds + "," + geoipWritten + ","
-            + geoipRead + "," + geoipSeconds + "\n");
+            + totalRunning + "," + dirreqWritten + "," + dirreqRead + ","
+            + dirreqSeconds + "," + dirreqRunning + "," + entryWritten
+            + "," + entryRead + "," + entrySeconds + "," + entryRunning
+            + "," + exitWritten + "," + exitRead + "," + exitSeconds + ","
+            + exitRunning + "," + cellWritten + "," + cellRead + ","
+            + cellSeconds + "," + cellRunning + "," + connBiDirectWritten
+            + "," + connBiDirectRead + "," + connBiDirectSeconds + ","
+            + connBiDirectRunning + "," + bridgeWritten + "," + bridgeRead
+            + "," + bridgeSeconds + "," + bridgeRunning + ","
+            + geoipWritten + "," + geoipRead + "," + geoipSeconds + ","
+            + geoipRunning + "\n");
       }
       bw.close();
     }
diff --git a/task-3261/README b/task-3261/README
index 1b95743..cb430ac 100644
--- a/task-3261/README
+++ b/task-3261/README
@@ -1,4 +1,4 @@
-$ javac AnalyzeStatsCoverage.java
-$ java -Xmx4g AnalyzeStatsCoverage
+$ javac -cp commons-codec-1.4.jar AnalyzeStatsCoverage.java
+$ java -cp commons-codec-1.4.jar:. -Xmx4g AnalyzeStatsCoverage
 $ R --slave -f stats-coverage.R
 
diff --git a/task-3261/stats-coverage.R b/task-3261/stats-coverage.R
index d0794b9..aef63f2 100644
--- a/task-3261/stats-coverage.R
+++ b/task-3261/stats-coverage.R
@@ -1,21 +1,25 @@
 library(ggplot2)
+library(scales)
 b <- read.csv("stats-coverage.csv")
 b <- aggregate(list(
   totalwritten = b$totalwritten, totalseconds = b$totalseconds,
-  bridgewritten = b$bridgewritten, bridgeseconds = b$bridgeseconds,
-  geoipwritten = b$geoipwritten, geoipseconds = b$geoipseconds),
-  by = list(date = as.Date(b$date)), sum)
+  totalrunning = b$totalrunning, bridgewritten = b$bridgewritten,
+  bridgeseconds = b$bridgeseconds, bridgerunning = b$bridgerunning,
+  geoipwritten = b$geoipwritten, geoipseconds = b$geoipseconds,
+  geoiprunning = b$geoiprunning), by = list(date = as.Date(b$date)), sum)
 b <- rbind(data.frame(date = b$date, variable = "by written bytes",
     value = (b$bridgewritten + b$geoipwritten) / b$totalwritten),
-  data.frame(date = b$date, variable = "by uptime seconds",
-    value = (b$bridgeseconds + b$geoipseconds) / b$totalseconds))
+  data.frame(date = b$date, variable = "by uptime (bandwidth history)",
+    value = (b$bridgeseconds + b$geoipseconds) / b$totalseconds),
+  data.frame(date = b$date, variable = "by uptime (Running flag)",
+    value = (b$bridgerunning + b$geoiprunning) / b$totalrunning))
+b <- b[b$date >= as.Date("2010-10-01") & b$date < as.Date("2012-04-01"), ]
 ggplot(b, aes(x = date, y = value)) +
 geom_line() +
 facet_grid(variable ~ .) +
-scale_x_date(name = "", major = "3 months", minor = "1 month",
-  format = "%b %Y", limits = as.Date(c("2010-10-01", "2011-09-30"))) +
-scale_y_continuous(name = "", limits = c(0, 1), formatter = "percent") +
+scale_x_date(name = "") +
+scale_y_continuous(name = "", limits = c(0, 1), labels = percent) +
 scale_colour_hue(name = "") +
 opts(title = "Fraction of bridges reporting statistics\n")
-ggsave("stats-coverage-bridges.png", width = 8, height = 6, dpi = 72)
+ggsave("stats-coverage-bridges.png", width = 8, height = 7, dpi = 72)
 



More information about the tor-commits mailing list