[tor-commits] [metrics-tasks/master] Update analysis code for #3261.

karsten at torproject.org karsten at torproject.org
Fri Apr 27 06:52:55 UTC 2012


commit 0dddee5316aebf046db58858af81220bdf3fcb6a
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Fri Apr 27 08:50:31 2012 +0200

    Update analysis code for #3261.
---
 task-3261/.gitignore                  |    2 +
 task-3261/AggregateStats.java         |  122 +++++++++
 task-3261/AnalyzeDescriptorParts.java |  315 ++++++++++++++++++++++
 task-3261/AnalyzeStatsCoverage.java   |  478 ---------------------------------
 task-3261/ExtractDescriptorParts.java |  172 ++++++++++++
 task-3261/README                      |   38 +++-
 task-3261/plot.R                      |   65 +++++
 task-3261/stats-coverage.R            |   25 --
 8 files changed, 711 insertions(+), 506 deletions(-)

diff --git a/task-3261/.gitignore b/task-3261/.gitignore
index 2bfd23b..5f2b4dc 100644
--- a/task-3261/.gitignore
+++ b/task-3261/.gitignore
@@ -2,6 +2,8 @@
 *.png
 *.pdf
 *.csv
+bridge-network-statuses
+parse-history
 in/
 temp/
 *.jar
diff --git a/task-3261/AggregateStats.java b/task-3261/AggregateStats.java
new file mode 100755
index 0000000..73f7279
--- /dev/null
+++ b/task-3261/AggregateStats.java
@@ -0,0 +1,122 @@
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/* Aggregate half-hourly per-bridge data to daily statistics. */
+public class AggregateStats {
+  public static void main(String[] args) throws Exception {
+
+    /* Read file containing publication times of bridge statuses and count
+     * statuses per day. */
+    SortedMap<String, Long> publishedStatuses =
+        new TreeMap<String, Long>();
+    File statusFile = new File("bridge-network-statuses");
+    if (!statusFile.exists()) {
+      System.err.println(statusFile.getAbsolutePath() + " does not "
+          + "exist.  Exiting.");
+      System.exit(1);
+    } else {
+      BufferedReader br = new BufferedReader(new FileReader(statusFile));
+      String line;
+      while ((line = br.readLine()) != null) {
+        String date = line.split(" ")[0];
+        if (publishedStatuses.containsKey(date)) {
+          publishedStatuses.put(date, publishedStatuses.get(date) + 1L);
+        } else {
+          publishedStatuses.put(date, 1L);
+        }
+      }
+    }
+
+    /* Aggregate single observations in memory. */
+    SortedMap<String, Map<String, Long>> aggregatedStats =
+        new TreeMap<String, Map<String, Long>>();
+    SortedSet<String> allKeys = new TreeSet<String>();
+    File evalOutFile = new File("eval-out.csv");
+    if (!evalOutFile.exists()) {
+      System.err.println(evalOutFile.getAbsolutePath() + " does not "
+          + "exist.  Exiting.");
+      System.exit(1);
+    } else {
+      BufferedReader ebr = new BufferedReader(new FileReader(evalOutFile));
+      String line;
+      while ((line = ebr.readLine()) != null) {
+        String[] parts = line.split(",");
+        String date = parts[0].split(" ")[0];
+        String key = parts[2] + "," + parts[3] + "," + parts[4];
+        allKeys.add(key);
+        Map<String, Long> stats = null;
+        if (aggregatedStats.containsKey(date)) {
+          stats = aggregatedStats.get(date);
+        } else {
+          stats = new HashMap<String, Long>();
+          aggregatedStats.put(date, stats);
+        }
+        if (stats.containsKey(key)) {
+          stats.put(key, stats.get(key) + 1L);
+        } else {
+          stats.put(key, 1L);
+        }
+      }
+      ebr.close();
+    }
+
+    /* Write aggregated statistics to aggregated.csv. */
+    File aggregatedFile = new File("aggregated.csv");
+    BufferedWriter abw = new BufferedWriter(new FileWriter(
+        aggregatedFile));
+    abw.write("date,reported,discarded,reason,bridges,statuses\n");
+    long previousDateMillis = -1L;
+    final long DAY = 24L * 60L * 60L * 1000L;
+    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    for (Map.Entry<String, Map<String, Long>> e :
+        aggregatedStats.entrySet()) {
+      String date = e.getKey();
+      long currentDateMillis = dateFormat.parse(date).getTime();
+      while (previousDateMillis > -1L &&
+          currentDateMillis - previousDateMillis > DAY) {
+        previousDateMillis += DAY;
+        String tempDate = dateFormat.format(previousDateMillis);
+        for (String key : allKeys) {
+          abw.write(tempDate + "," + key + ",NA,0\n");
+        }
+      }
+      previousDateMillis = currentDateMillis;
+      String nextDate = dateFormat.format(currentDateMillis + DAY);
+      String nextPlusOneDate = dateFormat.format(currentDateMillis
+          + 2 * DAY);
+      long statuses = publishedStatuses.containsKey(date) ?
+          publishedStatuses.get(date) : 0L;
+      Map<String, Long> stats = e.getValue();
+      if (!aggregatedStats.containsKey(nextDate) ||
+          !aggregatedStats.containsKey(nextPlusOneDate) ||
+          statuses < 40) {
+        for (String key : allKeys) {
+          abw.write(date + "," + key + ",NA," + statuses + "\n");
+        }
+      } else {
+        for (String key : allKeys) {
+          if (stats.containsKey(key)) {
+            abw.write(date + "," + key + "," + (stats.get(key) / statuses)
+                + "," + statuses + "\n");
+          } else {
+            abw.write(date + "," + key + ",0," + statuses + "\n");
+          }
+        }
+      }
+    }
+    abw.close();
+  }
+}
+
diff --git a/task-3261/AnalyzeDescriptorParts.java b/task-3261/AnalyzeDescriptorParts.java
new file mode 100755
index 0000000..7f4bbc4
--- /dev/null
+++ b/task-3261/AnalyzeDescriptorParts.java
@@ -0,0 +1,315 @@
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/* Analyze descriptor parts bridge by bridge and determine whether a
+ * bridge reported usage statistics at a given time, and if not, find out
+ * why not. */
+public class AnalyzeDescriptorParts {
+  public static void main(String[] args) throws Exception {
+
+    /* Define paths: we read descriptor part files from temp/ and append
+     * statistics on half hour detail to eval-out.csv. */
+    File tempDirectory = new File("temp");
+    File evalOutFile = new File("eval-out.csv");
+
+    /* Parse descriptor part files bridge by bridge. */
+    SimpleDateFormat dateTimeFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    final long HALF_HOUR = 30L * 60L * 1000L;
+    BufferedWriter ebw = new BufferedWriter(new FileWriter(evalOutFile));
+    for (File tempFile : tempDirectory.listFiles()) {
+      String fingerprint = tempFile.getName();
+      BufferedReader br = new BufferedReader(new FileReader(tempFile));
+      String line;
+
+      /* For each bridge, determine when it was first seen as relay.  All
+       * timestamps are in half hours since 1970-01-01 00:00:00 UTC. */
+      long firstRunningRelay = Long.MAX_VALUE;
+
+      /* For each time the bridge was listed in a bridge network status as
+       * Running, remember the status publication time and referenced
+       * descriptor digest. */
+      SortedMap<Long, String> runningBridgeHalfHours =
+          new TreeMap<Long, String>();
+
+      /* For each descriptor published by the bridge, remember seven
+       * timestamps in an array:
+       * 0: when the bridge was started due to the descriptor publication
+       *    time and reported uptime,
+       * 1: when the descriptor was published,
+       * 2: when the descriptor was first referenced in a status,
+ * 3: when the descriptor was last referenced in a status,
+       * 4: when the first descriptor in the same uptime session was first
+       *    referenced in a status,
+       * 5: when the last descriptor in the same uptime session was last
+       *    referenced in a status, and
+       * 6: when the last descriptor in the same uptime session was
+       *    published. */
+      Map<String, long[]> descriptorSessions =
+          new HashMap<String, long[]>();
+
+      /* For each descriptor, remember the platform string. */
+      Map<String, String> descriptorPlatforms =
+          new HashMap<String, String>();
+
+      /* For each bridge-stats or geoip-stats line, remember a long[] with
+       * two timestamps and a boolean:
+       * 0: when the statistics interval started,
+       * 1: when the statistics interval ended,
+       * 2: whether the bridge reported its geoip file digest (only
+       *    0.2.3.x or higher). */
+      SortedMap<Long, long[]> bridgeStats = new TreeMap<Long, long[]>(),
+          geoipStats = new TreeMap<Long, long[]>();
+
+      /* Parse the file in temp/ line by line. */
+      while ((line = br.readLine()) != null) {
+
+        /* Remember when a descriptor was published and which platform
+         * string it contained. */
+        if (line.startsWith("server-descriptor ")) {
+          String[] parts = line.split(" ");
+          long publishedMillis = dateTimeFormat.parse(parts[1] + " "
+              + parts[2]).getTime();
+          long publishedHalfHour = publishedMillis / HALF_HOUR + 1L;
+          String descriptor = parts[3];
+          long startedHalfHour = (publishedMillis
+              - Long.parseLong(parts[4]) * 1000L) / HALF_HOUR + 1L;
+          long[] descriptorSession;
+          if (descriptorSessions.containsKey(descriptor)) {
+            descriptorSession = descriptorSessions.get(descriptor);
+          } else {
+            descriptorSession = new long[7];
+            descriptorSessions.put(descriptor, descriptorSession);
+          }
+          if (descriptorSession[0] == 0) {
+            descriptorSession[0] = startedHalfHour;
+            descriptorSession[1] = publishedHalfHour;
+          }
+          String platform = line.substring(line.indexOf("Tor "));
+          descriptorPlatforms.put(descriptor, platform);
+
+        /* Remember when a descriptor was first and last referenced from a
+         * bridge network status. */
+        } else if (line.startsWith("running-bridge ")) {
+          String[] parts = line.split(" ");
+          long publishedMillis = dateTimeFormat.parse(parts[1] + " "
+              + parts[2]).getTime();
+          long publishedHalfHour = publishedMillis / HALF_HOUR;
+          String descriptor = parts[3];
+          long[] descriptorSession;
+          if (descriptorSessions.containsKey(descriptor)) {
+            descriptorSession = descriptorSessions.get(descriptor);
+            if (descriptorSession[2] == 0 ||
+                publishedHalfHour < descriptorSession[2]) {
+              descriptorSession[2] = publishedHalfHour;
+            }
+            if (publishedHalfHour > descriptorSession[3]) {
+              descriptorSession[3] = publishedHalfHour;
+            }
+          } else {
+            descriptorSession = new long[7];
+            descriptorSession[2] = publishedHalfHour;
+            descriptorSession[3] = publishedHalfHour;
+            descriptorSessions.put(descriptor, descriptorSession);
+          }
+          runningBridgeHalfHours.put(publishedHalfHour, descriptor);
+
+        /* Remember the start and end of a bridge-stats or geoip-stats
+         * interval, and remember whether the extra-info descriptor
+         * contained a geoip-db-digest line. */
+        } else if (line.startsWith("bridge-stats ") ||
+            line.startsWith("geoip-stats ")) {
+          String parts[] = line.split(" ");
+          long statsEndMillis = dateTimeFormat.parse(parts[1] + " "
+              + parts[2]).getTime();
+          long statsEnd = statsEndMillis / HALF_HOUR;
+          long statsStart = (statsEndMillis - Long.parseLong(parts[3])
+              * 1000L) / HALF_HOUR;
+          boolean hasGeoipFile = !parts[4].equals("NA");
+          long[] stats = new long[3];
+          stats[0] = statsStart;
+          stats[1] = statsEnd;
+          stats[2] = hasGeoipFile ? 1L : 0L;
+          if (line.startsWith("bridge-stats ")) {
+            bridgeStats.put(statsStart, stats);
+          } else {
+            geoipStats.put(statsStart, stats);
+          }
+
+        /* Remember when this bridge was first seen as a relay in the
+         * consensus. */
+        } else if (line.startsWith("running-relay ")) {
+          long runningRelayMillis = dateTimeFormat.parse(line.substring(
+              "running-relay ".length())).getTime() / HALF_HOUR;
+          firstRunningRelay = Math.min(firstRunningRelay,
+              runningRelayMillis);
+        }
+      }
+      br.close();
+
+      /* Sort descriptors by their first reference in a bridge network
+       * status. */
+      SortedMap<Long, String> descriptorsByFirstReferenced =
+          new TreeMap<Long, String>();
+      for (Map.Entry<String, long[]> e : descriptorSessions.entrySet()) {
+        if (e.getValue()[2] == 0) {
+          continue;
+        }
+        descriptorsByFirstReferenced.put(e.getValue()[2], e.getKey());
+      }
+      if (descriptorsByFirstReferenced.isEmpty()) {
+        continue;
+      }
+
+      /* Go through list of descriptors and see if two or more of them
+       * belong to the same bridge uptime session.  Two descriptors are
+       * considered as part of the same uptime session if a) they are
+       * referenced from two subsequent statuses and b) the start time in
+       * the second descriptor lies before the publication time of the
+       * first descriptor.  First make a list of all descriptors of a
+       * session and then update their long[] values to contain session
+       * information. */
+      long[] previousDescriptorTimestamps = null;
+      long firstStatusInSession = Long.MAX_VALUE,
+          lastStatusInSession = -1L, lastDescriptorPublished = -1L;
+      Set<String> descriptorsInSession = new HashSet<String>();
+      for (String descriptor : descriptorsByFirstReferenced.values()) {
+        long[] currentDescriptorTimestamps =
+            descriptorSessions.get(descriptor);
+        String currentDescriptor = descriptor;
+        if (previousDescriptorTimestamps != null) {
+          boolean sameSession =
+              previousDescriptorTimestamps[3] + 1L ==
+              currentDescriptorTimestamps[2] &&
+              currentDescriptorTimestamps[0] <=
+              previousDescriptorTimestamps[1];
+          if (!sameSession) {
+            for (String descriptorInSession : descriptorsInSession) {
+              long[] descriptorTimestamps = descriptorSessions.get(
+                  descriptorInSession);
+              descriptorTimestamps[4] = firstStatusInSession;
+              descriptorTimestamps[5] = lastStatusInSession;
+              descriptorTimestamps[6] = lastDescriptorPublished;
+            }
+            firstStatusInSession = Long.MAX_VALUE;
+            lastStatusInSession = lastDescriptorPublished = -1L;
+            descriptorsInSession.clear();
+          }
+        }
+        firstStatusInSession = Math.min(firstStatusInSession,
+            currentDescriptorTimestamps[2]);
+        lastStatusInSession = Math.max(lastStatusInSession,
+            currentDescriptorTimestamps[3]);
+        lastDescriptorPublished = Math.max(lastDescriptorPublished,
+            currentDescriptorTimestamps[1]);
+        descriptorsInSession.add(currentDescriptor);
+        previousDescriptorTimestamps = currentDescriptorTimestamps;
+      }
+      for (String descriptorInSession : descriptorsInSession) {
+        long[] descriptorTimestamps = descriptorSessions.get(
+            descriptorInSession);
+        descriptorTimestamps[4] = firstStatusInSession;
+        descriptorTimestamps[5] = lastStatusInSession;
+        descriptorTimestamps[6] = lastDescriptorPublished;
+      }
+
+      /* Go through all statuses listing this bridge as Running, determine
+       * if it reported usage statistics and if they were considered for
+       * aggregation, and find out possible reasons for the bridge not
+       * reporting usage statistics. */
+      for (Map.Entry<Long, String> e :
+          runningBridgeHalfHours.entrySet()) {
+        long statusPublished = e.getKey();
+        String descriptor = e.getValue();
+        String platform = descriptorPlatforms.get(descriptor);
+        boolean reported = false, discarded = false;
+        String reason = "none";
+        if (firstRunningRelay <= statusPublished) {
+          /* The bridge was running as a relay before. */
+          discarded = true;
+          reason = "runasrelay";
+        }
+        if (!geoipStats.headMap(statusPublished + 1).isEmpty()) {
+          long[] stats = geoipStats.get(geoipStats.headMap(statusPublished
+              + 1).lastKey());
+          if (stats[0] <= statusPublished && stats[1] > statusPublished) {
+            /* Status publication time falls into stats interval. */
+            reported = true;
+            if (platform != null && platform.compareTo("Tor 0.2.2") > 0) {
+              /* geoip stats published by versions 0.2.2.x or higher are
+               * buggy and therefore discarded. */
+              discarded = true;
+              reason = "geoip022";
+            }
+          }
+        }
+        if (!bridgeStats.headMap(statusPublished + 1).isEmpty()) {
+          long[] stats = bridgeStats.get(bridgeStats.headMap(
+              statusPublished + 1).lastKey());
+          if (stats[0] <= statusPublished && stats[1] > statusPublished) {
+            /* Status publication time falls into stats interval. */
+            reported = true;
+            if (platform != null && platform.compareTo("Tor 0.2.3") > 0 &&
+                stats[2] == 0) {
+              /* The bridge running version 0.2.3.x did not have a geoip
+               * file and therefore published bad bridge statistics. */
+              discarded = true;
+              reason = "nogeoipfile";
+            }
+          }
+        }
+        if (!reported) {
+          /* The bridge didn't report statistics, so it doesn't matter
+           * whether we'd have discarded them. */
+          discarded = false;
+          if (!descriptorSessions.containsKey(descriptor)) {
+            /* The descriptor referenced in the bridge network status is
+             * unavailable, which means we cannot make any statement why the
+             * bridge did not report usage statistics. */
+            reason = "noserverdesc";
+          } else {
+            long[] descriptorTimestamps = descriptorSessions.get(descriptor);
+            long sessionStart = descriptorTimestamps[4],
+                sessionEnd = descriptorTimestamps[5],
+                lastDescPubl = descriptorTimestamps[6];
+            long currentStatsEnd = sessionStart
+                + 48 * ((statusPublished - sessionStart) / 48 + 1);
+            if (sessionEnd <= currentStatsEnd) {
+              /* The current uptime session ends before the 24-hour statistics
+               * interval. */
+              reason = "lessthan24h";
+            } else if (currentStatsEnd > lastDescPubl) {
+              /* The current uptime session ended after the 24-hour statistics
+               * interval, but the bridge didn't publish a descriptor
+               * containing the statistics. */
+              reason = "publdelay";
+            } else {
+              /* There is some other reason why the bridge did not report
+               * statistics. */
+              reason = "other";
+            }
+          }
+        }
+        ebw.write(dateTimeFormat.format(statusPublished * HALF_HOUR) + ","
+            + fingerprint + "," + reported + "," + discarded + ","
+            + reason + "\n");
+      }
+    }
+    ebw.close();
+  }
+}
+
diff --git a/task-3261/AnalyzeStatsCoverage.java b/task-3261/AnalyzeStatsCoverage.java
deleted file mode 100644
index 4688bde..0000000
--- a/task-3261/AnalyzeStatsCoverage.java
+++ /dev/null
@@ -1,478 +0,0 @@
-import java.io.*;
-import java.text.*;
-import java.util.*;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.codec.binary.Hex;
-public class AnalyzeStatsCoverage {
-  public static void main(String[] args) throws Exception {
-    File inDirectory = new File("in");
-    File tempDirectory = new File("temp");
-    File outFile = new File("stats-coverage.csv");
-
-    /* Extract relevant lines from extra-info descriptors in inDirectory
-     * and write them to files tempDirectory/$date/$fingerprint-$date for
-     * later processing by fingerprint and date. */
-    SimpleDateFormat dateTimeFormat =
-        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
-    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    if (inDirectory.exists() && inDirectory.isDirectory()) {
-      System.out.println("Parsing descriptors in '"
-          + inDirectory.getAbsolutePath() + "'.");
-      long started = System.currentTimeMillis();
-      tempDirectory.mkdirs();
-      Stack<File> dirs = new Stack<File>();
-      SortedSet<File> files = new TreeSet<File>();
-      dirs.add(inDirectory);
-      while (!dirs.isEmpty()) {
-        File file = dirs.pop();
-        if (file.isDirectory()) {
-          if (file.getName().equals("statuses")) {
-            continue;
-          }
-          for (File f : file.listFiles()) {
-            dirs.add(f);
-          }
-        } else {
-          files.add(file);
-        }
-      }
-      int totalFiles = files.size(), fileNumber = 0;
-      for (File file : files) {
-        if (++fileNumber % (totalFiles / 1000) == 0) {
-          int numberLength = String.valueOf(totalFiles).length();
-          long minutesLeft = (((System.currentTimeMillis() - started)
-              * (totalFiles - fileNumber)) / fileNumber) / (60L * 1000L);
-          System.out.printf("Parsed %" + numberLength + "d of %"
-              + numberLength + "d descriptors (%3d %%) %d minutes left%n",
-              fileNumber, totalFiles, (fileNumber * 100) / totalFiles,
-              minutesLeft);
-        }
-        BufferedReader br = new BufferedReader(new FileReader(file));
-        String line, fingerprint = null, publishedLine = null;
-        SortedMap<String, SortedSet<String>> linesByDate =
-            new TreeMap<String, SortedSet<String>>();
-        while ((line = br.readLine()) != null) {
-          if (line.startsWith("extra-info ")) {
-            fingerprint = line.split(" ")[2];
-          } else if (line.startsWith("write-history ") ||
-              line.startsWith("read-history ")) {
-            String[] parts = line.split(" ");
-            if (parts.length < 6) {
-              continue;
-            }
-            String historyEndDate = parts[1];
-            long historyEndMillis = dateTimeFormat.parse(parts[1] + " "
-                + parts[2]).getTime();
-            long intervalLength = Long.parseLong(parts[3].substring(1));
-            if (intervalLength != 900L) {
-              System.out.println("Non-standard interval length in "
-                  + "line '" + line + "' in file "
-                  + file.getAbsolutePath() + ".  Skipping this line.");
-              continue;
-            }
-            int intervals = parts[5].split(",").length;
-            long historyStartMillis = historyEndMillis
-                - (intervals * intervalLength * 1000L);
-            long currentMillis = historyStartMillis;
-            String currentDate;
-            while ((currentDate = dateFormat.format(currentMillis)).
-                compareTo(historyEndDate) <= 0) {
-              if (!linesByDate.containsKey(currentDate)) {
-                linesByDate.put(currentDate, new TreeSet<String>());
-              }
-              linesByDate.get(currentDate).add(line);
-              currentMillis += 24L * 60L * 60L * 1000L;
-            }
-          } else if (line.startsWith("dirreq-stats-end ") ||
-              line.startsWith("entry-stats-end ") ||
-              line.startsWith("exit-stats-end ") ||
-              line.startsWith("cell-stats-end ") ||
-              line.startsWith("conn-bi-direct ") ||
-              line.startsWith("bridge-stats-end ")) {
-            String[] parts = line.split(" ");
-            if (parts.length < 5) {
-              System.out.println("Malformed line '" + line + "' in "
-                  + "file " + file.getAbsolutePath() + ".  Skipping "
-                  + "this line.");
-              continue;
-            }
-            String statsEndDate = parts[1];
-            long statsEndMillis = dateTimeFormat.parse(parts[1] + " "
-                + parts[2]).getTime();
-            long intervalLength = Long.parseLong(parts[3].substring(1));
-            long statsStartMillis = statsEndMillis
-                - intervalLength * 1000L;
-            long currentMillis = statsStartMillis;
-            String currentDate;
-            while ((currentDate = dateFormat.format(currentMillis)).
-                compareTo(statsEndDate) <= 0) {
-              if (!linesByDate.containsKey(currentDate)) {
-                linesByDate.put(currentDate, new TreeSet<String>());
-              }
-              linesByDate.get(currentDate).add(line);
-              currentMillis += 24L * 60L * 60L * 1000L;
-            }
-          } else if (line.startsWith("published ")) {
-            publishedLine = line;
-          } else if (line.startsWith("geoip-start-time ")) {
-            if (publishedLine == null) {
-              System.out.println("Missing published line in file "
-                  + file.getAbsolutePath() + ".  Skipping "
-                  + "geoip-start-time line.");
-              continue;
-            }
-            String[] publishedParts = publishedLine.split(" ");
-            if (publishedParts.length < 3) {
-              System.out.println("Malformed line '" + publishedLine
-                  + "' in file " + file.getAbsolutePath() + ".  "
-                  + "Skipping geoip-start-time line.");
-              continue;
-            }
-            String[] parts = line.split(" ");
-            if (parts.length < 3) {
-              System.out.println("Malformed line '" + line + "' in "
-                  + "file " + file.getAbsolutePath() + ".  Skipping "
-                  + "this line.");
-              continue;
-            }
-            String statsEndDate = parts[1];
-            long statsEndMillis = dateTimeFormat.parse(
-                publishedParts[1] + " " + publishedParts[2]).getTime();
-            long statsStartMillis = dateTimeFormat.parse(parts[1] + " "
-                + parts[2]).getTime();
-            long intervalLength = (statsEndMillis - statsStartMillis)
-                / 1000L;
-            String rewrittenLine = "geoip-stats-end "
-                + publishedParts[1] + " " + publishedParts[2] + " ("
-                + intervalLength + " s)";
-            long currentMillis = statsStartMillis;
-            String currentDate;
-            while ((currentDate = dateFormat.format(currentMillis)).
-                compareTo(statsEndDate) <= 0) {
-              if (!linesByDate.containsKey(currentDate)) {
-                linesByDate.put(currentDate, new TreeSet<String>());
-              }
-              linesByDate.get(currentDate).add(rewrittenLine);
-              currentMillis += 24L * 60L * 60L * 1000L;
-            }
-          }
-        }
-        br.close();
-        for (Map.Entry<String, SortedSet<String>> e :
-            linesByDate.entrySet()) {
-          String date = e.getKey();
-          SortedSet<String> lines = e.getValue();
-          File outputFile = new File(tempDirectory, date + "/"
-              + fingerprint + "-" + date);
-          if (outputFile.exists()) {
-            br = new BufferedReader(new FileReader(outputFile));
-            while ((line = br.readLine()) != null) {
-              lines.add(line);
-            }
-            br.close();
-          }
-          outputFile.getParentFile().mkdirs();
-          BufferedWriter bw = new BufferedWriter(new FileWriter(
-              outputFile));
-          for (String l : lines) {
-            bw.write(l + "\n");
-          }
-          bw.close();
-        }
-      }
-    }
-
-    /* Parse bridge network statuses and append "running " lines to
-     * files tempDirectory/$date/$fingerprint-$date for later processing
-     * by fingerprint and date. */
-    SimpleDateFormat statusFormat =
-        new SimpleDateFormat("yyyyMMdd-HHmmss");
-    statusFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    if (inDirectory.exists() && inDirectory.isDirectory()) {
-      System.out.println("Parsing statuses in '"
-          + inDirectory.getAbsolutePath() + "'.");
-      long started = System.currentTimeMillis();
-      tempDirectory.mkdirs();
-      Stack<File> dirs = new Stack<File>();
-      SortedSet<File> files = new TreeSet<File>();
-      dirs.add(inDirectory);
-      while (!dirs.isEmpty()) {
-        File file = dirs.pop();
-        if (file.isDirectory()) {
-          if (file.getName().equals("extra-infos")) {
-            continue;
-          }
-          for (File f : file.listFiles()) {
-            dirs.add(f);
-          }
-        } else {
-          files.add(file);
-        }
-      }
-      int totalFiles = files.size(), fileNumber = 0;
-      for (File file : files) {
-        if (++fileNumber % (totalFiles / 1000) == 0) {
-          int numberLength = String.valueOf(totalFiles).length();
-          long minutesLeft = (((System.currentTimeMillis() - started)
-              * (totalFiles - fileNumber)) / fileNumber) / (60L * 1000L);
-          System.out.printf("Parsed %" + numberLength + "d of %"
-              + numberLength + "d statuses (%3d %%) %d minutes left%n",
-              fileNumber, totalFiles, (fileNumber * 100) / totalFiles,
-              minutesLeft);
-        }
-        long statusPublishedMillis = statusFormat.parse(
-            file.getName().substring(0, "YYYYMMdd-HHmmss".length())).
-            getTime();
-        SortedSet<String> statusPublishedDates = new TreeSet<String>();
-        String statusPublishedString = dateTimeFormat.format(
-            statusPublishedMillis);
-        statusPublishedDates.add(dateFormat.format(
-            statusPublishedMillis));
-        statusPublishedDates.add(dateFormat.format(
-            statusPublishedMillis + 15L * 60L * 1000L));
-        BufferedReader br = new BufferedReader(new FileReader(file));
-        String line, rLine = null;
-        while ((line = br.readLine()) != null) {
-          if (line.startsWith("r ")) {
-            rLine = line;
-          } else if (line.startsWith("s ") && line.contains(" Running") &&
-              rLine != null) {
-            String[] parts = rLine.split(" ");
-            if (parts.length != 9) {
-              System.out.println("Illegal line '" + rLine + "' in "
-                  + file.getAbsolutePath() + ".  Skipping this line.");
-              continue;
-            }
-            String fingerprint = Hex.encodeHexString(Base64.decodeBase64(
-                parts[2] + "=="));
-            for (String date : statusPublishedDates) {
-              File outputFile = new File(tempDirectory, date + "/"
-                  + fingerprint.toUpperCase() + "-" + date);
-              outputFile.getParentFile().mkdirs();
-              BufferedWriter bw = new BufferedWriter(new FileWriter(
-                  outputFile, true));
-              bw.write("running " + statusPublishedString + "\n");
-              bw.close();
-            }
-          }
-        }
-      }
-    }
-
-    /* Parse relevant lines by fingerprint and date.  The result will be
-     * how many bytes that relay or bridge read/wrote in total, and how
-     * many bytes were included in the different reported statistics.
-     * Other results are the number of seconds for which this relay or
-     * bridge reported byte histories and other statistics, either based
-     * on self-reported bandwidth histories or based on the Running flag
-     * in bridge network statuses. */
-    if (tempDirectory.exists() && tempDirectory.isDirectory()) {
-      System.out.println("Evaluating previously parsed descriptors in '"
-          + tempDirectory.getAbsolutePath() + "'.");
-      BufferedWriter bw = new BufferedWriter(new FileWriter(outFile));
-      bw.write("fingerprint,date,totalwritten,totalread,totalseconds,"
-          + "totalrunning,dirreqwritten,dirreqread,dirreqseconds,"
-          + "dirreqrunning,entrywritten,entryread,entryseconds,"
-          + "entryrunning,exitwritten,exitread,exitseconds,exitrunning,"
-          + "cellwritten,cellread,cellseconds,cellrunning,"
-          + "connbidirectwritten,connbidirectread,connbidirectseconds,"
-          + "connbidirectrunning,bridgewritten,bridgeread,bridgeseconds,"
-          + "bridgerunning,geoipwritten,geoipread,geoipseconds,"
-          + "geoiprunning\n");
-      Stack<File> dirs = new Stack<File>();
-      SortedSet<File> files = new TreeSet<File>();
-      dirs.add(tempDirectory);
-      while (!dirs.isEmpty()) {
-        File file = dirs.pop();
-        if (file.isDirectory()) {
-          for (File f : file.listFiles()) {
-            dirs.add(f);
-          }
-        } else {
-          files.add(file);
-        }
-      }
-      int totalFiles = files.size(), fileNumber = 0;
-      for (File file : files) {
-        if (++fileNumber % (totalFiles / 1000) == 0) {
-          int numberLength = String.valueOf(totalFiles).length();
-          System.out.printf("Evaluated %" + numberLength + "d of %"
-              + numberLength + "d descriptors/days (%3d %%)%n",
-              fileNumber, totalFiles, (fileNumber * 100) / totalFiles);
-        }
-        String fingerprint = file.getName().substring(0, 40);
-        String date = file.getName().substring(41);
-        long dateStartMillis = dateFormat.parse(date).getTime();
-        long dateEndMillis = dateStartMillis + 24L * 60L * 60L * 1000L;
-        long[] writeHistory = new long[96], readHistory = new long[96];
-        boolean[] upBridge = new boolean[96],
-            upStatus = new boolean[96],
-            dirreqStats = new boolean[96],
-            entryStats = new boolean[96],
-            exitStats = new boolean[96],
-            cellStats = new boolean[96],
-            connBiDirectStats = new boolean[96],
-            bridgeStats = new boolean[96],
-            geoipStats = new boolean[96];
-        BufferedReader br = new BufferedReader(new FileReader(file));
-        String line;
-        while ((line = br.readLine()) != null) {
-          if (line.startsWith("running ")) {
-            long statusPublishedMillis = dateTimeFormat.parse(
-                line.substring("running ".length())).getTime();
-            int j = (int) ((statusPublishedMillis - dateStartMillis)
-                / (900L * 1000L));
-            for (int i = 0; i < 2; i++) {
-              if (j + i >= 0 && j + i < 96) {
-                upStatus[j + i] = true;
-              }
-            }
-          } else if (line.startsWith("write-history ") ||
-              line.startsWith("read-history ")) {
-            long[] history = line.startsWith("write-history ")
-                ? writeHistory : readHistory;
-            String[] parts = line.split(" ");
-            long historyEndMillis = dateTimeFormat.parse(parts[1] + " "
-                + parts[2]).getTime();
-            String[] historyValues = parts[5].split(",");
-            long historyStartMillis = historyEndMillis
-                - (historyValues.length * 900L * 1000L);
-            long currentMillis = historyStartMillis;
-            for (int i = 0; i < historyValues.length; i++) {
-              if (currentMillis >= dateStartMillis &&
-                  currentMillis < dateEndMillis) {
-                int j = (int) ((currentMillis - dateStartMillis)
-                    / (900L * 1000L));
-                if (j < 0 || j >= 96) {
-                  System.out.println("Internal error when processing "
-                      + "line '" + line + "'.  Index = " + j
-                      + ".  Exiting.");
-                  System.exit(1);
-                }
-                history[j] = Long.parseLong(historyValues[i]);
-                upBridge[j] = true;
-              }
-              currentMillis += 15L * 60L * 1000L;
-            }
-          } else if (line.startsWith("dirreq-stats-end ") ||
-              line.startsWith("entry-stats-end ") ||
-              line.startsWith("exit-stats-end ") ||
-              line.startsWith("cell-stats-end ") ||
-              line.startsWith("conn-bi-direct ") ||
-              line.startsWith("bridge-stats-end ") ||
-              line.startsWith("geoip-stats-end ")) {
-            boolean[] stats = null;
-            if (line.startsWith("dirreq-stats-end ")) {
-              stats = dirreqStats;
-            } else if (line.startsWith("entry-stats-end ")) {
-              stats = entryStats;
-            } else if (line.startsWith("exit-stats-end ")) {
-              stats = exitStats;
-            } else if (line.startsWith("cell-stats-end ")) {
-              stats = cellStats;
-            } else if (line.startsWith("conn-bi-direct ")) {
-              stats = connBiDirectStats;
-            } else if (line.startsWith("bridge-stats-end ")) {
-              stats = bridgeStats;
-            } else if (line.startsWith("geoip-stats-end ")) {
-              stats = geoipStats;
-            } else {
-              System.out.println("Internal error when processing line '"
-                  + line + "'.  Exiting.");
-              System.exit(1);
-            }
-            String[] parts = line.split(" ");
-            long statsEndMillis = dateTimeFormat.parse(parts[1] + " "
-                + parts[2]).getTime();
-            long intervalLength = Long.parseLong(parts[3].substring(1));
-            long statsStartMillis = statsEndMillis
-                - intervalLength * 1000L;
-            long currentMillis = statsStartMillis;
-            while (currentMillis < dateEndMillis) {
-              if (currentMillis >= dateStartMillis) {
-                int j = (int) ((currentMillis - dateStartMillis)
-                    / (900L * 1000L));
-                if (j < 0 || j >= 96) {
-                  System.out.println("Internal error when processing "
-                      + "line '" + line + "'.  Index = " + j
-                      + ".  Exiting.");
-                  System.exit(1);
-                }
-                stats[j] = true;
-              }
-              currentMillis += 15L * 60L * 1000L;
-            }
-          }
-        }
-        br.close();
-        bw.write(fingerprint + "," + date + ",");
-        long totalWritten = 0L, totalRead = 0L, totalSeconds = 0L,
-            totalRunning = 0L, dirreqWritten = 0L, dirreqRead = 0L,
-            dirreqSeconds = 0L, dirreqRunning = 0L, entryWritten = 0L,
-            entryRead = 0L, entrySeconds = 0L, entryRunning = 0L,
-            exitWritten = 0L, exitRead = 0L, exitSeconds = 0L,
-            exitRunning = 0L, cellWritten = 0L, cellRead = 0L,
-            cellSeconds = 0L, cellRunning = 0L, connBiDirectWritten = 0L,
-            connBiDirectRead = 0L, connBiDirectSeconds = 0L,
-            connBiDirectRunning = 0L, bridgeWritten = 0L, bridgeRead = 0L,
-            bridgeSeconds = 0L, bridgeRunning = 0L, geoipWritten = 0L,
-            geoipRead = 0L, geoipSeconds = 0L, geoipRunning = 0L;
-        for (int i = 0; i < 96; i++) {
-          totalWritten += writeHistory[i];
-          totalRead += readHistory[i];
-          totalSeconds += upBridge[i] ? 900L : 0L;
-          totalRunning += upStatus[i] ? 900L : 0L;
-          dirreqWritten += dirreqStats[i] ? writeHistory[i] : 0L;
-          dirreqRead += dirreqStats[i] ? readHistory[i] : 0L;
-          dirreqSeconds += dirreqStats[i] && upBridge[i] ? 900L : 0L;
-          dirreqRunning += dirreqStats[i] && upStatus[i] ? 900L : 0L;
-          entryWritten += entryStats[i] ? writeHistory[i] : 0L;
-          entryRead += entryStats[i] ? readHistory[i] : 0L;
-          entrySeconds += entryStats[i] && upBridge[i] ? 900L : 0L;
-          entryRunning += entryStats[i] && upStatus[i] ? 900L : 0L;
-          exitWritten += exitStats[i] ? writeHistory[i] : 0L;
-          exitRead += exitStats[i] ? readHistory[i] : 0L;
-          exitSeconds += exitStats[i] && upBridge[i] ? 900L : 0L;
-          exitRunning += exitStats[i] && upStatus[i] ? 900L : 0L;
-          cellWritten += cellStats[i] ? writeHistory[i] : 0L;
-          cellRead += cellStats[i] ? readHistory[i] : 0L;
-          cellSeconds += cellStats[i] && upBridge[i] ? 900L : 0L;
-          cellRunning += cellStats[i] && upStatus[i] ? 900L : 0L;
-          connBiDirectWritten += connBiDirectStats[i] ? writeHistory[i]
-              : 0L;
-          connBiDirectRead += connBiDirectStats[i] ? readHistory[i]
-              : 0L;
-          connBiDirectSeconds += connBiDirectStats[i] && upBridge[i]
-              ? 900L : 0L;
-          connBiDirectRunning += connBiDirectStats[i] && upStatus[i]
-              ? 900L : 0L;
-          bridgeWritten += bridgeStats[i] ? writeHistory[i] : 0L;
-          bridgeRead += bridgeStats[i] ? readHistory[i] : 0L;
-          bridgeSeconds += bridgeStats[i] && upBridge[i] ? 900L : 0L;
-          bridgeRunning += bridgeStats[i] && upStatus[i] ? 900L : 0L;
-          geoipWritten += geoipStats[i] ? writeHistory[i] : 0L;
-          geoipRead += geoipStats[i] ? readHistory[i] : 0L;
-          geoipSeconds += geoipStats[i] && upBridge[i] ? 900L : 0L;
-          geoipRunning += geoipStats[i] && upStatus[i] ? 900L : 0L;
-        }
-        bw.write(totalWritten + "," + totalRead + "," + totalSeconds + ","
-            + totalRunning + "," + dirreqWritten + "," + dirreqRead + ","
-            + dirreqSeconds + "," + dirreqRunning + "," + entryWritten
-            + "," + entryRead + "," + entrySeconds + "," + entryRunning
-            + "," + exitWritten + "," + exitRead + "," + exitSeconds + ","
-            + exitRunning + "," + cellWritten + "," + cellRead + ","
-            + cellSeconds + "," + cellRunning + "," + connBiDirectWritten
-            + "," + connBiDirectRead + "," + connBiDirectSeconds + ","
-            + connBiDirectRunning + "," + bridgeWritten + "," + bridgeRead
-            + "," + bridgeSeconds + "," + bridgeRunning + ","
-            + geoipWritten + "," + geoipRead + "," + geoipSeconds + ","
-            + geoipRunning + "\n");
-      }
-      bw.close();
-    }
-  }
-}
-
diff --git a/task-3261/ExtractDescriptorParts.java b/task-3261/ExtractDescriptorParts.java
new file mode 100755
index 0000000..544022d
--- /dev/null
+++ b/task-3261/ExtractDescriptorParts.java
@@ -0,0 +1,172 @@
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.text.SimpleDateFormat;
+import java.util.Iterator;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeSet;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.torproject.descriptor.BridgeNetworkStatus;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.ServerDescriptor;
+
+/* Extract the relevant parts from bridge descriptors and consensuses that
+ * are required to answer what fraction of bridges are not reporting
+ * bridge usage statistics. */
+public class ExtractDescriptorParts {
+  public static void main(String[] args) throws Exception {
+
+    /* Define paths: we parse descriptor (tarballs) from in/, store the
+     * parse history to parse-history, write relevant parts per bridge to
+     * temp/, and write publication times of bridge network statuses to
+     * bridge-network-statuses. */
+    File inDirectory = new File("in");
+    File parseHistoryFile = new File("parse-history");
+    File tempDirectory = new File("temp");
+    File statusFile = new File("bridge-network-statuses");
+
+    /* Read descriptors. */
+    SimpleDateFormat dateTimeFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    DescriptorReader reader =
+        DescriptorSourceFactory.createDescriptorReader();
+    reader.addDirectory(inDirectory);
+    reader.setExcludeFiles(parseHistoryFile);
+    Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
+    while (descriptorFiles.hasNext()) {
+      DescriptorFile descriptorFile = descriptorFiles.next();
+      if (descriptorFile.getDescriptors() != null) {
+        for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+
+          /* Extract bridge-stats and geoip-stats from bridge extra-info
+           * descriptors. */
+          if (descriptor instanceof ExtraInfoDescriptor) {
+            System.out.print("e");
+            SortedSet<String> lines = new TreeSet<String>();
+            ExtraInfoDescriptor extraInfoDescriptor =
+                (ExtraInfoDescriptor) descriptor;
+            if (extraInfoDescriptor.getBridgeStatsEndMillis() > 0) {
+              lines.add("bridge-stats " + dateTimeFormat.format(
+                  extraInfoDescriptor.getBridgeStatsEndMillis()) + " "
+                  + extraInfoDescriptor.getBridgeStatsIntervalLength()
+                  + " " + (extraInfoDescriptor.getGeoipDbDigest() == null
+                  ? "NA" : extraInfoDescriptor.getGeoipDbDigest()));
+            }
+            if (extraInfoDescriptor.getGeoipStartTimeMillis() > 0) {
+              long intervalLength =
+                  (extraInfoDescriptor.getPublishedMillis()
+                  - extraInfoDescriptor.getGeoipStartTimeMillis())
+                  / 1000L;
+              String geoipStatsEnd = dateTimeFormat.format(
+                  extraInfoDescriptor.getPublishedMillis());
+              lines.add("geoip-stats " + geoipStatsEnd + " "
+                  + intervalLength + " "
+                  + (extraInfoDescriptor.getGeoipDbDigest() == null
+                  ? "NA" : extraInfoDescriptor.getGeoipDbDigest()));
+            }
+            if (!lines.isEmpty()) {
+              File outputFile = new File(tempDirectory,
+                  extraInfoDescriptor.getFingerprint().toUpperCase());
+              outputFile.getParentFile().mkdirs();
+              BufferedWriter bw = new BufferedWriter(new FileWriter(
+                  outputFile, true));
+              for (String l : lines) {
+                bw.write(l + "\n");
+              }
+              bw.close();
+            }
+
+          /* Extract all bridges with the Running flag from bridge network
+           * statuses.  Also extract the status publication time. */
+          } else if (descriptor instanceof BridgeNetworkStatus) {
+            System.out.print("n");
+            BridgeNetworkStatus status = (BridgeNetworkStatus) descriptor;
+            String published = dateTimeFormat.format(
+                status.getPublishedMillis());
+            if (status.getStatusEntries() != null) {
+              for (NetworkStatusEntry entry :
+                  status.getStatusEntries().values()) {
+                if (entry.getFlags().contains("Running")) {
+                  File outputFile = new File(tempDirectory,
+                      entry.getFingerprint().toUpperCase());
+                  outputFile.getParentFile().mkdirs();
+                  BufferedWriter bw = new BufferedWriter(new FileWriter(
+                      outputFile, true));
+                  String digest = entry.getDescriptor().toUpperCase();
+                  bw.write("running-bridge " + published + " " + digest
+                      + "\n");
+                  bw.close();
+                }
+              }
+              BufferedWriter bw = new BufferedWriter(new FileWriter(
+                  statusFile, true));
+              bw.write(published + "\n");
+              bw.close();
+            }
+
+          /* Extract publication time, digest, uptime, and platform string
+           * from bridge server descriptors. */
+          } else if (descriptor instanceof ServerDescriptor) {
+            System.out.print("s");
+            ServerDescriptor serverDescriptor =
+                (ServerDescriptor) descriptor;
+            String published = dateTimeFormat.format(
+                serverDescriptor.getPublishedMillis());
+            String digest = descriptorFile.getFileName().substring(
+                descriptorFile.getFileName().lastIndexOf("/") + 1).
+                toUpperCase();
+            String uptime = serverDescriptor.getUptime() == null ? "-1"
+                : String.valueOf(serverDescriptor.getUptime());
+            String platform = serverDescriptor.getPlatform() == null
+                ? "NA" : serverDescriptor.getPlatform();
+            File outputFile = new File(tempDirectory,
+                serverDescriptor.getFingerprint().toUpperCase());
+            outputFile.getParentFile().mkdirs();
+            BufferedWriter bw = new BufferedWriter(new FileWriter(
+                outputFile, true));
+            bw.write("server-descriptor " + published + " "
+                + digest + " " + uptime + " " + platform + "\n");
+            bw.close();
+
+          /* Extract hashed fingerprints of all relays with the Running
+           * flag from relay network status consensuses. */
+          } else if (descriptor instanceof RelayNetworkStatusConsensus) {
+            System.out.print("r");
+            RelayNetworkStatusConsensus status =
+                (RelayNetworkStatusConsensus) descriptor;
+            if (status.getStatusEntries() != null) {
+              for (NetworkStatusEntry entry :
+                  status.getStatusEntries().values()) {
+                if (entry.getFlags().contains("Running")) {
+                  String hashedFingerprint = Hex.encodeHexString(
+                      DigestUtils.sha(Hex.decodeHex(
+                      entry.getFingerprint().toCharArray()))).
+                      toUpperCase();
+                  File outputFile = new File(tempDirectory,
+                      hashedFingerprint);
+                  outputFile.getParentFile().mkdirs();
+                  BufferedWriter bw = new BufferedWriter(new FileWriter(
+                      outputFile, true));
+                  bw.write("running-relay " + dateTimeFormat.format(
+                      status.getValidAfterMillis()) + "\n");
+                  bw.close();
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
diff --git a/task-3261/README b/task-3261/README
old mode 100644
new mode 100755
index cb430ac..43ef208
--- a/task-3261/README
+++ b/task-3261/README
@@ -1,4 +1,36 @@
-$ javac -cp commons-codec-1.4.jar AnalyzeStatsCoverage.java
-$ java -cp commons-codec-1.4.jar.: -Xmx4g AnalyzeStatsCoverage
-$ R --slave -f stats-coverage.R
+What fraction of our bridges are not reporting usage statistics?
+================================================================
+
+Usage:
+
+1. Put metrics tarballs into a directory called in/.  The best parsing
+   performance can be achieved by decompressing tarballs without
+   extracting them.  The bridge-descriptors-* and consensuses-* tarballs
+   are required for this analysis.
+
+2. Clone metrics-lib.git, build descriptor.jar, and put it in this
+   directory.
+
+3. Download Apache Commons Codec and Compress and put the .jar files in
+   this directory.
+
+4. Parse descriptors and write all relevant parts to one file per bridge:
+   $ javac
+       -cp commons-codec-1.4.jar:commons-compress-1.3.jar:descriptor.jar
+       ExtractDescriptorParts.java
+   $ java
+       -cp commons-codec-1.4.jar:commons-compress-1.3.jar:descriptor.jar:.
+       ExtractDescriptorParts
+
+5. Analyze descriptors parts bridge by bridge and determine whether it
+   reported bridge stats at a given time, and if not, find out why not:
+   $ javac AnalyzeDescriptorParts.java
+   $ java AnalyzeDescriptorParts
+
+6. Aggregate daily statistics that can be plotted:
+   $ javac AggregateStats.java
+   $ java AggregateStats
+
+7. Plot results:
+   $ R --slave -f plot.R
 
diff --git a/task-3261/plot.R b/task-3261/plot.R
new file mode 100644
index 0000000..8a3808c
--- /dev/null
+++ b/task-3261/plot.R
@@ -0,0 +1,65 @@
+library(ggplot2)
+library(scales)
+library(reshape)
+a <- read.csv("aggregated.csv", stringsAsFactors = FALSE)
+
+e <- a
+e <- data.frame(date = as.Date(e$date), case = ifelse(
+  e$reported == "true", ifelse(e$discarded == "false", "case1", "case2"),
+  "case3"), bridges = e$bridges)
+e <- aggregate(list(bridges = e$bridges),
+  by = list(date = e$date, case = e$case), FUN = sum)
+e <- cast(e, date ~ case)
+sums <- e$case1 + e$case2 + e$case3
+e <- data.frame(date = e$date, case1 = e$case1 / sums,
+  case2 = e$case2 / sums, case3 = e$case3 / sums, stringsAsFactors = FALSE)
+e <- melt(e, "date")
+e <- data.frame(date = e$date, variable = ifelse(e$variable == "case1",
+  "reported and used", ifelse(e$variable == "case2",
+  "reported and discarded", "not reported")), value = e$value)
+ggplot(e, aes(x = as.Date(date), y = value)) +
+geom_line() +
+facet_grid(variable ~ .) +
+scale_x_date(name = "") +
+scale_y_continuous(name = "", labels = percent) +
+opts(title = "Fraction of bridge usage statistics that were...\n")
+ggsave("reported-bridge-statistics.png", width = 8, height = 6, dpi = 120)
+
+d <- a
+d <- d[d$reported == "false", ]
+d <- data.frame(date = d$date, reason = d$reason, value = d$bridges)
+d <- cast(d, date ~ reason)
+d <- data.frame(date = d$date, case1 = d$lessthan24h / sums,
+  case2 = d$publdelay / sums, case3 = d$other / sums)
+d <- melt(d, "date")
+d <- data.frame(date = d$date, variable = ifelse(d$variable == "case1",
+  "Less than 24h uptime", ifelse(d$variable == "case2",
+  "Publication delay", "Other reason")), value = d$value)
+ggplot(d, aes(x = as.Date(date), y = value)) +
+geom_line() +
+facet_grid(variable ~ .) +
+scale_x_date(name = "") +
+scale_y_continuous(name = "", labels = percent) +
+opts(title = "Reasons for bridges not reporting usage statistics\n")
+ggsave("bridge-statistics-nonreported.png", width = 8, height = 6,
+  dpi = 120)
+
+b <- a
+b <- b[b$discarded == "true", ]
+b <- data.frame(date = b$date, reason = b$reason, value = b$bridges)
+b <- cast(b, date ~ reason)
+b <- data.frame(date = b$date, case1 = b$geoip022 / sums,
+  case2 = b$nogeoipfile / sums, case3 = b$runasrelay / sums)
+b <- melt(b, "date")
+b <- data.frame(date = b$date, variable = ifelse(b$variable == "case1",
+  "0.2.2.x geoip-stats bug", ifelse(b$variable == "case2",
+  "missing geoip file", "Run as non-bridge relay")), value = b$value)
+ggplot(b, aes(x = as.Date(date), y = value)) +
+geom_line() +
+facet_grid(variable ~ .) +
+scale_x_date(name = "") +
+scale_y_continuous(name = "", labels = percent) +
+opts(title = "Reasons for discarding reported usage statistics\n")
+ggsave("bridge-statistics-discarded.png", width = 8, height = 6,
+  dpi = 120)
+
diff --git a/task-3261/stats-coverage.R b/task-3261/stats-coverage.R
deleted file mode 100644
index aef63f2..0000000
--- a/task-3261/stats-coverage.R
+++ /dev/null
@@ -1,25 +0,0 @@
-library(ggplot2)
-library(scales)
-b <- read.csv("stats-coverage.csv")
-b <- aggregate(list(
-  totalwritten = b$totalwritten, totalseconds = b$totalseconds,
-  totalrunning = b$totalrunning, bridgewritten = b$bridgewritten,
-  bridgeseconds = b$bridgeseconds, bridgerunning = b$bridgerunning,
-  geoipwritten = b$geoipwritten, geoipseconds = b$geoipseconds,
-  geoiprunning = b$geoiprunning), by = list(date = as.Date(b$date)), sum)
-b <- rbind(data.frame(date = b$date, variable = "by written bytes",
-    value = (b$bridgewritten + b$geoipwritten) / b$totalwritten),
-  data.frame(date = b$date, variable = "by uptime (bandwidth history)",
-    value = (b$bridgeseconds + b$geoipseconds) / b$totalseconds),
-  data.frame(date = b$date, variable = "by uptime (Running flag)",
-    value = (b$bridgerunning + b$geoiprunning) / b$totalrunning))
-b <- b[b$date >= as.Date("2010-10-01") & b$date < as.Date("2012-04-01"), ]
-ggplot(b, aes(x = date, y = value)) +
-geom_line() +
-facet_grid(variable ~ .) +
-scale_x_date(name = "") +
-scale_y_continuous(name = "", limits = c(0, 1), labels = percent) +
-scale_colour_hue(name = "") +
-opts(title = "Fraction of bridges reporting statistics\n")
-ggsave("stats-coverage-bridges.png", width = 8, height = 7, dpi = 72)
-



More information about the tor-commits mailing list