[or-cvs] [ernie/master 1/2] Clean up and document bridge-stats and dirreq-stats writers.

karsten at torproject.org karsten at torproject.org
Sun Feb 28 15:05:58 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Sun, 28 Feb 2010 15:52:14 +0100
Subject: Clean up and document bridge-stats and dirreq-stats writers.
Commit: 07bfb5dd3c078f312cec774964f17d4527a60998

---
 src/BridgeStatsFileHandler.java    |  386 ++++++++++++++++++++++++++----------
 src/ConsensusStatsFileHandler.java |   67 ++++---
 src/DirreqStatsFileHandler.java    |  221 +++++++++++++++------
 src/Main.java                      |    2 +-
 4 files changed, 480 insertions(+), 196 deletions(-)

diff --git a/src/BridgeStatsFileHandler.java b/src/BridgeStatsFileHandler.java
index 2d1fa88..8bc8fac 100644
--- a/src/BridgeStatsFileHandler.java
+++ b/src/BridgeStatsFileHandler.java
@@ -1,199 +1,375 @@
 import java.io.*;
+import java.text.*;
 import java.util.*;
 import java.util.logging.*;
 
 /**
- *
+ * Determines estimates of bridge users per country and day from the
+ * extra-info descriptors that bridges publish. In a first step, the
+ * numbers of unique IP addresses that bridges see are normalized to a
+ * 24-hour period. In the next step, all bridges are excluded that have
+ * been running as a relay. Finally, observations are summed up and
+ * written to <code>stats/bridge-stats</code>.
  */
 public class BridgeStatsFileHandler {
+
+  /**
+   * Two-letter country codes of countries that we care about.
+   */
+  private SortedSet<String> countries;
+
+  /**
+   * Intermediate results file containing bridge user numbers by country
+   * as seen by single bridges, normalized to 24-hour periods.
+   */
   private File bridgeStatsRawFile;
-  private File bridgeStatsFile;
+
+  /**
+   * Bridge user numbers by country as seen by single bridges on a given
+   * day. Map keys are bridge and date written as "bridge,date", map
+   * values are lines as read from <code>stats/bridge-stats-raw</code>.
+   */
+  private SortedMap<String, String> bridgeUsersRaw;
+
+  /**
+   * Modification flag for <code>bridgeUsersRaw</code>. This flag is used to
+   * decide whether the contents of <code>bridgeUsersRaw</code> need to be
+   * written to disk during <code>writeFiles</code>.
+   */
+  private boolean bridgeUsersRawModified;
+
+  /**
+   * Helper file containing the hashed relay identities of all known
+   * relays. These hashes are compared to the bridge identity hashes to
+   * exclude bridges that have been known as relays from the statistics.
+   */
   private File hashedRelayIdentitiesFile;
-  private SortedSet<String> countries;
-  private SortedSet<String> hashedRelays = new TreeSet<String>();
-  private SortedMap<String, String> observations;
+
+  /**
+   * Known hashed relay identities used to exclude bridges that have been
+   * running as relays.
+   */
+  private SortedSet<String> hashedRelays;
+
+  /**
+   * Modification flag for <code>hashedRelays</code>. This flag is used to
+   * decide whether the contents of <code>hashedRelays</code> need to be
+   * written to disk during <code>writeFiles</code>.
+   */
   private boolean hashedRelaysModified;
-  private boolean observationsModified;
+
+  /**
+   * Final results file containing the number of bridge users per country
+   * and day. If this file exists on disk, it is not read in during
+   * initialization, but overwritten if either <code>bridgeUsersRaw</code>
+   * or <code>hashedRelays</code> have been modified.
+   */
+  private File bridgeStatsFile;
+
+  /**
+   * Logger for this class.
+   */
   private Logger logger;
+
+  /**
+   * Initializes this class, including reading in intermediate results
+   * files <code>stats/bridge-stats-raw</code> and
+   * <code>stats/hashed-relay-identities</code>.
+   */
   public BridgeStatsFileHandler(SortedSet<String> countries) {
+
+    /* Memorize the set of countries we care about. */
+    this.countries = countries;
+
+    /* Initialize local data structures to hold results. */
+    this.bridgeUsersRaw = new TreeMap<String, String>();
+    this.hashedRelays = new TreeSet<String>();
+
+    /* Initialize file names for intermediate and final results. */
     this.bridgeStatsRawFile = new File("stats/bridge-stats-raw");
     this.bridgeStatsFile = new File("stats/bridge-stats");
     this.hashedRelayIdentitiesFile = new File(
         "stats/hashed-relay-identities");
-    this.countries = countries;
-    this.observations = new TreeMap<String, String>();
-    this.logger =
-        Logger.getLogger(BridgeStatsFileHandler.class.getName());
+
+    /* Initialize logger. */
+    this.logger = Logger.getLogger(
+        BridgeStatsFileHandler.class.getName());
+
+    /* Read in bridge user numbers by country as seen by single bridges,
+     * normalized to 24-hour periods. */
     if (this.bridgeStatsRawFile.exists()) {
-      this.logger.info("Reading file "
-          + this.bridgeStatsRawFile.getAbsolutePath() + "...");
       try {
+        this.logger.fine("Reading file "
+            + this.bridgeStatsRawFile.getAbsolutePath() + "...");
         BufferedReader br = new BufferedReader(new FileReader(
             this.bridgeStatsRawFile));
         String line = br.readLine();
         if (line != null) {
-          String[] headers = line.split(",");
-          for (int i = 3; i < headers.length; i++) {
-            this.countries.add(headers[i]);
-          }
-          while ((line = br.readLine()) != null) {
-            String[] readData = line.split(",");
-            String hashedBridgeIdentity = readData[0];
-            String date = readData[1];
-            String time = readData[2];
-            SortedMap<String, String> obs = new TreeMap<String, String>();
-            for (int i = 3; i < readData.length; i++) {
-              obs.put(headers[i], readData[i]);
+          /* The first line should contain headers that we need to parse
+           * in order to learn what countries we were interested in when
+           * writing this file. */
+          if (!line.startsWith("bridge,date,time,")) {
+            this.logger.warning("Incorrect first line '" + line + "' in "
+                + this.bridgeStatsRawFile.getAbsolutePath() + "! This line "
+                + "should contain headers! Aborting to read in this "
+                + "file!");
+          } else {
+            String[] headers = line.split(",");
+            for (int i = 3; i < headers.length; i++) {
+              this.countries.add(headers[i]);
+            }
+            /* Read in the rest of the file. */
+            while ((line = br.readLine()) != null) {
+              String[] parts = line.split(",");
+              if (parts.length != headers.length) {
+                this.logger.warning("Corrupt line '" + line + "' in file "
+                    + this.bridgeStatsRawFile.getAbsolutePath()
+                    + "! Aborting to read this file!");
+                break;
+              } 
+              String hashedBridgeIdentity = parts[0];
+              String date = parts[1];
+              String time = parts[2];
+              SortedMap<String, String> obs =
+                  new TreeMap<String, String>();
+              for (int i = 3; i < parts.length; i++) {
+                obs.put(headers[i], parts[i]);
+              }
+              this.addObs(hashedBridgeIdentity, date, time, obs);
             }
-            this.addObs(hashedBridgeIdentity, date, time, obs);
           }
         }
         br.close();
-        this.observationsModified = false;
-        this.logger.info("Finished reading file "
+        this.logger.fine("Finished reading file "
             + this.bridgeStatsRawFile.getAbsolutePath() + ".");
       } catch (IOException e) {
-        this.logger.log(Level.WARNING, "Failed reading file "
+        this.logger.log(Level.WARNING, "Failed to read file "
             + this.bridgeStatsRawFile.getAbsolutePath() + "!", e);
       }
     }
+
+    /* Read in known hashed relay identities used to exclude bridges that
+     * have been running as relays. */
     if (this.hashedRelayIdentitiesFile.exists()) {
-      this.logger.info("Reading file "
-          + this.hashedRelayIdentitiesFile.getAbsolutePath() + "...");
       try {
+        this.logger.fine("Reading file "
+            + this.hashedRelayIdentitiesFile.getAbsolutePath() + "...");
         BufferedReader br = new BufferedReader(new FileReader(
             this.hashedRelayIdentitiesFile));
         String line = null;
+        /* Read in all lines from the file and memorize them. */
         while ((line = br.readLine()) != null) {
           this.hashedRelays.add(line);
         }
         br.close();
-        this.hashedRelaysModified = false;
-        this.logger.info("Finished reading file "
+        this.logger.fine("Finished reading file "
             + this.hashedRelayIdentitiesFile.getAbsolutePath() + ".");
       } catch (IOException e) {
-        this.logger.log(Level.WARNING, "Failed reading file "
+        this.logger.log(Level.WARNING, "Failed to read file "
             + this.hashedRelayIdentitiesFile.getAbsolutePath() + "!", e);
       }
+
+      /* Set modification flags to false. */
+      this.bridgeUsersRawModified = this.hashedRelaysModified = false;
     }
   }
-  public void addHashedRelay(String hashedRelayIdentity)
-      throws IOException {
-    this.hashedRelays.add(hashedRelayIdentity);
-    this.hashedRelaysModified = true;
+
+  /**
+   * Adds a hashed relay identity string to the list of bridges that we
+   * are going to ignore in the future. If we counted user numbers from
+   * bridges that have been running as relays, our numbers would be far
+   * higher than what we think is correct.
+   */
+  public void addHashedRelay(String hashedRelayIdentity) {
+    if (!this.hashedRelays.contains(hashedRelayIdentity)) {
+      this.logger.finer("Adding new hashed relay identity: "
+          + hashedRelayIdentity);
+      this.hashedRelays.add(hashedRelayIdentity);
+      this.hashedRelaysModified = true;
+    }
   }
-  public boolean isKnownRelay(String hashedBridgeIdentity)
-      throws IOException {
+
+  /**
+   * Returns whether the given fingerprint is a known hashed relay
+   * identity. <code>BridgeDescriptorParser</code> uses this information
+   * to decide whether to continue parsing a bridge extra-info
+   * descriptor or not.
+   */
+  public boolean isKnownRelay(String hashedBridgeIdentity) {
     return this.hashedRelays.contains(hashedBridgeIdentity);
   }
+
+  /**
+   * Adds bridge user numbers by country as seen by a single bridge on a
+   * given date and time. Bridges can publish statistics on unique IP
+   * addresses multiple times a day, but we only want to include one
+   * observation per day. If we already have an observation from the given
+   * bridge and day, we keep the one with the later publication time and
+   * discard the other one.
+   */
   public void addObs(String hashedIdentity, String date,
-      String time, Map<String, String> obs) throws IOException {
+      String time, Map<String, String> obs) {
     String key = hashedIdentity + "," + date;
     StringBuilder sb = new StringBuilder(key + "," + time);
     for (String c : countries) {
       sb.append("," + (obs.containsKey(c) ? obs.get(c) : "0.0"));
     }
     String value = sb.toString();
-    if (!this.observations.containsKey(key)
-        || value.compareTo(this.observations.get(key)) > 0) {
-      this.observations.put(key, value);
-      this.observationsModified = true;
+    if (!this.bridgeUsersRaw.containsKey(key)) {
+      this.logger.finer("Adding new bridge user numbers: " + value);
+      this.bridgeUsersRaw.put(key, value);
+      this.bridgeUsersRawModified = true;
+    } else if (value.compareTo(this.bridgeUsersRaw.get(key)) > 0) {
+      this.logger.finer("Replacing existing bridge user numbers (" +
+          this.bridgeUsersRaw.get(key) + " with new numbers: "
+          + value);
+      this.bridgeUsersRaw.put(key, value);
+      this.bridgeUsersRawModified = true;
+    } else {
+      this.logger.finer("Not replacing existing bridge user numbers (" +
+          this.bridgeUsersRaw.get(key) + " with new numbers (" + value
+          + ").");
     }
   }
 
-  public void writeFile() {
-    if (!this.hashedRelays.isEmpty() && this.hashedRelaysModified) {
+  /**
+   * Writes the list of hashed relay identities and bridge user numbers as
+   * observed by single bridges to disk, aggregates per-day statistics for
+   * all bridges, and writes those to disk, too.
+   */
+  public void writeFiles() {
+
+    /* Write hashed relay identities to disk. */
+    if (this.hashedRelaysModified) {
       try {
-        this.logger.info("Writing file "
+        this.logger.fine("Writing file "
             + this.hashedRelayIdentitiesFile.getAbsolutePath() + "...");
         this.hashedRelayIdentitiesFile.getParentFile().mkdirs();
-        BufferedWriter bwRelayIdentities = new BufferedWriter(
-            new FileWriter(this.hashedRelayIdentitiesFile));
+        BufferedWriter bw = new BufferedWriter(new FileWriter(
+            this.hashedRelayIdentitiesFile));
         for (String hashedRelay : this.hashedRelays) {
-          bwRelayIdentities.append(hashedRelay + "\n");
+          bw.append(hashedRelay + "\n");
         }
-        bwRelayIdentities.close();
-        this.logger.info("Finished writing file "
+        bw.close();
+        this.logger.fine("Finished writing file "
             + this.hashedRelayIdentitiesFile.getAbsolutePath() + ".");
       } catch (IOException e) {
-        this.logger.log(Level.WARNING, "Failed writing "
+        this.logger.log(Level.WARNING, "Failed to write "
             + this.hashedRelayIdentitiesFile.getAbsolutePath() + "!", e);
       }
+    } else {
+      this.logger.fine("Not writing file "
+          + this.hashedRelayIdentitiesFile.getAbsolutePath()
+          + ", because nothing has changed.");
     }
-    if (!this.observations.isEmpty() && this.observationsModified) {
+
+    /* Write observations made by single bridges to disk. */
+    if (this.bridgeUsersRawModified) {
       try {
-        this.logger.info("Writing file "
+        this.logger.fine("Writing file "
             + this.bridgeStatsRawFile.getAbsolutePath() + "...");
         this.bridgeStatsRawFile.getParentFile().mkdirs();
-        BufferedWriter bwBridgeStats = new BufferedWriter(
-            new FileWriter(this.bridgeStatsRawFile));
-        bwBridgeStats.append("bridge,date,time");
+        BufferedWriter bw = new BufferedWriter(new FileWriter(
+            this.bridgeStatsRawFile));
+        bw.append("bridge,date,time");
         for (String c : this.countries) {
-          bwBridgeStats.append("," + c);
+          bw.append("," + c);
         }
-        bwBridgeStats.append("\n");
-        SortedMap<String, Set<double[]>> observationsPerDay =
-            new TreeMap<String, Set<double[]>>();
-        for (String observation : this.observations.values()) {
-          String hashedBridgeIdentity = observation.split(",")[0];
+        bw.append("\n");
+        for (String line : this.bridgeUsersRaw.values()) {
+          String hashedBridgeIdentity = line.split(",")[0];
           if (!this.hashedRelays.contains(hashedBridgeIdentity)) {
-            bwBridgeStats.append(observation + "\n");
-            String[] parts = observation.split(",");
-            String date = parts[1];
-            double[] users = new double[countries.size()];
-            for (int i = 3; i < parts.length; i++) {
-              users[i - 3] = Double.parseDouble(parts[i]);
-            }
-            Set<double[]> perDay = observationsPerDay.get(date);
-            if (perDay == null) {
-              perDay = new HashSet<double[]>();
-              observationsPerDay.put(date, perDay);
-            }
-            perDay.add(users);
+            bw.append(line + "\n");
           }
         }
-        bwBridgeStats.close();
-        this.logger.info("Finished writing file "
+        bw.close();
+        this.logger.fine("Finished writing file "
             + this.bridgeStatsRawFile.getAbsolutePath() + ".");
-        this.logger.info("Writing file "
+      } catch (IOException e) {
+        this.logger.log(Level.WARNING, "Failed to write "
+            + this.bridgeStatsRawFile.getAbsolutePath() + "!", e);
+      }
+    } else {
+      this.logger.fine("Not writing file "
+          + this.bridgeStatsRawFile.getAbsolutePath() + ", because "
+          + "nothing has changed.");
+    }
+
+    /* Aggregate per-day statistics. */
+    if (this.hashedRelaysModified || this.bridgeUsersRawModified) {
+      SortedMap<String, double[]> bridgeUsersPerDay =
+          new TreeMap<String, double[]>();
+      for (String line : this.bridgeUsersRaw.values()) {
+        String[] parts = line.split(",");
+        String hashedBridgeIdentity = parts[0];
+        if (!this.hashedRelays.contains(hashedBridgeIdentity)) {
+          String date = parts[1];
+          double[] users = bridgeUsersPerDay.get(date);
+          if (users == null) {
+            users = new double[countries.size()];
+            bridgeUsersPerDay.put(date, users);
+          }
+          for (int i = 3; i < parts.length; i++) {
+            users[i - 3] += Double.parseDouble(parts[i]);
+          }
+        }
+      }
+      /* Write final results of bridge users per day and country to
+       * <code>stats/bridge-stats</code>. */
+      try {
+        this.logger.fine("Writing file "
             + this.bridgeStatsRawFile.getAbsolutePath() + "...");
         this.bridgeStatsFile.getParentFile().mkdirs();
-        BufferedWriter bwBridgeStatsDate = new BufferedWriter(
-            new FileWriter(this.bridgeStatsFile));
-        bwBridgeStatsDate.append("date");
+        BufferedWriter bw = new BufferedWriter(new FileWriter(
+            this.bridgeStatsFile));
+        bw.append("date");
         for (String c : this.countries) {
-          bwBridgeStatsDate.append("," + c);
+          bw.append("," + c);
         }
-        bwBridgeStatsDate.append("\n");
-        for (Map.Entry<String, Set<double[]>> e :
-            observationsPerDay.entrySet()) {
+        bw.append("\n");
+        /* Memorize last written date to fill missing dates with NA's. */
+        long lastDateMillis = 0L; 
+        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+        dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+        for (Map.Entry<String, double[]> e : bridgeUsersPerDay.entrySet()) {
           String date = e.getKey();
-          double[] sums = null;
-          for (double[] users : e.getValue()) {
-            if (sums == null) {
-              sums = users;
-            } else {
-              for (int i = 0; i < sums.length; i++) {
-                sums[i] += users[i];
-              }
-            }
+          long currentDateMillis = dateFormat.parse(date).getTime();
+          while (currentDateMillis - 24L * 60L * 60L * 1000L
+              > lastDateMillis) {
+            lastDateMillis += 24L * 60L * 60L * 1000L;
+            bw.append(dateFormat.format(new Date(lastDateMillis)));
+            for (String c : this.countries) {
+              bw.append(",NA");
+            }   
+            bw.append("\n");
           }
-          bwBridgeStatsDate.append(date);
-          for (int i = 0; i < sums.length; i++) {
-            bwBridgeStatsDate.append(","
-                + String.format("%.2f", sums[i]));
+          lastDateMillis = currentDateMillis;
+          /* Write current observation. */
+          bw.append(date);
+          double[] users = e.getValue();
+          for (int i = 0; i < users.length; i++) {
+            bw.append("," + String.format("%.2f", users[i]));
           }
-          bwBridgeStatsDate.append("\n");
+          bw.append("\n");
         }
-        bwBridgeStatsDate.close();
-        this.logger.info("Finished writing file "
+        bw.close();
+        this.logger.fine("Finished writing file "
             + this.bridgeStatsFile.getAbsolutePath() + ".");
       } catch (IOException e) {
-        this.logger.log(Level.WARNING, "Failed writing "
-            + this.bridgeStatsRawFile.getAbsolutePath() + " or "
+        this.logger.log(Level.WARNING, "Failed to write "
+            + this.bridgeStatsFile.getAbsolutePath() + "!", e);
+      } catch (ParseException e) {
+        this.logger.log(Level.WARNING, "Failed to write "
             + this.bridgeStatsFile.getAbsolutePath() + "!", e);
       }
+    } else {
+      this.logger.fine("Not writing file "
+          + this.bridgeStatsFile.getAbsolutePath() + ", because nothing "
+          + "has changed.");
     }
+
+    /* Set modification flags to false again. */
+    this.bridgeUsersRawModified = this.hashedRelaysModified = false;
   }
 }
 
diff --git a/src/ConsensusStatsFileHandler.java b/src/ConsensusStatsFileHandler.java
index 83128a3..ceb7a48 100644
--- a/src/ConsensusStatsFileHandler.java
+++ b/src/ConsensusStatsFileHandler.java
@@ -83,10 +83,10 @@ public class ConsensusStatsFileHandler {
   private Logger logger;
 
  /**
-  * Initializes <code>ConsensusStatsFileHandler</code>, including reading
-  * in intermediate results files <code>stats/consensus-stats-raw</code>
-  * and <code>stats/bridge-consensus-stats-raw</code> and final results
-  * file <code>stats/consensus-stats</code>.
+  * Initializes this class, including reading in intermediate results
+  * files <code>stats/consensus-stats-raw</code> and
+  * <code>stats/bridge-consensus-stats-raw</code> and final results file
+  * <code>stats/consensus-stats</code>.
   */
   public ConsensusStatsFileHandler() {
 
@@ -110,13 +110,14 @@ public class ConsensusStatsFileHandler {
     /* Read in number of relays with flags set per consensus. */
     if (this.consensusStatsRawFile.exists()) {
       try {
-        this.logger.info("Reading file "
+        this.logger.fine("Reading file "
             + this.consensusStatsRawFile.getAbsolutePath() + "...");
         BufferedReader br = new BufferedReader(new FileReader(
             this.consensusStatsRawFile));
         String line = null;
         while ((line = br.readLine()) != null) {
-          if (line.startsWith("#") || line.startsWith("date")) {
+          if (line.startsWith("date")) {
+            /* Skip headers. */
             continue;
           }
           String[] parts = line.split(",");
@@ -130,7 +131,7 @@ public class ConsensusStatsFileHandler {
           this.relaysRaw.put(dateTime, line);
         }
         br.close();
-        this.logger.info("Finished reading file "
+        this.logger.fine("Finished reading file "
             + this.consensusStatsRawFile.getAbsolutePath() + ".");
       } catch (IOException e) {
         this.logger.log(Level.WARNING, "Failed to read file "
@@ -141,13 +142,14 @@ public class ConsensusStatsFileHandler {
     /* Read in number of running bridges per bridge status. */
     if (this.bridgeConsensusStatsRawFile.exists()) {
       try {
-        this.logger.info("Reading file "
+        this.logger.fine("Reading file "
             + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "...");
         BufferedReader br = new BufferedReader(new FileReader(
             this.bridgeConsensusStatsRawFile));
         String line = null;
         while ((line = br.readLine()) != null) {
-          if (line.startsWith("#") || line.startsWith("date")) {
+          if (line.startsWith("date")) {
+            /* Skip headers. */
             continue;
           }
           String[] parts = line.split(",");
@@ -161,7 +163,7 @@ public class ConsensusStatsFileHandler {
           this.bridgesRaw.put(dateTime, line);
         }
         br.close();
-        this.logger.info("Finished reading file "
+        this.logger.fine("Finished reading file "
             + this.bridgeConsensusStatsRawFile.getAbsolutePath() + ".");
       } catch (IOException e) {
         this.logger.log(Level.WARNING, "Failed to read file "
@@ -174,13 +176,14 @@ public class ConsensusStatsFileHandler {
      * bridges per day. */
     if (this.consensusStatsFile.exists()) {
       try {
-        this.logger.info("Reading file "
+        this.logger.fine("Reading file "
             + this.consensusStatsFile.getAbsolutePath() + "...");
         BufferedReader br = new BufferedReader(new FileReader(
             this.consensusStatsFile));
         String line = null;
         while ((line = br.readLine()) != null) {
-          if (line.startsWith("#") || line.startsWith("date")) {
+          if (line.startsWith("date")) {
+            /* Skip headers. */
             continue;
           }
           String[] parts = line.split(",");
@@ -204,7 +207,7 @@ public class ConsensusStatsFileHandler {
           }
         }
         br.close();
-        this.logger.info("Finished reading file "
+        this.logger.fine("Finished reading file "
             + this.consensusStatsFile.getAbsolutePath() + ".");
       } catch (IOException e) {
         this.logger.log(Level.WARNING, "Failed to write file "
@@ -221,11 +224,11 @@ public class ConsensusStatsFileHandler {
    * flags in a given consensus to the existing observations.
    */
   public void addConsensusResults(String validAfter, int exit, int fast,
-      int guard, int running, int stable) throws IOException {
+      int guard, int running, int stable) {
     String line = validAfter + "," + exit + "," + fast + "," + guard + ","
         + running + "," + stable;
     if (!this.relaysRaw.containsKey(validAfter)) {
-      this.logger.fine("Adding new relay numbers: " + line);
+      this.logger.finer("Adding new relay numbers: " + line);
       this.relaysRaw.put(validAfter, line);
       this.relaysRawModified = true;
     } else if (!line.equals(this.relaysRaw.get(validAfter))) {
@@ -242,11 +245,10 @@ public class ConsensusStatsFileHandler {
    * Adds the intermediate results of the number of running bridges in a
    * given bridge status to the existing observations.
    */
-  public void addBridgeConsensusResults(String published, int running)
-      throws IOException {
+  public void addBridgeConsensusResults(String published, int running) {
     String line = published + "," + running;
     if (!this.bridgesRaw.containsKey(published)) {
-      this.logger.fine("Adding new bridge numbers: " + line);
+      this.logger.finer("Adding new bridge numbers: " + line);
       this.bridgesRaw.put(published, line);
       this.bridgesRawModified = true;
     } else if (!line.equals(this.bridgesRaw.get(published))) {
@@ -291,12 +293,12 @@ public class ConsensusStatsFileHandler {
                 + (running/ consensuses) + "," + (stable/ consensuses);
             /* Are our results new? */
             if (!this.relaysPerDay.containsKey(tempDate)) {
-              this.logger.fine("Adding new average relay numbers: "
+              this.logger.finer("Adding new average relay numbers: "
                   + line);
               this.relaysPerDay.put(tempDate, line);
               writeConsensusStats = true;
             } else if (!line.equals(this.relaysPerDay.get(tempDate))) {
-              this.logger.info("Replacing existing average relay numbers "
+              this.logger.finer("Replacing existing average relay numbers "
                   + "(" + this.relaysPerDay.get(tempDate) + " with new "
                   + "numbers: " + line);
               this.relaysPerDay.put(tempDate, line);
@@ -339,12 +341,12 @@ public class ConsensusStatsFileHandler {
             String line = "," + (brunning / statuses);
             /* Are our results new? */
             if (!this.bridgesPerDay.containsKey(tempDate)) {
-              this.logger.fine("Adding new average bridge numbers: "
+              this.logger.finer("Adding new average bridge numbers: "
                   + tempDate + line);
               this.bridgesPerDay.put(tempDate, line);
               writeConsensusStats = true;
             } else if (!line.equals(this.bridgesPerDay.get(tempDate))) {
-              this.logger.info("Replacing existing average bridge "
+              this.logger.finer("Replacing existing average bridge "
                   + "numbers (" + this.bridgesPerDay.get(tempDate)
                   + " with new numbers: " + line);
               this.bridgesPerDay.put(tempDate, line);
@@ -366,7 +368,7 @@ public class ConsensusStatsFileHandler {
     /* Write raw numbers of relays with flags set to disk. */
     if (this.relaysRawModified) {
       try {
-        this.logger.info("Writing file "
+        this.logger.fine("Writing file "
             + this.consensusStatsRawFile.getAbsolutePath() + "...");
         this.consensusStatsRawFile.getParentFile().mkdirs();
         BufferedWriter bw = new BufferedWriter(new FileWriter(
@@ -376,14 +378,14 @@ public class ConsensusStatsFileHandler {
           bw.append(line + "\n");
         }
         bw.close();
-        this.logger.info("Finished writing file "
+        this.logger.fine("Finished writing file "
             + this.consensusStatsRawFile.getAbsolutePath() + ".");
       } catch (IOException e) {
         this.logger.log(Level.WARNING, "Failed to write file "
             + this.consensusStatsRawFile.getAbsolutePath() + "!", e);
       }
     } else {
-      this.logger.info("Not writing file "
+      this.logger.fine("Not writing file "
           + this.consensusStatsRawFile.getAbsolutePath() + ", because "
           + "nothing has changed.");
     }
@@ -391,7 +393,7 @@ public class ConsensusStatsFileHandler {
     /* Write raw numbers of running bridges to disk. */
     if (this.bridgesRawModified) {
       try {
-        this.logger.info("Writing file "
+        this.logger.fine("Writing file "
             + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "...");
         this.bridgeConsensusStatsRawFile.getParentFile().mkdirs();
         BufferedWriter bw = new BufferedWriter(
@@ -401,7 +403,7 @@ public class ConsensusStatsFileHandler {
           bw.append(line + "\n");
         }
         bw.close();
-        this.logger.info("Finished writing file "
+        this.logger.fine("Finished writing file "
             + this.bridgeConsensusStatsRawFile.getAbsolutePath() + ".");
       } catch (IOException e) {
         this.logger.log(Level.WARNING, "Failed to write file "
@@ -409,7 +411,7 @@ public class ConsensusStatsFileHandler {
             e);
       }
     } else {
-      this.logger.info("Not writing file "
+      this.logger.fine("Not writing file "
           + this.bridgeConsensusStatsRawFile.getAbsolutePath()
           + ", because nothing has changed.");
     }
@@ -418,7 +420,7 @@ public class ConsensusStatsFileHandler {
      * to disk. */
     if (writeConsensusStats) {
       try {
-        this.logger.info("Writing file "
+        this.logger.fine("Writing file "
             + this.consensusStatsFile.getAbsolutePath() + "...");
         this.consensusStatsFile.getParentFile().mkdirs();
         BufferedWriter bw = new BufferedWriter(new FileWriter(
@@ -453,7 +455,7 @@ public class ConsensusStatsFileHandler {
           currentDateMillis += 24L * 60L * 60L * 1000L;
         }
         bw.close();
-        this.logger.info("Finished writing file "
+        this.logger.fine("Finished writing file "
             + this.consensusStatsFile.getAbsolutePath() + ".");
       } catch (IOException e) {
         this.logger.log(Level.WARNING, "Failed to write file "
@@ -463,10 +465,13 @@ public class ConsensusStatsFileHandler {
             + this.consensusStatsFile.getAbsolutePath() + "!", e);
       }
     } else {
-      this.logger.info("Not writing file "
+      this.logger.fine("Not writing file "
           + this.consensusStatsFile.getAbsolutePath()
           + ", because nothing has changed.");
     }
+
+    /* Set modification flags to false again. */
+    this.relaysRawModified = this.bridgesRawModified = false;
   }
 }
 
diff --git a/src/DirreqStatsFileHandler.java b/src/DirreqStatsFileHandler.java
index a815701..347d189 100644
--- a/src/DirreqStatsFileHandler.java
+++ b/src/DirreqStatsFileHandler.java
@@ -1,115 +1,218 @@
 import java.io.*;
+import java.text.*;
 import java.util.*;
 import java.util.logging.*;
-import java.text.*;
 
 /**
- *
+ * Extracts statistics on v3 directory requests by country from extra-info
+ * descriptors and writes them to a CSV file that is easily parsable by R.
+ * Parse results come from <code>RelayDescriptorParser</code> and are
+ * written to <code>stats/dirreq-stats</code>.
  */
 public class DirreqStatsFileHandler {
+
+  /**
+   * Two-letter country codes of countries that we care about.
+   */
   private SortedSet<String> countries;
+
+  /**
+   * Results file containing v3 directory requests by country.
+   */
   private File dirreqStatsFile;
-  private SortedMap<String, String> observations;
-  private boolean modified;
+
+  /**
+   * Directory requests by directory and date. Map keys are directory and
+   * date written as "directory,date", map values are lines as read from
+   * <code>stats/dirreq-stats</code>.
+   */
+  private SortedMap<String, String> dirreqs;
+
+  /**
+   * Modification flag for directory requests stored in memory. This flag
+   * is used to decide whether the contents of <code>dirreqs</code> need
+   * to be written to disk during <code>writeFile</code>.
+   */
+  private boolean dirreqsModified;
+
+  /**
+   * Logger for this class.
+   */
   private Logger logger;
+
+  /**
+   * Initializes this class, including reading in previous results from
+   * <code>stats/dirreq-stats</code>.
+   */
   public DirreqStatsFileHandler(SortedSet<String> countries) {
-    this.dirreqStatsFile = new File("stats/dirreq-stats");
+
+    /* Memorize the set of countries we care about. */
     this.countries = countries;
-    this.observations = new TreeMap<String, String>();
-    this.logger =
-        Logger.getLogger(DirreqStatsFileHandler.class.getName());
+
+    /* Initialize local data structure to hold observations received from
+     * RelayDescriptorParser. */
+    this.dirreqs = new TreeMap<String, String>();
+
+    /* Initialize file name for observations file. */
+    this.dirreqStatsFile = new File("stats/dirreq-stats");
+
+    /* Initialize logger. */
+    this.logger = Logger.getLogger(
+        DirreqStatsFileHandler.class.getName());
+
+    /* Read in previously stored results. */
     if (this.dirreqStatsFile.exists()) {
-      this.logger.info("Reading file "
-          + this.dirreqStatsFile.getAbsolutePath() + "...");
       try {
+        this.logger.fine("Reading file "
+            + this.dirreqStatsFile.getAbsolutePath() + "...");
         BufferedReader br = new BufferedReader(new FileReader(
             this.dirreqStatsFile));
         String line = br.readLine();
         if (line != null) {
-          String[] headers = line.split(",");
-          for (int i = 2; i < headers.length - 1; i++) {
-            this.countries.add(headers[i]);
-          }
-          while ((line = br.readLine()) != null) {
-            String[] readData = line.split(",");
-            String dirNickname = readData[0];
-            String date = readData[1];
-            if (!readData[readData.length - 1].equals("NA")) {
-              Map<String, String> obs = new HashMap<String, String>();
-              for (int i = 2; i < readData.length - 1; i++) {
-                obs.put(headers[i], readData[i]);
+          /* The first line should contain headers that we need to parse
+           * in order to learn what countries we were interested in when
+           * writing this file. */
+          if (!line.startsWith("directory,date,")) {
+            this.logger.warning("Incorrect first line '" + line + "' in "
+                + this.dirreqStatsFile.getAbsolutePath() + "! This line "
+                + "should contain headers! Aborting to read in this "
+                + "file!");
+          } else {
+            String[] headers = line.split(",");
+            for (int i = 2; i < headers.length - 1; i++) {
+              this.countries.add(headers[i]);
+            }
+            /* Read in the rest of the file. */
+            while ((line = br.readLine()) != null) {
+              String[] parts = line.split(",");
+              if (parts.length != headers.length) {
+                this.logger.warning("Corrupt line '" + line + "' in file "
+                    + this.dirreqStatsFile.getAbsolutePath() + "! This "
+                    + "line has either fewer or more columns than the "
+                    + "file has column headers! Aborting to read this "
+                    + "file!");
+                break;
+              }
+              String directory = parts[0];
+              String date = parts[1];
+              /* If the share column contains NA, all other columns do, too.
+               * We only want to read in non-NA lines here. */
+              if (!parts[parts.length - 1].equals("NA")) {
+                Map<String, String> obs = new HashMap<String, String>();
+                for (int i = 2; i < parts.length - 1; i++) {
+                  obs.put(headers[i], parts[i]);
+                }
+                String share = parts[parts.length - 1];
+                this.addObs(directory, date, obs, share);
               }
-              String share = readData[readData.length - 1];
-              this.addObs(dirNickname, date, obs, share);
             }
           }
         }
         br.close();
-        this.logger.info("Finished reading file "
+        this.logger.fine("Finished reading file "
             + this.dirreqStatsFile.getAbsolutePath() + ".");
       } catch (IOException e) {
-        this.logger.log(Level.WARNING, "Failed reading file "
+        this.logger.log(Level.WARNING, "Failed to read file "
             + this.dirreqStatsFile.getAbsolutePath() + "!", e);
       }
     }
+
+    /* Set modification flag to false. */
+    this.dirreqsModified = false;
   }
-  public void addObs(String dirNickname, String date,
-      Map<String, String> obs, String share) throws IOException {
-    String obsKey = dirNickname + "," + date;
-    StringBuilder sb = new StringBuilder(obsKey);
+
+  /**
+   * Adds observations on the number of directory requests by country as
+   * seen on a given date by a directory that is expected to see the
+   * given share of all directory requests in the network.
+   */
+  public void addObs(String directory, String date,
+      Map<String, String> obs, String share) {
+    String key = directory + "," + date;
+    StringBuilder sb = new StringBuilder(key);
     for (String c : this.countries) {
       sb.append("," + (obs.containsKey(c) ? obs.get(c) : "0"));
     }
     sb.append("," + share);
-    this.observations.put(obsKey, sb.toString());
-    this.modified = true;
+    String value = sb.toString();
+    if (!this.dirreqs.containsKey(key)) {
+      this.logger.finer("Adding new directory request numbers: " + value);
+      this.dirreqs.put(key, value);
+      this.dirreqsModified = true;
+    } else if (value.compareTo(this.dirreqs.get(key)) > 0) {
+      this.logger.warning("The directory request numbers we were just "
+          + "given (" + value + ") are different from what we learned "
+          + "before (" + this.dirreqs.get(key) + "! Overwriting!");
+      this.dirreqs.put(key, value);
+      this.dirreqsModified = true;
+    }   
   }
+
+  /**
+   * Writes the v3 directory request numbers from memory to
+   * <code>stats/dirreq-stats</code> if they have changed.
+   */
   public void writeFile() {
-    if (this.modified && !this.observations.isEmpty()) {
+
+    /* Only write file if we learned something new. */
+    if (this.dirreqsModified) {
       try {
-        this.logger.info("Writing file "
+        this.logger.fine("Writing file "
             + this.dirreqStatsFile.getAbsolutePath() + "...");
         this.dirreqStatsFile.getParentFile().mkdirs();
-        BufferedWriter bwDirreqStats = new BufferedWriter(
-            new FileWriter(this.dirreqStatsFile));
-        bwDirreqStats.append("directory,date");
+        BufferedWriter bw = new BufferedWriter(new FileWriter(
+            this.dirreqStatsFile));
+        /* Write header. */
+        bw.append("directory,date");
         for (String country : this.countries) {
-          bwDirreqStats.append("," + country);
+          bw.append("," + country);
         }
-        bwDirreqStats.append(",share\n");
-        long lastDate = 0L;
+        bw.append(",share\n");
+        /* Memorize last written date and directory to fill missing dates
+         * with NA's. */
+        long lastDateMillis = 0L;
         String lastDirectory = null;
-        SimpleDateFormat timeFormat = new SimpleDateFormat("yyyy-MM-dd");
-        timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-        for (String observation : this.observations.values()) {
-          String currentDirectory = observation.split(",")[0];
-          long currentDate = timeFormat.parse(observation.split(",")[1]).
-              getTime();
-          while (currentDirectory.equals(lastDirectory)
-              && lastDate > 0L && currentDate - 86400000L > lastDate) {
-            lastDate += 86400000L;
-            bwDirreqStats.append(currentDirectory + ","
-                + timeFormat.format(new Date(lastDate)));
+        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+        dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+        for (String line : this.dirreqs.values()) {
+          /* Fill missing dates with NA's. */
+          String[] parts = line.split(",");
+          String currentDirectory = parts[0];
+          long currentDateMillis = dateFormat.parse(parts[1]).getTime();
+          while (currentDirectory.equals(lastDirectory) &&
+              currentDateMillis - 24L * 60L * 60L * 1000L
+              > lastDateMillis) {
+            lastDateMillis += 24L * 60L * 60L * 1000L;
+            bw.append(currentDirectory + ","
+                + dateFormat.format(new Date(lastDateMillis)));
             for (String country : this.countries) {
-              bwDirreqStats.append(",NA");
+              bw.append(",NA");
             }
-            bwDirreqStats.append(",NA\n");
+            bw.append(",NA\n");
           }
-          lastDate = currentDate;
+          lastDateMillis = currentDateMillis;
           lastDirectory = currentDirectory;
-          bwDirreqStats.append(observation + "\n");
+          /* Write current observation. */
+          bw.append(line + "\n");
         }
-        bwDirreqStats.close();
-        this.logger.info("Finished writing file "
+        bw.close();
+        this.logger.fine("Finished writing file "
             + this.dirreqStatsFile.getAbsolutePath() + ".");
       } catch (IOException e) {
-        this.logger.log(Level.WARNING, "Failed writing file "
+        this.logger.log(Level.WARNING, "Failed to write file "
             + this.dirreqStatsFile.getAbsolutePath() + "!", e);
       } catch (ParseException e) {
-        this.logger.log(Level.WARNING, "Failed writing file "
+        this.logger.log(Level.WARNING, "Failed to write file "
             + this.dirreqStatsFile.getAbsolutePath() + "!", e);
       }
+    } else {
+      this.logger.fine("Not writing file "
+          + this.dirreqStatsFile.getAbsolutePath() + ", because "
+          + "nothing has changed.");
     }
+
+    /* Set modification flag to false again. */
+    this.dirreqsModified = false;
   }
 }
 
diff --git a/src/Main.java b/src/Main.java
index 1902116..3c02697 100644
--- a/src/Main.java
+++ b/src/Main.java
@@ -89,7 +89,7 @@ public class Main {
 
     // Write updated stats files to disk
     if (bsfh != null) {
-      bsfh.writeFile();
+      bsfh.writeFiles();
       bsfh = null;
     }
     if (csfh != null) {
-- 
1.6.5




More information about the tor-commits mailing list