[or-cvs] [metrics-db/master 2/2] Extend statistics to all countries and directories.

karsten at torproject.org karsten at torproject.org
Fri Nov 19 13:36:18 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Fri, 19 Nov 2010 12:42:31 +0100
Subject: Extend statistics to all countries and directories.
Commit: e8a093178d35f8f5ad8c80796bc8f32e0707d5ae

---
 config                                             |   11 --
 db/tordir.sql                                      |    1 -
 .../ernie/db/BridgeDescriptorParser.java           |   25 +--
 .../ernie/db/BridgeStatsFileHandler.java           |  135 ++++++++---------
 src/org/torproject/ernie/db/Configuration.java     |   25 ---
 .../ernie/db/DirreqStatsFileHandler.java           |  162 ++++++++------------
 src/org/torproject/ernie/db/Main.java              |   25 +--
 .../ernie/db/RelayDescriptorDownloader.java        |   60 ++------
 .../torproject/ernie/db/RelayDescriptorParser.java |   49 ++-----
 9 files changed, 178 insertions(+), 315 deletions(-)

diff --git a/config b/config
index f5486e7..bb39a4f 100644
--- a/config
+++ b/config
@@ -115,17 +115,6 @@
 ## Write dirreq stats to disk
 #WriteDirreqStats 0
 #
-## Comma-separated set of countries to be included in dirreq and bridge
-## graphs; note that after adding new countries, an import of the relevant
-## descriptor archives (relay and/or bridge) is necessary!
-#DirreqBridgeCountries au,bh,br,ca,cn,cu,de,et,fr,gb,ir,it,jp,kr,mm,pl,ru,sa,se,sy,tn,tm,us,uz,vn,ye
-#
-## Comma-separated set of fingerprints of directory mirrors to be included
-## in dirreq and bridge graphs; note that after adding new directories, an
-## import of the relevant descriptor archives (relay and/or bridge) is
-## necessary!
-#DirreqDirectories 8522EB98C91496E80EC238E732594D1509158E77,9695DFC35FFEB861329B9F1AB04C46397020CE31
-#
 ## Write bridge stats to disk
 #WriteBridgeStats 0
 
diff --git a/db/tordir.sql b/db/tordir.sql
index 2035593..43064d6 100644
--- a/db/tordir.sql
+++ b/db/tordir.sql
@@ -564,7 +564,6 @@ CREATE TABLE dirreq_stats (
     "date" DATE NOT NULL,
     country CHARACTER(2) NOT NULL,
     requests INTEGER NOT NULL,
-    "share" DOUBLE PRECISION NOT NULL,
     CONSTRAINT dirreq_stats_pkey PRIMARY KEY (source, "date", country)
 );
 
diff --git a/src/org/torproject/ernie/db/BridgeDescriptorParser.java b/src/org/torproject/ernie/db/BridgeDescriptorParser.java
index b4b0888..e2d8bd0 100644
--- a/src/org/torproject/ernie/db/BridgeDescriptorParser.java
+++ b/src/org/torproject/ernie/db/BridgeDescriptorParser.java
@@ -12,15 +12,12 @@ public class BridgeDescriptorParser {
   private ConsensusStatsFileHandler csfh;
   private BridgeStatsFileHandler bsfh;
   private SanitizedBridgesWriter sbw;
-  private SortedSet<String> countries;
   private Logger logger;
   public BridgeDescriptorParser(ConsensusStatsFileHandler csfh,
-      BridgeStatsFileHandler bsfh, SanitizedBridgesWriter sbw,
-      SortedSet<String> countries) {
+      BridgeStatsFileHandler bsfh, SanitizedBridgesWriter sbw) {
     this.csfh = csfh;
     this.bsfh = bsfh;
     this.sbw = sbw;
-    this.countries = countries;
     this.logger =
         Logger.getLogger(BridgeDescriptorParser.class.getName());
   }
@@ -107,17 +104,13 @@ public class BridgeDescriptorParser {
           Map<String, String> obs = new HashMap<String, String>();
           String[] parts = line.split(" ")[1].split(",");
           for (String p : parts) {
+            String country = p.substring(0, 2);
             double users = ((double) Long.parseLong(p.substring(3)) - 4L)
                     * 86400.0D / ((double) seconds);
             allUsers += users;
-            for (String c : this.countries) {
-              if (p.startsWith(c)) {
-                obs.put(c, String.format("%.2f", users));
-                break;
-              }
-            }
-            obs.put("zy", String.format("%.2f", allUsers));
+            obs.put(country, String.format("%.2f", users));
           }
+          obs.put("zy", String.format("%.2f", allUsers));
           String date = publishedLine.split(" ")[1];
           String time = publishedLine.split(" ")[2];
           if (this.bsfh != null) {
@@ -138,14 +131,12 @@ public class BridgeDescriptorParser {
           Map<String, String> obs = new HashMap<String, String>();
           String[] parts = line.split(" ")[1].split(",");
           for (String p : parts) {
+            String country = p.substring(0, 2);
             double users = (double) Long.parseLong(p.substring(3)) - 4L;
-            for (String c : countries) {
-              if (p.startsWith(c)) {
-                obs.put(c, String.format("%.2f", users));
-                break;
-              }
-            }
+            allUsers += users;
+            obs.put(country, String.format("%.2f", users));
           }
+          obs.put("zy", String.format("%.2f", allUsers));
           String date = bridgeStatsEndLine.split(" ")[1];
           String time = bridgeStatsEndLine.split(" ")[2];
           if (this.bsfh != null) {
diff --git a/src/org/torproject/ernie/db/BridgeStatsFileHandler.java b/src/org/torproject/ernie/db/BridgeStatsFileHandler.java
index ca4dff8..8a417b5 100644
--- a/src/org/torproject/ernie/db/BridgeStatsFileHandler.java
+++ b/src/org/torproject/ernie/db/BridgeStatsFileHandler.java
@@ -19,7 +19,7 @@ import java.util.logging.*;
 public class BridgeStatsFileHandler {
 
   /**
-   * Two-letter country codes of countries that we care about.
+   * Two-letter country codes of known countries.
    */
   private SortedSet<String> countries;
 
@@ -34,7 +34,7 @@ public class BridgeStatsFileHandler {
    * day. Map keys are bridge and date written as "bridge,date", map
    * values are lines as read from <code>stats/bridge-stats-raw</code>.
    */
-  private SortedMap<String, String> bridgeUsersRaw;
+  private SortedMap<String, Map<String, String>> bridgeUsersRaw;
 
   /**
    * Helper file containing the hashed relay identities of all known
@@ -83,14 +83,14 @@ public class BridgeStatsFileHandler {
    * files <code>stats/bridge-stats-raw</code> and
    * <code>stats/hashed-relay-identities</code>.
    */
-  public BridgeStatsFileHandler(SortedSet<String> countries,
-     String connectionURL) {
+  public BridgeStatsFileHandler(String connectionURL) {
 
-    /* Memorize the set of countries we care about. */
-    this.countries = countries;
+    /* Initialize set of known countries. */
+    this.countries = new TreeSet<String>();
+    this.countries.add("zy");
 
     /* Initialize local data structures to hold results. */
-    this.bridgeUsersRaw = new TreeMap<String, String>();
+    this.bridgeUsersRaw = new TreeMap<String, Map<String, String>>();
     this.hashedRelays = new TreeSet<String>();
     this.zeroTwoTwoDescriptors = new TreeSet<String>();
 
@@ -130,9 +130,7 @@ public class BridgeStatsFileHandler {
           } else {
             String[] headers = line.split(",");
             for (int i = 3; i < headers.length; i++) {
-              if (headers[i].equals("all")) {
-                this.countries.add("zy");
-              } else {
+              if (!headers[i].equals("all")) {
                 this.countries.add(headers[i]);
               }
             }
@@ -151,6 +149,9 @@ public class BridgeStatsFileHandler {
               SortedMap<String, String> obs =
                   new TreeMap<String, String>();
               for (int i = 3; i < parts.length; i++) {
+                if (parts[i].equals("NA")) {
+                  continue;
+                }
                 if (headers[i].equals("all")) {
                   obs.put("zy", parts[i]);
                 } else {
@@ -265,25 +266,25 @@ public class BridgeStatsFileHandler {
    */
   public void addObs(String hashedIdentity, String date, String time,
       Map<String, String> obs) {
-    String key = hashedIdentity + "," + date;
-    StringBuilder sb = new StringBuilder(key + "," + time);
-    for (String c : this.countries) {
-      sb.append("," + (obs.containsKey(c) && !obs.get(c).startsWith("-")
-          ? obs.get(c) : "0.0"));
+    for (String country : obs.keySet()) {
+      this.countries.add(country);
     }
-    String value = sb.toString();
-    if (!this.bridgeUsersRaw.containsKey(key)) {
-      this.logger.finer("Adding new bridge user numbers: " + value);
-      this.bridgeUsersRaw.put(key, value);
-    } else if (value.compareTo(this.bridgeUsersRaw.get(key)) > 0) {
+    String shortKey = hashedIdentity + "," + date;
+    String longKey = shortKey + "," + time;
+    SortedMap<String, Map<String, String>> tailMap =
+        this.bridgeUsersRaw.tailMap(shortKey);
+    String nextKey = tailMap.isEmpty() ? null : tailMap.firstKey();
+    if (nextKey == null || !nextKey.startsWith(shortKey)) {
+      this.logger.finer("Adding new bridge user numbers for key "
+          + longKey);
+      this.bridgeUsersRaw.put(longKey, obs);
+    } else if (longKey.compareTo(nextKey) > 0) {
       this.logger.finer("Replacing existing bridge user numbers (" +
-          this.bridgeUsersRaw.get(key) + " with new numbers: "
-          + value);
-      this.bridgeUsersRaw.put(key, value);
+          nextKey + " with new numbers: " + longKey);
+      this.bridgeUsersRaw.put(longKey, obs);
     } else {
       this.logger.finer("Not replacing existing bridge user numbers (" +
-          this.bridgeUsersRaw.get(key) + " with new numbers (" + value
-          + ").");
+          nextKey + " with new numbers (" + longKey + ").");
     }
   }
 
@@ -346,14 +347,22 @@ public class BridgeStatsFileHandler {
         }
       }
       bw.append("\n");
-      for (String line : this.bridgeUsersRaw.values()) {
-        String[] parts = line.split(",");
+      for (Map.Entry<String, Map<String, String>> e :
+          this.bridgeUsersRaw.entrySet()) {
+        String longKey = e.getKey();
+        String[] parts = longKey.split(",");
         String hashedBridgeIdentity = parts[0];
         String date = parts[1];
         String time = parts[2];
         if (!this.hashedRelays.contains(hashedBridgeIdentity) &&
-            !this.zeroTwoTwoDescriptors.contains(hashedBridgeIdentity
-            + "," + date + "," + time)) {
+            !this.zeroTwoTwoDescriptors.contains(longKey)) {
+          Map<String, String> obs = e.getValue();
+          StringBuilder sb = new StringBuilder(longKey);
+          for (String c : this.countries) {
+            sb.append("," + (obs.containsKey(c) &&
+                !obs.get(c).startsWith("-") ? obs.get(c) : "NA"));
+          }
+          String line = sb.toString();
           bw.append(line + "\n");
         }
       }
@@ -368,21 +377,27 @@ public class BridgeStatsFileHandler {
     /* Aggregate per-day statistics. */
     SortedMap<String, double[]> bridgeUsersPerDay =
         new TreeMap<String, double[]>();
-    for (String line : this.bridgeUsersRaw.values()) {
-      String[] parts = line.split(",");
+    for (Map.Entry<String, Map<String, String>> e :
+        this.bridgeUsersRaw.entrySet()) {
+      String longKey = e.getKey();
+      String[] parts = longKey.split(",");
       String hashedBridgeIdentity = parts[0];
       String date = parts[1];
       String time = parts[2];
       if (!this.hashedRelays.contains(hashedBridgeIdentity) &&
-          !this.zeroTwoTwoDescriptors.contains(hashedBridgeIdentity + ","
-          + date + "," + time)) {
+          !this.zeroTwoTwoDescriptors.contains(longKey)) {
         double[] users = bridgeUsersPerDay.get(date);
+        Map<String, String> obs = e.getValue();
         if (users == null) {
           users = new double[this.countries.size()];
           bridgeUsersPerDay.put(date, users);
         }
-        for (int i = 3; i < parts.length; i++) {
-          users[i - 3] += Double.parseDouble(parts[i]);
+        int i = 0;
+        for (String c : this.countries) {
+          if (obs.containsKey(c) && !obs.get(c).startsWith("-")) {
+            users[i] += Double.parseDouble(obs.get(c));
+          }
+          i++;
         }
       }
     }
@@ -405,28 +420,9 @@ public class BridgeStatsFileHandler {
       }
       bw.append("\n");
 
-      /* Memorize last written date fill missing dates with NA's. */
-      long lastDateMillis = 0L;
-      SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
-      dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+      /* Write current observation. */
       for (Map.Entry<String, double[]> e : bridgeUsersPerDay.entrySet()) {
         String date = e.getKey();
-        long currentDateMillis = dateFormat.parse(date).getTime();
-        if (lastDateMillis == 0L) {
-          lastDateMillis = currentDateMillis;
-        }
-        while (currentDateMillis - 24L * 60L * 60L * 1000L
-            > lastDateMillis) {
-          lastDateMillis += 24L * 60L * 60L * 1000L;
-          bw.append(dateFormat.format(lastDateMillis));
-          for (int i = 0; i < this.countries.size(); i++) {
-            bw.append(",NA");
-          }
-          bw.append("\n");
-        }
-        lastDateMillis = currentDateMillis;
-
-        /* Write current observation. */
         bw.append(date);
         double[] users = e.getValue();
         for (int i = 0; i < users.length; i++) {
@@ -440,9 +436,6 @@ public class BridgeStatsFileHandler {
     } catch (IOException e) {
       this.logger.log(Level.WARNING, "Failed to write "
           + this.bridgeStatsFile.getAbsolutePath() + "!", e);
-    } catch (ParseException e) {
-      this.logger.log(Level.WARNING, "Failed to write "
-          + this.bridgeStatsFile.getAbsolutePath() + "!", e);
     }
 
     /* Add daily bridge users to database. */
@@ -452,16 +445,20 @@ public class BridgeStatsFileHandler {
         for (String c : this.countries) {
           countryList.add(c);
         }
-        Map<String, Double> insertRows = new HashMap<String, Double>(),
-            updateRows = new HashMap<String, Double>();
+        Map<String, Integer> insertRows = new HashMap<String, Integer>(),
+            updateRows = new HashMap<String, Integer>();
         for (Map.Entry<String, double[]> e :
             bridgeUsersPerDay.entrySet()) {
           String date = e.getKey();
           double[] users = e.getValue();
           for (int i = 0; i < users.length; i++) {
+            int usersInt = (int) users[i];
+            if (usersInt < 1) {
+              continue;
+            }
             String country = countryList.get(i);
             String key = date + "," + country;
-            insertRows.put(key, users[i]);
+            insertRows.put(key, usersInt);
           }
         }
         Connection conn = DriverManager.getConnection(connectionURL);
@@ -474,8 +471,8 @@ public class BridgeStatsFileHandler {
           String country = rs.getString(2);
           String key = date + "," + country;
           if (insertRows.containsKey(key)) {
-            double insertRow = insertRows.remove(key);
-            double oldUsers = rs.getDouble(3);
+            int insertRow = insertRows.remove(key);
+            int oldUsers = rs.getInt(3);
             if (oldUsers != insertRow) {
               updateRows.put(key, insertRow);
             }
@@ -485,13 +482,13 @@ public class BridgeStatsFileHandler {
         PreparedStatement psU = conn.prepareStatement(
             "UPDATE bridge_stats SET users = ? "
             + "WHERE date = ? AND country = ?");
-        for (Map.Entry<String, Double> e : updateRows.entrySet()) {
+        for (Map.Entry<String, Integer> e : updateRows.entrySet()) {
           String[] keyParts = e.getKey().split(",");
           java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
           String country = keyParts[1];
-          double users = e.getValue();
+          int users = e.getValue();
           psU.clearParameters();
-          psU.setDouble(1, users);
+          psU.setInt(1, users);
           psU.setDate(2, date);
           psU.setString(3, country);
           psU.executeUpdate();
@@ -499,13 +496,13 @@ public class BridgeStatsFileHandler {
         PreparedStatement psI = conn.prepareStatement(
             "INSERT INTO bridge_stats (users, date, country) "
             + "VALUES (?, ?, ?)");
-        for (Map.Entry<String, Double> e : insertRows.entrySet()) {
+        for (Map.Entry<String, Integer> e : insertRows.entrySet()) {
           String[] keyParts = e.getKey().split(",");
           java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
           String country = keyParts[1];
-          double users = e.getValue();
+          int users = e.getValue();
           psI.clearParameters();
-          psI.setDouble(1, users);
+          psI.setInt(1, users);
           psI.setDate(2, date);
           psI.setString(3, country);
           psI.executeUpdate();
diff --git a/src/org/torproject/ernie/db/Configuration.java b/src/org/torproject/ernie/db/Configuration.java
index ea694a6..03b4358 100644
--- a/src/org/torproject/ernie/db/Configuration.java
+++ b/src/org/torproject/ernie/db/Configuration.java
@@ -15,12 +15,6 @@ import java.util.logging.*;
 public class Configuration {
   private boolean writeConsensusStats = false;
   private boolean writeDirreqStats = false;
-  private SortedSet<String> dirreqBridgeCountries = new TreeSet<String>(
-      Arrays.asList(("au,bh,br,ca,cn,cu,de,et,fr,gb,ir,it,jp,kr,mm,pl,ru,"
-          + "sa,se,sy,tn,tm,us,uz,vn,ye").split(",")));
-  private SortedSet<String> dirreqDirectories = new TreeSet<String>(
-      Arrays.asList(("8522EB98C91496E80EC238E732594D1509158E77,"
-      + "9695DFC35FFEB861329B9F1AB04C46397020CE31").split(",")));
   private boolean writeBridgeStats = false;
   private boolean writeDirectoryArchives = false;
   private String directoryArchivesOutputDirectory = "directory-archive/";
@@ -87,19 +81,6 @@ public class Configuration {
         } else if (line.startsWith("WriteDirreqStats")) {
           this.writeDirreqStats = Integer.parseInt(
               line.split(" ")[1]) != 0;
-        } else if (line.startsWith("DirreqBridgeCountries")) {
-          this.dirreqBridgeCountries = new TreeSet<String>();
-          for (String country : line.split(" ")[1].split(",")) {
-            if (country.length() != 2) {
-              logger.severe("Configuration file contains illegal country "
-                  + "code in line '" + line + "'! Exiting!");
-              System.exit(1);
-            }
-            this.dirreqBridgeCountries.add(country);
-          }
-        } else if (line.startsWith("DirreqDirectories")) {
-          this.dirreqDirectories = new TreeSet<String>(
-              Arrays.asList(line.split(" ")[1].split(",")));
         } else if (line.startsWith("WriteBridgeStats")) {
           this.writeBridgeStats = Integer.parseInt(
               line.split(" ")[1]) != 0;
@@ -291,12 +272,6 @@ public class Configuration {
   public boolean getWriteDirreqStats() {
     return this.writeDirreqStats;
   }
-  public SortedSet<String> getDirreqBridgeCountries() {
-    return this.dirreqBridgeCountries;
-  }
-  public SortedSet<String> getDirreqDirectories() {
-    return this.dirreqDirectories;
-  }
   public boolean getWriteBridgeStats() {
     return this.writeBridgeStats;
   }
diff --git a/src/org/torproject/ernie/db/DirreqStatsFileHandler.java b/src/org/torproject/ernie/db/DirreqStatsFileHandler.java
index 2ab65c9..83372dd 100644
--- a/src/org/torproject/ernie/db/DirreqStatsFileHandler.java
+++ b/src/org/torproject/ernie/db/DirreqStatsFileHandler.java
@@ -17,7 +17,7 @@ import java.util.logging.*;
 public class DirreqStatsFileHandler {
 
   /**
-   * Two-letter country codes of countries that we care about.
+   * Two-letter country codes of known countries.
    */
   private SortedSet<String> countries;
 
@@ -28,10 +28,9 @@ public class DirreqStatsFileHandler {
 
   /**
    * Directory requests by directory and date. Map keys are directory and
-   * date written as "directory,date", map values are lines as read from
-   * <code>stats/dirreq-stats</code>.
+   * date written as "directory,date", map values are country-user maps.
    */
-  private SortedMap<String, String> dirreqs;
+  private SortedMap<String, Map<String, String>> dirreqs;
 
   /**
    * Modification flag for directory requests stored in memory. This flag
@@ -54,15 +53,15 @@ public class DirreqStatsFileHandler {
    * Initializes this class, including reading in previous results from
    * <code>stats/dirreq-stats</code>.
    */
-  public DirreqStatsFileHandler(SortedSet<String> countries,
-      String connectionURL) {
+  public DirreqStatsFileHandler(String connectionURL) {
 
-    /* Memorize the set of countries we care about. */
-    this.countries = countries;
+    /* Initialize set of known countries. */
+    this.countries = new TreeSet<String>();
+    this.countries.add("zy");
 
     /* Initialize local data structure to hold observations received from
      * RelayDescriptorParser. */
-    this.dirreqs = new TreeMap<String, String>();
+    this.dirreqs = new TreeMap<String, Map<String, String>>();
 
     /* Initialize file name for observations file. */
     this.dirreqStatsFile = new File("stats/dirreq-stats");
@@ -94,9 +93,7 @@ public class DirreqStatsFileHandler {
           } else {
             String[] headers = line.split(",");
             for (int i = 2; i < headers.length - 1; i++) {
-              if (headers[i].equals("all")) {
-                this.countries.add("zy");
-              } else {
+              if (!headers[i].equals("all")) {
                 this.countries.add(headers[i]);
               }
             }
@@ -113,20 +110,18 @@ public class DirreqStatsFileHandler {
               }
               String directory = parts[0];
               String date = parts[1];
-              /* If the share column contains NA, all the other columns do.
-               * We only want to read in non-NA lines here. */
-              if (!parts[parts.length - 1].equals("NA")) {
-                Map<String, String> obs = new HashMap<String, String>();
-                for (int i = 2; i < parts.length - 1; i++) {
-                  if (headers[i].equals("all")) {
-                    obs.put("zy", parts[i]);
-                  } else {
-                    obs.put(headers[i], parts[i]);
-                  }
+              Map<String, String> obs = new HashMap<String, String>();
+              for (int i = 2; i < parts.length - 1; i++) {
+                if (parts[i].equals("NA")) {
+                  continue;
+                }
+                if (headers[i].equals("all")) {
+                  obs.put("zy", parts[i]);
+                } else {
+                  obs.put(headers[i], parts[i]);
                 }
-                String share = parts[parts.length - 1];
-                this.addObs(directory, date, obs, share);
               }
+              this.addObs(directory, date, obs);
             }
           }
         }
@@ -146,28 +141,24 @@ public class DirreqStatsFileHandler {
 
   /**
    * Adds observations on the number of directory requests by country as
-   * seen on a directory at a given data that expected to see the given
-   * share of all directory requests in the network.
+   * seen on a directory at a given date.
    */
   public void addObs(String directory, String date,
-      Map<String, String> obs, String share) {
-    String key = directory + "," + date;
-    StringBuilder sb = new StringBuilder(key);
-    for (String c : this.countries) {
-      sb.append("," + (obs.containsKey(c) ? obs.get(c) : "0"));
+      Map<String, String> obs) {
+    for (String country : obs.keySet()) {
+      this.countries.add(country);
     }
-    sb.append("," + share);
-    String value = sb.toString();
+    String key = directory + "," + date;
     if (!this.dirreqs.containsKey(key)) {
-      this.logger.finer("Adding new directory request numbers: " + value);
-      this.dirreqs.put(key, value);
+      this.logger.finer("Adding new directory request numbers: " + key);
+      this.dirreqs.put(key, obs);
       this.dirreqsModified = true;
       this.addedResults++;
-    } else if (value.compareTo(this.dirreqs.get(key)) > 0) {
-      this.logger.warning("The directory request numbers we were just "
-          + "given (" + value + ") are different from what we learned "
-          + "before (" + this.dirreqs.get(key) + "! Overwriting!");
-      this.dirreqs.put(key, value);
+    } else {
+      this.logger.fine("The directory request numbers we were just "
+          + "given for " + key + " may be different from what we learned "
+          + "before. Overwriting!");
+      this.dirreqs.put(key, obs);
       this.dirreqsModified = true;
     }
   }
@@ -195,32 +186,17 @@ public class DirreqStatsFileHandler {
             bw.append("," + country);
           }
         }
-        bw.append(",share\n");
-        /* Memorize last written date and directory to fill missing dates
-         * with NA's. */
-        long lastDateMillis = 0L;
-        String lastDirectory = null;
-        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
-        dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-        for (String line : this.dirreqs.values()) {
-          /* Fill missing dates with NA's. */
-          String[] parts = line.split(",");
-          String currentDirectory = parts[0];
-          long currentDateMillis = dateFormat.parse(parts[1]).getTime();
-          while (currentDirectory.equals(lastDirectory) &&
-              currentDateMillis - 24L * 60L * 60L * 1000L
-              > lastDateMillis) {
-            lastDateMillis += 24L * 60L * 60L * 1000L;
-            bw.append(currentDirectory + ","
-                + dateFormat.format(lastDateMillis));
-            for (int i = 0; i < this.countries.size(); i++) {
-              bw.append(",NA");
-            }
-            bw.append(",NA\n");
+        bw.append("\n");
+        /* Write observations. */
+        for (Map.Entry<String, Map<String, String>> e :
+            this.dirreqs.entrySet()) {
+          String key = e.getKey();
+          Map<String, String> obs = e.getValue();
+          StringBuilder sb = new StringBuilder(key);
+          for (String c : this.countries) {
+            sb.append("," + (obs.containsKey(c) ? obs.get(c) : "NA"));
           }
-          lastDateMillis = currentDateMillis;
-          lastDirectory = currentDirectory;
-          /* Write current observation. */
+          String line = sb.toString();
           bw.append(line + "\n");
         }
         bw.close();
@@ -229,9 +205,6 @@ public class DirreqStatsFileHandler {
       } catch (IOException e) {
         this.logger.log(Level.WARNING, "Failed to write file "
             + this.dirreqStatsFile.getAbsolutePath() + "!", e);
-      } catch (ParseException e) {
-        this.logger.log(Level.WARNING, "Failed to write file "
-            + this.dirreqStatsFile.getAbsolutePath() + "!", e);
       }
     } else {
       this.logger.fine("Not writing file "
@@ -248,25 +221,26 @@ public class DirreqStatsFileHandler {
         }
         Map<String, String> insertRows = new HashMap<String, String>(),
             updateRows = new HashMap<String, String>();
-        for (String dirreq : this.dirreqs.values()) {
-          String[] parts = dirreq.split(",");
+        for (Map.Entry<String, Map<String, String>> e :
+            this.dirreqs.entrySet()) {
+          String[] parts = e.getKey().split(",");
           String directory = parts[0];
           String date = parts[1];
-          String share = parts[parts.length - 1];
-          for (int i = 2; i < parts.length - 1; i++) {
-            String country = countryList.get(i - 2);
-            String key = directory + "," + date + "," + country;
-            String requests = parts[i];
-            String value = requests + "," + share;
-            insertRows.put(key, value);
+          Map<String, String> obs = e.getValue();
+          int i = 0;
+          for (String country : this.countries) {
+            if (obs.containsKey(country)) {
+              String key = directory + "," + date + "," + country;
+              String requests = "" + obs.get(country);
+              insertRows.put(key, requests);
+            }
           }
         }
         Connection conn = DriverManager.getConnection(connectionURL);
         conn.setAutoCommit(false);
         Statement statement = conn.createStatement();
         ResultSet rs = statement.executeQuery(
-            "SELECT source, date, country, requests, share "
-            + "FROM dirreq_stats");
+            "SELECT source, date, country, requests FROM dirreq_stats");
         while (rs.next()) {
           String source = rs.getString(1);
           String date = rs.getDate(2).toString();
@@ -283,41 +257,37 @@ public class DirreqStatsFileHandler {
         }
         rs.close();
         PreparedStatement psU = conn.prepareStatement(
-            "UPDATE dirreq_stats SET requests = ?, share = ? "
+            "UPDATE dirreq_stats SET requests = ? "
             + "WHERE source = ? AND date = ? AND country = ?");
         for (Map.Entry<String, String> e : updateRows.entrySet()) {
           String[] keyParts = e.getKey().split(",");
-          String[] valueParts = e.getValue().split(",");
+          String valueParts = e.getValue();
           String source = keyParts[0];
           java.sql.Date date = java.sql.Date.valueOf(keyParts[1]);
           String country = keyParts[2];
-          long requests = Long.parseLong(valueParts[0]);
-          double share = Double.parseDouble(valueParts[1]);
+          long requests = Long.parseLong(valueParts);
           psU.clearParameters();
           psU.setLong(1, requests);
-          psU.setDouble(2, share);
-          psU.setString(3, source);
-          psU.setDate(4, date);
-          psU.setString(5, country);
+          psU.setString(2, source);
+          psU.setDate(3, date);
+          psU.setString(4, country);
           psU.executeUpdate();
         }
         PreparedStatement psI = conn.prepareStatement(
-            "INSERT INTO dirreq_stats (requests, share, source, date, "
-            + "country) VALUES (?, ?, ?, ?, ?)");
+            "INSERT INTO dirreq_stats (requests, source, date, "
+            + "country) VALUES (?, ?, ?, ?)");
         for (Map.Entry<String, String> e : insertRows.entrySet()) {
           String[] keyParts = e.getKey().split(",");
-          String[] valueParts = e.getValue().split(",");
+          String valueParts = e.getValue();
           String source = keyParts[0];
           java.sql.Date date = java.sql.Date.valueOf(keyParts[1]);
           String country = keyParts[2];
-          long requests = Long.parseLong(valueParts[0]);
-          double share = Double.parseDouble(valueParts[1]);
+          long requests = Long.parseLong(valueParts);
           psI.clearParameters();
           psI.setLong(1, requests);
-          psI.setDouble(2, share);
-          psI.setString(3, source);
-          psI.setDate(4, date);
-          psI.setString(5, country);
+          psI.setString(2, source);
+          psI.setDate(3, date);
+          psI.setString(4, country);
           psI.executeUpdate();
         }
         conn.commit();
diff --git a/src/org/torproject/ernie/db/Main.java b/src/org/torproject/ernie/db/Main.java
index 9f5079f..64dcc55 100644
--- a/src/org/torproject/ernie/db/Main.java
+++ b/src/org/torproject/ernie/db/Main.java
@@ -29,22 +29,17 @@ public class Main {
       System.exit(1);
     }
 
-    // Define which stats we are interested in
-    SortedSet<String> countries = config.getDirreqBridgeCountries();
-    countries.add("zy"); // Add country zy for 'all users'
-    SortedSet<String> directories = config.getDirreqDirectories();
-
     // Prepare stats file handlers (only if we are writing stats)
     ConsensusStatsFileHandler csfh = config.getWriteConsensusStats() ?
         new ConsensusStatsFileHandler(
         config.getWriteAggregateStatsDatabase() ?
         config.getRelayDescriptorDatabaseJDBC() : null) : null;
     BridgeStatsFileHandler bsfh = config.getWriteBridgeStats() ?
-        new BridgeStatsFileHandler(countries,
+        new BridgeStatsFileHandler(
         config.getWriteAggregateStatsDatabase() ?
         config.getRelayDescriptorDatabaseJDBC() : null) : null;
     DirreqStatsFileHandler dsfh = config.getWriteDirreqStats() ?
-        new DirreqStatsFileHandler(countries,
+        new DirreqStatsFileHandler(
         config.getWriteAggregateStatsDatabase() ?
         config.getRelayDescriptorDatabaseJDBC() : null) : null;
 
@@ -75,8 +70,8 @@ public class Main {
         config.getWriteRelayDescriptorDatabase() ||
         config.getWriteRelayDescriptorsRawFiles() ||
         config.getWriteConsensusHealth() ?
-        new RelayDescriptorParser(csfh, bsfh, dsfh, aw, rddi, chc,
-            countries, directories) : null;
+        new RelayDescriptorParser(csfh, bsfh, dsfh, aw, rddi, chc)
+            : null;
 
     // Import/download relay descriptors from the various sources
     if (rdp != null) {
@@ -87,14 +82,12 @@ public class Main {
         boolean downloadCurrentConsensus = aw != null || csfh != null ||
             bsfh != null || rddi != null || chc != null;
         boolean downloadCurrentVotes = aw != null || chc != null;
-        boolean downloadAllServerDescriptors = aw != null || rddi != null;
-        boolean downloadAllExtraInfos = aw != null;
-        Set<String> downloadDescriptorsForRelays = bsfh != null ||
-            dsfh != null ? directories : new HashSet<String>();
+        boolean downloadAllServerDescriptors = aw != null ||
+            dsfh != null || rddi != null;
+        boolean downloadAllExtraInfos = aw != null || dsfh != null;
         rdd = new RelayDescriptorDownloader(rdp, dirSources,
             downloadCurrentConsensus, downloadCurrentVotes,
-            downloadAllServerDescriptors, downloadAllExtraInfos,
-            downloadDescriptorsForRelays);
+            downloadAllServerDescriptors, downloadAllExtraInfos);
         rdp.setRelayDescriptorDownloader(rdd);
       }
       if (config.getImportCachedRelayDescriptors()) {
@@ -151,7 +144,7 @@ public class Main {
     // Prepare bridge descriptor parser
     BridgeDescriptorParser bdp = config.getWriteConsensusStats() ||
         config.getWriteBridgeStats() || config.getWriteSanitizedBridges()
-        ? new BridgeDescriptorParser(csfh, bsfh, sbw, countries) : null;
+        ? new BridgeDescriptorParser(csfh, bsfh, sbw) : null;
 
     // Import bridge descriptors
     if (bdp != null && config.getImportSanitizedBridges()) {
diff --git a/src/org/torproject/ernie/db/RelayDescriptorDownloader.java b/src/org/torproject/ernie/db/RelayDescriptorDownloader.java
index c85473d..a88c36a 100644
--- a/src/org/torproject/ernie/db/RelayDescriptorDownloader.java
+++ b/src/org/torproject/ernie/db/RelayDescriptorDownloader.java
@@ -75,12 +75,6 @@ public class RelayDescriptorDownloader {
   private boolean downloadAllExtraInfos;
 
   /**
-   * Should we try to download missing server and extra-info descriptors
-   * of certain relays that have been published within the past 24 hours?
-   */
-  private Set<String> downloadDescriptorsForRelays;
-
-  /**
    * valid-after time that we expect the current consensus and votes to
    * have, formatted "yyyy-MM-dd HH:mm:ss". We only expect to find
    * consensuses and votes with this valid-after time on the directories.
@@ -122,8 +116,7 @@ public class RelayDescriptorDownloader {
   public RelayDescriptorDownloader(RelayDescriptorParser rdp,
       List<String> dirSources, boolean downloadCurrentConsensus,
       boolean downloadCurrentVotes, boolean downloadAllServerDescriptors,
-      boolean downloadAllExtraInfos,
-      Set<String> downloadDescriptorsForRelays) {
+      boolean downloadAllExtraInfos) {
 
     /* Memorize argument values. */
     this.rdp = rdp;
@@ -132,7 +125,6 @@ public class RelayDescriptorDownloader {
     this.downloadCurrentVotes = downloadCurrentVotes;
     this.downloadAllServerDescriptors = downloadAllServerDescriptors;
     this.downloadAllExtraInfos = downloadAllExtraInfos;
-    this.downloadDescriptorsForRelays = downloadDescriptorsForRelays;
 
     /* Initialize logger. */
     this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
@@ -252,21 +244,15 @@ public class RelayDescriptorDownloader {
     }
 
     /* Add server descriptors to missing list. */
-    if (this.downloadAllServerDescriptors ||
-        this.downloadDescriptorsForRelays != null) {
+    if (this.downloadAllServerDescriptors) {
       for (String serverDescriptor : serverDescriptors) {
         String published = serverDescriptor.split(",")[0];
         if (this.descriptorCutOff.compareTo(published) <= 0) {
-          if (this.downloadAllServerDescriptors ||
-              (this.downloadDescriptorsForRelays != null &&
-              this.downloadDescriptorsForRelays.contains(
-              serverDescriptor.split(",")[1].toUpperCase()))) {
-            String serverDescriptorKey = "server," + serverDescriptor;
-            if (!this.missingDescriptors.containsKey(
-                serverDescriptorKey)) {
-              this.missingDescriptors.put(serverDescriptorKey, "NA");
-              this.newMissingServerDescriptors++;
-            }
+          String serverDescriptorKey = "server," + serverDescriptor;
+          if (!this.missingDescriptors.containsKey(
+              serverDescriptorKey)) {
+            this.missingDescriptors.put(serverDescriptorKey, "NA");
+            this.newMissingServerDescriptors++;
           }
         }
       }
@@ -286,20 +272,15 @@ public class RelayDescriptorDownloader {
     }
 
     /* Add server descriptors to missing list. */
-    if (this.downloadAllServerDescriptors ||
-        this.downloadDescriptorsForRelays != null) {
+    if (this.downloadAllServerDescriptors) {
       for (String serverDescriptor : serverDescriptors) {
         String published = serverDescriptor.split(",")[0];
         if (this.descriptorCutOff.compareTo(published) < 0) {
-          if (this.downloadDescriptorsForRelays == null ||
-              this.downloadDescriptorsForRelays.contains(
-              serverDescriptor.split(",")[1].toUpperCase())) {
-            String serverDescriptorKey = "server," + serverDescriptor;
-            if (!this.missingDescriptors.containsKey(
-                serverDescriptorKey)) {
-              this.missingDescriptors.put(serverDescriptorKey, "NA");
-              this.newMissingServerDescriptors++;
-            }
+          String serverDescriptorKey = "server," + serverDescriptor;
+          if (!this.missingDescriptors.containsKey(
+              serverDescriptorKey)) {
+            this.missingDescriptors.put(serverDescriptorKey, "NA");
+            this.newMissingServerDescriptors++;
           }
         }
       }
@@ -323,10 +304,7 @@ public class RelayDescriptorDownloader {
           this.parsedTimestampString);
 
       /* Add extra-info descriptor to missing list. */
-      if (extraInfoDigest != null && (this.downloadAllExtraInfos ||
-          (this.downloadDescriptorsForRelays != null &&
-          this.downloadDescriptorsForRelays.contains(relayIdentity.
-          toUpperCase())))) {
+      if (extraInfoDigest != null && this.downloadAllExtraInfos) {
         String extraInfoKey = "extra," + published + ","
             + relayIdentity + "," + extraInfoDigest;
         if (!this.missingDescriptors.containsKey(extraInfoKey)) {
@@ -404,17 +382,11 @@ public class RelayDescriptorDownloader {
               this.currentValidAfter.equals(parts[1])) {
             urls.add("/tor/status-vote/current/" + parts[2]);
           } else if (parts[0].equals("server") &&
-              (this.downloadAllServerDescriptors ||
-              (this.downloadDescriptorsForRelays != null &&
-              this.downloadDescriptorsForRelays.contains(parts[2].
-              toUpperCase()))) &&
+              this.downloadAllServerDescriptors &&
               this.descriptorCutOff.compareTo(parts[1]) <= 0) {
             urls.add("/tor/server/d/" + parts[3]);
           } else if (parts[0].equals("extra") &&
-              (this.downloadAllExtraInfos ||
-              (this.downloadDescriptorsForRelays != null &&
-              this.downloadDescriptorsForRelays.contains(parts[2].
-              toUpperCase()))) &&
+              this.downloadAllExtraInfos &&
               this.descriptorCutOff.compareTo(parts[1]) <= 0) {
             urls.add("/tor/extra/d/" + parts[3]);
           }
diff --git a/src/org/torproject/ernie/db/RelayDescriptorParser.java b/src/org/torproject/ernie/db/RelayDescriptorParser.java
index bbc1f16..c110bb5 100644
--- a/src/org/torproject/ernie/db/RelayDescriptorParser.java
+++ b/src/org/torproject/ernie/db/RelayDescriptorParser.java
@@ -55,17 +55,6 @@ public class RelayDescriptorParser {
   private ConsensusHealthChecker chc;
 
   /**
-   * Countries that we care about for directory request and bridge
-   * statistics.
-   */
-  private SortedSet<String> countries;
-
-  /**
-   * Directories that we care about for directory request statistics.
-   */
-  private SortedSet<String> directories;
-
-  /**
    * Logger for this class.
    */
   private Logger logger;
@@ -78,16 +67,13 @@ public class RelayDescriptorParser {
   public RelayDescriptorParser(ConsensusStatsFileHandler csfh,
       BridgeStatsFileHandler bsfh, DirreqStatsFileHandler dsfh,
       ArchiveWriter aw, RelayDescriptorDatabaseImporter rddi,
-      ConsensusHealthChecker chc, SortedSet<String> countries,
-      SortedSet<String> directories) {
+      ConsensusHealthChecker chc) {
     this.csfh = csfh;
     this.bsfh = bsfh;
     this.dsfh = dsfh;
     this.aw = aw;
     this.rddi = rddi;
     this.chc = chc;
-    this.countries = countries;
-    this.directories = directories;
 
     /* Initialize logger. */
     this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
@@ -269,9 +255,9 @@ public class RelayDescriptorParser {
           }
         }
       } else if (line.startsWith("router ")) {
-        String platformLine = null, publishedTime = null,
-            bandwidthLine = null, extraInfoDigest = null,
-            relayIdentifier = null;
+        String platformLine = null, publishedLine = null,
+            publishedTime = null, bandwidthLine = null,
+            extraInfoDigest = null, relayIdentifier = null;
         String[] parts = line.split(" ");
         String nickname = parts[1];
         String address = parts[2];
@@ -334,7 +320,7 @@ public class RelayDescriptorParser {
         String publishedTime = null, relayIdentifier = line.split(" ")[2];
         long published = -1L;
         String dir = line.split(" ")[2];
-        String date = null, v3Reqs = null;
+        String date = null;
         SortedMap<String, String> bandwidthHistory =
             new TreeMap<String, String>();
         boolean skip = false;
@@ -378,28 +364,19 @@ public class RelayDescriptorParser {
             date = line.split(" ")[1];
           } else if (line.startsWith("dirreq-v3-reqs ")
               && line.length() > "dirreq-v3-reqs ".length()) {
-            v3Reqs = line.split(" ")[1];
-          } else if (line.startsWith("dirreq-v3-share ")
-              && v3Reqs != null && !skip) {
             int allUsers = 0;
             Map<String, String> obs = new HashMap<String, String>();
-            String[] parts = v3Reqs.split(",");
+            String[] parts = line.substring("dirreq-v3-reqs ".length()).
+                split(",");
             for (String p : parts) {
-              allUsers += Integer.parseInt(p.substring(3)) - 4;
-              for (String c : this.countries) {
-                if (p.startsWith(c)) {
-                  // TODO in theory, we should substract 4 here, too
-                  obs.put(c, p.substring(3));
-                  break;
-                }
-              }
+              String country = p.substring(0, 2);
+              int users = Integer.parseInt(p.substring(3)) - 4;
+              allUsers += users;
+              obs.put(country, "" + users);
             }
             obs.put("zy", "" + allUsers);
-            String share = line.substring("dirreq-v3-share ".length(),
-                line.length() - 1);
-            if (this.dsfh != null &&
-                directories.contains(relayIdentifier)) {
-              this.dsfh.addObs(dir, date, obs, share);
+            if (this.dsfh != null) {
+              this.dsfh.addObs(dir, date, obs);
             }
           }
         }
-- 
1.7.1



More information about the tor-commits mailing list