[or-cvs] [ernie/master 2/2] Make bridge stats more accurate.

karsten at torproject.org karsten at torproject.org
Tue May 25 13:22:00 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Tue, 25 May 2010 11:15:12 +0200
Subject: Make bridge stats more accurate.
Commit: 342073e7fb41f6036ce65d6d9099caeeb5a970de

Exclude geoip-stats of 0.2.2.x bridges which may be broken.
Include bridge-stats of bridges running 0.2.2.7-alpha or higher.
---
 src/BridgeDescriptorParser.java |   48 +++++++++++++++++-
 src/BridgeStatsFileHandler.java |  100 +++++++++++++++++++++++++++++++++++----
 2 files changed, 135 insertions(+), 13 deletions(-)

diff --git a/src/BridgeDescriptorParser.java b/src/BridgeDescriptorParser.java
index d67ab47..a679336 100644
--- a/src/BridgeDescriptorParser.java
+++ b/src/BridgeDescriptorParser.java
@@ -27,8 +27,9 @@ public class BridgeDescriptorParser {
       SimpleDateFormat timeFormat = new SimpleDateFormat(
           "yyyy-MM-dd HH:mm:ss");
       timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-      String hashedIdentity = null, publishedLine = null,
-          geoipStartTimeLine = null;
+      String hashedIdentity = null, platformLine = null,
+          publishedLine = null, geoipStartTimeLine = null,
+          bridgeStatsEndLine = null;
       boolean skip = false;
       String line = null;
       while ((line = br.readLine()) != null) {
@@ -70,8 +71,17 @@ public class BridgeDescriptorParser {
           if (this.bsfh != null) {
             skip = this.bsfh.isKnownRelay(hashedIdentity);
           }
+        } else if (!skip && line.startsWith("platform ")) {
+          platformLine = line;
         } else if (!skip && line.startsWith("published ")) {
           publishedLine = line;
+        } else if (line.startsWith("opt fingerprint") ||
+            line.startsWith("fingerprint")) {
+          String identity = line.substring(line.startsWith("opt ") ?
+              "opt fingerprint".length() : "fingerprint".length()).
+              replaceAll(" ", "").toLowerCase();
+          hashedIdentity = sanitized ? identity
+              : DigestUtils.shaHex(identity).toUpperCase();
         } else if (!skip && line.startsWith("geoip-start-time ")) {
           geoipStartTimeLine = line;
         } else if (!skip && line.startsWith("geoip-client-origins")
@@ -103,10 +113,42 @@ public class BridgeDescriptorParser {
           String date = publishedLine.split(" ")[1];
           String time = publishedLine.split(" ")[2];
           if (this.bsfh != null) {
-            bsfh.addObs(hashedIdentity, date, time, obs);
+            this.bsfh.addObs(hashedIdentity, date, time, obs);
+          }
+        } else if (!skip && line.startsWith("bridge-stats-end ")) {
+          bridgeStatsEndLine = line;
+        } else if (!skip && line.startsWith("bridge-ips")
+            && line.split(" ").length > 1) {
+          if (bridgeStatsEndLine == null) {
+            this.logger.warning("bridge-ips line without preceding "
+                + "bridge-stats-end line in "
+                + (sanitized ? "sanitized" : "non-sanitized")
+                + " bridge descriptor.");
+            break;
+          }
+          Map<String, String> obs = new HashMap<String, String>();
+          String[] parts = line.split(" ")[1].split(",");
+          for (String p : parts) {
+            for (String c : countries) {
+              if (p.startsWith(c)) {
+                obs.put(c, String.format("%.2f",
+                    (double) Long.parseLong(p.substring(3)) - 4L));
+              }
+            }
+          }
+          String date = bridgeStatsEndLine.split(" ")[1];
+          String time = bridgeStatsEndLine.split(" ")[2];
+          if (this.bsfh != null) {
+            this.bsfh.addObs(hashedIdentity, date, time, obs);
           }
         }
       }
+      if (this.bsfh != null && platformLine != null &&
+          platformLine.startsWith("platform Tor 0.2.2")) {
+        String date = publishedLine.split(" ")[1];
+        String time = publishedLine.split(" ")[2];
+        this.bsfh.addZeroTwoTwoDescriptor(hashedIdentity, date, time);
+      }
     } catch (IOException e) {
       this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
           e);
diff --git a/src/BridgeStatsFileHandler.java b/src/BridgeStatsFileHandler.java
index 1e2d0f3..9960481 100644
--- a/src/BridgeStatsFileHandler.java
+++ b/src/BridgeStatsFileHandler.java
@@ -45,10 +45,23 @@ public class BridgeStatsFileHandler {
   private SortedSet<String> hashedRelays;
 
   /**
+   * Helper file containing extra-info descriptors published by 0.2.2.x
+   * bridges. If these descriptors contain geoip-stats, they are not
+   * included in the results, because stats are very likely broken.
+   */
+  private File zeroTwoTwoDescriptorsFile;
+
+  /**
+   * Extra-info descriptors published by 0.2.2.x bridges. If these
+   * descriptors contain geoip-stats, they are not included in the
+   * results, because stats are very likely broken.
+   */
+  private SortedSet<String> zeroTwoTwoDescriptors;
+
+  /**
    * Final results file containing the number of bridge users per country
-   * and day. If this file exists on disk, it is not read in during
-   * initialization, but overwritten if either <code>bridgeUsersRaw</code>
-   * or <code>hashedRelays</code> have been modified.
+   * and day. This file is not read in during initialization, but
+   * overwritten at the end of the execution.
    */
   private File bridgeStatsFile;
 
@@ -70,12 +83,15 @@ public class BridgeStatsFileHandler {
     /* Initialize local data structures to hold results. */
     this.bridgeUsersRaw = new TreeMap<String, String>();
     this.hashedRelays = new TreeSet<String>();
+    this.zeroTwoTwoDescriptors = new TreeSet<String>();
 
     /* Initialize file names for intermediate and final results. */
     this.bridgeStatsRawFile = new File("stats/bridge-stats-raw");
     this.bridgeStatsFile = new File("stats/bridge-stats");
     this.hashedRelayIdentitiesFile = new File(
         "stats/hashed-relay-identities");
+    this.zeroTwoTwoDescriptorsFile = new File(
+        "stats/v022-bridge-descriptors");
 
     /* Initialize logger. */
     this.logger = Logger.getLogger(
@@ -112,7 +128,7 @@ public class BridgeStatsFileHandler {
                     + this.bridgeStatsRawFile.getAbsolutePath()
                     + "! Aborting to read this file!");
                 break;
-              } 
+              }
               String hashedBridgeIdentity = parts[0];
               String date = parts[1];
               String time = parts[2];
@@ -155,6 +171,28 @@ public class BridgeStatsFileHandler {
             + this.hashedRelayIdentitiesFile.getAbsolutePath() + "!", e);
       }
     }
+
+    /* Read in known extra-info descriptors published by 0.2.2.x
+     * bridges. */
+    if (this.zeroTwoTwoDescriptorsFile.exists()) {
+      try {
+        this.logger.fine("Reading file "
+            + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "...");
+        BufferedReader br = new BufferedReader(new FileReader(
+            this.zeroTwoTwoDescriptorsFile));
+        String line = null;
+        /* Read in all lines from the file and memorize them. */
+        while ((line = br.readLine()) != null) {
+          this.zeroTwoTwoDescriptors.add(line);
+        }
+        br.close();
+        this.logger.fine("Finished reading file "
+            + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + ".");
+      } catch (IOException e) {
+        this.logger.log(Level.WARNING, "Failed to read file "
+            + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "!", e);
+      }
+    }
   }
 
   /**
@@ -172,6 +210,22 @@ public class BridgeStatsFileHandler {
   }
 
   /**
+   * Adds an extra-info descriptor identifier published by an 0.2.2.x
+   * bridge. If this extra-info descriptor contains geoip-stats, they are
+   * not included in the results, because stats are very likely broken.
+   */
+  public void addZeroTwoTwoDescriptor(String hashedBridgeIdentity,
+      String date, String time) {
+    String value = hashedBridgeIdentity.toUpperCase() + "," + date + ","
+        + time;
+    if (!this.zeroTwoTwoDescriptors.contains(value)) {
+      this.logger.finer("Adding new bridge 0.2.2.x extra-info "
+          + "descriptor: " + value);
+      this.zeroTwoTwoDescriptors.add(value);
+    }
+  }
+
+  /**
    * Returns whether the given fingerprint is a known hashed relay
    * identity. <code>BridgeDescriptorParser</code> uses this information
    * to decide whether to continue parsing a bridge extra-descriptor
@@ -189,8 +243,8 @@ public class BridgeStatsFileHandler {
    * bridge and day, we keep the one with the later publication time and
    * discard the other one.
    */
-  public void addObs(String hashedIdentity, String date,
-      String time, Map<String, String> obs) {
+  public void addObs(String hashedIdentity, String date, String time,
+      Map<String, String> obs) {
     String key = hashedIdentity + "," + date;
     StringBuilder sb = new StringBuilder(key + "," + time);
     for (String c : countries) {
@@ -237,6 +291,24 @@ public class BridgeStatsFileHandler {
           + this.hashedRelayIdentitiesFile.getAbsolutePath() + "!", e);
     }
 
+    /* Write bridge extra-info descriptor identifiers to disk. */
+    try {
+      this.logger.fine("Writing file "
+          + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "...");
+      this.zeroTwoTwoDescriptorsFile.getParentFile().mkdirs();
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          this.zeroTwoTwoDescriptorsFile));
+      for (String descriptorIdentifier : this.zeroTwoTwoDescriptors) {
+        bw.append(descriptorIdentifier + "\n");
+      }
+      bw.close();
+      this.logger.fine("Finished writing file "
+          + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + ".");
+    } catch (IOException e) {
+      this.logger.log(Level.WARNING, "Failed to write "
+          + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "!", e);
+    }
+
     /* Write observations made by single bridges to disk. */
     try {
       this.logger.fine("Writing file "
@@ -250,8 +322,13 @@ public class BridgeStatsFileHandler {
       }
       bw.append("\n");
       for (String line : this.bridgeUsersRaw.values()) {
-        String hashedBridgeIdentity = line.split(",")[0];
-        if (!this.hashedRelays.contains(hashedBridgeIdentity)) {
+        String[] parts = line.split(",");
+        String hashedBridgeIdentity = parts[0];
+        String date = parts[1];
+        String time = parts[2];
+        if (!this.hashedRelays.contains(hashedBridgeIdentity) &&
+            !this.zeroTwoTwoDescriptors.contains(hashedBridgeIdentity
+            + "," + date + "," + time)) {
           bw.append(line + "\n");
         }
       }
@@ -269,8 +346,11 @@ public class BridgeStatsFileHandler {
     for (String line : this.bridgeUsersRaw.values()) {
       String[] parts = line.split(",");
       String hashedBridgeIdentity = parts[0];
-      if (!this.hashedRelays.contains(hashedBridgeIdentity)) {
-        String date = parts[1];
+      String date = parts[1];
+      String time = parts[2];
+      if (!this.hashedRelays.contains(hashedBridgeIdentity) &&
+          !this.zeroTwoTwoDescriptors.contains(hashedBridgeIdentity + ","
+          + date + "," + time)) {
         double[] users = bridgeUsersPerDay.get(date);
         if (users == null) {
           users = new double[countries.size()];
-- 
1.6.5



More information about the tor-commits mailing list