[tor-commits] [metrics-tasks/master] Add code for 2680.

11 Mar 2011

commit ee7df6a759f6788579f61403fb9771996ea5c988
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date:   Fri Mar 11 14:51:12 2011 +0100

    Add code for 2680.
---
 task-2680/.gitignore                   |    6 +
 task-2680/ProcessRelayConsensuses.java |   85 ++++++++
 task-2680/ProcessSanitizedBridges.java |  327 ++++++++++++++++++++++++++++++++
 task-2680/README                       |  145 ++++++++++++++
 task-2680/verify.R                     |   27 +++
 5 files changed, 590 insertions(+), 0 deletions(-)

diff --git a/task-2680/.gitignore b/task-2680/.gitignore
new file mode 100644
index 0000000..134e86d
--- /dev/null
+++ b/task-2680/.gitignore
@@ -0,0 +1,6 @@
+*.class
+*.csv
+bridge-descriptors/
+commons-codec-1.4.jar
+consensuses/
+
diff --git a/task-2680/ProcessRelayConsensuses.java b/task-2680/ProcessRelayConsensuses.java
new file mode 100644
index 0000000..44d9ce3
--- /dev/null
+++ b/task-2680/ProcessRelayConsensuses.java
@@ -0,0 +1,131 @@
+import java.io.*;
+import java.util.*;
+import org.apache.commons.codec.binary.*;
+import org.apache.commons.codec.digest.*;
+
+/**
+ * Extracts hashed relay identities from v3 relay consensuses.
+ *
+ * <p>Searches the path given as the only command-line argument
+ * recursively for files whose names end in "-consensus" and writes one
+ * line per "r" entry to relays.csv: the consensus valid-after time and
+ * the hex-encoded SHA-1 digest of the decoded identity field.</p>
+ */
+public class ProcessRelayConsensuses {
+
+  /**
+   * Runs the conversion.
+   *
+   * @param args exactly one element, the path to search for consensuses
+   * @throws IOException if a consensus cannot be read or relays.csv
+   *     cannot be written
+   */
+  public static void main(String[] args) throws IOException {
+
+    /* Validate command-line arguments. */
+    if (args.length != 1 || !new File(args[0]).exists()) {
+      System.out.println("Usage: java ProcessRelayConsensuses <dir>");
+      System.exit(1);
+    }
+
+    /* Find all files that we should parse. Somewhat fragile, but should
+     * work. */
+    System.out.println("Creating list of files we should parse.");
+    SortedMap<String, File> consensuses = new TreeMap<String, File>();
+    Stack<File> files = new Stack<File>();
+    files.add(new File(args[0]));
+    while (!files.isEmpty()) {
+      File file = files.pop();
+      String filename = file.getName();
+      if (file.isDirectory()) {
+        /* listFiles() returns null if the directory cannot be listed. */
+        File[] children = file.listFiles();
+        if (children != null) {
+          files.addAll(Arrays.asList(children));
+        }
+      } else if (filename.endsWith("-consensus")) {
+        consensuses.put(filename, file);
+      }
+    }
+    System.out.println("We found " + consensuses.size()
+        + " consensuses.");
+
+    /* Parse consensuses. */
+    if (!consensuses.isEmpty()) {
+      System.out.println("Parsing consensuses.");
+      int parsedConsensuses = 0, totalConsensuses = consensuses.size(),
+          writtenOutputLines = 1, reportedTenths = 0;
+      long started = System.currentTimeMillis();
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          "relays.csv"));
+      try {
+        bw.write("consensus,fingerprint\n");
+        for (File file : consensuses.values()) {
+          BufferedReader br = new BufferedReader(new FileReader(file));
+          try {
+            String line, validAfter = null;
+            while ((line = br.readLine()) != null) {
+              if (line.startsWith("valid-after ")) {
+                validAfter = line.substring("valid-after ".length());
+              } else if (line.startsWith("r ")) {
+                if (validAfter == null) {
+                  System.out.println("Found an r line before the "
+                      + "valid-after line in " + file.getName()
+                      + ". Please check. Exiting.");
+                  System.exit(1);
+                }
+                /* Re-append the "=" padding before decoding the
+                 * base64 identity field, then hash the raw bytes. */
+                String fingerprint = DigestUtils.shaHex(
+                    Base64.decodeBase64(line.split(" ")[2] + "="));
+                bw.write(validAfter + "," + fingerprint + "\n");
+                writtenOutputLines++;
+              }
+            }
+          } finally {
+            br.close();
+          }
+          parsedConsensuses++;
+          reportedTenths = reportProgress(parsedConsensuses,
+              totalConsensuses, reportedTenths, started);
+        }
+      } finally {
+        bw.close();
+      }
+      System.out.println("Parsed " + parsedConsensuses + " consensuses "
+          + "and wrote " + writtenOutputLines + " lines to relays.csv.");
+    }
+
+    /* This is it. */
+    System.out.println("Terminating.");
+  }
+
+  /**
+   * Prints a progress line each time another tenth of all consensuses
+   * has been parsed.  Works for any positive total, including totals
+   * of fewer than ten consensuses.
+   *
+   * @param parsed number of consensuses parsed so far
+   * @param total total number of consensuses, greater than zero
+   * @param reportedTenths tenths that were reported by earlier calls
+   * @param started start time as returned by currentTimeMillis()
+   * @return number of tenths completed, to pass to the next call
+   */
+  private static int reportProgress(int parsed, int total,
+      int reportedTenths, long started) {
+    int tenthsDone = (int) (10L * parsed / total);
+    if (tenthsDone > reportedTenths) {
+      double fractionDone = (double) parsed / (double) total;
+      double fractionLeft = 1.0D - fractionDone;
+      long now = System.currentTimeMillis();
+      double millisLeft = ((double) (now - started)) * fractionLeft
+          / fractionDone;
+      long secondsLeft = (long) millisLeft / 1000L;
+      System.out.println("  " + tenthsDone + "0% done, " + secondsLeft
+          + " seconds left.");
+    }
+    return tenthsDone;
+  }
+}
+
+
diff --git a/task-2680/ProcessSanitizedBridges.java b/task-2680/ProcessSanitizedBridges.java
new file mode 100644
index 0000000..1f0e00e
--- /dev/null
+++ b/task-2680/ProcessSanitizedBridges.java
@@ -0,0 +1,438 @@
+import java.io.*;
+import java.text.*;
+import java.util.*;
+import org.apache.commons.codec.binary.*;
+
+/**
+ * Converts sanitized bridge descriptors to CSV files.
+ *
+ * <p>Searches the path given as the only command-line argument
+ * recursively and sorts the files found into bridge network statuses,
+ * server descriptors, and extra-info descriptors by looking for
+ * "statuses", "server-descriptors", or "extra-infos" in their absolute
+ * paths.  Statuses are written to statuses.csv; server descriptors,
+ * combined with the extra-info descriptors they reference, are written
+ * to descriptors.csv.</p>
+ */
+public class ProcessSanitizedBridges {
+
+  /**
+   * Runs the conversion.
+   *
+   * @param args exactly one element, the path to search for descriptors
+   * @throws IOException if a descriptor cannot be read or an output file
+   *     cannot be written
+   * @throws ParseException if a timestamp needed to compute the length
+   *     of a geoip stats interval cannot be parsed
+   */
+  public static void main(String[] args) throws IOException,
+      ParseException {
+
+    /* Validate command-line arguments. */
+    if (args.length != 1 || !new File(args[0]).exists()) {
+      System.out.println("Usage: java ProcessSanitizedBridges <dir>");
+      System.exit(1);
+    }
+
+    /* Find all files that we should parse. Somewhat fragile, but should
+     * work. */
+    System.out.println("Creating list of files we should parse.");
+    SortedMap<String, File> statuses = new TreeMap<String, File>();
+    SortedMap<String, File> serverDescriptors =
+        new TreeMap<String, File>();
+    SortedMap<String, File> extraInfoDescriptors =
+        new TreeMap<String, File>();
+    Stack<File> files = new Stack<File>();
+    files.add(new File(args[0]));
+    while (!files.isEmpty()) {
+      File file = files.pop();
+      String path = file.getAbsolutePath();
+      String filename = file.getName();
+      if (file.isDirectory()) {
+        /* listFiles() returns null if the directory cannot be listed. */
+        File[] children = file.listFiles();
+        if (children != null) {
+          files.addAll(Arrays.asList(children));
+        }
+      } else if (path.contains("statuses")) {
+        statuses.put(filename, file);
+      } else if (path.contains("server-descriptors")) {
+        serverDescriptors.put(filename, file);
+      } else if (path.contains("extra-infos")) {
+        extraInfoDescriptors.put(filename, file);
+      }
+    }
+    System.out.println("We found\n  " + statuses.size() + " statuses,\n  "
+        + serverDescriptors.size() + " server descriptors, and\n  "
+        + extraInfoDescriptors.size() + " extra-info descriptors.");
+
+    /* Parse statuses. */
+    if (!statuses.isEmpty()) {
+      parseStatuses(statuses);
+    }
+
+    /* Parse server descriptors and extra-info descriptors. */
+    if (!serverDescriptors.isEmpty()) {
+      parseDescriptors(serverDescriptors, extraInfoDescriptors);
+    }
+
+    /* This is it. */
+    System.out.println("Terminating.");
+  }
+
+  /**
+   * Writes one line to statuses.csv for every bridge entry contained in
+   * the given bridge network statuses.
+   *
+   * @param statuses non-empty map of status files, keyed by file name
+   * @throws IOException if a status cannot be read or statuses.csv
+   *     cannot be written
+   */
+  private static void parseStatuses(SortedMap<String, File> statuses)
+      throws IOException {
+    System.out.println("Parsing statuses.");
+    List<String> knownFlags = new ArrayList<String>(Arrays.asList(
+        ("Authority,BadExit,BadDirectory,Exit,Fast,Guard,Named,Stable,"
+        + "Running,Valid,V2Dir").split(",")));
+    int parsedStatuses = 0, totalStatuses = statuses.size(),
+        writtenOutputLines = 1, reportedTenths = 0;
+    long started = System.currentTimeMillis();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        "statuses.csv"));
+    try {
+      bw.write("status,fingerprint,descriptor,published,address,orport,"
+          + "dirport");
+      for (String knownFlag : knownFlags) {
+        /* Use a fixed locale so that column names do not depend on the
+         * default locale of the machine running this code. */
+        bw.write("," + knownFlag.toLowerCase(Locale.ENGLISH));
+      }
+      bw.write("\n");
+      for (File file : statuses.values()) {
+
+        /* Status file names are expected to start with the publication
+         * time in yyyyMMdd-HHmmss format. */
+        String filename = file.getName();
+        if (filename.length() != ("20110101-000703-"
+            + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D").length()) {
+          System.out.println("Status filename has wrong length: '"
+              + filename + "' Please check. Exiting.");
+          System.exit(1);
+        }
+        String statusDateTime = filename.substring(0, 4) + "-"
+            + filename.substring(4, 6) + "-" + filename.substring(6, 8)
+            + " " + filename.substring(9, 11) + ":"
+            + filename.substring(11, 13) + ":"
+            + filename.substring(13, 15);
+        BufferedReader br = new BufferedReader(new FileReader(file));
+        try {
+          String line;
+          while ((line = br.readLine()) != null) {
+            if (line.startsWith("r ")) {
+
+              /* Start a new output line; it is completed by the flags
+               * taken from the "s" line that follows. */
+              String[] parts = line.split(" ");
+              if (parts.length != 9) {
+                System.out.println("r line doesn't have the correct "
+                    + "number of entries: '" + line + "'. Please check. "
+                    + "Exiting.");
+                System.exit(1);
+              }
+              String fingerprint = Hex.encodeHexString(
+                  Base64.decodeBase64(parts[2] + "="));
+
+              /* The descriptor identifier is the field following the
+               * fingerprint, not the fingerprint itself. */
+              String descriptor = Hex.encodeHexString(
+                  Base64.decodeBase64(parts[3] + "="));
+              String published = parts[4] + " " + parts[5];
+              String address = parts[6];
+              String orPort = parts[7];
+              String dirPort = parts[8];
+              bw.write(statusDateTime + "," + fingerprint + ","
+                  + descriptor + "," + published + "," + address + ","
+                  + orPort + "," + dirPort);
+            } else if (line.equals("s") || line.startsWith("s ")) {
+
+              /* Compare whole tokens so that a flag is never matched by
+               * a longer flag that merely starts with its name. */
+              Set<String> flags = new HashSet<String>(Arrays.asList(
+                  line.split(" ")));
+              for (String flag : knownFlags) {
+                bw.write(flags.contains(flag) ? ",TRUE" : ",FALSE");
+              }
+              bw.write("\n");
+              writtenOutputLines++;
+            }
+          }
+        } finally {
+          br.close();
+        }
+        parsedStatuses++;
+        reportedTenths = reportProgress(parsedStatuses, totalStatuses,
+            10, reportedTenths, started);
+      }
+    } finally {
+      bw.close();
+    }
+    System.out.println("Parsed " + parsedStatuses + " statuses and "
+        + "wrote " + writtenOutputLines + " lines to statuses.csv.");
+  }
+
+  /**
+   * Writes one line to descriptors.csv for every given server
+   * descriptor, including the bridge or geoip stats found in the
+   * extra-info descriptor it references, if that descriptor is known.
+   *
+   * @param serverDescriptors non-empty map of server descriptor files,
+   *     keyed by file name
+   * @param extraInfoDescriptors map of extra-info descriptor files, keyed
+   *     by file name, which is looked up using the lower-cased
+   *     extra-info digest found in a server descriptor
+   * @throws IOException if a descriptor cannot be read or
+   *     descriptors.csv cannot be written
+   * @throws ParseException if a timestamp needed to compute the length
+   *     of a geoip stats interval cannot be parsed
+   */
+  private static void parseDescriptors(
+      SortedMap<String, File> serverDescriptors,
+      SortedMap<String, File> extraInfoDescriptors) throws IOException,
+      ParseException {
+    System.out.println("Parsing server descriptors and extra-info "
+        + "descriptors.");
+
+    /* Lower-case the country codes using a fixed locale; with a Turkish
+     * default locale, codes containing "I" would otherwise not match the
+     * codes found in descriptors. */
+    List<String> knownCountries = new ArrayList<String>(Arrays.asList(
+        ("?? A1 A2 AD AE AF AG AI AL AM AN AO AP AQ AR AS AT AU AW AX "
+        + "AZ BA BB BD BE BF BG BH BI BJ BM BN BO BR BS BT BV BW BY BZ "
+        + "CA CD CF CG CH CI CK CL CM CN CO CR CS CU CV CY CZ DE DJ DK "
+        + "DM DO DZ EC EE EG ER ES ET EU FI FJ FK FM FO FR GA GB GD GE "
+        + "GF GG GH GI GL GM GN GP GQ GR GT GU GW GY HK HN HR HT HU ID "
+        + "IE IL IM IN IO IQ IR IS IT JE JM JO JP KE KG KH KI KM KN KP "
+        + "KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD ME MF "
+        + "MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NC "
+        + "NE NF NG NI NL NO NP NR NU NZ OM PA PE PF PG PH PK PL PM PR "
+        + "PS PT PW PY QA RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK "
+        + "SL SM SN SO SR ST SV SY SZ TC TD TG TH TJ TK TL TM TN TO TR "
+        + "TT TV TW TZ UA UG UM US UY UZ VA VC VE VG VI VN VU WF WS YE "
+        + "YT ZA ZM ZW").toLowerCase(Locale.ENGLISH).split(" ")));
+    int parsedServerDescriptors = 0, parsedExtraInfoDescriptors = 0,
+        parsedGeoipStats = 0, skippedGeoipStats = 0,
+        parsedBridgeStats = 0,
+        totalServerDescriptors = serverDescriptors.size(),
+        writtenOutputLines = 1, reportedPercent = 0;
+    SimpleDateFormat timeFormat = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss");
+    timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    long started = System.currentTimeMillis();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        "descriptors.csv"));
+    try {
+      bw.write("descriptor,fingerprint,published,address,orport,dirport,"
+          + "version,platform,uptime,bridgestatsend,bridgestatsseconds");
+      for (String country : knownCountries) {
+        bw.write("," + country);
+      }
+      bw.write(",bridgestatscountries,bridgestatstotal\n");
+      for (File file : serverDescriptors.values()) {
+        String filename = file.getName();
+        String fingerprint = null, published = null, address = null,
+            orPort = null, dirPort = null, version = null,
+            platform = null, uptime = null, extraInfoDigest = null,
+            bridgeStatsEnd = null, bridgeStatsSeconds = null;
+        SortedMap<String, String> bridgeStatsIps =
+            new TreeMap<String, String>();
+        long bridgeStatsTotal = 0L;
+        BufferedReader br = new BufferedReader(new FileReader(file));
+        try {
+          String line;
+          while ((line = br.readLine()) != null) {
+            if (line.startsWith("opt ")) {
+              line = line.substring(4);
+            }
+            if (line.startsWith("router ")) {
+              String[] parts = line.split(" ");
+              address = parts[2];
+              orPort = parts[3];
+              dirPort = parts[4];
+            } else if (line.startsWith("platform ")) {
+              version = line.split(" ")[2];
+              platform = line.substring(line.indexOf("on ")
+                  + "on ".length());
+              if (platform.contains("Windows")) {
+                platform = "Windows";
+              } else if (platform.contains("Linux")) {
+                platform = "Linux";
+              } else if (platform.contains("Darwin")) {
+                platform = "Mac OS X";
+              } else if (platform.contains("BSD")) {
+                platform = "*BSD";
+              } else {
+                platform = "Other";
+              }
+            } else if (line.startsWith("published ")) {
+              String[] parts = line.split(" ");
+              published = parts[1] + " " + parts[2];
+            } else if (line.startsWith("fingerprint ")) {
+              fingerprint = line.substring("fingerprint".length()).
+                  replaceAll(" ", "").toLowerCase();
+            } else if (line.startsWith("uptime ")) {
+              uptime = line.split(" ")[1];
+            } else if (line.startsWith("extra-info-digest ")) {
+              extraInfoDigest = line.substring("extra-info-digest ".
+                  length()).toLowerCase();
+              if (extraInfoDescriptors.containsKey(extraInfoDigest)) {
+                parsedExtraInfoDescriptors++;
+                BufferedReader br2 = new BufferedReader(new FileReader(
+                    extraInfoDescriptors.get(extraInfoDigest)));
+                try {
+                  String line2, geoipStartTime = null,
+                      bridgeStatsEndLine = null;
+                  while ((line2 = br2.readLine()) != null) {
+                    if (line2.startsWith("geoip-start-time ")) {
+                      geoipStartTime = line2.substring(
+                          "geoip-start-time ".length());
+                    } else if (line2.startsWith("geoip-client-origins ")
+                        && line2.split(" ").length > 1
+                        && published != null && geoipStartTime != null) {
+                      if (version == null) {
+                        System.out.println("Found geoip stats before "
+                            + "the platform line of server descriptor "
+                            + filename + ". Please check. Exiting.");
+                        System.exit(1);
+                      }
+                      if (version.startsWith("0.2.2.")) {
+                        skippedGeoipStats++;
+                      } else {
+                        parsedGeoipStats++;
+                        bridgeStatsEnd = published;
+                        bridgeStatsSeconds = String.valueOf(
+                            (timeFormat.parse(published).getTime()
+                            - timeFormat.parse(geoipStartTime).getTime())
+                            / 1000L);
+                        for (String pair :
+                            line2.split(" ")[1].split(",")) {
+                          String country = pair.substring(0, 2);
+                          String ips = pair.substring(3);
+                          bridgeStatsIps.put(country, ips);
+                          bridgeStatsTotal += Long.parseLong(ips);
+                        }
+                      }
+                    } else if (line2.startsWith("bridge-stats-end ")) {
+                      bridgeStatsEndLine = line2;
+                    } else if (line2.startsWith("bridge-ips ")
+                        && line2.length() > "bridge-ips ".length()
+                        && bridgeStatsEndLine != null) {
+                      parsedBridgeStats++;
+                      String[] parts = bridgeStatsEndLine.split(" ");
+                      bridgeStatsEnd = parts[1] + " " + parts[2];
+                      bridgeStatsSeconds = parts[3].substring(1);
+                      for (String pair :
+                          line2.split(" ")[1].split(",")) {
+                        String country = pair.substring(0, 2);
+                        String ips = pair.substring(3);
+                        bridgeStatsIps.put(country, ips);
+                        bridgeStatsTotal += Long.parseLong(ips);
+                      }
+                    }
+                  }
+                } finally {
+                  br2.close();
+                }
+              }
+            }
+          }
+        } finally {
+          br.close();
+        }
+        if (fingerprint == null || published == null || address == null
+            || orPort == null || dirPort == null || version == null
+            || platform == null || uptime == null) {
+          System.out.println("Server descriptor " + filename + " is "
+              + "missing critical information. Please check. Exiting.");
+          System.exit(1);
+        }
+        bw.write(filename + "," + fingerprint + "," + published + ","
+            + address + "," + orPort + "," + dirPort + "," + version + ","
+            + platform + "," + uptime);
+        if (bridgeStatsEnd != null) {
+          bw.write("," + bridgeStatsEnd + "," + bridgeStatsSeconds);
+          int bridgeStatsCountries = bridgeStatsIps.size();
+          for (String country : knownCountries) {
+            if (bridgeStatsIps.containsKey(country)) {
+              /* Remove reported countries, so that only unknown
+               * countries remain in the map after this loop. */
+              bw.write("," + bridgeStatsIps.remove(country));
+            } else {
+              bw.write(",0");
+            }
+          }
+          if (!bridgeStatsIps.isEmpty()) {
+            StringBuilder message = new StringBuilder();
+            for (String country : bridgeStatsIps.keySet()) {
+              message.append(", " + country);
+            }
+            System.out.println("Unknown " + (bridgeStatsIps.size() == 1
+                ? "country" : "countries") + " " + message.toString().
+                substring(2) + " in extra-info descriptor "
+                + extraInfoDigest + ". Please check. Exiting.");
+            System.exit(1);
+          }
+          bw.write("," + bridgeStatsCountries + "," + bridgeStatsTotal
+              + "\n");
+        } else {
+          bw.write(",NA,NA");
+          for (String country : knownCountries) {
+            bw.write(",NA");
+          }
+          bw.write(",NA,NA\n");
+        }
+        writtenOutputLines++;
+        parsedServerDescriptors++;
+        reportedPercent = reportProgress(parsedServerDescriptors,
+            totalServerDescriptors, 100, reportedPercent, started);
+      }
+    } finally {
+      bw.close();
+    }
+    System.out.println("Parsed " + parsedServerDescriptors + " server "
+        + "descriptors and " + parsedExtraInfoDescriptors
+        + " extra-info descriptors.\nParsed " + parsedGeoipStats
+        + " geoip-stats and " + parsedBridgeStats + " bridge-stats.\n"
+        + "Skipped " + skippedGeoipStats + " broken geoip-stats of "
+        + "0.2.2.x bridges.\nWrote " + writtenOutputLines + " lines to "
+        + "descriptors.csv.");
+  }
+
+  /**
+   * Prints a progress line each time another step towards completion
+   * has been reached.  Works for any positive total, including totals
+   * smaller than the number of steps.
+   *
+   * @param parsed number of files parsed so far
+   * @param total total number of files to parse, greater than zero
+   * @param steps number of progress steps; either 10 or 100
+   * @param reportedSteps steps that were reported by earlier calls
+   * @param started start time as returned by currentTimeMillis()
+   * @return number of steps completed, to pass to the next call
+   */
+  private static int reportProgress(int parsed, int total, int steps,
+      int reportedSteps, long started) {
+    int stepsDone = (int) ((long) steps * parsed / total);
+    if (stepsDone > reportedSteps) {
+      double fractionDone = (double) parsed / (double) total;
+      double fractionLeft = 1.0D - fractionDone;
+      long now = System.currentTimeMillis();
+      double millisLeft = ((double) (now - started)) * fractionLeft
+          / fractionDone;
+      long secondsLeft = (long) millisLeft / 1000L;
+      System.out.println("  " + (stepsDone * (100 / steps)) + "% done, "
+          + secondsLeft + " seconds left.");
+    }
+    return stepsDone;
+  }
+}
+
diff --git a/task-2680/README b/task-2680/README
new file mode 100644
index 0000000..a00856f
--- /dev/null
+++ b/task-2680/README
@@ -0,0 +1,145 @@
+This ticket contains Java and R code to
+
+ a) process bridge and relay data to convert them to a format that is more
+    useful for researchers and
+ b) verify that the output data files are valid.
+
+This README has a separate section for each Java or R code snippet.
+
+The Java applications produce three output formats containing bridge
+descriptors, bridge status lines, and hashed relay identities.  The data
+formats are described below.
+
+--------------------------------------------------------------------------
+
+ProcessSanitizedBridges.java
+
+ - Download sanitized bridge descriptors from the metrics website, e.g.,
+   https://metrics.torproject.org/data/bridge-descriptors-2011-01.tar.bz2,
+   and extract them in a local directory, e.g., bridge-descriptors/.
+
+ - Download Apache Commons Codec 1.4 or higher and put it in this
+   directory.
+
+ - Compile the Java class, e.g.,
+   $ javac -cp commons-codec-1.4.jar ProcessSanitizedBridges.java
+
+ - Run the Java class, e.g.,
+   $ java -cp .:commons-codec-1.4.jar ProcessSanitizedBridges \
+     bridge-descriptors/
+
+ - Once the Java application is done, you'll find the two files
+   statuses.csv and descriptors.csv in this directory.
+
+--------------------------------------------------------------------------
+
+ProcessRelayConsensuses.java
+
+ - Download v3 relay consensuses from the metrics website, e.g.,
+   https://metrics.torproject.org/data/consensuses-2011-01.tar.bz2, and
+   extract them in a local directory, e.g., consensuses/.
+
+ - Download Apache Commons Codec 1.4 or higher and put it in this
+   directory, unless you have already done this above for
+   ProcessSanitizedBridges.java.
+
+ - Compile the Java class, e.g.,
+   $ javac -cp commons-codec-1.4.jar ProcessRelayConsensuses.java
+
+ - Run the Java class, e.g.,
+   $ java -cp .:commons-codec-1.4.jar ProcessRelayConsensuses consensuses/
+
+ - Once the Java application is done, you'll find a file relays.csv in
+   this directory.
+
+--------------------------------------------------------------------------
+
+verify.R
+
+ - Run the R verification script like this:
+   $ R --slave -f verify.R
+
+--------------------------------------------------------------------------
+
+descriptors.csv
+
+The descriptors.csv file contains one line for each bridge descriptor that
+a bridge has published.  This descriptor consists of fields coming from
+the bridge's server descriptor and the bridge's extra-info descriptor that
+was published at the same time.
+
+The columns in descriptors.csv are:
+
+ - descriptor: Hex-formatted descriptor identifier
+ - fingerprint: Hex-formatted SHA-1 hash of identity fingerprint
+ - published: ISO-formatted descriptor publication time
+ - address: Sanitized IPv4 address in dotted notation
+ - orport: OR port
+ - dirport: Dir port
+ - version: Tor version
+ - platform: Operating system family (Windows, Linux, etc.)
+ - uptime: Uptime in seconds
+ - bridgestatsend: ISO-formatted time when stats interval ended
+ - bridgestatsseconds: Stats interval length in seconds
+ - ??: Unique client IP addresses that could not be resolved
+ - a1: Unique client IP addresses from anonymous proxies
+ - a2: Unique client IP addresses from satellite providers
+ - ad: Unique client IP addresses from Andorra
+ - ae: Unique client IP addresses from the United Arab Emirates
+ - [...] See ISO 3166-1 alpha-2 country codes
+ - zw: Unique client IP addresses from Zimbabwe
+ - bridgestatscountries: Number of countries with non-zero unique IPs
+ - bridgestatstotal: Total number of unique IPs
+
+There are two sources for the bridgestats* and country-code columns,
+depending on Tor's version.  Bridges running Tor version 0.2.1.x or
+earlier use dynamic stats intervals from a few hours to a few days.
+Bridges running early 0.2.2.x versions published faulty stats, which are
+therefore left out of descriptors.csv.  Bridges running 0.2.2.x or higher
+(except the faulty 0.2.2.x versions) collect stats in 24-hour intervals.
+
+--------------------------------------------------------------------------
+
+statuses.csv
+
+The statuses.csv file contains one line for every bridge that is
+referenced in a bridge network status.  Note that if a bridge is running
+for, say, 12 hours, it will be contained in 24 half-hourly published
+statuses in that time and will be listed 24 times in statuses.csv.
+
+The columns in statuses.csv are:
+
+ - status: ISO-formatted status publication time
+ - fingerprint: Hex-formatted SHA-1 hash of identity fingerprint
+ - descriptor: Hex-formatted descriptor identifier
+ - published: ISO-formatted descriptor publication time
+ - address: Sanitized IPv4 address in dotted notation
+ - orport: OR port
+ - dirport: Dir port
+ - authority: TRUE if bridge has the Authority flag, FALSE otherwise
+ - badexit: TRUE if bridge has the BadExit flag, FALSE otherwise
+ - baddirectory: TRUE if bridge has the BadDirectory flag, FALSE otherwise
+ - exit: TRUE if bridge has the Exit flag, FALSE otherwise
+ - fast: TRUE if bridge has the Fast flag, FALSE otherwise
+ - guard: TRUE if bridge has the Guard flag, FALSE otherwise
+ - named: TRUE if bridge has the Named flag, FALSE otherwise
+ - stable: TRUE if bridge has the Stable flag, FALSE otherwise
+ - running: TRUE if bridge has the Running flag, FALSE otherwise
+ - valid: TRUE if bridge has the Valid flag, FALSE otherwise
+ - v2dir: TRUE if bridge has the V2Dir flag, FALSE otherwise
+
+--------------------------------------------------------------------------
+
+relays.csv
+
+The relays.csv file contains SHA-1 hashes of identity fingerprints of
+normal relays.  If a bridge uses the same identity key that it also used
+as a relay, it might observe more users than it would observe as a pure
+bridge.  Therefore, bridges that have been running as relays before should
+be excluded from bridge statistics.
+
+The columns in relays.csv are:
+
+ - consensus: ISO-formatted consensus publication time
+ - fingerprint: Hex-formatted SHA-1 hash of identity fingerprint
+
diff --git a/task-2680/verify.R b/task-2680/verify.R
new file mode 100644
index 0000000..63ef233
--- /dev/null
+++ b/task-2680/verify.R
@@ -0,0 +1,38 @@
+# Usage: R --slave -f verify.R
+#
+# Reads the CSV files written by ProcessSanitizedBridges and
+# ProcessRelayConsensuses from the working directory, if present, and
+# prints a short summary of each as a basic sanity check.
+
+# Report how many descriptors come with bridge stats; rows without stats
+# have NA in the bridgestatsend column.
+if (file.exists("descriptors.csv")) {
+  cat("Verifying descriptors.csv. This may take a while.\n")
+  d <- read.csv("descriptors.csv", stringsAsFactors = FALSE)
+  cat(" ", length(na.omit(d$bridgestatsend)), "of", length(d$descriptor),
+      "descriptors contain bridge stats.\n")
+} else {
+  cat("descriptors.csv does not exist\n")
+}
+
+# Report how many status entries have the Running flag.
+if (file.exists("statuses.csv")) {
+  cat("Verifying statuses.csv. This may take a while.\n")
+  s <- read.csv("statuses.csv", stringsAsFactors = FALSE)
+  cat(" ", length(s[s$running == TRUE, "running"]), "of",
+      length(s$running), "bridges contained in the statuses have the",
+      "Running flag.\n")
+} else {
+  cat("statuses.csv does not exist\n")
+}
+
+# Print the range of consensus publication times.  print() is explicit so
+# that the summary also shows up if this script is run via source().
+if (file.exists("relays.csv")) {
+  cat("Verifying relays.csv. This may take a while.\n")
+  r <- read.csv("relays.csv", stringsAsFactors = FALSE)
+  print(summary(as.POSIXct(r$consensus)))
+} else {
+  cat("relays.csv does not exist\n")
+}
+

    

[tor-commits] [metrics-tasks/master] Add code for 2680.

karsten＠torproject.org