[tor-commits] [metrics-tasks/master] Add bridge pool assignment file parser for #2680.

karsten at torproject.org karsten at torproject.org
Mon Mar 14 13:18:23 UTC 2011


commit 1e8dbd3857c58e26f57973da4d64170eae0e1be6
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Mar 14 14:15:33 2011 +0100

    Add bridge pool assignment file parser for #2680.
---
 task-2680/.gitignore                       |    2 +
 task-2680/ProcessSanitizedAssignments.java |  102 ++++++++++++++++++++++++++++
 task-2680/README                           |   40 ++++++++++-
 task-2680/verify.R                         |    6 ++
 4 files changed, 147 insertions(+), 3 deletions(-)

diff --git a/task-2680/.gitignore b/task-2680/.gitignore
index 6378a79..b394fe6 100644
--- a/task-2680/.gitignore
+++ b/task-2680/.gitignore
@@ -4,4 +4,6 @@ bridge-descriptors/
 commons-codec-1.4.jar
 consensuses/
 *.tar.bz2
+*.swp
+bridge-pool-assignments/
 
diff --git a/task-2680/ProcessSanitizedAssignments.java b/task-2680/ProcessSanitizedAssignments.java
new file mode 100644
index 0000000..9289aa1
--- /dev/null
+++ b/task-2680/ProcessSanitizedAssignments.java
@@ -0,0 +1,102 @@
+import java.io.*;
+import java.util.*;
+
+/** Converts the sanitized bridge pool assignment files found below the
+ * directory given as only argument into assignments.csv in the working
+ * directory. Exits with status 1 on bad arguments or unparseable lines. */
+public class ProcessSanitizedAssignments {
+  public static void main(String[] args) throws IOException {
+    /* Validate command-line arguments. */
+    if (args.length != 1 || !new File(args[0]).exists()) {
+      System.out.println("Usage: java ProcessSanitizedAssignments <dir>");
+      System.exit(1);
+    }
+
+    /* Find all files that we should parse. Files are keyed by name only,
+     * so equally named files in different directories replace each other. */
+    System.out.println("Creating list of files we should parse.");
+    SortedMap<String, File> assignments = new TreeMap<String, File>();
+    Stack<File> files = new Stack<File>();
+    files.add(new File(args[0]));
+    while (!files.isEmpty()) {
+      File file = files.pop();
+      File[] children = file.listFiles(); /* null unless a readable dir */
+      if (children != null) {
+        files.addAll(Arrays.asList(children));
+      } else if (!file.isDirectory()) {
+        assignments.put(file.getName(), file);
+      }
+    }
+    System.out.println("We found " + assignments.size() + " bridge pool "
+        + "assignment files.");
+
+    /* Parse assignments. */
+    if (!assignments.isEmpty()) {
+      System.out.println("Parsing bridge pool assignment files.");
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          "assignments.csv"));
+      bw.write("assignment,fingerprint,type,ring,port,flag,bucket\n");
+      int parsedAssignments = 0, totalAssignments = assignments.size(),
+          writtenOutputLines = 1, lastTenth = 0;
+      long started = System.currentTimeMillis();
+      for (File file : assignments.values()) {
+        BufferedReader br = new BufferedReader(new FileReader(file));
+        String line, assignmentTime = null;
+        while ((line = br.readLine()) != null) {
+          if (line.startsWith("bridge-pool-assignment ")) {
+            assignmentTime = line.substring("bridge-pool-assignment ".
+                length());
+          } else if (line.length() > 0) { /* Skip empty lines. */
+            String[] parts = line.split(" ");
+            if (parts.length < 2) { /* Need fingerprint and type. */
+              System.out.println("Malformed line '" + line + "'. Exiting.");
+              System.exit(1);
+            }
+            String fingerprint = parts[0], type = parts[1];
+            String ring = "NA", port = "NA", flag = "NA", bucket = "NA";
+            for (int i = 2; i < parts.length; i++) {
+              String[] parts2 = parts[i].split("=", 2); /* key=value */
+              String key = parts2.length == 2 ? parts2[0] : "";
+              String value = parts2.length == 2 ? parts2[1] : null;
+              if (key.equals("ring")) {
+                ring = value;
+              } else if (key.equals("port")) {
+                port = value;
+              } else if (key.equals("flag")) {
+                flag = value;
+              } else if (key.equals("bucket")) {
+                bucket = value;
+              } else { /* Also hit by pairs without '='. */
+                System.out.println("Unknown keyword in line '" + line
+                    + "'. Please check. Exiting.");
+                System.exit(1);
+              }
+            }
+            bw.write(assignmentTime + "," + fingerprint + "," + type + ","
+                + ring + "," + port + "," + flag + "," + bucket + "\n");
+            writtenOutputLines++;
+          }
+        }
+        br.close();
+        parsedAssignments++;
+        int tenth = (int) (parsedAssignments * 10L / totalAssignments);
+        if (tenth > lastTenth) { /* Report each newly reached 10% step. */
+          lastTenth = tenth;
+          long elapsed = System.currentTimeMillis() - started;
+          long secondsLeft = elapsed * (totalAssignments - parsedAssignments)
+              / parsedAssignments / 1000L;
+          System.out.println("  " + tenth + "0% done, " + secondsLeft
+              + " seconds left.");
+        }
+      }
+      bw.close();
+      System.out.println("Parsed " + parsedAssignments + " bridge pool "
+          + "assignment files and wrote " + writtenOutputLines + " lines "
+          + "to assignments.csv.");
+    }
+
+    /* This is it. */
+    System.out.println("Terminating.");
+  }
+}
+
diff --git a/task-2680/README b/task-2680/README
index a00856f..65d8b85 100644
--- a/task-2680/README
+++ b/task-2680/README
@@ -6,9 +6,9 @@ This ticket contains Java and R code to
 
 This README has a separate section for each Java or R code snippet.
 
-The Java applications produce three output formats containing bridge
-descriptors, bridge status lines, and hashed relay identities.  The data
-formats are described below.
+The Java applications produce four output formats containing bridge
+descriptors, bridge status lines, bridge pool assignments, and hashed
+relay identities.  The data formats are described below.
 
 --------------------------------------------------------------------------
 
@@ -33,6 +33,23 @@ ProcessSanitizedBridges.java
 
 --------------------------------------------------------------------------
 
+ProcessSanitizedAssignments.java
+
+ - Download sanitized bridge pool assignments from the metrics website,
+   e.g., https://metrics.torproject.org/data/bridge-pool-assignments-2011-01.tar.bz2
+   and extract them in a local directory, e.g., bridge-pool-assignments/.
+
+ - Compile the Java class, e.g.,
+   $ javac ProcessSanitizedAssignments.java
+
+ - Run the Java class, e.g.,
+   $ java ProcessSanitizedAssignments bridge-pool-assignments/
+
+ - Once the Java application is done, you'll find a file assignments.csv
+   in this directory.
+
+--------------------------------------------------------------------------
+
 ProcessRelayConsensuses.java
 
  - Download v3 relay consensuses from the metrics website, e.g.,
@@ -130,6 +147,23 @@ The columns in statuses.csv are:
 
 --------------------------------------------------------------------------
 
+assignments.csv
+
+The assignments.csv file contains one line per running bridge and bridge
+pool assignment, listing the ring, subrings, and bucket BridgeDB put it in.
+
+The columns in assignments.csv are:
+
+ - assignment: ISO-formatted bridge pool assignment time
+ - fingerprint: Hex-formatted SHA-1 hash of identity fingerprint
+ - type: Name of the distributor: "https", "email", or "unallocated"
+ - ring: Ring number, only for distributor "https"
+ - port: Port subring
+ - flag: Flag subring
+ - bucket: File bucket, only for distributor "unallocated"
+
+--------------------------------------------------------------------------
+
 relays.csv
 
 The relays.csv file contains SHA-1 hashes of identity fingerprints of
diff --git a/task-2680/verify.R b/task-2680/verify.R
index 63ef233..241a196 100644
--- a/task-2680/verify.R
+++ b/task-2680/verify.R
@@ -25,3 +25,9 @@ if (file.exists("relays.csv")) {
   summary(as.POSIXct(r$consensus))
 }
 
+if (file.exists("assignments.csv")) {
+  cat("Verifying assignments.csv. This may take a while.\n")
+  r <- read.csv("assignments.csv", stringsAsFactors = FALSE)
+  summary(as.POSIXct(r$assignment))
+}
+



More information about the tor-commits mailing list