[tor-commits] [metrics-db/master] Sanitize bridge pool assignments.

karsten at torproject.org karsten at torproject.org
Mon Mar 14 09:55:36 UTC 2011


commit 01d8d919512d8dfe3df3440e5dec0bccb2942baa
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Mar 14 10:54:59 2011 +0100

    Sanitize bridge pool assignments.
---
 config.template                                    |   11 ++
 .../ernie/db/BridgePoolAssignmentsProcessor.java   |  128 ++++++++++++++++++++
 src/org/torproject/ernie/db/Configuration.java     |   23 +++-
 src/org/torproject/ernie/db/Main.java              |    7 +
 4 files changed, 167 insertions(+), 2 deletions(-)

diff --git a/config.template b/config.template
index 0d6743e..47a8c8d 100644
--- a/config.template
+++ b/config.template
@@ -44,6 +44,13 @@
 ## Download exit list and store it to disk
 #DownloadExitList 0
 #
+## Process bridge pool assignment files by replacing bridge fingerprints
+## with their SHA-1 digests and writing results to yyyy/MM/dd/ subdirectories
+#ProcessBridgePoolAssignments 0
+#
+## Relative path to directory to read bridge pool assignment files from
+#AssignmentsDirectory assignments/
+#
 #### Data sinks ####
 #
 ## Write directory archives to disk
@@ -66,4 +73,8 @@
 #
 ## Relative path to directory to write sanitized bridges to
 #SanitizedBridgesWriteDirectory sanitized-bridges/
+#
+## Relative path to directory to write sanitized bridge pool assignment
+## files to
+#SanitizedAssignmentsDirectory sanitized-assignments/
 
diff --git a/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java
new file mode 100644
index 0000000..583d36e
--- /dev/null
+++ b/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java
@@ -0,0 +1,128 @@
+/* Copyright 2011 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db;
+
+import java.io.*;
+import java.text.*;
+import java.util.*;
+import java.util.logging.*;
+import org.apache.commons.codec.*;
+import org.apache.commons.codec.binary.*;
+import org.apache.commons.codec.digest.*;
+
+public class BridgePoolAssignmentsProcessor {
+
+  public BridgePoolAssignmentsProcessor(File assignmentsDirectory,
+        File sanitizedAssignmentsDirectory) {
+
+    Logger logger =
+        Logger.getLogger(BridgePoolAssignmentsProcessor.class.getName());
+    if (assignmentsDirectory == null ||
+        sanitizedAssignmentsDirectory == null) {
+      IllegalArgumentException e = new IllegalArgumentException("Neither "
+          + "assignmentsDirectory nor sanitizedAssignmentsDirectory may "
+          + "be null!");
+      throw e;
+    }
+
+    List<File> assignmentFiles = new ArrayList<File>();
+    Stack<File> files = new Stack<File>();
+    files.add(assignmentsDirectory);
+    while (!files.isEmpty()) {
+      File file = files.pop();
+      if (file.isDirectory()) {
+        files.addAll(Arrays.asList(file.listFiles()));
+      } else {
+        assignmentFiles.add(file);
+      }
+    }
+
+    SimpleDateFormat assignmentFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    assignmentFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    SimpleDateFormat filenameFormat =
+        new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+    filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    for (File assignmentFile : assignmentFiles) {
+      try {
+        BufferedReader br = new BufferedReader(new FileReader(
+            assignmentFile));
+        String line, bridgePoolAssignmentLine = null;
+        SortedSet<String> sanitizedAssignments = new TreeSet<String>();
+        boolean wroteLastLine = false;
+        while ((line = br.readLine()) != null || !wroteLastLine) {
+          if (line == null ||
+              line.startsWith("bridge-pool-assignment ")) {
+            if (bridgePoolAssignmentLine != null) {
+              try {
+                long bridgePoolAssignmentTime = assignmentFormat.parse(
+                    bridgePoolAssignmentLine.substring(
+                    "bridge-pool-assignment ".length())).getTime();
+                File sanitizedAssignmentsFile = new File(
+                    sanitizedAssignmentsDirectory, filenameFormat.format(
+                    bridgePoolAssignmentTime));
+                if (!sanitizedAssignmentsFile.exists()) {
+                  sanitizedAssignmentsFile.getParentFile().mkdirs();
+                  BufferedWriter bw = new BufferedWriter(new FileWriter(
+                      sanitizedAssignmentsFile));
+                  bw.write(bridgePoolAssignmentLine + "\n");
+                  for (String assignmentLine : sanitizedAssignments) {
+                    bw.write(assignmentLine + "\n");
+                  }
+                  bw.close();
+                }
+              } catch (IOException e) {
+                logger.log(Level.WARNING, "Could not write sanitized "
+                    + "bridge pool assignment file for line '"
+                    + bridgePoolAssignmentLine + "' to disk. Skipping "
+                    + "bridge pool assignment file '"
+                    + assignmentFile.getAbsolutePath() + "'.", e);
+                break;
+              } catch (ParseException e) {
+                logger.log(Level.WARNING, "Could not write sanitized "
+                    + "bridge pool assignment file for line '"
+                    + bridgePoolAssignmentLine + "' to disk. Skipping "
+                    + "bridge pool assignment file '"
+                    + assignmentFile.getAbsolutePath() + "'.", e);
+                break;
+              }
+              sanitizedAssignments.clear();
+            }
+            if (line == null) {
+              wroteLastLine = true;
+            } else {
+              bridgePoolAssignmentLine = line;
+            }
+          } else {
+            String[] parts = line.split(" ");
+            if (parts.length < 2 || parts[0].length() < 40) {
+              logger.warning("Unrecognized line '" + line
+                  + "'. Skipping.");
+              continue;
+            }
+            String hashedFingerprint = null;
+            try {
+              hashedFingerprint = DigestUtils.shaHex(Hex.decodeHex(
+                  line.split(" ")[0].toCharArray())).toLowerCase();
+            } catch (DecoderException e) {
+              logger.warning("Unable to decode hex fingerprint in line '"
+                  + line + "'. Skipping.");
+              continue;
+            }
+            String assignmentDetails = line.substring(40);
+            sanitizedAssignments.add(hashedFingerprint
+                + assignmentDetails);
+          }
+        }
+        br.close();
+      } catch (IOException e) {
+        logger.log(Level.WARNING, "Could not read bridge pool assignment "
+            + "file '" + assignmentFile.getAbsolutePath()
+            + "'. Skipping.", e);
+      }
+    }
+
+    logger.info("Finished processing bridge pool assignment file(s).");
+  }
+}
+
diff --git a/src/org/torproject/ernie/db/Configuration.java b/src/org/torproject/ernie/db/Configuration.java
index c1cdbea..ae24175 100644
--- a/src/org/torproject/ernie/db/Configuration.java
+++ b/src/org/torproject/ernie/db/Configuration.java
@@ -37,6 +37,9 @@ public class Configuration {
       + "~gettor/gettor_stats.txt";
   private String getTorDirectory = "gettor/";
   private boolean downloadExitList = false;
+  private boolean processBridgePoolAssignments = false;
+  private String assignmentsDirectory = "assignments/";
+  private String sanitizedAssignmentsDirectory = "sanitized-assignments/";
   public Configuration() {
 
     /* Initialize logger. */
@@ -127,6 +130,13 @@ public class Configuration {
         } else if (line.startsWith("DownloadExitList")) {
           this.downloadExitList = Integer.parseInt(
               line.split(" ")[1]) != 0;
+        } else if (line.startsWith("ProcessBridgePoolAssignments")) {
+          this.processBridgePoolAssignments = Integer.parseInt(
+              line.split(" ")[1]) != 0;
+        } else if (line.startsWith("AssignmentsDirectory")) {
+          this.assignmentsDirectory = line.split(" ")[1];
+        } else if (line.startsWith("SanitizedAssignmentsDirectory")) {
+          this.sanitizedAssignmentsDirectory = line.split(" ")[1];
         } else {
           logger.severe("Configuration file contains unrecognized "
               + "configuration key in line '" + line + "'! Exiting!");
@@ -156,8 +166,8 @@ public class Configuration {
     if (!this.importCachedRelayDescriptors &&
         !this.importDirectoryArchives && !this.downloadRelayDescriptors &&
         !this.importBridgeSnapshots && !this.downloadGetTorStats &&
-        !this.downloadExitList && !this.writeDirectoryArchives &&
-        !this.writeSanitizedBridges) {
+        !this.downloadExitList && !this.processBridgePoolAssignments &&
+        !this.writeDirectoryArchives && !this.writeSanitizedBridges) {
       logger.warning("We have not been configured to read data from any "
           + "data source or write data to any data sink. You need to "
           + "edit your config file (" + configFile.getAbsolutePath()
@@ -246,5 +256,14 @@ public class Configuration {
   public boolean getDownloadExitList() {
     return this.downloadExitList;
   }
+  public boolean getProcessBridgePoolAssignments() {
+    return processBridgePoolAssignments;
+  }
+  public String getAssignmentsDirectory() {
+    return assignmentsDirectory;
+  }
+  public String getSanitizedAssignmentsDirectory() {
+    return sanitizedAssignmentsDirectory;
+  }
 }
 
diff --git a/src/org/torproject/ernie/db/Main.java b/src/org/torproject/ernie/db/Main.java
index 657cdfc..50c06bb 100644
--- a/src/org/torproject/ernie/db/Main.java
+++ b/src/org/torproject/ernie/db/Main.java
@@ -128,6 +128,13 @@ public class Main {
       new ExitListDownloader();
     }
 
+    // Process bridge pool assignments
+    if (config.getProcessBridgePoolAssignments()) {
+      new BridgePoolAssignmentsProcessor(
+          new File(config.getAssignmentsDirectory()),
+          new File(config.getSanitizedAssignmentsDirectory()));
+    }
+
     // Remove lock file
     lf.releaseLock();
 



More information about the tor-commits mailing list