[or-cvs] [metrics-utils/master 2/2] Check in ExoneraTor and bridge descriptor sanitizer from SVN.

karsten at torproject.org karsten at torproject.org
Mon Sep 20 08:40:04 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Mon, 20 Sep 2010 10:31:57 +0200
Subject: Check in ExoneraTor and bridge descriptor sanitizer from SVN.
Commit: d2c24d70f7bd92eeeeac71c48b80fe6e3b9b2fca

---
 bridge-desc-sanitizer/ConvertBridgeDescs.java |  452 +++++++++++++++++++++++++
 bridge-desc-sanitizer/HOWTO                   |  113 ++++++
 bridge-desc-sanitizer/extract-bridges.sh      |    8 +
 exonerator/ExoneraTor.java                    |  404 ++++++++++++++++++++++
 exonerator/HOWTO                              |  159 +++++++++
 exonerator/LICENSE                            |   30 ++
 exonerator/exonerator.py                      |  371 ++++++++++++++++++++
 7 files changed, 1537 insertions(+), 0 deletions(-)
 create mode 100644 bridge-desc-sanitizer/ConvertBridgeDescs.java
 create mode 100644 bridge-desc-sanitizer/HOWTO
 create mode 100755 bridge-desc-sanitizer/extract-bridges.sh
 create mode 100644 exonerator/ExoneraTor.java
 create mode 100644 exonerator/HOWTO
 create mode 100644 exonerator/LICENSE
 create mode 100755 exonerator/exonerator.py

diff --git a/bridge-desc-sanitizer/ConvertBridgeDescs.java b/bridge-desc-sanitizer/ConvertBridgeDescs.java
new file mode 100644
index 0000000..6a6c5bf
--- /dev/null
+++ b/bridge-desc-sanitizer/ConvertBridgeDescs.java
@@ -0,0 +1,452 @@
+import java.io.*;
+import java.util.*;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.codec.binary.*;
+
+/**
+ * Sanitizes one month of bridge descriptor snapshots: hashes identities,
+ * drops keys and signatures, and replaces IP addresses and contact lines.
+ */
+public class ConvertBridgeDescs {
+
+  /**
+   * Arguments: input directory, geoip.txt file, YYYY, MM, output directory.
+   * Exits with status 1 on usage errors or unparseable descriptor lines.
+   */
+  public static void main(String[] args) throws Exception {
+
+    long started = System.currentTimeMillis();
+
+    if (args.length < 5) {
+      System.err.println("Usage: java "
+          + ConvertBridgeDescs.class.getSimpleName()
+          + " <input directory> <geoip.txt file> <YYYY> <MM> "
+          + "<output directory>");
+      System.exit(1);
+    }
+    File inDir = new File(args[0]);
+    File geoipFile = new File(args[1]);
+    String year = args[2];
+    String month = args[3];
+    int yearInt = Integer.parseInt(year);
+    int monthInt = Integer.parseInt(month);
+    File outDir = new File(args[4]);
+    outDir.mkdirs();
+
+    SortedSet<File> statuses = new TreeSet<File>();
+    Set<File> descriptors = new HashSet<File>();
+    Set<File> extrainfos = new HashSet<File>();
+
+    System.out.print("Parsing geoip.txt file... ");
+    BufferedReader r = new BufferedReader(new FileReader(geoipFile));
+    String line0 = null;
+    SortedMap<Long, String> geoipDatabase = new TreeMap<Long, String>();
+    while ((line0 = r.readLine()) != null) {
+      if (!line0.startsWith("#"))
+        geoipDatabase.put(Long.parseLong(line0.split(",")[0]),
+            line0.substring(line0.indexOf(',') + 1));
+    }
+    r.close();
+    System.out.println("Found " + geoipDatabase.size()
+        + " entries (expected 100,000 +- 10,000).");
+
+    System.out.println("Checking files in " + inDir.getAbsolutePath()
+        + "...");
+    Stack<File> directoriesLeftToParse = new Stack<File>();
+    directoriesLeftToParse.push(inDir);
+    String currentYearAndMonth = "from-tonga-" + year + "-" + month;
+    String previousYearAndMonth = "from-tonga-" + (monthInt == 1 ?
+        "" + (yearInt - 1) + "-12" :
+        year + "-" + (monthInt < 11 ? "0" : "") + (monthInt - 1));
+    String nextYearAndMonth = "from-tonga-" + (monthInt == 12 ?
+            "" + (yearInt + 1) + "-01" :
+            year + "-" + (monthInt < 9 ? "0" : "") + (monthInt + 1));
+    while (!directoriesLeftToParse.isEmpty()) {
+      File directoryOrFile = directoriesLeftToParse.pop();
+      String filename = directoryOrFile.getName();
+      if (directoryOrFile.isDirectory()) {
+        if (/* base directory */
+            filename.equals("in") ||
+            /* current month */
+            filename.startsWith(currentYearAndMonth) ||
+            /* last days of previous month */
+            (filename.startsWith(previousYearAndMonth)
+            && Integer.parseInt(filename.substring(19, 21)) > 24) ||
+            /* first days of next month */
+            (filename.startsWith(nextYearAndMonth)
+            && Integer.parseInt(filename.substring(19, 21)) < 6)) {
+          /* listFiles() returns null if the directory cannot be read. */
+          File[] children = directoryOrFile.listFiles();
+          for (int i = 0; children != null && i < children.length; i++)
+            directoriesLeftToParse.push(children[i]);
+        }
+        continue;
+      }
+      if (filename.startsWith("cached-extrainfo")) {
+        extrainfos.add(directoryOrFile);
+      } else if (filename.equals("bridge-descriptors")) {
+        descriptors.add(directoryOrFile);
+      } else if (filename.equals("networkstatus-bridges")) {
+        statuses.add(directoryOrFile);
+      }
+    }
+
+    /* At least 1, so the progress-dot divisions below cannot divide by 0. */
+    int days = Math.max(1, ((extrainfos.size() / 2 + descriptors.size()
+        + statuses.size()) + 3 * 24) / (3 * 48));
+    System.out.println("Found " + extrainfos.size()
+        + " cached-extrainfo[.new] files, " + descriptors.size()
+        + " bridge-descriptors files, and " + statuses.size()
+        + " networkstatus-bridges files, covering approximately " + days
+        + " days.");
+
+    System.out.print("Parsing extra-info descriptors");
+    String[] hex = new String[] { "0", "1", "2", "3", "4", "5", "6", "7",
+        "8", "9", "a", "b", "c", "d", "e", "f" };
+    for (String x : hex)
+      for (String y : hex)
+        new File(outDir + File.separator + "extra-infos" + File.separator
+            + x + File.separator + y).mkdirs();
+    Set<File> writtenExtrainfos = new HashSet<File>();
+    Map<String, String> extrainfoMapping = new HashMap<String, String>();
+    int parsed = 0;
+    for (File file : extrainfos) {
+      if (parsed++ > extrainfos.size() / days) {
+        System.out.print(".");
+        parsed = 0;
+      }
+      BufferedReader br = new BufferedReader(new FileReader(file));
+      String line = null;
+      StringBuilder original = null, scrubbed = null;
+      boolean skipSignature = false;
+      while ((line = br.readLine()) != null) {
+        if (skipSignature && !line.equals("-----END SIGNATURE-----")) {
+          continue;
+        } else if (line.startsWith("extra-info ")) {
+          original = new StringBuilder(line + "\n");
+          scrubbed = new StringBuilder("extra-info Unnamed "
+              + DigestUtils.shaHex(Hex.decodeHex(
+              line.split(" ")[2].toCharArray())).toUpperCase() + "\n");
+        } else if (line.startsWith("published ")
+            || line.startsWith("write-history ")
+            || line.startsWith("read-history ")
+            || line.startsWith("geoip-start-time ")
+            || line.startsWith("geoip-client-origins ")) {
+          original.append(line + "\n");
+          scrubbed.append(line + "\n");
+        } else if (line.startsWith("router-signature")) {
+          String originalDesc = original.toString() + line + "\n";
+          String originalHash = DigestUtils.shaHex(originalDesc);
+          String scrubbedDesc = scrubbed.toString();
+          String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);
+          if (extrainfoMapping.containsKey(originalHash) &&
+              !extrainfoMapping.get(originalHash).equals(scrubbedHash)) {
+            System.out.println("We already have an extra-info mapping "
+                + "from " + originalHash + " to "
+                + extrainfoMapping.get(originalHash) + ", but we now want "
+                + "to add a mapping to " + scrubbedHash + ". Exiting");
+            System.exit(1);
+          }
+          extrainfoMapping.put(originalHash, scrubbedHash);
+          File out = new File(outDir + File.separator + "extra-infos"
+              + File.separator + scrubbedHash.charAt(0) + File.separator
+              + scrubbedHash.charAt(1) + File.separator + scrubbedHash);
+          if (!out.exists()) {
+            BufferedWriter bw = new BufferedWriter(new FileWriter(out));
+            bw.write(scrubbedDesc);
+            bw.close();
+            writtenExtrainfos.add(out);
+          }
+        } else if (line.equals("-----BEGIN SIGNATURE-----")) {
+          skipSignature = true;
+        } else if (line.equals("-----END SIGNATURE-----")) {
+          skipSignature = false;
+        } else {
+          System.out.println("Unrecognized line '" + line + "'. Exiting");
+          System.exit(1);
+        }
+      }
+      br.close();
+    }
+    System.out.println("\nWrote " + writtenExtrainfos.size()
+        + " extra-info descriptors.");
+
+    System.out.print("Parsing server descriptors");
+    for (String x : hex)
+      for (String y : hex)
+        new File(outDir + File.separator + "descriptors" + File.separator
+            + x + File.separator + y).mkdirs();
+    Set<File> writtenDescriptors = new HashSet<File>();
+    Map<File, File> referencedExtraInfos = new HashMap<File, File>();
+    Map<String, String> descriptorMapping = new HashMap<String, String>();
+    int found = 0, notfound = 0;
+    parsed = 0;
+    String haveExtraInfo = null;
+    for (File file : descriptors) {
+      if (parsed++ > descriptors.size() / days) {
+        System.out.print(".");
+        parsed = 0;
+      }
+      BufferedReader br = new BufferedReader(new FileReader(file));
+      String line = null, country = null;
+      StringBuilder original = null, scrubbed = null;
+      boolean skipCrypto = false, contactWritten = false;
+      while ((line = br.readLine()) != null) {
+        if (skipCrypto && !line.startsWith("-----END ")) {
+          original.append(line + "\n");
+          continue;
+        } else if (line.startsWith("router ")) {
+          original = new StringBuilder(line + "\n");
+          country = "zz";
+          String[] ipParts = line.split(" ")[2].replace('.', ' ').split(" ");
+          long ipNum = Long.parseLong(ipParts[0]) * 256L * 256L * 256L
+              + Long.parseLong(ipParts[1]) * 256L * 256L
+              + Long.parseLong(ipParts[2]) * 256L
+              + Long.parseLong(ipParts[3]);
+          long intervalStart = -1;
+          if (ipNum >= geoipDatabase.firstKey()) {
+            /* headMap's bound is exclusive; + 1 keeps a range starting
+             * exactly at ipNum and avoids an empty view at firstKey. */
+            intervalStart = geoipDatabase.headMap(ipNum + 1L).lastKey();
+            String dbContent = geoipDatabase.get(intervalStart);
+            long intervalEnd = Long.parseLong(dbContent.split(",")[0]);
+            if (ipNum <= intervalEnd)
+              country = dbContent.split(",")[1].toLowerCase();
+          }
+          scrubbed = new StringBuilder("router Unnamed 127.0.0.1 "
+              + line.split(" ")[3] + " " + line.split(" ")[4] + " "
+              + line.split(" ")[5] + "\n");
+          contactWritten = false;
+          haveExtraInfo = null;
+        } else if (line.startsWith("opt fingerprint ")) {
+          original.append(line + "\n");
+          scrubbed.append("opt fingerprint");
+          String fingerprint = DigestUtils.shaHex(Hex.decodeHex(
+              line.substring(16).replaceAll(" ", "").toCharArray())).
+              toUpperCase();
+          for (int i = 0; i < fingerprint.length() / 4; i++)
+            scrubbed.append(" " + fingerprint.substring(4 * i, 4 * (i + 1)));
+          scrubbed.append("\n");
+        } else if (line.startsWith("contact ")) {
+          original.append(line + "\n");
+          scrubbed.append("contact somebody at example dot " + country
+              + "\n");
+          contactWritten = true;
+        } else if (line.startsWith("router-signature")) {
+          String originalDesc = original.toString() + line + "\n";
+          String originalHash = DigestUtils.shaHex(originalDesc);
+          String scrubbedDesc = scrubbed.toString();
+          String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);
+          if (descriptorMapping.containsKey(originalHash) &&
+              !descriptorMapping.get(originalHash).equals(scrubbedHash)) {
+            System.out.println("We already have a descriptor mapping "
+                + "from " + originalHash + " to "
+                + descriptorMapping.get(originalHash) + ", but we now "
+                + "want to add a mapping to " + scrubbedHash
+                + ". Exiting");
+            System.exit(1);
+          }
+          descriptorMapping.put(originalHash, scrubbedHash);
+          if (haveExtraInfo != null) {
+            File out = new File(outDir + File.separator + "descriptors"
+                + File.separator + scrubbedHash.charAt(0) + File.separator
+                + scrubbedHash.charAt(1) + File.separator + scrubbedHash);
+            if (!out.exists()) {
+              BufferedWriter bw2 = new BufferedWriter(new FileWriter(out));
+              bw2.write(scrubbedDesc);
+              bw2.close();
+              writtenDescriptors.add(out);
+              String extraInfoHash = haveExtraInfo.toLowerCase();
+              File extrainfoFile = new File(outDir + File.separator
+                  + "extra-infos" + File.separator
+                  + extraInfoHash.charAt(0) + File.separator
+                  + extraInfoHash.charAt(1) + File.separator
+                  + extraInfoHash);
+              if (!extrainfoFile.exists()) {
+                System.out.println("Extra-info descriptor '"
+                    + extrainfoFile + "' does not exist.");
+                System.exit(1);
+              }
+              referencedExtraInfos.put(out, extrainfoFile);
+            }
+          }
+        } else if (line.startsWith("opt extra-info-digest ")) {
+          String originalExtraInfo = line.split(" ")[2].toLowerCase();
+          if (!extrainfoMapping.containsKey(originalExtraInfo)) {
+            notfound++;
+          } else {
+            found++;
+            original.append(line + "\n");
+            haveExtraInfo = extrainfoMapping.get(originalExtraInfo).
+                toUpperCase();
+            scrubbed.append("opt extra-info-digest " + haveExtraInfo
+                + "\n");
+          }
+        } else if (line.startsWith("reject ")
+            || line.startsWith("accept ")) {
+          if (!contactWritten) {
+            scrubbed.append("contact nobody at example dot " + country
+                + "\n");
+            contactWritten = true;
+          }
+          original.append(line + "\n");
+          scrubbed.append(line + "\n");
+        } else if (line.startsWith("platform ")
+            || line.startsWith("opt protocols ")
+            || line.startsWith("published ")
+            || line.startsWith("uptime ")
+            || line.startsWith("bandwidth ")
+            || line.startsWith("opt hibernating ")
+            || line.equals("opt hidden-service-dir")
+            || line.equals("opt caches-extra-info")) {
+          original.append(line + "\n");
+          scrubbed.append(line + "\n");
+        } else if (line.startsWith("family ")) {
+          StringBuilder familyLine = new StringBuilder("family");
+          for (String s : line.substring(7).split(" ")) {
+            if (s.startsWith("$"))
+              familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex(
+                  s.substring(1).toCharArray())).toUpperCase());
+            else
+              familyLine.append(" " + s);
+          }
+          original.append(line + "\n");
+          scrubbed.append(familyLine.toString() + "\n");
+        } else if (line.startsWith("@purpose ")) {
+          continue;
+        } else if (line.startsWith("-----BEGIN ")
+            || line.equals("onion-key") || line.equals("signing-key")) {
+          skipCrypto = true;
+          original.append(line + "\n");
+        } else if (line.startsWith("-----END ")) {
+          skipCrypto = false;
+          original.append(line + "\n");
+        } else {
+          System.out.println("Unrecognized line '" + line + "'. Exiting");
+          System.exit(1);
+        }
+      }
+      br.close();
+    }
+    System.out.println("\nWrote " + writtenDescriptors.size()
+        + " bridge descriptors. While parsing, we found that we parsed "
+        + found + " extra-info identifiers before, but are missing "
+        + notfound + ". (The number of missing identifiers should be "
+        + "significantly smaller.)");
+
+    System.out.print("Parsing network statuses");
+    Set<File> referencedDescriptors = new HashSet<File>();
+    parsed = notfound = found = 0;
+    for (File file : statuses) {
+      if (parsed++ > statuses.size() / days) {
+        System.out.print(".");
+        parsed = 0;
+      }
+      if (!file.getParent().substring(file.getParent().
+          indexOf("from-tonga-")).startsWith(currentYearAndMonth)) {
+        continue;
+      }
+      BufferedReader br = new BufferedReader(new FileReader(file));
+      String line = null;
+      StringBuilder scrubbed = new StringBuilder();
+      boolean addSLine = false;
+      while ((line = br.readLine()) != null) {
+        if (line.startsWith("r ")) {
+          String[] parts = line.split(" ");
+          String bridgeIdentity = parts[2] + "==";
+          String hashedBridgeIdentity = Base64.encodeBase64String(
+              DigestUtils.sha(Base64.decodeBase64(bridgeIdentity))).
+              substring(0, 27);
+          String descIdentifier = parts[3] + "==";
+          String hexDescIdentifier = Hex.encodeHexString(
+              Base64.decodeBase64(descIdentifier));
+          if (!descriptorMapping.containsKey(hexDescIdentifier)) {
+            notfound++;
+            addSLine = false;
+          } else {
+            found++;
+            String refDesc = descriptorMapping.get(hexDescIdentifier).
+                toLowerCase();
+            File descriptorFile = new File(outDir + File.separator
+                + "descriptors" + File.separator + refDesc.charAt(0)
+                + File.separator + refDesc.charAt(1) + File.separator
+                + refDesc);
+            if (!descriptorFile.exists()) {
+              System.out.println("Descriptor file '"
+                  + descriptorFile.getAbsolutePath() + "' does not exist.");
+            }
+            String replacementDescIdentifier = Base64.encodeBase64String(
+                Hex.decodeHex(descriptorMapping.get(hexDescIdentifier).
+                toCharArray())).substring(0, 27);
+            scrubbed.append("r Unnamed " + hashedBridgeIdentity
+                + " " + replacementDescIdentifier + " " + parts[4] + " "
+                + parts[5] + " 127.0.0.1 " + parts[7] + " " + parts[8]
+                + "\n");
+            addSLine = true;
+            referencedDescriptors.add(descriptorFile);
+          }
+        } else if (line.startsWith("s ")) {
+          if (addSLine) {
+            scrubbed.append(line + "\n");
+          }
+        } else {
+          System.out.println("Unknown line: " + line);
+          System.exit(1);
+        }
+      }
+      br.close();
+      String timeString = file.getParent().substring(file.getParent().
+          indexOf("from-tonga-") + 11);
+      String[] date = timeString.substring(0, 10).split("-");
+      String time = timeString.substring(11, 17);
+      File dir = new File(outDir + File.separator + "statuses"
+          + File.separator + date[0] + File.separator + date[1]
+          + File.separator + date[2] + File.separator);
+      dir.mkdirs();
+      File out = new File(dir.getAbsolutePath() + File.separator + date[0]
+          + date[1] + date[2] + "-" + time + "-"
+          + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
+      if (!out.exists()) {
+        BufferedWriter bw3 = new BufferedWriter(new FileWriter(out));
+        bw3.write(scrubbed.toString());
+        bw3.close();
+      }
+    }
+    System.out.println("\nWhile parsing, we found that we parsed "
+        + found + " bridge descriptors before, but are missing "
+        + notfound + ". (The number of missing identifiers should be "
+        + "significantly smaller.)");
+
+    /* Keep only extra-info references of descriptors listed in a status. */
+    referencedExtraInfos.keySet().retainAll(referencedDescriptors);
+    SortedSet<File> deleteDescriptors = new TreeSet<File>();
+    for (File e : writtenDescriptors) {
+      if (!referencedDescriptors.contains(e)) {
+        deleteDescriptors.add(e);
+      }
+    }
+    Set<File> keptExtraInfos = new HashSet<File>(
+        referencedExtraInfos.values());
+    SortedSet<File> deleteExtraInfos = new TreeSet<File>();
+    for (File e : writtenExtrainfos) {
+      if (!keptExtraInfos.contains(e)) {
+        deleteExtraInfos.add(e);
+      }
+    }
+    System.out.println("Deleting " + deleteDescriptors.size()
+        + " unreferenced bridge descriptors and "
+        + deleteExtraInfos.size() + " extra-info descriptors (keeping "
+        + (writtenDescriptors.size() - deleteDescriptors.size())
+        + " bridge descriptors and " + (writtenExtrainfos.size()
+        - deleteExtraInfos.size()) + " extra-info descriptors).");
+    for (File e : deleteDescriptors)
+      e.delete();
+    for (File e : deleteExtraInfos)
+      e.delete();
+
+    long finished = System.currentTimeMillis();
+    System.out.println("Processing took " + ((finished - started) / 1000)
+        + " seconds.");
+  }
+}
+
diff --git a/bridge-desc-sanitizer/HOWTO b/bridge-desc-sanitizer/HOWTO
new file mode 100644
index 0000000..b84d5ce
--- /dev/null
+++ b/bridge-desc-sanitizer/HOWTO
@@ -0,0 +1,113 @@
+Bridge descriptor sanitizer
+
+---------------------------------------------------------------------------
+
+Introduction:
+
+The bridge authority Tonga keeps a list of bridges in order to serve bridge
+addresses and descriptors to its clients. Every half hour, Tonga copies a
+snapshot of the known bridge descriptors to moria where these descriptors
+are archived for later statistical analysis. As a guiding principle, the
+Tor project makes all data that it uses for statistical analysis available
+to the interested public, in order to maximize transparency towards the
+community. However, the bridge descriptors contain the IP addresses and
+other contact information of bridges that must not be made public, or the
+purpose of bridges as non-public entry points into the Tor network would be
+defeated. This script takes the half-hourly snapshots as input, removes all
+possibly sensitive information from the descriptors, and puts out the
+sanitized bridge descriptors that are safe to be published.
+
+---------------------------------------------------------------------------
+
+Processing steps:
+
+The following steps are taken to remove all potentially sensitive
+information from the bridge descriptors while keeping them useful for
+statistical analysis.
+
+1. Replace the bridge identity with its SHA1 value
+
+   Clients can request a bridge's current descriptor by sending its
+   identity string to the bridge authority. This is a feature to make
+   bridges on dynamic IP addresses useful. Therefore, the original
+   identities (and anything that could be used to derive them) need to be
+   removed from the descriptors. The bridge identity is replaced with its
+   SHA1 hash value. The idea is to have a consistent replacement that
+   remains stable over months or even years (without keeping a secret for a
+   keyed hash function).
+
+2. Remove all cryptographic keys and signatures
+
+   It would be straightforward to learn about the bridge identity from the
+   bridge's public key. Replacing keys with newly generated ones seemed
+   unnecessary (and would involve keeping state over months/years), so all
+   cryptographic objects are simply removed.
+
+3. Replace IP address with 127.0.0.1
+
+   Of course, the IP address needs to be removed, too. However, the IP
+   address is resolved to a country code first and the result written to
+   the contact line as "somebody at example dot de" for Germany, etc. The
+   ports are kept unchanged though.
+
+4. Replace contact information
+
+   If there is contact information in a descriptor, the contact line is
+   changed to "somebody at ...". If there is none, a contact line is added
+   saying "nobody at ..." in order to put in the country code.
+
+5. Replace nickname with Unnamed
+
+   The bridge nicknames might give hints on the location of the bridge if
+   chosen without care; e.g. a bridge nickname might be very similar to the
+   nicknames of the operator's relays, which might be located on adjacent
+   IP addresses. All bridge nicknames are therefore replaced with the string
+   Unnamed.
+
+Note that these processing steps only prevent people from learning about
+new bridge locations. People who already know a bridge identity or location
+can easily learn more about this bridge from the sanitized descriptors.
+This is useful for statistical analysis, e.g. to filter out bridges that
+have been running as relays before.
+
+---------------------------------------------------------------------------
+
+Quick Start:
+
+The following steps are necessary to process the half-hourly snapshots as
+collected by moria:
+
+- Install Java 5 or higher.
+
+- Download Apache Commons Codec 1.4 or higher for Base 64 and hex encoding
+  from http://commons.apache.org/codec/ and place the .jar (in the
+  following assumed to be commons-codec-1.4.jar) in the same directory as
+  this HOWTO file.
+
+- Copy the half-hourly snapshots named from-tonga-YYYY-MM-DDThhmmssZ.tar.gz
+  into a directory called data/ in the same directory as this HOWTO file.
+
+- Run ./extract-bridges.sh to extract the half-hourly snapshots in data/
+  to separate directories in the newly created subdirectory in/ .
+
+- Copy the geoip.txt from the Tor sources (from /src/config/) to the same
+  directory as this HOWTO file.
+
+- Compile the Java class using
+
+  $ javac -cp commons-codec-1.4.jar ConvertBridgeDescs.java
+
+- Run the script, providing it with the parameters it needs:
+
+  java -cp .:commons-codec-1.4.jar ConvertBridgeDescs
+           <input directory> <geoip.txt file>
+           <YYYY> <MM> <output directory>
+
+  Note that YYYY and MM specify the month that shall be processed. The other
+  descriptors in the input directory are ignored.
+
+  A sample invocation might be:
+
+  $ java -cp .:commons-codec-1.4.jar ConvertBridgeDescs in/ geoip.txt
+        2008 10 out/
+
diff --git a/bridge-desc-sanitizer/extract-bridges.sh b/bridge-desc-sanitizer/extract-bridges.sh
new file mode 100755
index 0000000..5f412c3
--- /dev/null
+++ b/bridge-desc-sanitizer/extract-bridges.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+mkdir "in/"
+for i in `ls data/ | cut -c 1-29`
+do
+mkdir "in/"$i
+tar -C "in/"$i -xf "data/"$i".tar.gz"
+done
+
diff --git a/exonerator/ExoneraTor.java b/exonerator/ExoneraTor.java
new file mode 100644
index 0000000..eba3cca
--- /dev/null
+++ b/exonerator/ExoneraTor.java
@@ -0,0 +1,404 @@
+/* Copyright 2009 The Tor Project
+ * See LICENSE for licensing information */
+
+import java.io.*;
+import java.math.*;
+import java.text.*;
+import java.util.*;
+import org.bouncycastle.util.encoders.Base64;
+
+public final class ExoneraTor {
+
+  public static void main(final String[] args) throws Exception {
+
+    // check parameters
+    if (args.length < 4 || args.length > 5) {
+      System.err.println("\nUsage: java "
+          + ExoneraTor.class.getSimpleName()
+          + " <descriptor archive directory> <IP address in question> "
+          + "<timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss> "
+          + "[<target address>[:<target port>]]\n");
+      return;
+    }
+    File archiveDirectory = new File(args[0]);
+    if (!archiveDirectory.exists() || !archiveDirectory.isDirectory()) {
+      System.err.println("\nDescriptor archive directory + "
+            + archiveDirectory.getAbsolutePath()
+            + " does not exist or is not a directory.\n");
+      return;
+    }
+    String relayIP = args[1];
+    String timestampStr = args[2] + " " + args[3];
+    SimpleDateFormat timeFormat = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss");
+    timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    long timestamp = timeFormat.parse(timestampStr).getTime();
+    String target = null, targetIP = null, targetPort = null;
+    String[] targetIPParts = null;
+    if (args.length > 4) {
+      target = args[4];
+      if (target.contains(":")) {
+        targetIP = target.split(":")[0];
+        targetPort = target.split(":")[1];
+      } else {
+        targetIP = target;
+      }
+      targetIPParts = targetIP.replace(".", " ").split(" ");
+    }
+    String DELIMITER = "--------------------------------------------------"
+        + "-------------------------";
+    System.out.println("\nTrying to find out whether " + relayIP + " was "
+        + "running as a Tor relay at " + timestampStr
+        + (target != null ? " permitting exiting to " + target : "")
+        + "...\n\n" + DELIMITER);
+
+    // check that we have the required archives
+    long timestampTooOld = timestamp - 300 * 60 * 1000;
+    long timestampFrom = timestamp - 180 * 60 * 1000;
+    long timestampTooNew = timestamp + 120 * 60 * 1000;
+    Calendar calTooOld = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+    Calendar calFrom = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+    Calendar calTooNew = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+    calTooOld.setTimeInMillis(timestampTooOld);
+    calFrom.setTimeInMillis(timestampFrom);
+    calTooNew.setTimeInMillis(timestampTooNew);
+    System.out.printf("%nChecking that relevant archives between "
+        + "%tF %<tT and %tF %<tT are available...%n", calTooOld,
+        calTooNew);
+    SortedSet<String> requiredDirs = new TreeSet<String>();
+    requiredDirs.add(String.format("consensuses-%tY-%<tm", calTooOld));
+    requiredDirs.add(String.format("consensuses-%tY-%<tm", calTooNew));
+    if (target != null) {
+      requiredDirs.add(String.format("server-descriptors-%tY-%<tm",
+          calTooOld));
+      requiredDirs.add(String.format("server-descriptors-%tY-%<tm",
+          calTooNew));
+    }
+    SortedSet<File> consensusDirs = new TreeSet<File>();
+    SortedSet<File> descriptorsDirs = new TreeSet<File>();
+    Stack<File> directoriesLeftToParse = new Stack<File>();
+    directoriesLeftToParse.push(archiveDirectory);
+    while (!directoriesLeftToParse.isEmpty()) {
+      File directoryOrFile = directoriesLeftToParse.pop();
+      if (directoryOrFile.getName().startsWith("consensuses-")) {
+        if (requiredDirs.contains(directoryOrFile.getName())) {
+          requiredDirs.remove(directoryOrFile.getName());
+          consensusDirs.add(directoryOrFile);
+        }
+      } else if (directoryOrFile.getName().startsWith(
+          "server-descriptors-")) {
+        if (requiredDirs.contains(directoryOrFile.getName())) {
+          requiredDirs.remove(directoryOrFile.getName());
+          descriptorsDirs.add(directoryOrFile);
+        }
+      } else {
+        for (File fileInDir : directoryOrFile.listFiles())
+          if (fileInDir.isDirectory())
+            directoriesLeftToParse.push(fileInDir);
+      }
+    }
+    for (File dir : consensusDirs)
+      System.out.println("  " + dir.getAbsolutePath());
+    for (File dir : descriptorsDirs)
+      System.out.println("  " + dir.getAbsolutePath());
+    if (!requiredDirs.isEmpty()) {
+      System.out.println("\nWe are missing consensuses and/or server "
+          + "descriptors. Please download these archives and extract them "
+          + "to your data directory. Be sure NOT to rename the extracted "
+          + "directories or the contained files.");
+      for (String dir : requiredDirs)
+        System.out.println("  " + dir + ".tar.bz2");
+      return;
+    }
+
+    // look for consensus files
+    System.out.printf("%nLooking for relevant consensuses between "
+        + "%tF %<tT and %s...%n", calFrom, timestampStr);
+    SortedSet<File> tooOldConsensuses = new TreeSet<File>();
+    SortedSet<File> relevantConsensuses = new TreeSet<File>();
+    SortedSet<File> tooNewConsensuses = new TreeSet<File>();
+    directoriesLeftToParse.clear();
+    for (File consensusDir : consensusDirs)
+      directoriesLeftToParse.push(consensusDir);
+    SimpleDateFormat consensusTimeFormat = new SimpleDateFormat(
+        "yyyy-MM-dd-HH-mm-ss");
+    consensusTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    while (!directoriesLeftToParse.isEmpty()) {
+      File directoryOrFile = directoriesLeftToParse.pop();
+      if (directoryOrFile.isDirectory()) {
+        for (File fileInDir : directoryOrFile.listFiles()) {
+          directoriesLeftToParse.push(fileInDir);
+        }
+        continue;
+      } else {
+        String filename = directoryOrFile.getName();
+        if (filename.endsWith("consensus")) {
+          long consensusTime = consensusTimeFormat.parse(
+              filename.substring(0, 19)).getTime();
+          if (consensusTime >= timestampTooOld &&
+              consensusTime < timestampFrom)
+            tooOldConsensuses.add(directoryOrFile);
+          else if (consensusTime >= timestampFrom &&
+                   consensusTime <= timestamp)
+            relevantConsensuses.add(directoryOrFile);
+          else if (consensusTime > timestamp &&
+                   consensusTime <= timestampTooNew)
+            tooNewConsensuses.add(directoryOrFile);
+        }
+      }
+    }
+    SortedSet<File> allConsensuses = new TreeSet<File>();
+    allConsensuses.addAll(tooOldConsensuses);
+    allConsensuses.addAll(relevantConsensuses);
+    allConsensuses.addAll(tooNewConsensuses);
+    if (allConsensuses.isEmpty()) {
+      System.out.println("  None found!\n\n" + DELIMITER + "\n\nResult is "
+          + "INDECISIVE!\n\nWe cannot make any statement about IP address "
+          + relayIP + " being a relay at " + timestampStr + " or not! We "
+          + "did not find any relevant consensuses preceding the given "
+          + "time. This either means that you did not download and "
+          + "extract the consensus archives preceding the hours before "
+          + "the given time, or (in rare cases) that the directory "
+          + "archives are missing the hours before the timestamp. Please "
+          + "check that your directory archives contain consensus files "
+          + "of the interval 5:00 hours before and 2:00 hours after the "
+          + "time you are looking for.\n");
+      return;
+    }
+    for (File f : relevantConsensuses)
+      System.out.println("  " + f.getAbsolutePath());
+
+    // parse consensuses to find descriptors belonging to the IP address
+    System.out.println("\nLooking for descriptor identifiers referenced "
+        + "in \"r \" lines in these consensuses containing IP address "
+        + relayIP + "...");
+    SortedSet<File> positiveConsensusesNoTarget = new TreeSet<File>();
+    Set<String> addressesInSameNetwork = new HashSet<String>();
+    SortedMap<String, Set<File>> relevantDescriptors =
+        new TreeMap<String, Set<File>>();
+    for (File consensus : allConsensuses) {
+      if (relevantConsensuses.contains(consensus))
+        System.out.println("  " + consensus.getAbsolutePath());
+      BufferedReader br = new BufferedReader(new FileReader(consensus));
+      String line;
+      while ((line = br.readLine()) != null) {
+        if (!line.startsWith("r "))
+          continue;
+        String[] parts = line.split(" ");
+        String address = parts[6];
+        if (address.equals(relayIP)) {
+          byte[] result = Base64.decode(parts[3] + "==");
+          String hex = new BigInteger(1, Base64.decode(parts[3] +
+              "==")).toString(16).substring(0, 40);
+          if (!relevantDescriptors.containsKey(hex))
+            relevantDescriptors.put(hex, new HashSet<File>());
+          relevantDescriptors.get(hex).add(consensus);
+          positiveConsensusesNoTarget.add(consensus);
+          if (relevantConsensuses.contains(consensus))
+            System.out.println("    \"" + line + "\" references "
+                + "descriptor " + hex);
+        } else {
+          if (relayIP.startsWith(address.substring(0,
+              address.lastIndexOf(".")))) {
+            addressesInSameNetwork.add(address);
+          }
+        }
+      }
+      br.close();
+    }
+    if (relevantDescriptors.isEmpty()) {
+      System.out.printf("  None found!\n\n" + DELIMITER + "\n\nResult is "
+          + "NEGATIVE with moderate certainty!\n\nWe did not find IP "
+          + "address " + relayIP + " in any of the consensuses that were "
+          + "published between %tF %<tT and %tF %<tT.\n\nA possible "
+          + "reason for false negatives is that the relay is using a "
+          + "different IP address when generating a descriptor than for "
+          + "exiting to the Internet. We hope to provide better checks "
+          + "for this case in the future.", calTooOld, calTooNew);
+      if (!addressesInSameNetwork.isEmpty()) {
+        System.out.println("\n\nThe following other IP addresses of Tor "
+            + "relays were found in the mentioned consensus files that "
+            + "are in the same /24 network and that could be related to "
+            + "IP address " + relayIP + ":");
+        for (String s : addressesInSameNetwork) {
+          System.out.println("  " + s);
+        }
+      }
+      System.out.println();
+      return;
+    }
+
+    // parse router descriptors to check exit policies
+    SortedSet<File> positiveConsensuses = new TreeSet<File>();
+    Set<String> missingDescriptors = new HashSet<String>();
+    if (target != null) {
+      System.out.println("\nChecking if referenced descriptors permit "
+          + "exiting to " + target + "...");
+      Set<String> descriptors = relevantDescriptors.keySet();
+      missingDescriptors.addAll(relevantDescriptors.keySet());
+      directoriesLeftToParse.clear();
+      for (File descriptorsDir : descriptorsDirs)
+        directoriesLeftToParse.push(descriptorsDir);
+      while (!directoriesLeftToParse.isEmpty()) {
+        File directoryOrFile = directoriesLeftToParse.pop();
+        if (directoryOrFile.isDirectory()) {
+          for (File fileInDir : directoryOrFile.listFiles()) {
+            directoriesLeftToParse.push(fileInDir);
+          }
+          continue;
+        } else {
+          String filename = directoryOrFile.getName();
+          for (String descriptor : descriptors) {
+            if (filename.equals(descriptor)) {
+              missingDescriptors.remove(descriptor);
+              BufferedReader br = new BufferedReader(
+                  new FileReader(directoryOrFile));
+              String line;
+              while ((line = br.readLine()) != null) {
+                if (line.startsWith("reject ") ||
+                    line.startsWith("accept ")) {
+                  boolean ruleAccept = line.split(" ")[0].equals("accept");
+                  String ruleAddress = line.split(" ")[1].split(":")[0];
+                  if (!ruleAddress.equals("*")) {
+                    if (!ruleAddress.contains("/") &&
+                        !ruleAddress.equals(targetIP))
+                      continue; // IP address does not match
+                    String[] ruleIPParts = ruleAddress.split("/")[0].
+                        replace(".", " ").split(" ");
+                    int ruleNetwork = Integer.parseInt(
+                        ruleAddress.split("/")[1]);
+                    for (int i = 0; i < 4; i++) {
+                      if (ruleNetwork == 0) {
+                        break;
+                      } else if (ruleNetwork >= 8) {
+                        if (ruleIPParts[i].equals(targetIPParts[i]))
+                          ruleNetwork -= 8;
+                        else
+                          break;
+                      } else {
+                        int mask = 255 ^ 255 >>> ruleNetwork;
+                        if ((Integer.parseInt(ruleIPParts[i]) & mask) ==
+                            (Integer.parseInt(targetIPParts[i]) & mask))
+                          ruleNetwork = 0;
+                        break;
+                      }
+                    }
+                    if (ruleNetwork > 0)
+                      continue; // IP address does not match
+                  }
+                  String rulePort = line.split(" ")[1].split(":")[1];
+                  if (targetPort == null && !ruleAccept &&
+                      !rulePort.equals("*"))
+                    continue; // with no port given, we only consider
+                              // reject :* rules as matching
+                  if (targetPort != null) {
+                    if (!rulePort.equals("*") &&
+                        !targetPort.equals(rulePort))
+                      continue; // ports do not match
+                  }
+                  boolean relevantMatch = false;
+                  for (File f : relevantDescriptors.get(descriptor))
+                    if (relevantConsensuses.contains(f))
+                      relevantMatch = true;
+                  if (relevantMatch)
+                    System.out.println("  "
+                        + directoryOrFile.getAbsolutePath() + " "
+                        + (ruleAccept ? "permits" : "does not permit")
+                        + " exiting to " + target + " according to rule \""
+                        + line + "\"");
+                  if (ruleAccept)
+                    positiveConsensuses.addAll(
+                        relevantDescriptors.get(descriptor));
+                  break;
+                }
+              }
+              br.close();
+            }
+          }
+        }
+      }
+    }
+
+    // print out result
+    Set<File> matches = (target != null) ? positiveConsensuses
+                                         : positiveConsensusesNoTarget;
+    if (matches.contains(relevantConsensuses.last())) {
+      System.out.println("\n" + DELIMITER + "\n\nResult is POSITIVE with "
+          + "high certainty!\n\nWe found one or more relays on IP address "
+          + relayIP
+          + (target != null ? " permitting exit to " + target : "")
+          + " in the most recent consensus preceding " + timestampStr
+          + " that clients were likely to know.\n");
+      return;
+    }
+    boolean resultIndecisive = target != null
+        && !missingDescriptors.isEmpty();
+    if (resultIndecisive) {
+      System.out.println("\n" + DELIMITER + "\n\nResult is INDECISIVE!\n\n"
+          + "At least one referenced descriptor could not be found. This "
+          + "is a rare case, but one that (apparently) happens. We cannot "
+          + "make any good statement about exit relays without these "
+          + "descriptors. The following descriptors are missing:");
+      for (String desc : missingDescriptors)
+        System.out.println("  " + desc);
+    }
+    boolean inOtherRelevantConsensus = false, inTooOldConsensuses = false,
+        inTooNewConsensuses = false;
+    for (File f : matches)
+      if (relevantConsensuses.contains(f))
+        inOtherRelevantConsensus = true;
+      else if (tooOldConsensuses.contains(f))
+        inTooOldConsensuses = true;
+      else if (tooNewConsensuses.contains(f))
+        inTooNewConsensuses = true;
+    if (inOtherRelevantConsensus) {
+      if (!resultIndecisive)
+        System.out.println("\n" + DELIMITER + "\n\nResult is POSITIVE "
+            + "with moderate certainty!");
+      System.out.println("\nWe found one or more relays on IP address "
+          + relayIP
+          + (target != null ? " permitting exit to " + target : "")
+          + ", but not in the consensus immediately preceding "
+          + timestampStr + ". A possible reason for the relay being "
+          + "missing in the last consensus preceding the given time might "
+          + "be that some of the directory authorities had difficulties "
+          + "connecting to the relay. However, clients might still have "
+          + "used the relay.");
+    } else {
+      if (!resultIndecisive)
+        System.out.println("\n" + DELIMITER + "\n\nResult is NEGATIVE "
+            + "with high certainty!");
+      System.out.println("\nWe did not find any relay on IP address "
+          + relayIP
+          + (target != null ? " permitting exit to " + target : "")
+          + " in the consensuses 3:00 hours preceding " + timestampStr
+          + ".");
+      if (inTooOldConsensuses || inTooNewConsensuses) {
+        if (inTooOldConsensuses && !inTooNewConsensuses)
+          System.out.println("\nNote that we found a matching relay in "
+              + "consensuses that were published between 5:00 and 3:00 "
+              + "hours before " + timestampStr + ".");
+        else if (!inTooOldConsensuses && inTooNewConsensuses)
+          System.out.println("\nNote that we found a matching relay in "
+              + "consensuses that were published up to 2:00 hours after "
+              + timestampStr + ".");
+        else
+          System.out.println("\nNote that we found a matching relay in "
+              + "consensuses that were published between 5:00 and 3:00 "
+              + "hours before and in consensuses that were published up "
+              + "to 2:00 hours after " + timestampStr + ".");
+        System.out.println("Make sure that the timestamp you provided is "
+            + "in the correct timezone: UTC (or GMT).");
+      }
+    }
+    if (target != null) {
+      if (positiveConsensuses.isEmpty() &&
+          !positiveConsensusesNoTarget.isEmpty())
+        System.out.println("\nNote that although the found relay(s) did "
+            + "not permit exiting to " + target + ", there have been one "
+            + "or more relays running at the given time.");
+    }
+    System.out.println();
+  }
+}
+
diff --git a/exonerator/HOWTO b/exonerator/HOWTO
new file mode 100644
index 0000000..907a8f5
--- /dev/null
+++ b/exonerator/HOWTO
@@ -0,0 +1,159 @@
+ExoneraTor
+        or: a script that tells you whether some IP address was a Tor relay
+
+---------------------------------------------------------------------------
+
+Introduction:
+
+Some people have expressed the desire to learn whether a given IP address
+has been a Tor relay at a certain time. In addition to that, these people
+might want to know whether the IP address permitted exit to a given address
+and port.
+
+Answering these questions can be important for Tor relay operators to show
+the authorities that an anonymous user might have done bad things using
+their IP address. Likewise, police investigators might be interested
+in the answer to these questions, too, in order to decide whether to
+proceed with their investigations or not.
+
+We can answer the above questions by looking at the descriptor archives
+that are available since late 2007 (or even earlier, but this script only
+works with the data format that was produced starting in October 2007).
+This script parses the directory archives to print out the answer whether
+a certain IP address was a Tor relay at a given time. The script further
+prints out all intermediate steps in answering this, so that users can
+confirm the correctness of the result themselves.
+
+This script is available in two versions written in Python and in Java with
+equivalent functionality.
+
+---------------------------------------------------------------------------
+
+Python Quick Start:
+
+In order to run the Python version of this script, you need to install and
+download the following software and data (please note that all instructions
+are written for Linux; commands for Windows or Mac OS X may vary):
+
+- Install Python 2.6.2 or higher. (Previous Python versions might work,
+  too, but have not been tested.)
+
+- Install the Python module IPy 0.62 or higher either from
+  http://pypi.python.org/pypi/IPy/ or using "apt-get install python-ipy" on
+  Debian-based systems.
+
+- Download the v3 consensuses and server descriptors of the relevant time
+  from http://metrics.torproject.org/data.html and extract them to a
+  directory in your working directory, e.g. /home/you/exonerator/data/ .
+  Don't rename the extracted directories or any of the contained files, or
+  the script won't find the contained descriptors.
+
+  Note that you only need the server descriptors if you want to learn
+  whether a given IP address permits exiting to a given target. If you
+  only want to learn whether that IP address was a Tor relay, you don't
+  need them.
+
+- Run the script, providing it with the parameters it needs:
+
+  python exonerator.py [--archive=<descriptor archive directory>]
+           <IP address in question>
+           <timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss>
+           [<target address>[:<target port>]]
+
+  The --archive option defaults to data/ . In the following examples, it is
+  assumed that this default applies.
+
+  Make sure that the timestamp is provided in UTC, which is equivalent to
+  GMT, and not in your local timezone! Otherwise, results will very likely
+  be wrong.
+
+  A sample invocation might be:
+
+  $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 \
+        209.85.129.104:80
+
+---------------------------------------------------------------------------
+
+Java Quick Start:
+
+In order to run the Java version of this script, you need to install and
+download the following software and data (please note that all instructions
+are written for Linux; commands for Windows or Mac OS X may vary):
+
+- Install Java 6 or higher.
+
+- Download the BouncyCastle provider that includes Base 64 decoding from
+  http://www.bouncycastle.org/download/bcprov-jdk16-143.jar and put it in
+  your working directory, e.g. /home/you/exonerator/ .
+
+- Download the v3 consensuses and server descriptors of the relevant time
+  from http://metrics.torproject.org/data.html and extract them to a
+  directory in your working directory, e.g. /home/you/exonerator/data/ .
+  Don't rename the extracted directories or any of the contained files, or
+  the script won't find the contained descriptors.
+
+  Note that you only need the server descriptors if you want to learn
+  whether a given IP address permits exiting to a given target. If you
+  only want to learn whether that IP address was a Tor relay, you don't
+  need them.
+
+- Compile the (single) Java class using this command:
+
+  $ javac -cp bcprov-jdk16-143.jar ExoneraTor.java
+
+- Run the script, providing it with the parameters it needs:
+
+  java -cp .:bcprov-jdk16-143.jar ExoneraTor
+           <descriptor archive directory>
+           <IP address in question>
+           <timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss>
+           [<target address>[:<target port>]]
+
+  Make sure that the timestamp is provided in UTC, which is equivalent to
+  GMT, and not in your local timezone! Otherwise, results will very likely
+  be wrong.
+
+  A sample invocation might be:
+
+  $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
+        2009-08-15 16:05:00 209.85.129.104:80
+
+---------------------------------------------------------------------------
+
+Test cases:
+
+The following test cases work with the August 2009 archives and can be used
+to check whether this script works correctly:
+
+- Positive result of echelon1+2 being a relay:
+
+  $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00
+  $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
+        2009-08-15 16:05:00
+
+- Positive result of echelon1+2 exiting to google.com on any port
+
+  $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 209.85.129.104
+  $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
+        2009-08-15 16:05:00 209.85.129.104
+
+- Positive result of echelon1+2 exiting to google.com on port 80
+
+  $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 \
+        209.85.129.104:80
+  $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
+        2009-08-15 16:05:00 209.85.129.104:80
+
+- Negative result of echelon1+2 exiting to google.com on port 25 (the relay
+  permits exiting to google.com, but not on port 25)
+
+  $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 \
+        209.85.129.104:25
+  $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
+        2009-08-15 16:05:00 209.85.129.104:25
+
+- Negative result with IP address of echelon1+2 changed in the last octet
+
+  $ python exonerator.py 209.17.171.50 2009-08-15 16:05:00
+  $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.50 \
+        2009-08-15 16:05:00
+
diff --git a/exonerator/LICENSE b/exonerator/LICENSE
new file mode 100644
index 0000000..4bdb99d
--- /dev/null
+++ b/exonerator/LICENSE
@@ -0,0 +1,30 @@
+Copyright 2009 The Tor Project
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+  copyright notice, this list of conditions and the following disclaimer
+  in the documentation and/or other materials provided with the
+  distribution.
+
+  * Neither the names of the copyright owners nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/exonerator/exonerator.py b/exonerator/exonerator.py
new file mode 100755
index 0000000..641d65e
--- /dev/null
+++ b/exonerator/exonerator.py
@@ -0,0 +1,371 @@
+#!/usr/bin/env python
+# Copyright 2009 The Tor Project -- see LICENSE for licensing information
+
+import binascii
+import os
+import sys
+import time
+from optparse import OptionParser
+from IPy import IP
+
+USAGE = "usage: %prog [options] <IP address in question> " \
+        "<timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss> " \
+        "[<target address>[:<target port>]]"
+DELIMITER = "-" * 75
+
+if __name__ == '__main__':
+    # check parameters
+    parser = OptionParser(usage=USAGE)
+    parser.add_option("-a", "--archive", dest="archive", default="data/",
+                      help="descriptor archive directory")
+    (options, args) = parser.parse_args()
+    if len(args) not in (3, 4):
+        parser.error("incorrect number of arguments")
+    if not os.path.isdir(options.archive):
+        parser.error("descriptor archive directory %s does not exist or " \
+                     "is not a directory." % \
+                     os.path.abspath(options.archive))
+    archiveDirectory = os.path.dirname(options.archive)
+    try:
+        relayIP = IP(args[0])
+    except ValueError:
+        parser.error("invalid IP address in question: '%s'" % args[0])
+    timestampStr = "%s %s" % (args[1], args[2])
+    os.environ['TZ'] = 'UTC'
+    time.tzset()
+    try:
+        timestamp = time.strptime(timestampStr, "%Y-%m-%d %H:%M:%S")
+    except ValueError:
+        parser.error("incorrect time format: '%s'" % timestampStr)
+    # if a target is given, parse address and possibly port part of it
+    target = None
+    targetIP = None
+    targetPort = None
+    if len(args) == 4:
+        target = args[3]
+        targetParts = target.split(":")
+        try:
+            targetIP = IP(targetParts[0])
+        except ValueError:
+            parser.error("invalid target IP address in: '%s'" % args[3])
+        if len(targetParts) > 2:
+            parser.error("invalid target format: '%s'" % args[3])
+        if len(targetParts) > 1:
+            try:
+                targetPortTest = int(targetParts[1])
+            except ValueError:
+                parser.error("invalid target port number in: '%s'" % \
+                             args[3])
+            if targetPortTest not in range(1, 65535):
+                parser.error("invalid target port number in: '%s'" % \
+                             args[3])
+            targetPort = targetParts[1]
+
+    targetHelpStr = ""
+    if target:
+        targetHelpStr = " permitting exiting to %s" % target
+    print "\nTrying to find out whether %s was running a Tor relay at " \
+          "%s%s...\n\n%s\n" % (relayIP, timestampStr, targetHelpStr,
+          DELIMITER)
+
+    # check that we have the required archives
+    timestampTooOld = time.gmtime(time.mktime(timestamp) - 300 * 60)
+    timestampFrom = time.gmtime(time.mktime(timestamp) - 180 * 60)
+    timestampTooNew = time.gmtime(time.mktime(timestamp) + 120 * 60)
+    timestampTooOldStr = time.strftime("%Y-%m-%d %H:%M:%S",
+                                       timestampTooOld)
+    timestampFromStr = time.strftime("%Y-%m-%d %H:%M:%S", timestampFrom)
+    timestampTooNewStr = time.strftime("%Y-%m-%d %H:%M:%S",
+                                       timestampTooNew)
+    print "\nChecking that relevant archives between %s and %s are " \
+          "available..." % (timestampTooOldStr, timestampTooNewStr)
+
+    requiredDirs = set()
+    requiredDirs.add(time.strftime("consensuses-%Y-%m", timestampTooOld))
+    requiredDirs.add(time.strftime("consensuses-%Y-%m", timestampTooNew))
+    if target:
+        requiredDirs.add(time.strftime("server-descriptors-%Y-%m",
+                                          timestampTooOld))
+        requiredDirs.add(time.strftime("server-descriptors-%Y-%m",
+                                          timestampTooNew))
+
+    consensusDirs = list()
+    descriptorsDirs = list()
+    directoriesLeftToParse = list()
+    directoriesLeftToParse.append(archiveDirectory)
+
+    while directoriesLeftToParse:
+        directoryOrFile = directoriesLeftToParse.pop()
+        basename = os.path.basename(directoryOrFile)
+        if basename.startswith("consensuses-"):
+            if basename in requiredDirs:
+                requiredDirs.remove(basename)
+                consensusDirs.append(directoryOrFile)
+        elif basename.startswith("server-descriptors-"):
+            if basename in requiredDirs:
+                requiredDirs.remove(basename)
+                descriptorsDirs.append(directoryOrFile)
+        else:
+            for filename in os.listdir(directoryOrFile):
+                entry = "%s/%s" % (directoryOrFile, filename)
+                if os.path.isdir(entry):
+                    directoriesLeftToParse.append(entry)
+
+    consensusDirs.sort()
+    for consensusDir in consensusDirs:
+        print "  %s" % consensusDir
+    descriptorsDirs.sort()
+    for descriptorsDir in descriptorsDirs:
+        print "  %s" % descriptorsDir
+
+    if requiredDirs:
+        print "\nWe are missing consensuses and/or server descriptors. " \
+              "Please download these archives and extract them to your " \
+              "data directory. Be sure NOT to rename the extracted " \
+              "directories or the contained files."
+        for requiredDir in sorted(requiredDirs):
+            print "  %s.tar.bz2" % requiredDir
+        sys.exit()
+
+    # look for consensus files
+    print "\nLooking for relevant consensuses between %s and %s..." % \
+          (timestampFromStr, timestampStr)
+    tooOldConsensuses = set()
+    relevantConsensuses = set()
+    tooNewConsensuses = set()
+    directoriesLeftToParse = list(consensusDirs)
+    while directoriesLeftToParse:
+        directoryOrFile = directoriesLeftToParse.pop()
+        if os.path.isdir(directoryOrFile):
+            for filename in os.listdir(directoryOrFile):
+                entry = "%s/%s" % (directoryOrFile, filename)
+                directoriesLeftToParse.append(entry)
+        else:
+            basename = os.path.basename(directoryOrFile)
+            if (basename.endswith("consensus")):
+                consensusTime = time.strptime(basename[0:19],
+                                              "%Y-%m-%d-%H-%M-%S")
+                if consensusTime >= timestampTooOld and \
+                   consensusTime < timestampFrom:
+                    tooOldConsensuses.add(directoryOrFile)
+                elif consensusTime >= timestampFrom and \
+                     consensusTime <= timestamp:
+                    relevantConsensuses.add(directoryOrFile)
+                elif consensusTime > timestamp and \
+                     consensusTime <= timestampTooNew:
+                    tooNewConsensuses.add(directoryOrFile)
+    allConsensuses = set()
+    allConsensuses.update(tooOldConsensuses)
+    allConsensuses.update(relevantConsensuses)
+    allConsensuses.update(tooNewConsensuses)
+    if not allConsensuses:
+        print "  None found!\n\n%s\n\nResult is INDECISIVE!\n\nWe " \
+              "cannot make any statement about IP address %s being a " \
+              "relay at %s or not! We did not find any relevant " \
+              "consensuses preceding the given time. This either means " \
+              "that you did not download and extract the consensus " \
+              "archives preceding the hours before the given time, or " \
+              "(in rare cases) that the directory archives are missing " \
+              "the hours before the timestamp. Please check that your " \
+              "directory archives contain consensus files of the " \
+              "interval 5:00 hours before and 2:00 hours after the time " \
+              "you are looking for.\n" % (DELIMITER, relayIP, timestampStr)
+        sys.exit()
+    for consensus in sorted(relevantConsensuses):
+        print "  %s" % consensus
+
+    # parse consensuses to find descriptors belonging to the IP address
+    print "\nLooking for descriptor identifiers referenced in \"r \" " \
+          "lines in these consensuses containing IP address %s..." % \
+          relayIP
+    positiveConsensusesNoTarget = set()
+    addressesInSameNetwork = set()
+    relevantDescriptors = dict()
+    for consensus in allConsensuses:
+        if consensus in relevantConsensuses:
+            print "  %s" % consensus
+        consensusFile = open(consensus, "r")
+        line = consensusFile.readline()
+        while line:
+            if line.startswith("r "):
+                address = IP(line.split(" ")[6])
+                if address == relayIP:
+                    hexDesc = binascii.b2a_hex(binascii.a2b_base64(
+                                               line.split(" ")[3] + "=="))
+                    if hexDesc not in relevantDescriptors.keys():
+                        relevantDescriptors[hexDesc] = set()
+                    relevantDescriptors[hexDesc].add(consensus)
+                    positiveConsensusesNoTarget.add(consensus)
+                    if consensus in relevantConsensuses:
+                        print "    \"%s\" references descriptor %s" % \
+                              (line.rstrip(), hexDesc)
+                elif relayIP.overlaps(IP("%s/24" % address,
+                                         make_net=True)):
+                    addressesInSameNetwork.add(address)
+            line = consensusFile.readline()
+        consensusFile.close()
+    if not relevantDescriptors:
+        print "  None found!\n\n%s\n\nResult is NEGATIVE with moderate " \
+              "certainty!\n\nWe did not find IP address %s in any of " \
+              "the consensuses that were published between %s and " \
+              "%s.\n\nA possible reason for false negatives is that the " \
+              "relay is using a different IP address when generating a " \
+              "descriptor than for exiting to the Internet. We hope to " \
+              "provide better checks for this case in the future." % \
+              (DELIMITER, relayIP, timestampTooOldStr, timestampTooNewStr)
+        if addressesInSameNetwork:
+            print "\nThe following other IP addresses of Tor relays " \
+                  "were found in the mentioned consensus files that are " \
+                  "in the same /24 network and that could be related to " \
+                  "IP address %s:" % relayIP
+            for addr in addressesInSameNetwork:
+                print "  %s" % addr
+        print ""
+        sys.exit()
+
+    # parse router descriptors to check exit policies
+    positiveConsensuses = set()
+    missingDescriptors = set()
+    if target:
+        print "\nChecking if referenced descriptors permit exiting to " \
+              "%s..." % target
+        descriptors = relevantDescriptors.keys()
+        for desc in descriptors:
+            missingDescriptors.add(desc)
+        directoriesLeftToParse = list(descriptorsDirs)
+        while directoriesLeftToParse:
+            directoryOrFile = directoriesLeftToParse.pop()
+            if os.path.isdir(directoryOrFile):
+                for filename in os.listdir(directoryOrFile):
+                    entry = "%s/%s" % (directoryOrFile, filename)
+                    directoriesLeftToParse.append(entry)
+            else:
+                basename = os.path.basename(directoryOrFile)
+                for descriptor in descriptors:
+                    if basename == descriptor:
+                        missingDescriptors.remove(descriptor)
+                        descriptorFile = open(directoryOrFile, "r")
+                        line = descriptorFile.readline()
+                        while line:
+                            if line.startswith("reject ") or \
+                               line.startswith("accept "):
+                                ruleAccept = line.split()[0] == "accept"
+                                ruleAddress = line.split()[1].split(":")[0]
+                                if ruleAddress != "*" and not \
+                                   IP(ruleAddress).overlaps(targetIP):
+                                    # IP address does not match
+                                    line = descriptorFile.readline()
+                                    continue
+                                rulePort = line.split()[1].split(":")[1]
+                                if not targetPort and not ruleAccept and \
+                                   rulePort != "*":
+                                    # with no port given, we only consider
+                                    # reject :* rules as matching
+                                    line = descriptorFile.readline()
+                                    continue
+                                if targetPort and rulePort != "*" and \
+                                   targetPort != rulePort:
+                                    # ports do not match
+                                    line = descriptorFile.readline()
+                                    continue
+                                relevantMatch = False
+                                for f in relevantDescriptors.get(
+                                                             descriptor):
+                                    if f in relevantConsensuses:
+                                        relevantMatch = True
+                                if relevantMatch:
+                                    if ruleAccept:
+                                        print "  %s permits exiting to " \
+                                              "%s according to rule " \
+                                              "\"%s\"" % (directoryOrFile,
+                                              target, line.rstrip())
+                                    else:
+                                        print "  %s does not permit " \
+                                              "exiting to %s according " \
+                                              "to rule \"%s\"" % \
+                                              (directoryOrFile,
+                                              target, line.rstrip())
+                                if ruleAccept:
+                                    for consensus in \
+                                        relevantDescriptors.get(
+                                                            descriptor):
+                                        positiveConsensuses.add(consensus)
+                                break
+                            line = descriptorFile.readline()
+                        descriptorFile.close()
+
+    # print out result
+    matches = None
+    if target:
+        matches = positiveConsensuses
+    else:
+        matches = positiveConsensusesNoTarget
+    lastConsensus = sorted(relevantConsensuses)[len(relevantConsensuses)-1]
+    if lastConsensus in matches:
+        print "\n%s\n\nResult is POSITIVE with high certainty!\n\nWe " \
+              "found one or more relays on IP address %s%s in the most " \
+              "recent consensus preceding %s that clients were likely " \
+              "to know.\n" % (DELIMITER, relayIP, targetHelpStr,
+              timestampStr)
+        sys.exit()
+    resultIndecisive = target and len(missingDescriptors) > 0
+    if resultIndecisive:
+        print "\n%s\n\nResult is INDECISIVE!\n\nAt least one " \
+              "referenced descriptor could not be found. This is a rare " \
+              "case, but one that (apparently) happens. We cannot make " \
+              "any good statement about exit relays without these " \
+              "descriptors. The following descriptors are missing:" % \
+              DELIMITER
+        for desc in missingDescriptors:
+            print "  %s" % desc
+    inOtherRelevantConsensus = False
+    inTooOldConsensuses = False
+    inTooNewConsensuses = False
+    for f in matches:
+        if f in relevantConsensuses:
+            inOtherRelevantConsensus = True
+        elif f in tooOldConsensuses:
+            inTooOldConsensuses = True
+        elif f in tooNewConsensuses:
+            inTooNewConsensuses = True
+    if inOtherRelevantConsensus:
+        if not resultIndecisive:
+            print "\n%s\n\nResult is POSITIVE with moderate certainty!" % \
+                  DELIMITER
+        print "\nWe found one or more relays on IP address %s%s, but " \
+              "not in the consensus immediately preceding %s. A " \
+              "possible reason for the relay being missing in the last " \
+              "consensus preceding the given time might be that some of " \
+              "the directory authorities had difficulties connecting to " \
+              "the relay. However, clients might still have used the " \
+              "relay." % (relayIP, targetHelpStr, timestampStr)
+    else:
+        if not resultIndecisive:
+            print "\n%s\n\nResult is NEGATIVE with high certainty!" % \
+                  DELIMITER
+        print "\nWe did not find any relay on IP address %s%s in the " \
+              "consensuses 3:00 hours preceding %s." % (relayIP,
+              targetHelpStr, timestampStr)
+        if inTooOldConsensuses or inTooNewConsensuses:
+            if inTooOldConsensuses and not inTooNewConsensuses:
+                print "\nNote that we found a matching relay in " \
+                      "consensuses that were published between 5:00 and " \
+                      "3:00 hours before %s." % timestampStr
+            elif not inTooOldConsensuses and inTooNewConsensuses:
+                print "\nNote that we found a matching relay in " \
+                      "consensuses that were published up to 2:00 hours " \
+                      "after %s." % timestampStr
+            else:
+                print "\nNote that we found a matching relay in " \
+                      "consensuses that were published between 5:00 and " \
+                      "3:00 hours before and in consensuses that were " \
+                      "published up to 2:00 hours after %s." % timestampStr
+            print "Make sure that the timestamp you provided is in the " \
+                  "correct timezone: UTC (or GMT)."
+    if target:
+        if not positiveConsensuses and positiveConsensusesNoTarget:
+            print "\nNote that although the found relay(s) did not " \
+                  "permit exiting to %s there have been one or more " \
+                  "relays running at the given time." % target
+    print ""
+    
-- 
1.7.1



More information about the tor-commits mailing list