r24768: {} Only leave a note that the code moved to Git. (in projects/archives/trunk: . bridge-desc-sanitizer exonerator)

18 May 2011

Author: kloesing
Date: 2011-05-18 20:15:43 +0000 (Wed, 18 May 2011)
New Revision: 24768

Added:
   projects/archives/trunk/README
Removed:
   projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java
   projects/archives/trunk/bridge-desc-sanitizer/HOWTO
   projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh
   projects/archives/trunk/exonerator/ExoneraTor.java
   projects/archives/trunk/exonerator/HOWTO
   projects/archives/trunk/exonerator/LICENSE
   projects/archives/trunk/exonerator/exonerator.py
Log:
Only leave a note that the code moved to Git.

Added: projects/archives/trunk/README
===================================================================

--- projects/archives/trunk/README	                        (rev 0)
+++ projects/archives/trunk/README	2011-05-18 20:15:43 UTC (rev 24768)
@@ -0,0 +1,7 @@
+---------------------------------------------------------------------------
+
+                     THIS REPOSITORY HAS MOVED TO GIT!
+
+             git clone git://git.torproject.org/metrics-utils/
+
+---------------------------------------------------------------------------

Deleted: projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java
===================================================================
--- projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java	2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java	2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,504 +0,0 @@
-import java.io.*;
-import java.util.*;
-import com.maxmind.geoip.*;
-import org.apache.commons.codec.digest.*;
-import org.apache.commons.codec.binary.*;
-
-public class ConvertBridgeDescs {
-
-  public static void main(String[] args) throws Exception {
-
-    /* If the following flag is set to true, don't write 127.0.0.1 for
-     * bridge IP addresses, but replace IP addresses with
-     * H(IP address + bridge identity + secret)[:4] formatted as IP
-     * address. An example for the hash input would be:
-     * "12.34.56.78ABCDABCDABCDABCDABCDABCDABCDABCDABCDABCDpassword"
-     * (without quotes) */
-    boolean hashIpAddresses = false;
-    String secret = "password";
-
-    long started = System.currentTimeMillis();
-
-    if (args.length < 5) {
-      System.err.println("Usage: java "
-          + ConvertBridgeDescs.class.getSimpleName()
-          + " <input directory> <geoip.txt file> <YYYY> <MM> "
-          + "<output directory>");
-      System.exit(1);
-    }
-    File inDir = new File(args[0]);
-    File geoipFile = new File(args[1]);
-    LookupService cl = new LookupService(geoipFile,
-        LookupService.GEOIP_MEMORY_CACHE);
-    Set<String> unresolved = new HashSet<String>();
-    unresolved.add("--");
-    unresolved.add("a1");
-    unresolved.add("a2");
-    unresolved.add("eu");
-    unresolved.add("ap");
-    String year = args[2];
-    String month = args[3];
-    int yearInt = Integer.parseInt(year);
-    int monthInt = Integer.parseInt(month);
-    File outDir = new File(args[4] + File.separator
-        + "bridge-descriptors-" + year + "-" + month);
-    outDir.mkdirs();
-
-    SortedSet<File> statuses = new TreeSet<File>();
-    Set<File> descriptors = new HashSet<File>();
-    Set<File> extrainfos = new HashSet<File>();
-
-    System.out.println("Checking files in " + inDir.getAbsolutePath()
-        + "...");
-    Stack<File> directoriesLeftToParse = new Stack<File>();
-    directoriesLeftToParse.push(inDir);
-    String currentYearAndMonth = "from-tonga-" + year + "-" + month;
-    String previousYearAndMonth = "from-tonga-" + (monthInt == 1 ?
-        "" + (yearInt - 1) + "-12" :
-        year + "-" + (monthInt < 11 ? "0" : "") + (monthInt - 1));
-    String nextYearAndMonth = "from-tonga-" + (monthInt == 12 ?
-            "" + (yearInt + 1) + "-01" :
-            year + "-" + (monthInt < 9 ? "0" : "") + (monthInt + 1));
-    while (!directoriesLeftToParse.isEmpty()) {
-      File directoryOrFile = directoriesLeftToParse.pop();
-      String filename = directoryOrFile.getName();
-      if (directoryOrFile.isDirectory()) {
-        if (/* base directory */
-            filename.equals(inDir.getName()) ||
-            /* current month */
-            filename.startsWith(currentYearAndMonth) ||
-            /* last days of previous month */
-            (filename.startsWith(previousYearAndMonth)
-            && Integer.parseInt(filename.substring(19, 21)) > 24) ||
-            /* first days of next month */
-            (filename.startsWith(nextYearAndMonth)
-            && Integer.parseInt(filename.substring(19, 21)) < 6)) {
-          for (File fileInDir : directoryOrFile.listFiles()) {
-            directoriesLeftToParse.push(fileInDir);
-          }
-        }
-        continue;
-      }
-      if (filename.startsWith("cached-extrainfo")) {
-        extrainfos.add(directoryOrFile);
-      } else if (filename.equals("bridge-descriptors")) {
-        descriptors.add(directoryOrFile);
-      } else if (filename.equals("networkstatus-bridges")) {
-        statuses.add(directoryOrFile);
-      }
-    }
-
-    int days = ((extrainfos.size() / 2 + descriptors.size()
-        + statuses.size()) + 3 * 24) / (3 * 48);
-    System.out.println("Found " + extrainfos.size()
-        + " cached-extrainfo[.new] files, " + descriptors.size()
-        + " bridge-descriptors files, and " + statuses.size()
-        + " networkstatus-bridges files, covering approximately " + days
-        + " days.");
-
-    System.out.print("Parsing server descriptors to find out country "
-        + "codes of bridges in extra-info descriptors");
-    Map<String, String> bridgeCountries = new HashMap<String, String>();
-    int parsed = 0;
-    for (File file : descriptors) {
-      if (parsed++ > descriptors.size() / days) {
-        System.out.print(".");
-        parsed = 0;
-      }
-      BufferedReader br = new BufferedReader(new FileReader(file));
-      String line = null, routerLine = null;
-      while ((line = br.readLine()) != null) {
-        if (line.startsWith("router ")) {
-          routerLine = line;
-        } else if (line.startsWith("opt extra-info-digest ")) {
-          String extraInfoDigest = line.split(" ")[2];
-          String countryCode = cl.getCountry(routerLine.split(" ")[2]).
-              getCode();
-          if (bridgeCountries.containsKey(extraInfoDigest) &&
-              !bridgeCountries.get(extraInfoDigest).
-              equals(countryCode)) {
-            System.out.println("Mapping already contains extra-info "
-                + "digest " + extraInfoDigest + " with different "
-                + "country. Exiting.");
-            System.exit(1);
-          }
-          bridgeCountries.put(extraInfoDigest, countryCode);
-        }
-      }
-    }
-    System.out.println("Mapping contains " + bridgeCountries.size()
-        + " entries.");
-
-    System.out.print("Parsing extra-info descriptors");
-    String[] hex = new String[] { "0", "1", "2", "3", "4", "5", "6", "7",
-        "8", "9", "a", "b", "c", "d", "e", "f" };
-    for (String x : hex)
-      for (String y : hex)
-        new File(outDir + File.separator + "extra-infos" + File.separator
-            + x + File.separator + y).mkdirs();
-    int writtenExtrainfos = 0;
-    Map<String, String> extrainfoMapping = new HashMap<String, String>();
-    parsed = 0;
-    for (File file : extrainfos) {
-      if (parsed++ > extrainfos.size() / days) {
-        System.out.print(".");
-        parsed = 0;
-      }
-      FileInputStream fis = new FileInputStream(file);
-      BufferedInputStream bis = new BufferedInputStream(fis);
-      ByteArrayOutputStream baos = new ByteArrayOutputStream();
-      int len;
-      byte[] data = new byte[1024];
-      while ((len = bis.read(data, 0, 1024)) >= 0) {
-        baos.write(data, 0, len);
-      }
-      bis.close();
-      byte[] allData = baos.toByteArray();
-      int startDescriptorIndex = -1, endDescriptorIndex = -1;
-      String asciiString = new String(allData, "US-ASCII");
-      BufferedReader br = new BufferedReader(new StringReader(
-          asciiString));
-      String line = null;
-      StringBuilder scrubbed = null;
-      boolean skipSignature = false;
-      boolean skipDescriptor = false;
-      while ((line = br.readLine()) != null) {
-        if (skipSignature && !line.equals("-----END SIGNATURE-----")) {
-          continue;
-        } else if (line.startsWith("extra-info ")) {
-          endDescriptorIndex = startDescriptorIndex =
-              asciiString.indexOf(line, startDescriptorIndex + 1);
-          scrubbed = new StringBuilder(DigestUtils.shaHex(Hex.decodeHex(
-              line.split(" ")[2].toCharArray())).toUpperCase() + "\n");
-        } else if (line.startsWith("published ")
-            || line.startsWith("write-history ")
-            || line.startsWith("read-history ")
-            || line.startsWith("geoip-start-time ")
-            || line.startsWith("geoip-client-origins ")
-            || line.startsWith("bridge-stats-end ") 
-            || line.startsWith("bridge-ips ")) {
-          scrubbed.append(line + "\n");
-        } else if (line.startsWith("router-signature")) {
-          if (skipDescriptor) {
-            System.out.println("Skipping!");
-            skipDescriptor = false;
-          } else {
-            endDescriptorIndex = asciiString.indexOf(line,
-                endDescriptorIndex + 1) + line.length() + 1;
-            byte[] forDigest = new byte[endDescriptorIndex -
-                startDescriptorIndex];
-            System.arraycopy(allData, startDescriptorIndex, forDigest, 0,
-                endDescriptorIndex - startDescriptorIndex);
-            String originalHash = DigestUtils.shaHex(forDigest);
-            String countryCode = "ZZ";
-            if (bridgeCountries.containsKey(originalHash.toUpperCase())) {
-              countryCode = bridgeCountries.get(originalHash.toUpperCase());
-            }
-            String scrubbedDesc = "extra-info Unnamed" + countryCode + " "
-                + scrubbed.toString();
-            String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);
-            if (extrainfoMapping.containsKey(originalHash) &&
-                !extrainfoMapping.get(originalHash).equals(scrubbedHash)) {
-              System.out.println("We already have an extra-info mapping "
-                  + "from " + originalHash + " to "
-                  + extrainfoMapping.get(originalHash) + ", but we now "
-                  + "want to add a mapping to " + scrubbedHash
-                  + ". Exiting");
-              System.exit(1);
-            }
-            extrainfoMapping.put(originalHash, scrubbedHash);
-            File out = new File(outDir + File.separator + "extra-infos"
-                + File.separator + scrubbedHash.charAt(0) + File.separator
-                + scrubbedHash.charAt(1) + File.separator + scrubbedHash);
-            if (!out.exists()) {
-              BufferedWriter bw = new BufferedWriter(new FileWriter(out));
-              bw.write(scrubbedDesc);
-              bw.close();
-              writtenExtrainfos++;
-            }
-          }
-        } else if (line.equals("-----BEGIN SIGNATURE-----")) {
-          skipSignature = true;
-        } else if (line.equals("-----END SIGNATURE-----")) {
-          skipSignature = false;
-        } else if (line.startsWith("dirreq-") || line.startsWith("cell-")
-            || line.startsWith("exit-")) {
-          continue;
-        } else {
-          System.out.println("Unrecognized line '" + line + "'. Skipping");
-          skipDescriptor = true;
-        }
-      }
-      br.close();
-    }
-    System.out.println("\nWrote " + writtenExtrainfos
-        + " extra-info descriptors.");
-
-    System.out.print("Parsing server descriptors");
-    for (String x : hex)
-      for (String y : hex)
-        new File(outDir + File.separator + "server-descriptors"
-            + File.separator + x + File.separator + y).mkdirs();
-    int writtenDescriptors = 0;
-    Map<String, String> descriptorMapping = new HashMap<String, String>();
-    int found = 0, notfound = 0;
-    parsed = 0;
-    String haveExtraInfo = null;
-    for (File file : descriptors) {
-      if (parsed++ > descriptors.size() / days) {
-        System.out.print(".");
-        parsed = 0;
-      }
-      FileInputStream fis = new FileInputStream(file);
-      BufferedInputStream bis = new BufferedInputStream(fis);
-      ByteArrayOutputStream baos = new ByteArrayOutputStream();
-      int len;
-      byte[] data = new byte[1024];
-      while ((len = bis.read(data, 0, 1024)) >= 0) {
-        baos.write(data, 0, len);
-      }
-      bis.close();
-      byte[] allData = baos.toByteArray();
-      int startDescriptorIndex = -1, endDescriptorIndex = -1;
-      String asciiString = new String(allData, "US-ASCII");
-      BufferedReader br = new BufferedReader(new StringReader(
-          asciiString));
-      String line = null, country = null, originalAddress = null,
-          ipAddress = "127.0.0.1", routerLinePartOne = null,
-          routerLinePartTwo = null;
-      StringBuilder scrubbed = null;
-      boolean skipCrypto = false, contactWritten = false;
-      while ((line = br.readLine()) != null) {
-        if (skipCrypto && !line.startsWith("-----END ")) {
-          continue;
-        } else if (line.startsWith("router ")) {
-          endDescriptorIndex = startDescriptorIndex =
-              asciiString.indexOf(line, startDescriptorIndex + 1);
-          country = cl.getCountry(line.split(" ")[2]).getCode().
-              toLowerCase();
-          if (unresolved.contains(country)) {
-            country = "zz";
-          }
-          originalAddress = line.split(" ")[2];
-          scrubbed = new StringBuilder();
-          routerLinePartOne = "router Unnamed" + country.toUpperCase();
-          routerLinePartTwo = line.split(" ")[3] + " "
-              + line.split(" ")[4] + " " + line.split(" ")[5] + "\n";
-          contactWritten = false;
-          haveExtraInfo = null;
-        } else if (line.startsWith("opt fingerprint ")) {
-          scrubbed.append("opt fingerprint");
-          String fingerprint = DigestUtils.shaHex(Hex.decodeHex(
-              line.substring(16).replaceAll(" ", "").toCharArray())).
-              toUpperCase();
-          for (int i = 0; i < fingerprint.length() / 4; i++)
-            scrubbed.append(" " + fingerprint.substring(4 * i,
-                4 * (i + 1)));
-          scrubbed.append("\n");
-          if (hashIpAddresses) {
-            byte[] hashedOctets = DigestUtils.sha(originalAddress
-                + line.substring(16).replaceAll(" ", "") + secret);
-            String hashedIp = "";
-            for (int i = 0; i < 4; i++) {
-              hashedIp += "." + ((int) hashedOctets[i] + 256) % 256;
-            }
-            ipAddress = hashedIp.substring(1);
-          }
-        } else if (line.startsWith("contact ")) {
-          scrubbed.append("contact somebody at example dot " + country
-              + "\n");
-          contactWritten = true;
-        } else if (line.startsWith("router-signature")) {
-          endDescriptorIndex = asciiString.indexOf(line,
-              endDescriptorIndex + 1) + line.length() + 1;
-          byte[] forDigest = new byte[endDescriptorIndex -
-              startDescriptorIndex];
-          System.arraycopy(allData, startDescriptorIndex, forDigest, 0,
-              endDescriptorIndex - startDescriptorIndex);
-          String originalHash = DigestUtils.shaHex(forDigest);
-          String scrubbedDesc = routerLinePartOne + " " + ipAddress
-              + " " + routerLinePartTwo + scrubbed.toString();
-          String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);
-          if (descriptorMapping.containsKey(originalHash) &&
-              !descriptorMapping.get(originalHash).equals(scrubbedHash)) {
-            System.out.println("We already have a descriptor mapping "
-                + "from " + originalHash + " to "
-                + descriptorMapping.get(originalHash) + ", but we now "
-                + "want to add a mapping to " + scrubbedHash
-                + ". Exiting");
-            System.exit(1);
-          }
-          descriptorMapping.put(originalHash, scrubbedHash);
-          if (haveExtraInfo != null) {
-            File out = new File(outDir + File.separator
-                + "server-descriptors" + File.separator
-                + scrubbedHash.charAt(0) + File.separator
-                + scrubbedHash.charAt(1) + File.separator + scrubbedHash);
-            if (!out.exists()) {
-              BufferedWriter bw2 = new BufferedWriter(new FileWriter(out));
-              bw2.write(scrubbedDesc);
-              bw2.close();
-              writtenDescriptors++;
-            }
-          }
-        } else if (line.startsWith("opt extra-info-digest ")) {
-          String originalExtraInfo = line.split(" ")[2].toLowerCase();
-          if (!extrainfoMapping.containsKey(originalExtraInfo)) {
-            notfound++;
-            haveExtraInfo = "0000000000000000000000000000000000000000";
-          } else {
-            found++;
-            haveExtraInfo = extrainfoMapping.get(originalExtraInfo).
-                toUpperCase();
-          }
-          scrubbed.append("opt extra-info-digest " + haveExtraInfo
-              + "\n");
-        } else if (line.startsWith("reject ")
-            || line.startsWith("accept ")) {
-          if (!contactWritten) {
-            scrubbed.append("contact nobody at example dot " + country
-                + "\n");
-            contactWritten = true;
-          }
-          scrubbed.append(line + "\n");
-        } else if (line.startsWith("platform ")
-            || line.startsWith("opt protocols ")
-            || line.startsWith("published ")
-            || line.startsWith("uptime ")
-            || line.startsWith("bandwidth ")
-            || line.startsWith("opt hibernating ")
-            || line.equals("opt hidden-service-dir")
-            || line.equals("opt caches-extra-info")
-            || line.equals("opt allow-single-hop-exits")) {
-          scrubbed.append(line + "\n");
-        } else if (line.startsWith("family ")) {
-          StringBuilder familyLine = new StringBuilder("family");
-          for (String s : line.substring(7).split(" ")) {
-            if (s.startsWith("$")) {
-              familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex(
-                  s.substring(1).toCharArray())).toUpperCase());
-            } else {
-              familyLine.append(" Unnamed");
-            }
-          }
-          scrubbed.append(familyLine.toString() + "\n");
-        } else if (line.startsWith("@purpose ")) {
-          continue;
-        } else if (line.startsWith("-----BEGIN ")
-            || line.equals("onion-key") || line.equals("signing-key")) {
-          skipCrypto = true;
-        } else if (line.startsWith("-----END ")) {
-          skipCrypto = false;
-        } else {
-          System.out.println("Unrecognized line '" + line + "'. Exiting");
-          System.exit(1);
-        }
-      }
-      br.close();
-    }
-    System.out.println("\nWrote " + writtenDescriptors
-        + " bridge descriptors. While parsing, we found that we parsed "
-        + found + " extra-info identifiers before, but are missing "
-        + notfound + ". (The number of missing identifiers should be "
-        + "significantly smaller.)");
-
-    System.out.print("Parsing network statuses");
-    parsed = notfound = found = 0;
-    for (File file : statuses) {
-      if (parsed++ > statuses.size() / days) {
-        System.out.print(".");
-        parsed = 0;
-      }
-      if (!file.getParent().substring(file.getParent().
-          indexOf("from-tonga-")).startsWith(currentYearAndMonth)) {
-        continue;
-      }
-      BufferedReader br = new BufferedReader(new FileReader(file));
-      String line = null;
-      StringBuilder scrubbed = new StringBuilder();
-      while ((line = br.readLine()) != null) {
-        if (line.startsWith("r ")) {
-          String[] parts = line.split(" ");
-          String bridgeIdentity = parts[2] + "==";
-          String hashedBridgeIdentity = Base64.encodeBase64String(
-              DigestUtils.sha(Base64.decodeBase64(bridgeIdentity))).
-              substring(0, 27);
-          String descIdentifier = parts[3] + "==";
-          String hexDescIdentifier = Hex.encodeHexString(
-              Base64.decodeBase64(descIdentifier));
-          String replacementDescIdentifier = null;
-          if (!descriptorMapping.containsKey(hexDescIdentifier)) {
-            notfound++;
-            replacementDescIdentifier = "AAAAAAAAAAAAAAAAAAAAAAAAAAA";
-          } else {
-            found++;
-            String refDesc = descriptorMapping.get(hexDescIdentifier).
-                toLowerCase();
-            File descriptorFile = new File(outDir + File.separator
-                + "server-descriptors" + File.separator
-                + refDesc.charAt(0) + File.separator + refDesc.charAt(1)
-                + File.separator + refDesc);
-            if (!descriptorFile.exists()) {
-              System.out.println("Descriptor file '"
-                  + descriptorFile.getAbsolutePath() + "' does not exist.");
-              System.exit(1);
-            }
-            replacementDescIdentifier = Base64.encodeBase64String(
-                Hex.decodeHex(descriptorMapping.get(hexDescIdentifier).
-                toCharArray())).substring(0, 27);
-          }
-          String country = cl.getCountry(parts[6]).getCode().
-              toLowerCase();
-          if (unresolved.contains(country)) {
-            country = "zz";
-          }
-          String ipAddress = "127.0.0.1";
-          if (hashIpAddresses) {
-            byte[] hashedOctets = DigestUtils.sha(parts[6]
-                + Hex.encodeHexString(Base64.decodeBase64(
-                bridgeIdentity)).toUpperCase() + secret);
-            String hashedIp = "";
-            for (int i = 0; i < 4; i++) {
-              hashedIp += "." + ((int) hashedOctets[i] + 256) % 256;
-            }
-            ipAddress = hashedIp.substring(1);
-          }
-          scrubbed.append("r Unnamed" + country.toUpperCase() + " "
-              + hashedBridgeIdentity
-              + " " + replacementDescIdentifier + " " + parts[4] + " "
-              + parts[5] + " " + ipAddress + " " + parts[7] + " "
-              + parts[8] + "\n");
-        } else if (line.startsWith("s ")) {
-          scrubbed.append(line + "\n");
-        } else {
-          System.out.println("Unknown line: " + line);
-          System.exit(1);
-        }
-      }
-      String timeString = file.getParent().substring(file.getParent().
-          indexOf("from-tonga-") + 11);
-      String[] date = timeString.substring(0, 10).split("-");
-      String time = timeString.substring(11, 17);
-      File dir = new File(outDir + File.separator + "statuses"
-          + File.separator + date[2] + File.separator);
-      dir.mkdirs();
-      File out = new File(dir.getAbsolutePath() + File.separator + date[0]
-          + date[1] + date[2] + "-" + time + "-"
-          + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
-      if (!out.exists()) {
-        BufferedWriter bw3 = new BufferedWriter(new FileWriter(out));
-        bw3.write(scrubbed.toString());
-        bw3.close();
-      }
-    }
-    System.out.println("\nWhile parsing, we found that we parsed "
-        + found + " bridge descriptors before, but are missing "
-        + notfound + ". (The number of missing identifiers should be "
-        + "significantly smaller.)");
-
-    long finished = System.currentTimeMillis();
-    System.out.println("Processing took " + ((finished - started) / 1000)
-        + " seconds.");
-  }
-}
-

Deleted: projects/archives/trunk/bridge-desc-sanitizer/HOWTO
===================================================================
--- projects/archives/trunk/bridge-desc-sanitizer/HOWTO	2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/bridge-desc-sanitizer/HOWTO	2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,138 +0,0 @@
-Bridge descriptor sanitizer
-
----------------------------------------------------------------------------
-
-                     THIS REPOSITORY HAS MOVED TO GIT!
-
-             git clone git://git.torproject.org/metrics-utils/
-
----------------------------------------------------------------------------
-
-Introduction:
-
-The bridge authority Tonga maintains a list of bridges in order to serve
-bridge addresses and descriptors to its clients. Every half hour, Tonga
-takes a snapshot of the known bridge descriptors and copies them to
-byblos for later statistical analysis. As a guiding principle, the Tor
-project makes all data that it uses for statistical analysis available to
-the interested public, in order to maximize transparency towards the
-community. However, the bridge descriptors contain the IP addresses and
-other contact information of bridges that must not be made public, or the
-purpose of bridges as non-public entry points into the Tor network would
-be defeated. This script takes the half-hourly snapshots as input, removes
-all possibly sensitive information from the descriptors, and puts out the
-sanitized bridge descriptors that are safe to be published.
-
----------------------------------------------------------------------------
-
-Processing steps:
-
-The following steps are taken to remove all potentially sensitive
-information from the bridge descriptors while keeping them useful for
-statistical analysis.
-
-1. Replace the bridge identity with its SHA1 value
-
-   Clients can request a bridge's current descriptor by sending its
-   identity string to the bridge authority. This is a feature to make
-   bridges on dynamic IP addresses useful. Therefore, the original
-   identities (and anything that could be used to derive them) need to be
-   removed from the descriptors. The bridge identity is replaced with its
-   SHA1 hash value. The idea is to have a consistent replacement that
-   remains stable over months or even years (without keeping a secret for a
-   keyed hash function).
-
-2. Remove all cryptographic keys and signatures
-
-   It would be straightforward to learn about the bridge identity from the
-   bridge's public key. Replacing keys by newly generated ones seemed to be
-   unnecessary (and would involve keeping a state over months/years), so
-   that all cryptographic objects have simply been removed. 
-
-3. Replace IP address with 127.0.0.1
-
-   Of course, the IP address needs to be removed, too. However, the IP
-   address is resolved to a country code first and the result written to
-   the contact line as "somebody at example dot de" for Germany, etc. The
-   ports are kept unchanged though.
-
-4. Replace contact information
-
-   If there is contact information in a descriptor, the contact line is
-   changed to "somebody at ...". If there is none, a contact line is added
-   saying "nobody at ..." in order to put in the country code. If the
-   bridge's IP address cannot be resolved to a country, the unassigned
-   country code "zz" is written to the contact line.
-
-5. Replace nickname with UnnamedCC
-
-   The bridge nicknames might give hints on the location of the bridge if
-   chosen without care; e.g. a bridge nickname might be very similar to the
-   nicknames of the operator's relays, which might be located on adjacent IP
-   addresses. All bridge nicknames are therefore replaced with the string
-   UnnamedCC with CC being the upper-case country code.
-
-6. Replace references to descriptors
-
-   Changing anything in the server descriptors or extra-info descriptors
-   invalidates the references from network statuses or server descriptors,
-   respectively. All references are replaced with the new hashes of
-   referenced descriptors, if available. In case of missing descriptors,
-   references are replaced with all zeros (or 'A's in base 64 encoding).
-
-Note that these processing steps only prevent people from learning about
-new bridge locations. People who already know a bridge identity or location
-can easily learn more about this bridge from the sanitized descriptors.
-This is useful for statistical analysis, e.g. to filter out bridges that
-have been running as relays before.
-
----------------------------------------------------------------------------
-
-Quick Start:
-
-The following steps are necessary to process the half-hourly snapshots as
-collected by moria:
-
-- Install Java 5 or higher.
-
-- Download Apache Commons Codec 1.4 or higher for Base 64 and hex encoding
-  from http://commons.apache.org/codec/ and place the .jar (in the
-  following assumed to be commons-codec-1.4.jar) in the same directory as
-  this HOWTO file.
-
-- Download MaxMind GeoIP Java library from http://geolite.maxmind.com/
-  download/geoip/api/java/ and generate a JAR file as described in the
-  README file. Place the resulting maxmindgeoip.jar in the same directory
-  as this HOWTO file.
-
-- Copy the half-hourly snapshots named from-tonga-YYYY-MM-DDThhmmssZ.tar.gz
-  into a directory called data/ in the same directory as this HOWTO file.
-
-- Run ./extract-bridges.sh to extract the half-hourly snapshots in data/
-  into separate directories in the newly created subdirectory in/.
-
-- Put the binary MaxMind GeoIP database file that shall be used for
-  resolving IP addresses to country codes in the same directory as this
-  HOWTO file. Either the free or the commercial version of the database
-  can be used. For the archives provided by The Tor Project, the first
-  available commercial version of the subsequent month is used.
-
-- Compile the Java class using
-
-  $ javac -cp commons-codec-1.4.jar:maxmindgeoip.jar
-          ConvertBridgeDescs.java
-
-- Run the script, providing it with the parameters it needs:
-
-  java -cp .:commons-codec-1.4.jar:maxmindgeoip.jar ConvertBridgeDescs
-           <input directory> <geoip database file> <YYYY> <MM>
-           <output directory>
-
-  Note that YYYY and MM specify the month that shall be processed. The other
-  descriptors in the input directory are ignored.
-
-  A sample invocation might be:
-
-  $ java -cp .:commons-codec-1.4.jar:maxmindgeoip.jar ConvertBridgeDescs
-        in/ GeoIP-106_20081101.dat 2008 10 out/
-

Deleted: projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh
===================================================================
--- projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh	2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh	2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,8 +0,0 @@
-#!/bin/bash
-mkdir "in/"
-for i in `ls data/ | cut -c 1-29`
-do
-mkdir "in/"$i
-tar -C "in/"$i -xf "data/"$i".tar.gz"
-done
-

Deleted: projects/archives/trunk/exonerator/ExoneraTor.java
===================================================================
--- projects/archives/trunk/exonerator/ExoneraTor.java	2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/exonerator/ExoneraTor.java	2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,404 +0,0 @@
-/* Copyright 2009 The Tor Project
- * See LICENSE for licensing information */
-
-import java.io.*;
-import java.math.*;
-import java.text.*;
-import java.util.*;
-import org.bouncycastle.util.encoders.Base64;
-
-public final class ExoneraTor { // command-line tool; all logic is in main()
-
-  public static void main(final String[] args) throws Exception {
-
-    // check parameters: archive dir, relay IP, date, time, optional target
-    if (args.length < 4 || args.length > 5) {
-      System.err.println("\nUsage: java "
-          + ExoneraTor.class.getSimpleName()
-          + " <descriptor archive directory> <IP address in question> "
-          + "<timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss> "
-          + "[<target address>[:<target port>]]\n");
-      return;
-    }
-    File archiveDirectory = new File(args[0]);
-    if (!archiveDirectory.exists() || !archiveDirectory.isDirectory()) {
-      System.err.println("\nDescriptor archive directory + "
-            + archiveDirectory.getAbsolutePath()
-            + " does not exist or is not a directory.\n");
-      return; // NOTE(review): the "+ " inside the message literal above looks unintended
-    }
-    String relayIP = args[1];
-    String timestampStr = args[2] + " " + args[3]; // date and time are two args
-    SimpleDateFormat timeFormat = new SimpleDateFormat(
-        "yyyy-MM-dd HH:mm:ss");
-    timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    long timestamp = timeFormat.parse(timestampStr).getTime();
-    String target = null, targetIP = null, targetPort = null;
-    String[] targetIPParts = null;
-    if (args.length > 4) {
-      target = args[4];
-      if (target.contains(":")) {
-        targetIP = target.split(":")[0];
-        targetPort = target.split(":")[1];
-      } else {
-        targetIP = target;
-      }
-      targetIPParts = targetIP.replace(".", " ").split(" "); // parts between dots
-    }
-    String DELIMITER = "--------------------------------------------------"
-        + "-------------------------";
-    System.out.println("\nTrying to find out whether " + relayIP + " was "
-        + "running as a Tor relay at " + timestampStr
-        + (target != null ? " permitting exiting to " + target : "")
-        + "...\n\n" + DELIMITER);
-
-    // check that we have the required archives (window: -5h to +2h)
-    long timestampTooOld = timestamp - 300 * 60 * 1000; // 5 hours earlier
-    long timestampFrom = timestamp - 180 * 60 * 1000; // 3 hours earlier
-    long timestampTooNew = timestamp + 120 * 60 * 1000; // 2 hours later
-    Calendar calTooOld = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
-    Calendar calFrom = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
-    Calendar calTooNew = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
-    calTooOld.setTimeInMillis(timestampTooOld);
-    calFrom.setTimeInMillis(timestampFrom);
-    calTooNew.setTimeInMillis(timestampTooNew);
-    System.out.printf("%nChecking that relevant archives between "
-        + "%tF %<tT and %tF %<tT are available...%n", calTooOld,
-        calTooNew);
-    SortedSet<String> requiredDirs = new TreeSet<String>(); // names still missing
-    requiredDirs.add(String.format("consensuses-%tY-%<tm", calTooOld));
-    requiredDirs.add(String.format("consensuses-%tY-%<tm", calTooNew));
-    if (target != null) { // exit policy check also needs server descriptors
-      requiredDirs.add(String.format("server-descriptors-%tY-%<tm",
-          calTooOld));
-      requiredDirs.add(String.format("server-descriptors-%tY-%<tm",
-          calTooNew));
-    }
-    SortedSet<File> consensusDirs = new TreeSet<File>();
-    SortedSet<File> descriptorsDirs = new TreeSet<File>();
-    Stack<File> directoriesLeftToParse = new Stack<File>();
-    directoriesLeftToParse.push(archiveDirectory);
-    while (!directoriesLeftToParse.isEmpty()) { // walk the tree without recursion
-      File directoryOrFile = directoriesLeftToParse.pop();
-      if (directoryOrFile.getName().startsWith("consensuses-")) {
-        if (requiredDirs.contains(directoryOrFile.getName())) {
-          requiredDirs.remove(directoryOrFile.getName());
-          consensusDirs.add(directoryOrFile);
-        }
-      } else if (directoryOrFile.getName().startsWith(
-          "server-descriptors-")) {
-        if (requiredDirs.contains(directoryOrFile.getName())) {
-          requiredDirs.remove(directoryOrFile.getName());
-          descriptorsDirs.add(directoryOrFile);
-        }
-      } else {
-        for (File fileInDir : directoryOrFile.listFiles())
-          if (fileInDir.isDirectory())
-            directoriesLeftToParse.push(fileInDir);
-      }
-    }
-    for (File dir : consensusDirs)
-      System.out.println("  " + dir.getAbsolutePath());
-    for (File dir : descriptorsDirs)
-      System.out.println("  " + dir.getAbsolutePath());
-    if (!requiredDirs.isEmpty()) {
-      System.out.println("\nWe are missing consensuses and/or server "
-          + "descriptors. Please download these archives and extract them "
-          + "to your data directory. Be sure NOT to rename the extracted "
-          + "directories or the contained files.");
-      for (String dir : requiredDirs)
-        System.out.println("  " + dir + ".tar.bz2");
-      return;
-    }
-
-    // look for consensus files and sort them into three time buckets
-    System.out.printf("%nLooking for relevant consensuses between "
-        + "%tF %<tT and %s...%n", calFrom, timestampStr);
-    SortedSet<File> tooOldConsensuses = new TreeSet<File>();
-    SortedSet<File> relevantConsensuses = new TreeSet<File>();
-    SortedSet<File> tooNewConsensuses = new TreeSet<File>();
-    directoriesLeftToParse.clear();
-    for (File consensusDir : consensusDirs)
-      directoriesLeftToParse.push(consensusDir);
-    SimpleDateFormat consensusTimeFormat = new SimpleDateFormat(
-        "yyyy-MM-dd-HH-mm-ss");
-    consensusTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    while (!directoriesLeftToParse.isEmpty()) {
-      File directoryOrFile = directoriesLeftToParse.pop();
-      if (directoryOrFile.isDirectory()) {
-        for (File fileInDir : directoryOrFile.listFiles()) {
-          directoriesLeftToParse.push(fileInDir);
-        }
-        continue;
-      } else {
-        String filename = directoryOrFile.getName();
-        if (filename.endsWith("consensus")) {
-          long consensusTime = consensusTimeFormat.parse(
-              filename.substring(0, 19)).getTime(); // leading yyyy-MM-dd-HH-mm-ss
-          if (consensusTime >= timestampTooOld &&
-              consensusTime < timestampFrom)
-            tooOldConsensuses.add(directoryOrFile); // [-5h, -3h)
-          else if (consensusTime >= timestampFrom &&
-                   consensusTime <= timestamp)
-            relevantConsensuses.add(directoryOrFile); // [-3h, given time]
-          else if (consensusTime > timestamp &&
-                   consensusTime <= timestampTooNew)
-            tooNewConsensuses.add(directoryOrFile); // (given time, +2h]
-        }
-      }
-    }
-    SortedSet<File> allConsensuses = new TreeSet<File>();
-    allConsensuses.addAll(tooOldConsensuses);
-    allConsensuses.addAll(relevantConsensuses);
-    allConsensuses.addAll(tooNewConsensuses);
-    if (allConsensuses.isEmpty()) {
-      System.out.println("  None found!\n\n" + DELIMITER + "\n\nResult is "
-          + "INDECISIVE!\n\nWe cannot make any statement about IP address "
-          + relayIP + " being a relay at " + timestampStr + " or not! We "
-          + "did not find any relevant consensuses preceding the given "
-          + "time. This either means that you did not download and "
-          + "extract the consensus archives preceding the hours before "
-          + "the given time, or (in rare cases) that the directory "
-          + "archives are missing the hours before the timestamp. Please "
-          + "check that your directory archives contain consensus files "
-          + "of the interval 5:00 hours before and 2:00 hours after the "
-          + "time you are looking for.\n");
-      return;
-    }
-    for (File f : relevantConsensuses)
-      System.out.println("  " + f.getAbsolutePath());
-
-    // parse consensuses to find descriptors belonging to the IP address
-    System.out.println("\nLooking for descriptor identifiers referenced "
-        + "in \"r \" lines in these consensuses containing IP address "
-        + relayIP + "...");
-    SortedSet<File> positiveConsensusesNoTarget = new TreeSet<File>();
-    Set<String> addressesInSameNetwork = new HashSet<String>();
-    SortedMap<String, Set<File>> relevantDescriptors =
-        new TreeMap<String, Set<File>>(); // hex id -> consensuses referencing it
-    for (File consensus : allConsensuses) {
-      if (relevantConsensuses.contains(consensus))
-        System.out.println("  " + consensus.getAbsolutePath());
-      BufferedReader br = new BufferedReader(new FileReader(consensus));
-      String line;
-      while ((line = br.readLine()) != null) {
-        if (!line.startsWith("r "))
-          continue;
-        String[] parts = line.split(" ");
-        String address = parts[6]; // 7th space-separated field of the "r" line
-        if (address.equals(relayIP)) {
-          byte[] result = Base64.decode(parts[3] + "=="); // NOTE(review): never read
-          String hex = String.format("%040x", new BigInteger(1,
-               Base64.decode(parts[3] + "=="))); // 4th field as 40 hex digits
-          if (!relevantDescriptors.containsKey(hex))
-            relevantDescriptors.put(hex, new HashSet<File>());
-          relevantDescriptors.get(hex).add(consensus);
-          positiveConsensusesNoTarget.add(consensus);
-          if (relevantConsensuses.contains(consensus))
-            System.out.println("    \"" + line + "\" references "
-                + "descriptor " + hex);
-        } else {
-          if (relayIP.startsWith(address.substring(0,
-              address.lastIndexOf(".")))) {
-            addressesInSameNetwork.add(address);
-          } // NOTE(review): prefix lacks the trailing ".", so 1.2.3.x also matches 1.2.30.y
-        }
-      }
-      br.close();
-    }
-    if (relevantDescriptors.isEmpty()) {
-      System.out.printf("  None found!\n\n" + DELIMITER + "\n\nResult is "
-          + "NEGATIVE with moderate certainty!\n\nWe did not find IP "
-          + "address " + relayIP + " in any of the consensuses that were "
-          + "published between %tF %<tT and %tF %<tT.\n\nA possible "
-          + "reason for false negatives is that the relay is using a "
-          + "different IP address when generating a descriptor than for "
-          + "exiting to the Internet. We hope to provide better checks "
-          + "for this case in the future.", calTooOld, calTooNew);
-      if (!addressesInSameNetwork.isEmpty()) {
-        System.out.println("\n\nThe following other IP addresses of Tor "
-            + "relays were found in the mentioned consensus files that "
-            + "are in the same /24 network and that could be related to "
-            + "IP address " + relayIP + ":");
-        for (String s : addressesInSameNetwork) {
-          System.out.println("  " + s);
-        }
-      }
-      System.out.println();
-      return;
-    }
-
-    // parse router descriptors to check exit policies
-    SortedSet<File> positiveConsensuses = new TreeSet<File>();
-    Set<String> missingDescriptors = new HashSet<String>();
-    if (target != null) {
-      System.out.println("\nChecking if referenced descriptors permit "
-          + "exiting to " + target + "...");
-      Set<String> descriptors = relevantDescriptors.keySet();
-      missingDescriptors.addAll(relevantDescriptors.keySet()); // removed when found
-      directoriesLeftToParse.clear();
-      for (File descriptorsDir : descriptorsDirs)
-        directoriesLeftToParse.push(descriptorsDir);
-      while (!directoriesLeftToParse.isEmpty()) {
-        File directoryOrFile = directoriesLeftToParse.pop();
-        if (directoryOrFile.isDirectory()) {
-          for (File fileInDir : directoryOrFile.listFiles()) {
-            directoriesLeftToParse.push(fileInDir);
-          }
-          continue;
-        } else {
-          String filename = directoryOrFile.getName();
-          for (String descriptor : descriptors) {
-            if (filename.equals(descriptor)) { // file name equals the hex identifier
-              missingDescriptors.remove(descriptor);
-              BufferedReader br = new BufferedReader(
-                  new FileReader(directoryOrFile));
-              String line;
-              while ((line = br.readLine()) != null) {
-                if (line.startsWith("reject ") ||
-                    line.startsWith("accept ")) {
-                  boolean ruleAccept = line.split(" ")[0].equals("accept");
-                  String ruleAddress = line.split(" ")[1].split(":")[0];
-                  if (!ruleAddress.equals("*")) {
-                    if (!ruleAddress.contains("/") &&
-                        !ruleAddress.equals(targetIP))
-                      continue; // IP address does not match
-                    String[] ruleIPParts = ruleAddress.split("/")[0].
-                        replace(".", " ").split(" ");
-                    int ruleNetwork = ruleAddress.contains("/") ?
-                        Integer.parseInt(ruleAddress.split("/")[1]) : 32;
-                    for (int i = 0; i < 4; i++) { // consume prefix bits, 8 per part
-                      if (ruleNetwork == 0) {
-                        break;
-                      } else if (ruleNetwork >= 8) {
-                        if (ruleIPParts[i].equals(targetIPParts[i]))
-                          ruleNetwork -= 8;
-                        else
-                          break;
-                      } else {
-                        int mask = 255 ^ 255 >>> ruleNetwork; // top ruleNetwork bits
-                        if ((Integer.parseInt(ruleIPParts[i]) & mask) ==
-                            (Integer.parseInt(targetIPParts[i]) & mask))
-                          ruleNetwork = 0;
-                        break;
-                      }
-                    }
-                    if (ruleNetwork > 0)
-                      continue; // IP address does not match
-                  }
-                  String rulePort = line.split(" ")[1].split(":")[1];
-                  if (targetPort == null && !ruleAccept &&
-                      !rulePort.equals("*"))
-                    continue; // with no port given, we only consider
-                              // reject :* rules as matching
-                  if (targetPort != null) {
-                    if (!rulePort.equals("*") &&
-                        !targetPort.equals(rulePort))
-                      continue; // ports do not match
-                  }
-                  boolean relevantMatch = false;
-                  for (File f : relevantDescriptors.get(descriptor))
-                    if (relevantConsensuses.contains(f))
-                      relevantMatch = true;
-                  if (relevantMatch)
-                    System.out.println("  "
-                        + directoryOrFile.getAbsolutePath() + " "
-                        + (ruleAccept ? "permits" : "does not permit")
-                        + " exiting to " + target + " according to rule \""
-                        + line + "\"");
-                  if (ruleAccept)
-                    positiveConsensuses.addAll(
-                        relevantDescriptors.get(descriptor));
-                  break; // first matching rule decides; stop reading this file
-                }
-              }
-              br.close();
-            }
-          }
-        }
-      }
-    }
-
-    // print out result (NOTE(review): last() throws if relevantConsensuses is empty)
-    Set<File> matches = (target != null) ? positiveConsensuses
-                                         : positiveConsensusesNoTarget;
-    if (matches.contains(relevantConsensuses.last())) {
-      System.out.println("\n" + DELIMITER + "\n\nResult is POSITIVE with "
-          + "high certainty!\n\nWe found one or more relays on IP address "
-          + relayIP
-          + (target != null ? " permitting exit to " + target : "")
-          + " in the most recent consensus preceding " + timestampStr
-          + " that clients were likely to know.\n");
-      return;
-    }
-    boolean resultIndecisive = target != null
-        && !missingDescriptors.isEmpty();
-    if (resultIndecisive) {
-      System.out.println("\n" + DELIMITER + "\n\nResult is INDECISIVE!\n\n"
-          + "At least one referenced descriptor could not be found. This "
-          + "is a rare case, but one that (apparently) happens. We cannot "
-          + "make any good statement about exit relays without these "
-          + "descriptors. The following descriptors are missing:");
-      for (String desc : missingDescriptors)
-        System.out.println("  " + desc);
-    }
-    boolean inOtherRelevantConsensus = false, inTooOldConsensuses = false,
-        inTooNewConsensuses = false;
-    for (File f : matches) // record which time buckets contain a match
-      if (relevantConsensuses.contains(f))
-        inOtherRelevantConsensus = true;
-      else if (tooOldConsensuses.contains(f))
-        inTooOldConsensuses = true;
-      else if (tooNewConsensuses.contains(f))
-        inTooNewConsensuses = true;
-    if (inOtherRelevantConsensus) {
-      if (!resultIndecisive)
-        System.out.println("\n" + DELIMITER + "\n\nResult is POSITIVE "
-            + "with moderate certainty!");
-      System.out.println("\nWe found one or more relays on IP address "
-          + relayIP
-          + (target != null ? " permitting exit to " + target : "")
-          + ", but not in the consensus immediately preceding "
-          + timestampStr + ". A possible reason for the relay being "
-          + "missing in the last consensus preceding the given time might "
-          + "be that some of the directory authorities had difficulties "
-          + "connecting to the relay. However, clients might still have "
-          + "used the relay.");
-    } else {
-      if (!resultIndecisive)
-        System.out.println("\n" + DELIMITER + "\n\nResult is NEGATIVE "
-            + "with high certainty!");
-      System.out.println("\nWe did not find any relay on IP address "
-          + relayIP
-          + (target != null ? " permitting exit to " + target : "")
-          + " in the consensuses 3:00 hours preceding " + timestampStr
-          + ".");
-      if (inTooOldConsensuses || inTooNewConsensuses) {
-        if (inTooOldConsensuses && !inTooNewConsensuses)
-          System.out.println("\nNote that we found a matching relay in "
-              + "consensuses that were published between 5:00 and 3:00 "
-              + "hours before " + timestampStr + ".");
-        else if (!inTooOldConsensuses && inTooNewConsensuses)
-          System.out.println("\nNote that we found a matching relay in "
-              + "consensuses that were published up to 2:00 hours after "
-              + timestampStr + ".");
-        else
-          System.out.println("\nNote that we found a matching relay in "
-              + "consensuses that were published between 5:00 and 3:00 "
-              + "hours before and in consensuses that were published up "
-              + "to 2:00 hours after " + timestampStr + ".");
-        System.out.println("Make sure that the timestamp you provided is "
-            + "in the correct timezone: UTC (or GMT).");
-      }
-    }
-    if (target != null) {
-      if (positiveConsensuses.isEmpty() &&
-          !positiveConsensusesNoTarget.isEmpty())
-        System.out.println("\nNote that although the found relay(s) did "
-            + "not permit exiting to " + target + ", there have been one "
-            + "or more relays running at the given time.");
-    }
-    System.out.println();
-  }
-}
-

Deleted: projects/archives/trunk/exonerator/HOWTO
===================================================================
--- projects/archives/trunk/exonerator/HOWTO	2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/exonerator/HOWTO	2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,165 +0,0 @@
-ExoneraTor
-        or: a script that tells you whether some IP address was a Tor relay
-
----------------------------------------------------------------------------
-
-                     THIS REPOSITORY HAS MOVED TO GIT!
-
-             git clone git://git.torproject.org/metrics-utils/
-
----------------------------------------------------------------------------
-
-Introduction:
-
-Some people have expressed the desire to learn whether a given IP address
-has been a Tor relay at a certain time. In addition to that, these people
-might want to know whether the IP address permitted exit to a given address
-and port.
-
-Answering these questions can be important for Tor relay operators to show
-to the authorities that an anonymous user might have done bad things
-using their IP address. Likewise, police investigators might be interested
-in the answer to these questions, too, in order to decide whether to
-proceed with their investigations or not.
-
-We can answer the above questions from looking at the descriptor archives
-that are available since late 2007 (or even earlier, but this script only
-works with the data format that was produced starting in October 2007).
-This script parses the directory archives to print out the answer whether
-a certain IP address was a Tor relay at a given time. The script further
-prints out all intermediate steps in answering this, so that users can
-confirm the correctness of the result themselves.
-
-This script is available in two versions written in Python and in Java with
-equivalent functionality.
-
----------------------------------------------------------------------------
-
-Python Quick Start:
-
-In order to run the Python version of this script, you need to install and
-download the following software and data (please note that all instructions
-are written for Linux; commands for Windows or Mac OS X may vary):
-
-- Install Python 2.6.2 or higher. (Previous Python versions might work,
-  too, but have not been tested.)
-
-- Install the Python module IPy 0.62 or higher either from
-  http://pypi.python.org/pypi/IPy/ or using "apt-get install python-ipy" on
-  Debian-based systems.
-
-- Download the v3 consensuses and server descriptors of the relevant time
-  from http://metrics.torproject.org/data.html and extract them to a
-  directory in your working directory, e.g. /home/you/exonerator/data/ .
-  Don't rename the extracted directories or any of the contained files, or
-  the script won't find the contained descriptors.
-
-  Note that you only need the server descriptors if you want to learn
-  whether a given IP address permits exiting to a given target. If you
-  only want to learn whether that IP address was a Tor relay, you don't
-  need them.
-
-- Run the script, providing it with the parameters it needs:
-
-  python exonerator.py [--archive=<descriptor archive directory>]
-           <IP address in question>
-           <timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss>
-           [<target address>[:<target port>]]
-
-  The --archive option defaults to data/ . In the following examples, it is
-  assumed that this default applies.
-
-  Make sure that the timestamp is provided in UTC, which is equivalent to
-  GMT, and not in your local timezone! Otherwise, results will very likely
-  be wrong.
-
-  A sample invocation might be:
-
-  $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00
-        209.85.129.104:80
-
----------------------------------------------------------------------------
-
-Java Quick Start:
-
-In order to run the Java version of this script, you need to install and
-download the following software and data (please note that all instructions
-are written for Linux; commands for Windows or Mac OS X may vary):
-
-- Install Java 6 or higher.
-
-- Download the BouncyCastle provider that includes Base 64 decoding from
-  http://www.bouncycastle.org/download/bcprov-jdk16-143.jar and put it in
-  your working directory, e.g. /home/you/exonerator/ .
-
-- Download the v3 consensuses and server descriptors of the relevant time
-  from http://metrics.torproject.org/data.html and extract them to a
-  directory in your working directory, e.g. /home/you/exonerator/data/ .
-  Don't rename the extracted directories or any of the contained files, or
-  the script won't find the contained descriptors.
-
-  Note that you only need the server descriptors if you want to learn
-  whether a given IP address permits exiting to a given target. If you
-  only want to learn whether that IP address was a Tor relay, you don't
-  need them.
-
-- Compile the (single) Java class using this command:
-
-  $ javac -cp bcprov-jdk16-143.jar ExoneraTor.java
-
-- Run the script, providing it with the parameters it needs:
-
-  java -cp .:bcprov-jdk16-143.jar ExoneraTor
-           <descriptor archive directory>
-           <IP address in question>
-           <timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss>
-           [<target address>[:<target port>]]
-
-  Make sure that the timestamp is provided in UTC, which is equivalent to
-  GMT, and not in your local timezone! Otherwise, results will very likely
-  be wrong.
-
-  A sample invocation might be:
-
-  $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
-        2009-08-15 16:05:00 209.85.129.104:80
-
----------------------------------------------------------------------------
-
-Test cases:
-
-The following test cases work with the August 2009 archives and can be used
-to check whether this script works correctly:
-
-- Positive result of echelon1+2 being a relay:
-
-  $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00
-  $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
-        2009-08-15 16:05:00
-
-- Positive result of echelon1+2 exiting to google.com on any port
-
-  $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 209.85.129.104
-  $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
-        2009-08-15 16:05:00 209.85.129.104
-
-- Positive result of echelon1+2 exiting to google.com on port 80
-
-  $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 \
-        209.85.129.104:80
-  $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
-        2009-08-15 16:05:00 209.85.129.104:80
-
-- Negative result of echelon1+2 not permitting exit to google.com on port 25
-
-  $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 \
-        209.85.129.104:25
-  $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
-        2009-08-15 16:05:00 209.85.129.104:25
-
-- Negative result with IP address of echelon1+2 changed in the last octet
-
-  $ python exonerator.py 209.17.171.50 2009-08-15 16:05:00
-  $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.50 \
-        2009-08-15 16:05:00
-

Deleted: projects/archives/trunk/exonerator/LICENSE
===================================================================
--- projects/archives/trunk/exonerator/LICENSE	2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/exonerator/LICENSE	2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,30 +0,0 @@
-Copyright 2009 The Tor Project
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above
-  copyright notice, this list of conditions and the following disclaimer
-  in the documentation and/or other materials provided with the
-  distribution.
-
-  * Neither the names of the copyright owners nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-

Deleted: projects/archives/trunk/exonerator/exonerator.py
===================================================================
--- projects/archives/trunk/exonerator/exonerator.py	2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/exonerator/exonerator.py	2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,370 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2009 The Tor Project -- see LICENSE for licensing information
-
-import binascii
-import os
-import sys
-import time
-import calendar
-from optparse import OptionParser
-from IPy import IP
-
-USAGE = "usage: %prog [options] <IP address in question> " \
-        "<timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss> " \
-        "[<target address>[:<target port>]]"
-DELIMITER = "-" * 75
-
-if __name__ == '__main__':
-    # check parameters
-    parser = OptionParser(usage=USAGE)
-    parser.add_option("-a", "--archive", dest="archive", default="data/",
-                      help="descriptor archive directory")
-    (options, args) = parser.parse_args()
-    if len(args) not in (3, 4):
-        parser.error("incorrect number of arguments")
-    if not os.path.isdir(options.archive):
-        parser.error("descriptor archive directory %s does not exist or " \
-                     "is not a directory." % \
-                     os.path.abspath(options.archive))
-    archiveDirectory = os.path.dirname(options.archive)
-    try:
-        relayIP = IP(args[0])
-    except ValueError:
-        parser.error("invalid IP address in question: '%s'" % args[0])
-    timestampStr = "%s %s" % (args[1], args[2])
-    try:
-        timestamp = time.strptime(timestampStr + " UTC", "%Y-%m-%d %H:%M:%S %Z")
-    except ValueError:
-        parser.error("incorrect time format: '%s'" % timestampStr)
-    # if a target is given, parse address and possibly port part of it
-    target = None
-    targetIP = None
-    targetPort = None
-    if len(args) == 4:
-        target = args[3]
-        targetParts = target.split(":")
-        try:
-            targetIP = IP(targetParts[0])
-        except ValueError:
-            parser.error("invalid target IP address in: '%s'" % args[3])
-        if len(targetParts) > 2:
-            parser.error("invalid target format: '%s'" % args[3])
-        if len(targetParts) > 1:
-            try:
-                targetPortTest = int(targetParts[1])
-            except ValueError:
-                parser.error("invalid target port number in: '%s'" % \
-                             args[3])
-            if targetPortTest not in range(1, 65535):
-                parser.error("invalid target port number in: '%s'" % \
-                             args[3])
-            targetPort = targetParts[1]
-
-    targetHelpStr = ""
-    if target:
-        targetHelpStr = " permitting exiting to %s" % target
-    print "\nTrying to find out whether %s was running a Tor relay at " \
-          "%s%s...\n\n%s\n" % (relayIP, timestampStr, targetHelpStr,
-          DELIMITER)
-
-    # check that we have the required archives
-    timestampTooOld = time.gmtime(calendar.timegm(timestamp) - 300 * 60)
-    timestampFrom = time.gmtime(calendar.timegm(timestamp) - 180 * 60)
-    timestampTooNew = time.gmtime(calendar.timegm(timestamp) + 120 * 60)
-    timestampTooOldStr = time.strftime("%Y-%m-%d %H:%M:%S",
-                                       timestampTooOld)
-    timestampFromStr = time.strftime("%Y-%m-%d %H:%M:%S", timestampFrom)
-    timestampTooNewStr = time.strftime("%Y-%m-%d %H:%M:%S",
-                                       timestampTooNew)
-    print "\nChecking that relevant archives between %s and %s are " \
-          "available..." % (timestampTooOldStr, timestampTooNewStr)
-
-    requiredDirs = set()
-    requiredDirs.add(time.strftime("consensuses-%Y-%m", timestampTooOld))
-    requiredDirs.add(time.strftime("consensuses-%Y-%m", timestampTooNew))
-    if target:
-        requiredDirs.add(time.strftime("server-descriptors-%Y-%m",
-                                          timestampTooOld))
-        requiredDirs.add(time.strftime("server-descriptors-%Y-%m",
-                                          timestampTooNew))
-
-    consensusDirs = list()
-    descriptorsDirs = list()
-    directoriesLeftToParse = list()
-    directoriesLeftToParse.append(archiveDirectory)
-
-    while directoriesLeftToParse:
-        directoryOrFile = directoriesLeftToParse.pop()
-        basename = os.path.basename(directoryOrFile)
-        if basename.startswith("consensuses-"):
-            if basename in requiredDirs:
-                requiredDirs.remove(basename)
-                consensusDirs.append(directoryOrFile)
-        elif basename.startswith("server-descriptors-"):
-            if basename in requiredDirs:
-                requiredDirs.remove(basename)
-                descriptorsDirs.append(directoryOrFile)
-        else:
-            for filename in os.listdir(directoryOrFile):
-                entry = "%s/%s" % (directoryOrFile, filename)
-                if os.path.isdir(entry):
-                    directoriesLeftToParse.append(entry)
-
-    consensusDirs.sort()
-    for consensusDir in consensusDirs:
-        print "  %s" % consensusDir
-    descriptorsDirs.sort()
-    for descriptorsDir in descriptorsDirs:
-        print "  %s" % descriptorsDir
-
-    if requiredDirs:
-        print "\nWe are missing consensuses and/or server descriptors. " \
-              "Please download these archives and extract them to your " \
-              "data directory. Be sure NOT to rename the extracted " \
-              "directories or the contained files."
-        for requiredDir in sorted(requiredDirs):
-            print "  %s.tar.bz2" % requiredDir
-        sys.exit()
-
-    # look for consensus files
-    print "\nLooking for relevant consensuses between %s and %s..." % \
-          (timestampFromStr, timestampStr)
-    tooOldConsensuses = set()
-    relevantConsensuses = set()
-    tooNewConsensuses = set()
-    directoriesLeftToParse = list(consensusDirs)
-    while directoriesLeftToParse:
-        directoryOrFile = directoriesLeftToParse.pop()
-        if os.path.isdir(directoryOrFile):
-            for filename in os.listdir(directoryOrFile):
-                entry = "%s/%s" % (directoryOrFile, filename)
-                directoriesLeftToParse.append(entry)
-        else:
-            basename = os.path.basename(directoryOrFile)
-            if (basename.endswith("consensus")):
-                consensusTime = time.strptime(basename[0:19],
-                                              "%Y-%m-%d-%H-%M-%S")
-                if consensusTime >= timestampTooOld and \
-                   consensusTime < timestampFrom:
-                    tooOldConsensuses.add(directoryOrFile)
-                elif consensusTime >= timestampFrom and \
-                     consensusTime <= timestamp:
-                    relevantConsensuses.add(directoryOrFile)
-                elif consensusTime > timestamp and \
-                     consensusTime <= timestampTooNew:
-                    tooNewConsensuses.add(directoryOrFile)
-    allConsensuses = set()
-    allConsensuses.update(tooOldConsensuses)
-    allConsensuses.update(relevantConsensuses)
-    allConsensuses.update(tooNewConsensuses)
-    if not allConsensuses:
-        print "  None found!\n\n%s\n\nResult is INDECISIVE!\n\nWe " \
-              "cannot make any statement about IP address %s being a " \
-              "relay at %s or not! We did not find any relevant " \
-              "consensuses preceding the given time. This either means " \
-              "that you did not download and extract the consensus " \
-              "archives preceding the hours before the given time, or " \
-              "(in rare cases) that the directory archives are missing " \
-              "the hours before the timestamp. Please check that your " \
-              "directory archives contain consensus files of the " \
-              "interval 5:00 hours before and 2:00 hours after the time " \
-              "you are looking for.\n" % (DELIMITER, relayIP, timestampStr)
-        sys.exit()
-    for consensus in sorted(relevantConsensuses):
-        print "  %s" % consensus
-
-    # parse consensuses to find descriptors belonging to the IP address
-    print "\nLooking for descriptor identifiers referenced in \"r \" " \
-          "lines in these consensuses containing IP address %s..." % \
-          relayIP
-    positiveConsensusesNoTarget = set()
-    addressesInSameNetwork = set()
-    relevantDescriptors = dict()
-    for consensus in allConsensuses:
-        if consensus in relevantConsensuses:
-            print "  %s" % consensus
-        consensusFile = open(consensus, "r")
-        line = consensusFile.readline()
-        while line:
-            if line.startswith("r "):
-                address = IP(line.split(" ")[6])
-                if address == relayIP:
-                    hexDesc = binascii.b2a_hex(binascii.a2b_base64(
-                                               line.split(" ")[3] + "=="))
-                    if hexDesc not in relevantDescriptors.keys():
-                        relevantDescriptors[hexDesc] = set()
-                    relevantDescriptors[hexDesc].add(consensus)
-                    positiveConsensusesNoTarget.add(consensus)
-                    if consensus in relevantConsensuses:
-                        print "    \"%s\" references descriptor %s" % \
-                              (line.rstrip(), hexDesc)
-                elif relayIP.overlaps(IP("%s/24" % address,
-                                         make_net=True)):
-                    addressesInSameNetwork.add(address)
-            line = consensusFile.readline()
-        consensusFile.close()
-    if not relevantDescriptors:
-        print "  None found!\n\n%s\n\nResult is NEGATIVE with moderate " \
-              "certainty!\n\nWe did not find IP address %s in any of " \
-              "the consensuses that were published between %s and " \
-              "%s.\n\nA possible reason for false negatives is that the " \
-              "relay is using a different IP address when generating a " \
-              "descriptor than for exiting to the Internet. We hope to " \
-              "provide better checks for this case in the future." % \
-              (DELIMITER, relayIP, timestampTooOldStr, timestampTooNewStr)
-        if addressesInSameNetwork:
-            print "\nThe following other IP addresses of Tor relays " \
-                  "were found in the mentioned consensus files that are " \
-                  "in the same /24 network and that could be related to " \
-                  "IP address %s:" % relayIP
-            for addr in addressesInSameNetwork:
-                print "  %s" % addr
-        print ""
-        sys.exit()
-
-    # parse router descriptors to check exit policies
-    positiveConsensuses = set()
-    missingDescriptors = set()
-    if target:
-        print "\nChecking if referenced descriptors permit exiting to " \
-              "%s..." % target
-        descriptors = relevantDescriptors.keys()
-        for desc in descriptors:
-            missingDescriptors.add(desc)
-        directoriesLeftToParse = list(descriptorsDirs)
-        while directoriesLeftToParse:
-            directoryOrFile = directoriesLeftToParse.pop()
-            if os.path.isdir(directoryOrFile):
-                for filename in os.listdir(directoryOrFile):
-                    entry = "%s/%s" % (directoryOrFile, filename)
-                    directoriesLeftToParse.append(entry)
-            else:
-                basename = os.path.basename(directoryOrFile)
-                for descriptor in descriptors:
-                    if basename == descriptor:
-                        missingDescriptors.remove(descriptor)
-                        descriptorFile = open(directoryOrFile, "r")
-                        line = descriptorFile.readline()
-                        while line:
-                            if line.startswith("reject ") or \
-                               line.startswith("accept "):
-                                ruleAccept = line.split()[0] == "accept"
-                                ruleAddress = line.split()[1].split(":")[0]
-                                if ruleAddress != "*" and not \
-                                   IP(ruleAddress).overlaps(targetIP):
-                                    # IP address does not match
-                                    line = descriptorFile.readline()
-                                    continue
-                                rulePort = line.split()[1].split(":")[1]
-                                if not targetPort and not ruleAccept and \
-                                   rulePort != "*":
-                                    # with no port given, we only consider
-                                    # reject :* rules as matching
-                                    line = descriptorFile.readline()
-                                    continue
-                                if targetPort and rulePort != "*" and \
-                                   targetPort != rulePort:
-                                    # ports do not match
-                                    line = descriptorFile.readline()
-                                    continue
-                                relevantMatch = False
-                                for f in relevantDescriptors.get(
-                                                             descriptor):
-                                    if f in relevantConsensuses:
-                                        relevantMatch = True
-                                if relevantMatch:
-                                    if ruleAccept:
-                                        print "  %s permits exiting to " \
-                                              "%s according to rule " \
-                                              "\"%s\"" % (directoryOrFile,
-                                              target, line.rstrip())
-                                    else:
-                                        print "  %s does not permit " \
-                                              "exiting to %s according " \
-                                              "to rule \"%s\"" % \
-                                              (directoryOrFile,
-                                              target, line.rstrip())
-                                if ruleAccept:
-                                    for consensus in \
-                                        relevantDescriptors.get(
-                                                            descriptor):
-                                        positiveConsensuses.add(consensus)
-                                break
-                            line = descriptorFile.readline()
-                        descriptorFile.close()
-
-    # print out result
-    matches = None
-    if target:
-        matches = positiveConsensuses
-    else:
-        matches = positiveConsensusesNoTarget
-    lastConsensus = sorted(relevantConsensuses)[len(relevantConsensuses)-1]
-    if lastConsensus in matches:
-        print "\n%s\n\nResult is POSITIVE with high certainty!\n\nWe " \
-              "found one or more relays on IP address %s%s in the most " \
-              "recent consensus preceding %s that clients were likely " \
-              "to know.\n" % (DELIMITER, relayIP, targetHelpStr,
-              timestampStr)
-        sys.exit()
-    resultIndecisive = target and len(missingDescriptors) > 0
-    if resultIndecisive:
-        print "\n%s\n\nResult is INDECISIVE!\n\nAt least one " \
-              "referenced descriptor could not be found. This is a rare " \
-              "case, but one that (apparently) happens. We cannot make " \
-              "any good statement about exit relays without these " \
-              "descriptors. The following descriptors are missing:" % \
-              DELIMITER
-        for desc in missingDescriptors:
-            print "  %s" % desc
-    inOtherRelevantConsensus = False
-    inTooOldConsensuses = False
-    inTooNewConsensuses = False
-    for f in matches:
-        if f in relevantConsensuses:
-            inOtherRelevantConsensus = True
-        elif f in tooOldConsensuses:
-            inTooOldConsensuses = True
-        elif f in tooNewConsensuses:
-            inTooNewConsensuses = True
-    if inOtherRelevantConsensus:
-        if not resultIndecisive:
-            print "\n%s\n\nResult is POSITIVE with moderate certainty!" % \
-                  DELIMITER
-        print "\nWe found one or more relays on IP address %s%s, but " \
-              "not in the consensus immediately preceding %s. A " \
-              "possible reason for the relay being missing in the last " \
-              "consensus preceding the given time might be that some of " \
-              "the directory authorities had difficulties connecting to " \
-              "the relay. However, clients might still have used the " \
-              "relay." % (relayIP, targetHelpStr, timestampStr)
-    else:
-        if not resultIndecisive:
-            print "\n%s\n\nResult is NEGATIVE with high certainty!" % \
-                  DELIMITER
-        print "\nWe did not find any relay on IP address %s%s in the " \
-              "consensuses 3:00 hours preceding %s." % (relayIP,
-              targetHelpStr, timestampStr)
-        if inTooOldConsensuses or inTooNewConsensuses:
-            if inTooOldConsensuses and not inTooNewConsensuses:
-                print "\nNote that we found a matching relay in " \
-                      "consensuses that were published between 5:00 and " \
-                      "3:00 hours before %s." % timestampStr
-            elif not inTooOldConsensuses and inTooNewConsensuses:
-                print "\nNote that we found a matching relay in " \
-                      "consensuses that were published up to 2:00 hours " \
-                      "after %s." % timestampStr
-            else:
-                print "\nNote that we found a matching relay in " \
-                      "consensuses that were published between 5:00 and " \
-                      "3:00 hours before and in consensuses that were " \
-                      "published up to 2:00 hours after %s." % timestampStr
-            print "Make sure that the timestamp you provided is in the " \
-                  "correct timezone: UTC (or GMT)."
-    if target:
-        if not positiveConsensuses and positiveConsensusesNoTarget:
-            print "\nNote that although the found relay(s) did not " \
-                  "permit exiting to %s there have been one or more " \
-                  "relays running at the given time." % target
-    print ""
-

    

Karsten Loesing

tags

participants (1)