[tor-commits] [metrics-tasks/master] Add some code for #2394.

karsten at torproject.org karsten at torproject.org
Thu Mar 17 17:11:48 UTC 2011


commit a3a8b86287695e74e76e7f3dfe2a6443613cdfcc
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Mar 17 18:11:07 2011 +0100

    Add some code for #2394.
---
 task-2394/.gitignore            |    6 +
 task-2394/ParseDescriptors.java |  216 +++++++++++++++++++++++++++++++++++++++
 task-2394/README                |   27 +++++
 3 files changed, 249 insertions(+), 0 deletions(-)

diff --git a/task-2394/.gitignore b/task-2394/.gitignore
new file mode 100644
index 0000000..bef5dec
--- /dev/null
+++ b/task-2394/.gitignore
@@ -0,0 +1,6 @@
+*.class
+descriptors/
+*.pdf
+*.csv
+*.jar
+
diff --git a/task-2394/ParseDescriptors.java b/task-2394/ParseDescriptors.java
new file mode 100644
index 0000000..b0ba916
--- /dev/null
+++ b/task-2394/ParseDescriptors.java
@@ -0,0 +1,216 @@
+import java.io.*;
+import java.util.*;
+import org.apache.commons.codec.binary.*;
+
+public class ParseDescriptors {
+  public static void main(String[] args) throws IOException {
+
+    /* Find all files that we should parse and distinguish between
+     * consensuses, votes, and server descriptors. */
+    SortedMap<String, File> consensuses = new TreeMap<String, File>();
+    SortedMap<String, File> descriptors = new TreeMap<String, File>();
+    SortedMap<String, File> votes = new TreeMap<String, File>();
+    Stack<File> files = new Stack<File>();
+    files.add(new File("descriptors"));
+    while (!files.isEmpty()) {
+      File file = files.pop();
+      String filename = file.getName();
+      if (file.isDirectory()) {
+        files.addAll(Arrays.asList(file.listFiles()));
+      } else if (filename.endsWith("-consensus")) {
+        consensuses.put(filename, file);
+      } else if (filename.contains("-vote-")) {
+        votes.put(filename, file);
+      } else if (filename.length() == 40) {
+        descriptors.put(filename, file);
+      }
+    }
+    System.out.println("We found " + consensuses.size()
+        + " consensuses, " + votes.size() + " votes, and "
+        + descriptors.size() + " server descriptors.");
+
+    /* Parse consensuses in an outer loop and the referenced votes and
+     * descriptors in inner loops.  Write the results to disk as soon as
+     * we can to avoid keeping many things in memory. */
+    SortedMap<String, String> bandwidthAuthorities =
+        new TreeMap<String, String>();
+    bandwidthAuthorities.put("27B6B5996C426270A5C95488AA5BCEB6BCC86956",
+        "ides");
+    bandwidthAuthorities.put("80550987E1D626E3EBA5E5E75A458DE0626D088C",
+        "urras");
+    bandwidthAuthorities.put("D586D18309DED4CD6D57C18FDB97EFA96D330566",
+        "moria1");
+    bandwidthAuthorities.put("ED03BB616EB2F60BEC80151114BB25CEF515B226",
+        "gabelmoo");
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        "bandwidth-comparison.csv"));
+    bw.write("validafter,fingerprint,nickname,category,"
+        + "descriptorbandwidth,consensusbandwidth");
+    for (String bandwidthAuthority : bandwidthAuthorities.values()) {
+      bw.write("," + bandwidthAuthority + "bandwidth");
+    }
+    bw.write("\n");
+    Map<String, String> parsedDescriptors = new HashMap<String, String>();
+    for (File consensusFile : consensuses.values()) {
+      System.out.println("Parsing consensus " + consensusFile.getName());
+      BufferedReader brC = new BufferedReader(new FileReader(
+          consensusFile));
+      String lineC, validAfter = null, lastDirSource = null,
+          lastRLine = null, lastSLine = null;
+      String consensusTimestamp = consensusFile.getName().substring(0,
+              "YYYY-MM-DD-hh-mm-ss".length());
+      Map<String, Map<String, String>> measuredBandwidthsByDirSource =
+          new HashMap<String, Map<String, String>>();
+      while ((lineC = brC.readLine()) != null) {
+
+        /* Start with parsing a consensus to find out which votes it
+         * contains. */
+        if (lineC.startsWith("valid-after ")) {
+          validAfter = lineC.substring("valid-after ".length());
+        } else if (lineC.startsWith("dir-source ")) {
+          lastDirSource = lineC.split(" ")[2];
+        } else if (lineC.startsWith("vote-digest ") &&
+            bandwidthAuthorities.containsKey(lastDirSource)) {
+          String voteDigest = lineC.substring("vote-digest ".length());
+          String voteFilename = consensusTimestamp + "-vote-"
+              + lastDirSource + "-" + voteDigest;
+          if (votes.containsKey(voteFilename)) {
+
+            /* Parse votes first and extract measured bandwidths. */
+            Map<String, String> measuredBandwidths =
+                new HashMap<String, String>();
+            measuredBandwidthsByDirSource.put(lastDirSource,
+                measuredBandwidths);
+            BufferedReader brV = new BufferedReader(new FileReader(
+                votes.get(voteFilename)));
+            String lineV;
+            while ((lineV = brV.readLine()) != null) {
+              if (lineV.startsWith("r ")) {
+                lastRLine = lineV;
+              } else if (lineV.startsWith("w ") &&
+                  lineV.contains(" Measured=")) {
+                String fingerprint = Hex.encodeHexString(Base64.
+                    decodeBase64(lastRLine.split(" ")[2] + "="));
+                String measuredBandwidth = lineV.substring(lineV.indexOf(
+                    " Measured=") + " Measured=".length()).split(" ")[0];
+                measuredBandwidths.put(fingerprint, measuredBandwidth);
+              }
+            }
+            brV.close();
+          }
+
+        /* Parse r, s, and w lines from the consensus. */
+        } else if (lineC.startsWith("r ")) {
+          lastRLine = lineC;
+        } else if (lineC.startsWith("s ")) {
+          lastSLine = lineC;
+        } else if (lineC.startsWith("w ")) {
+          String[] parts = lastRLine.split(" ");
+          String nickname = parts[1];
+          String fingerprint = Hex.encodeHexString(Base64.decodeBase64(
+              parts[2] + "="));
+          String descriptor = Hex.encodeHexString(Base64.decodeBase64(
+              parts[3] + "="));
+          boolean exitFlag = lastSLine.contains(" Exit");
+          boolean guardFlag = lastSLine.contains(" Guard");
+          String consensusBandwidth = lineC.substring(lineC.indexOf(
+              " Bandwidth=") + " Bandwidth=".length()).split(" ")[0];
+
+          /* Parse the referenced server descriptor (if we haven't done so
+           * before) to learn about the relay's exit policy and reported
+           * bandwidth. */
+          boolean parsedDescriptor = false, defaultPolicy = false;
+          String descriptorBandwidth = null;
+          if (parsedDescriptors.containsKey(descriptor)) {
+            String parseResults = parsedDescriptors.get(descriptor);
+            parsedDescriptor = true;
+            defaultPolicy = parseResults.endsWith("1");
+            descriptorBandwidth = parseResults.split(",")[0];
+          } else if (descriptors.containsKey(descriptor)) {
+            parsedDescriptor = true;
+            BufferedReader brD = new BufferedReader(new FileReader(
+                descriptors.get(descriptor)));
+            Set<String> defaultRejects = new HashSet<String>(
+                Arrays.asList(("0.0.0.0/8:*,169.254.0.0/16:*,"
+                + "127.0.0.0/8:*,192.168.0.0/16:*,10.0.0.0/8:*,"
+                + "172.16.0.0/12:*,$IP:*,*:25,*:119,*:135-139,*:445,"
+                + "*:563,*:1214,*:4661-4666,*:6346-6429,*:6699,"
+                + "*:6881-6999").split(",")));
+            /* Starting with 0.2.1.6-alpha, ports 465 and 587 were allowed
+             * in the default exit policy again (and therefore removed
+             * from the default reject lines). */
+            Set<String> optionalRejects = new HashSet<String>(
+                Arrays.asList("*:465,*:587".split(",")));
+            String lineD, address = null;
+            while ((lineD = brD.readLine()) != null) {
+              if (lineD.startsWith("router ")) {
+                address = lineD.split(" ")[2];
+              } else if (lineD.startsWith("bandwidth ")) {
+                descriptorBandwidth = lineD.split(" ")[3];
+              } else if (lineD.startsWith("reject ")) {
+                String rejectPattern = lineD.substring("reject ".
+                    length());
+                if (defaultRejects.contains(rejectPattern)) {
+                  defaultRejects.remove(rejectPattern);
+                } else if (optionalRejects.contains(rejectPattern)) {
+                  optionalRejects.remove(rejectPattern);
+                } else if (rejectPattern.equals(address + ":*")) {
+                  defaultRejects.remove("$IP:*");
+                } else {
+                  break;
+                }
+              } else if (lineD.startsWith("accept ")) {
+                if (defaultRejects.isEmpty() &&
+                    lineD.equals("accept *:*")) {
+                  defaultPolicy = true;
+                }
+                break;
+              }
+            }
+            brD.close();
+            parsedDescriptors.put(descriptor, descriptorBandwidth + ","
+                + (defaultPolicy ? "1" : "0"));
+          } else {
+            System.out.println("We're missing descriptor " + descriptor
+                + ".  Please make sure that all referenced server "
+                + "descriptors are available.  Continuing anyway.");
+          }
+
+          /* Write everything we know about this relay to disk. */
+          String category = null;
+          if (guardFlag && exitFlag && defaultPolicy) {
+            category = "Guard & Exit (default policy)";
+          } else if (!guardFlag && exitFlag && defaultPolicy) {
+            category = "Exit (default policy)";
+          } else if (guardFlag && exitFlag && !defaultPolicy) {
+            category = "Guard & Exit (non-default policy)";
+          } else if (!guardFlag && exitFlag && !defaultPolicy) {
+            category = "Exit (non-default policy)";
+          } else if (guardFlag && !exitFlag) {
+            category = "Guard";
+          } else if (!guardFlag && !exitFlag) {
+            category = "Middle";
+          }
+          bw.write(validAfter + "," + fingerprint + "," + nickname + ","
+              + category + "," + (parsedDescriptor ? descriptorBandwidth
+              : "NA") + "," + consensusBandwidth);
+          for (String bandwidthAuthority :
+              bandwidthAuthorities.keySet()) {
+            if (measuredBandwidthsByDirSource.containsKey(
+                bandwidthAuthority) && measuredBandwidthsByDirSource.get(
+                bandwidthAuthority).containsKey(fingerprint)) {
+              bw.write("," + measuredBandwidthsByDirSource.get(
+                  bandwidthAuthority).get(fingerprint));
+            } else {
+              bw.write(",NA");
+            }
+          }
+          bw.write("\n");
+        }
+      }
+      brC.close();
+    }
+    bw.close();
+  }
+}
+
diff --git a/task-2394/README b/task-2394/README
new file mode 100644
index 0000000..d74d962
--- /dev/null
+++ b/task-2394/README
@@ -0,0 +1,27 @@
+Visualize self-reported vs. measured bandwidth of relays
+========================================================
+
+ - Download consensuses, votes, and server descriptors from the metrics
+   website.  Be sure to download the server descriptors of the month
+   preceding the consensuses and votes, too.  For example, you could
+   download these files:
+
+   https://metrics.torproject.org/data/votes-2011-01.tar.bz2
+   https://metrics.torproject.org/data/consensuses-2011-01.tar.bz2
+   https://metrics.torproject.org/data/server-descriptors-2010-12.tar.bz2
+   https://metrics.torproject.org/data/server-descriptors-2011-01.tar.bz2
+
+ - Extract the tarballs into subdirectory descriptors/.
+
+ - Download Apache Commons Codec 1.4 or higher and put in in this
+   directory.
+
+ - Compile the Java class, e.g.,
+   $ javac -cp commons-codec-1.4.jar ParseDescriptors.java
+
+ - Run the Java class, e.g.,
+   $ java -cp .:commons-codec-1.4.jar ParseDescriptors
+
+ - Once the Java application is done, you'll find a file
+   bandwidth-comparison.csv in this directory.
+



More information about the tor-commits mailing list