[tor-commits] [metrics-tasks/master] Add parsing and graphing code for #4147.

karsten at torproject.org karsten at torproject.org
Sat Mar 17 07:11:59 UTC 2012


commit 2e8049a8fdc0c72ba8136a9886ed3af75896cadc
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Sat Mar 17 08:10:34 2012 +0100

    Add parsing and graphing code for #4147.
---
 task-4147/.gitignore                       |   13 ++
 task-4147/AnalyzeDifferentExitAddress.java |  246 ++++++++++++++++++++++++++++
 task-4147/README                           |   37 ++++
 task-4147/different-exit-address-1.R       |   12 ++
 task-4147/different-exit-address-2.R       |   39 +++++
 5 files changed, 347 insertions(+), 0 deletions(-)

diff --git a/task-4147/.gitignore b/task-4147/.gitignore
new file mode 100644
index 0000000..7e9e868
--- /dev/null
+++ b/task-4147/.gitignore
@@ -0,0 +1,13 @@
+in/
+out/
+status/
+*.jar
+*.class
+.classpath
+.project
+.settings/
+*.csv
+*.pdf
+*.png
+*.swp
+
diff --git a/task-4147/AnalyzeDifferentExitAddress.java b/task-4147/AnalyzeDifferentExitAddress.java
new file mode 100644
index 0000000..44984a8
--- /dev/null
+++ b/task-4147/AnalyzeDifferentExitAddress.java
@@ -0,0 +1,246 @@
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.torproject.descriptor.BandwidthHistory;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExitList;
+import org.torproject.descriptor.ExitListEntry;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+
+/* Answer the question of what fraction of bytes written by relays with
+ * the Exit flag could have used a different address for exiting than the
+ * relay used for registering in the Tor network.
+ *
+ * The analysis has two phases.  First, descriptors in in/extra-infos/,
+ * in/exit-lists/, and in/consensuses/ are parsed and appended to
+ * per-relay files in status/written-bytes/, status/exit-addresses/, and
+ * status/or-addresses/.  Second, these per-relay files are combined into
+ * out/different-exit-address.csv, unless that file already exists. */
+public class AnalyzeDifferentExitAddress {
+
+  /* One hour in milliseconds. */
+  private static final long ONE_HOUR_MILLIS = 60L * 60L * 1000L;
+
+  /* Run both phases of the analysis.  Command-line arguments are
+   * ignored.  Exceptions from parsing or file I/O are propagated. */
+  public static void main(String[] args) throws Exception {
+
+    System.out.println(new Date() + " Starting.");
+
+    /* Iterate over extra-info descriptors to learn about bandwidth
+     * histories.  Append 15-minute intervals of written bytes to
+     * status/written-bytes/$fingerprint.  The file passed to
+     * setExcludeFiles is presumably the reader's record of files parsed
+     * in earlier runs. */
+    System.out.println(new Date() + " Reading in/extra-infos/* ...");
+    DescriptorReader extraInfoReader = DescriptorSourceFactory
+        .createDescriptorReader();
+    extraInfoReader.addDirectory(new File("in/extra-infos"));
+    extraInfoReader.setExcludeFiles(new File(
+        "status/extra-info-history"));
+    Iterator<DescriptorFile> extraInfoFiles =
+        extraInfoReader.readDescriptors();
+    while (extraInfoFiles.hasNext()) {
+      DescriptorFile extraInfoFile = extraInfoFiles.next();
+      if (extraInfoFile.getDescriptors() == null) {
+        continue;
+      }
+      for (Descriptor descriptor : extraInfoFile.getDescriptors()) {
+        ExtraInfoDescriptor extraInfoDescriptor =
+            (ExtraInfoDescriptor) descriptor;
+        BandwidthHistory writeHistory = extraInfoDescriptor.
+            getWriteHistory();
+        if (writeHistory == null) {
+          continue;
+        }
+        /* Collect all intervals of this descriptor first, so that the
+         * per-relay file is opened only once per descriptor. */
+        StringBuilder sb = new StringBuilder();
+        for (Map.Entry<Long, Long> e :
+            writeHistory.getBandwidthValues().entrySet()) {
+          long intervalEndMillis = e.getKey();
+          long bytesWritten = e.getValue();
+          sb.append(intervalEndMillis).append(" ").append(bytesWritten)
+              .append("\n");
+        }
+        appendToFile(new File("status/written-bytes/"
+            + extraInfoDescriptor.getFingerprint()), sb.toString());
+      }
+    }
+
+    /* Iterate over exit lists to learn about exit IP addresses.  Append
+     * lines to status/exit-addresses/$fingerprint. */
+    System.out.println(new Date() + " Reading in/exit-lists/* ...");
+    DescriptorReader exitListReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    exitListReader.addDirectory(new File("in/exit-lists"));
+    exitListReader.setExcludeFiles(new File("status/exit-list-history"));
+    Iterator<DescriptorFile> exitListFiles =
+        exitListReader.readDescriptors();
+    while (exitListFiles.hasNext()) {
+      DescriptorFile exitListFile = exitListFiles.next();
+      if (exitListFile.getDescriptors() == null) {
+        continue;
+      }
+      for (Descriptor descriptor : exitListFile.getDescriptors()) {
+        ExitList exitList = (ExitList) descriptor;
+        if (exitList.getExitListEntries() == null) {
+          continue;
+        }
+        for (ExitListEntry exitListEntry :
+            exitList.getExitListEntries()) {
+          long scanMillis = exitListEntry.getScanMillis();
+          String address = exitListEntry.getExitAddress();
+          appendToFile(new File("status/exit-addresses/"
+              + exitListEntry.getFingerprint()),
+              scanMillis + " " + address + "\n");
+        }
+      }
+    }
+
+    /* Iterate over consensuses to learn about OR addresses of relays with
+     * the Exit flag.  Append lines to
+     * status/or-addresses/$fingerprint. */
+    System.out.println(new Date() + " Reading in/consensuses/* ...");
+    DescriptorReader consensusReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    consensusReader.addDirectory(new File("in/consensuses"));
+    consensusReader.setExcludeFiles(new File("status/consensus-history"));
+    Iterator<DescriptorFile> consensusFiles =
+        consensusReader.readDescriptors();
+    while (consensusFiles.hasNext()) {
+      DescriptorFile consensusFile = consensusFiles.next();
+      if (consensusFile.getDescriptors() == null) {
+        continue;
+      }
+      for (Descriptor descriptor : consensusFile.getDescriptors()) {
+        RelayNetworkStatusConsensus consensus =
+            (RelayNetworkStatusConsensus) descriptor;
+        if (consensus.getStatusEntries() == null) {
+          continue;
+        }
+        long validAfterMillis = consensus.getValidAfterMillis();
+        for (NetworkStatusEntry statusEntry :
+            consensus.getStatusEntries().values()) {
+          if (!statusEntry.getFlags().contains("Exit")) {
+            continue;
+          }
+          appendToFile(new File("status/or-addresses/"
+              + statusEntry.getFingerprint()),
+              validAfterMillis + " " + statusEntry.getAddress() + "\n");
+        }
+      }
+    }
+
+    /* Make sure not to overwrite existing results. */
+    File differentExitAddressFile = new File(
+        "out/different-exit-address.csv");
+    if (differentExitAddressFile.exists()) {
+      System.out.println(new Date() + " Not overwriting existing "
+          + "out/different-exit-address.csv.");
+      return;
+    }
+
+    /* Iterate over OR addresses of relays with the Exit flag.
+     * listFiles() returns null if the directory does not exist, e.g.,
+     * when no consensus contained a relay with the Exit flag; treat that
+     * like an empty directory and write the header line only. */
+    System.out.println(new Date() + " Writing "
+        + "out/different-exit-address.csv ...");
+    File[] orAddressesFiles = new File("status/or-addresses").listFiles();
+    if (orAddressesFiles == null) {
+      System.out.println(new Date() + " Could not list files in "
+          + "status/or-addresses/.");
+      orAddressesFiles = new File[0];
+    }
+    differentExitAddressFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        differentExitAddressFile));
+    try {
+      bw.write("timestamp,differentaddress,writtenbytes\n");
+      for (File orAddressesFile : orAddressesFiles) {
+        writeRelayResults(orAddressesFile, bw);
+      }
+    } finally {
+      bw.close();
+    }
+
+    System.out.println(new Date() + " Terminating.");
+  }
+
+  /* Append text to file, creating the file and its parent directories
+   * if necessary.  The writer is closed even if writing fails. */
+  private static void appendToFile(File file, String text)
+      throws java.io.IOException {
+    File parentDirectory = file.getParentFile();
+    if (parentDirectory != null) {
+      parentDirectory.mkdirs();
+    }
+    BufferedWriter bw = new BufferedWriter(new FileWriter(file, true));
+    try {
+      bw.write(text);
+    } finally {
+      bw.close();
+    }
+  }
+
+  /* Read lines of the form "$millis $address" from file into a map
+   * sorted by timestamp.  Later lines with the same timestamp replace
+   * earlier ones. */
+  private static SortedMap<Long, String> readAddresses(File file)
+      throws java.io.IOException {
+    SortedMap<Long, String> addresses = new TreeMap<Long, String>();
+    BufferedReader br = new BufferedReader(new FileReader(file));
+    try {
+      String line;
+      while ((line = br.readLine()) != null) {
+        String[] parts = line.split(" ");
+        addresses.put(Long.parseLong(parts[0]), parts[1]);
+      }
+    } finally {
+      br.close();
+    }
+    return addresses;
+  }
+
+  /* Read lines of the form "$intervalEndMillis $bytes" from file into a
+   * map sorted by interval end.  Later lines with the same interval end
+   * replace earlier ones. */
+  private static SortedMap<Long, Long> readWrittenBytes(File file)
+      throws java.io.IOException {
+    SortedMap<Long, Long> writtenBytes = new TreeMap<Long, Long>();
+    BufferedReader br = new BufferedReader(new FileReader(file));
+    try {
+      String line;
+      while ((line = br.readLine()) != null) {
+        String[] parts = line.split(" ");
+        writtenBytes.put(Long.parseLong(parts[0]),
+            Long.parseLong(parts[1]));
+      }
+    } finally {
+      br.close();
+    }
+    return writtenBytes;
+  }
+
+  /* Write one result line per consensus for the relay whose OR addresses
+   * are stored in orAddressesFile.  Relays without a bandwidth history
+   * file are skipped, as are consensuses for which no written bytes are
+   * known. */
+  private static void writeRelayResults(File orAddressesFile,
+      BufferedWriter bw) throws java.io.IOException {
+    String fingerprint = orAddressesFile.getName();
+
+    /* Without bandwidth history there is nothing to report, so check
+     * this before reading anything else. */
+    File writtenBytesFile = new File("status/written-bytes/"
+        + fingerprint);
+    if (!writtenBytesFile.exists()) {
+      return;
+    }
+
+    /* Read OR addresses, bandwidth histories, and exit addresses of
+     * this relay to memory.  A missing exit addresses file means no exit
+     * addresses were found for this relay. */
+    SortedMap<Long, String> orAddresses = readAddresses(orAddressesFile);
+    SortedMap<Long, Long> writtenBytes = readWrittenBytes(
+        writtenBytesFile);
+    File exitAddressesFile = new File("status/exit-addresses/"
+        + fingerprint);
+    SortedMap<Long, String> exitAddresses = exitAddressesFile.exists()
+        ? readAddresses(exitAddressesFile)
+        : new TreeMap<Long, String>();
+
+    /* Go through consensuses containing this relay as Exit relay in
+     * chronological order, sum up written bytes of intervals ending in
+     * the hour starting at the consensus' valid-after time, and look up
+     * any exit addresses found from 23 hours before until 1 hour after
+     * the valid-after time.  Both ranges include their start and exclude
+     * their end. */
+    for (Map.Entry<Long, String> e : orAddresses.entrySet()) {
+      long validAfterMillis = e.getKey();
+      String currentOrAddress = e.getValue();
+      long currentWrittenBytes = 0L;
+      for (long currentBytes : writtenBytes.subMap(validAfterMillis,
+          validAfterMillis + ONE_HOUR_MILLIS).values()) {
+        currentWrittenBytes += currentBytes;
+      }
+      if (currentWrittenBytes < 1L) {
+        continue;
+      }
+      boolean usedOtherAddress = false;
+      for (String currentExitAddress : exitAddresses.subMap(
+          validAfterMillis - 23L * ONE_HOUR_MILLIS,
+          validAfterMillis + ONE_HOUR_MILLIS).values()) {
+        if (!currentExitAddress.equals(currentOrAddress)) {
+          /* One differing address is enough. */
+          usedOtherAddress = true;
+          break;
+        }
+      }
+      bw.write(validAfterMillis / 1000L + ","
+          + (usedOtherAddress ? "TRUE" : "FALSE") + ","
+          + currentWrittenBytes + "\n");
+    }
+  }
+}
+
diff --git a/task-4147/README b/task-4147/README
new file mode 100644
index 0000000..1af3f3a
--- /dev/null
+++ b/task-4147/README
@@ -0,0 +1,37 @@
+Answer the question of what fraction of bytes written by relays with the
+Exit flag could have used a different address for exiting than the relay
+used for registering in the Tor network.
+==========================================================================
+
+Clone the metrics-lib repository, create the descriptor.jar file, and put
+it in this directory.
+
+Obtain the Apache Commons Codec 1.4 .jar file commons-codec-1.4.jar and
+put it in this directory.
+
+Download the metrics tarballs containing consensuses, extra-info
+descriptors, and exit lists for a common time period.  Note that the first
+and last 2 days of the period won't be usable.  Extract the tarballs and
+put them in in/consensuses/, in/extra-infos/, and in/exit-lists/ in this
+directory.
+
+Compile the Java class:
+
+  $ javac -cp descriptor.jar AnalyzeDifferentExitAddress.java
+
+Run the Java class:
+
+  $ java -cp descriptor.jar:commons-codec-1.4.jar:. \
+    AnalyzeDifferentExitAddress
+
+In order to re-run parts of the analysis, delete files in status/ or the
+results file in out/.
+
+Aggregate the results using R and ggplot2:
+
+  $ R --slave -f different-exit-address-1.R
+
+Draw graphs using R and ggplot2:
+
+  $ R --slave -f different-exit-address-2.R
+
diff --git a/task-4147/different-exit-address-1.R b/task-4147/different-exit-address-1.R
new file mode 100644
index 0000000..b5f172a
--- /dev/null
+++ b/task-4147/different-exit-address-1.R
@@ -0,0 +1,12 @@
+# Sum up written bytes per day and per differentaddress value, and store
+# the result in different-exit-address-aggregate.csv.
+library(ggplot2)
+results <- read.csv("out/different-exit-address.csv",
+  stringsAsFactors = FALSE)
+# Timestamps are seconds since the epoch; reduce them to calendar dates.
+dates <- as.Date(as.POSIXct(results$timestamp,
+  origin = "1970-01-01 00:00:00"))
+groups <- list(date = dates, differentaddress = results$differentaddress)
+daily <- aggregate(list(writtenbytes = results$writtenbytes), by = groups,
+  FUN = sum)
+write.csv(daily, file = "different-exit-address-aggregate.csv",
+  quote = FALSE, row.names = FALSE)
+
diff --git a/task-4147/different-exit-address-2.R b/task-4147/different-exit-address-2.R
new file mode 100644
index 0000000..15f097e
--- /dev/null
+++ b/task-4147/different-exit-address-2.R
@@ -0,0 +1,39 @@
+# Draw graphs of bytes written by relays with the Exit flag, based on the
+# daily aggregates in different-exit-address-aggregate.csv.
+library(ggplot2)
+# cast() comes from reshape; load it explicitly instead of relying on
+# ggplot2 to attach it.
+library(reshape)
+d <- read.csv("different-exit-address-aggregate.csv",
+  stringsAsFactors = FALSE)
+
+# Cut off dates before 2012-02-14, because exit lists were stale
+# Cut off dates after 2012-02-27, because we only imported February data
+# Leaves us with 2 weeks of data; should be fine
+d <- d[d$date >= "2012-02-14" & d$date <= "2012-02-27", ]
+
+# Total written bytes per day, converted to MiB/s (86400 seconds per day).
+a <- aggregate(x = list(writtenbytes = d$writtenbytes),
+  by = list(date = d$date), FUN = sum)
+ggplot(a, aes(x = as.Date(date), y = writtenbytes / 2^20 / 86400)) +
+geom_line() +
+scale_x_date(name = "") +
+scale_y_continuous(name = "MiB/s\n",
+  limits = c(0, max(a$writtenbytes) / 2^20 / 86400)) +
+opts(title = "Bytes written by all relays with the Exit flag\n")
+
+# Same as above, but with one line per differentaddress value.
+ggplot(d, aes(x = as.Date(date), y = writtenbytes / 2^20 / 86400,
+  colour = differentaddress)) +
+geom_line() +
+scale_x_date(name = "") +
+scale_y_continuous(name = "MiB/s\n",
+  limits = c(0, max(d$writtenbytes / 2^20 / 86400))) +
+opts(title = "Bytes written by all relays with the Exit flag\n")
+
+# One row per date with columns "FALSE" and "TRUE" holding written bytes.
+# Name the value column explicitly, and count a day without any rows in
+# one of the two categories as 0 bytes rather than NA.
+s <- cast(d, date ~ differentaddress, value = "writtenbytes", fill = 0)
+# The fraction is relative to all written bytes, i.e., TRUE plus FALSE,
+# not to the FALSE bytes alone.
+s <- data.frame(date = s$date,
+  fracdifferent = s[, "TRUE"] / (s[, "TRUE"] + s[, "FALSE"]))
+ggplot(s, aes(x = as.Date(date), y = fracdifferent)) +
+geom_line() +
+scale_x_date(name = "", format = "%Y-%m-%d") +
+scale_y_continuous(name = "", formatter = "percent", limits = c(0, 1)) +
+opts(title =
+  paste("Fraction of bytes written by relays with the Exit flag\n",
+        "which could have used a different address for exiting\n",
+        "than the relay used for registering in the Tor network\n",
+        sep = ""))
+# ggsave() without a plot argument saves the last plot displayed, which
+# is the fraction graph.
+ggsave("different-exit-address.png", width = 8, height = 5, dpi = 72)
+



More information about the tor-commits mailing list