[tor-commits] [metrics-tasks/master] Avoid downloading vote tarballs in #2394.

karsten at torproject.org karsten at torproject.org
Wed Jul 13 19:29:33 UTC 2011


commit dc4ab95ec8ec60e285fb715b7e94233ab5ac8e59
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Jul 13 21:28:32 2011 +0200

    Avoid downloading vote tarballs in #2394.
---
 task-2394/ParseDescriptors.java  |   70 +++++++++++++++++--------------------
 task-2394/README                 |   37 ++++++++++++++-----
 task-2394/bandwidth-comparison.R |    8 ++--
 3 files changed, 63 insertions(+), 52 deletions(-)

diff --git a/task-2394/ParseDescriptors.java b/task-2394/ParseDescriptors.java
index b0ba916..2197670 100644
--- a/task-2394/ParseDescriptors.java
+++ b/task-2394/ParseDescriptors.java
@@ -19,7 +19,7 @@ public class ParseDescriptors {
         files.addAll(Arrays.asList(file.listFiles()));
       } else if (filename.endsWith("-consensus")) {
         consensuses.put(filename, file);
-      } else if (filename.contains("-vote-")) {
+      } else if (filename.endsWith("-votes")) {
         votes.put(filename, file);
       } else if (filename.length() == 40) {
         descriptors.put(filename, file);
@@ -55,51 +55,45 @@ public class ParseDescriptors {
       System.out.println("Parsing consensus " + consensusFile.getName());
       BufferedReader brC = new BufferedReader(new FileReader(
           consensusFile));
-      String lineC, validAfter = null, lastDirSource = null,
-          lastRLine = null, lastSLine = null;
+      String lastRLine = null, lastSLine = null;
       String consensusTimestamp = consensusFile.getName().substring(0,
               "YYYY-MM-DD-hh-mm-ss".length());
+      String votesFilename = consensusTimestamp + "-votes";
       Map<String, Map<String, String>> measuredBandwidthsByDirSource =
           new HashMap<String, Map<String, String>>();
-      while ((lineC = brC.readLine()) != null) {
-
-        /* Start with parsing a consensus to find out which votes it
-         * contains. */
-        if (lineC.startsWith("valid-after ")) {
-          validAfter = lineC.substring("valid-after ".length());
-        } else if (lineC.startsWith("dir-source ")) {
-          lastDirSource = lineC.split(" ")[2];
-        } else if (lineC.startsWith("vote-digest ") &&
-            bandwidthAuthorities.containsKey(lastDirSource)) {
-          String voteDigest = lineC.substring("vote-digest ".length());
-          String voteFilename = consensusTimestamp + "-vote-"
-              + lastDirSource + "-" + voteDigest;
-          if (votes.containsKey(voteFilename)) {
 
-            /* Parse votes first and extract measured bandwidths. */
-            Map<String, String> measuredBandwidths =
-                new HashMap<String, String>();
-            measuredBandwidthsByDirSource.put(lastDirSource,
+      /* Parse votes first, if we have them, and extract measured
+       * bandwidths. */
+      if (votes.containsKey(votesFilename)) {
+        BufferedReader brV = new BufferedReader(new FileReader(
+            votes.get(votesFilename)));
+        String lineV;
+        Map<String, String> measuredBandwidths = null;
+        while ((lineV = brV.readLine()) != null) {
+          if (lineV.startsWith("dir-source ")) {
+            String dirSource = lineV.split(" ")[2];
+            measuredBandwidths = new HashMap<String, String>();
+            measuredBandwidthsByDirSource.put(dirSource,
                 measuredBandwidths);
-            BufferedReader brV = new BufferedReader(new FileReader(
-                votes.get(voteFilename)));
-            String lineV;
-            while ((lineV = brV.readLine()) != null) {
-              if (lineV.startsWith("r ")) {
-                lastRLine = lineV;
-              } else if (lineV.startsWith("w ") &&
-                  lineV.contains(" Measured=")) {
-                String fingerprint = Hex.encodeHexString(Base64.
-                    decodeBase64(lastRLine.split(" ")[2] + "="));
-                String measuredBandwidth = lineV.substring(lineV.indexOf(
-                    " Measured=") + " Measured=".length()).split(" ")[0];
-                measuredBandwidths.put(fingerprint, measuredBandwidth);
-              }
-            }
-            brV.close();
+          } else if (lineV.startsWith("r ")) {
+            lastRLine = lineV;
+          } else if (lineV.startsWith("w ") &&
+              lineV.contains(" Measured=")) {
+            String fingerprint = Hex.encodeHexString(Base64.
+                decodeBase64(lastRLine.split(" ")[2] + "="));
+            String measuredBandwidth = lineV.substring(lineV.indexOf(
+                " Measured=") + " Measured=".length()).split(" ")[0];
+            measuredBandwidths.put(fingerprint, measuredBandwidth);
           }
+        }
+        brV.close();
+      }
 
-        /* Parse r, s, and w lines from the consensus. */
+      /* Parse r, s, and w lines from the consensus. */
+      String lineC, validAfter = null;
+      while ((lineC = brC.readLine()) != null) {
+        if (lineC.startsWith("valid-after ")) {
+          validAfter = lineC.substring("valid-after ".length());
         } else if (lineC.startsWith("r ")) {
           lastRLine = lineC;
         } else if (lineC.startsWith("s ")) {
diff --git a/task-2394/README b/task-2394/README
index d74d962..5cb47f6 100644
--- a/task-2394/README
+++ b/task-2394/README
@@ -1,17 +1,26 @@
 Visualize self-reported vs. measured bandwidth of relays
 ========================================================
 
- - Download consensuses, votes, and server descriptors from the metrics
-   website.  Be sure to download the server descriptors of the month
-   preceding the consensuses and votes, too.  For example, you could
-   download these files:
+ - Download a consensus and its votes from the metrics website, e.g.,
 
-   https://metrics.torproject.org/data/votes-2011-01.tar.bz2
-   https://metrics.torproject.org/data/consensuses-2011-01.tar.bz2
-   https://metrics.torproject.org/data/server-descriptors-2010-12.tar.bz2
-   https://metrics.torproject.org/data/server-descriptors-2011-01.tar.bz2
+   https://metrics.torproject.org/votes?valid-after=2011-07-13-05-00-00
+   https://metrics.torproject.org/consensus?valid-after=2011-07-13-05-00-00
 
- - Extract the tarballs into subdirectory descriptors/.
+ - Also download the server descriptor archives containing the referenced
+   server descriptors from the metrics website.  If the consensus was
+   published on the 3rd of a month or later, it's sufficient to download a
+   single tarball.  Otherwise, download both tarballs for the current and
+   previous month:
+
+   https://metrics.torproject.org/data/server-descriptors-2011-07.tar.bz2
+
+ - Extract the server descriptor tarball(s) into the subdirectory
+   descriptors/.  Also copy the consensus and votes there.  With the URLs
+   stated above, the directory would contain these files:
+
+     2011-07-13-05-00-00-consensus
+     2011-07-13-05-00-00-votes
+     server-descriptors-2011-07
 
  - Download Apache Commons Codec 1.4 or higher and put in in this
    directory.
@@ -22,6 +31,14 @@ Visualize self-reported vs. measured bandwidth of relays
  - Run the Java class, e.g.,
    $ java -cp .:commons-codec-1.4.jar ParseDescriptors
 
- - Once the Java application is done, you'll find a file
+   Once the Java application is done, you'll find a file
    bandwidth-comparison.csv in this directory.
 
+ - Plot the data:
+
+   $ R --slave -f bandwidth-comparison.R
+
+   (To highlight urras or another directory, edit bandwidth-comparison.R
+   to use the commented-out lines containing "purple" in place of the
+   active "values" lines, then re-run the last command above.)
+
diff --git a/task-2394/bandwidth-comparison.R b/task-2394/bandwidth-comparison.R
index 77a5842..356464b 100644
--- a/task-2394/bandwidth-comparison.R
+++ b/task-2394/bandwidth-comparison.R
@@ -32,8 +32,6 @@ opts(title = "Ratio between measured and self-reported relay bandwidth",
 ggsave(filename = "bandwidth-comparison-relays.png",
   width = 8, height = 5, dpi = 150)
 
-stopit
-
 # Plot ECDFs to compare consensus to votes
 cdf_relays_category_votes <- function(data, category) {
   d <- data[data$category == category & data$descriptorbandwidth > 0, ]
@@ -81,8 +79,9 @@ scale_x_log10("\nRatio of measured by self-reported bandwidth",
 scale_y_continuous("Fraction of relays\n", limits = c(0, 1),
   formatter = "percent") +
 scale_colour_manual("",
-  breaks = c("consensus", "ides", "urras", "moria1", "gabelmoo"),
+  breaks = c("consensus", "urras", "ides", "moria1", "gabelmoo"),
   values = c("black", rep(alpha("black", 0.25), 4))) +
+# values = c("black", alpha("purple", 0.5), rep(alpha("black", 0.25), 3))) +
 geom_vline(xintercept = 1, legend = FALSE, linetype = "dotted") +
 opts(title = paste("Measured vs. self-reported bandwidth ratios in",
   "consensus and votes\n"), legend.position = "none")
@@ -141,8 +140,9 @@ scale_x_log10("\nRatio of measured by self-reported bandwidth",
 scale_y_continuous("Fraction of measured bandwidth\n", limits = c(0, 1),
   formatter = "percent") +
 scale_colour_manual("",
-  breaks = c("consensus", "ides", "urras", "moria1", "gabelmoo"),
+  breaks = c("consensus", "urras", "ides", "moria1", "gabelmoo"),
   values = c("black", rep(alpha("black", 0.25), 4))) +
+# values = c("black", alpha("purple", 0.5), rep(alpha("black", 0.25), 3))) +
 geom_vline(xintercept = 1, legend = FALSE, linetype = "dotted") +
 opts(title = paste("Measured vs. self-reported bandwidth ratios in",
   "consensus and votes\n"), legend.position = "none")



More information about the tor-commits mailing list