commit dc4ab95ec8ec60e285fb715b7e94233ab5ac8e59 Author: Karsten Loesing karsten.loesing@gmx.net Date: Wed Jul 13 21:28:32 2011 +0200
Avoid downloading vote tarballs in #2394. --- task-2394/ParseDescriptors.java | 70 +++++++++++++++++-------------------- task-2394/README | 37 ++++++++++++++----- task-2394/bandwidth-comparison.R | 8 ++-- 3 files changed, 63 insertions(+), 52 deletions(-)
diff --git a/task-2394/ParseDescriptors.java b/task-2394/ParseDescriptors.java index b0ba916..2197670 100644 --- a/task-2394/ParseDescriptors.java +++ b/task-2394/ParseDescriptors.java @@ -19,7 +19,7 @@ public class ParseDescriptors { files.addAll(Arrays.asList(file.listFiles())); } else if (filename.endsWith("-consensus")) { consensuses.put(filename, file); - } else if (filename.contains("-vote-")) { + } else if (filename.endsWith("-votes")) { votes.put(filename, file); } else if (filename.length() == 40) { descriptors.put(filename, file); @@ -55,51 +55,45 @@ public class ParseDescriptors { System.out.println("Parsing consensus " + consensusFile.getName()); BufferedReader brC = new BufferedReader(new FileReader( consensusFile)); - String lineC, validAfter = null, lastDirSource = null, - lastRLine = null, lastSLine = null; + String lastRLine = null, lastSLine = null; String consensusTimestamp = consensusFile.getName().substring(0, "YYYY-MM-DD-hh-mm-ss".length()); + String votesFilename = consensusTimestamp + "-votes"; Map<String, Map<String, String>> measuredBandwidthsByDirSource = new HashMap<String, Map<String, String>>(); - while ((lineC = brC.readLine()) != null) { - - /* Start with parsing a consensus to find out which votes it - * contains. */ - if (lineC.startsWith("valid-after ")) { - validAfter = lineC.substring("valid-after ".length()); - } else if (lineC.startsWith("dir-source ")) { - lastDirSource = lineC.split(" ")[2]; - } else if (lineC.startsWith("vote-digest ") && - bandwidthAuthorities.containsKey(lastDirSource)) { - String voteDigest = lineC.substring("vote-digest ".length()); - String voteFilename = consensusTimestamp + "-vote-" - + lastDirSource + "-" + voteDigest; - if (votes.containsKey(voteFilename)) {
- /* Parse votes first and extract measured bandwidths. */ - Map<String, String> measuredBandwidths = - new HashMap<String, String>(); - measuredBandwidthsByDirSource.put(lastDirSource, + /* Parse votes first, if we have them, and extract measured + * bandwidths. */ + if (votes.containsKey(votesFilename)) { + BufferedReader brV = new BufferedReader(new FileReader( + votes.get(votesFilename))); + String lineV; + Map<String, String> measuredBandwidths = null; + while ((lineV = brV.readLine()) != null) { + if (lineV.startsWith("dir-source ")) { + String dirSource = lineV.split(" ")[2]; + measuredBandwidths = new HashMap<String, String>(); + measuredBandwidthsByDirSource.put(dirSource, measuredBandwidths); - BufferedReader brV = new BufferedReader(new FileReader( - votes.get(voteFilename))); - String lineV; - while ((lineV = brV.readLine()) != null) { - if (lineV.startsWith("r ")) { - lastRLine = lineV; - } else if (lineV.startsWith("w ") && - lineV.contains(" Measured=")) { - String fingerprint = Hex.encodeHexString(Base64. - decodeBase64(lastRLine.split(" ")[2] + "=")); - String measuredBandwidth = lineV.substring(lineV.indexOf( - " Measured=") + " Measured=".length()).split(" ")[0]; - measuredBandwidths.put(fingerprint, measuredBandwidth); - } - } - brV.close(); + } else if (lineV.startsWith("r ")) { + lastRLine = lineV; + } else if (lineV.startsWith("w ") && + lineV.contains(" Measured=")) { + String fingerprint = Hex.encodeHexString(Base64. + decodeBase64(lastRLine.split(" ")[2] + "=")); + String measuredBandwidth = lineV.substring(lineV.indexOf( + " Measured=") + " Measured=".length()).split(" ")[0]; + measuredBandwidths.put(fingerprint, measuredBandwidth); } + } + brV.close(); + }
- /* Parse r, s, and w lines from the consensus. */ + /* Parse r, s, and w lines from the consensus. */ + String lineC, validAfter = null; + while ((lineC = brC.readLine()) != null) { + if (lineC.startsWith("valid-after ")) { + validAfter = lineC.substring("valid-after ".length()); } else if (lineC.startsWith("r ")) { lastRLine = lineC; } else if (lineC.startsWith("s ")) { diff --git a/task-2394/README b/task-2394/README index d74d962..5cb47f6 100644 --- a/task-2394/README +++ b/task-2394/README @@ -1,17 +1,26 @@ Visualize self-reported vs. measured bandwidth of relays ========================================================
- - Download consensuses, votes, and server descriptors from the metrics - website. Be sure to download the server descriptors of the month - preceding the consensuses and votes, too. For example, you could - download these files: + - Download consensus and votes from the metrics website, e.g.,
- https://metrics.torproject.org/data/votes-2011-01.tar.bz2 - https://metrics.torproject.org/data/consensuses-2011-01.tar.bz2 - https://metrics.torproject.org/data/server-descriptors-2010-12.tar.bz2 - https://metrics.torproject.org/data/server-descriptors-2011-01.tar.bz2 + https://metrics.torproject.org/votes?valid-after=2011-07-13-05-00-00 + https://metrics.torproject.org/consensus?valid-after=2011-07-13-05-00-00
- - Extract the tarballs into subdirectory descriptors/. + - Also download the server descriptor archives containing the referenced + server descriptors from the metrics website. If the consensus was + published on the 3rd of a month or later, it's sufficient to download a + single tarball. Otherwise, download both tarballs for the current and + previous month: + + https://metrics.torproject.org/data/server-descriptors-2011-07.tar.bz2 + + - Extract the server descriptor tarball(s) into the subdirectory + descriptors/. Also copy the consensus and votes there. With the URLs + stated above, the directory would contain these files: + + 2011-07-13-05-00-00-consensus + 2011-07-13-05-00-00-votes + server-descriptors-2011-07
- Download Apache Commons Codec 1.4 or higher and put it in this directory. @@ -22,6 +31,14 @@ Visualize self-reported vs. measured bandwidth of relays - Run the Java class, e.g., $ java -cp .:commons-codec-1.4.jar ParseDescriptors
- - Once the Java application is done, you'll find a file + Once the Java application is done, you'll find a file bandwidth-comparison.csv in this directory.
+ - Plot the data: + + $ R --slave -f bandwidth-comparison.R + + (Edit bandwidth-comparison.R to color urras or one of the other + directories specially by putting in the lines containing "purple" and + re-run the last command above.) + diff --git a/task-2394/bandwidth-comparison.R b/task-2394/bandwidth-comparison.R index 77a5842..356464b 100644 --- a/task-2394/bandwidth-comparison.R +++ b/task-2394/bandwidth-comparison.R @@ -32,8 +32,6 @@ opts(title = "Ratio between measured and self-reported relay bandwidth", ggsave(filename = "bandwidth-comparison-relays.png", width = 8, height = 5, dpi = 150)
-stopit - # Plot ECDFs to compare consensus to votes cdf_relays_category_votes <- function(data, category) { d <- data[data$category == category & data$descriptorbandwidth > 0, ] @@ -81,8 +79,9 @@ scale_x_log10("\nRatio of measured by self-reported bandwidth", scale_y_continuous("Fraction of relays\n", limits = c(0, 1), formatter = "percent") + scale_colour_manual("", - breaks = c("consensus", "ides", "urras", "moria1", "gabelmoo"), + breaks = c("consensus", "urras", "ides", "moria1", "gabelmoo"), values = c("black", rep(alpha("black", 0.25), 4))) + +# values = c("black", alpha("purple", 0.5), rep(alpha("black", 0.25), 3))) + geom_vline(xintercept = 1, legend = FALSE, linetype = "dotted") + opts(title = paste("Measured vs. self-reported bandwidth ratios in", "consensus and votes\n"), legend.position = "none") @@ -141,8 +140,9 @@ scale_x_log10("\nRatio of measured by self-reported bandwidth", scale_y_continuous("Fraction of measured bandwidth\n", limits = c(0, 1), formatter = "percent") + scale_colour_manual("", - breaks = c("consensus", "ides", "urras", "moria1", "gabelmoo"), + breaks = c("consensus", "urras", "ides", "moria1", "gabelmoo"), values = c("black", rep(alpha("black", 0.25), 4))) + +# values = c("black", alpha("purple", 0.5), rep(alpha("black", 0.25), 3))) + geom_vline(xintercept = 1, legend = FALSE, linetype = "dotted") + opts(title = paste("Measured vs. self-reported bandwidth ratios in", "consensus and votes\n"), legend.position = "none")
tor-commits@lists.torproject.org