commit 7b75c704f5dad573ad6ba4f0a04cb8599b4cdb7a Author: Karsten Loesing karsten.loesing@gmx.net Date: Thu Jul 21 16:01:14 2011 +0200
Avoid downloading server descriptor tarballs in #2394. --- task-2394/ParseDescriptors.java | 133 ++++++++++++++++++++++----------------- task-2394/README | 17 ++---- 2 files changed, 80 insertions(+), 70 deletions(-)
diff --git a/task-2394/ParseDescriptors.java b/task-2394/ParseDescriptors.java index 2197670..641518d 100644 --- a/task-2394/ParseDescriptors.java +++ b/task-2394/ParseDescriptors.java @@ -21,13 +21,13 @@ public class ParseDescriptors { consensuses.put(filename, file); } else if (filename.endsWith("-votes")) { votes.put(filename, file); - } else if (filename.length() == 40) { + } else if (filename.endsWith("-descriptors")) { descriptors.put(filename, file); } } System.out.println("We found " + consensuses.size() - + " consensuses, " + votes.size() + " votes, and " - + descriptors.size() + " server descriptors."); + + " consensus files, " + votes.size() + " vote files, and " + + descriptors.size() + " server descriptor files.");
/* Parse consensuses in an outer loop and the referenced votes and * descriptors in inner loops. Write the results to disk as soon as @@ -50,7 +50,6 @@ public class ParseDescriptors { bw.write("," + bandwidthAuthority + "bandwidth"); } bw.write("\n"); - Map<String, String> parsedDescriptors = new HashMap<String, String>(); for (File consensusFile : consensuses.values()) { System.out.println("Parsing consensus " + consensusFile.getName()); BufferedReader brC = new BufferedReader(new FileReader( @@ -58,12 +57,12 @@ public class ParseDescriptors { String lastRLine = null, lastSLine = null; String consensusTimestamp = consensusFile.getName().substring(0, "YYYY-MM-DD-hh-mm-ss".length()); - String votesFilename = consensusTimestamp + "-votes"; Map<String, Map<String, String>> measuredBandwidthsByDirSource = new HashMap<String, Map<String, String>>();
/* Parse votes first, if we have them, and extract measured * bandwidths. */ + String votesFilename = consensusTimestamp + "-votes"; if (votes.containsKey(votesFilename)) { BufferedReader brV = new BufferedReader(new FileReader( votes.get(votesFilename))); @@ -89,6 +88,74 @@ public class ParseDescriptors { brV.close(); }
+ /* Parse referenced server descriptors to learn about exit policies + * and reported bandwidths. */ + String descriptorsFilename = consensusTimestamp + "-descriptors"; + Map<String, String> parsedDescriptors = + new HashMap<String, String>(); + if (descriptors.containsKey(descriptorsFilename)) { + BufferedReader brD = new BufferedReader(new FileReader( + descriptors.get(descriptorsFilename))); + Set<String> defaultRejects = new HashSet<String>(); + /* Starting with 0.2.1.6-alpha, ports 465 and 587 were allowed + * in the default exit policy again (and therefore removed + * from the default reject lines). */ + Set<String> optionalRejects = new HashSet<String>(); + String lineD, address = null, fingerprint = null, + descriptorBandwidth = null; + boolean defaultPolicy = false, comparePolicies = true; + while ((lineD = brD.readLine()) != null) { + if (lineD.startsWith("router ")) { + address = lineD.split(" ")[2]; + defaultRejects.clear(); + defaultRejects.addAll(Arrays.asList(("0.0.0.0/8:*," + + "169.254.0.0/16:*,127.0.0.0/8:*,192.168.0.0/16:*," + + "10.0.0.0/8:*,172.16.0.0/12:*,$IP:*,*:25,*:119," + + "*:135-139,*:445,*:563,*:1214,*:4661-4666,*:6346-6429," + + "*:6699,*:6881-6999").split(","))); + optionalRejects.clear(); + optionalRejects.addAll(Arrays.asList( + "*:465,*:587".split(","))); + fingerprint = null; + descriptorBandwidth = null; + defaultPolicy = false; + comparePolicies = true; + } else if (lineD.startsWith("opt fingerprint ") || + lineD.startsWith("fingerprint ")) { + fingerprint = lineD.substring(lineD.startsWith("opt ") ? + "opt fingerprint".length() : "fingerprint".length()). + replaceAll(" ", "").toLowerCase(); + } else if (lineD.startsWith("bandwidth ")) { + descriptorBandwidth = lineD.split(" ")[3]; + } else if (lineD.startsWith("reject ") && comparePolicies) { + String rejectPattern = lineD.substring("reject ". 
+ length()); + if (defaultRejects.contains(rejectPattern)) { + defaultRejects.remove(rejectPattern); + } else if (optionalRejects.contains(rejectPattern)) { + optionalRejects.remove(rejectPattern); + } else if (rejectPattern.equals(address + ":*")) { + defaultRejects.remove("$IP:*"); + } else { + comparePolicies = false; + } + } else if (lineD.startsWith("accept ") && comparePolicies) { + if (defaultRejects.isEmpty() && + lineD.equals("accept *:*")) { + defaultPolicy = true; + } + comparePolicies = false; + } else if (lineD.equals("router-signature")) { + if (address != null && fingerprint != null && + descriptorBandwidth != null) { + parsedDescriptors.put(fingerprint, descriptorBandwidth + "," + + (defaultPolicy ? "1" : "0")); + } + } + } + brD.close(); + } + /* Parse r, s, and w lines from the consensus. */ String lineC, validAfter = null; while ((lineC = brC.readLine()) != null) { @@ -110,64 +177,14 @@ public class ParseDescriptors { String consensusBandwidth = lineC.substring(lineC.indexOf( " Bandwidth=") + " Bandwidth=".length()).split(" ")[0];
- /* Parse the referenced server descriptor (if we haven't done so - * before) to learn about the relay's exit policy and reported - * bandwidth. */ + /* Look up whether we parsed this descriptor before. */ boolean parsedDescriptor = false, defaultPolicy = false; String descriptorBandwidth = null; - if (parsedDescriptors.containsKey(descriptor)) { - String parseResults = parsedDescriptors.get(descriptor); + if (parsedDescriptors.containsKey(fingerprint)) { + String parseResults = parsedDescriptors.get(fingerprint); parsedDescriptor = true; defaultPolicy = parseResults.endsWith("1"); descriptorBandwidth = parseResults.split(",")[0]; - } else if (descriptors.containsKey(descriptor)) { - parsedDescriptor = true; - BufferedReader brD = new BufferedReader(new FileReader( - descriptors.get(descriptor))); - Set<String> defaultRejects = new HashSet<String>( - Arrays.asList(("0.0.0.0/8:*,169.254.0.0/16:*," - + "127.0.0.0/8:*,192.168.0.0/16:*,10.0.0.0/8:*," - + "172.16.0.0/12:*,$IP:*,*:25,*:119,*:135-139,*:445," - + "*:563,*:1214,*:4661-4666,*:6346-6429,*:6699," - + "*:6881-6999").split(","))); - /* Starting with 0.2.1.6-alpha, ports 465 and 587 were allowed - * in the default exit policy again (and therefore removed - * from the default reject lines). */ - Set<String> optionalRejects = new HashSet<String>( - Arrays.asList("*:465,*:587".split(","))); - String lineD, address = null; - while ((lineD = brD.readLine()) != null) { - if (lineD.startsWith("router ")) { - address = lineD.split(" ")[2]; - } else if (lineD.startsWith("bandwidth ")) { - descriptorBandwidth = lineD.split(" ")[3]; - } else if (lineD.startsWith("reject ")) { - String rejectPattern = lineD.substring("reject ". 
- length()); - if (defaultRejects.contains(rejectPattern)) { - defaultRejects.remove(rejectPattern); - } else if (optionalRejects.contains(rejectPattern)) { - optionalRejects.remove(rejectPattern); - } else if (rejectPattern.equals(address + ":*")) { - defaultRejects.remove("$IP:*"); - } else { - break; - } - } else if (lineD.startsWith("accept ")) { - if (defaultRejects.isEmpty() && - lineD.equals("accept *:*")) { - defaultPolicy = true; - } - break; - } - } - brD.close(); - parsedDescriptors.put(descriptor, descriptorBandwidth + "," - + (defaultPolicy ? "1" : "0")); - } else { - System.out.println("We're missing descriptor " + descriptor - + ". Please make sure that all referenced server " - + "descriptors are available. Continuing anyway."); }
/* Write everything we know about this relay to disk. */ diff --git a/task-2394/README b/task-2394/README index 5cb47f6..714853e 100644 --- a/task-2394/README +++ b/task-2394/README @@ -1,26 +1,19 @@ Visualize self-reported vs. measured bandwidth of relays ========================================================
- - Download consensus and votes from the metrics website, e.g., + - Download a single consensus and all referenced votes and server + descriptors from the metrics website, e.g.,
https://metrics.torproject.org/votes?valid-after=2011-07-13-05-00-00 https://metrics.torproject.org/consensus?valid-after=2011-07-13-05-00-00 + https://metrics.torproject.org/serverdesc?valid-after=2011-07-13-05-00-00
- - Also download the server descriptor archives containing the referenced - server descriptors from the metrics website. If the consensus was - published on the 3rd of a month or later, it's sufficient to download a - single tarball. Otherwise, download both tarballs for the current and - previous month: - - https://metrics.torproject.org/data/server-descriptors-2011-07.tar.bz2 - - - Extract the server descriptor tarball(s) into the subdirectory - descriptors/. Also copy the consensus and votes there. With the URLs + - Put all files into a new subdirectory descriptors/. With the URLs stated above, the directory would contain these files:
2011-07-13-05-00-00-consensus 2011-07-13-05-00-00-votes - server-descriptors-2011-07 + 2011-07-13-05-00-00-descriptors
- Download Apache Commons Codec 1.4 or higher and put it in this directory.