commit 48886a04dfa052caaa2b9337ff45b95ebb12747f
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Aug 2 14:50:52 2012 +0200
Extract data for fast exits graphs (#6498).
---
task-6498/.gitignore | 7 +
task-6498/download.sh | 4 +
task-6498/run.sh | 3 +
task-6498/src/Main.java | 279 +++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 293 insertions(+), 0 deletions(-)
diff --git a/task-6498/.gitignore b/task-6498/.gitignore
new file mode 100644
index 0000000..9dc71c8
--- /dev/null
+++ b/task-6498/.gitignore
@@ -0,0 +1,7 @@
+.classpath
+.project
+lib/
+bin/
+in/
+results.csv
+
diff --git a/task-6498/download.sh b/task-6498/download.sh
new file mode 100755
index 0000000..b6cb15d
--- /dev/null
+++ b/task-6498/download.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in
+rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/server-descriptors in
+
diff --git a/task-6498/run.sh b/task-6498/run.sh
new file mode 100755
index 0000000..8621cfd
--- /dev/null
+++ b/task-6498/run.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# bin/ is git-ignored and older javac releases do not create the -d directory, so make sure it exists before compiling.
+mkdir -p bin/ && javac -d bin/ -cp lib/descriptor.jar:lib/commons-codec-1.6.jar:lib/commons-compress-1.4.1.jar src/Main.java && java -Xmx2g -cp bin/:lib/descriptor.jar:lib/commons-codec-1.6.jar:lib/commons-compress-1.4.1.jar Main
diff --git a/task-6498/src/Main.java b/task-6498/src/Main.java
new file mode 100644
index 0000000..2a59a13
--- /dev/null
+++ b/task-6498/src/Main.java
@@ -0,0 +1,279 @@
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeSet;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.ServerDescriptor;
+
+/** Extracts data for the fast exits graphs (#6498) from the consensuses
+ * and server descriptors found in in/ and writes one row per consensus
+ * and requirements setting to results.csv. */
+public class Main {
+  public static void main(String[] args) throws IOException {
+
+    /* Parse server descriptors in in/server-descriptors/, not keeping a
+     * parse history, and memorize bandwidth rate, burst, and observed
+     * bandwidth for every server descriptor. */
+    System.out.print(new Date() + ": Parsing server descriptors");
+    int parsedServerDescriptors = 0;
+    DescriptorReader descriptorReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    descriptorReader.addDirectory(new File("in/server-descriptors"));
+    Iterator<DescriptorFile> descriptorFiles =
+        descriptorReader.readDescriptors();
+    Map<String, int[]> serverDescriptors = new HashMap<String, int[]>();
+    while (descriptorFiles.hasNext()) {
+      DescriptorFile descriptorFile = descriptorFiles.next();
+      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+        if (!(descriptor instanceof ServerDescriptor)) {
+          continue;
+        }
+        if (++parsedServerDescriptors >= 1000) {
+          System.out.print(".");
+          parsedServerDescriptors = 0;
+        }
+        ServerDescriptor serverDescriptor = (ServerDescriptor) descriptor;
+        String digest = serverDescriptor.getServerDescriptorDigest();
+        int[] bandwidths = new int[] {
+            serverDescriptor.getBandwidthRate() / 1024,
+            serverDescriptor.getBandwidthBurst() / 1024,
+            serverDescriptor.getBandwidthObserved() / 1024 };
+        serverDescriptors.put(digest.toUpperCase(), bandwidths);
+      }
+    }
+
+    /* Parse consensuses in in/consensuses/ (no parse history yet). */
+    System.out.print("\n" + new Date() + ": Parsing consensuses");
+    descriptorReader = DescriptorSourceFactory.createDescriptorReader();
+    descriptorReader.addDirectory(new File("in/consensuses"));
+    /* TODO When running this program in a cronjob, add a history file
+     * using descriptorReader.setExcludeFiles() and set the file writer
+     * below to append mode instead of overwrite mode. */
+    descriptorFiles = descriptorReader.readDescriptors();
+    SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    BufferedWriter bw = new BufferedWriter(new FileWriter("results.csv"));
+    /* Make sure the writer gets closed even if processing fails. */
+    try {
+      bw.write("valid_after,min_rate,min_advbw,ports,relays,exit_prob\n");
+      while (descriptorFiles.hasNext()) {
+        DescriptorFile descriptorFile = descriptorFiles.next();
+        for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+          if (!(descriptor instanceof RelayNetworkStatusConsensus)) {
+            continue;
+          }
+          RelayNetworkStatusConsensus consensus =
+              (RelayNetworkStatusConsensus) descriptor;
+          String validAfter = dateTimeFormat.format(
+              consensus.getValidAfterMillis());
+          System.out.print(".");
+          SortedMap<String, Integer> bandwidthWeights =
+              consensus.getBandwidthWeights();
+          if (bandwidthWeights == null ||
+              !bandwidthWeights.containsKey("Wee") ||
+              !bandwidthWeights.containsKey("Wed")) {
+            continue;
+          }
+          double wee = ((double) bandwidthWeights.get("Wee")) / 10000.0,
+              wed = ((double) bandwidthWeights.get("Wed")) / 10000.0;
+          SortedSet<String> fingerprints = new TreeSet<String>();
+          Map<String, Double> exitWeights = new HashMap<String, Double>();
+          double totalExitWeight = 0.0;
+          Map<String, Integer> bandwidthRates =
+              new HashMap<String, Integer>();
+          Map<String, Integer> advertisedBandwidths =
+              new HashMap<String, Integer>();
+          Map<String, Set<Integer>> exitPorts =
+              new HashMap<String, Set<Integer>>();
+          Map<String, String> addressParts = new HashMap<String, String>();
+          for (NetworkStatusEntry relay :
+              consensus.getStatusEntries().values()) {
+            String fingerprint = relay.getFingerprint();
+            if (!relay.getFlags().contains("Running")) {
+              continue;
+            }
+            /* Only track relays that get entries in all maps below, so
+             * that the per-setting lookups never hit a missing key. */
+            fingerprints.add(fingerprint);
+            boolean isExit = relay.getFlags().contains("Exit") &&
+                !relay.getFlags().contains("BadExit");
+            boolean isGuard = relay.getFlags().contains("Guard");
+            String serverDescriptorDigest = relay.getDescriptor().
+                toUpperCase();
+            int[] descriptorBandwidths = serverDescriptors.get(
+                serverDescriptorDigest);
+            int bandwidthRate = 0, advertisedBandwidth = 0;
+            if (descriptorBandwidths != null) {
+              bandwidthRate = descriptorBandwidths[0];
+              advertisedBandwidth = Math.min(Math.min(
+                  descriptorBandwidths[0],
+                  descriptorBandwidths[1]),
+                  descriptorBandwidths[2]);
+            }
+            bandwidthRates.put(fingerprint, bandwidthRate);
+            advertisedBandwidths.put(fingerprint, advertisedBandwidth);
+            double exitWeight = (double) relay.getBandwidth();
+            if (isGuard && isExit) {
+              exitWeight *= wed;
+            } else if (isGuard) {
+              exitWeight = 0.0;
+            } else if (isExit) {
+              exitWeight *= wee;
+            } else {
+              exitWeight = 0.0;
+            }
+            exitWeights.put(fingerprint, exitWeight);
+            totalExitWeight += exitWeight;
+            exitPorts.put(fingerprint, new HashSet<Integer>());
+            if (relay.getDefaultPolicy() != null &&
+                relay.getPortList() != null) {
+              boolean acceptPolicy = relay.getDefaultPolicy().equals(
+                  "accept");
+              Set<Integer> policyPorts = new HashSet<Integer>();
+              List<Integer> relevantPorts = new ArrayList<Integer>(
+                  Arrays.asList(80, 443, 554, 1755));
+              for (String part : relay.getPortList().split(",")) {
+                int from, to;
+                if (part.contains("-")) {
+                  from = Integer.parseInt(part.split("-")[0]);
+                  to = Integer.parseInt(part.split("-")[1]);
+                } else {
+                  from = to = Integer.parseInt(part);
+                }
+                while (!relevantPorts.isEmpty() &&
+                    from > relevantPorts.get(0)) {
+                  relevantPorts.remove(0);
+                }
+                while (!relevantPorts.isEmpty() &&
+                    from <= relevantPorts.get(0) &&
+                    to >= relevantPorts.get(0)) {
+                  policyPorts.add(relevantPorts.remove(0));
+                }
+              }
+              for (int port : new int[] { 80, 443, 554, 1755}) {
+                if (!policyPorts.contains(port) ^ acceptPolicy) {
+                  exitPorts.get(fingerprint).add(port);
+                }
+              }
+            }
+            String address = relay.getAddress();
+            addressParts.put(fingerprint, address.substring(0,
+                address.lastIndexOf(".")));
+          }
+
+          /* For the default setting, filter relays which have a bandwidth
+           * rate >= 11875 KB/s, advertised bandwidth >= 5000 KB/s, and
+           * which permit exiting to ports 80, 443, 554, and 1755. Only
+           * consider the fastest 2 relays per /24 with respect to exit
+           * probability. Also vary requirements. Overall, analyze these
+           * settings:
+           * - rate >= 11875, advbw >= 5000, exit to 80, 443, 554, 1755
+           * - rate >= 11875, advbw >= 5000, exit to 80, 443
+           * - rate >= 11875, advbw >= 4000, exit to 80, 443, 554, 1755
+           * - rate >= 11875, advbw >= 3000, exit to 80, 443, 554, 1755
+           * - rate >= 11875, advbw >= 2000, exit to 80, 443, 554, 1755
+           * - rate >= 11875, advbw >= 1000, exit to 80, 443, 554, 1755
+           * - rate >= 10000, advbw >= 2000, exit to 80, 443 */
+          int[] minimumBandwidthRates = new int[] { 11875, 11875, 11875,
+              11875, 11875, 11875, 10000 };
+          int[] minimumAdvertisedBandwidths = new int[] { 5000, 5000, 4000,
+              3000, 2000, 1000, 2000 };
+          Set<Integer> defaultPorts = new HashSet<Integer>(
+              Arrays.asList(80, 443, 554, 1755));
+          Set<Integer> reducedPorts = new HashSet<Integer>(
+              Arrays.asList(80, 443));
+          List<Set<Integer>> requiredPorts = new ArrayList<Set<Integer>>();
+          requiredPorts.add(defaultPorts);
+          requiredPorts.add(reducedPorts);
+          requiredPorts.add(defaultPorts);
+          requiredPorts.add(defaultPorts);
+          requiredPorts.add(defaultPorts);
+          requiredPorts.add(defaultPorts);
+          requiredPorts.add(reducedPorts);
+          for (int i = 0; i < minimumBandwidthRates.length; i++) {
+            int minimumBandwidthRate = minimumBandwidthRates[i];
+            int minimumAdvertisedBandwidth = minimumAdvertisedBandwidths[i];
+            Set<Integer> minimumRequiredPorts = requiredPorts.get(i);
+            Map<String, List<Double>> exitWeightFractionsByAddressParts =
+                new HashMap<String, List<Double>>();
+            for (String fingerprint : fingerprints) {
+              int bandwidthRate = bandwidthRates.get(fingerprint);
+              int advertisedBandwidth = advertisedBandwidths.get(
+                  fingerprint);
+              Set<Integer> allowedExitPorts = exitPorts.get(fingerprint);
+              if (bandwidthRate < minimumBandwidthRate ||
+                  advertisedBandwidth < minimumAdvertisedBandwidth ||
+                  !allowedExitPorts.containsAll(minimumRequiredPorts)) {
+                continue;
+              }
+              /* Report 0 rather than NaN if there is no exit weight at all. */
+              double exitWeightFraction = totalExitWeight > 0.0
+                  ? exitWeights.get(fingerprint) / totalExitWeight : 0.0;
+              String addressPart = addressParts.get(fingerprint);
+              if (!exitWeightFractionsByAddressParts.containsKey(
+                  addressPart)) {
+                exitWeightFractionsByAddressParts.put(addressPart,
+                    new ArrayList<Double>());
+              }
+              exitWeightFractionsByAddressParts.get(addressPart).add(
+                  exitWeightFraction);
+            }
+            double totalExitWeightFraction = 0.0;
+            int totalRelays = 0;
+            for (List<Double> weightFractions :
+                exitWeightFractionsByAddressParts.values()) {
+              Collections.sort(weightFractions);
+              while (weightFractions.size() > 2) {
+                weightFractions.remove(0);
+              }
+              for (double weightFraction : weightFractions) {
+                totalExitWeightFraction += weightFraction;
+                totalRelays++;
+              }
+            }
+
+            /* For each setting, append results to results.csv:
+             * - valid_after: consensus valid-after time
+             * - min_rate: minimum bandwidth rate in KB/s
+             * - min_advbw: minimum advertised bandwidth in KB/s
+             * - ports: "80-443" or "80-443-554-1755"
+             * - relays: number of relays matching the requirements
+             * - exit_prob: sum of exit probabilities
+             * Locale.US keeps "." as decimal separator in every locale. */
+            bw.write(String.format(java.util.Locale.US,
+                "%s,%d,%d,%s,%d,%.4f%n", validAfter, minimumBandwidthRate,
+                minimumAdvertisedBandwidth,
+                minimumRequiredPorts.size() <= 2 ? "80-443"
+                : "80-443-554-1755", totalRelays, totalExitWeightFraction));
+          }
+        }
+      }
+    } finally {
+      bw.close();
+    }
+    System.out.println("\n" + new Date() + ": Terminating.");
+  }
+}
+