[tor-commits] [metrics-tasks/master] Calculate path-selection probabilities (#5755).

karsten at torproject.org karsten at torproject.org
Tue Jul 17 19:56:16 UTC 2012


commit 2f0b14f116bcd77e04d7ddf7f7cff711edf6de0a
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Tue Jul 17 21:44:16 2012 +0200

    Calculate path-selection probabilities (#5755).
---
 task-5755/.gitignore                               |   11 +
 .../src/CalculatePathSelectionProbabilities.java   |  246 ++++++++++++++++++++
 2 files changed, 257 insertions(+), 0 deletions(-)

diff --git a/task-5755/.gitignore b/task-5755/.gitignore
new file mode 100644
index 0000000..5d5ac29
--- /dev/null
+++ b/task-5755/.gitignore
@@ -0,0 +1,11 @@
+.classpath
+.project
+*.pdf
+bin/
+in/
+lib/
+*.csv
+*.png
+*.tar
+*.tar.bz2
+
diff --git a/task-5755/src/CalculatePathSelectionProbabilities.java b/task-5755/src/CalculatePathSelectionProbabilities.java
new file mode 100644
index 0000000..369a994
--- /dev/null
+++ b/task-5755/src/CalculatePathSelectionProbabilities.java
@@ -0,0 +1,246 @@
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.ServerDescriptor;
+
+/*
+ * Calculate five path-selection probabilities for relays based on
+ * consensus weights and advertised bandwidths:
+ *
+ *  - advertised_bandwidth_fraction: Relative advertised bandwidth of
+ *    this relay compared to the total advertised bandwidth in the
+ *    network.  If there were no bandwidth authorities, this fraction
+ *    would be the probability of this relay to be selected by clients.
+ *
+ *  - consensus_weight_fraction: Fraction of this relay's consensus weight
+ *    compared to the sum of all consensus weights in the network.  This
+ *    fraction is a very rough approximation of the probability of this
+ *    relay to be selected by clients.
+ *
+ *  - guard_probability: Probability of this relay to be selected for the
+ *    guard position.  This probability is calculated based on consensus
+ *    weights, relay flags, directory ports, and bandwidth weights in the
+ *    consensus.  Path selection depends on more factors, so that this
+ *    probability can only be an approximation.
+ *
+ *  - middle_probability: Probability of this relay to be selected for the
+ *    middle position.  This probability is calculated based on consensus
+ *    weights, relay flags, directory ports, and bandwidth weights in the
+ *    consensus.  Path selection depends on more factors, so that this
+ *    probability can only be an approximation.
+ *
+ *  - exit_probability: Probability of this relay to be selected for the
+ *    exit position.  This probability is calculated based on consensus
+ *    weights, relay flags, directory ports, and bandwidth weights in the
+ *    consensus.  Path selection depends on more factors, so that this
+ *    probability can only be an approximation.
+ */
+public class CalculatePathSelectionProbabilities {
+  public static void main(String[] args) throws Exception {
+
+    /* Read advertised bandwidths of all server descriptors in
+     * in/server-descriptors/ to memory.  This is a rather brute-force
+     * approach, but it's fine for running this analysis. */
+    DescriptorReader descriptorReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    descriptorReader.addDirectory(new File("in/server-descriptors"));
+    Iterator<DescriptorFile> descriptorFiles =
+        descriptorReader.readDescriptors();
+    Map<String, Integer> serverDescriptors =
+        new HashMap<String, Integer>();
+    while (descriptorFiles.hasNext()) {
+      DescriptorFile descriptorFile = descriptorFiles.next();
+      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+        if (!(descriptor instanceof ServerDescriptor)) {
+          continue;
+        }
+        ServerDescriptor serverDescriptor = (ServerDescriptor) descriptor;
+        String digest = serverDescriptor.getServerDescriptorDigest();
+        int advertisedBandwidth = Math.min(Math.min(
+            serverDescriptor.getBandwidthBurst(),
+            serverDescriptor.getBandwidthObserved()),
+            serverDescriptor.getBandwidthRate());
+        serverDescriptors.put(digest.toUpperCase(), advertisedBandwidth);
+      }
+    }
+
+    /* Go through consensuses in in/consensuses/ in arbitrary order and
+     * calculate the five path-selection probabilities for each of them.
+     * Write results for a given consensuses to a single new line per
+     * relay to out.csv. */
+    descriptorReader = DescriptorSourceFactory.createDescriptorReader();
+    descriptorReader.addDirectory(new File("in/consensuses"));
+    descriptorFiles = descriptorReader.readDescriptors();
+    BufferedWriter bw = new BufferedWriter(new FileWriter("out.csv"));
+    bw.write("validafter,fingerprint,advertised_bandwidth_fraction,"
+        + "consensus_weight_fraction,guard_probability,"
+        + "middle_probability,exit_probability\n");
+    while (descriptorFiles.hasNext()) {
+      DescriptorFile descriptorFile = descriptorFiles.next();
+      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+        if (!(descriptor instanceof RelayNetworkStatusConsensus)) {
+          continue;
+        }
+        RelayNetworkStatusConsensus consensus =
+            (RelayNetworkStatusConsensus) descriptor;
+
+        /* Extract valid-after time and bandwidth weights from the parsed
+         * consensus. */
+        SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+            "yyyy-MM-dd HH:mm:ss");
+        dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+        String validAfter = dateTimeFormat.format(
+            consensus.getValidAfterMillis());
+        SortedMap<String, Integer> bandwidthWeights =
+            consensus.getBandwidthWeights();
+        SortedSet<String> weightKeys = new TreeSet<String>(Arrays.asList((
+            "Wgg,Wgm,Wgd,Wmg,Wmm,Wme,Wmd,Weg,Wem,Wee,Wed,Wgb,Wmb,Web,Wdb,"
+            + "Wbg,Wbm,Wbe,Wbd").split(",")));
+        weightKeys.removeAll(bandwidthWeights.keySet());
+        if (!weightKeys.isEmpty()) {
+          System.err.print("Consensus is missing bandwidth-weights:");
+          for (String weightKey : weightKeys) {
+            System.err.print(" " + weightKey);
+          }
+          System.err.println();
+          continue;
+        }
+        double wgg = ((double) bandwidthWeights.get("Wgg")) / 10000.0,
+            wgd = ((double) bandwidthWeights.get("Wgd")) / 10000.0,
+            wmg = ((double) bandwidthWeights.get("Wmg")) / 10000.0,
+            wmm = ((double) bandwidthWeights.get("Wmm")) / 10000.0,
+            wme = ((double) bandwidthWeights.get("Wme")) / 10000.0,
+            wmd = ((double) bandwidthWeights.get("Wmd")) / 10000.0,
+            wee = ((double) bandwidthWeights.get("Wee")) / 10000.0,
+            wed = ((double) bandwidthWeights.get("Wed")) / 10000.0;
+
+        /* Go through network statuses and calculate the five weights for
+         * each of them.  Also sum up totals to calculate probabilities
+         * later. */
+        SortedMap<String, Double>
+            advertisedBandwidths = new TreeMap<String, Double>(),
+            consensusWeights = new TreeMap<String, Double>(),
+            guardWeights = new TreeMap<String, Double>(),
+            middleWeights = new TreeMap<String, Double>(),
+            exitWeights = new TreeMap<String, Double>();
+        double totalAdvertisedBandwidth = 0.0;
+        double totalConsensusWeight = 0.0;
+        double totalGuardWeight = 0.0;
+        double totalMiddleWeight = 0.0;
+        double totalExitWeight = 0.0;
+        for (NetworkStatusEntry relay :
+            consensus.getStatusEntries().values()) {
+          String fingerprint = relay.getFingerprint();
+          if (!relay.getFlags().contains("Running")) {
+            continue;
+          }
+          boolean isExit = relay.getFlags().contains("Exit") &&
+              !relay.getFlags().contains("BadExit");
+          boolean isGuard = relay.getFlags().contains("Guard");
+          String serverDescriptorDigest = relay.getDescriptor().
+              toUpperCase();
+          double advertisedBandwidth;
+          if (!serverDescriptors.containsKey(serverDescriptorDigest)) {
+            System.err.println("Missing server descriptor "
+                + serverDescriptorDigest + " while processing consensus "
+                + validAfter + ".  Setting advertised bandwidth to 0.");
+            advertisedBandwidth = 0.0;
+          } else {
+            advertisedBandwidth = (double) serverDescriptors.get(
+                serverDescriptorDigest);
+          }
+          double consensusWeight = (double) relay.getBandwidth();
+          double guardWeight = (double) relay.getBandwidth();
+          double middleWeight = (double) relay.getBandwidth();
+          double exitWeight = (double) relay.getBandwidth();
+
+          /* Case 1: relay has both Guard and Exit flag and could be
+           * selected for either guard, middle, or exit position.  Apply
+           * bandwidth weights W?d. */
+          if (isGuard && isExit) {
+            guardWeight *= wgd;
+            middleWeight *= wmd;
+            exitWeight *= wed;
+
+          /* Case 2: relay only has the Guard flag, not the Exit flag.
+           * While, in theory, the relay could also be picked for the exit
+           * position (if it has a weird exit policy), Weg is hard-coded
+           * to 0 here.  Otherwise, relays with exit policy reject *:*
+           * would show up as possible exits, which makes no sense.  Maybe
+           * this is too much of an oversimplification?  For the other
+           * positions, apply bandwidth weights W?g. */
+          } else if (isGuard) {
+            guardWeight *= wgg;
+            middleWeight *= wmg;
+            exitWeight = 0.0;
+
+          /* Case 3: relay only has the Exit flag, not the Guard flag.  It
+           * cannot be picked for the guard position, so set Wge to 0.
+           * For the other positions, apply bandwidth weights W?e. */
+          } else if (isExit) {
+            guardWeight = 0.0;
+            middleWeight *= wme;
+            exitWeight *= wee;
+
+          /* Case 4: relay has neither Exit nor Guard flag.  Similar to
+           * case 2, this relay *could* have a weird exit policy and be
+           * picked in the exit position.  Same rationale applies, so Wme
+           * is set to 0.  The relay cannot be a guard, so Wgm is 0, too.
+           * For middle position, apply bandwidth weight Wmm. */
+          } else {
+            guardWeight = 0.0;
+            middleWeight *= wmm;
+            exitWeight = 0.0;
+          }
+
+          /* Store calculated weights and update totals. */
+          advertisedBandwidths.put(fingerprint, advertisedBandwidth);
+          consensusWeights.put(fingerprint, consensusWeight);
+          guardWeights.put(fingerprint, guardWeight);
+          middleWeights.put(fingerprint, middleWeight);
+          exitWeights.put(fingerprint, exitWeight);
+          totalAdvertisedBandwidth += advertisedBandwidth;
+          totalConsensusWeight += consensusWeight;
+          totalGuardWeight += guardWeight;
+          totalMiddleWeight += middleWeight;
+          totalExitWeight += exitWeight;
+        }
+
+        /* Write calculated path-selection probabilities to the output
+         * file. */
+        for (NetworkStatusEntry relay :
+            consensus.getStatusEntries().values()) {
+          String fingerprint = relay.getFingerprint();
+          bw.write(String.format("%s,%s,%.9f,%.9f,%.9f,%.9f,%.9f%n",
+              validAfter,
+              fingerprint,
+              advertisedBandwidths.get(fingerprint)
+                  / totalAdvertisedBandwidth,
+              consensusWeights.get(fingerprint) / totalConsensusWeight,
+              guardWeights.get(fingerprint) / totalGuardWeight,
+              middleWeights.get(fingerprint) / totalMiddleWeight,
+              exitWeights.get(fingerprint) / totalExitWeight));
+        }
+      }
+    }
+    bw.close();
+  }
+}
+



More information about the tor-commits mailing list