[tor-commits] [metrics-tasks/master] Add simulation code and LaTeX sources of #2911 draft.

karsten at torproject.org karsten at torproject.org
Mon Jul 4 07:46:07 UTC 2011


commit cff86510fd86e73a2f66f21ac8b47068d187d972
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon May 30 11:16:10 2011 +0200

    Add simulation code and LaTeX sources of #2911 draft.
---
 task-2911/.gitignore                               |    9 +
 task-2911/README                                   |  105 ++++++
 .../mtbf-sim/SimulateMeanTimeBetweenFailure.java   |  351 ++++++++++++++++++++
 task-2911/mtbf-sim/mtbf-sim.R                      |   73 ++++
 task-2911/report.tex                               |  295 ++++++++++++++++
 .../wfu-sim/SimulateWeightedFractionalUptime.java  |  314 +++++++++++++++++
 task-2911/wfu-sim/wfu-sim.R                        |   57 ++++
 7 files changed, 1204 insertions(+), 0 deletions(-)

diff --git a/task-2911/.gitignore b/task-2911/.gitignore
new file mode 100644
index 0000000..d2480c1
--- /dev/null
+++ b/task-2911/.gitignore
@@ -0,0 +1,9 @@
+*.class
+mtbf-sim/tunf/
+wfu-sim/fwfu/
+wfu-sim/consensuses/
+*.csv
+*.aux
+*.log
+*.pdf
+
diff --git a/task-2911/README b/task-2911/README
new file mode 100644
index 0000000..bcefa2d
--- /dev/null
+++ b/task-2911/README
@@ -0,0 +1,105 @@
+Tech report: An Analysis of Tor Relay Stability
+===============================================
+
+Simulation of MTBF requirements
+-------------------------------
+
+Change to the MTBF simulation directory:
+
+  $ cd mtbf-sim/
+
+Export status entries and server descriptor parts from the metrics
+database, once in reverse and once in forward order.  Note that each file
+will be 2.2G large for roughly 2.5 years of data.  Plan for a buffer of at
+least 4 months before and after the interval to investigate:
+
+  tordir=> \o running-relays-reverse.csv
+  tordir=> SELECT statusentry.validafter,
+             statusentry.fingerprint,
+             CASE WHEN descriptor.uptime IS NULL THEN FALSE ELSE
+               statusentry.validafter - descriptor.published +
+               descriptor.uptime * '1 second'::INTERVAL <
+                 '01:00:00'::INTERVAL END AS restarted
+           FROM statusentry
+           LEFT JOIN descriptor
+           ON statusentry.descriptor = descriptor.descriptor
+           WHERE statusentry.isrunning
+           AND statusentry.validafter >= '2009-01-01 00:00:00'
+           ORDER BY statusentry.validafter DESC, statusentry.fingerprint;
+  tordir=> \o
+  tordir=> \o running-relays-forward.csv
+  tordir=> SELECT statusentry.validafter,
+             statusentry.fingerprint,
+             CASE WHEN descriptor.uptime IS NULL THEN FALSE ELSE
+               statusentry.validafter - descriptor.published +
+               descriptor.uptime * '1 second'::INTERVAL <
+                 '01:00:00'::INTERVAL END AS restarted
+           FROM statusentry
+           LEFT JOIN descriptor
+           ON statusentry.descriptor = descriptor.descriptor
+           WHERE statusentry.isrunning
+           AND statusentry.validafter >= '2009-01-01 00:00:00'
+           ORDER BY statusentry.validafter, statusentry.fingerprint;
+  tordir=> \o
+
+Run the simulation consisting of a reverse and a forward run.  The results
+of the reverse run will be stored to the tunf/ directory and will be
+re-used in subsequent simulations.  Delete the tunf/ directory to repeat
+the reverse run, too.
+
+  $ javac SimulateMeanTimeBetweenFailure.java
+  $ java SimulateMeanTimeBetweenFailure
+
+Plot the results:
+
+  $ R --slave -f mtbf-sim.R
+
+Once you're satisfied with the result, copy the graph to the parent
+directory to include it in the report:
+
+  $ cp mtbf-sim.pdf ../
+
+
+Simulation of WFU requirements
+------------------------------
+
+Change to the WFU simulation directory:
+
+  $ cd wfu-sim/
+
+Create a consensuses/ directory and put the consensus files of the
+interval to investigate plus 4+ months before and 4+ months after in it:
+
+  $ mkdir consensuses/
+  $ ln -s $extracted/consensuses-20* consensuses/
+
+Run the simulation that first parses consensuses from last to first and
+then from first to last.  The results from the reverse direction will be
+stored in the fwfu/ directory and re-used in subsequent simulations.
+Delete the fwfu/ directory to re-run both simulation parts.
+
+  $ javac SimulateWeightedFractionalUptime.java
+  $ java SimulateWeightedFractionalUptime
+
+Plot the results:
+
+  $ R --slave -f wfu-sim.R
+
+Copy the graph to the parent directory to include it in the report:
+
+  $ cp wfu-sim.pdf ../
+
+
+Compiling the report
+--------------------
+
+Copy the generated graphs to the base directory, unless you have done so
+before:
+
+  $ cp mtbf-sim/mtbf-sim.pdf .
+  $ cp wfu-sim/wfu-sim.pdf .
+
+Compile the report:
+
+  $ pdflatex report.tex
+
diff --git a/task-2911/mtbf-sim/SimulateMeanTimeBetweenFailure.java b/task-2911/mtbf-sim/SimulateMeanTimeBetweenFailure.java
new file mode 100644
index 0000000..cd73f82
--- /dev/null
+++ b/task-2911/mtbf-sim/SimulateMeanTimeBetweenFailure.java
@@ -0,0 +1,351 @@
+/**
+ * Simulate variation of mean time between failure on Stable relays.  The
+ * simulation is based on the previously generated SQL results containing
+ * network status entries and parts of server descriptors.  In a first
+ * step, parse the SQL results that are in descending order to calculate
+ * time until next failure for all relays and write them to disk as one
+ * file per network status in tunf/$filename.  (Skip this step if there is
+ * already a tunf/ directory.)  In a second step, parse the network
+ * statuses again, but this time from first to last, calculate mean times
+ * between failure for all relays, form relay subsets based on minimal
+ * MTBF, look up what the time until next failure would be for a subset,
+ * and write results to mtbf-sim.csv to disk. */
+import java.io.*;
+import java.text.*;
+import java.util.*;
+public class SimulateMeanTimeBetweenFailure {
+  public static void main(String[] args) throws Exception {
+
+    /* Measure how long this execution takes. */
+    long started = System.currentTimeMillis();
+
+    /* Decide whether we need to do the reverse run, or if we can use
+     * previous results. */
+    if (!new File("tunf").exists()) {
+
+      /* For each relay as identified by its hex encoded fingerprint,
+       * track time until next failure in seconds in a long. */
+      SortedMap<String, Long> knownRelays = new TreeMap<String, Long>();
+
+      /* Parse previously exported network status entries in reverse
+       * order. */
+      SimpleDateFormat formatter = new SimpleDateFormat(
+          "yyyy-MM-dd-HH-mm-ss");
+      formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+      SimpleDateFormat isoFormatter = new SimpleDateFormat(
+          "yyyy-MM-dd HH:mm:ss");
+      isoFormatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+      Map<String, String> runningRelays = new HashMap<String, String>();
+      BufferedReader br = new BufferedReader(new FileReader(
+          "running-relays-reverse.csv"));
+      String line, lastValidAfter = null, lastButOneValidAfter = null;
+      while ((line = br.readLine()) != null) {
+        if (!line.startsWith("20")) {
+          continue;
+        }
+        String[] parts = line.split(",");
+        String validAfter = parts[0];
+        if (lastValidAfter != null &&
+            !lastValidAfter.equals(validAfter)) {
+
+          /* We just parsed all lines of a consensus.  Let's write times
+           * until next failure to disk for all running relays and update
+           * our internal history. */
+          if (lastButOneValidAfter == null) {
+            lastButOneValidAfter = lastValidAfter;
+          }
+          long lastValidAfterMillis = isoFormatter.parse(lastValidAfter).
+              getTime();
+          File tunfFile = new File("tunf",
+              formatter.format(lastValidAfterMillis));
+          tunfFile.getParentFile().mkdirs();
+          BufferedWriter bw = new BufferedWriter(new FileWriter(
+              tunfFile));
+          long secondsSinceLastValidAfter =
+              (isoFormatter.parse(lastButOneValidAfter).getTime()
+              - lastValidAfterMillis) / 1000L;
+
+          /* Iterate over our history first and see if these relays have
+           * been running in the considered consensus.  Remember changes
+           * to our history and modify it below to avoid concurrent
+           * modification errors. */
+          Set<String> removeFromHistory = new HashSet<String>();
+          Map<String, Long> addToHistory = new HashMap<String, Long>();
+          for (Map.Entry<String, Long> e : knownRelays.entrySet()) {
+            String fingerprint = e.getKey();
+            if (runningRelays.containsKey(fingerprint)) {
+
+              /* This relay has been running, so write it to the output
+               * file and update our history. */
+              long hoursUntilFailure = e.getValue();
+              bw.write(fingerprint + "," + (secondsSinceLastValidAfter
+                  + hoursUntilFailure) + "\n");
+              boolean restarted = runningRelays.get(fingerprint).
+                  split(",")[2].equals("t");
+              if (restarted) {
+                removeFromHistory.add(fingerprint);
+              } else {
+                addToHistory.put(fingerprint, secondsSinceLastValidAfter
+                    + hoursUntilFailure);
+              }
+              runningRelays.remove(fingerprint);
+            } else {
+
+              /* This relay has not been running, so remove it from our
+               * history. */
+              removeFromHistory.add(fingerprint);
+            }
+          }
+
+          /* Update our history for real now.  We couldn't do this above,
+           * or we'd have modified the set we've been iterating over. */
+          for (String f : removeFromHistory) {
+            knownRelays.remove(f);
+          }
+          for (Map.Entry<String, Long> e : addToHistory.entrySet()) {
+            knownRelays.put(e.getKey(), e.getValue());
+          }
+
+          /* Iterate over the relays that we found in the consensus, but
+           * that we didn't have in our history. */
+          for (Map.Entry<String, String> e : runningRelays.entrySet()) {
+            String fingerprint = e.getKey();
+            bw.write(fingerprint + ",0\n");
+            boolean restarted = e.getValue().split(",")[2].equals("t");
+            if (!restarted) {
+              knownRelays.put(fingerprint, 0L);
+            }
+          }
+          bw.close();
+
+          /* Prepare for next consensus. */
+          runningRelays = new HashMap<String, String>();
+          lastButOneValidAfter = lastValidAfter;
+        }
+
+        /* Add the running relay lines to a map that we parse once we have
+         * all lines of a consensus. */
+        String fingerprint = parts[1];
+        runningRelays.put(fingerprint, line);
+        lastValidAfter = validAfter;
+      }
+    }
+
+    /* Run the simulation for the following WMTBF percentiles: */
+    List<Long> requiredWMTBFs = new ArrayList<Long>();
+    for (long l : new long[] { 20, 30, 40, 50, 60, 70, 80 }) {
+      requiredWMTBFs.add(l);
+    }
+    Collections.sort(requiredWMTBFs);
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        "mtbf-sim.csv"));
+    bw.write("time");
+    for (long requiredWMTBF : requiredWMTBFs) {
+      bw.write(",mtunf" + requiredWMTBF + ",perc75tunf" + requiredWMTBF
+      + ",perc80tunf" + requiredWMTBF + ",perc85tunf" + requiredWMTBF
+      + ",perc90tunf" + requiredWMTBF + ",perc95tunf" + requiredWMTBF
+      + ",wmtbf" + requiredWMTBF);
+    }
+    bw.write("\n");
+
+    /* For each relay as identified by its base-64 encoded fingerprint,
+     * track weighted run length, total run weights, and current run
+     * length in a double[3]. */
+    SortedMap<String, double[]> knownRelays =
+        new TreeMap<String, double[]>();
+
+    /* Parse previously exported network status entries again, but this
+     * time in forward order. */
+    SimpleDateFormat formatter = new SimpleDateFormat(
+        "yyyy-MM-dd-HH-mm-ss");
+    formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+    SimpleDateFormat isoFormatter = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss");
+    isoFormatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+    Map<String, String> runningRelays = new HashMap<String, String>(),
+        lastRunningRelays = new HashMap<String, String>();
+    BufferedReader br = new BufferedReader(new FileReader(
+        "running-relays-forward.csv"));
+    String line, lastValidAfter = null, firstValidAfter = null;
+    long nextWeightingInterval = -1L;
+    while ((line = br.readLine()) != null) {
+      if (!line.startsWith("20")) {
+        continue;
+      }
+      String[] parts = line.split(",");
+      String validAfter = parts[0];
+      if (firstValidAfter == null) {
+        firstValidAfter = validAfter;
+      }
+      if (lastValidAfter != null &&
+          !lastValidAfter.equals(validAfter)) {
+
+        /* We just parsed all lines of a consensus.  First, see if 12
+         * hours have passed since we last discounted weighted run lengths
+         * and total run weights.  If so, discount both variables for all
+         * known relays by factor 0.95 (or 19/20 since these are long
+         * integers) and remove those relays with a total run weight below
+         * 1/10000. */
+        long lastValidAfterMillis = isoFormatter.parse(lastValidAfter).
+            getTime();
+        long validAfterMillis = isoFormatter.parse(validAfter).getTime();
+        long weightingInterval = validAfterMillis
+            / (12L * 60L * 60L * 1000L);
+        if (nextWeightingInterval < 0L) {
+          nextWeightingInterval = weightingInterval;
+        }
+        while (weightingInterval > nextWeightingInterval) {
+          Set<String> relaysToRemove = new HashSet<String>();
+          for (Map.Entry<String, double[]> e : knownRelays.entrySet()) {
+            double[] w = e.getValue();
+            w[0] *= 0.95;
+            w[1] *= 0.95;
+          }
+          for (String fingerprint : relaysToRemove) {
+            knownRelays.remove(fingerprint);
+          }
+          nextWeightingInterval += 1L;
+        }
+
+        /* Update history for running relays.  Start by iterating over all
+         * relays in the history, see if they're running now and whether
+         * they have been restarted.  Distinguish four cases for relays in
+         * the history: 1) still running, 2) still running but restarted,
+         * 3) started in this consensus, 4) stopped in this consensus. */
+        double secondsSinceLastValidAfter =
+            (double) ((validAfterMillis - lastValidAfterMillis) / 1000L);
+        Set<String> updatedRelays = new HashSet<String>();
+        for (Map.Entry<String, double[]> e : knownRelays.entrySet()) {
+          String fingerprint = e.getKey();
+          double[] w = e.getValue();
+          if (runningRelays.containsKey(fingerprint)) {
+            if (w[2] > 0.1) {
+              if (!runningRelays.get(fingerprint).split(",")[2].
+                  equals("t")) {
+
+                /* Case 1) still running: */
+                w[2] += secondsSinceLastValidAfter;
+              } else {
+
+                /* Case 2) still running but restarted: */
+                w[0] += w[2];
+                w[1] += 1.0;
+                w[2] = secondsSinceLastValidAfter;
+              }
+            } else {
+
+              /* Case 3) started in this consensus: */
+              w[2] = secondsSinceLastValidAfter;
+            }
+
+            /* Mark relay as already processed, or we'd add it to the
+             * history as a new relay below. */
+            updatedRelays.add(fingerprint);
+          } else if (w[2] > 0.1) {
+
+            /* Case 4) stopped in this consensus: */
+            w[0] += w[2];
+            w[1] += 1.0;
+            w[2] = 0.0;
+          }
+        }
+
+        /* Iterate over the set of currently running relays and add those
+         * that we haven't processed above to our history. */
+        for (String fingerprint : runningRelays.keySet()) {
+          if (!updatedRelays.contains(fingerprint)) {
+            updatedRelays.add(fingerprint);
+            knownRelays.put(fingerprint, new double[] { 0.0, 0.0,
+                secondsSinceLastValidAfter });
+          }
+        }
+
+        /* Calculate WMTBFs for all running relays and put them in a list
+         * that we can sort by WMTBF in descending order. */
+        List<String> wmtbfs = new ArrayList<String>();
+        for (String fingerprint : runningRelays.keySet()) {
+          double[] w = knownRelays.get(fingerprint);
+          double totalRunLength = w[0] + w[2];
+          double totalWeights = w[1] + (w[2] > 0.1 ? 1.0 : 0.0);
+          long wmtbf = totalWeights < 0.0001 ? 0
+              : (long) (totalRunLength / totalWeights);
+          wmtbfs.add(String.format("%012d %s", wmtbf, fingerprint));
+        }
+        Collections.sort(wmtbfs, Collections.reverseOrder());
+
+        /* Read previously calculated TUNFs from disk. */
+        Map<String, Long> tunfs = new HashMap<String, Long>();
+        File tunfFile = new File("tunf",
+            formatter.format(lastValidAfterMillis));
+        if (!tunfFile.exists()) {
+          if (!lastValidAfter.equals(firstValidAfter)) {
+            System.out.println("Could not find file " + tunfFile
+                + ". Skipping simulation!");
+          }
+        } else {
+          BufferedReader tunfBr = new BufferedReader(new FileReader(
+              tunfFile));
+          String tunfLine;
+          while ((tunfLine = tunfBr.readLine()) != null) {
+            String[] tunfParts = tunfLine.split(",");
+            tunfs.put(tunfParts[0], Long.parseLong(tunfParts[1]));
+          }
+          tunfBr.close();
+
+          /* Run the simulation for the relays in the current consensus
+           * for various required WFUs. */
+          bw.write(isoFormatter.format(lastValidAfterMillis));
+          long totalRelays = (long) wmtbfs.size(), selectedRelays = 0L,
+              totalTunf = 0L, minimalWmtbf = 0L;
+          int simulationIndex = 0;
+          List<Long> tunfList = new ArrayList<Long>();
+          for (String relay : wmtbfs) {
+            while (simulationIndex < requiredWMTBFs.size() &&
+                selectedRelays * 100L > totalRelays
+                * requiredWMTBFs.get(simulationIndex)) {
+              if (selectedRelays == 0L) {
+                bw.write(",NA,NA,NA,NA,NA,NA");
+              } else {
+                Collections.sort(tunfList, Collections.reverseOrder());
+                long perc75 = tunfList.get((75 * tunfList.size()) / 100);
+                long perc80 = tunfList.get((80 * tunfList.size()) / 100);
+                long perc85 = tunfList.get((85 * tunfList.size()) / 100);
+                long perc90 = tunfList.get((90 * tunfList.size()) / 100);
+                long perc95 = tunfList.get((95 * tunfList.size()) / 100);
+                bw.write("," + (totalTunf / selectedRelays) + "," + perc75
+                    + "," + perc80 + "," + perc85 + "," + perc90 + ","
+                    + perc95);
+              }
+              bw.write("," + minimalWmtbf);
+              simulationIndex++;
+            }
+            String[] wmtbfParts = relay.split(" ");
+            minimalWmtbf = Long.parseLong(wmtbfParts[0]);
+            String fingerprint = wmtbfParts[1];
+            long tunf = tunfs.get(fingerprint);
+            totalTunf += tunf;
+            tunfList.add(tunf);
+            selectedRelays += 1L;
+          }
+          bw.write("\n");
+        }
+
+        /* We're done with this consensus.  Prepare for the next. */
+        lastRunningRelays = runningRelays;
+        runningRelays = new HashMap<String, String>();
+      }
+
+      /* Add the running relay lines to a map that we parse once we have
+       * all lines of a consensus. */
+      String fingerprint = parts[1];
+      runningRelays.put(fingerprint, line);
+      lastValidAfter = validAfter;
+    }
+    bw.close();
+
+    /* Print how long this execution took and exit. */
+    System.out.println("Execution took " + ((System.currentTimeMillis()
+        - started) / (60L * 1000L)) + " minutes.");
+  }
+}
+
diff --git a/task-2911/mtbf-sim/mtbf-sim.R b/task-2911/mtbf-sim/mtbf-sim.R
new file mode 100644
index 0000000..a630406
--- /dev/null
+++ b/task-2911/mtbf-sim/mtbf-sim.R
@@ -0,0 +1,73 @@
+library(ggplot2)
+
+data <- read.csv("mtbf-sim.csv", stringsAsFactors = FALSE)
+d <- data[data$time >= '2010' & data$time < '2011', ]
+d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean)
+d <- rbind(
+  data.frame(x = d$wmtbf30, y = d$perc90tunf30, sim = "30 %"),
+  data.frame(x = d$wmtbf40, y = d$perc90tunf40, sim = "40 %"),
+  data.frame(x = d$wmtbf50, y = d$perc90tunf50, sim = "50 % (default)"),
+  data.frame(x = d$wmtbf60, y = d$perc90tunf60, sim = "60 %"),
+  data.frame(x = d$wmtbf70, y = d$perc90tunf70, sim = "70 %"))
+ggplot(d, aes(x = x / (24 * 60 * 60), y = y / (60 * 60))) +
+facet_wrap(~ sim) +
+geom_path() +
+scale_x_continuous("\nRequired WMTBF in days",
+  breaks = seq(0, max(d$x, na.rm = TRUE) / (24 * 60 * 60), 7),
+  minor = seq(0, max(d$x, na.rm = TRUE) / (24 * 60 * 60), 1)) +
+scale_y_continuous(paste("Time in hours until 10 % of relays\nor ",
+  "27.1 % of streams have failed\n", sep = ""),
+  breaks = seq(0, max(d$y, na.rm = TRUE) / (60 * 60), 24))
+ggsave(filename = "mtbf-sim.pdf", width = 8, height = 5, dpi = 100)
+
+## Commented out, because this graph is meaningless in b/w.  The graph
+## above contains the same data, but can be printed in b/w.
+#data <- read.csv("mtbf-sim.csv", stringsAsFactors = FALSE)
+#d <- data[data$time >= '2010' & data$time < '2011', ]
+#d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean)
+#d <- rbind(
+#  data.frame(x = d$wmtbf70, y = d$perc90tunf70, sim = "70 %"),
+#  data.frame(x = d$wmtbf60, y = d$perc90tunf60, sim = "60 %"),
+#  data.frame(x = d$wmtbf50, y = d$perc90tunf50, sim = "50 % (default)"),
+#  data.frame(x = d$wmtbf40, y = d$perc90tunf40, sim = "40 %"),
+#  data.frame(x = d$wmtbf30, y = d$perc90tunf30, sim = "30 %"))
+#ggplot(d, aes(x = x / (24 * 60 * 60), y = y / (60 * 60),
+#  colour = sim)) +
+#geom_path() +
+#scale_x_continuous("\nRequired WMTBF in days",
+#  breaks = seq(0, max(d$x, na.rm = TRUE) / (24 * 60 * 60), 7),
+#  minor = seq(0, max(d$x, na.rm = TRUE) / (24 * 60 * 60), 1)) +
+#scale_y_continuous(paste("Time until    \n10 % of relays or    \n",
+#  "27.1 % of streams    \nhave failed    \nin hours    ", sep = ""),
+#  breaks = seq(0, max(d$y, na.rm = TRUE) / (60 * 60), 24)) +
+#scale_colour_hue("Fraction of relays\nby highest WMTBF",
+#  breaks = c("30 %", "40 %", "50 % (default)", "60 %", "70 %")) +
+#opts(axis.title.x = theme_text(size = 12 * 0.8, face = "bold",
+#  hjust = 0.5),
+#  axis.title.y = theme_text(size = 12 * 0.8, face = "bold", vjust = 0.5,
+#  hjust = 1))
+#ggsave(filename = "mtbf-sim.pdf", width = 8, height = 5, dpi = 100)
+
+## Commented out, because focusing on the development over time is the
+## wrong thing here.
+#simulations <- paste("mtunf", c(20, 30, 40, 50, 60, 70, 80),
+#  sep = "")
+#d <- data[data$time >= '2010' & data$time < '2011',
+#  c("time", simulations)]
+#d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean)
+#d <- melt(d, id.vars = 1)
+#ggplot(d, aes(x = date, y = value / (24 * 60 * 60), colour = variable)) +
+#geom_line() +
+#scale_x_date("", major = "3 months", minor = "1 month",
+#  format = "%b %Y") +
+#scale_y_continuous(paste("Mean time    \nuntil next    \nfailure    \n",
+#  "in days    \n", sep = ""),
+#  limits = c(0, max(d$value, na.rm = TRUE) / (24 * 60 * 60))) +
+#scale_colour_hue(paste("Percentile\nhighest\nweighted mean\n",
+#  "time between\nfailures", sep = ""), breaks = simulations,
+#  labels = paste(substr(simulations, 6, 9),
+#  ifelse(simulations == "mtunf50", "(default)", ""))) +
+#opts(axis.title.y = theme_text(size = 12 * 0.8, face = "bold",
+#  vjust = 0.5, hjust = 1))
+#ggsave(filename = "mtbf-sim1.pdf", width = 8, height = 5, dpi = 100)
+
diff --git a/task-2911/report.tex b/task-2911/report.tex
new file mode 100644
index 0000000..4dc6ab9
--- /dev/null
+++ b/task-2911/report.tex
@@ -0,0 +1,295 @@
+\documentclass{article}
+\usepackage{url}
+\usepackage[pdftex]{graphicx}
+\usepackage{graphics}
+\usepackage{color}
+\begin{document}
+\title{An Analysis of Tor Relay Stability\\(DRAFT)}
+\author{Karsten Loesing\\{\tt karsten at torproject.org}}
+
+\maketitle
+
+\section{Introduction}
+
+The Tor network consists of 2,200 relays and 600 bridges run by
+volunteers, some of which are on dedicated servers and some on laptops or
+mobile devices.
+% TODO Look up more recent relay and bridge numbers.  -KL
+Obviously, we can expect the relays run on dedicated servers to be more
+``stable'' than those on mobile phones.
+But it is difficult to draw a line between stable and unstable relays.
+In most cases it depends on the context which relays count as stable:
+
+\begin{itemize}
+\item A stable relay that is supposed to be part of a circuit for a
+\emph{long-running stream} should not go offline during the next day.
+\item A stable relay that clients pick as \emph{entry guard} doesn't have
+to be running continuously, but should be online most of the time in the
+upcoming weeks.
+\item A stable relay that acts as \emph{hidden-service directory} should
+be part of a relay subset that mostly overlaps with the subsets 1, 2, or
+even 3 hours in the future.
+That means that the relays in this set should be stable, but also that not
+too many new relays should join the set of stable relays at once.
+\item A stable relay that clients use in a \emph{fallback consensus} that
+is already a few days or even weeks old should still be available on the
+same IP address and port.\footnote{See also proposal 146.}
+Such a relay doesn't necessarily have to run without interruption, though.
+% TODO Correctly cite proposal 146 here.  -KL
+\item A stable \emph{bridge relay} should be running on the same IP
+address a few days after a client learns about the bridge, but again,
+doesn't have to run continuously.
+\end{itemize}
+
+All these stability notions have in common that some relays or bridges are
+better suited for the described contexts than others.
+In this analysis we will look at various relay stability metrics to find
+the best suited set of relays for each context.
+The idea of this report is to use the results to optimize how the
+directory authorities assign relay flags that clients use to make path
+selection decisions.
+
+For every context, we try to simulate what requirements based on past
+observations would have resulted in what relay stabilities in the near
+future.
+Generally, we'd expect that stricter requirements lead to higher
+stability.
+But every prediction contains a certain amount of randomness, so that we
+cannot tighten the requirements arbitrarily.
+Further, we want to ensure that the subset of relays identified as stable
+does not become too small.
+The reason is that there should be some diversity, so that not a few
+operators can aim at running most relays used in a given context.
+In some cases, the stable relays also need to provide sufficient bandwidth
+to the network in order not to become a performance bottleneck.
+We are going into more details about the requirements when looking into
+the separate analyses in the sections below.
+
+The analysis data and tools are available on the Tor metrics website at
+\url{https://metrics.torproject.org/}.\footnote{Or rather, will be made
+available.}
+
+\section{Choosing relays for long-lived streams}
+\label{sec:mtbf-sim}
+
+Whenever clients request Tor to open a long-lived stream, Tor should try
+to pick only those relays for the circuit that are not likely to disappear
+shortly after.
+If only a single relay in the circuit fails, the stream collapses and a
+new circuit needs to be built.
+Depending on how well the application handles connection failures this may
+impact usability significantly.
+
+In order to declare some relays as more useful for long-lived streams, the
+directory authorities track uptime sessions of all relays over time.
+Based on this history, they calculate the \emph{weighted mean time between
+failure (WMTBF)} for each relay.
+The MTBF part simply measures the average uptime between a relay showing
+up in the Tor network and either leaving or failing.
+In the weighted form of this metric, which is used here, older sessions
+are weighted to count less.
+The directory authorities assign the \texttt{Stable} flag to the 50~\% of
+relays with the highest WMTBF.
+
+In this simulation we want to find out how useful the WMTBF metric is for
+predicting future stability and how stability would be affected when
+declaring more or less than 50~\% of the relays as stable.
+The metric we chose for evaluating a relay's stability is the \emph{time
+until next failure}.
+When running a simulation we determine the time until 10~\% of the
+``stable'' relays have failed.
+Under the (grossly simplified) assumption that relays are chosen
+uniformly, $1 - 0.9^3 = 27.1~\%$ of streams using relays from this set
+would have failed up to this point.
+
+\begin{figure}[t]
+\includegraphics[width=\textwidth]{mtbf-sim.pdf}
+\caption{Impact of assigning the \texttt{Stable} flag to a given fraction
+of relays on the actual required WMTBF ($x$ axis) and on the time
+until 10~\% of relays or 27.1~\% of streams have failed ($y$ axis)}
+\label{fig:mtbf-sim}
+\end{figure}
+
+Figure~\ref{fig:mtbf-sim} shows the analysis results for assigning the
+\texttt{Stable} flag to fractions of relays between 30~\% and 70~\% in a
+path plot.
+This path plot shows the effect of choosing a different fraction of
+relays on the actual required WMTBF value on the $x$ axis and on the
+resulting time until 10~\% of relays have failed on the $y$ axis.
+Two data points adjacent in time are connected by a line, forming a path.
+
+The results indicate a somewhat linear relation between required WMTBF and
+time until failure, which is as expected.
+The time until 10~\% of relays have failed in the default case of having
+50~\% stable relays is somewhere between 12 and 48 hours.
+If the directory authorities assigned the \texttt{Stable} flag to 60~\% or
+even 70~\% of all relays, this time would go down to on average 24 or 12
+hours.
+Reducing the set to only 40~\% or 30~\% of relays would increase the time
+until failure to 36 or even 48 hours on average.
+
+\subsubsection*{Next steps}
+
+{\it
+\begin{itemize}
+\item What's the desired stability goal here?
+\item What other requirements (bandwidth) should go into the simulation?
+\end{itemize}
+}
+
+\section{Picking stable entry guards}
+
+Clients pick a set of entry guards as fixed entry points into the Tor
+network.
+Optimally, clients should be able to stick with their choice for a few
+weeks.
+While it is not required for all their entry guards to be running all the
+time, at least a subset of them should be running, or the client needs to
+pick a new set.
+
+Tor's metric for deciding which relays are stable enough to be entry
+guards is \emph{weighted fractional uptime (WFU)}.
+WFU measures the fraction of uptime of a relay in the past with older
+observations weighted to count less.
+The assumption is that a relay that was available most of the time in the
+past will also be available most of the time in the future.
+
+In a first analysis we simulate the effect of varying the requirements for
+becoming an entry guard on the average relay stability in the future.
+We measure future stability by using the same WFU metric, but for uptime
+in the future.
+We similarly weight observations farther in the future less than
+observations in the near future.
+We then simulate different pre-defined required WFUs between $90~\%$ and
+$99.9~\%$ and calculate what the mean future WFUs would be.
+
+\begin{figure}[t]
+\includegraphics[width=\textwidth]{wfu-sim.pdf}
+\caption{Impact of different required WFU on the mean empirical future WFU
+and fraction of potential entry guards}
+\label{fig:wfu-sim}
+\end{figure}
+
+Figure~\ref{fig:wfu-sim} shows the analysis results in a path plot similar
+to the one in Section~\ref{sec:mtbf-sim}.
+This path plot shows the effect of varying the WFU requirement, displayed
+as different line colors, on the fraction of relays meeting this
+requirement on the $x$ axis and on the WFU in the future on the $y$ axis.
+Two data points adjacent in time are connected by a line, forming a path.
+
+In this graph we can see that the majority of data points for the default
+required WFU of 98~\% falls in a future WFU range of 94~\% to 96~\% with
+the smallest WFU being no less than 89~\%.
+In most cases, the fraction of relays meeting the default WFU requirement
+is between 40~\% and 50~\%.
+
+If the WFU requirement is relaxed to 95~\% or even 90~\%, the WFU in the
+future decreases slightly towards around 94~\% to 95~\% for most cases.
+At first sight it may seem surprising that a past WFU of 90~\% leads to
+a future WFU of 94~\%, but it makes sense, because the past WFU is a
+required minimum whereas the future WFU is a mean value of all relays
+meeting the requirement.
+Another effect of relaxing the required WFU is that the fraction of relays
+meeting the requirement increases from 50~\% to almost 66~\%.
+
+Interestingly, when tightening the requirement to a WFU value of 99~\% or
+even 99.9~\%, the future WFU does not increase significantly, if at all.
+To the contrary, the future WFU of relays meeting the 99.9~\% requirement
+drops to a range of 91~\% to 94~\% for quite a while.
+A likely explanation for this effect is that the fraction of relays
+meeting these high requirements is only 15~\%.
+While these 15~\% of relays may have had a very high uptime in the past,
+failure of only a few of these relays ruins the WFU metric in the future.
+
+A cautious conclusion of this analysis could be that, if the goal is to
+increase the number of \texttt{Guard} relays, reducing the required WFU to
+95~\% or even 90~\% wouldn't impact relay stability by too much.
+Conversely, increasing the required WFU beyond the current value of 98~\%
+doesn't make much sense and might even negatively affect relay stability.
+
+\subsubsection*{Next steps}
+
+{\it
+\begin{itemize}
+\item Tor penalizes relays that change their IP address or port by ending
+the running uptime session and starting a new uptime session.  This
+reduces both WFU and MTBF.  The simulation doesn't take this into account
+yet.  Should it?
+\item Add the bandwidth requirements to the simulation.  The current
+simulation doesn't make any assumptions about relay bandwidth when
+assigning \texttt{Guard} flags.  Which bandwidth value would we use here?
+\item Add another graph similar to Figure~\ref{fig:wfu-sim}, but replace
+the ``Fraction of relays meeting WFU requirement'' on the \emph{x} axis
+with the ``Fraction of \emph{bandwidth} of relays meeting WFU
+requirement.''
+After all, we're interested in having enough bandwidth capacity for the
+entry guard position, not (only) in having enough distinct relays.
+Which bandwidth value would we use here?
+\item Roger suggests coming up with a better metric than ``WFU since we
+first saw a relay.''
+He says ``it seems wrong to make something that we saw earlier have a
+worse WFU than something we saw later, even if they've had identical
+uptimes in that second period.''
+What would be good candidate metrics?
+\item Ponder finding another metric than WFU for future observations.  In
+particular, with the current WFU parameters of $0.95$ and $12$ hours, the
+WFU reaches up to 4 months into the future.  It seems useful to weight
+uptime in the near future higher than uptime in the farther future, but
+maybe we should use parameters to limit the interval to $1$ or $2$ months.
+\end{itemize}
+}
+
+\section{Forming stable hidden-service directory sets}
+
+{\it
+In this section we should evaluate the current requirements for getting
+the \texttt{HSDir} flag.
+Also, what happened to the number of relays with the \texttt{HSDir} flag
+in August 2010?
+}
+
+\section{Selecting stable relays for a fallback consensus}
+
+{\it
+Is the concept of a fallback consensus still worth considering?
+If so, we should analyze how to identify those relays that are most likely
+to be around and reachable under the same IP address.
+The result of this analysis could lead to adding a new \texttt{Longterm}
+(or \texttt{Fallback}?) flag as suggested in proposal 146.
+% TODO Correctly cite proposal 146 here.  -KL
+Maybe the analysis of bridges on stable IP addresses should come first,
+though.
+}
+
+\section{Distributing bridges with stable IP addresses}
+
+{\it
+A possible outcome of this analysis could be to add a new flag
+\texttt{StableAddress} (similar to the \texttt{Longterm} flag from the
+previous section) to bridge network statuses and to change BridgeDB to
+include at least one bridge with this flag in its results.
+One of the challenges of this analysis will be to connect sanitized bridge
+descriptors from two months with each other.
+The sanitized IP addresses of two bridges in two months do not match,
+because we're using a new secret key as input to the hash function every
+month.
+We might be able to correlate the descriptors of running bridges via their
+descriptor publication times or bridge statistics.
+But if that fails, we'll have to run the analysis with only 1 month of
+data at a time.
+}
+
+\section{Discussion and future work}
+
+The approach taken in this analysis was to select relays that are most
+stable in a given context based on their history.
+A different angle to obtain higher relay stability might be to identify
+what properties of a relay have a positive or negative impact on its
+stability.
+For example, relays running a given operating system or given Tor software
+version might have a higher stability than others.
+Possible consequences could be to facilitate setting up relays on a given
+operating system or to improve the upgrade process of the Tor software.
+
+\end{document}
+
diff --git a/task-2911/wfu-sim/SimulateWeightedFractionalUptime.java b/task-2911/wfu-sim/SimulateWeightedFractionalUptime.java
new file mode 100644
index 0000000..6a2d7a9
--- /dev/null
+++ b/task-2911/wfu-sim/SimulateWeightedFractionalUptime.java
@@ -0,0 +1,314 @@
+/**
+ * Simulate variation of weighted fractional uptime on Guard relays.  In
+ * a first step, parse network status consensus in consensuses/ from last
+ * to first, calculate future weighted fractional uptimes for all relays,
+ * and write them to disk as one file per network status in
+ * fwfu/$filename.  (Skip this step if there is already a fwfu/
+ * directory.)  In a second step, parse the network status consensuses
+ * again, but this time from first to last, calculate past weighted
+ * fractional uptimes for all relays, form relay subsets based on minimal
+ * WFU, look up what the mean future WFU would be for a subset, and write
+ * results to wfu-sim.csv to disk. */
+import java.io.*;
+import java.text.*;
+import java.util.*;
public class SimulateWeightedFractionalUptime {

  /* Length of one weighting interval: every 12 hours, all weighted
   * uptimes and weighted total times are multiplied by 0.95. */
  private static final long WEIGHTING_INTERVAL_MILLIS =
      12L * 60L * 60L * 1000L;

  /* Date pattern used at the start of consensus file names. */
  private static final String FILE_DATE_PATTERN = "yyyy-MM-dd-HH-mm-ss";

  /* Length of a base-64 encoded relay fingerprint in an "r" line. */
  private static final int BASE64_FINGERPRINT_LENGTH =
      "AAAAAAAAAAAAAAAAAAAAAAAAAAA".length();

  /**
   * Simulates the effect of varying the required weighted fractional
   * uptime (WFU) for becoming an entry guard.  First pass (only run if
   * there is no fwfu/ directory yet): parse consensuses in reverse
   * order and write every relay's future WFU to fwfu/$consensusname.
   * Second pass: parse consensuses in forward order, compute past WFUs,
   * and write simulation results to wfu-sim.csv.
   */
  public static void main(String[] args) throws Exception {

    /* Measure how long this execution takes. */
    long started = System.currentTimeMillis();

    /* Decide whether we need to do the reverse run, or if we can use
     * previous results. */
    if (!new File("fwfu").exists()) {
      writeFutureWfus();
    }

    runSimulation();

    /* Print how long this execution took and exit. */
    System.out.println("Execution took " + ((System.currentTimeMillis()
        - started) / (60L * 1000L)) + " minutes.");
  }

  /** Returns a date formatter for the given pattern using UTC. */
  private static SimpleDateFormat newUtcFormatter(String pattern) {
    SimpleDateFormat formatter = new SimpleDateFormat(pattern);
    formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
    return formatter;
  }

  /**
   * Scans consensuses/ recursively and returns all contained consensus
   * files, sorted by the given comparator.  A null comparator sorts by
   * file name, which is chronological (forward) order.
   */
  private static SortedSet<File> scanConsensuses(Comparator<File> order) {
    SortedSet<File> allConsensuses = new TreeSet<File>(order);
    Stack<File> files = new Stack<File>();
    files.add(new File("consensuses"));
    while (!files.isEmpty()) {
      File file = files.pop();
      if (file.isDirectory()) {
        files.addAll(Arrays.asList(file.listFiles()));
      } else if (file.getName().endsWith("-consensus")) {
        allConsensuses.add(file);
      }
    }
    return allConsensuses;
  }

  /** Extracts the valid-after time in milliseconds since the epoch from
   * the given consensus file's name. */
  private static long parseValidAfter(File consensus,
      SimpleDateFormat formatter) throws ParseException {
    return formatter.parse(consensus.getName().substring(0,
        FILE_DATE_PATTERN.length())).getTime();
  }

  /**
   * Weights both uptime and total time of all known relays with 0.95
   * (or 19/20, since these are long integers) and removes all relays
   * with a weighted fractional uptime below 1/10000.
   */
  private static void weightDown(SortedMap<String, long[]> knownRelays) {
    Set<String> relaysToRemove = new HashSet<String>();
    for (Map.Entry<String, long[]> e : knownRelays.entrySet()) {
      long[] w = e.getValue();
      w[0] = w[0] * 19L / 20L;
      w[1] = w[1] * 19L / 20L;
      /* Guard against w[1] == 0, which would otherwise crash the
       * division below with an ArithmeticException. */
      if (w[1] == 0L || ((10000L * w[0]) / w[1]) < 1L) {
        relaysToRemove.add(e.getKey());
      }
    }
    for (String fingerprint : relaysToRemove) {
      knownRelays.remove(fingerprint);
    }
  }

  /**
   * Parses the base-64 encoded fingerprints of all relays with the
   * Running flag from the given consensus.  Returns null if the
   * consensus is malformed or truncated and should be skipped.  (The
   * previous code logged "Skipping consensus." on an illegal line but
   * only skipped that line; now the whole consensus is really skipped,
   * as the message promises.)
   */
  private static Set<String> parseRunningFingerprints(File consensus)
      throws IOException {
    Set<String> fingerprints = new HashSet<String>();
    BufferedReader br = new BufferedReader(new FileReader(consensus));
    try {
      String line, rLine = null;
      while ((line = br.readLine()) != null) {
        if (line.startsWith("r ")) {
          rLine = line;
        } else if (line.startsWith("s ") && line.contains(" Running")) {
          /* Guard against an "s" line without a preceding "r" line. */
          String[] parts = rLine == null ? new String[0]
              : rLine.split(" ");
          if (parts.length < 3
              || parts[2].length() != BASE64_FINGERPRINT_LENGTH) {
            System.out.println("Illegal line '" + rLine + "' in "
                + consensus + ". Skipping consensus.");
            return null;
          }
          fingerprints.add(parts[2]);
        } else if (line.startsWith("directory-signature ")) {
          /* Reached the end of the relay list; we're done. */
          return fingerprints;
        }
      }
      System.out.println("Did not reach the consensus end of "
          + consensus + ". Skipping consensus.");
      return null;
    } finally {
      br.close();
    }
  }

  /**
   * Increments weighted uptime for all running relays and total
   * weighted time for all known relays by 3600 seconds each.  New
   * relays start at { 3600, 0 } and immediately receive their first
   * 3600 seconds of total time below, yielding a WFU of 1.
   */
  private static void incrementUptimes(
      SortedMap<String, long[]> knownRelays,
      Set<String> runningFingerprints) {
    for (String fingerprint : runningFingerprints) {
      long[] w = knownRelays.get(fingerprint);
      if (w == null) {
        knownRelays.put(fingerprint, new long[] { 3600L, 0L });
      } else {
        w[0] += 3600L;
      }
    }
    for (long[] w : knownRelays.values()) {
      w[1] += 3600L;
    }
  }

  /**
   * First pass: parses all consensuses in reverse order and writes
   * every relay's future WFU (scaled by 10000) to fwfu/$consensusname.
   */
  private static void writeFutureWfus() throws Exception {
    SortedSet<File> allConsensuses =
        scanConsensuses(Collections.<File>reverseOrder());
    if (allConsensuses.isEmpty()) {
      System.out.println("No consensus files found in consensuses/. "
          + "Exiting!");
      System.exit(1);
    }

    /* For each relay as identified by its base-64 encoded fingerprint,
     * track weighted uptime and total weighted time in a long[2]. */
    SortedMap<String, long[]> knownRelays =
        new TreeMap<String, long[]>();

    SimpleDateFormat formatter = newUtcFormatter(FILE_DATE_PATTERN);
    long nextWeightingInterval = parseValidAfter(allConsensuses.first(),
        formatter) / WEIGHTING_INTERVAL_MILLIS;
    for (File consensus : allConsensuses) {

      /* Every 12 hours (counting backwards in time), weight down and
       * possibly expire all known relays. */
      long weightingInterval = parseValidAfter(consensus, formatter)
          / WEIGHTING_INTERVAL_MILLIS;
      while (weightingInterval < nextWeightingInterval) {
        weightDown(knownRelays);
        nextWeightingInterval -= 1L;
      }

      Set<String> fingerprints = parseRunningFingerprints(consensus);
      if (fingerprints == null) {
        continue;
      }
      incrementUptimes(knownRelays, fingerprints);

      /* Write future WFUs for all known relays to disk. */
      File fwfuFile = new File("fwfu", consensus.getName());
      fwfuFile.getParentFile().mkdirs();
      BufferedWriter bw = new BufferedWriter(new FileWriter(fwfuFile));
      try {
        for (Map.Entry<String, long[]> e : knownRelays.entrySet()) {
          long[] w = e.getValue();
          bw.write(e.getKey() + " " + ((10000L * w[0]) / w[1]) + "\n");
        }
      } finally {
        bw.close();
      }
    }
  }

  /**
   * Reads previously calculated future WFUs from fwfu/$consensusName,
   * exiting if the file is missing.  (Also closes the reader, fixing a
   * resource leak in the previous code.)
   */
  private static Map<String, Long> readFutureWfus(String consensusName)
      throws IOException {
    File fwfuFile = new File("fwfu", consensusName);
    if (!fwfuFile.exists()) {
      System.out.println("Could not find file " + fwfuFile
          + ". Exiting!");
      System.exit(1);
    }
    Map<String, Long> fwfus = new HashMap<String, Long>();
    BufferedReader br = new BufferedReader(new FileReader(fwfuFile));
    try {
      String line;
      while ((line = br.readLine()) != null) {
        String[] parts = line.split(" ");
        fwfus.put(parts[0], Long.parseLong(parts[1]));
      }
    } finally {
      br.close();
    }
    return fwfus;
  }

  /** Returns the p-th percentile (counted from the highest value) of
   * the given list, which must be sorted in descending order and
   * non-empty. */
  private static long percentile(List<Long> sortedDescending, int p) {
    return sortedDescending.get((p * sortedDescending.size()) / 100);
  }

  /**
   * Second pass: parses all consensuses in forward order, computes past
   * WFUs, and writes per consensus and per required WFU the mean and
   * 85th/90th/95th percentile future WFU of all relays meeting the
   * requirement, plus the fraction of selected relays, to wfu-sim.csv.
   */
  private static void runSimulation() throws Exception {

    /* Run the simulation for the following WFU/10000 values: */
    long[] requiredWFUs = new long[] { 9000, 9100, 9200, 9300, 9400,
        9500, 9600, 9700, 9750, 9800, 9850, 9900, 9950, 9975, 9990,
        9999 };
    BufferedWriter bw = new BufferedWriter(new FileWriter("wfu-sim.csv"));
    try {

      /* Write the CSV header line. */
      bw.write("time");
      for (long requiredWFU : requiredWFUs) {
        bw.write(",wfu" + requiredWFU + ",perc85wfu" + requiredWFU
            + ",perc90wfu" + requiredWFU + ",perc95wfu" + requiredWFU
            + ",guards" + requiredWFU);
      }
      bw.write("\n");

      SortedSet<File> allConsensuses = scanConsensuses(null);
      if (allConsensuses.isEmpty()) {
        System.out.println("No consensus files found in consensuses/. "
            + "Exiting!");
        System.exit(1);
      }

      /* For each relay as identified by its base-64 encoded
       * fingerprint, track weighted uptime and total weighted time in a
       * long[2]. */
      SortedMap<String, long[]> knownRelays =
          new TreeMap<String, long[]>();

      SimpleDateFormat formatter = newUtcFormatter(FILE_DATE_PATTERN);
      SimpleDateFormat isoFormatter =
          newUtcFormatter("yyyy-MM-dd HH:mm:ss");
      long nextWeightingInterval = parseValidAfter(
          allConsensuses.first(), formatter)
          / WEIGHTING_INTERVAL_MILLIS;
      for (File consensus : allConsensuses) {

        /* Every 12 hours, weight down and possibly expire all known
         * relays. */
        long validAfter = parseValidAfter(consensus, formatter);
        long weightingInterval = validAfter / WEIGHTING_INTERVAL_MILLIS;
        while (weightingInterval > nextWeightingInterval) {
          weightDown(knownRelays);
          nextWeightingInterval += 1L;
        }

        Set<String> fingerprints = parseRunningFingerprints(consensus);
        if (fingerprints == null) {
          continue;
        }
        incrementUptimes(knownRelays, fingerprints);

        /* Read previously calculated future WFUs from disk. */
        Map<String, Long> fwfus = readFutureWfus(consensus.getName());

        /* Run the simulation for the relays in the current consensus
         * for various required WFUs. */
        bw.write(isoFormatter.format(validAfter));
        long totalRelays = (long) fingerprints.size();
        for (long requiredWFU : requiredWFUs) {
          long selectedRelays = 0L, totalFwfu = 0L;
          List<Long> fwfuList = new ArrayList<Long>();
          for (String fingerprint : fingerprints) {
            long[] pwfu = knownRelays.get(fingerprint);
            long wfu = (10000L * pwfu[0]) / pwfu[1];
            if (wfu >= requiredWFU) {
              selectedRelays += 1L;
              Long fwfu = fwfus.get(fingerprint);
              if (fwfu != null) {
                totalFwfu += fwfu;
                fwfuList.add(fwfu);
              }
            }
          }
          /* Also write NA if no selected relay has a known future WFU;
           * the previous code would have crashed on the empty list. */
          if (selectedRelays == 0L || fwfuList.isEmpty()) {
            bw.write(",NA,NA,NA,NA");
          } else {
            /* Sort descending, so that percentile p is the value that
             * p percent of selected relays meet or exceed. */
            Collections.sort(fwfuList, Collections.reverseOrder());
            bw.write("," + (totalFwfu / selectedRelays)
                + "," + percentile(fwfuList, 85)
                + "," + percentile(fwfuList, 90)
                + "," + percentile(fwfuList, 95));
          }
          bw.write("," + (totalRelays == 0L ? 0L
              : (10000L * selectedRelays / totalRelays)));
        }
        bw.write("\n");
      }
    } finally {
      bw.close();
    }
  }
}
+
diff --git a/task-2911/wfu-sim/wfu-sim.R b/task-2911/wfu-sim/wfu-sim.R
new file mode 100644
index 0000000..149ce6d
--- /dev/null
+++ b/task-2911/wfu-sim/wfu-sim.R
@@ -0,0 +1,57 @@
+# Plot the results of the WFU simulation (wfu-sim.csv, written by
+# SimulateWeightedFractionalUptime.java): for a selection of required
+# past WFU values, show the fraction of relays meeting the requirement
+# (x axis) against their mean future WFU (y axis) as a path over time.
+library(ggplot2)
+data <- read.csv("wfu-sim.csv", stringsAsFactors = FALSE)
+
+# Restrict to 2010 and reduce hourly data points to daily means.
+d <- data[data$time >= '2010' & data$time < '2011', ]
+d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean)
+# Stack the five simulated WFU requirements into long format; the CSV
+# stores both fractions as values scaled by 10000.
+d <- rbind(
+  data.frame(x = d$guards9000, y = d$wfu9000, sim = "90 %"),
+  data.frame(x = d$guards9500, y = d$wfu9500, sim = "95 %"),
+  data.frame(x = d$guards9800, y = d$wfu9800, sim = "98 % (default)"),
+  data.frame(x = d$guards9900, y = d$wfu9900, sim = "99 %"),
+  data.frame(x = d$guards9990, y = d$wfu9990, sim = "99.9 %"))
+# One facet per required WFU; divide by 10000 to get fractions and
+# format both axes as percentages.
+ggplot(d, aes(x = x / 10000.0, y = y / 10000.0)) +
+geom_path() +
+facet_wrap(~ sim) +
+scale_x_continuous("\nFraction of relays meeting WFU requirement",
+  formatter = "percent") +
+scale_y_continuous("Mean WFU in the future\n", formatter = "percent")
+ggsave(filename = "wfu-sim.pdf", width = 8, height = 5, dpi = 100)
+
+## Commented out, because graph is meaningless in b/w.
+#d <- data[data$time >= '2010' & data$time < '2011', ]
+#d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean)
+#d <- rbind(
+#  data.frame(x = d$guards9000, y = d$wfu9000, sim = "90 %"),
+#  data.frame(x = d$guards9500, y = d$wfu9500, sim = "95 %"),
+#  data.frame(x = d$guards9800, y = d$wfu9800, sim = "98 % (default)"),
+#  data.frame(x = d$guards9900, y = d$wfu9900, sim = "99 %"),
+#  data.frame(x = d$guards9990, y = d$wfu9990, sim = "99.9 %"))
+#ggplot(d, aes(x = x / 10000.0, y = y / 10000.0, colour = sim)) +
+#geom_path() +
+#scale_x_continuous("\nFraction of relays meeting WFU requirement",
+#  formatter = "percent") +#, trans = "reverse") +
+#scale_y_continuous("Mean WFU    \nin the future    ",
+#  formatter = "percent") +
+#scale_colour_hue("Required WFU") +
+#opts(axis.title.x = theme_text(size = 12 * 0.8, face = "bold",
+#  hjust = 0.5),
+#  axis.title.y = theme_text(size = 12 * 0.8, face = "bold", vjust = 0.5,
+#  hjust = 1))
+#ggsave(filename = "wfu-sim.pdf", width = 8, height = 5, dpi = 100)
+
+## Commented out, because the time plot is not as useful as expected.
+#simulations <- paste("wfu", rev(c(9000, 9200, 9400, 9600, 9800)),
+#  sep = "")
+#d <- data[data$time >= '2010' & data$time < '2011',
+#  c("time", simulations)]
+#d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean)
+#d <- melt(d, id.vars = 1)
+#ggplot(d, aes(x = date, y = value / 10000.0, colour = variable)) +
+#geom_line() +
+#scale_x_date("", major = "3 months", minor = "1 month",
+#  format = "%b %Y") +
+#scale_y_continuous("Empirical future WFU\n", formatter = "percent") +
+#scale_colour_hue("Required past WFU\n", breaks = simulations,
+#  labels = paste(as.numeric(substr(simulations, 4, 9)) / 100.0, "%"))
+#ggsave(filename = "wfu-sim-time.pdf", width = 8, height = 5, dpi = 100)
+





More information about the tor-commits mailing list