commit cff86510fd86e73a2f66f21ac8b47068d187d972 Author: Karsten Loesing karsten.loesing@gmx.net Date: Mon May 30 11:16:10 2011 +0200
Add simulation code and LaTeX sources of #2911 draft. --- task-2911/.gitignore | 9 + task-2911/README | 105 ++++++ .../mtbf-sim/SimulateMeanTimeBetweenFailure.java | 351 ++++++++++++++++++++ task-2911/mtbf-sim/mtbf-sim.R | 73 ++++ task-2911/report.tex | 295 ++++++++++++++++ .../wfu-sim/SimulateWeightedFractionalUptime.java | 314 +++++++++++++++++ task-2911/wfu-sim/wfu-sim.R | 57 ++++ 7 files changed, 1204 insertions(+), 0 deletions(-)
diff --git a/task-2911/.gitignore b/task-2911/.gitignore new file mode 100644 index 0000000..d2480c1 --- /dev/null +++ b/task-2911/.gitignore @@ -0,0 +1,9 @@ +*.class +mtbf-sim/tunf/ +wfu-sim/fwfu/ +wfu-sim/consensuses/ +*.csv +*.aux +*.log +*.pdf + diff --git a/task-2911/README b/task-2911/README new file mode 100644 index 0000000..bcefa2d --- /dev/null +++ b/task-2911/README @@ -0,0 +1,105 @@ +Tech report: An Analysis of Tor Relay Stability +=============================================== + +Simulation of MTBF requirements +------------------------------- + +Change to the MTBF simulation directory: + + $ cd mtbf-sim/ + +Export status entries and server descriptor parts from the metrics +database, once in reverse and once in forward order. Note that each file +will be 2.2G large for roughly 2.5 years of data. Plan for a buffer of at +least 4 months before and after the interval to investigate: + + tordir=> \o running-relays-reverse.csv + tordir=> SELECT statusentry.validafter, + statusentry.fingerprint, + CASE WHEN descriptor.uptime IS NULL THEN FALSE ELSE + statusentry.validafter - descriptor.published + + descriptor.uptime * '1 second'::INTERVAL < + '01:00:00'::INTERVAL END AS restarted + FROM statusentry + LEFT JOIN descriptor + ON statusentry.descriptor = descriptor.descriptor + WHERE statusentry.isrunning + AND statusentry.validafter >= '2009-01-01 00:00:00' + ORDER BY statusentry.validafter DESC, statusentry.fingerprint; + tordir=> \o + tordir=> \o running-relays-forward.csv + tordir=> SELECT statusentry.validafter, + statusentry.fingerprint, + CASE WHEN descriptor.uptime IS NULL THEN FALSE ELSE + statusentry.validafter - descriptor.published + + descriptor.uptime * '1 second'::INTERVAL < + '01:00:00'::INTERVAL END AS restarted + FROM statusentry + LEFT JOIN descriptor + ON statusentry.descriptor = descriptor.descriptor + WHERE statusentry.isrunning + AND statusentry.validafter >= '2009-01-01 00:00:00' + ORDER BY statusentry.validafter, 
statusentry.fingerprint; + tordir=> \o + +Run the simulation consisting of a reverse and a forward run. The results +of the reverse run will be stored to the tunf/ directory and will be +re-used in subsequent simulations. Delete the tunf/ directory to repeat +the reverse run, too. + + $ javac SimulateMeanTimeBetweenFailure.java + $ java SimulateMeanTimeBetweenFailure + +Plot the results: + + $ R --slave -f mtbf-sim.R + +Once you're satisfied with the result, copy the graph to the parent +directory to include it in the report: + + $ cp mtbf-sim.pdf ../ + + +Simulation of WFU requirements +------------------------------ + +Change to the WFU simulation directory: + + $ cd wfu-sim/ + +Create a consensuses/ directory and put the consensus files of the +interval to investigate plus 4+ months before and 4+ months after in it: + + $ mkdir consensuses/ + $ ln -s $extracted/consensuses-20* . + +Run the simulation that first parses consensuses from last to first and +then from first to last. The results from the reverse direction will be +stored in the fwfu/ directory and re-used in subsequent simulations. +Delete the fwfu/ directory to re-run both simulation parts. + + $ javac SimulateWeightedFractionalUptime.java + $ java SimulateWeightedFractionalUptime + +Plot the results: + + $ R --slave -f wfu-sim.R + +Copy the graph to the parent directory to include it in the report: + + $ cp wfu-sim.pdf ../ + + +Compiling the report +-------------------- + +Copy the generated graphs to the base directory, unless you have done so +before: + + $ cp mtbf-sim/mtbf-sim.pdf . + $ cp wfu-sim/wfu-sim.pdf . + +Compile the report: + + $ pdflatex report.tex + diff --git a/task-2911/mtbf-sim/SimulateMeanTimeBetweenFailure.java b/task-2911/mtbf-sim/SimulateMeanTimeBetweenFailure.java new file mode 100644 index 0000000..cd73f82 --- /dev/null +++ b/task-2911/mtbf-sim/SimulateMeanTimeBetweenFailure.java @@ -0,0 +1,351 @@ +/** + * Simulate variation of mean time between failure on Stable relays. 
The + * simulation is based on the previously generated SQL results containing + * network status entries and parts of server descriptors. In a first + * step, parse the SQL results that are in descending order to calculate + * time until next failure for all relays and write them to disk as one + * file per network status in tunf/$filename. (Skip this step if there is + * already a tunf/ directory.) In a second step, parse the network + * statuses again, but this time from first to last, calculate mean times + * between failure for all relays, form relay subsets based on minimal + * MTBF, look up what the time until next failure would be for a subset, + * and write results to mtbf-sim.csv to disk. */ +import java.io.*; +import java.text.*; +import java.util.*; +public class SimulateMeanTimeBetweenFailure { + public static void main(String[] args) throws Exception { + + /* Measure how long this execution takes. */ + long started = System.currentTimeMillis(); + + /* Decide whether we need to do the reverse run, or if we can use + * previous results. */ + if (!new File("tunf").exists()) { + + /* For each relay as identified by its hex encoded fingerprint, + * track time until next failure in seconds in a long. */ + SortedMap<String, Long> knownRelays = new TreeMap<String, Long>(); + + /* Parse previously exported network status entries in reverse + * order. NOTE(review): the reader br opened below is never closed + * in this reverse run. 
*/ + SimpleDateFormat formatter = new SimpleDateFormat( + "yyyy-MM-dd-HH-mm-ss"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + SimpleDateFormat isoFormatter = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + isoFormatter.setTimeZone(TimeZone.getTimeZone("UTC")); + Map<String, String> runningRelays = new HashMap<String, String>(); + BufferedReader br = new BufferedReader(new FileReader( + "running-relays-reverse.csv")); + String line, lastValidAfter = null, lastButOneValidAfter = null; + while ((line = br.readLine()) != null) { + if (!line.startsWith("20")) { + continue; + } + String[] parts = line.split(","); + String validAfter = parts[0]; + if (lastValidAfter != null && + !lastValidAfter.equals(validAfter)) { + + /* We just parsed all lines of a consensus. Let's write times + * until next failure to disk for all running relays and update + * our internal history. */ + if (lastButOneValidAfter == null) { + lastButOneValidAfter = lastValidAfter; + } + long lastValidAfterMillis = isoFormatter.parse(lastValidAfter). + getTime(); + File tunfFile = new File("tunf", + formatter.format(lastValidAfterMillis)); + tunfFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + tunfFile)); + long secondsSinceLastValidAfter = + (isoFormatter.parse(lastButOneValidAfter).getTime() + - lastValidAfterMillis) / 1000L; + + /* Iterate over our history first and see if these relays have + * been running in the considered consensus. Remember changes + * to our history and modify it below to avoid concurrent + * modification errors. */ + Set<String> removeFromHistory = new HashSet<String>(); + Map<String, Long> addToHistory = new HashMap<String, Long>(); + for (Map.Entry<String, Long> e : knownRelays.entrySet()) { + String fingerprint = e.getKey(); + if (runningRelays.containsKey(fingerprint)) { + + /* This relay has been running, so write it to the output + * file and update our history. 
Despite its name, hoursUntilFailure + * holds seconds: it is added to secondsSinceLastValidAfter. 
*/ + long hoursUntilFailure = e.getValue(); + bw.write(fingerprint + "," + (secondsSinceLastValidAfter + + hoursUntilFailure) + "\n"); + boolean restarted = runningRelays.get(fingerprint). + split(",")[2].equals("t"); + if (restarted) { + removeFromHistory.add(fingerprint); + } else { + addToHistory.put(fingerprint, secondsSinceLastValidAfter + + hoursUntilFailure); + } + runningRelays.remove(fingerprint); + } else { + + /* This relay has not been running, so remove it from our + * history. */ + removeFromHistory.add(fingerprint); + } + } + + /* Update our history for real now. We couldn't do this above, + * or we'd have modified the set we've been iterating over. */ + for (String f : removeFromHistory) { + knownRelays.remove(f); + } + for (Map.Entry<String, Long> e : addToHistory.entrySet()) { + knownRelays.put(e.getKey(), e.getValue()); + } + + /* Iterate over the relays that we found in the consensus, but + * that we didn't have in our history. */ + for (Map.Entry<String, String> e : runningRelays.entrySet()) { + String fingerprint = e.getKey(); + bw.write(fingerprint + ",0\n"); + boolean restarted = e.getValue().split(",")[2].equals("t"); + if (!restarted) { + knownRelays.put(fingerprint, 0L); + } + } + bw.close(); + + /* Prepare for next consensus. */ + runningRelays = new HashMap<String, String>(); + lastButOneValidAfter = lastValidAfter; + } + + /* Add the running relay lines to a map that we parse once we have + * all lines of a consensus. 
*/ + String fingerprint = parts[1]; + runningRelays.put(fingerprint, line); + lastValidAfter = validAfter; + } + } + + /* Run the simulation for the following WMTBF percentiles: */ + List<Long> requiredWMTBFs = new ArrayList<Long>(); + for (long l : new long[] { 20, 30, 40, 50, 60, 70, 80 }) { + requiredWMTBFs.add(l); + } + Collections.sort(requiredWMTBFs); + BufferedWriter bw = new BufferedWriter(new FileWriter( + "mtbf-sim.csv")); + bw.write("time"); + for (long requiredWMTBF : requiredWMTBFs) { + bw.write(",mtunf" + requiredWMTBF + ",perc75tunf" + requiredWMTBF + + ",perc80tunf" + requiredWMTBF + ",perc85tunf" + requiredWMTBF + + ",perc90tunf" + requiredWMTBF + ",perc95tunf" + requiredWMTBF + + ",wmtbf" + requiredWMTBF); + } + bw.write("\n"); + + /* For each relay as identified by its fingerprint, track weighted + * run length (index 0), total run weights (index 1), and current + * run length (index 2) in a double[3]. NOTE(review): the reverse + * run above describes this same CSV column as hex encoded, not + * base-64 -- confirm the encoding. */ + SortedMap<String, double[]> knownRelays = + new TreeMap<String, double[]>(); + + /* Parse previously exported network status entries again, but this + * time in forward order. NOTE(review): the reader br opened below + * is never closed. 
*/ + SimpleDateFormat formatter = new SimpleDateFormat( + "yyyy-MM-dd-HH-mm-ss"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + SimpleDateFormat isoFormatter = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + isoFormatter.setTimeZone(TimeZone.getTimeZone("UTC")); + Map<String, String> runningRelays = new HashMap<String, String>(), + lastRunningRelays = new HashMap<String, String>(); + BufferedReader br = new BufferedReader(new FileReader( + "running-relays-forward.csv")); + String line, lastValidAfter = null, firstValidAfter = null; + long nextWeightingInterval = -1L; + while ((line = br.readLine()) != null) { + if (!line.startsWith("20")) { + continue; + } + String[] parts = line.split(","); + String validAfter = parts[0]; + if (firstValidAfter == null) { + firstValidAfter = validAfter; + } + if (lastValidAfter != null && + !lastValidAfter.equals(validAfter)) { + + /* We just parsed all lines of a consensus. First, see if 12 + * hours have passed since we last discounted weighted run lengths + * and total run weights. If so, discount both variables for all + * known relays by factor 0.95 (both are stored as doubles). + * NOTE(review): relaysToRemove is never filled below, so no relay + * is actually removed even if its total run weight falls below + * 1/10000 -- confirm whether that removal was intended. */ + long lastValidAfterMillis = isoFormatter.parse(lastValidAfter). + getTime(); + long validAfterMillis = isoFormatter.parse(validAfter).getTime(); + long weightingInterval = validAfterMillis + / (12L * 60L * 60L * 1000L); + if (nextWeightingInterval < 0L) { + nextWeightingInterval = weightingInterval; + } + while (weightingInterval > nextWeightingInterval) { + Set<String> relaysToRemove = new HashSet<String>(); + for (Map.Entry<String, double[]> e : knownRelays.entrySet()) { + double[] w = e.getValue(); + w[0] *= 0.95; + w[1] *= 0.95; + } + for (String fingerprint : relaysToRemove) { + knownRelays.remove(fingerprint); + } + nextWeightingInterval += 1L; + } + + /* Update history for running relays. 
Start by iterating over all + * relays in the history, see if they're running now and whether + * they have been restarted. Distinguish four cases for relays in + * the history: 1) still running, 2) still running but restarted, + * 3) started in this consensus, 4) stopped in this consensus. */ + double secondsSinceLastValidAfter = + (double) ((validAfterMillis - lastValidAfterMillis) / 1000L); + Set<String> updatedRelays = new HashSet<String>(); + for (Map.Entry<String, double[]> e : knownRelays.entrySet()) { + String fingerprint = e.getKey(); + double[] w = e.getValue(); + if (runningRelays.containsKey(fingerprint)) { + if (w[2] > 0.1) { + if (!runningRelays.get(fingerprint).split(",")[2]. + equals("t")) { + + /* Case 1) still running: */ + w[2] += secondsSinceLastValidAfter; + } else { + + /* Case 2) still running but restarted: */ + w[0] += w[2]; + w[1] += 1.0; + w[2] = secondsSinceLastValidAfter; + } + } else { + + /* Case 3) started in this consensus: */ + w[2] = secondsSinceLastValidAfter; + } + + /* Mark relay as already processed, or we'd add it to the + * history as a new relay below. */ + updatedRelays.add(fingerprint); + } else if (w[2] > 0.1) { + + /* Case 4) stopped in this consensus: */ + w[0] += w[2]; + w[1] += 1.0; + w[2] = 0.0; + } + } + + /* Iterate over the set of currently running relays and add those + * that we haven't processed above to our history. */ + for (String fingerprint : runningRelays.keySet()) { + if (!updatedRelays.contains(fingerprint)) { + updatedRelays.add(fingerprint); + knownRelays.put(fingerprint, new double[] { 0.0, 0.0, + secondsSinceLastValidAfter }); + } + } + + /* Calculate WMTBFs for all running relays and put them in a list + * that we can sort by WMTBF in descending order. Each entry is the + * WMTBF zero-padded to 12 digits followed by the fingerprint, so + * that string order follows numeric WMTBF order. 
*/ + List<String> wmtbfs = new ArrayList<String>(); + for (String fingerprint : runningRelays.keySet()) { + double[] w = knownRelays.get(fingerprint); + double totalRunLength = w[0] + w[2]; + double totalWeights = w[1] + (w[2] > 0.1 ? 
1.0 : 0.0); + long wmtbf = totalWeights < 0.0001 ? 0 + : (long) (totalRunLength / totalWeights); + wmtbfs.add(String.format("%012d %s", wmtbf, fingerprint)); + } + Collections.sort(wmtbfs, Collections.reverseOrder()); + + /* Read previously calculated TUNFs from disk. */ + Map<String, Long> tunfs = new HashMap<String, Long>(); + File tunfFile = new File("tunf", + formatter.format(lastValidAfterMillis)); + if (!tunfFile.exists()) { + if (!lastValidAfter.equals(firstValidAfter)) { + System.out.println("Could not find file " + tunfFile + + ". Skipping simulation!"); + } + } else { + BufferedReader tunfBr = new BufferedReader(new FileReader( + tunfFile)); + String tunfLine; + while ((tunfLine = tunfBr.readLine()) != null) { + String[] tunfParts = tunfLine.split(","); + tunfs.put(tunfParts[0], Long.parseLong(tunfParts[1])); + } + tunfBr.close(); + + /* Run the simulation for the relays in the current consensus + * for the various required WMTBF percentiles, walking the relays + * from highest to lowest WMTBF. NOTE(review): tunfs.get() below + * is unboxed into a long, so a relay missing from the tunf file + * would raise a NullPointerException. 
*/ + bw.write(isoFormatter.format(lastValidAfterMillis)); + long totalRelays = (long) wmtbfs.size(), selectedRelays = 0L, + totalTunf = 0L, minimalWmtbf = 0L; + int simulationIndex = 0; + List<Long> tunfList = new ArrayList<Long>(); + for (String relay : wmtbfs) { + while (simulationIndex < requiredWMTBFs.size() && + selectedRelays * 100L > totalRelays + * requiredWMTBFs.get(simulationIndex)) { + if (selectedRelays == 0L) { + bw.write(",NA,NA,NA,NA,NA,NA"); + } else { + Collections.sort(tunfList, Collections.reverseOrder()); + long perc75 = tunfList.get((75 * tunfList.size()) / 100); + long perc80 = tunfList.get((80 * tunfList.size()) / 100); + long perc85 = tunfList.get((85 * tunfList.size()) / 100); + long perc90 = tunfList.get((90 * tunfList.size()) / 100); + long perc95 = tunfList.get((95 * tunfList.size()) / 100); + bw.write("," + (totalTunf / selectedRelays) + "," + perc75 + + "," + perc80 + "," + perc85 + "," + perc90 + "," + + perc95); + } + bw.write("," + minimalWmtbf); + simulationIndex++; + } + String[] wmtbfParts = 
relay.split(" "); + minimalWmtbf = Long.parseLong(wmtbfParts[0]); + String fingerprint = wmtbfParts[1]; + long tunf = tunfs.get(fingerprint); + totalTunf += tunf; + tunfList.add(tunf); + selectedRelays += 1L; + } + bw.write("\n"); + } + + /* We're done with this consensus. Prepare for the next. + * (lastRunningRelays is assigned here but never read in this + * class.) */ + lastRunningRelays = runningRelays; + runningRelays = new HashMap<String, String>(); + } + + /* Add the running relay lines to a map that we parse once we have + * all lines of a consensus. */ + String fingerprint = parts[1]; + runningRelays.put(fingerprint, line); + lastValidAfter = validAfter; + } + bw.close(); + + /* Print how long this execution took and exit. */ + System.out.println("Execution took " + ((System.currentTimeMillis() + - started) / (60L * 1000L)) + " minutes."); + } +} + diff --git a/task-2911/mtbf-sim/mtbf-sim.R b/task-2911/mtbf-sim/mtbf-sim.R new file mode 100644 index 0000000..a630406 --- /dev/null +++ b/task-2911/mtbf-sim/mtbf-sim.R @@ -0,0 +1,73 @@ +library(ggplot2) + +data <- read.csv("mtbf-sim.csv", stringsAsFactors = FALSE) +d <- data[data$time >= '2010' & data$time < '2011', ] +d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean) +d <- rbind( + data.frame(x = d$wmtbf30, y = d$perc90tunf30, sim = "30 %"), + data.frame(x = d$wmtbf40, y = d$perc90tunf40, sim = "40 %"), + data.frame(x = d$wmtbf50, y = d$perc90tunf50, sim = "50 % (default)"), + data.frame(x = d$wmtbf60, y = d$perc90tunf60, sim = "60 %"), + data.frame(x = d$wmtbf70, y = d$perc90tunf70, sim = "70 %")) +ggplot(d, aes(x = x / (24 * 60 * 60), y = y / (60 * 60))) + +facet_wrap(~ sim) + +geom_path() + +scale_x_continuous("\nRequired WMTBF in days", + breaks = seq(0, max(d$x, na.rm = TRUE) / (24 * 60 * 60), 7), + minor = seq(0, max(d$x, na.rm = TRUE) / (24 * 60 * 60), 1)) + +scale_y_continuous(paste("Time in hours until 10 % of relays\nor ", + "27.1 % of streams have failed\n", sep = ""), + breaks = seq(0, max(d$y, na.rm = TRUE) / (60 * 60), 24)) +ggsave(filename = 
"mtbf-sim.pdf", width = 8, height = 5, dpi = 100) + +## Commented out, because this graph is meaningless in b/w. The graph +## above contains the same data, but can be printed in b/w. +#data <- read.csv("mtbf-sim.csv", stringsAsFactors = FALSE) +#d <- data[data$time >= '2010' & data$time < '2011', ] +#d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean) +#d <- rbind( +# data.frame(x = d$wmtbf70, y = d$perc90tunf70, sim = "70 %"), +# data.frame(x = d$wmtbf60, y = d$perc90tunf60, sim = "60 %"), +# data.frame(x = d$wmtbf50, y = d$perc90tunf50, sim = "50 % (default)"), +# data.frame(x = d$wmtbf40, y = d$perc90tunf40, sim = "40 %"), +# data.frame(x = d$wmtbf30, y = d$perc90tunf30, sim = "30 %")) +#ggplot(d, aes(x = x / (24 * 60 * 60), y = y / (60 * 60), +# colour = sim)) + +#geom_path() + +#scale_x_continuous("\nRequired WMTBF in days", +# breaks = seq(0, max(d$x, na.rm = TRUE) / (24 * 60 * 60), 7), +# minor = seq(0, max(d$x, na.rm = TRUE) / (24 * 60 * 60), 1)) + +#scale_y_continuous(paste("Time until \n10 % of relays or \n", +# "27.1 % of streams \nhave failed \nin hours ", sep = ""), +# breaks = seq(0, max(d$y, na.rm = TRUE) / (60 * 60), 24)) + +#scale_colour_hue("Fraction of relays\nby highest WMTBF", +# breaks = c("30 %", "40 %", "50 % (default)", "60 %", "70 %")) + +#opts(axis.title.x = theme_text(size = 12 * 0.8, face = "bold", +# hjust = 0.5), +# axis.title.y = theme_text(size = 12 * 0.8, face = "bold", vjust = 0.5, +# hjust = 1)) +#ggsave(filename = "mtbf-sim.pdf", width = 8, height = 5, dpi = 100) + +## Commented out, because focusing on the development over time is the +## wrong thing here. 
+#simulations <- paste("mtunf", c(20, 30, 40, 50, 60, 70, 80), +# sep = "") +#d <- data[data$time >= '2010' & data$time < '2011', +# c("time", simulations)] +#d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean) +#d <- melt(d, id.vars = 1) +#ggplot(d, aes(x = date, y = value / (24 * 60 * 60), colour = variable)) + +#geom_line() + +#scale_x_date("", major = "3 months", minor = "1 month", +# format = "%b %Y") + +#scale_y_continuous(paste("Mean time \nuntil next \nfailure \n", +# "in days \n", sep = ""), +# limits = c(0, max(d$value, na.rm = TRUE) / (24 * 60 * 60))) + +#scale_colour_hue(paste("Percentile\nhighest\nweighted mean\n", +# "time between\nfailures", sep = ""), breaks = simulations, +# labels = paste(substr(simulations, 6, 9), +# ifelse(simulations == "mtunf50", "(default)", ""))) + +#opts(axis.title.y = theme_text(size = 12 * 0.8, face = "bold", +# vjust = 0.5, hjust = 1)) +#ggsave(filename = "mtbf-sim1.pdf", width = 8, height = 5, dpi = 100) + diff --git a/task-2911/report.tex b/task-2911/report.tex new file mode 100644 index 0000000..4dc6ab9 --- /dev/null +++ b/task-2911/report.tex @@ -0,0 +1,295 @@ +\documentclass{article} +\usepackage{url} +\usepackage[pdftex]{graphicx} +\usepackage{graphics} +\usepackage{color} +\begin{document} +\title{An Analysis of Tor Relay Stability\\(DRAFT)} +\author{Karsten Loesing\\{\tt karsten@torproject.org}} + +\maketitle + +\section{Introduction} + +The Tor network consists of 2,200 relays and 600 bridges run by +volunteers, some of which are on dedicated servers and some on laptops or +mobile devices. +% TODO Look up more recent relay and bridge numbers. -KL +Obviously, we can expect the relays run on dedicated servers to be more +``stable'' than those on mobile phones. +But it is difficult to draw a line between stable and unstable relays. 
+In most cases it depends on the context which relays count as stable: + +\begin{itemize} +\item A stable relay that is supposed to be part of a circuit for a +\emph{long-running stream} should not go offline during the next day. +\item A stable relay that clients pick as \emph{entry guard} doesn't have +to be running continuously, but should be online most of the time in the +upcoming weeks. +\item A stable relay that acts as \emph{hidden-service directory} should +be part of a relay subset that mostly overlaps with the subsets 1, 2, or +even 3 hours in the future. +That means that the relays in this set should be stable, but also that not +too many new relays should join the set of stable relays at once. +\item A stable relay that clients use in a \emph{fallback consensus} that +is already a few days or even weeks old should still be available on the +same IP address and port.\footnote{See also proposal 146.} +Such a relay doesn't necessarily have to run without interruption, though. +% TODO Correctly cite proposal 146 here. -KL +\item A stable \emph{bridge relay} should be running on the same IP +address a few days after a client learns about the bridge, but again, +doesn't have to run continuously. +\end{itemize} + +All these stability notions have in common that some relays or bridges are +better suited for the described contexts than others. +In this analysis we will look at various relay stability metrics to find +the best suited set of relays for each context. +The idea of this report is to use the results to optimize how the +directory authorities assign relay flags that clients use to make path +select decisions. + +For every context, we try to simulate what requirements based on past +observations would have resulted in what relay stabilities in the near +future. +Generally, we'd expect that stricter requirements lead to higher +stability. 
+But every prediction contains a certain amount of randomness, so that we +cannot tighten the requirements arbitrarily. +Further, we want to ensure that the subset of relays identified as stable +does not become too small. +The reason is that there should be some diversity, so that not a few +operators can aim at running most relays used in a given context. +In some cases, the stable relays also need to provide sufficient bandwidth +to the network in order not to become a performance bottleneck. +We are going into more details about the requirements when looking into +the separate analyses in the sections below. + +The analysis data and tools are available on the Tor metrics website at +\url{https://metrics.torproject.org/}.\footnote{Or rather, will be made +available.} + +\section{Choosing relays for long-lived streams} +\label{sec:mtbf-sim} + +Whenever clients request Tor to open a long-lived stream, Tor should try +to pick only those relays for the circuit that are not likely to disappear +shortly after. +If only a single relay in the circuit fails, the stream collapses and a +new circuit needs to be built. +Depending on how well the application handles connection failures this may +impact usability significantly. + +In order to declare some relays as more useful for long-lived streams, the +directory authorities track uptime sessions of all relays over time. +Based on this history, they calculate the \emph{weighted mean time between +failure (WMTBF)} for each relay. +The MTBF part simply measures the average uptime between a relay showing +up in the Tor network and either leaving or failing. +In the weighted form of this metric, which is used here, older sessions +are weighted to count less. +The directory authorities assign the \texttt{Stable} flag to the 50~\% of +relays with the highest WMTBF. 
+ +In this simulation we want to find out how useful the WMTBF metric is for +predicting future stability and how stability would be affected when +declaring more or less than 50~\% of the relays as stable. +The metric we chose for evaluating how stable a relay is is the \emph{time +until next failure}. +When running a simulation we determine the time until 10~\% of the +``stable'' relays have failed. +Under the (grossly simplified) assumption that relays are chosen +uniformly, $1 - 0.9^3 = 27.1~\%$ of streams using relays from this set +would have failed up to this point. + +\begin{figure}[t] +\includegraphics[width=\textwidth]{mtbf-sim.pdf} +\caption{Impact of assigning the \texttt{Stable} flag to a given fraction +of relays on the actual required WMTBF ($x$ axis) and on the time +until 10~\% of relays or 27.1~\% of streams have failed ($y$ axis)} +\label{fig:mtbf-sim} +\end{figure} + +Figure~\ref{fig:mtbf-sim} shows the analysis results for assigning the +\texttt{Stable} flag to fractions of relays between 30~\% and 70~\% in a +path plot. +This path plot shows the effect of choosing a different fraction of +relays on the actual required WMTBF value on the $x$ axis and on the +resulting time until 10~\% of relays have failed on the $y$ axis. +Two data points adjacent in time are connected by a line, forming a path. + +The results indicate a somewhat linear relation between required WMTBF and +time until failure, which is as expected. +The time until 10~\% of relays have failed in the default case of having +50~\% stable relays is somewhere between 12 and 48 hours. +If the directory authorities assigned the \texttt{Stable} flag to 60~\% or +even 70~\% of all relays, this time would go down to on average 24 or 12 +hours. +Reducing the set to only 40~\% or 30~\% of relays would increase the time +until failure to 36 or even 48 hours on average. + +\subsubsection*{Next steps} + +{\it +\begin{itemize} +\item What's the desired stability goal here? 
+\item What other requirements (bandwidth) should go into the simulation? +\end{itemize} +} + +\section{Picking stable entry guards} + +Clients pick a set of entry guards as fixed entry points into the Tor +network. +Optimally, clients should be able to stick with their choice for a few +weeks. +While it is not required for all their entry guards to be running all the +time, at least a subset of them should be running, or the client needs to +pick a new set. + +Tor's metric for deciding which relays are stable enough to be entry +guards is \emph{weighted fractional uptime (WFU)}. +WFU measures the fraction of uptime of a relay in the past with older +observations weighted to count less. +The assumption is that a relay that was available most of the time in the +past will also be available most of the time in the future. + +In a first analysis we simulate the effect of varying the requirements for +becoming an entry guard on the average relay stability in the future. +We measure future stability by using the same WFU metric, but for uptime +in the future. +We similarly weight observations farther in the future less than +observations in the near future. +We then simulate different pre-defined required WFUs between $90~\%$ and +$99.9~\%$ and calculate what the mean future WFUs would be. + +\begin{figure}[t] +\includegraphics[width=\textwidth]{wfu-sim.pdf} +\caption{Impact of different required WFU on the mean empirical future WFU +and fraction of potential entry guards} +\label{fig:wfu-sim} +\end{figure} + +Figure~\ref{fig:wfu-sim} shows the analysis results in a path plot similar +to the one in Section~\ref{sec:mtbf-sim}. +This path plot shows the effect of varying the WFU requirement, displayed +as different line colors, on the fraction of relays meeting this +requirement on the $x$ axis and on the WFU in the future on the $y$ axis. +Two data points adjacent in time are connected by a line, forming a path. 
+ +In this graph we can see that the majority of data points for the default +required WFU of 98~\% falls in a future WFU range of 94~\% to 96~\% with +the smallest WFU being no less than 89~\%. +In most cases, the fraction of relays meeting the default WFU requirement +is between 40~\% and 50~\%. + +If the WFU requirement is relaxed to 95~\% or even 90~\%, the WFU in the +future decreases slightly towards around 94~\% to 95~\% for most cases. +At first sight it may seem surprising that a past WFU of 90~\% leads to +a future WFU of 94~\%, but it makes sense, because the past WFU is a +required minimum whereas the future WFU is a mean value of all relays +meeting the requirement. +Another effect of relaxing the required WFU is that the fraction of relays +meeting the requirement increases from 50~\% to almost 66~\%. + +Interestingly, when tightening the requirement to a WFU value of 99~\% or +even 99.9~\%, the future WFU does not increase significantly, if at all. +To the contrary, the future WFU of relays meeting the 99.9~\% requirement +drops to a range of 91~\% to 94~\% for quite a while. +A likely explanation for this effect is that the fraction of relays +meeting these high requirements is only 15~\%. +While these 15~\% of relays may have had a very high uptime in the past, +failure of only a few of these relays ruins the WFU metric in the future. + +A cautious conclusion of this analysis could be that, if the goal is to +increase the number of \texttt{Guard} relays, reducing the required WFU to +95~\% or even 90~\% wouldn't impact relay stability by too much. +Conversely, increasing the required WFU beyond the current value of 98~\% +doesn't make much sense and might even negatively affect relay stability. + +\subsubsection*{Next steps} + +{\it +\begin{itemize} +\item Tor penalizes relays that change their IP address or port by ending +the running uptime session and starting a new uptime session. This +reduces both WFU and MTBF. The simulation doesn't take this into account +yet. 
Should it? +\item Add the bandwidth requirements to the simulation. The current +simulation doesn't make any assumptions about relay bandwidth when +assigning \texttt{Guard} flags. Which bandwidth value would we use here? +\item Add another graph similar to Figure~\ref{fig:wfu-sim}, but replace +the ``Fraction of relays meeting WFU requirement'' on the \emph{x} axis +with the ``Fraction of \emph{bandwidth} of relays meeting WFU +requirement.'' +After all, we're interested in having enough bandwidth capacity for the +entry guard position, not (only) in having enough distinct relays. +Which bandwidth value would we use here? +\item Roger suggests to come up with a better metric than ``WFU since we +first saw a relay.'' +He says ``it seems wrong to make something that we saw earlier have a +worse WFU than something we saw later, even if they've had identical +uptimes in that second period.'' +What would be good candidate metrics? +\item Ponder finding another metric than WFU for future observations. In +particular, with the current WFU parameters of $0.95$ and $12$ hours, the +WFU reaches up to 4 months into the future. It seems useful to weight +uptime in the near future higher than uptime in the farther future, but +maybe we should use parameters to limit the interval to $1$ or $2$ months. +\end{itemize} +} + +\section{Forming stable hidden-service directory sets} + +{\it +In this section we should evaluate the current requirements for getting +the \texttt{HSDir} flag. +Also, what happened to the number of relays with the \texttt{HSDir} flag +in August 2010? +} + +\section{Selecting stable relays for a fallback consensus} + +{\it +Is the concept of a fallback consensus still worth considering? +If so, we should analyze how to identify those relays that are most likely +to be around and reachable under the same IP address. +The result of this analysis could lead to adding a new \texttt{Longterm} +(or \texttt{Fallback}?) flag as suggested in proposal 146. 
+% TODO Correctly cite proposal 146 here. -KL +Maybe the analysis of bridges on stable IP addresses should come first, +though. +} + +\section{Distributing bridges with stable IP addresses} + +{\it +A possible outcome of this analysis could be to add a new flag +\texttt{StableAddress} (similar to the \texttt{Longterm} flag from the +previous section) to bridge network statuses and to change BridgeDB to +include at least one bridge with this flag in its results. +One of the challenges of this analysis will be to connect sanitized bridge +descriptors from two months with each other. +The sanitized IP addresses of two bridges in two months do not match, +because we're using a new secret key as input to the hash function every +month. +We might be able to correlate the descriptors of running bridges via their +descriptor publication times or bridge statistics. +But if that fails, we'll have to run the analysis with only 1 month of +data at a time. +} + +\section{Discussion and future work} + +The approach taken in this analysis was to select relays that are most +stable in a given context based on their history. +A different angle to obtain higher relay stability might be to identify +what properties of a relay have a positive or negative impact on its +stability. +For example, relays running a given operating system or given Tor software +version might have a higher stability than others. +Possible consequences could be to facilitate setting up relays on a given +operating system or to improve the upgrade process of the Tor software. + +\end{document} + diff --git a/task-2911/wfu-sim/SimulateWeightedFractionalUptime.java b/task-2911/wfu-sim/SimulateWeightedFractionalUptime.java new file mode 100644 index 0000000..6a2d7a9 --- /dev/null +++ b/task-2911/wfu-sim/SimulateWeightedFractionalUptime.java @@ -0,0 +1,314 @@ +/** + * Simulate variation of weighted fractional uptime on Guard relays. 
In + * a first step, parse network status consensus in consensuses/ from last + * to first, calculate future weighted fractional uptimes for all relays, + * and write them to disk as one file per network status in + * fwfu/$filename. (Skip this step if there is already a fwfu/ + * directory.) In a second step, parse the network statuse consensus + * again, but this time from first to last, calculate past weighted + * fractional uptimes for all relays, form relay subsets based on minimal + * WFU, look up what the mean future WFU would be for a subset, and write + * results to wfu-sim.csv to disk. */ +import java.io.*; +import java.text.*; +import java.util.*; +public class SimulateWeightedFractionalUptime { + public static void main(String[] args) throws Exception { + + /* Measure how long this execution takes. */ + long started = System.currentTimeMillis(); + + /* Decide whether we need to do the reverse run, or if we can use + * previous results. */ + if (!new File("fwfu").exists()) { + + /* Scan existing consensus files and sort them in reverse order. */ + SortedSet<File> allConsensuses = + new TreeSet<File>(Collections.reverseOrder()); + Stack<File> files = new Stack<File>(); + files.add(new File("consensuses")); + while (!files.isEmpty()) { + File file = files.pop(); + if (file.isDirectory()) { + files.addAll(Arrays.asList(file.listFiles())); + } else { + if (file.getName().endsWith("-consensus")) { + allConsensuses.add(file); + } + } + } + + /* For each relay as identified by its base-64 encoded fingerprint, + * track weighted uptime and total weighted time in a long[2]. */ + SortedMap<String, long[]> knownRelays = + new TreeMap<String, long[]>(); + + /* Parse all consensuses in reverse order. */ + SimpleDateFormat formatter = new SimpleDateFormat( + "yyyy-MM-dd-HH-mm-ss"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + long nextWeightingInterval = formatter.parse(allConsensuses.first(). + getName().substring(0, "yyyy-MM-dd-HH-mm-ss".length())). 
+ getTime() / (12L * 60L * 60L * 1000L); + for (File consensus : allConsensuses) { + + /* Every 12 hours, weight both uptime and total time of all known + * relays with 0.95 (or 19/20 since these are long integers) and + * remove all with a weighted fractional uptime below 1/10000. */ + long validAfter = formatter.parse(consensus.getName().substring(0, + "yyyy-MM-dd-HH-mm-ss".length())).getTime(); + long weightingInterval = validAfter / (12L * 60L * 60L * 1000L); + while (weightingInterval < nextWeightingInterval) { + Set<String> relaysToRemove = new HashSet<String>(); + for (Map.Entry<String, long[]> e : knownRelays.entrySet()) { + long[] w = e.getValue(); + w[0] *= 19L; + w[0] /= 20L; + w[1] *= 19L; + w[1] /= 20L; + if (((10000L * w[0]) / w[1]) < 1L) { + relaysToRemove.add(e.getKey()); + } + } + for (String fingerprint : relaysToRemove) { + knownRelays.remove(fingerprint); + } + nextWeightingInterval -= 1L; + } + + /* Parse all fingerprints of Running relays from the consensus. */ + Set<String> fingerprints = new HashSet<String>(); + BufferedReader br = new BufferedReader(new FileReader(consensus)); + String line, rLine = null; + boolean reachedEnd = false; + while ((line = br.readLine()) != null) { + if (line.startsWith("r ")) { + rLine = line; + } else if (line.startsWith("s ") && line.contains(" Running")) { + String[] parts = rLine.split(" "); + if (parts.length < 3) { + System.out.println("Illegal line '" + rLine + "' in " + + consensus + ". Skipping consensus."); + continue; + } else { + String fingerprint = parts[2]; + if (fingerprint.length() != + "AAAAAAAAAAAAAAAAAAAAAAAAAAA".length()) { + System.out.println("Illegal line '" + rLine + "' in " + + consensus + ". Skipping consensus."); + continue; + } + fingerprints.add(fingerprint); + } + } else if (line.startsWith("directory-signature ")) { + reachedEnd = true; + break; + } + } + br.close(); + if (!reachedEnd) { + System.out.println("Did not reach the consensus end of " + + consensus + ". 
Skipping consensus."); + continue; + } + + /* Increment weighted uptime for all running relays by 3600 + * seconds. */ + for (String fingerprint : fingerprints) { + if (!knownRelays.containsKey(fingerprint)) { + knownRelays.put(fingerprint, new long[] { 3600L, 0L }); + } else { + knownRelays.get(fingerprint)[0] += 3600L; + } + } + + /* Increment total weighted time for all relays by 3600 seconds. */ + for (long[] w : knownRelays.values()) { + w[1] += 3600L; + } + + /* Write future WFUs for all known relays to disk. */ + File fwfuFile = new File("fwfu", consensus.getName()); + fwfuFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter(fwfuFile)); + for (Map.Entry<String, long[]> e : knownRelays.entrySet()) { + bw.write(e.getKey() + " " + + ((10000L * e.getValue()[0]) / e.getValue()[1]) + "\n"); + } + bw.close(); + } + } + + /* Run the simulation for the following WFU/10000 values: */ + long[] requiredWFUs = new long[] { 9000, 9100, 9200, 9300, 9400, 9500, + 9600, 9700, 9750, 9800, 9850, 9900, 9950, 9975, 9990, 9999 }; + BufferedWriter bw = new BufferedWriter(new FileWriter("wfu-sim.csv")); + bw.write("time"); + for (long requiredWFU : requiredWFUs) { + bw.write(",wfu" + requiredWFU + ",perc85wfu" + requiredWFU + + ",perc90wfu" + requiredWFU + ",perc95wfu" + requiredWFU + + ",guards" + requiredWFU); + } + bw.write("\n"); + + /* Scan existing consensus files and sort them in forward order. */ + SortedSet<File> allConsensuses = new TreeSet<File>(); + Stack<File> files = new Stack<File>(); + files.add(new File("consensuses")); + while (!files.isEmpty()) { + File file = files.pop(); + if (file.isDirectory()) { + files.addAll(Arrays.asList(file.listFiles())); + } else { + if (file.getName().endsWith("-consensus")) { + allConsensuses.add(file); + } + } + } + + /* For each relay as identified by its base-64 encoded fingerprint, + * track weighted uptime and total weighted time in a long[2]. 
*/ + SortedMap<String, long[]> knownRelays = new TreeMap<String, long[]>(); + + /* Parse all consensuses in forward order. */ + SimpleDateFormat formatter = new SimpleDateFormat( + "yyyy-MM-dd-HH-mm-ss"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + SimpleDateFormat isoFormatter = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + isoFormatter.setTimeZone(TimeZone.getTimeZone("UTC")); + long nextWeightingInterval = formatter.parse(allConsensuses.first(). + getName().substring(0, "yyyy-MM-dd-HH-mm-ss".length())).getTime() + / (12L * 60L * 60L * 1000L); + for (File consensus : allConsensuses) { + + /* Every 12 hours, weight both uptime and total time of all known + * relays with 0.95 (or 19/20 since these are long integers) and + * remove all with a weighted fractional uptime below 1/10000. */ + long validAfter = formatter.parse(consensus.getName().substring(0, + "yyyy-MM-dd-HH-mm-ss".length())).getTime(); + long weightingInterval = validAfter / (12L * 60L * 60L * 1000L); + while (weightingInterval > nextWeightingInterval) { + Set<String> relaysToRemove = new HashSet<String>(); + for (Map.Entry<String, long[]> e : knownRelays.entrySet()) { + long[] w = e.getValue(); + w[0] *= 19L; + w[0] /= 20L; + w[1] *= 19L; + w[1] /= 20L; + if (((10000L * w[0]) / w[1]) < 1L) { + relaysToRemove.add(e.getKey()); + } + } + for (String fingerprint : relaysToRemove) { + knownRelays.remove(fingerprint); + } + nextWeightingInterval += 1L; + } + + /* Parse all fingerprints of Running relays from the consensus. */ + Set<String> fingerprints = new HashSet<String>(); + BufferedReader br = new BufferedReader(new FileReader(consensus)); + String line, rLine = null; + boolean reachedEnd = false; + while ((line = br.readLine()) != null) { + if (line.startsWith("r ")) { + rLine = line; + } else if (line.startsWith("s ") && line.contains(" Running")) { + String[] parts = rLine.split(" "); + if (parts.length < 3) { + System.out.println("Illegal line '" + rLine + "' in " + + consensus + ". 
Skipping consensus."); + continue; + } else { + String fingerprint = parts[2]; + if (fingerprint.length() != + "AAAAAAAAAAAAAAAAAAAAAAAAAAA".length()) { + System.out.println("Illegal line '" + rLine + "' in " + + consensus + ". Skipping consensus."); + continue; + } + fingerprints.add(fingerprint); + } + } else if (line.startsWith("directory-signature ")) { + reachedEnd = true; + break; + } + } + br.close(); + if (!reachedEnd) { + System.out.println("Did not reach the consensus end of " + + consensus + ". Skipping consensus."); + continue; + } + + /* Increment weighted uptime for all running relays by 3600 + * seconds. */ + for (String fingerprint : fingerprints) { + if (!knownRelays.containsKey(fingerprint)) { + knownRelays.put(fingerprint, new long[] { 3600L, 0L }); + } else { + knownRelays.get(fingerprint)[0] += 3600L; + } + } + + /* Increment total weighted time for all relays by 3600 seconds. */ + for (long[] w : knownRelays.values()) { + w[1] += 3600L; + } + + /* Read previously calculated future WFUs from disk. */ + Map<String, Long> fwfus = new HashMap<String, Long>(); + File fwfuFile = new File("fwfu", consensus.getName()); + if (!fwfuFile.exists()) { + System.out.println("Could not find file " + fwfuFile + + ". Exiting!"); + System.exit(1); + } + br = new BufferedReader(new FileReader(fwfuFile)); + while ((line = br.readLine()) != null) { + String[] parts = line.split(" "); + fwfus.put(parts[0], Long.parseLong(parts[1])); + } + + /* Run the simulation for the relays in the current consensus for + * various required WFUs. 
*/ + bw.write(isoFormatter.format(validAfter)); + for (long requiredWFU : requiredWFUs) { + long selectedRelays = 0L, + totalRelays = (long) fingerprints.size(), totalFwfu = 0L; + List<Long> fwfuList = new ArrayList<Long>(); + for (String fingerprint : fingerprints) { + long[] pwfu = knownRelays.get(fingerprint); + long wfu = (10000L * pwfu[0]) / pwfu[1]; + if (wfu >= requiredWFU) { + selectedRelays += 1L; + if (fwfus.containsKey(fingerprint)) { + long fwfu = fwfus.get(fingerprint); + totalFwfu += fwfu; + fwfuList.add(fwfu); + } + } + } + if (selectedRelays == 0L) { + bw.write(",NA,NA,NA,NA"); + } else { + Collections.sort(fwfuList, Collections.reverseOrder()); + long perc85 = fwfuList.get((85 * fwfuList.size()) / 100); + long perc90 = fwfuList.get((90 * fwfuList.size()) / 100); + long perc95 = fwfuList.get((95 * fwfuList.size()) / 100); + bw.write("," + (totalFwfu / selectedRelays) + "," + perc85 + + "," + perc90 + "," + perc95); + } + bw.write("," + (10000L * selectedRelays / totalRelays)); + } + bw.write("\n"); + } + bw.close(); + + /* Print how long this execution took and exit. 
*/ + System.out.println("Execution took " + ((System.currentTimeMillis() + - started) / (60L * 1000L)) + " minutes."); + } +} + diff --git a/task-2911/wfu-sim/wfu-sim.R b/task-2911/wfu-sim/wfu-sim.R new file mode 100644 index 0000000..149ce6d --- /dev/null +++ b/task-2911/wfu-sim/wfu-sim.R @@ -0,0 +1,57 @@ +library(ggplot2) +data <- read.csv("wfu-sim.csv", stringsAsFactors = FALSE) + +d <- data[data$time >= '2010' & data$time < '2011', ] +d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean) +d <- rbind( + data.frame(x = d$guards9000, y = d$wfu9000, sim = "90 %"), + data.frame(x = d$guards9500, y = d$wfu9500, sim = "95 %"), + data.frame(x = d$guards9800, y = d$wfu9800, sim = "98 % (default)"), + data.frame(x = d$guards9900, y = d$wfu9900, sim = "99 %"), + data.frame(x = d$guards9990, y = d$wfu9990, sim = "99.9 %")) +ggplot(d, aes(x = x / 10000.0, y = y / 10000.0)) + +geom_path() + +facet_wrap(~ sim) + +scale_x_continuous("\nFraction of relays meeting WFU requirement", + formatter = "percent") + +scale_y_continuous("Mean WFU in the future\n", formatter = "percent") +ggsave(filename = "wfu-sim.pdf", width = 8, height = 5, dpi = 100) + +## Commented out, because graph is meaningless in b/w. 
+#d <- data[data$time >= '2010' & data$time < '2011', ] +#d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean) +#d <- rbind( +# data.frame(x = d$guards9000, y = d$wfu9000, sim = "90 %"), +# data.frame(x = d$guards9500, y = d$wfu9500, sim = "95 %"), +# data.frame(x = d$guards9800, y = d$wfu9800, sim = "98 % (default)"), +# data.frame(x = d$guards9900, y = d$wfu9900, sim = "99 %"), +# data.frame(x = d$guards9990, y = d$wfu9990, sim = "99.9 %")) +#ggplot(d, aes(x = x / 10000.0, y = y / 10000.0, colour = sim)) + +#geom_path() + +#scale_x_continuous("\nFraction of relays meeting WFU requirement", +# formatter = "percent") +#, trans = "reverse") + +#scale_y_continuous("Mean WFU \nin the future ", +# formatter = "percent") + +#scale_colour_hue("Required WFU") + +#opts(axis.title.x = theme_text(size = 12 * 0.8, face = "bold", +# hjust = 0.5), +# axis.title.y = theme_text(size = 12 * 0.8, face = "bold", vjust = 0.5, +# hjust = 1)) +#ggsave(filename = "wfu-sim.pdf", width = 8, height = 5, dpi = 100) + +## Commented out, because the time plot is not as useful as expected. +#simulations <- paste("wfu", rev(c(9000, 9200, 9400, 9600, 9800)), +# sep = "") +#d <- data[data$time >= '2010' & data$time < '2011', +# c("time", simulations)] +#d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean) +#d <- melt(d, id.vars = 1) +#ggplot(d, aes(x = date, y = value / 10000.0, colour = variable)) + +#geom_line() + +#scale_x_date("", major = "3 months", minor = "1 month", +# format = "%b %Y") + +#scale_y_continuous("Empirical future WFU\n", formatter = "percent") + +#scale_colour_hue("Required past WFU\n", breaks = simulations, +# labels = paste(as.numeric(substr(simulations, 4, 9)) / 100.0, "%")) +#ggsave(filename = "wfu-sim-time.pdf", width = 8, height = 5, dpi = 100) +