commit 0a10a39ca7f7f2b3e306ad6de7a1776a6bd50589 Author: Karsten Loesing karsten.loesing@gmx.net Date: Tue May 24 15:30:11 2011 +0200
Add #3277 code.
---
 task-3277/.gitignore          |    5 ++
 task-3277/EvaluateHsDirs.java |  117 +++++++++++++++++++++++++++++++++++++++++
 task-3277/README              |   22 ++++++++
 task-3277/hsdir-sessions.R    |   21 +++++++
 4 files changed, 165 insertions(+), 0 deletions(-)

diff --git a/task-3277/.gitignore b/task-3277/.gitignore
new file mode 100644
index 0000000..5103210
--- /dev/null
+++ b/task-3277/.gitignore
@@ -0,0 +1,5 @@
+*.csv
+*.class
+Rplots.pdf
+*.png
+
diff --git a/task-3277/EvaluateHsDirs.java b/task-3277/EvaluateHsDirs.java
new file mode 100644
index 0000000..4e6f1a4
--- /dev/null
+++ b/task-3277/EvaluateHsDirs.java
@@ -0,0 +1,117 @@
+import java.io.*;
+import java.text.*;
+import java.util.*;
+
+/**
+ * Extracts continuous HSDir sessions from network status entries.
+ *
+ * Reads hsdir.csv, which contains one "validafter,fingerprint" line per
+ * relay and network status, ordered by validafter, and writes one line
+ * per completed session to hsdir-sessions.csv.  A session is a run of
+ * consecutive network statuses listing the same relay.  Sessions that
+ * start in the first status are left out, because their real start is
+ * unknown, and so are sessions that are still running in the last
+ * status.
+ */
+public class EvaluateHsDirs {
+
+  /**
+   * Evaluates hsdir.csv in the working directory.
+   *
+   * @param args ignored
+   * @throws Exception if reading, writing, or parsing a timestamp fails
+   */
+  public static void main(String[] args) throws Exception {
+    BufferedReader br = new BufferedReader(new FileReader("hsdir.csv"));
+    try {
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          "hsdir-sessions.csv"));
+      try {
+        evaluate(br, bw);
+      } finally {
+        bw.close();
+      }
+    } finally {
+      br.close();
+    }
+  }
+
+  /** Reads status entries from br and writes completed sessions to bw. */
+  private static void evaluate(BufferedReader br, BufferedWriter bw)
+      throws IOException, ParseException {
+    SimpleDateFormat formatter = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss");
+    formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+    bw.write("fingerprint,firstseen,lastseen,duration\n");
+    String line, firstValidAfter = null, currentValidAfter = null,
+        previousValidAfter = null;
+    /* Both maps go from fingerprint to the validafter time at which the
+     * relay's session started.  previous holds the relays of the previous
+     * status that did not show up in the current status yet. */
+    Map<String, String> previous = new HashMap<String, String>();
+    Map<String, String> current = new HashMap<String, String>();
+    while ((line = br.readLine()) != null) {
+      if (!line.startsWith("20")) {
+        if (firstValidAfter == null) {
+          continue; /* Header line before the first entry. */
+        }
+        break; /* Footer line after the last entry. */
+      }
+      String[] parts = line.split(",");
+      if (parts.length < 2) {
+        throw new IOException("Malformed line: " + line);
+      }
+      String validAfter = parts[0], fingerprint = parts[1];
+      if (firstValidAfter == null) {
+        firstValidAfter = validAfter;
+        currentValidAfter = validAfter;
+      }
+      if (!validAfter.equals(currentValidAfter)) {
+        /* A new status begins.  Whoever is left in previous was not
+         * listed in the status that just ended. */
+        writeEndedSessions(bw, formatter, previous, previousValidAfter,
+            firstValidAfter);
+        previous = current;
+        current = new HashMap<String, String>();
+        previousValidAfter = currentValidAfter;
+        currentValidAfter = validAfter;
+      }
+      /* Record every entry, including the one that opened a new status;
+       * keep the first-seen time if the session continues. */
+      if (!current.containsKey(fingerprint)) {
+        String firstSeen = previous.remove(fingerprint);
+        current.put(fingerprint,
+            firstSeen != null ? firstSeen : validAfter);
+      }
+    }
+    /* Also flush at the end of input, with or without a footer line. */
+    writeEndedSessions(bw, formatter, previous, previousValidAfter,
+        firstValidAfter);
+  }
+
+  /**
+   * Writes one line for each session in ended, all of which were last
+   * seen in the status with validafter time lastSeen.  Skips sessions
+   * that started at firstValidAfter.
+   */
+  private static void writeEndedSessions(BufferedWriter bw,
+      SimpleDateFormat formatter, Map<String, String> ended,
+      String lastSeen, String firstValidAfter)
+      throws IOException, ParseException {
+    if (ended.isEmpty()) {
+      return;
+    }
+    long lastSeenMillis = formatter.parse(lastSeen).getTime();
+    for (Map.Entry<String, String> e : ended.entrySet()) {
+      String firstSeen = e.getValue();
+      if (firstSeen.equals(firstValidAfter)) {
+        continue;
+      }
+      long duration = (lastSeenMillis
+          - formatter.parse(firstSeen).getTime()) / 1000L;
+      bw.write(e.getKey() + "," + firstSeen + "," + lastSeen + ","
+          + duration + "\n");
+    }
+  }
+}
+
diff --git a/task-3277/README b/task-3277/README
new file mode 100644
index 0000000..6e71abb
--- /dev/null
+++ b/task-3277/README
@@ -0,0 +1,22 @@
+Analyze how long after earning the HSDir flag relays go away
+
+First, extract status entries with the HSDir flag set from the metrics
+database:
+
+  tordir=> \o hsdir.csv
+  tordir=> SELECT validafter, fingerprint FROM statusentry
+      WHERE validafter >= '2010-05-01' AND validafter < '2011-05-01'
+      AND ishsdir IS TRUE ORDER BY validafter, fingerprint;
+  tordir=> \o
+
+Next, calculate continuous sessions of a relay having the HSDir flag. For
+each such session, extract the fingerprint and the first and last time it
+was listed in the network status with the HSDir flag. Also calculate the
+session duration in seconds:
+
+  $ javac EvaluateHsDirs.java && java EvaluateHsDirs
+
+Plot an ECDF of the session length:
+
+  $ R --slave -f hsdir-sessions.R
+
diff --git a/task-3277/hsdir-sessions.R b/task-3277/hsdir-sessions.R
new file mode 100644
index 0000000..4b28131
--- /dev/null
+++ b/task-3277/hsdir-sessions.R
@@ -0,0 +1,21 @@
+library(ggplot2)
+data <- read.csv("hsdir-sessions.csv", stringsAsFactors = FALSE)
+
+## Histogram; not that useful
+#ggplot(data, aes(x = duration / 3600)) +
+#geom_histogram(aes(y = ..density..), binwidth = 1) +
+#scale_x_continuous(limits = c(0, 72)) +
+#scale_y_continuous(formatter = "percent")
+
+data <- sort(data$duration)
+data <- data.frame(x = data / (60 * 60),
+  y = (length(data):1) / length(data))
+ggplot(data, aes(x = x, y = y)) +
+geom_line() +
+scale_y_continuous("Cumulative fraction of continuous HSDir sessions\n",
+  formatter = "percent", limits = c(0, 1)) +
+scale_x_continuous(paste("\nHSDir session time between the relay earning",
+  "the HSDir flag and going away in hours"),
+  limits = c(0, 3 * 24), breaks = seq(0, 3 * 24, 24))
+ggsave(filename = "hsdir-sessions.png", width = 8, height = 5, dpi = 72)
+