[tor-commits] [metrics-tasks/master] Update hidserv-stats extrapolation code (#13192).

karsten at torproject.org karsten at torproject.org
Mon Feb 9 17:42:46 UTC 2015


commit 3c90c181a13dcc12c69e8e8fa013948b1a6405e2
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Feb 9 18:25:38 2015 +0100

    Update hidserv-stats extrapolation code (#13192).
---
 task-13192/.gitignore                            |    1 +
 task-13192/src/R/blog.R                          |   47 ++
 task-13192/src/R/plot.R                          |  214 +++----
 task-13192/src/java/Aggregate.java               |  129 ++++
 task-13192/src/java/Extrapolate.java             |  424 +++++++++++++
 task-13192/src/java/ExtrapolateHidServStats.java |  722 ----------------------
 task-13192/src/java/Simulate.java                |  357 +++++++++++
 7 files changed, 1066 insertions(+), 828 deletions(-)

diff --git a/task-13192/.gitignore b/task-13192/.gitignore
index 7e8bf3b..89d161b 100644
--- a/task-13192/.gitignore
+++ b/task-13192/.gitignore
@@ -4,4 +4,5 @@ in/
 src/bash/
 src/bin/
 out/
+Rplots.pdf
 
diff --git a/task-13192/src/R/blog.R b/task-13192/src/R/blog.R
new file mode 100644
index 0000000..82a07e9
--- /dev/null
+++ b/task-13192/src/R/blog.R
@@ -0,0 +1,47 @@
+# Load required libraries.
+require(ggplot2, warn.conflicts = FALSE, quietly = TRUE)
+require(scales, warn.conflicts = FALSE, quietly = TRUE)
+require(reshape, warn.conflicts = FALSE, quietly = TRUE)
+require(splines, warn.conflicts = FALSE, quietly = TRUE)
+require(Hmisc, warn.conflicts = FALSE, quietly = TRUE)
+
+# Read .csv files written by Java.
+h <- read.csv("out/csv/hidserv-stats.csv", stringsAsFactors = FALSE)
+
+# Create directories for graphs.
+dir.create(file.path("out", "graphs", "blog"), showWarnings = FALSE,
+  recursive = TRUE)
+
+# Cut off last two days, because stats might be incomplete for those.
+h <- h[as.Date(h$stats_end) < max(as.Date(h$stats_end) - 1), ]
+
+# Graph the number of reported stats by day.
+h7 <- data.frame(date = as.Date(h$stats_end), reports = 1)
+ggplot(h7, aes(x = date)) +
+geom_bar(colour = 'lightgray', width = .7, binwidth = 1) +
+scale_x_date("") +
+scale_y_continuous("")
+ggsave("out/graphs/blog/num-reported-stats.png", width = 10, height = 3,
+  dpi = 100)
+
+e <- read.csv("out/csv/hidserv-stats-extrapolated.csv",
+  stringsAsFactors = FALSE)
+e <- melt(e, by = c("date", "type"))
+e <- e[e$variable == "wiqm", ]
+e <- rbind(e, data.frame(date = NA, type = c("onions", "cells"),
+  variable = NA, value = 0))
+
+ggplot(e[e$type == "cells", ], aes(x = as.Date(date), y = value)) +
+geom_line() +
+scale_x_date(name = "") +
+scale_y_continuous(name = "")
+ggsave("out/graphs/blog/extrapolated-cells.png", width = 10,
+  height = 3, dpi = 100)
+
+ggplot(e[e$type != "cells", ], aes(x = as.Date(date), y = value)) +
+geom_line() +
+scale_x_date(name = "") +
+scale_y_continuous(name = "")
+ggsave("out/graphs/blog/extrapolated-onions.png", width = 10,
+  height = 3, dpi = 100)
+
diff --git a/task-13192/src/R/plot.R b/task-13192/src/R/plot.R
index 991928b..552b810 100644
--- a/task-13192/src/R/plot.R
+++ b/task-13192/src/R/plot.R
@@ -5,17 +5,12 @@ require(reshape, warn.conflicts = FALSE, quietly = TRUE)
 require(splines, warn.conflicts = FALSE, quietly = TRUE)
 require(Hmisc, warn.conflicts = FALSE, quietly = TRUE)
 
-# Avoid scientific notation.
-options(scipen = 15)
-
 # Read .csv file written by Java.
 h <- read.csv("out/csv/hidserv-stats.csv", stringsAsFactors = FALSE)
 
 # Create directories for graphs.
 dir.create(file.path("out", "graphs", "report"), showWarnings = FALSE,
   recursive = TRUE)
-dir.create(file.path("out", "graphs", "slides"), showWarnings = FALSE,
-  recursive = TRUE)
 
 # Cut off last two days, because stats might be incomplete for those.
 h <- h[as.Date(h$stats_end) < max(as.Date(h$stats_end) - 1), ]
@@ -28,17 +23,15 @@ scale_x_date("") +
 scale_y_continuous("")
 ggsave("out/graphs/report/num-reported-stats.pdf", width = 10, height = 3,
   dpi = 100)
-ggsave("out/graphs/slides/hidserv-12.png", width = 8, height = 3,
-  dpi = 100)
 
 # Graph distributions of reported values by day.
 h1 <- data.frame(date = as.Date(h$stats_end),
-  traffic = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
+  traffic = h$hidserv_rend_relayed_cells * 512 * 8 / (86400 * 1e6),
   services = h$hidserv_dir_onions_seen)
 h1 <- melt(h1, "date")
 h1 <- data.frame(date = h1$date,
-  variable = ifelse(h1$variable == "traffic", "traffic in MB/s",
-  ".onion addresses"), value = h1$value)
+  variable = ifelse(h1$variable == "traffic", "traffic in Mbit/s",
+  "unique .onion addresses"), value = h1$value)
 ggplot(h1, aes(x = date, y = value, group = date)) +
 geom_boxplot() +
 facet_grid(variable ~ ., scales = "free_y") +
@@ -49,23 +42,22 @@ ggsave("out/graphs/report/stats-by-day.pdf", width = 10, height = 5,
 
 # Graph distributions of calculated fractions by day.
 h2 <- data.frame(date = as.Date(h$stats_end),
-  prob_rend_point = h$prob_rend_point,
-  x_frac_hsdesc = h$frac_hsdesc / 3.0)
+  frac_rend_relayed_cells = h$frac_rend_relayed_cells, x_frac_dir_onions_seen = h$frac_dir_onions_seen)
 h2 <- melt(h2, "date")
 h2 <- data.frame(date = h2$date,
-  variable = ifelse(h2$variable == "prob_rend_point",
-  "selected as rendezvous point", "responsible for a descriptor"),
+  variable = ifelse(h2$variable == "frac_rend_relayed_cells",
+  "cells on rendezvous circuits", "hidden-service descriptors"),
   value = h2$value)
 ggplot(h2, aes(x = date, y = value, group = date)) +
 geom_boxplot() +
 facet_grid(variable ~ ., scales = "free_y") +
 scale_x_date("") +
-scale_y_continuous("Calculated probabilities\n", labels = percent)
+scale_y_continuous("Calculated network fractions\n", labels = percent)
 ggsave("out/graphs/report/probs-by-relay.pdf", width = 10, height = 5,
   dpi = 100)
 
 # Graph ECDF of cells reported by relays with rend point probability of 0.
-h8 <- h[h$prob_rend_point == 0,
+h8 <- h[h$frac_rend_relayed_cells == 0,
         "hidserv_rend_relayed_cells" ]
 h8 <- sort(h8)
 h8 <- data.frame(x = h8, y = (1:length(h8)) / length(h8))
@@ -75,13 +67,14 @@ laplace_cells <- function(x) {
 ggplot(h8, aes(x = x, y = y)) +
 geom_line() +
 stat_function(fun = laplace_cells, colour = "blue") +
-scale_x_continuous("\nReported cells on rendezvous circuits") +
-scale_y_continuous("Cumulative probability\n")
+scale_x_continuous("\nReported cells on rendezvous circuits",
+  limits = c(max(h8[h8$y < 0.01, "x"]), min(h8[h8$y > 0.99, "x"]))) +
+scale_y_continuous("Cumulative probability\n", labels = percent)
 ggsave("out/graphs/report/zero-prob-cells.pdf", width = 5, height = 3,
   dpi = 100)
 
 # Graph ECDF of .onions reported by relays with HSDir probability of 0.
-h9 <- h[h$frac_hsdesc == 0, "hidserv_dir_onions_seen"]
+h9 <- h[h$frac_dir_onions_seen == 0, "hidserv_dir_onions_seen"]
 h9 <- sort(h9)
 h9 <- data.frame(x = h9, y = (1:length(h9)) / length(h9))
 laplace_onions <- function(x) {
@@ -90,51 +83,54 @@ laplace_onions <- function(x) {
 ggplot(h9, aes(x = x, y = y)) +
 geom_line() +
 stat_function(fun = laplace_onions, colour = "blue") +
-scale_x_continuous("\nReported .onion addresses") +
-scale_y_continuous("Cumulative probability\n")
+scale_x_continuous("\nReported .onion addresses",
+  limits = c(max(h9[h9$y < 0.01, "x"]), min(h9[h9$y > 0.99, "x"]))) +
+scale_y_continuous("Cumulative probability\n", labels = percent)
 ggsave("out/graphs/report/zero-prob-onions.pdf", width = 5, height = 3,
   dpi = 100)
 
 # Graph correlation between reports and fractions per relay.
 h3 <- rbind(
-  data.frame(x = h$frac_hsdesc / 3.0,
-    y = ifelse(h$frac_hsdesc == 0, NA, h$hidserv_dir_onions_seen),
+  data.frame(x = h$frac_dir_onions_seen,
+    y = ifelse(h$frac_dir_onions_seen == 0, NA, h$hidserv_dir_onions_seen),
     facet = ".onion addresses"),
-  data.frame(x = h$prob_rend_point,
-    y = ifelse(h$prob_rend_point == 0, NA,
-      h$hidserv_rend_relayed_cells * 512 / (86400 * 1000)),
-    facet = "traffic in kB/s"))
+  data.frame(x = h$frac_rend_relayed_cells,
+    y = ifelse(h$frac_rend_relayed_cells == 0, NA,
+      h$hidserv_rend_relayed_cells * 512 * 8 / (86400 * 1e6)),
+    facet = "traffic in Mbit/s"))
 ggplot(h3[h3$facet == ".onion addresses", ], aes(x = x, y = y)) +
 geom_point(alpha = 0.5) +
 stat_smooth(method = "lm") +
-scale_x_continuous(name = "\nProbability", labels = percent) +
+scale_x_continuous(name = "\nFraction", labels = percent) +
 scale_y_continuous(name = "Reported .onion addresses\n")
 ggsave("out/graphs/report/corr-probs-onions-by-relay.pdf", width = 5,
   height = 3, dpi = 100)
-ggplot(h3[h3$facet == "traffic in kB/s", ], aes(x = x, y = y)) +
+ggplot(h3[h3$facet == "traffic in Mbit/s", ], aes(x = x, y = y)) +
 geom_point(alpha = 0.5) +
 stat_smooth(method = "lm") +
-scale_x_continuous(name = "\nProbability", labels = percent) +
-scale_y_continuous(name = "Reported traffic in kB/s\n")
+scale_x_continuous(name = "\nFraction", labels = percent) +
+scale_y_continuous(name = "Reported traffic in Mbit/s\n")
 ggsave("out/graphs/report/corr-probs-cells-by-relay.pdf", width = 5,
   height = 3, dpi = 100)
 
 # Graph correlation between reports and fractions per day.
 h5 <- rbind(
   data.frame(date = as.Date(h$stats_end),
-    prob = ifelse(h$frac_hsdesc == 0, NA, h$frac_hsdesc / 3.0),
+    prob = ifelse(h$frac_dir_onions_seen == 0, NA, h$frac_dir_onions_seen),
     reported = h$hidserv_dir_onions_seen, facet = "published descriptor"),
   data.frame(date = as.Date(h$stats_end),
-    prob = ifelse(h$prob_rend_point == 0, NA, h$prob_rend_point),
-    reported = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
-    facet = "traffic in MB/s"))
+    prob = ifelse(h$frac_rend_relayed_cells == 0, NA,
+      h$frac_rend_relayed_cells),
+    reported = h$hidserv_rend_relayed_cells * 512 * 8 / (86400 * 1e6),
+    facet = "traffic in Mbit/s"))
 h5 <- na.omit(h5)
 h5 <- aggregate(list(prob = h5$prob, reported = h5$reported),
   by = list(date = h5$date, facet = h5$facet), FUN = sum)
-ggplot(h5[h5$facet == "traffic in MB/s", ], aes(x = prob, y = reported)) +
+ggplot(h5[h5$facet == "traffic in Mbit/s", ],
+  aes(x = prob, y = reported)) +
 geom_point(alpha = 0.5) +
 scale_x_continuous(name = "\nTotal probability", labels = percent) +
-scale_y_continuous(name = "Total traffic in MB/s\n") +
+scale_y_continuous(name = "Total traffic in Mbit/s\n") +
 stat_smooth(method = "lm") +
 geom_vline(xintercept = 0.01, linetype = 2)
 ggsave("out/graphs/report/corr-probs-cells-by-day.pdf", width = 5,
@@ -149,98 +145,104 @@ geom_vline(xintercept = 0.01, linetype = 2)
 ggsave("out/graphs/report/corr-probs-onions-by-day.pdf", width = 5,
   height = 3, dpi = 100)
 
+# Graph ECDF of extrapolated cells.
+h20 <- h[h$frac_rend_relayed_cells > 0, ]
+h20 <- h20$hidserv_rend_relayed_cells *
+  512 * 8 / (86400 * 1e6 * h20$frac_rend_relayed_cells)
+h20 <- sort(h20)
+h20 <- data.frame(x = h20, y = (1:length(h20)) / length(h20))
+ggplot(h20, aes(x = x, y = y)) +
+geom_line() +
+scale_x_continuous("\nExtrapolated total traffic in Mbit/s",
+  limits = c(max(h20[h20$y < 0.01, "x"]), min(h20[h20$y > 0.99, "x"]))) +
+scale_y_continuous("Cumulative probability\n", labels = percent)
+ggsave("out/graphs/report/extrapolated-cells.pdf", width = 5, height = 3,
+  dpi = 100)
+
+# Graph ECDF of extrapolated .onions.
+h21 <- h[h$frac_dir_onions_seen > 0, ]
+h21 <- h21$hidserv_dir_onions_seen / (12 * h21$frac_dir_onions_seen)
+h21 <- sort(h21)
+h21 <- data.frame(x = h21, y = (1:length(h21)) / length(h21))
+ggplot(h21, aes(x = x, y = y)) +
+geom_line() +
+scale_x_continuous("\nExtrapolated total .onion addresses",
+  limits = c(max(h21[h21$y < 0.01, "x"]), min(h21[h21$y > 0.99, "x"]))) +
+scale_y_continuous("Cumulative probability\n", labels = percent)
+ggsave("out/graphs/report/extrapolated-onions.pdf", width = 5, height = 3,
+  dpi = 100)
+
 # Graph extrapolated network totals.
-h6 <- data.frame(date = as.Date(h$stats_end),
-  traffic = ifelse(h$prob_rend_point == 0, 0,
-    h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000)),
-  prob_rend_point = h$prob_rend_point,
-  onions = ifelse(h$frac_hsdesc == 0, 0, h$hidserv_dir_onions_seen),
-  prob_onion = h$frac_hsdesc * 4.0)
-h6 <- aggregate(list(traffic = h6$traffic, 
-  prob_rend_point = h6$prob_rend_point,
-  onions = h6$onions,
-  prob_onion = h6$prob_onion), by = list(date = h6$date), FUN = sum)
-h6 <- data.frame(date = h6$date,
-  traffic = ifelse(h6$prob_rend_point < 0.01, 0,
-    h6$traffic / h6$prob_rend_point),
-  onions = ifelse(h6$prob_onion / 12.0 < 0.01, 0,
-    h6$onions / h6$prob_onion))
-h6 <- melt(h6, "date")
-h6 <- h6[h6$value > 0, ]
-h6 <- rbind(h6, data.frame(date = NA, variable = c('traffic', 'onions'),
-  value = 0))
-h6 <- data.frame(date = h6$date,
-  variable = ifelse(h6$variable == "traffic", "total traffic in MB/s",
-    ".onion addresses"), value = h6$value)
-ggplot(h6, aes(date, value)) +
-facet_grid(variable ~ ., scales = "free_y") +
-geom_point() +
-stat_smooth() +
+e <- read.csv("out/csv/hidserv-stats-extrapolated.csv",
+  stringsAsFactors = FALSE)
+e <- melt(e, by = c("date", "type"))
+e <- e[e$variable == "wiqm", ]
+e <- rbind(e, data.frame(date = NA, type = c("onions", "cells"),
+  variable = NA, value = 0))
+e <- data.frame(e, label = ifelse(e$type == "cells", "traffic in Mbit/s",
+  "unique .onion addresses"))
+ggplot(e, aes(x = as.Date(date), y = value)) +
+geom_line() +
+facet_grid(label ~ ., scales = "free_y") +
 scale_x_date(name = "") +
 scale_y_continuous(name = "Extrapolated network totals\n")
 ggsave("out/graphs/report/extrapolated-network-totals.pdf", width = 10,
   height = 5, dpi = 100)
 
-# Graph extrapolated number of .onion addresses.
-h11 <- h6[h6$variable == ".onion addresses", ]
-ggplot(h11, aes(x = date, y = value)) +
-geom_point() +
-stat_smooth() +
-scale_x_date(name = "") +
-scale_y_continuous(name = "")
-ggsave("out/graphs/slides/hidserv-13.png", width = 8, height = 3,
-  dpi = 100)
-
-# Graph extrapolated fraction of hidden-service traffic.
-b <- read.csv("in/metrics/bandwidth.csv", stringsAsFactors = FALSE)
-b <- b[b$isexit == '' & b$isguard == '' & b$date > '2014-12-20', ]
-h10 <- data.frame(date = as.Date(h$stats_end),
-  traffic = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
-  prob_rend_point = h$prob_rend_point)
-h10 <- aggregate(list(traffic = h10$traffic, 
-  prob_rend_point = h10$prob_rend_point), by = list(date = h10$date),
-  FUN = sum)
-h10 <- data.frame(date = h10$date,
-  traffic = ifelse(h10$prob_rend_point < 0.01, 0,
-    h10$traffic / h10$prob_rend_point))
-h10 <- melt(h10, "date")
-h10 <- h10[h10$value > 0, ]
-h10 <- rbind(h10, data.frame(date = as.Date(b$date), variable = "bw",
-  value = b$bwread + b$bwwrite))
-h10 <- cast(h10, date ~ variable, value = "value")
-h10 <- na.omit(h10)
-h10 <- data.frame(date = h10$date,
-  value = h10$traffic * 1000 * 1000 / h10$bw)
-h10 <- rbind(h10, data.frame(date = NA, value = 0))
-ggplot(h10, aes(x = date, y = value)) +
-geom_point() +
-scale_x_date(name = "") +
-scale_y_continuous(name = "", labels = percent) +
-stat_smooth()
-ggsave("out/graphs/slides/hidserv-14.png", width = 8, height = 3,
+# Graph distributions of calculated fractions by day.
+h71 <- data.frame(date = as.Date(h$stats_end),
+  frac_rend_relayed_cells = h$frac_rend_relayed_cells,
+  frac_dir_onions_seen = h$frac_dir_onions_seen)
+summary(h71)
+h71 <- aggregate(list(
+  frac_rend_relayed_cells = h71$frac_rend_relayed_cells,
+  frac_dir_onions_seen = h71$frac_dir_onions_seen),
+  by = list(date = h71$date), FUN = sum)
+summary(h71)
+h71 <- melt(h71, "date")
+summary(h71)
+h71 <- data.frame(date = h71$date,
+  variable = ifelse(h71$variable == "frac_rend_relayed_cells",
+  "cells on rendezvous circuits", "hidden-service descriptors"),
+  value = h71$value)
+ggplot(h71, aes(x = date, y = value)) +
+geom_line() +
+facet_grid(variable ~ ., scales = "free_y") +
+geom_hline(yintercept = 0.01, linetype = 2) +
+scale_x_date("") +
+scale_y_continuous("Total calculated network fractions per day\n",
+  labels = percent)
+ggsave("out/graphs/report/probs-by-day.pdf", width = 10, height = 5,
   dpi = 100)
 
 # Graph simulation results for cells on rendezvous circuits.
 s <- read.csv("out/csv/sim-cells.csv")
-ggplot(s, aes(x = frac, y = (p500 - 1e10) / 1e10,
-  ymin = (p025 - 1e10) / 1e10, ymax = (p975 - 1e10) / 1e10)) +
+s <- do.call(data.frame, aggregate(list(X = s$wiqm),
+  by = list(frac = s$frac), FUN = quantile, probs = c(0.025, 0.5, 0.975)))
+ggplot(s, aes(x = frac, y = (X.50. - 1e10) / 1e10,
+  ymin = (X.2.5. - 1e10) / 1e10, ymax = (X.97.5. - 1e10) / 1e10)) +
 geom_line() +
 geom_ribbon(alpha = 0.2) +
 scale_x_continuous("\nRendezvous points included in extrapolation",
   labels = percent) +
-scale_y_continuous("Deviation from network totals\n", labels = percent)
+scale_y_continuous("Deviation from actual\nhidden-service traffic\n",
+  labels = percent)
 ggsave("out/graphs/report/sim-cells.pdf", width = 5, height = 3,
   dpi = 100)
 
 # Graph simulation results for .onion addresses.
 o <- read.csv("out/csv/sim-onions.csv")
-ggplot(o, aes(x = frac, y = (p500 - 40000) / 40000,
-  ymin = (p025 - 40000) / 40000, ymax = (p975 - 40000) / 40000)) +
+o <- do.call(data.frame, aggregate(list(X = o$wiqm),
+  by = list(frac = o$frac), FUN = quantile, probs = c(0.025, 0.5, 0.975)))
+ggplot(o, aes(x = frac, y = (X.50. / 12 - 40000) / 40000,
+  ymin = (X.2.5. / 12 - 40000) / 40000,
+  ymax = (X.97.5. / 12 - 40000) / 40000)) +
 geom_line() +
 geom_ribbon(alpha = 0.2) +
 scale_x_continuous("\nDirectories included in extrapolation",
   labels = percent) +
-scale_y_continuous("Deviation from network totals\n", labels = percent)
+scale_y_continuous("Deviation from actual\nnumber of .onions\n",
+  labels = percent)
 ggsave("out/graphs/report/sim-onions.pdf", width = 5, height = 3,
   dpi = 100)
 
diff --git a/task-13192/src/java/Aggregate.java b/task-13192/src/java/Aggregate.java
new file mode 100644
index 0000000..56bac2c
--- /dev/null
+++ b/task-13192/src/java/Aggregate.java
@@ -0,0 +1,129 @@
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TimeZone;
+import java.util.TreeMap;
+
+public class Aggregate {
+
+  private static File hidservStatsCsvFile =
+      new File("out/csv/hidserv-stats.csv");
+
+  private static File hidservStatsExtrapolatedCsvFile =
+      new File("out/csv/hidserv-stats-extrapolated.csv");
+
+  public static void main(String[] args) throws Exception {
+    aggregate();
+  }
+
+  private static final DateFormat DATE_TIME_FORMAT, DATE_FORMAT;
+
+  static {
+    DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    DATE_TIME_FORMAT.setLenient(false);
+    DATE_TIME_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
+    DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");
+    DATE_FORMAT.setLenient(false);
+    DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
+  }
+
+  private static void aggregate() throws Exception {
+    if (!hidservStatsCsvFile.exists() ||
+        hidservStatsCsvFile.isDirectory()) {
+      System.err.println("Unable to read "
+          + hidservStatsCsvFile.getAbsolutePath() + ".  Exiting.");
+      System.exit(1);
+    }
+    SortedMap<String, List<double[]>>
+        extrapolatedCells = new TreeMap<String, List<double[]>>(),
+        extrapolatedOnions = new TreeMap<String, List<double[]>>();
+    BufferedReader br = new BufferedReader(new FileReader(
+        hidservStatsCsvFile));
+    String line = br.readLine();
+    while ((line = br.readLine()) != null) {
+      String[] parts = line.split(",");
+      String date = DATE_FORMAT.format(DATE_TIME_FORMAT.parse(parts[2]));
+      double hidservRendRelayedCells = Double.parseDouble(parts[3]),
+          hidservDirOnionsSeen = Double.parseDouble(parts[4]),
+          fracRendRelayedCells = Double.parseDouble(parts[5]),
+          fracDirOnionsSeen = Double.parseDouble(parts[6]);
+      
+      if (fracRendRelayedCells > 0.0) {
+        if (!extrapolatedCells.containsKey(date)) {
+          extrapolatedCells.put(date, new ArrayList<double[]>());
+        }
+        extrapolatedCells.get(date).add(new double[] {
+            hidservRendRelayedCells * 512.0 * 8.0
+            / (86400.0 * 1000000.0 * fracRendRelayedCells),
+            fracRendRelayedCells });
+      }
+      if (fracDirOnionsSeen > 0.0) {
+        if (!extrapolatedOnions.containsKey(date)) {
+          extrapolatedOnions.put(date, new ArrayList<double[]>());
+        }
+        extrapolatedOnions.get(date).add(new double[] {
+            hidservDirOnionsSeen / (12.0 * fracDirOnionsSeen),
+            fracDirOnionsSeen });
+      }
+    }
+    br.close();
+    hidservStatsExtrapolatedCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        hidservStatsExtrapolatedCsvFile));
+    bw.write("date,type,wmean,wmedian,wiqm\n");
+    for (int i = 0; i < 2; i++) {
+      String type = i == 0 ? "cells" : "onions";
+      SortedMap<String, List<double[]>> extrapolated = i == 0
+          ? extrapolatedCells : extrapolatedOnions;
+      for (Map.Entry<String, List<double[]>> e :
+        extrapolated.entrySet()) {
+        String date = e.getKey();
+        List<double[]> weightedValues = e.getValue();
+        double totalFrac = 0.0;
+        for (double[] d : weightedValues) {
+          totalFrac += d[1];
+        }
+        if (totalFrac < 0.01) {
+          continue;
+        }
+        Collections.sort(weightedValues,
+            new Comparator<double[]>() {
+          public int compare(double[] o1, double[] o2) {
+            return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
+          }
+        });
+        double totalWeighted = 0.0, totalProbability = 0.0;
+        double totalInterquartileFrac = 0.0,
+            totalWeightedInterquartile = 0.0;
+        Double weightedMedian = null;
+        for (double[] d : weightedValues) {
+          totalWeighted += d[0] * d[1];
+          totalProbability += d[1];
+          if (weightedMedian == null &&
+              totalProbability > totalFrac * 0.5) {
+            weightedMedian = d[0];
+          }
+          if (totalProbability >= totalFrac * 0.25 &&
+              totalProbability - d[1] <= totalFrac * 0.75) {
+            totalWeightedInterquartile += d[0] * d[1];
+            totalInterquartileFrac += d[1];
+          }
+        }
+        bw.write(String.format("%s,%s,%.0f,%.0f,%.0f%n", date, type,
+            totalWeighted / totalProbability, weightedMedian,
+            totalWeightedInterquartile / totalInterquartileFrac));
+      }
+    }
+    bw.close();
+  }
+}
diff --git a/task-13192/src/java/Extrapolate.java b/task-13192/src/java/Extrapolate.java
new file mode 100644
index 0000000..29ff518
--- /dev/null
+++ b/task-13192/src/java/Extrapolate.java
@@ -0,0 +1,424 @@
+import java.io.BufferedWriter;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileWriter;
+import java.math.BigInteger;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+
+public class Extrapolate {
+
+  private static File archiveExtraInfosDirectory =
+      new File("in/collector/archive/relay-descriptors/extra-infos/");
+
+  private static File recentExtraInfosDirectory =
+      new File("in/collector/recent/relay-descriptors/extra-infos/");
+
+  private static File archiveConsensuses =
+      new File("in/collector/archive/relay-descriptors/consensuses/");
+
+  private static File recentConsensuses =
+      new File("in/collector/recent/relay-descriptors/consensuses/");
+
+  private static File hidservStatsCsvFile =
+      new File("out/csv/hidserv-stats.csv");
+
+  public static void main(String[] args) throws Exception {
+    System.out.println("Extracting hidserv-* lines from extra-info "
+        + "descriptors...");
+    SortedMap<String, SortedSet<HidServStats>> hidServStats =
+        extractHidServStats();
+    System.out.println("Extracting fractions from consensuses...");
+    SortedMap<String, SortedSet<ConsensusFraction>> consensusFractions =
+        extractConsensusFractions(hidServStats.keySet());
+    System.out.println("Extrapolating statistics...");
+    extrapolateHidServStats(hidServStats, consensusFractions);
+    System.out.println(new Date() + " Terminating.");
+  }
+
+  private static final DateFormat DATE_TIME_FORMAT;
+
+  static {
+    DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    DATE_TIME_FORMAT.setLenient(false);
+    DATE_TIME_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
+  }
+
+  private static class HidServStats implements Comparable<HidServStats> {
+
+    /* Hidden-service statistics end timestamp in milliseconds. */
+    private long statsEndMillis;
+
+    /* Statistics interval length in seconds. */
+    private long statsIntervalSeconds;
+
+    /* Number of relayed cells reported by the relay and adjusted by
+     * rounding to the nearest right side of a bin and subtracting half of
+     * the bin size. */
+    private long rendRelayedCells;
+
+    /* Number of .onions reported by the relay and adjusted by rounding to
+     * the nearest right side of a bin and subtracting half of the bin
+     * size. */
+    private long dirOnionsSeen;
+
+    private HidServStats(long statsEndMillis, long statsIntervalSeconds,
+        long rendRelayedCells, long dirOnionsSeen) {
+      this.statsEndMillis = statsEndMillis;
+      this.statsIntervalSeconds = statsIntervalSeconds;
+      this.rendRelayedCells = rendRelayedCells;
+      this.dirOnionsSeen = dirOnionsSeen;
+    }
+
+    @Override
+    public boolean equals(Object otherObject) {
+      if (!(otherObject instanceof HidServStats)) {
+        return false;
+      }
+      HidServStats other = (HidServStats) otherObject;
+      return this.statsEndMillis == other.statsEndMillis &&
+          this.statsIntervalSeconds == other.statsIntervalSeconds &&
+          this.rendRelayedCells == other.rendRelayedCells &&
+          this.dirOnionsSeen == other.dirOnionsSeen;
+    }
+
+    @Override
+    public int compareTo(HidServStats other) {
+      return this.statsEndMillis < other.statsEndMillis ? -1 :
+          this.statsEndMillis > other.statsEndMillis ? 1 : 0;
+    }
+  }
+
+  /* Extract fingerprint and hidserv-* lines from extra-info descriptors
+   * located in in/{archive,recent}/relay-descriptors/extra-infos/. */
+  private static SortedMap<String, SortedSet<HidServStats>>
+      extractHidServStats() {
+    SortedMap<String, SortedSet<HidServStats>> extractedHidServStats =
+        new TreeMap<String, SortedSet<HidServStats>>();
+    DescriptorReader descriptorReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    descriptorReader.addDirectory(archiveExtraInfosDirectory);
+    descriptorReader.addDirectory(recentExtraInfosDirectory);
+    Iterator<DescriptorFile> descriptorFiles =
+        descriptorReader.readDescriptors();
+    while (descriptorFiles.hasNext()) {
+      DescriptorFile descriptorFile = descriptorFiles.next();
+      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+        if (!(descriptor instanceof ExtraInfoDescriptor)) {
+          continue;
+        }
+        String fingerprint =
+            ((ExtraInfoDescriptor) descriptor).getFingerprint();
+        Scanner scanner = new Scanner(new ByteArrayInputStream(
+            descriptor.getRawDescriptorBytes()));
+        Long statsEndMillis = null, statsIntervalSeconds = null,
+            rendRelayedCells = null, dirOnionsSeen = null;
+        try {
+          while (scanner.hasNext()) {
+            String line = scanner.nextLine();
+            if (line.startsWith("hidserv-")) {
+              String[] parts = line.split(" ");
+              if (parts[0].equals("hidserv-stats-end")) {
+                if (parts.length != 5 || !parts[3].startsWith("(") ||
+                    !parts[4].equals("s)")) {
+                  /* Will warn below, because statsEndMillis and
+                   * statsIntervalSeconds are still null. */
+                  continue;
+                }
+                statsEndMillis = DATE_TIME_FORMAT.parse(
+                    parts[1] + " " + parts[2]).getTime();
+                statsIntervalSeconds =
+                    Long.parseLong(parts[3].substring(1));
+              } else if (parts[0].equals("hidserv-rend-relayed-cells")) {
+                if (parts.length != 5 ||
+                    !parts[4].startsWith("bin_size=")) {
+                  /* Will warn below, because rendRelayedCells is still
+                   * null. */
+                  continue;
+                }
+                rendRelayedCells = removeNoise(Long.parseLong(parts[1]),
+                    Long.parseLong(parts[4].substring(9)));
+              } else if (parts[0].equals("hidserv-dir-onions-seen")) {
+                if (parts.length != 5 ||
+                    !parts[4].startsWith("bin_size=")) {
+                  /* Will warn below, because dirOnionsSeen is still
+                   * null. */
+                  continue;
+                }
+                dirOnionsSeen = removeNoise(Long.parseLong(parts[1]),
+                    Long.parseLong(parts[4].substring(9)));
+              }
+            }
+          }
+        } catch (ParseException e) {
+          e.printStackTrace();
+          continue;
+        } catch (NumberFormatException e) {
+          e.printStackTrace();
+          continue;
+        }
+        if (statsEndMillis == null && statsIntervalSeconds == null &&
+            rendRelayedCells == null && dirOnionsSeen == null) {
+          continue;
+        } else if (statsEndMillis != null && statsIntervalSeconds != null
+            && rendRelayedCells != null && dirOnionsSeen != null) {
+          if (!extractedHidServStats.containsKey(fingerprint)) {
+            extractedHidServStats.put(fingerprint,
+                new TreeSet<HidServStats>());
+          }
+          extractedHidServStats.get(fingerprint).add(new HidServStats(
+              statsEndMillis, statsIntervalSeconds, rendRelayedCells,
+              dirOnionsSeen));
+        } else {
+          System.err.println("Relay " + fingerprint + " published "
+              + "incomplete hidserv-stats.  Ignoring.");
+        }
+      }
+    }
+    return extractedHidServStats;
+  }
+
+  private static long removeNoise(long reportedNumber, long binSize) {
+    long roundedToNearestRightSideOfTheBin =
+        ((reportedNumber + binSize / 2) / binSize) * binSize;
+    long subtractedHalfOfBinSize =
+        roundedToNearestRightSideOfTheBin - binSize / 2;
+    return subtractedHalfOfBinSize;
+  }
+
+  private static class ConsensusFraction
+      implements Comparable<ConsensusFraction> {
+
+    /* Valid-after timestamp of the consensus in milliseconds. */
+    private long validAfterMillis;
+
+    /* Fresh-until timestamp of the consensus in milliseconds. */
+    private long freshUntilMillis;
+
+    /* Probability for being selected by clients as rendezvous point. */
+    private double probabilityRendezvousPoint;
+
+    /* Probability for being selected as directory.  This is the fraction
+     * of descriptors identifiers that this relay has been responsible
+     * for, divided by 3. */
+    private double fractionResponsibleDescriptors;
+
+    private ConsensusFraction(long validAfterMillis,
+        long freshUntilMillis,
+        double probabilityRendezvousPoint,
+        double fractionResponsibleDescriptors) {
+      this.validAfterMillis = validAfterMillis;
+      this.freshUntilMillis = freshUntilMillis;
+      this.probabilityRendezvousPoint = probabilityRendezvousPoint;
+      this.fractionResponsibleDescriptors =
+          fractionResponsibleDescriptors;
+    }
+
+    @Override
+    public boolean equals(Object otherObject) {
+      if (!(otherObject instanceof ConsensusFraction)) {
+        return false;
+      }
+      ConsensusFraction other = (ConsensusFraction) otherObject;
+      return this.validAfterMillis == other.validAfterMillis &&
+          this.freshUntilMillis == other.freshUntilMillis &&
+          this.fractionResponsibleDescriptors ==
+          other.fractionResponsibleDescriptors &&
+          this.probabilityRendezvousPoint ==
+          other.probabilityRendezvousPoint;
+    }
+
+    @Override
+    public int compareTo(ConsensusFraction other) {
+      return this.validAfterMillis < other.validAfterMillis ? -1 :
+          this.validAfterMillis > other.validAfterMillis ? 1 : 0;
+    }
+  }
+
+  /* Extract fractions that relays were responsible for from consensuses
+   * located in in/{archive,recent}/relay-descriptors/consensuses/. */
+  private static SortedMap<String, SortedSet<ConsensusFraction>>
+      extractConsensusFractions(Collection<String> fingerprints) {
+    SortedMap<String, SortedSet<ConsensusFraction>>
+        extractedConsensusFractions =
+        new TreeMap<String, SortedSet<ConsensusFraction>>();
+    DescriptorReader descriptorReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    descriptorReader.addDirectory(archiveConsensuses);
+    descriptorReader.addDirectory(recentConsensuses);
+    Iterator<DescriptorFile> descriptorFiles =
+        descriptorReader.readDescriptors();
+    while (descriptorFiles.hasNext()) {
+      DescriptorFile descriptorFile = descriptorFiles.next();
+      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+        if (!(descriptor instanceof RelayNetworkStatusConsensus)) {
+          continue;
+        }
+        RelayNetworkStatusConsensus consensus =
+            (RelayNetworkStatusConsensus) descriptor;
+        SortedSet<String> weightKeys = new TreeSet<String>(Arrays.asList(
+            "Wmg,Wmm,Wme,Wmd".split(",")));
+        weightKeys.removeAll(consensus.getBandwidthWeights().keySet());
+        if (!weightKeys.isEmpty()) {
+          System.err.println("Consensus with valid-after time "
+              + DATE_TIME_FORMAT.format(consensus.getValidAfterMillis())
+              + " doesn't contain expected Wmx weights.  Skipping.");
+          continue;
+        }
+        double wmg = ((double) consensus.getBandwidthWeights().get("Wmg"))
+            / 10000.0;
+        double wmm = ((double) consensus.getBandwidthWeights().get("Wmm"))
+            / 10000.0;
+        double wme = ((double) consensus.getBandwidthWeights().get("Wme"))
+            / 10000.0;
+        double wmd = ((double) consensus.getBandwidthWeights().get("Wmd"))
+            / 10000.0;
+        SortedSet<String> hsDirs = new TreeSet<String>(
+            Collections.reverseOrder());
+        double totalWeightsRendezvousPoint = 0.0;
+        SortedMap<String, Double> weightsRendezvousPoint =
+            new TreeMap<String, Double>();
+        for (Map.Entry<String, NetworkStatusEntry> e :
+            consensus.getStatusEntries().entrySet()) {
+          String fingerprint = e.getKey();
+          NetworkStatusEntry statusEntry = e.getValue();
+          SortedSet<String> flags = statusEntry.getFlags();
+          if (flags.contains("HSDir")) {
+            hsDirs.add(statusEntry.getFingerprint());
+          }
+          double weightRendezvousPoint = 0.0;
+          if (flags.contains("Fast")) {
+            weightRendezvousPoint = (double) statusEntry.getBandwidth();
+            if (flags.contains("Guard") && flags.contains("Exit")) {
+              weightRendezvousPoint *= wmd;
+            } else if (flags.contains("Guard")) {
+              weightRendezvousPoint *= wmg;
+            } else if (flags.contains("Exit")) {
+              weightRendezvousPoint *= wme;
+            } else {
+              weightRendezvousPoint *= wmm;
+            }
+          }
+          weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint);
+          totalWeightsRendezvousPoint += weightRendezvousPoint;
+        }
+        /* Add all HSDir fingerprints with leading "0" and "1" to
+         * simplify the logic to traverse the ring start. */
+        SortedSet<String> hsDirsCopy = new TreeSet<String>(hsDirs);
+        hsDirs.clear();
+        for (String fingerprint : hsDirsCopy) {
+          hsDirs.add("0" + fingerprint);
+          hsDirs.add("1" + fingerprint);
+        }
+        final double RING_SIZE = new BigInteger(
+            "10000000000000000000000000000000000000000",
+            16).doubleValue();
+        for (String fingerprint : fingerprints) {
+          double probabilityRendezvousPoint = 0.0,
+              fractionDescriptors = 0.0;
+          NetworkStatusEntry statusEntry =
+              consensus.getStatusEntry(fingerprint);
+          if (statusEntry != null) {
+            if (hsDirs.contains("1" + fingerprint)) {
+              String startResponsible = fingerprint;
+              int positionsToGo = 3;
+              for (String hsDirFingerprint :
+                  hsDirs.tailSet("1" + fingerprint)) {
+                startResponsible = hsDirFingerprint;
+                if (positionsToGo-- <= 0) {
+                  break;
+                }
+              }
+              fractionDescriptors =
+                  new BigInteger("1" + fingerprint, 16).subtract(
+                  new BigInteger(startResponsible, 16)).doubleValue()
+                  / RING_SIZE;
+              fractionDescriptors /= 3.0;
+            }
+            probabilityRendezvousPoint =
+                weightsRendezvousPoint.get(fingerprint)
+                / totalWeightsRendezvousPoint;
+          }
+          if (!extractedConsensusFractions.containsKey(fingerprint)) {
+            extractedConsensusFractions.put(fingerprint,
+                new TreeSet<ConsensusFraction>());
+          }
+          extractedConsensusFractions.get(fingerprint).add(
+              new ConsensusFraction(consensus.getValidAfterMillis(),
+              consensus.getFreshUntilMillis(), probabilityRendezvousPoint,
+              fractionDescriptors));
+        }
+      }
+    }
+    return extractedConsensusFractions;
+  }
+
+  private static void extrapolateHidServStats(
+      SortedMap<String, SortedSet<HidServStats>> hidServStats,
+      SortedMap<String, SortedSet<ConsensusFraction>>
+      consensusFractions) throws Exception {
+    hidservStatsCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(
+        new FileWriter(hidservStatsCsvFile));
+    bw.write("fingerprint,stats_start,stats_end,"
+        + "hidserv_rend_relayed_cells,hidserv_dir_onions_seen,"
+        + "frac_rend_relayed_cells,frac_dir_onions_seen\n");
+    for (Map.Entry<String, SortedSet<HidServStats>> e :
+      hidServStats.entrySet()) {
+      String fingerprint = e.getKey();
+      if (!consensusFractions.containsKey(fingerprint)) {
+        System.err.println("We have hidserv-stats but no consensus "
+            + "fractions for " + fingerprint + ".  Skipping.");
+        continue;
+      }
+      for (HidServStats stats : e.getValue()) {
+        long statsStartMillis = stats.statsEndMillis
+            - stats.statsIntervalSeconds * 1000L;
+        double sumProbabilityRendezvousPoint = 0.0,
+            sumResponsibleDescriptors = 0.0;
+        int statusEntries = 0;
+        for (ConsensusFraction frac :
+            consensusFractions.get(fingerprint)) {
+          if (statsStartMillis <= frac.validAfterMillis &&
+              frac.validAfterMillis < stats.statsEndMillis) {
+            sumProbabilityRendezvousPoint +=
+                frac.probabilityRendezvousPoint;
+            sumResponsibleDescriptors +=
+                frac.fractionResponsibleDescriptors;
+            statusEntries++;
+          }
+        }
+        double fracCells = sumProbabilityRendezvousPoint / statusEntries,
+            fracDescs = sumResponsibleDescriptors / statusEntries;
+        bw.write(String.format("%s,%s,%s,%d,%d,%.8f,%.8f%n", fingerprint,
+            DATE_TIME_FORMAT.format(statsStartMillis),
+            DATE_TIME_FORMAT.format(stats.statsEndMillis),
+            stats.rendRelayedCells, stats.dirOnionsSeen, fracCells,
+            fracDescs));
+      }
+    }
+    bw.close();
+  }
+}
+
diff --git a/task-13192/src/java/ExtrapolateHidServStats.java b/task-13192/src/java/ExtrapolateHidServStats.java
deleted file mode 100644
index 100520d..0000000
--- a/task-13192/src/java/ExtrapolateHidServStats.java
+++ /dev/null
@@ -1,722 +0,0 @@
-import java.io.BufferedWriter;
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileWriter;
-import java.math.BigInteger;
-import java.text.DateFormat;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.Scanner;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.TreeSet;
-
-import org.torproject.descriptor.Descriptor;
-import org.torproject.descriptor.DescriptorFile;
-import org.torproject.descriptor.DescriptorReader;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.ExtraInfoDescriptor;
-import org.torproject.descriptor.NetworkStatusEntry;
-import org.torproject.descriptor.RelayNetworkStatusConsensus;
-
-public class ExtrapolateHidServStats {
-
-  private static File archiveExtraInfosDirectory =
-      new File("in/collector/archive/relay-descriptors/extra-infos/");
-
-  private static File recentExtraInfosDirectory =
-      new File("in/collector/recent/relay-descriptors/extra-infos/");
-
-  private static File archiveConsensuses =
-      new File("in/collector/archive/relay-descriptors/consensuses/");
-
-  private static File recentConsensuses =
-      new File("in/collector/recent/relay-descriptors/consensuses/");
-
-  private static File hidservStatsCsvFile =
-      new File("out/csv/hidserv-stats.csv");
-
-  private static File simCellsCsvFile =
-      new File("out/csv/sim-cells.csv");
-
-  private static File simOnionsCsvFile =
-      new File("out/csv/sim-onions.csv");
-
-  public static void main(String[] args) throws Exception {
-    System.out.println("Extracting hidserv-* lines from extra-info "
-        + "descriptors...");
-    SortedMap<String, SortedSet<HidServStats>> hidServStats =
-        extractHidServStats();
-    System.out.println("Extracting fractions from consensuses...");
-    SortedMap<String, SortedSet<ConsensusFraction>> consensusFractions =
-        extractConsensusFractions(hidServStats.keySet());
-    System.out.println("Extrapolating statistics...");
-    extrapolateHidServStats(hidServStats, consensusFractions);
-    System.out.println("Simulating extrapolation of rendezvous cells...");
-    simulateCells();
-    System.out.println("Simulating extrapolation of .onions...");
-    simulateOnions();
-    System.out.println("Terminating.");
-  }
-
-  private static final DateFormat DATE_TIME_FORMAT;
-
-  static {
-    DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-    DATE_TIME_FORMAT.setLenient(false);
-    DATE_TIME_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
-  }
-
-  private static class HidServStats implements Comparable<HidServStats> {
-
-    /* Hidden-service statistics end timestamp in milliseconds. */
-    private long statsEndMillis;
-
-    /* Statistics interval length in seconds. */
-    private long statsIntervalSeconds;
-
-    /* Number of relayed cells reported by the relay and adjusted by
-     * rounding to the nearest right side of a bin and subtracting half of
-     * the bin size. */
-    private long rendRelayedCells;
-
-    /* Number of .onions reported by the relay and adjusted by rounding to
-     * the nearest right side of a bin and subtracting half of the bin
-     * size. */
-    private long dirOnionsSeen;
-
-    private HidServStats(long statsEndMillis, long statsIntervalSeconds,
-        long rendRelayedCells, long dirOnionsSeen) {
-      this.statsEndMillis = statsEndMillis;
-      this.statsIntervalSeconds = statsIntervalSeconds;
-      this.rendRelayedCells = rendRelayedCells;
-      this.dirOnionsSeen = dirOnionsSeen;
-    }
-
-    @Override
-    public boolean equals(Object otherObject) {
-      if (!(otherObject instanceof HidServStats)) {
-        return false;
-      }
-      HidServStats other = (HidServStats) otherObject;
-      return this.statsEndMillis == other.statsEndMillis &&
-          this.statsIntervalSeconds == other.statsIntervalSeconds &&
-          this.rendRelayedCells == other.rendRelayedCells &&
-          this.dirOnionsSeen == other.dirOnionsSeen;
-    }
-
-    @Override
-    public int compareTo(HidServStats other) {
-      return this.statsEndMillis < other.statsEndMillis ? -1 :
-          this.statsEndMillis > other.statsEndMillis ? 1 : 0;
-    }
-  }
-
-  /* Extract fingerprint and hidserv-* lines from extra-info descriptors
-   * located in in/{archive,recent}/relay-descriptors/extra-infos/. */
-  private static SortedMap<String, SortedSet<HidServStats>>
-      extractHidServStats() {
-    SortedMap<String, SortedSet<HidServStats>> extractedHidServStats =
-        new TreeMap<String, SortedSet<HidServStats>>();
-    DescriptorReader descriptorReader =
-        DescriptorSourceFactory.createDescriptorReader();
-    descriptorReader.addDirectory(archiveExtraInfosDirectory);
-    descriptorReader.addDirectory(recentExtraInfosDirectory);
-    Iterator<DescriptorFile> descriptorFiles =
-        descriptorReader.readDescriptors();
-    while (descriptorFiles.hasNext()) {
-      DescriptorFile descriptorFile = descriptorFiles.next();
-      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
-        if (!(descriptor instanceof ExtraInfoDescriptor)) {
-          continue;
-        }
-        String fingerprint =
-            ((ExtraInfoDescriptor) descriptor).getFingerprint();
-        Scanner scanner = new Scanner(new ByteArrayInputStream(
-            descriptor.getRawDescriptorBytes()));
-        Long statsEndMillis = null, statsIntervalSeconds = null,
-            rendRelayedCells = null, dirOnionsSeen = null;
-        try {
-          while (scanner.hasNext()) {
-            String line = scanner.nextLine();
-            if (line.startsWith("hidserv-")) {
-              String[] parts = line.split(" ");
-              if (parts[0].equals("hidserv-stats-end")) {
-                if (parts.length != 5 || !parts[3].startsWith("(") ||
-                    !parts[4].equals("s)")) {
-                  /* Will warn below, because statsEndMillis and
-                   * statsIntervalSeconds are still null. */
-                  continue;
-                }
-                statsEndMillis = DATE_TIME_FORMAT.parse(
-                    parts[1] + " " + parts[2]).getTime();
-                statsIntervalSeconds =
-                    Long.parseLong(parts[3].substring(1));
-              } else if (parts[0].equals("hidserv-rend-relayed-cells")) {
-                if (parts.length != 5 ||
-                    !parts[4].startsWith("bin_size=")) {
-                  /* Will warn below, because rendRelayedCells is still
-                   * null. */
-                  continue;
-                }
-                rendRelayedCells = removeNoise(Long.parseLong(parts[1]),
-                    Long.parseLong(parts[4].substring(9)));
-              } else if (parts[0].equals("hidserv-dir-onions-seen")) {
-                if (parts.length != 5 ||
-                    !parts[4].startsWith("bin_size=")) {
-                  /* Will warn below, because dirOnionsSeen is still
-                   * null. */
-                  continue;
-                }
-                dirOnionsSeen = removeNoise(Long.parseLong(parts[1]),
-                    Long.parseLong(parts[4].substring(9)));
-              }
-            }
-          }
-        } catch (ParseException e) {
-          e.printStackTrace();
-          continue;
-        } catch (NumberFormatException e) {
-          e.printStackTrace();
-          continue;
-        }
-        if (statsEndMillis == null && statsIntervalSeconds == null &&
-            rendRelayedCells == null && dirOnionsSeen == null) {
-          continue;
-        } else if (statsEndMillis != null && statsIntervalSeconds != null
-            && rendRelayedCells != null && dirOnionsSeen != null) {
-          if (!extractedHidServStats.containsKey(fingerprint)) {
-            extractedHidServStats.put(fingerprint,
-                new TreeSet<HidServStats>());
-          }
-          extractedHidServStats.get(fingerprint).add(new HidServStats(
-              statsEndMillis, statsIntervalSeconds, rendRelayedCells,
-              dirOnionsSeen));
-        } else {
-          System.err.println("Relay " + fingerprint + " published "
-              + "incomplete hidserv-stats.  Ignoring.");
-        }
-      }
-    }
-    return extractedHidServStats;
-  }
-
-  private static long removeNoise(long reportedNumber, long binSize) {
-    long roundedToNearestRightSideOfTheBin =
-        ((reportedNumber + binSize / 2) / binSize) * binSize;
-    long subtractedHalfOfBinSize =
-        roundedToNearestRightSideOfTheBin - binSize / 2;
-    return subtractedHalfOfBinSize;
-  }
-
-  private static class ConsensusFraction
-      implements Comparable<ConsensusFraction> {
-
-    /* Valid-after timestamp of the consensus in milliseconds. */
-    private long validAfterMillis;
-
-    /* Fresh-until timestamp of the consensus in milliseconds. */
-    private long freshUntilMillis;
-
-    /* Fraction of consensus weight in [0.0, 1.0] of this relay. */
-    private double fractionConsensusWeight;
-
-    /* Probability for being selected by clients as rendezvous point. */
-    private double probabilityRendezvousPoint;
-
-    /* Fraction of descriptor identifiers in [0.0, 1.0] that this relay
-     * has been responsible for.  This is the "distance" from the
-     * fingerprint of the relay three HSDir positions earlier in the ring
-     * to the fingerprint of this relay.  Fractions of all HSDirs in a
-     * consensus add up to 3.0, not 1.0. */
-    private double fractionResponsibleDescriptors;
-
-    private ConsensusFraction(long validAfterMillis,
-        long freshUntilMillis,
-        double fractionConsensusWeight,
-        double probabilityRendezvousPoint,
-        double fractionResponsibleDescriptors) {
-      this.validAfterMillis = validAfterMillis;
-      this.freshUntilMillis = freshUntilMillis;
-      this.fractionConsensusWeight = fractionConsensusWeight;
-      this.probabilityRendezvousPoint = probabilityRendezvousPoint;
-      this.fractionResponsibleDescriptors =
-          fractionResponsibleDescriptors;
-    }
-
-    @Override
-    public boolean equals(Object otherObject) {
-      if (!(otherObject instanceof ConsensusFraction)) {
-        return false;
-      }
-      ConsensusFraction other = (ConsensusFraction) otherObject;
-      return this.validAfterMillis == other.validAfterMillis &&
-          this.freshUntilMillis == other.freshUntilMillis &&
-          this.fractionResponsibleDescriptors ==
-          other.fractionResponsibleDescriptors &&
-          this.fractionConsensusWeight == other.fractionConsensusWeight &&
-          this.probabilityRendezvousPoint ==
-          other.probabilityRendezvousPoint;
-    }
-
-    @Override
-    public int compareTo(ConsensusFraction other) {
-      return this.validAfterMillis < other.validAfterMillis ? -1 :
-          this.validAfterMillis > other.validAfterMillis ? 1 : 0;
-    }
-  }
-
-  /* Extract fractions that relays were responsible for from consensuses
-   * located in in/{archive,recent}/relay-descriptors/consensuses/. */
-  private static SortedMap<String, SortedSet<ConsensusFraction>>
-      extractConsensusFractions(Collection<String> fingerprints) {
-    SortedMap<String, SortedSet<ConsensusFraction>>
-        extractedConsensusFractions =
-        new TreeMap<String, SortedSet<ConsensusFraction>>();
-    DescriptorReader descriptorReader =
-        DescriptorSourceFactory.createDescriptorReader();
-    descriptorReader.addDirectory(archiveConsensuses);
-    descriptorReader.addDirectory(recentConsensuses);
-    Iterator<DescriptorFile> descriptorFiles =
-        descriptorReader.readDescriptors();
-    while (descriptorFiles.hasNext()) {
-      DescriptorFile descriptorFile = descriptorFiles.next();
-      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
-        if (!(descriptor instanceof RelayNetworkStatusConsensus)) {
-          continue;
-        }
-        RelayNetworkStatusConsensus consensus =
-            (RelayNetworkStatusConsensus) descriptor;
-        SortedSet<String> weightKeys = new TreeSet<String>(Arrays.asList(
-            "Wmg,Wmm,Wme,Wmd".split(",")));
-        weightKeys.removeAll(consensus.getBandwidthWeights().keySet());
-        if (!weightKeys.isEmpty()) {
-          System.err.println("Consensus with valid-after time "
-              + DATE_TIME_FORMAT.format(consensus.getValidAfterMillis())
-              + " doesn't contain expected Wmx weights.  Skipping.");
-          continue;
-        }
-        double wmg = ((double) consensus.getBandwidthWeights().get("Wmg"))
-            / 10000.0;
-        double wmm = ((double) consensus.getBandwidthWeights().get("Wmm"))
-            / 10000.0;
-        double wme = ((double) consensus.getBandwidthWeights().get("Wme"))
-            / 10000.0;
-        double wmd = ((double) consensus.getBandwidthWeights().get("Wmd"))
-            / 10000.0;
-        SortedSet<String> hsDirs = new TreeSet<String>(
-            Collections.reverseOrder());
-        long totalConsensusWeight = 0L;
-        double totalWeightsRendezvousPoint = 0.0;
-        SortedMap<String, Double> weightsRendezvousPoint =
-            new TreeMap<String, Double>();
-        for (Map.Entry<String, NetworkStatusEntry> e :
-            consensus.getStatusEntries().entrySet()) {
-          String fingerprint = e.getKey();
-          NetworkStatusEntry statusEntry = e.getValue();
-          SortedSet<String> flags = statusEntry.getFlags();
-          if (flags.contains("HSDir")) {
-            hsDirs.add(statusEntry.getFingerprint());
-          }
-          totalConsensusWeight += statusEntry.getBandwidth();
-          double weightRendezvousPoint = 0.0;
-          if (flags.contains("Fast")) {
-            weightRendezvousPoint = (double) statusEntry.getBandwidth();
-            if (flags.contains("Guard") && flags.contains("Exit")) {
-              weightRendezvousPoint *= wmd;
-            } else if (flags.contains("Guard")) {
-              weightRendezvousPoint *= wmg;
-            } else if (flags.contains("Exit")) {
-              weightRendezvousPoint *= wme;
-            } else {
-              weightRendezvousPoint *= wmm;
-            }
-          }
-          weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint);
-          totalWeightsRendezvousPoint += weightRendezvousPoint;
-        }
-        /* Add all HSDir fingerprints with leading "0" and "1" to
-         * simplify the logic to traverse the ring start. */
-        SortedSet<String> hsDirsCopy = new TreeSet<String>(hsDirs);
-        hsDirs.clear();
-        for (String fingerprint : hsDirsCopy) {
-          hsDirs.add("0" + fingerprint);
-          hsDirs.add("1" + fingerprint);
-        }
-        final double RING_SIZE = new BigInteger(
-            "10000000000000000000000000000000000000000",
-            16).doubleValue();
-        for (String fingerprint : fingerprints) {
-          double probabilityRendezvousPoint = 0.0,
-              fractionResponsibleDescriptors = 0.0,
-              fractionConsensusWeight = 0.0;
-          NetworkStatusEntry statusEntry =
-              consensus.getStatusEntry(fingerprint);
-          if (statusEntry != null) {
-            if (hsDirs.contains("1" + fingerprint)) {
-              String startResponsible = fingerprint;
-              int positionsToGo = 3;
-              for (String hsDirFingerprint :
-                  hsDirs.tailSet("1" + fingerprint)) {
-                startResponsible = hsDirFingerprint;
-                if (positionsToGo-- <= 0) {
-                  break;
-                }
-              }
-              fractionResponsibleDescriptors =
-                  new BigInteger("1" + fingerprint, 16).subtract(
-                  new BigInteger(startResponsible, 16)).doubleValue()
-                  / RING_SIZE;
-            }
-            fractionConsensusWeight =
-                ((double) statusEntry.getBandwidth())
-                / ((double) totalConsensusWeight);
-            probabilityRendezvousPoint =
-                weightsRendezvousPoint.get(fingerprint)
-                / totalWeightsRendezvousPoint;
-          }
-          if (!extractedConsensusFractions.containsKey(fingerprint)) {
-            extractedConsensusFractions.put(fingerprint,
-                new TreeSet<ConsensusFraction>());
-          }
-          extractedConsensusFractions.get(fingerprint).add(
-              new ConsensusFraction(consensus.getValidAfterMillis(),
-              consensus.getFreshUntilMillis(), fractionConsensusWeight,
-              probabilityRendezvousPoint,
-              fractionResponsibleDescriptors));
-        }
-      }
-    }
-    return extractedConsensusFractions;
-  }
-
-  private static void extrapolateHidServStats(
-      SortedMap<String, SortedSet<HidServStats>> hidServStats,
-      SortedMap<String, SortedSet<ConsensusFraction>>
-      consensusFractions) throws Exception {
-    hidservStatsCsvFile.getParentFile().mkdirs();
-    BufferedWriter bw = new BufferedWriter(
-        new FileWriter(hidservStatsCsvFile));
-    bw.write("fingerprint,stats_start,stats_end,"
-        + "hidserv_rend_relayed_cells,hidserv_dir_onions_seen,"
-        + "prob_rend_point,frac_hsdesc\n");
-    for (Map.Entry<String, SortedSet<HidServStats>> e :
-      hidServStats.entrySet()) {
-      String fingerprint = e.getKey();
-      if (!consensusFractions.containsKey(fingerprint)) {
-        System.err.println("We have hidserv-stats but no consensus "
-            + "fractions for " + fingerprint + ".  Skipping.");
-        continue;
-      }
-      for (HidServStats stats : e.getValue()) {
-        long statsStartMillis = stats.statsEndMillis
-            - stats.statsIntervalSeconds * 1000L;
-        double sumProbabilityRendezvousPoint = 0.0,
-            sumResponsibleDescriptors = 0.0;
-        int statusEntries = 0;
-        for (ConsensusFraction frac :
-            consensusFractions.get(fingerprint)) {
-          if (statsStartMillis <= frac.validAfterMillis &&
-              frac.validAfterMillis < stats.statsEndMillis) {
-            sumProbabilityRendezvousPoint +=
-                frac.probabilityRendezvousPoint;
-            sumResponsibleDescriptors +=
-                frac.fractionResponsibleDescriptors;
-            statusEntries++;
-          }
-        }
-        bw.write(String.format("%s,%s,%s,%d,%d,%.8f,%.8f%n", fingerprint,
-            DATE_TIME_FORMAT.format(statsStartMillis),
-            DATE_TIME_FORMAT.format(stats.statsEndMillis),
-            stats.rendRelayedCells, stats.dirOnionsSeen,
-            sumProbabilityRendezvousPoint / statusEntries,
-            sumResponsibleDescriptors / statusEntries));
-      }
-    }
-    bw.close();
-  }
-
-  private static Random rnd = new Random(3);
-
-  private static void simulateCells() throws Exception {
-
-    /* Generate consensus weights following an exponential distribution
-     * with lambda = 1 for 3000 potential rendezvous points. */
-    final int numberRendPoints = 3000;
-    double[] consensusWeights = new double[numberRendPoints];
-    double totalConsensusWeight = 0.0;
-    for (int i = 0; i < numberRendPoints; i++) {
-      double consensusWeight = -Math.log(1.0 - rnd.nextDouble());
-      consensusWeights[i] = consensusWeight;
-      totalConsensusWeight += consensusWeight;
-    }
-
-    /* Compute probabilities for being selected as rendezvous point. */
-    double[] probRendPoint = new double[numberRendPoints];
-    for (int i = 0; i < numberRendPoints; i++) {
-      probRendPoint[i] = consensusWeights[i] / totalConsensusWeight;
-    }
-
-    /* Generate 10,000,000,000 (roughly 60 MiB/s) cells in chunks
-     * following an exponential distribution with lambda = 0.00001 and
-     * randomly assign them to a rendezvous point to report them later. */
-    long cellsLeft = 10000000000L;
-    final double cellsLambda = 0.00001;
-    long[] observedCells = new long[numberRendPoints];
-    while (cellsLeft > 0) {
-      long cells = (long) (-Math.log(1.0 - rnd.nextDouble())
-          / cellsLambda);
-      double selectRendPoint = rnd.nextDouble();
-      for (int i = 0; i < probRendPoint.length; i++) {
-        selectRendPoint -= probRendPoint[i];
-        if (selectRendPoint <= 0.0) {
-          observedCells[i] += cells;
-          break;
-        }
-      }
-      cellsLeft -= cells;
-    }
-
-    /* Obfuscate reports using binning and Laplace noise, and then attempt
-     * to remove noise again. */
-    final long binSize = 1024L;
-    final double b = 2048.0 / 0.3;
-    long[] reportedCells = new long[numberRendPoints];
-    long[] removedNoiseCells = new long[numberRendPoints];
-    for (int i = 0; i < numberRendPoints; i++) {
-      long observed = observedCells[i];
-      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
-      double p = rnd.nextDouble();
-      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
-          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
-      long reported = afterBinning + (long) laplaceNoise;
-      reportedCells[i] = reported;
-      long roundedToNearestRightSideOfTheBin =
-          ((reported + binSize / 2) / binSize) * binSize;
-      long subtractedHalfOfBinSize =
-          roundedToNearestRightSideOfTheBin - binSize / 2;
-      removedNoiseCells[i] = subtractedHalfOfBinSize;
-    }
-
-    /* Perform 10,000 extrapolations from random fractions of reports by
-     * probability to be selected as rendezvous point. */
-    simCellsCsvFile.getParentFile().mkdirs();
-    BufferedWriter bw = new BufferedWriter(new FileWriter(
-        simCellsCsvFile));
-    bw.write("frac,p025,p500,p975\n");
-    double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
-        0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
-    final int numberOfExtrapolations = 10000;
-    for (double fraction : fractions) {
-      List<Long> extrapolations = new ArrayList<Long>();
-      for (int i = 0; i < numberOfExtrapolations; i++) {
-        SortedSet<Integer> nonReportingRelays = new TreeSet<Integer>();
-        for (int j = 0; j < numberRendPoints; j++) {
-          nonReportingRelays.add(j);
-        }
-        List<Integer> shuffledRelays = new ArrayList<Integer>(
-            nonReportingRelays);
-        Collections.shuffle(shuffledRelays);
-        SortedSet<Integer> reportingRelays = new TreeSet<Integer>();
-        for (int j = 0; j < (int) ((double) numberRendPoints * fraction);
-            j++) {
-          reportingRelays.add(shuffledRelays.get(j));
-          nonReportingRelays.remove(shuffledRelays.get(j));
-        }
-        double reportingProbability;
-        long totalReports;
-        do {
-          reportingProbability = 0.0;
-          totalReports = 0L;
-          for (int reportingRelay : reportingRelays) {
-            reportingProbability += probRendPoint[reportingRelay];
-            totalReports += removedNoiseCells[reportingRelay];
-          }
-          if (reportingProbability < fraction - 0.001) {
-            int addRelay = new ArrayList<Integer>(nonReportingRelays).get(
-                rnd.nextInt(nonReportingRelays.size()));
-            nonReportingRelays.remove(addRelay);
-            reportingRelays.add(addRelay);
-          } else if (reportingProbability > fraction + 0.001) {
-            int removeRelay = new ArrayList<Integer>(reportingRelays).get(
-                rnd.nextInt(reportingRelays.size()));
-            reportingRelays.remove(removeRelay);
-            nonReportingRelays.add(removeRelay);
-          }
-        } while (reportingProbability < fraction - 0.001 ||
-            reportingProbability > fraction + 0.001);
-        extrapolations.add((long) ((double) totalReports
-            / reportingProbability));
-      }
-      Collections.sort(extrapolations);
-      long p025 = extrapolations.get((extrapolations.size() * 25) / 1000),
-          p500 = extrapolations.get((extrapolations.size() * 500) / 1000),
-          p975 = extrapolations.get((extrapolations.size() * 975) / 1000);
-      bw.write(String.format("%.2f,%d,%d,%d%n", fraction, p025, p500,
-          p975));
-    }
-    bw.close();
-  }
-
-  private static void simulateOnions() throws Exception {
-
-    /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */
-    final int numberHsDirs = 3000;
-    SortedSet<Double> hsDirFingerprints = new TreeSet<Double>();
-    for (int i = 0; i < numberHsDirs; i++) {
-      hsDirFingerprints.add(rnd.nextDouble());
-    }
-
-    /* Compute fractions of observed descriptor space. */
-    SortedSet<Double> ring =
-        new TreeSet<Double>(Collections.reverseOrder());
-    for (double fingerprint : hsDirFingerprints) {
-      ring.add(fingerprint);
-      ring.add(fingerprint - 1.0);
-    }
-    SortedMap<Double, Double> hsDirFractions =
-        new TreeMap<Double, Double>();
-    for (double fingerprint : hsDirFingerprints) {
-      double start = fingerprint;
-      int positionsToGo = 3;
-      for (double prev : ring.tailSet(fingerprint)) {
-        start = prev;
-        if (positionsToGo-- <= 0) {
-          break;
-        }
-      }
-      hsDirFractions.put(fingerprint, fingerprint - start);
-    }
-
-    /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */
-    final int numberOnions = 40000;
-    final int replicas = 4;
-    final int storeOnDirs = 3;
-    SortedMap<Double, SortedSet<Integer>> storedDescs =
-        new TreeMap<Double, SortedSet<Integer>>();
-    for (double fingerprint : hsDirFingerprints) {
-      storedDescs.put(fingerprint, new TreeSet<Integer>());
-    }
-    for (int i = 0; i < numberOnions; i++) {
-      for (int j = 0; j < replicas; j++) {
-        int leftToStore = storeOnDirs;
-        for (double fingerprint :
-            hsDirFingerprints.tailSet(rnd.nextDouble())) {
-          storedDescs.get(fingerprint).add(i);
-          if (--leftToStore <= 0) {
-            break;
-          }
-        }
-        if (leftToStore > 0) {
-          for (double fingerprint : hsDirFingerprints) {
-            storedDescs.get(fingerprint).add(i);
-            if (--leftToStore <= 0) {
-              break;
-            }
-          }
-        }
-      }
-    }
-
-    /* Obfuscate reports using binning and Laplace noise, and then attempt
-     * to remove noise again. */
-    final long binSize = 8L;
-    final double b = 8.0 / 0.3;
-    SortedMap<Double, Long> reportedOnions = new TreeMap<Double, Long>(),
-        removedNoiseOnions = new TreeMap<Double, Long>();
-    for (Map.Entry<Double, SortedSet<Integer>> e :
-      storedDescs.entrySet()) {
-      double fingerprint = e.getKey();
-      long observed = (long) e.getValue().size();
-      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
-      double p = rnd.nextDouble();
-      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
-          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
-      long reported = afterBinning + (long) laplaceNoise;
-      reportedOnions.put(fingerprint, reported);
-      long roundedToNearestRightSideOfTheBin =
-          ((reported + binSize / 2) / binSize) * binSize;
-      long subtractedHalfOfBinSize =
-          roundedToNearestRightSideOfTheBin - binSize / 2;
-      removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize);
-    }
-
-    /* Perform 10,000 extrapolations from random fractions of reports by
-     * probability to be selected as rendezvous point. */
-    simOnionsCsvFile.getParentFile().mkdirs();
-    BufferedWriter bw = new BufferedWriter(new FileWriter(
-        simOnionsCsvFile));
-    bw.write("frac,p025,p500,p975\n");
-    double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
-        0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
-    final int numberOfExtrapolations = 10000;
-    for (double fraction : fractions) {
-      List<Long> extrapolationsTwo = new ArrayList<Long>();
-      for (int i = 0; i < numberOfExtrapolations; i++) {
-        SortedSet<Double> nonReportingRelays =
-            new TreeSet<Double>(hsDirFractions.keySet());
-        List<Double> shuffledRelays = new ArrayList<Double>(
-            nonReportingRelays);
-        Collections.shuffle(shuffledRelays);
-        SortedSet<Double> reportingRelays = new TreeSet<Double>();
-        for (int j = 0; j < (int) ((double) hsDirFractions.size()
-            * fraction); j++) {
-          reportingRelays.add(shuffledRelays.get(j));
-          nonReportingRelays.remove(shuffledRelays.get(j));
-        }
-        double reportingProbability;
-        long totalReports;
-        do {
-          reportingProbability = 0.0;
-          totalReports = 0L;
-          for (double reportingRelay : reportingRelays) {
-            reportingProbability += hsDirFractions.get(reportingRelay)
-                / 3.0;
-            totalReports += removedNoiseOnions.get(reportingRelay);
-          }
-          if (reportingProbability < fraction - 0.001) {
-            double addRelay =
-                new ArrayList<Double>(nonReportingRelays).get(
-                rnd.nextInt(nonReportingRelays.size()));
-            nonReportingRelays.remove(addRelay);
-            reportingRelays.add(addRelay);
-          } else if (reportingProbability > fraction + 0.001) {
-            double removeRelay =
-                new ArrayList<Double>(reportingRelays).get(
-                rnd.nextInt(reportingRelays.size()));
-            reportingRelays.remove(removeRelay);
-            nonReportingRelays.add(removeRelay);
-          }
-        } while (reportingProbability < fraction - 0.001 ||
-            reportingProbability > fraction + 0.001);
-        double totalFraction = 0.0;
-        for (double fingerprint : reportingRelays) {
-          totalFraction += hsDirFractions.get(fingerprint) * 4.0;
-        }
-        extrapolationsTwo.add((long) ((double) totalReports
-            / totalFraction));
-      }
-      Collections.sort(extrapolationsTwo);
-      long pTwo025 = extrapolationsTwo.get(
-          (extrapolationsTwo.size() * 25) / 1000),
-          pTwo500 = extrapolationsTwo.get(
-          (extrapolationsTwo.size() * 500) / 1000),
-          pTwo975 = extrapolationsTwo.get(
-          (extrapolationsTwo.size() * 975) / 1000);
-      bw.write(String.format("%.2f,%d,%d,%d%n", fraction, pTwo025,
-          pTwo500, pTwo975));
-    }
-    bw.close();
-  }
-}
-
diff --git a/task-13192/src/java/Simulate.java b/task-13192/src/java/Simulate.java
new file mode 100644
index 0000000..e41e02c
--- /dev/null
+++ b/task-13192/src/java/Simulate.java
@@ -0,0 +1,357 @@
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+
+public class Simulate {
+  private static File simCellsCsvFile =
+      new File("out/csv/sim-cells.csv");
+
+  private static File simOnionsCsvFile =
+      new File("out/csv/sim-onions.csv");
+
+  public static void main(String[] args) throws Exception {
+    System.out.print("Simulating extrapolation of rendezvous cells");
+    simulateManyCells();
+    System.out.print("\nSimulating extrapolation of .onions");
+    simulateManyOnions();
+    System.out.println("\nTerminating.");
+  }
+
+  private static Random rnd = new Random();
+
+  private static void simulateManyCells() throws Exception {
+    simCellsCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        simCellsCsvFile));
+    bw.write("run,frac,wmean,wmedian,wiqm\n");
+    final int numberOfExtrapolations = 1000;
+    for (int i = 0; i < numberOfExtrapolations; i++) {
+      bw.write(simulateCells(i));
+      System.out.print(".");
+    }
+    bw.close();
+  }
+
+  private static void simulateManyOnions() throws Exception {
+    simOnionsCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        simOnionsCsvFile));
+    bw.write("run,frac,wmean,wmedian,wiqm\n");
+    final int numberOfExtrapolations = 1000;
+    for (int i = 0; i < numberOfExtrapolations; i++) {
+      bw.write(simulateOnions(i));
+      System.out.print(".");
+    }
+    bw.close();
+  }
+
+  private static String simulateCells(int run) {
+
+    /* Generate consensus weights following an exponential distribution
+     * with lambda = 1 for 3000 potential rendezvous points. */
+    final int numberRendPoints = 3000;
+    double[] consensusWeights = new double[numberRendPoints];
+    double totalConsensusWeight = 0.0;
+    for (int i = 0; i < numberRendPoints; i++) {
+      double consensusWeight = -Math.log(1.0 - rnd.nextDouble());
+      consensusWeights[i] = consensusWeight;
+      totalConsensusWeight += consensusWeight;
+    }
+
+    /* Compute probabilities for being selected as rendezvous point. */
+    double[] probRendPoint = new double[numberRendPoints];
+    for (int i = 0; i < numberRendPoints; i++) {
+      probRendPoint[i] = consensusWeights[i] / totalConsensusWeight;
+    }
+
+    /* Generate 10,000,000,000 cells (474 Mbit/s) in chunks following an
+     * exponential distribution with lambda = 0.0001, so on average
+     * 10,000 cells per chunk, and randomly assign them to a rendezvous
+     * point to report them later. */
+    long cellsLeft = 10000000000L;
+    final double cellsLambda = 0.0001;
+    long[] observedCells = new long[numberRendPoints];
+    while (cellsLeft > 0) {
+      long cells = Math.min(cellsLeft,
+          (long) (-Math.log(1.0 - rnd.nextDouble()) / cellsLambda));
+      double selectRendPoint = rnd.nextDouble();
+      for (int i = 0; i < probRendPoint.length; i++) {
+        selectRendPoint -= probRendPoint[i];
+        if (selectRendPoint <= 0.0) {
+          observedCells[i] += cells;
+          break;
+        }
+      }
+      cellsLeft -= cells;
+    }
+
+    /* Obfuscate reports using binning and Laplace noise, and then attempt
+     * to remove noise again. */
+    final long binSize = 1024L;
+    final double b = 2048.0 / 0.3;
+    long[] reportedCells = new long[numberRendPoints];
+    long[] removedNoiseCells = new long[numberRendPoints];
+    for (int i = 0; i < numberRendPoints; i++) {
+      long observed = observedCells[i];
+      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+      double p = rnd.nextDouble();
+      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+      long reported = afterBinning + (long) laplaceNoise;
+      reportedCells[i] = reported;
+      long roundedToNearestRightSideOfTheBin =
+          ((reported + binSize / 2) / binSize) * binSize;
+      long subtractedHalfOfBinSize =
+          roundedToNearestRightSideOfTheBin - binSize / 2;
+      removedNoiseCells[i] = subtractedHalfOfBinSize;
+    }
+
+    /* Perform extrapolations from random fractions of reports by
+     * probability to be selected as rendezvous point. */
+    StringBuilder sb = new StringBuilder();
+    double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
+        0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+    for (double fraction : fractions) {
+      SortedSet<Integer> nonReportingRelays = new TreeSet<Integer>();
+      for (int j = 0; j < numberRendPoints; j++) {
+        nonReportingRelays.add(j);
+      }
+      List<Integer> shuffledRelays = new ArrayList<Integer>(
+          nonReportingRelays);
+      Collections.shuffle(shuffledRelays);
+      SortedSet<Integer> reportingRelays = new TreeSet<Integer>();
+      for (int j = 0; j < (int) ((double) numberRendPoints * fraction);
+          j++) {
+        reportingRelays.add(shuffledRelays.get(j));
+        nonReportingRelays.remove(shuffledRelays.get(j));
+      }
+      List<double[]> singleRelayExtrapolations;
+      double totalReportingProbability;
+      do {
+        singleRelayExtrapolations = new ArrayList<double[]>();
+        totalReportingProbability = 0.0;
+        for (int reportingRelay : reportingRelays) {
+          double probability = probRendPoint[reportingRelay];
+          if (probability > 0.0) {
+            singleRelayExtrapolations.add(
+                new double[] {
+                    removedNoiseCells[reportingRelay] / probability,
+                    removedNoiseCells[reportingRelay],
+                    probability });
+          }
+          totalReportingProbability += probability;
+        }
+        if (totalReportingProbability < fraction - 0.001) {
+          int addRelay = new ArrayList<Integer>(nonReportingRelays).get(
+              rnd.nextInt(nonReportingRelays.size()));
+          nonReportingRelays.remove(addRelay);
+          reportingRelays.add(addRelay);
+        } else if (totalReportingProbability > fraction + 0.001) {
+          int removeRelay = new ArrayList<Integer>(reportingRelays).get(
+              rnd.nextInt(reportingRelays.size()));
+          reportingRelays.remove(removeRelay);
+          nonReportingRelays.add(removeRelay);
+        }
+      } while (totalReportingProbability < fraction - 0.001 ||
+          totalReportingProbability > fraction + 0.001);
+      Collections.sort(singleRelayExtrapolations,
+          new Comparator<double[]>() {
+        public int compare(double[] o1, double[] o2) {
+          return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
+        }
+      });
+      double totalProbability = 0.0, totalValues = 0.0;
+      double totalInterquartileProbability = 0.0,
+          totalInterquartileValues = 0.0;
+      Double weightedMedian = null;
+      for (double[] extrapolation : singleRelayExtrapolations) {
+        totalValues += extrapolation[1];
+        totalProbability += extrapolation[2];
+        if (weightedMedian == null &&
+            totalProbability > totalReportingProbability * 0.5) {
+          weightedMedian = extrapolation[0];
+        }
+        if (totalProbability > totalReportingProbability * 0.25 &&
+            totalProbability < totalReportingProbability * 0.75) {
+          totalInterquartileValues += extrapolation[1];
+          totalInterquartileProbability += extrapolation[2];
+        }
+      }
+      sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
+          totalValues / totalProbability, weightedMedian,
+          totalInterquartileValues / totalInterquartileProbability));
+    }
+    return sb.toString();
+  }
+
+  private static String simulateOnions(final int run) {
+
+    /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */
+    final int numberHsDirs = 3000;
+    SortedSet<Double> hsDirFingerprints = new TreeSet<Double>();
+    for (int i = 0; i < numberHsDirs; i++) {
+      hsDirFingerprints.add(rnd.nextDouble());
+    }
+
+    /* Compute fractions of observed descriptor space. */
+    SortedSet<Double> ring =
+        new TreeSet<Double>(Collections.reverseOrder());
+    for (double fingerprint : hsDirFingerprints) {
+      ring.add(fingerprint);
+      ring.add(fingerprint - 1.0);
+    }
+    SortedMap<Double, Double> hsDirFractions =
+        new TreeMap<Double, Double>();
+    for (double fingerprint : hsDirFingerprints) {
+      double start = fingerprint;
+      int positionsToGo = 3;
+      for (double prev : ring.tailSet(fingerprint)) {
+        start = prev;
+        if (positionsToGo-- <= 0) {
+          break;
+        }
+      }
+      hsDirFractions.put(fingerprint, fingerprint - start);
+    }
+
+    /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */
+    final int numberOnions = 40000;
+    final int replicas = 4;
+    final int storeOnDirs = 3;
+    SortedMap<Double, SortedSet<Integer>> storedDescs =
+        new TreeMap<Double, SortedSet<Integer>>();
+    for (double fingerprint : hsDirFingerprints) {
+      storedDescs.put(fingerprint, new TreeSet<Integer>());
+    }
+    for (int i = 0; i < numberOnions; i++) {
+      for (int j = 0; j < replicas; j++) {
+        int leftToStore = storeOnDirs;
+        for (double fingerprint :
+            hsDirFingerprints.tailSet(rnd.nextDouble())) {
+          storedDescs.get(fingerprint).add(i);
+          if (--leftToStore <= 0) {
+            break;
+          }
+        }
+        if (leftToStore > 0) {
+          for (double fingerprint : hsDirFingerprints) {
+            storedDescs.get(fingerprint).add(i);
+            if (--leftToStore <= 0) {
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    /* Obfuscate reports using binning and Laplace noise, and then attempt
+     * to remove noise again. */
+    final long binSize = 8L;
+    final double b = 8.0 / 0.3;
+    SortedMap<Double, Long> reportedOnions = new TreeMap<Double, Long>(),
+        removedNoiseOnions = new TreeMap<Double, Long>();
+    for (Map.Entry<Double, SortedSet<Integer>> e :
+      storedDescs.entrySet()) {
+      double fingerprint = e.getKey();
+      long observed = (long) e.getValue().size();
+      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+      double p = rnd.nextDouble();
+      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+      long reported = afterBinning + (long) laplaceNoise;
+      reportedOnions.put(fingerprint, reported);
+      long roundedToNearestRightSideOfTheBin =
+          ((reported + binSize / 2) / binSize) * binSize;
+      long subtractedHalfOfBinSize =
+          roundedToNearestRightSideOfTheBin - binSize / 2;
+      removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize);
+    }
+
+    /* Perform extrapolations from random fractions of reports by
+     * probability to be selected as rendezvous point. */
+    StringBuilder sb = new StringBuilder();
+    double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
+        0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+    for (double fraction : fractions) {
+      SortedSet<Double> nonReportingRelays =
+          new TreeSet<Double>(hsDirFractions.keySet());
+      List<Double> shuffledRelays = new ArrayList<Double>(
+          nonReportingRelays);
+      Collections.shuffle(shuffledRelays);
+      SortedSet<Double> reportingRelays = new TreeSet<Double>();
+      for (int j = 0; j < (int) ((double) hsDirFractions.size()
+          * fraction); j++) {
+        reportingRelays.add(shuffledRelays.get(j));
+        nonReportingRelays.remove(shuffledRelays.get(j));
+      }
+      List<double[]> singleRelayExtrapolations;
+      double totalReportingProbability;
+      do {
+        singleRelayExtrapolations = new ArrayList<double[]>();
+        totalReportingProbability = 0.0;
+        for (double reportingRelay : reportingRelays) {
+          double probability = hsDirFractions.get(reportingRelay) / 3.0;
+          if (probability > 0.0) {
+            singleRelayExtrapolations.add(
+                new double[] { removedNoiseOnions.get(reportingRelay)
+                    / probability, removedNoiseOnions.get(reportingRelay),
+                    probability });
+          }
+          totalReportingProbability += probability;
+        }
+        if (totalReportingProbability < fraction - 0.001) {
+          double addRelay =
+              new ArrayList<Double>(nonReportingRelays).get(
+              rnd.nextInt(nonReportingRelays.size()));
+          nonReportingRelays.remove(addRelay);
+          reportingRelays.add(addRelay);
+        } else if (totalReportingProbability > fraction + 0.001) {
+          double removeRelay =
+              new ArrayList<Double>(reportingRelays).get(
+              rnd.nextInt(reportingRelays.size()));
+          reportingRelays.remove(removeRelay);
+          nonReportingRelays.add(removeRelay);
+        }
+      } while (totalReportingProbability < fraction - 0.001 ||
+          totalReportingProbability > fraction + 0.001);
+      Collections.sort(singleRelayExtrapolations,
+          new Comparator<double[]>() {
+        public int compare(double[] o1, double[] o2) {
+          return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
+        }
+      });
+      double totalProbability = 0.0, totalValues = 0.0;
+      double totalInterquartileProbability = 0.0,
+          totalInterquartileValues = 0.0;
+      Double weightedMedian = null;
+      for (double[] extrapolation : singleRelayExtrapolations) {
+        totalValues += extrapolation[1];
+        totalProbability += extrapolation[2];
+        if (weightedMedian == null &&
+            totalProbability > totalReportingProbability * 0.5) {
+          weightedMedian = extrapolation[0];
+        }
+        if (totalProbability > totalReportingProbability * 0.25 &&
+            totalProbability < totalReportingProbability * 0.75) {
+          totalInterquartileValues += extrapolation[1];
+          totalInterquartileProbability += extrapolation[2];
+        }
+      }
+      sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
+          totalValues / totalProbability, weightedMedian,
+          totalInterquartileValues / totalInterquartileProbability));
+    }
+    return sb.toString();
+  }
+}



More information about the tor-commits mailing list