commit 3c90c181a13dcc12c69e8e8fa013948b1a6405e2 Author: Karsten Loesing karsten.loesing@gmx.net Date: Mon Feb 9 18:25:38 2015 +0100
Update hidserv-stats extrapolation code (#13192). --- task-13192/.gitignore | 1 + task-13192/src/R/blog.R | 47 ++ task-13192/src/R/plot.R | 214 +++---- task-13192/src/java/Aggregate.java | 129 ++++ task-13192/src/java/Extrapolate.java | 424 +++++++++++++ task-13192/src/java/ExtrapolateHidServStats.java | 722 ---------------------- task-13192/src/java/Simulate.java | 357 +++++++++++ 7 files changed, 1066 insertions(+), 828 deletions(-)
diff --git a/task-13192/.gitignore b/task-13192/.gitignore index 7e8bf3b..89d161b 100644 --- a/task-13192/.gitignore +++ b/task-13192/.gitignore @@ -4,4 +4,5 @@ in/ src/bash/ src/bin/ out/ +Rplots.pdf
diff --git a/task-13192/src/R/blog.R b/task-13192/src/R/blog.R new file mode 100644 index 0000000..82a07e9 --- /dev/null +++ b/task-13192/src/R/blog.R @@ -0,0 +1,47 @@ +# Load required libraries. +require(ggplot2, warn.conflicts = FALSE, quietly = TRUE) +require(scales, warn.conflicts = FALSE, quietly = TRUE) +require(reshape, warn.conflicts = FALSE, quietly = TRUE) +require(splines, warn.conflicts = FALSE, quietly = TRUE) +require(Hmisc, warn.conflicts = FALSE, quietly = TRUE) + +# Read .csv files written by Java. +h <- read.csv("out/csv/hidserv-stats.csv", stringsAsFactors = FALSE) + +# Create directories for graphs. +dir.create(file.path("out", "graphs", "blog"), showWarnings = FALSE, + recursive = TRUE) + +# Cut off last two days, because stats might be incomplete for those. +h <- h[as.Date(h$stats_end) < max(as.Date(h$stats_end) - 1), ] + +# Graph the number of reported stats by day. +h7 <- data.frame(date = as.Date(h$stats_end), reports = 1) +ggplot(h7, aes(x = date)) + +geom_bar(colour = 'lightgray', width = .7, binwidth = 1) + +scale_x_date("") + +scale_y_continuous("") +ggsave("out/graphs/blog/num-reported-stats.png", width = 10, height = 3, + dpi = 100) + +e <- read.csv("out/csv/hidserv-stats-extrapolated.csv", + stringsAsFactors = FALSE) +e <- melt(e, by = c("date", "type")) +e <- e[e$variable == "wiqm", ] +e <- rbind(e, data.frame(date = NA, type = c("onions", "cells"), + variable = NA, value = 0)) + +ggplot(e[e$type == "cells", ], aes(x = as.Date(date), y = value)) + +geom_line() + +scale_x_date(name = "") + +scale_y_continuous(name = "") +ggsave("out/graphs/blog/extrapolated-cells.png", width = 10, + height = 3, dpi = 100) + +ggplot(e[e$type != "cells", ], aes(x = as.Date(date), y = value)) + +geom_line() + +scale_x_date(name = "") + +scale_y_continuous(name = "") +ggsave("out/graphs/blog/extrapolated-onions.png", width = 10, + height = 3, dpi = 100) + diff --git a/task-13192/src/R/plot.R b/task-13192/src/R/plot.R index 991928b..552b810 100644 --- a/task-13192/src/R/plot.R +++ b/task-13192/src/R/plot.R @@ -5,17 +5,12 @@ require(reshape, warn.conflicts = FALSE, quietly = TRUE) require(splines, warn.conflicts = FALSE, quietly = TRUE) require(Hmisc, warn.conflicts = FALSE, quietly = TRUE)
-# Avoid scientific notation. -options(scipen = 15) - # Read .csv file written by Java. h <- read.csv("out/csv/hidserv-stats.csv", stringsAsFactors = FALSE)
# Create directories for graphs. dir.create(file.path("out", "graphs", "report"), showWarnings = FALSE, recursive = TRUE) -dir.create(file.path("out", "graphs", "slides"), showWarnings = FALSE, - recursive = TRUE)
# Cut off last two days, because stats might be incomplete for those. h <- h[as.Date(h$stats_end) < max(as.Date(h$stats_end) - 1), ] @@ -28,17 +23,15 @@ scale_x_date("") + scale_y_continuous("") ggsave("out/graphs/report/num-reported-stats.pdf", width = 10, height = 3, dpi = 100) -ggsave("out/graphs/slides/hidserv-12.png", width = 8, height = 3, - dpi = 100)
# Graph distributions of reported values by day. h1 <- data.frame(date = as.Date(h$stats_end), - traffic = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000), + traffic = h$hidserv_rend_relayed_cells * 512 * 8 / (86400 * 1e6), services = h$hidserv_dir_onions_seen) h1 <- melt(h1, "date") h1 <- data.frame(date = h1$date, - variable = ifelse(h1$variable == "traffic", "traffic in MB/s", - ".onion addresses"), value = h1$value) + variable = ifelse(h1$variable == "traffic", "traffic in Mbit/s", + "unique .onion addresses"), value = h1$value) ggplot(h1, aes(x = date, y = value, group = date)) + geom_boxplot() + facet_grid(variable ~ ., scales = "free_y") + @@ -49,23 +42,22 @@ ggsave("out/graphs/report/stats-by-day.pdf", width = 10, height = 5,
# Graph distributions of calculated fractions by day. h2 <- data.frame(date = as.Date(h$stats_end), - prob_rend_point = h$prob_rend_point, - x_frac_hsdesc = h$frac_hsdesc / 3.0) + frac_rend_relayed_cells = h$frac_rend_relayed_cells, x_frac_dir_onions_seen = h$frac_dir_onions_seen) h2 <- melt(h2, "date") h2 <- data.frame(date = h2$date, - variable = ifelse(h2$variable == "prob_rend_point", - "selected as rendezvous point", "responsible for a descriptor"), + variable = ifelse(h2$variable == "frac_rend_relayed_cells", + "cells on rendezvous circuits", "hidden-service descriptors"), value = h2$value) ggplot(h2, aes(x = date, y = value, group = date)) + geom_boxplot() + facet_grid(variable ~ ., scales = "free_y") + scale_x_date("") + -scale_y_continuous("Calculated probabilities\n", labels = percent) +scale_y_continuous("Calculated network fractions\n", labels = percent) ggsave("out/graphs/report/probs-by-relay.pdf", width = 10, height = 5, dpi = 100)
# Graph ECDF of cells reported by relays with rend point probability of 0. -h8 <- h[h$prob_rend_point == 0, +h8 <- h[h$frac_rend_relayed_cells == 0, "hidserv_rend_relayed_cells" ] h8 <- sort(h8) h8 <- data.frame(x = h8, y = (1:length(h8)) / length(h8)) @@ -75,13 +67,14 @@ laplace_cells <- function(x) { ggplot(h8, aes(x = x, y = y)) + geom_line() + stat_function(fun = laplace_cells, colour = "blue") + -scale_x_continuous("\nReported cells on rendezvous circuits") + -scale_y_continuous("Cumulative probability\n") +scale_x_continuous("\nReported cells on rendezvous circuits", + limits = c(max(h8[h8$y < 0.01, "x"]), min(h8[h8$y > 0.99, "x"]))) + +scale_y_continuous("Cumulative probability\n", labels = percent) ggsave("out/graphs/report/zero-prob-cells.pdf", width = 5, height = 3, dpi = 100)
# Graph ECDF of .onions reported by relays with HSDir probability of 0. -h9 <- h[h$frac_hsdesc == 0, "hidserv_dir_onions_seen"] +h9 <- h[h$frac_dir_onions_seen == 0, "hidserv_dir_onions_seen"] h9 <- sort(h9) h9 <- data.frame(x = h9, y = (1:length(h9)) / length(h9)) laplace_onions <- function(x) { @@ -90,51 +83,54 @@ laplace_onions <- function(x) { ggplot(h9, aes(x = x, y = y)) + geom_line() + stat_function(fun = laplace_onions, colour = "blue") + -scale_x_continuous("\nReported .onion addresses") + -scale_y_continuous("Cumulative probability\n") +scale_x_continuous("\nReported .onion addresses", + limits = c(max(h9[h9$y < 0.01, "x"]), min(h9[h9$y > 0.99, "x"]))) + +scale_y_continuous("Cumulative probability\n", labels = percent) ggsave("out/graphs/report/zero-prob-onions.pdf", width = 5, height = 3, dpi = 100)
# Graph correlation between reports and fractions per relay. h3 <- rbind( - data.frame(x = h$frac_hsdesc / 3.0, - y = ifelse(h$frac_hsdesc == 0, NA, h$hidserv_dir_onions_seen), + data.frame(x = h$frac_dir_onions_seen, + y = ifelse(h$frac_dir_onions_seen == 0, NA, h$hidserv_dir_onions_seen), facet = ".onion addresses"), - data.frame(x = h$prob_rend_point, - y = ifelse(h$prob_rend_point == 0, NA, - h$hidserv_rend_relayed_cells * 512 / (86400 * 1000)), - facet = "traffic in kB/s")) + data.frame(x = h$frac_rend_relayed_cells, + y = ifelse(h$frac_rend_relayed_cells == 0, NA, + h$hidserv_rend_relayed_cells * 512 * 8 / (86400 * 1e6)), + facet = "traffic in Mbit/s")) ggplot(h3[h3$facet == ".onion addresses", ], aes(x = x, y = y)) + geom_point(alpha = 0.5) + stat_smooth(method = "lm") + -scale_x_continuous(name = "\nProbability", labels = percent) + +scale_x_continuous(name = "\nFraction", labels = percent) + scale_y_continuous(name = "Reported .onion addresses\n") ggsave("out/graphs/report/corr-probs-onions-by-relay.pdf", width = 5, height = 3, dpi = 100) -ggplot(h3[h3$facet == "traffic in kB/s", ], aes(x = x, y = y)) + +ggplot(h3[h3$facet == "traffic in Mbit/s", ], aes(x = x, y = y)) + geom_point(alpha = 0.5) + stat_smooth(method = "lm") + -scale_x_continuous(name = "\nProbability", labels = percent) + -scale_y_continuous(name = "Reported traffic in kB/s\n") +scale_x_continuous(name = "\nFraction", labels = percent) + +scale_y_continuous(name = "Reported traffic in Mbit/s\n") ggsave("out/graphs/report/corr-probs-cells-by-relay.pdf", width = 5, height = 3, dpi = 100)
# Graph correlation between reports and fractions per day. h5 <- rbind( data.frame(date = as.Date(h$stats_end), - prob = ifelse(h$frac_hsdesc == 0, NA, h$frac_hsdesc / 3.0), + prob = ifelse(h$frac_dir_onions_seen == 0, NA, h$frac_dir_onions_seen), reported = h$hidserv_dir_onions_seen, facet = "published descriptor"), data.frame(date = as.Date(h$stats_end), - prob = ifelse(h$prob_rend_point == 0, NA, h$prob_rend_point), - reported = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000), - facet = "traffic in MB/s")) + prob = ifelse(h$frac_rend_relayed_cells == 0, NA, + h$frac_rend_relayed_cells), + reported = h$hidserv_rend_relayed_cells * 512 * 8 / (86400 * 1e6), + facet = "traffic in Mbit/s")) h5 <- na.omit(h5) h5 <- aggregate(list(prob = h5$prob, reported = h5$reported), by = list(date = h5$date, facet = h5$facet), FUN = sum) -ggplot(h5[h5$facet == "traffic in MB/s", ], aes(x = prob, y = reported)) + +ggplot(h5[h5$facet == "traffic in Mbit/s", ], + aes(x = prob, y = reported)) + geom_point(alpha = 0.5) + scale_x_continuous(name = "\nTotal probability", labels = percent) + -scale_y_continuous(name = "Total traffic in MB/s\n") + +scale_y_continuous(name = "Total traffic in Mbit/s\n") + stat_smooth(method = "lm") + geom_vline(xintercept = 0.01, linetype = 2) ggsave("out/graphs/report/corr-probs-cells-by-day.pdf", width = 5, @@ -149,98 +145,104 @@ geom_vline(xintercept = 0.01, linetype = 2) ggsave("out/graphs/report/corr-probs-onions-by-day.pdf", width = 5, height = 3, dpi = 100)
+# Graph ECDF of extrapolated cells. +h20 <- h[h$frac_rend_relayed_cells > 0, ] +h20 <- h20$hidserv_rend_relayed_cells * + 512 * 8 / (86400 * 1e6 * h20$frac_rend_relayed_cells) +h20 <- sort(h20) +h20 <- data.frame(x = h20, y = (1:length(h20)) / length(h20)) +ggplot(h20, aes(x = x, y = y)) + +geom_line() + +scale_x_continuous("\nExtrapolated total traffic in Mbit/s", + limits = c(max(h20[h20$y < 0.01, "x"]), min(h20[h20$y > 0.99, "x"]))) + +scale_y_continuous("Cumulative probability\n", labels = percent) +ggsave("out/graphs/report/extrapolated-cells.pdf", width = 5, height = 3, + dpi = 100) + +# Graph ECDF of extrapolated .onions. +h21 <- h[h$frac_dir_onions_seen > 0, ] +h21 <- h21$hidserv_dir_onions_seen / (12 * h21$frac_dir_onions_seen) +h21 <- sort(h21) +h21 <- data.frame(x = h21, y = (1:length(h21)) / length(h21)) +ggplot(h21, aes(x = x, y = y)) + +geom_line() + +scale_x_continuous("\nExtrapolated total .onion addresses", + limits = c(max(h21[h21$y < 0.01, "x"]), min(h21[h21$y > 0.99, "x"]))) + +scale_y_continuous("Cumulative probability\n", labels = percent) +ggsave("out/graphs/report/extrapolated-onions.pdf", width = 5, height = 3, + dpi = 100) + # Graph extrapolated network totals. -h6 <- data.frame(date = as.Date(h$stats_end), - traffic = ifelse(h$prob_rend_point == 0, 0, - h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000)), - prob_rend_point = h$prob_rend_point, - onions = ifelse(h$frac_hsdesc == 0, 0, h$hidserv_dir_onions_seen), - prob_onion = h$frac_hsdesc * 4.0) -h6 <- aggregate(list(traffic = h6$traffic, - prob_rend_point = h6$prob_rend_point, - onions = h6$onions, - prob_onion = h6$prob_onion), by = list(date = h6$date), FUN = sum) -h6 <- data.frame(date = h6$date, - traffic = ifelse(h6$prob_rend_point < 0.01, 0, - h6$traffic / h6$prob_rend_point), - onions = ifelse(h6$prob_onion / 12.0 < 0.01, 0, - h6$onions / h6$prob_onion)) -h6 <- melt(h6, "date") -h6 <- h6[h6$value > 0, ] -h6 <- rbind(h6, data.frame(date = NA, variable = c('traffic', 'onions'), - value = 0)) -h6 <- data.frame(date = h6$date, - variable = ifelse(h6$variable == "traffic", "total traffic in MB/s", - ".onion addresses"), value = h6$value) -ggplot(h6, aes(date, value)) + -facet_grid(variable ~ ., scales = "free_y") + -geom_point() + -stat_smooth() + +e <- read.csv("out/csv/hidserv-stats-extrapolated.csv", + stringsAsFactors = FALSE) +e <- melt(e, by = c("date", "type")) +e <- e[e$variable == "wiqm", ] +e <- rbind(e, data.frame(date = NA, type = c("onions", "cells"), + variable = NA, value = 0)) +e <- data.frame(e, label = ifelse(e$type == "cells", "traffic in Mbit/s", + "unique .onion addresses")) +ggplot(e, aes(x = as.Date(date), y = value)) + +geom_line() + +facet_grid(label ~ ., scales = "free_y") + scale_x_date(name = "") + scale_y_continuous(name = "Extrapolated network totals\n") ggsave("out/graphs/report/extrapolated-network-totals.pdf", width = 10, height = 5, dpi = 100)
-# Graph extrapolated number of .onion addresses. -h11 <- h6[h6$variable == ".onion addresses", ] -ggplot(h11, aes(x = date, y = value)) + -geom_point() + -stat_smooth() + -scale_x_date(name = "") + -scale_y_continuous(name = "") -ggsave("out/graphs/slides/hidserv-13.png", width = 8, height = 3, - dpi = 100) - -# Graph extrapolated fraction of hidden-service traffic. -b <- read.csv("in/metrics/bandwidth.csv", stringsAsFactors = FALSE) -b <- b[b$isexit == '' & b$isguard == '' & b$date > '2014-12-20', ] -h10 <- data.frame(date = as.Date(h$stats_end), - traffic = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000), - prob_rend_point = h$prob_rend_point) -h10 <- aggregate(list(traffic = h10$traffic, - prob_rend_point = h10$prob_rend_point), by = list(date = h10$date), - FUN = sum) -h10 <- data.frame(date = h10$date, - traffic = ifelse(h10$prob_rend_point < 0.01, 0, - h10$traffic / h10$prob_rend_point)) -h10 <- melt(h10, "date") -h10 <- h10[h10$value > 0, ] -h10 <- rbind(h10, data.frame(date = as.Date(b$date), variable = "bw", - value = b$bwread + b$bwwrite)) -h10 <- cast(h10, date ~ variable, value = "value") -h10 <- na.omit(h10) -h10 <- data.frame(date = h10$date, - value = h10$traffic * 1000 * 1000 / h10$bw) -h10 <- rbind(h10, data.frame(date = NA, value = 0)) -ggplot(h10, aes(x = date, y = value)) + -geom_point() + -scale_x_date(name = "") + -scale_y_continuous(name = "", labels = percent) + -stat_smooth() -ggsave("out/graphs/slides/hidserv-14.png", width = 8, height = 3, +# Graph distributions of calculated fractions by day. +h71 <- data.frame(date = as.Date(h$stats_end), + frac_rend_relayed_cells = h$frac_rend_relayed_cells, + frac_dir_onions_seen = h$frac_dir_onions_seen) +summary(h71) +h71 <- aggregate(list( + frac_rend_relayed_cells = h71$frac_rend_relayed_cells, + frac_dir_onions_seen = h71$frac_dir_onions_seen), + by = list(date = h71$date), FUN = sum) +summary(h71) +h71 <- melt(h71, "date") +summary(h71) +h71 <- data.frame(date = h71$date, + variable = ifelse(h71$variable == "frac_rend_relayed_cells", + "cells on rendezvous circuits", "hidden-service descriptors"), + value = h71$value) +ggplot(h71, aes(x = date, y = value)) + +geom_line() + +facet_grid(variable ~ ., scales = "free_y") + +geom_hline(yintercept = 0.01, linetype = 2) + +scale_x_date("") + +scale_y_continuous("Total calculated network fractions per day\n", + labels = percent) +ggsave("out/graphs/report/probs-by-day.pdf", width = 10, height = 5, dpi = 100)
# Graph simulation results for cells on rendezvous circuits. s <- read.csv("out/csv/sim-cells.csv") -ggplot(s, aes(x = frac, y = (p500 - 1e10) / 1e10, - ymin = (p025 - 1e10) / 1e10, ymax = (p975 - 1e10) / 1e10)) + +s <- do.call(data.frame, aggregate(list(X = s$wiqm), + by = list(frac = s$frac), FUN = quantile, probs = c(0.025, 0.5, 0.975))) +ggplot(s, aes(x = frac, y = (X.50. - 1e10) / 1e10, + ymin = (X.2.5. - 1e10) / 1e10, ymax = (X.97.5. - 1e10) / 1e10)) + geom_line() + geom_ribbon(alpha = 0.2) + scale_x_continuous("\nRendezvous points included in extrapolation", labels = percent) + -scale_y_continuous("Deviation from network totals\n", labels = percent) +scale_y_continuous("Deviation from actual\nhidden-service traffic\n", + labels = percent) ggsave("out/graphs/report/sim-cells.pdf", width = 5, height = 3, dpi = 100)
# Graph simulation results for .onion addresses. o <- read.csv("out/csv/sim-onions.csv") -ggplot(o, aes(x = frac, y = (p500 - 40000) / 40000, - ymin = (p025 - 40000) / 40000, ymax = (p975 - 40000) / 40000)) + +o <- do.call(data.frame, aggregate(list(X = o$wiqm), + by = list(frac = o$frac), FUN = quantile, probs = c(0.025, 0.5, 0.975))) +ggplot(o, aes(x = frac, y = (X.50. / 12 - 40000) / 40000, + ymin = (X.2.5. / 12 - 40000) / 40000, + ymax = (X.97.5. / 12 - 40000) / 40000)) + geom_line() + geom_ribbon(alpha = 0.2) + scale_x_continuous("\nDirectories included in extrapolation", labels = percent) + -scale_y_continuous("Deviation from network totals\n", labels = percent) +scale_y_continuous("Deviation from actual\nnumber of .onions\n", + labels = percent) ggsave("out/graphs/report/sim-onions.pdf", width = 5, height = 3, dpi = 100)
diff --git a/task-13192/src/java/Aggregate.java b/task-13192/src/java/Aggregate.java new file mode 100644 index 0000000..56bac2c --- /dev/null +++ b/task-13192/src/java/Aggregate.java @@ -0,0 +1,129 @@ +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TimeZone; +import java.util.TreeMap; + +public class Aggregate { + + private static File hidservStatsCsvFile = + new File("out/csv/hidserv-stats.csv"); + + private static File hidservStatsExtrapolatedCsvFile = + new File("out/csv/hidserv-stats-extrapolated.csv"); + + public static void main(String[] args) throws Exception { + aggregate(); + } + + private static final DateFormat DATE_TIME_FORMAT, DATE_FORMAT; + + static { + DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + DATE_TIME_FORMAT.setLenient(false); + DATE_TIME_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); + DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd"); + DATE_FORMAT.setLenient(false); + DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + private static void aggregate() throws Exception { + if (!hidservStatsCsvFile.exists() || + hidservStatsCsvFile.isDirectory()) { + System.err.println("Unable to read " + + hidservStatsCsvFile.getAbsolutePath() + ". Exiting."); + System.exit(1); + } + SortedMap<String, List<double[]>> + extrapolatedCells = new TreeMap<String, List<double[]>>(), + extrapolatedOnions = new TreeMap<String, List<double[]>>(); + BufferedReader br = new BufferedReader(new FileReader( + hidservStatsCsvFile)); + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] parts = line.split(","); + String date = DATE_FORMAT.format(DATE_TIME_FORMAT.parse(parts[2])); + double hidservRendRelayedCells = Double.parseDouble(parts[3]), + hidservDirOnionsSeen = Double.parseDouble(parts[4]), + fracRendRelayedCells = Double.parseDouble(parts[5]), + fracDirOnionsSeen = Double.parseDouble(parts[6]); + + if (fracRendRelayedCells > 0.0) { + if (!extrapolatedCells.containsKey(date)) { + extrapolatedCells.put(date, new ArrayList<double[]>()); + } + extrapolatedCells.get(date).add(new double[] { + hidservRendRelayedCells * 512.0 * 8.0 + / (86400.0 * 1000000.0 * fracRendRelayedCells), + fracRendRelayedCells }); + } + if (fracDirOnionsSeen > 0.0) { + if (!extrapolatedOnions.containsKey(date)) { + extrapolatedOnions.put(date, new ArrayList<double[]>()); + } + extrapolatedOnions.get(date).add(new double[] { + hidservDirOnionsSeen / (12.0 * fracDirOnionsSeen), + fracDirOnionsSeen }); + } + } + br.close(); + hidservStatsExtrapolatedCsvFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + hidservStatsExtrapolatedCsvFile)); + bw.write("date,type,wmean,wmedian,wiqm\n"); + for (int i = 0; i < 2; i++) { + String type = i == 0 ? "cells" : "onions"; + SortedMap<String, List<double[]>> extrapolated = i == 0 + ? extrapolatedCells : extrapolatedOnions; + for (Map.Entry<String, List<double[]>> e : + extrapolated.entrySet()) { + String date = e.getKey(); + List<double[]> weightedValues = e.getValue(); + double totalFrac = 0.0; + for (double[] d : weightedValues) { + totalFrac += d[1]; + } + if (totalFrac < 0.01) { + continue; + } + Collections.sort(weightedValues, + new Comparator<double[]>() { + public int compare(double[] o1, double[] o2) { + return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0; + } + }); + double totalWeighted = 0.0, totalProbability = 0.0; + double totalInterquartileFrac = 0.0, + totalWeightedInterquartile = 0.0; + Double weightedMedian = null; + for (double[] d : weightedValues) { + totalWeighted += d[0] * d[1]; + totalProbability += d[1]; + if (weightedMedian == null && + totalProbability > totalFrac * 0.5) { + weightedMedian = d[0]; + } + if (totalProbability >= totalFrac * 0.25 && + totalProbability - d[1] <= totalFrac * 0.75) { + totalWeightedInterquartile += d[0] * d[1]; + totalInterquartileFrac += d[1]; + } + } + bw.write(String.format("%s,%s,%.0f,%.0f,%.0f%n", date, type, + totalWeighted / totalProbability, weightedMedian, + totalWeightedInterquartile / totalInterquartileFrac)); + } + } + bw.close(); + } +} diff --git a/task-13192/src/java/Extrapolate.java b/task-13192/src/java/Extrapolate.java new file mode 100644 index 0000000..29ff518 --- /dev/null +++ b/task-13192/src/java/Extrapolate.java @@ -0,0 +1,424 @@ +import java.io.BufferedWriter; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileWriter; +import java.math.BigInteger; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.Iterator; +import java.util.Map; +import java.util.Scanner; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.ExtraInfoDescriptor; +import org.torproject.descriptor.NetworkStatusEntry; +import org.torproject.descriptor.RelayNetworkStatusConsensus; + +public class Extrapolate { + + private static File archiveExtraInfosDirectory = + new File("in/collector/archive/relay-descriptors/extra-infos/"); + + private static File recentExtraInfosDirectory = + new File("in/collector/recent/relay-descriptors/extra-infos/"); + + private static File archiveConsensuses = + new File("in/collector/archive/relay-descriptors/consensuses/"); + + private static File recentConsensuses = + new File("in/collector/recent/relay-descriptors/consensuses/"); + + private static File hidservStatsCsvFile = + new File("out/csv/hidserv-stats.csv"); + + public static void main(String[] args) throws Exception { + System.out.println("Extracting hidserv-* lines from extra-info " + + "descriptors..."); + SortedMap<String, SortedSet<HidServStats>> hidServStats = + extractHidServStats(); + System.out.println("Extracting fractions from consensuses..."); + SortedMap<String, SortedSet<ConsensusFraction>> consensusFractions = + extractConsensusFractions(hidServStats.keySet()); + System.out.println("Extrapolating statistics..."); + extrapolateHidServStats(hidServStats, consensusFractions); + System.out.println(new Date() + " Terminating."); + } + + private static final DateFormat DATE_TIME_FORMAT; + + static { + DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + DATE_TIME_FORMAT.setLenient(false); + DATE_TIME_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + private static class HidServStats implements Comparable<HidServStats> { + + /* Hidden-service statistics end timestamp in milliseconds. */ + private long statsEndMillis; + + /* Statistics interval length in seconds. */ + private long statsIntervalSeconds; + + /* Number of relayed cells reported by the relay and adjusted by + * rounding to the nearest right side of a bin and subtracting half of + * the bin size. */ + private long rendRelayedCells; + + /* Number of .onions reported by the relay and adjusted by rounding to + * the nearest right side of a bin and subtracting half of the bin + * size. */ + private long dirOnionsSeen; + + private HidServStats(long statsEndMillis, long statsIntervalSeconds, + long rendRelayedCells, long dirOnionsSeen) { + this.statsEndMillis = statsEndMillis; + this.statsIntervalSeconds = statsIntervalSeconds; + this.rendRelayedCells = rendRelayedCells; + this.dirOnionsSeen = dirOnionsSeen; + } + + @Override + public boolean equals(Object otherObject) { + if (!(otherObject instanceof HidServStats)) { + return false; + } + HidServStats other = (HidServStats) otherObject; + return this.statsEndMillis == other.statsEndMillis && + this.statsIntervalSeconds == other.statsIntervalSeconds && + this.rendRelayedCells == other.rendRelayedCells && + this.dirOnionsSeen == other.dirOnionsSeen; + } + + @Override + public int compareTo(HidServStats other) { + return this.statsEndMillis < other.statsEndMillis ? -1 : + this.statsEndMillis > other.statsEndMillis ? 1 : 0; + } + } + + /* Extract fingerprint and hidserv-* lines from extra-info descriptors + * located in in/{archive,recent}/relay-descriptors/extra-infos/. */ + private static SortedMap<String, SortedSet<HidServStats>> + extractHidServStats() { + SortedMap<String, SortedSet<HidServStats>> extractedHidServStats = + new TreeMap<String, SortedSet<HidServStats>>(); + DescriptorReader descriptorReader = + DescriptorSourceFactory.createDescriptorReader(); + descriptorReader.addDirectory(archiveExtraInfosDirectory); + descriptorReader.addDirectory(recentExtraInfosDirectory); + Iterator<DescriptorFile> descriptorFiles = + descriptorReader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (!(descriptor instanceof ExtraInfoDescriptor)) { + continue; + } + String fingerprint = + ((ExtraInfoDescriptor) descriptor).getFingerprint(); + Scanner scanner = new Scanner(new ByteArrayInputStream( + descriptor.getRawDescriptorBytes())); + Long statsEndMillis = null, statsIntervalSeconds = null, + rendRelayedCells = null, dirOnionsSeen = null; + try { + while (scanner.hasNext()) { + String line = scanner.nextLine(); + if (line.startsWith("hidserv-")) { + String[] parts = line.split(" "); + if (parts[0].equals("hidserv-stats-end")) { + if (parts.length != 5 || !parts[3].startsWith("(") || + !parts[4].equals("s)")) { + /* Will warn below, because statsEndMillis and + * statsIntervalSeconds are still null. */ + continue; + } + statsEndMillis = DATE_TIME_FORMAT.parse( + parts[1] + " " + parts[2]).getTime(); + statsIntervalSeconds = + Long.parseLong(parts[3].substring(1)); + } else if (parts[0].equals("hidserv-rend-relayed-cells")) { + if (parts.length != 5 || + !parts[4].startsWith("bin_size=")) { + /* Will warn below, because rendRelayedCells is still + * null. */ + continue; + } + rendRelayedCells = removeNoise(Long.parseLong(parts[1]), + Long.parseLong(parts[4].substring(9))); + } else if (parts[0].equals("hidserv-dir-onions-seen")) { + if (parts.length != 5 || + !parts[4].startsWith("bin_size=")) { + /* Will warn below, because dirOnionsSeen is still + * null. */ + continue; + } + dirOnionsSeen = removeNoise(Long.parseLong(parts[1]), + Long.parseLong(parts[4].substring(9))); + } + } + } + } catch (ParseException e) { + e.printStackTrace(); + continue; + } catch (NumberFormatException e) { + e.printStackTrace(); + continue; + } + if (statsEndMillis == null && statsIntervalSeconds == null && + rendRelayedCells == null && dirOnionsSeen == null) { + continue; + } else if (statsEndMillis != null && statsIntervalSeconds != null + && rendRelayedCells != null && dirOnionsSeen != null) { + if (!extractedHidServStats.containsKey(fingerprint)) { + extractedHidServStats.put(fingerprint, + new TreeSet<HidServStats>()); + } + extractedHidServStats.get(fingerprint).add(new HidServStats( + statsEndMillis, statsIntervalSeconds, rendRelayedCells, + dirOnionsSeen)); + } else { + System.err.println("Relay " + fingerprint + " published " + + "incomplete hidserv-stats. Ignoring."); + } + } + } + return extractedHidServStats; + } + + private static long removeNoise(long reportedNumber, long binSize) { + long roundedToNearestRightSideOfTheBin = + ((reportedNumber + binSize / 2) / binSize) * binSize; + long subtractedHalfOfBinSize = + roundedToNearestRightSideOfTheBin - binSize / 2; + return subtractedHalfOfBinSize; + } + + private static class ConsensusFraction + implements Comparable<ConsensusFraction> { + + /* Valid-after timestamp of the consensus in milliseconds. */ + private long validAfterMillis; + + /* Fresh-until timestamp of the consensus in milliseconds. */ + private long freshUntilMillis; + + /* Probability for being selected by clients as rendezvous point. */ + private double probabilityRendezvousPoint; + + /* Probability for being selected as directory. This is the fraction + * of descriptors identifiers that this relay has been responsible + * for, divided by 3. */ + private double fractionResponsibleDescriptors; + + private ConsensusFraction(long validAfterMillis, + long freshUntilMillis, + double probabilityRendezvousPoint, + double fractionResponsibleDescriptors) { + this.validAfterMillis = validAfterMillis; + this.freshUntilMillis = freshUntilMillis; + this.probabilityRendezvousPoint = probabilityRendezvousPoint; + this.fractionResponsibleDescriptors = + fractionResponsibleDescriptors; + } + + @Override + public boolean equals(Object otherObject) { + if (!(otherObject instanceof ConsensusFraction)) { + return false; + } + ConsensusFraction other = (ConsensusFraction) otherObject; + return this.validAfterMillis == other.validAfterMillis && + this.freshUntilMillis == other.freshUntilMillis && + this.fractionResponsibleDescriptors == + other.fractionResponsibleDescriptors && + this.probabilityRendezvousPoint == + other.probabilityRendezvousPoint; + } + + @Override + public int compareTo(ConsensusFraction other) { + return this.validAfterMillis < other.validAfterMillis ? -1 : + this.validAfterMillis > other.validAfterMillis ? 1 : 0; + } + } + + /* Extract fractions that relays were responsible for from consensuses + * located in in/{archive,recent}/relay-descriptors/consensuses/. */ + private static SortedMap<String, SortedSet<ConsensusFraction>> + extractConsensusFractions(Collection<String> fingerprints) { + SortedMap<String, SortedSet<ConsensusFraction>> + extractedConsensusFractions = + new TreeMap<String, SortedSet<ConsensusFraction>>(); + DescriptorReader descriptorReader = + DescriptorSourceFactory.createDescriptorReader(); + descriptorReader.addDirectory(archiveConsensuses); + descriptorReader.addDirectory(recentConsensuses); + Iterator<DescriptorFile> descriptorFiles = + descriptorReader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (!(descriptor instanceof RelayNetworkStatusConsensus)) { + continue; + } + RelayNetworkStatusConsensus consensus = + (RelayNetworkStatusConsensus) descriptor; + SortedSet<String> weightKeys = new TreeSet<String>(Arrays.asList( + "Wmg,Wmm,Wme,Wmd".split(","))); + weightKeys.removeAll(consensus.getBandwidthWeights().keySet()); + if (!weightKeys.isEmpty()) { + System.err.println("Consensus with valid-after time " + + DATE_TIME_FORMAT.format(consensus.getValidAfterMillis()) + + " doesn't contain expected Wmx weights. Skipping."); + continue; + } + double wmg = ((double) consensus.getBandwidthWeights().get("Wmg")) + / 10000.0; + double wmm = ((double) consensus.getBandwidthWeights().get("Wmm")) + / 10000.0; + double wme = ((double) consensus.getBandwidthWeights().get("Wme")) + / 10000.0; + double wmd = ((double) consensus.getBandwidthWeights().get("Wmd")) + / 10000.0; + SortedSet<String> hsDirs = new TreeSet<String>( + Collections.reverseOrder()); + double totalWeightsRendezvousPoint = 0.0; + SortedMap<String, Double> weightsRendezvousPoint = + new TreeMap<String, Double>(); + for (Map.Entry<String, NetworkStatusEntry> e : + consensus.getStatusEntries().entrySet()) { + String fingerprint = e.getKey(); + NetworkStatusEntry statusEntry = e.getValue(); + SortedSet<String> flags = statusEntry.getFlags(); + if (flags.contains("HSDir")) { + hsDirs.add(statusEntry.getFingerprint()); + } + double weightRendezvousPoint = 0.0; + if (flags.contains("Fast")) { + weightRendezvousPoint = (double) statusEntry.getBandwidth(); + if (flags.contains("Guard") && flags.contains("Exit")) { + weightRendezvousPoint *= wmd; + } else if (flags.contains("Guard")) { + weightRendezvousPoint *= wmg; + } else if (flags.contains("Exit")) { + weightRendezvousPoint *= wme; + } else { + weightRendezvousPoint *= wmm; + } + } + weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint); + totalWeightsRendezvousPoint += weightRendezvousPoint; + } + /* Add all HSDir fingerprints with leading "0" and "1" to + * simplify the logic to traverse the ring start. */ + SortedSet<String> hsDirsCopy = new TreeSet<String>(hsDirs); + hsDirs.clear(); + for (String fingerprint : hsDirsCopy) { + hsDirs.add("0" + fingerprint); + hsDirs.add("1" + fingerprint); + } + final double RING_SIZE = new BigInteger( + "10000000000000000000000000000000000000000", + 16).doubleValue(); + for (String fingerprint : fingerprints) { + double probabilityRendezvousPoint = 0.0, + fractionDescriptors = 0.0; + NetworkStatusEntry statusEntry = + consensus.getStatusEntry(fingerprint); + if (statusEntry != null) { + if (hsDirs.contains("1" + fingerprint)) { + String startResponsible = fingerprint; + int positionsToGo = 3; + for (String hsDirFingerprint : + hsDirs.tailSet("1" + fingerprint)) { + startResponsible = hsDirFingerprint; + if (positionsToGo-- <= 0) { + break; + } + } + fractionDescriptors = + new BigInteger("1" + fingerprint, 16).subtract( + new BigInteger(startResponsible, 16)).doubleValue() + / RING_SIZE; + fractionDescriptors /= 3.0; + } + probabilityRendezvousPoint = + weightsRendezvousPoint.get(fingerprint) + / totalWeightsRendezvousPoint; + } + if (!extractedConsensusFractions.containsKey(fingerprint)) { + extractedConsensusFractions.put(fingerprint, + new TreeSet<ConsensusFraction>()); + } + extractedConsensusFractions.get(fingerprint).add( + new ConsensusFraction(consensus.getValidAfterMillis(), + consensus.getFreshUntilMillis(), probabilityRendezvousPoint, + fractionDescriptors)); + } + } + } + return extractedConsensusFractions; + } + + private static void extrapolateHidServStats( + SortedMap<String, SortedSet<HidServStats>> hidServStats, + SortedMap<String, SortedSet<ConsensusFraction>> + consensusFractions) throws Exception { + hidservStatsCsvFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter( + new FileWriter(hidservStatsCsvFile)); + bw.write("fingerprint,stats_start,stats_end," + + "hidserv_rend_relayed_cells,hidserv_dir_onions_seen," + + "frac_rend_relayed_cells,frac_dir_onions_seen\n"); + for (Map.Entry<String, SortedSet<HidServStats>> e : + hidServStats.entrySet()) { + String fingerprint = e.getKey(); + if (!consensusFractions.containsKey(fingerprint)) { + System.err.println("We have hidserv-stats but no consensus " + + "fractions for " + fingerprint + ". Skipping."); + continue; + } + for (HidServStats stats : e.getValue()) { + long statsStartMillis = stats.statsEndMillis + - stats.statsIntervalSeconds * 1000L; + double sumProbabilityRendezvousPoint = 0.0, + sumResponsibleDescriptors = 0.0; + int statusEntries = 0; + for (ConsensusFraction frac : + consensusFractions.get(fingerprint)) { + if (statsStartMillis <= frac.validAfterMillis && + frac.validAfterMillis < stats.statsEndMillis) { + sumProbabilityRendezvousPoint += + frac.probabilityRendezvousPoint; + sumResponsibleDescriptors += + frac.fractionResponsibleDescriptors; + statusEntries++; + } + } + double fracCells = sumProbabilityRendezvousPoint / statusEntries, + fracDescs = sumResponsibleDescriptors / statusEntries; + bw.write(String.format("%s,%s,%s,%d,%d,%.8f,%.8f%n", fingerprint, + DATE_TIME_FORMAT.format(statsStartMillis), + DATE_TIME_FORMAT.format(stats.statsEndMillis), + stats.rendRelayedCells, stats.dirOnionsSeen, fracCells, + fracDescs)); + } + } + bw.close(); + } +} + diff --git a/task-13192/src/java/ExtrapolateHidServStats.java b/task-13192/src/java/ExtrapolateHidServStats.java deleted file mode 100644 index 100520d..0000000 --- a/task-13192/src/java/ExtrapolateHidServStats.java +++ /dev/null @@ -1,722 +0,0 @@ -import java.io.BufferedWriter; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileWriter; -import java.math.BigInteger; -import java.text.DateFormat; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.Scanner; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.TreeSet; - -import org.torproject.descriptor.Descriptor; -import org.torproject.descriptor.DescriptorFile; -import org.torproject.descriptor.DescriptorReader; -import org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.ExtraInfoDescriptor; -import org.torproject.descriptor.NetworkStatusEntry; -import org.torproject.descriptor.RelayNetworkStatusConsensus; - -public class ExtrapolateHidServStats { - - private static File archiveExtraInfosDirectory = - new File("in/collector/archive/relay-descriptors/extra-infos/"); - - private static File recentExtraInfosDirectory = - new File("in/collector/recent/relay-descriptors/extra-infos/"); - - private static File archiveConsensuses = - new File("in/collector/archive/relay-descriptors/consensuses/"); - - private static File recentConsensuses = - new File("in/collector/recent/relay-descriptors/consensuses/"); - - private static File hidservStatsCsvFile = - new File("out/csv/hidserv-stats.csv"); - - private static File simCellsCsvFile = - new File("out/csv/sim-cells.csv"); - - private static File simOnionsCsvFile = - new File("out/csv/sim-onions.csv"); - - public static void main(String[] args) throws Exception { - System.out.println("Extracting hidserv-* lines from extra-info " - + "descriptors..."); - SortedMap<String, SortedSet<HidServStats>> hidServStats = - extractHidServStats(); - System.out.println("Extracting fractions from consensuses..."); - SortedMap<String, SortedSet<ConsensusFraction>> consensusFractions = - extractConsensusFractions(hidServStats.keySet()); - System.out.println("Extrapolating statistics..."); - extrapolateHidServStats(hidServStats, consensusFractions); - System.out.println("Simulating extrapolation of rendezvous cells..."); - simulateCells(); - System.out.println("Simulating extrapolation of .onions..."); - simulateOnions(); - System.out.println("Terminating."); - } - - private static final DateFormat DATE_TIME_FORMAT; - - static { - DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - DATE_TIME_FORMAT.setLenient(false); - DATE_TIME_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); - } - - private static class HidServStats implements Comparable<HidServStats> { - - /* Hidden-service statistics end timestamp in milliseconds. */ - private long statsEndMillis; - - /* Statistics interval length in seconds. */ - private long statsIntervalSeconds; - - /* Number of relayed cells reported by the relay and adjusted by - * rounding to the nearest right side of a bin and subtracting half of - * the bin size. */ - private long rendRelayedCells; - - /* Number of .onions reported by the relay and adjusted by rounding to - * the nearest right side of a bin and subtracting half of the bin - * size. */ - private long dirOnionsSeen; - - private HidServStats(long statsEndMillis, long statsIntervalSeconds, - long rendRelayedCells, long dirOnionsSeen) { - this.statsEndMillis = statsEndMillis; - this.statsIntervalSeconds = statsIntervalSeconds; - this.rendRelayedCells = rendRelayedCells; - this.dirOnionsSeen = dirOnionsSeen; - } - - @Override - public boolean equals(Object otherObject) { - if (!(otherObject instanceof HidServStats)) { - return false; - } - HidServStats other = (HidServStats) otherObject; - return this.statsEndMillis == other.statsEndMillis && - this.statsIntervalSeconds == other.statsIntervalSeconds && - this.rendRelayedCells == other.rendRelayedCells && - this.dirOnionsSeen == other.dirOnionsSeen; - } - - @Override - public int compareTo(HidServStats other) { - return this.statsEndMillis < other.statsEndMillis ? -1 : - this.statsEndMillis > other.statsEndMillis ? 1 : 0; - } - } - - /* Extract fingerprint and hidserv-* lines from extra-info descriptors - * located in in/{archive,recent}/relay-descriptors/extra-infos/. */ - private static SortedMap<String, SortedSet<HidServStats>> - extractHidServStats() { - SortedMap<String, SortedSet<HidServStats>> extractedHidServStats = - new TreeMap<String, SortedSet<HidServStats>>(); - DescriptorReader descriptorReader = - DescriptorSourceFactory.createDescriptorReader(); - descriptorReader.addDirectory(archiveExtraInfosDirectory); - descriptorReader.addDirectory(recentExtraInfosDirectory); - Iterator<DescriptorFile> descriptorFiles = - descriptorReader.readDescriptors(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (!(descriptor instanceof ExtraInfoDescriptor)) { - continue; - } - String fingerprint = - ((ExtraInfoDescriptor) descriptor).getFingerprint(); - Scanner scanner = new Scanner(new ByteArrayInputStream( - descriptor.getRawDescriptorBytes())); - Long statsEndMillis = null, statsIntervalSeconds = null, - rendRelayedCells = null, dirOnionsSeen = null; - try { - while (scanner.hasNext()) { - String line = scanner.nextLine(); - if (line.startsWith("hidserv-")) { - String[] parts = line.split(" "); - if (parts[0].equals("hidserv-stats-end")) { - if (parts.length != 5 || !parts[3].startsWith("(") || - !parts[4].equals("s)")) { - /* Will warn below, because statsEndMillis and - * statsIntervalSeconds are still null. */ - continue; - } - statsEndMillis = DATE_TIME_FORMAT.parse( - parts[1] + " " + parts[2]).getTime(); - statsIntervalSeconds = - Long.parseLong(parts[3].substring(1)); - } else if (parts[0].equals("hidserv-rend-relayed-cells")) { - if (parts.length != 5 || - !parts[4].startsWith("bin_size=")) { - /* Will warn below, because rendRelayedCells is still - * null. */ - continue; - } - rendRelayedCells = removeNoise(Long.parseLong(parts[1]), - Long.parseLong(parts[4].substring(9))); - } else if (parts[0].equals("hidserv-dir-onions-seen")) { - if (parts.length != 5 || - !parts[4].startsWith("bin_size=")) { - /* Will warn below, because dirOnionsSeen is still - * null. */ - continue; - } - dirOnionsSeen = removeNoise(Long.parseLong(parts[1]), - Long.parseLong(parts[4].substring(9))); - } - } - } - } catch (ParseException e) { - e.printStackTrace(); - continue; - } catch (NumberFormatException e) { - e.printStackTrace(); - continue; - } - if (statsEndMillis == null && statsIntervalSeconds == null && - rendRelayedCells == null && dirOnionsSeen == null) { - continue; - } else if (statsEndMillis != null && statsIntervalSeconds != null - && rendRelayedCells != null && dirOnionsSeen != null) { - if (!extractedHidServStats.containsKey(fingerprint)) { - extractedHidServStats.put(fingerprint, - new TreeSet<HidServStats>()); - } - extractedHidServStats.get(fingerprint).add(new HidServStats( - statsEndMillis, statsIntervalSeconds, rendRelayedCells, - dirOnionsSeen)); - } else { - System.err.println("Relay " + fingerprint + " published " - + "incomplete hidserv-stats. Ignoring."); - } - } - } - return extractedHidServStats; - } - - private static long removeNoise(long reportedNumber, long binSize) { - long roundedToNearestRightSideOfTheBin = - ((reportedNumber + binSize / 2) / binSize) * binSize; - long subtractedHalfOfBinSize = - roundedToNearestRightSideOfTheBin - binSize / 2; - return subtractedHalfOfBinSize; - } - - private static class ConsensusFraction - implements Comparable<ConsensusFraction> { - - /* Valid-after timestamp of the consensus in milliseconds. */ - private long validAfterMillis; - - /* Fresh-until timestamp of the consensus in milliseconds. */ - private long freshUntilMillis; - - /* Fraction of consensus weight in [0.0, 1.0] of this relay. */ - private double fractionConsensusWeight; - - /* Probability for being selected by clients as rendezvous point. */ - private double probabilityRendezvousPoint; - - /* Fraction of descriptor identifiers in [0.0, 1.0] that this relay - * has been responsible for. This is the "distance" from the - * fingerprint of the relay three HSDir positions earlier in the ring - * to the fingerprint of this relay. Fractions of all HSDirs in a - * consensus add up to 3.0, not 1.0. */ - private double fractionResponsibleDescriptors; - - private ConsensusFraction(long validAfterMillis, - long freshUntilMillis, - double fractionConsensusWeight, - double probabilityRendezvousPoint, - double fractionResponsibleDescriptors) { - this.validAfterMillis = validAfterMillis; - this.freshUntilMillis = freshUntilMillis; - this.fractionConsensusWeight = fractionConsensusWeight; - this.probabilityRendezvousPoint = probabilityRendezvousPoint; - this.fractionResponsibleDescriptors = - fractionResponsibleDescriptors; - } - - @Override - public boolean equals(Object otherObject) { - if (!(otherObject instanceof ConsensusFraction)) { - return false; - } - ConsensusFraction other = (ConsensusFraction) otherObject; - return this.validAfterMillis == other.validAfterMillis && - this.freshUntilMillis == other.freshUntilMillis && - this.fractionResponsibleDescriptors == - other.fractionResponsibleDescriptors && - this.fractionConsensusWeight == other.fractionConsensusWeight && - this.probabilityRendezvousPoint == - other.probabilityRendezvousPoint; - } - - @Override - public int compareTo(ConsensusFraction other) { - return this.validAfterMillis < other.validAfterMillis ? -1 : - this.validAfterMillis > other.validAfterMillis ? 1 : 0; - } - } - - /* Extract fractions that relays were responsible for from consensuses - * located in in/{archive,recent}/relay-descriptors/consensuses/. */ - private static SortedMap<String, SortedSet<ConsensusFraction>> - extractConsensusFractions(Collection<String> fingerprints) { - SortedMap<String, SortedSet<ConsensusFraction>> - extractedConsensusFractions = - new TreeMap<String, SortedSet<ConsensusFraction>>(); - DescriptorReader descriptorReader = - DescriptorSourceFactory.createDescriptorReader(); - descriptorReader.addDirectory(archiveConsensuses); - descriptorReader.addDirectory(recentConsensuses); - Iterator<DescriptorFile> descriptorFiles = - descriptorReader.readDescriptors(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (!(descriptor instanceof RelayNetworkStatusConsensus)) { - continue; - } - RelayNetworkStatusConsensus consensus = - (RelayNetworkStatusConsensus) descriptor; - SortedSet<String> weightKeys = new TreeSet<String>(Arrays.asList( - "Wmg,Wmm,Wme,Wmd".split(","))); - weightKeys.removeAll(consensus.getBandwidthWeights().keySet()); - if (!weightKeys.isEmpty()) { - System.err.println("Consensus with valid-after time " - + DATE_TIME_FORMAT.format(consensus.getValidAfterMillis()) - + " doesn't contain expected Wmx weights. Skipping."); - continue; - } - double wmg = ((double) consensus.getBandwidthWeights().get("Wmg")) - / 10000.0; - double wmm = ((double) consensus.getBandwidthWeights().get("Wmm")) - / 10000.0; - double wme = ((double) consensus.getBandwidthWeights().get("Wme")) - / 10000.0; - double wmd = ((double) consensus.getBandwidthWeights().get("Wmd")) - / 10000.0; - SortedSet<String> hsDirs = new TreeSet<String>( - Collections.reverseOrder()); - long totalConsensusWeight = 0L; - double totalWeightsRendezvousPoint = 0.0; - SortedMap<String, Double> weightsRendezvousPoint = - new TreeMap<String, Double>(); - for (Map.Entry<String, NetworkStatusEntry> e : - consensus.getStatusEntries().entrySet()) { - String fingerprint = e.getKey(); - NetworkStatusEntry statusEntry = e.getValue(); - SortedSet<String> flags = statusEntry.getFlags(); - if (flags.contains("HSDir")) { - hsDirs.add(statusEntry.getFingerprint()); - } - totalConsensusWeight += statusEntry.getBandwidth(); - double weightRendezvousPoint = 0.0; - if (flags.contains("Fast")) { - weightRendezvousPoint = (double) statusEntry.getBandwidth(); - if (flags.contains("Guard") && flags.contains("Exit")) { - weightRendezvousPoint *= wmd; - } else if (flags.contains("Guard")) { - weightRendezvousPoint *= wmg; - } else if (flags.contains("Exit")) { - weightRendezvousPoint *= wme; - } else { - weightRendezvousPoint *= wmm; - } - } - weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint); - totalWeightsRendezvousPoint += weightRendezvousPoint; - } - /* Add all HSDir fingerprints with leading "0" and "1" to - * simplify the logic to traverse the ring start. */ - SortedSet<String> hsDirsCopy = new TreeSet<String>(hsDirs); - hsDirs.clear(); - for (String fingerprint : hsDirsCopy) { - hsDirs.add("0" + fingerprint); - hsDirs.add("1" + fingerprint); - } - final double RING_SIZE = new BigInteger( - "10000000000000000000000000000000000000000", - 16).doubleValue(); - for (String fingerprint : fingerprints) { - double probabilityRendezvousPoint = 0.0, - fractionResponsibleDescriptors = 0.0, - fractionConsensusWeight = 0.0; - NetworkStatusEntry statusEntry = - consensus.getStatusEntry(fingerprint); - if (statusEntry != null) { - if (hsDirs.contains("1" + fingerprint)) { - String startResponsible = fingerprint; - int positionsToGo = 3; - for (String hsDirFingerprint : - hsDirs.tailSet("1" + fingerprint)) { - startResponsible = hsDirFingerprint; - if (positionsToGo-- <= 0) { - break; - } - } - fractionResponsibleDescriptors = - new BigInteger("1" + fingerprint, 16).subtract( - new BigInteger(startResponsible, 16)).doubleValue() - / RING_SIZE; - } - fractionConsensusWeight = - ((double) statusEntry.getBandwidth()) - / ((double) totalConsensusWeight); - probabilityRendezvousPoint = - weightsRendezvousPoint.get(fingerprint) - / totalWeightsRendezvousPoint; - } - if (!extractedConsensusFractions.containsKey(fingerprint)) { - extractedConsensusFractions.put(fingerprint, - new TreeSet<ConsensusFraction>()); - } - extractedConsensusFractions.get(fingerprint).add( - new ConsensusFraction(consensus.getValidAfterMillis(), - consensus.getFreshUntilMillis(), fractionConsensusWeight, - probabilityRendezvousPoint, - fractionResponsibleDescriptors)); - } - } - } - return extractedConsensusFractions; - } - - private static void extrapolateHidServStats( - SortedMap<String, SortedSet<HidServStats>> hidServStats, - SortedMap<String, SortedSet<ConsensusFraction>> - consensusFractions) throws Exception { - hidservStatsCsvFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter( - new FileWriter(hidservStatsCsvFile)); - bw.write("fingerprint,stats_start,stats_end," - + "hidserv_rend_relayed_cells,hidserv_dir_onions_seen," - + "prob_rend_point,frac_hsdesc\n"); - for (Map.Entry<String, SortedSet<HidServStats>> e : - hidServStats.entrySet()) { - String fingerprint = e.getKey(); - if (!consensusFractions.containsKey(fingerprint)) { - System.err.println("We have hidserv-stats but no consensus " - + "fractions for " + fingerprint + ". Skipping."); - continue; - } - for (HidServStats stats : e.getValue()) { - long statsStartMillis = stats.statsEndMillis - - stats.statsIntervalSeconds * 1000L; - double sumProbabilityRendezvousPoint = 0.0, - sumResponsibleDescriptors = 0.0; - int statusEntries = 0; - for (ConsensusFraction frac : - consensusFractions.get(fingerprint)) { - if (statsStartMillis <= frac.validAfterMillis && - frac.validAfterMillis < stats.statsEndMillis) { - sumProbabilityRendezvousPoint += - frac.probabilityRendezvousPoint; - sumResponsibleDescriptors += - frac.fractionResponsibleDescriptors; - statusEntries++; - } - } - bw.write(String.format("%s,%s,%s,%d,%d,%.8f,%.8f%n", fingerprint, - DATE_TIME_FORMAT.format(statsStartMillis), - DATE_TIME_FORMAT.format(stats.statsEndMillis), - stats.rendRelayedCells, stats.dirOnionsSeen, - sumProbabilityRendezvousPoint / statusEntries, - sumResponsibleDescriptors / statusEntries)); - } - } - bw.close(); - } - - private static Random rnd = new Random(3); - - private static void simulateCells() throws Exception { - - /* Generate consensus weights following an exponential distribution - * with lambda = 1 for 3000 potential rendezvous points. */ - final int numberRendPoints = 3000; - double[] consensusWeights = new double[numberRendPoints]; - double totalConsensusWeight = 0.0; - for (int i = 0; i < numberRendPoints; i++) { - double consensusWeight = -Math.log(1.0 - rnd.nextDouble()); - consensusWeights[i] = consensusWeight; - totalConsensusWeight += consensusWeight; - } - - /* Compute probabilities for being selected as rendezvous point. */ - double[] probRendPoint = new double[numberRendPoints]; - for (int i = 0; i < numberRendPoints; i++) { - probRendPoint[i] = consensusWeights[i] / totalConsensusWeight; - } - - /* Generate 10,000,000,000 (roughly 60 MiB/s) cells in chunks - * following an exponential distribution with lambda = 0.00001 and - * randomly assign them to a rendezvous point to report them later. */ - long cellsLeft = 10000000000L; - final double cellsLambda = 0.00001; - long[] observedCells = new long[numberRendPoints]; - while (cellsLeft > 0) { - long cells = (long) (-Math.log(1.0 - rnd.nextDouble()) - / cellsLambda); - double selectRendPoint = rnd.nextDouble(); - for (int i = 0; i < probRendPoint.length; i++) { - selectRendPoint -= probRendPoint[i]; - if (selectRendPoint <= 0.0) { - observedCells[i] += cells; - break; - } - } - cellsLeft -= cells; - } - - /* Obfuscate reports using binning and Laplace noise, and then attempt - * to remove noise again. */ - final long binSize = 1024L; - final double b = 2048.0 / 0.3; - long[] reportedCells = new long[numberRendPoints]; - long[] removedNoiseCells = new long[numberRendPoints]; - for (int i = 0; i < numberRendPoints; i++) { - long observed = observedCells[i]; - long afterBinning = ((observed + binSize - 1L) / binSize) * binSize; - double p = rnd.nextDouble(); - double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) * - Math.log(1.0 - 2.0 * Math.abs(p - 0.5)); - long reported = afterBinning + (long) laplaceNoise; - reportedCells[i] = reported; - long roundedToNearestRightSideOfTheBin = - ((reported + binSize / 2) / binSize) * binSize; - long subtractedHalfOfBinSize = - roundedToNearestRightSideOfTheBin - binSize / 2; - removedNoiseCells[i] = subtractedHalfOfBinSize; - } - - /* Perform 10,000 extrapolations from random fractions of reports by - * probability to be selected as rendezvous point. */ - simCellsCsvFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - simCellsCsvFile)); - bw.write("frac,p025,p500,p975\n"); - double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, - 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 }; - final int numberOfExtrapolations = 10000; - for (double fraction : fractions) { - List<Long> extrapolations = new ArrayList<Long>(); - for (int i = 0; i < numberOfExtrapolations; i++) { - SortedSet<Integer> nonReportingRelays = new TreeSet<Integer>(); - for (int j = 0; j < numberRendPoints; j++) { - nonReportingRelays.add(j); - } - List<Integer> shuffledRelays = new ArrayList<Integer>( - nonReportingRelays); - Collections.shuffle(shuffledRelays); - SortedSet<Integer> reportingRelays = new TreeSet<Integer>(); - for (int j = 0; j < (int) ((double) numberRendPoints * fraction); - j++) { - reportingRelays.add(shuffledRelays.get(j)); - nonReportingRelays.remove(shuffledRelays.get(j)); - } - double reportingProbability; - long totalReports; - do { - reportingProbability = 0.0; - totalReports = 0L; - for (int reportingRelay : reportingRelays) { - reportingProbability += probRendPoint[reportingRelay]; - totalReports += removedNoiseCells[reportingRelay]; - } - if (reportingProbability < fraction - 0.001) { - int addRelay = new ArrayList<Integer>(nonReportingRelays).get( - rnd.nextInt(nonReportingRelays.size())); - nonReportingRelays.remove(addRelay); - reportingRelays.add(addRelay); - } else if (reportingProbability > fraction + 0.001) { - int removeRelay = new ArrayList<Integer>(reportingRelays).get( - rnd.nextInt(reportingRelays.size())); - reportingRelays.remove(removeRelay); - nonReportingRelays.add(removeRelay); - } - } while (reportingProbability < fraction - 0.001 || - reportingProbability > fraction + 0.001); - extrapolations.add((long) ((double) totalReports - / reportingProbability)); - } - Collections.sort(extrapolations); - long p025 = extrapolations.get((extrapolations.size() * 25) / 1000), - p500 = extrapolations.get((extrapolations.size() * 500) / 1000), - p975 = extrapolations.get((extrapolations.size() * 975) / 1000); - bw.write(String.format("%.2f,%d,%d,%d%n", fraction, p025, p500, - p975)); - } - bw.close(); - } - - private static void simulateOnions() throws Exception { - - /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */ - final int numberHsDirs = 3000; - SortedSet<Double> hsDirFingerprints = new TreeSet<Double>(); - for (int i = 0; i < numberHsDirs; i++) { - hsDirFingerprints.add(rnd.nextDouble()); - } - - /* Compute fractions of observed descriptor space. */ - SortedSet<Double> ring = - new TreeSet<Double>(Collections.reverseOrder()); - for (double fingerprint : hsDirFingerprints) { - ring.add(fingerprint); - ring.add(fingerprint - 1.0); - } - SortedMap<Double, Double> hsDirFractions = - new TreeMap<Double, Double>(); - for (double fingerprint : hsDirFingerprints) { - double start = fingerprint; - int positionsToGo = 3; - for (double prev : ring.tailSet(fingerprint)) { - start = prev; - if (positionsToGo-- <= 0) { - break; - } - } - hsDirFractions.put(fingerprint, fingerprint - start); - } - - /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */ - final int numberOnions = 40000; - final int replicas = 4; - final int storeOnDirs = 3; - SortedMap<Double, SortedSet<Integer>> storedDescs = - new TreeMap<Double, SortedSet<Integer>>(); - for (double fingerprint : hsDirFingerprints) { - storedDescs.put(fingerprint, new TreeSet<Integer>()); - } - for (int i = 0; i < numberOnions; i++) { - for (int j = 0; j < replicas; j++) { - int leftToStore = storeOnDirs; - for (double fingerprint : - hsDirFingerprints.tailSet(rnd.nextDouble())) { - storedDescs.get(fingerprint).add(i); - if (--leftToStore <= 0) { - break; - } - } - if (leftToStore > 0) { - for (double fingerprint : hsDirFingerprints) { - storedDescs.get(fingerprint).add(i); - if (--leftToStore <= 0) { - break; - } - } - } - } - } - - /* Obfuscate reports using binning and Laplace noise, and then attempt - * to remove noise again. */ - final long binSize = 8L; - final double b = 8.0 / 0.3; - SortedMap<Double, Long> reportedOnions = new TreeMap<Double, Long>(), - removedNoiseOnions = new TreeMap<Double, Long>(); - for (Map.Entry<Double, SortedSet<Integer>> e : - storedDescs.entrySet()) { - double fingerprint = e.getKey(); - long observed = (long) e.getValue().size(); - long afterBinning = ((observed + binSize - 1L) / binSize) * binSize; - double p = rnd.nextDouble(); - double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) * - Math.log(1.0 - 2.0 * Math.abs(p - 0.5)); - long reported = afterBinning + (long) laplaceNoise; - reportedOnions.put(fingerprint, reported); - long roundedToNearestRightSideOfTheBin = - ((reported + binSize / 2) / binSize) * binSize; - long subtractedHalfOfBinSize = - roundedToNearestRightSideOfTheBin - binSize / 2; - removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize); - } - - /* Perform 10,000 extrapolations from random fractions of reports by - * probability to be selected as rendezvous point. */ - simOnionsCsvFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - simOnionsCsvFile)); - bw.write("frac,p025,p500,p975\n"); - double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, - 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 }; - final int numberOfExtrapolations = 10000; - for (double fraction : fractions) { - List<Long> extrapolationsTwo = new ArrayList<Long>(); - for (int i = 0; i < numberOfExtrapolations; i++) { - SortedSet<Double> nonReportingRelays = - new TreeSet<Double>(hsDirFractions.keySet()); - List<Double> shuffledRelays = new ArrayList<Double>( - nonReportingRelays); - Collections.shuffle(shuffledRelays); - SortedSet<Double> reportingRelays = new TreeSet<Double>(); - for (int j = 0; j < (int) ((double) hsDirFractions.size() - * fraction); j++) { - reportingRelays.add(shuffledRelays.get(j)); - nonReportingRelays.remove(shuffledRelays.get(j)); - } - double reportingProbability; - long totalReports; - do { - reportingProbability = 0.0; - totalReports = 0L; - for (double reportingRelay : reportingRelays) { - reportingProbability += hsDirFractions.get(reportingRelay) - / 3.0; - totalReports += removedNoiseOnions.get(reportingRelay); - } - if (reportingProbability < fraction - 0.001) { - double addRelay = - new ArrayList<Double>(nonReportingRelays).get( - rnd.nextInt(nonReportingRelays.size())); - nonReportingRelays.remove(addRelay); - reportingRelays.add(addRelay); - } else if (reportingProbability > fraction + 0.001) { - double removeRelay = - new ArrayList<Double>(reportingRelays).get( - rnd.nextInt(reportingRelays.size())); - reportingRelays.remove(removeRelay); - nonReportingRelays.add(removeRelay); - } - } while (reportingProbability < fraction - 0.001 || - reportingProbability > fraction + 0.001); - double totalFraction = 0.0; - for (double fingerprint : reportingRelays) { - totalFraction += hsDirFractions.get(fingerprint) * 4.0; - } - extrapolationsTwo.add((long) ((double) totalReports - / totalFraction)); - } - Collections.sort(extrapolationsTwo); - long pTwo025 = extrapolationsTwo.get( - (extrapolationsTwo.size() * 25) / 1000), - pTwo500 = extrapolationsTwo.get( - (extrapolationsTwo.size() * 500) / 1000), - pTwo975 = extrapolationsTwo.get( - (extrapolationsTwo.size() * 975) / 1000); - bw.write(String.format("%.2f,%d,%d,%d%n", fraction, pTwo025, - pTwo500, pTwo975)); - } - bw.close(); - } -} - diff --git a/task-13192/src/java/Simulate.java b/task-13192/src/java/Simulate.java new file mode 100644 index 0000000..e41e02c --- /dev/null +++ b/task-13192/src/java/Simulate.java @@ -0,0 +1,357 @@ +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + + +public class Simulate { + private static File simCellsCsvFile = + new File("out/csv/sim-cells.csv"); + + private static File simOnionsCsvFile = + new File("out/csv/sim-onions.csv"); + + public static void main(String[] args) throws Exception { + System.out.print("Simulating extrapolation of rendezvous cells"); + simulateManyCells(); + System.out.print("\nSimulating extrapolation of .onions"); + simulateManyOnions(); + System.out.println("\nTerminating."); + } + + private static Random rnd = new Random(); + + private static void simulateManyCells() throws Exception { + simCellsCsvFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + simCellsCsvFile)); + bw.write("run,frac,wmean,wmedian,wiqm\n"); + final int numberOfExtrapolations = 1000; + for (int i = 0; i < numberOfExtrapolations; i++) { + bw.write(simulateCells(i)); + System.out.print("."); + } + bw.close(); + } + + private static void simulateManyOnions() throws Exception { + simOnionsCsvFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + simOnionsCsvFile)); + bw.write("run,frac,wmean,wmedian,wiqm\n"); + final int numberOfExtrapolations = 1000; + for (int i = 0; i < numberOfExtrapolations; i++) { + bw.write(simulateOnions(i)); + System.out.print("."); + } + bw.close(); + } + + private static String simulateCells(int run) { + + /* Generate consensus weights following an exponential distribution + * with lambda = 1 for 3000 potential rendezvous points. */ + final int numberRendPoints = 3000; + double[] consensusWeights = new double[numberRendPoints]; + double totalConsensusWeight = 0.0; + for (int i = 0; i < numberRendPoints; i++) { + double consensusWeight = -Math.log(1.0 - rnd.nextDouble()); + consensusWeights[i] = consensusWeight; + totalConsensusWeight += consensusWeight; + } + + /* Compute probabilities for being selected as rendezvous point. */ + double[] probRendPoint = new double[numberRendPoints]; + for (int i = 0; i < numberRendPoints; i++) { + probRendPoint[i] = consensusWeights[i] / totalConsensusWeight; + } + + /* Generate 10,000,000,000 cells (474 Mbit/s) in chunks following an + * exponential distribution with lambda = 0.0001, so on average + * 10,000 cells per chunk, and randomly assign them to a rendezvous + * point to report them later. */ + long cellsLeft = 10000000000L; + final double cellsLambda = 0.0001; + long[] observedCells = new long[numberRendPoints]; + while (cellsLeft > 0) { + long cells = Math.min(cellsLeft, + (long) (-Math.log(1.0 - rnd.nextDouble()) / cellsLambda)); + double selectRendPoint = rnd.nextDouble(); + for (int i = 0; i < probRendPoint.length; i++) { + selectRendPoint -= probRendPoint[i]; + if (selectRendPoint <= 0.0) { + observedCells[i] += cells; + break; + } + } + cellsLeft -= cells; + } + + /* Obfuscate reports using binning and Laplace noise, and then attempt + * to remove noise again. */ + final long binSize = 1024L; + final double b = 2048.0 / 0.3; + long[] reportedCells = new long[numberRendPoints]; + long[] removedNoiseCells = new long[numberRendPoints]; + for (int i = 0; i < numberRendPoints; i++) { + long observed = observedCells[i]; + long afterBinning = ((observed + binSize - 1L) / binSize) * binSize; + double p = rnd.nextDouble(); + double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) * + Math.log(1.0 - 2.0 * Math.abs(p - 0.5)); + long reported = afterBinning + (long) laplaceNoise; + reportedCells[i] = reported; + long roundedToNearestRightSideOfTheBin = + ((reported + binSize / 2) / binSize) * binSize; + long subtractedHalfOfBinSize = + roundedToNearestRightSideOfTheBin - binSize / 2; + removedNoiseCells[i] = subtractedHalfOfBinSize; + } + + /* Perform extrapolations from random fractions of reports by + * probability to be selected as rendezvous point. */ + StringBuilder sb = new StringBuilder(); + double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, + 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 }; + for (double fraction : fractions) { + SortedSet<Integer> nonReportingRelays = new TreeSet<Integer>(); + for (int j = 0; j < numberRendPoints; j++) { + nonReportingRelays.add(j); + } + List<Integer> shuffledRelays = new ArrayList<Integer>( + nonReportingRelays); + Collections.shuffle(shuffledRelays); + SortedSet<Integer> reportingRelays = new TreeSet<Integer>(); + for (int j = 0; j < (int) ((double) numberRendPoints * fraction); + j++) { + reportingRelays.add(shuffledRelays.get(j)); + nonReportingRelays.remove(shuffledRelays.get(j)); + } + List<double[]> singleRelayExtrapolations; + double totalReportingProbability; + do { + singleRelayExtrapolations = new ArrayList<double[]>(); + totalReportingProbability = 0.0; + for (int reportingRelay : reportingRelays) { + double probability = probRendPoint[reportingRelay]; + if (probability > 0.0) { + singleRelayExtrapolations.add( + new double[] { + removedNoiseCells[reportingRelay] / probability, + removedNoiseCells[reportingRelay], + probability }); + } + totalReportingProbability += probability; + } + if (totalReportingProbability < fraction - 0.001) { + int addRelay = new ArrayList<Integer>(nonReportingRelays).get( + rnd.nextInt(nonReportingRelays.size())); + nonReportingRelays.remove(addRelay); + reportingRelays.add(addRelay); + } else if (totalReportingProbability > fraction + 0.001) { + int removeRelay = new ArrayList<Integer>(reportingRelays).get( + rnd.nextInt(reportingRelays.size())); + reportingRelays.remove(removeRelay); + nonReportingRelays.add(removeRelay); + } + } while (totalReportingProbability < fraction - 0.001 || + totalReportingProbability > fraction + 0.001); + Collections.sort(singleRelayExtrapolations, + new Comparator<double[]>() { + public int compare(double[] o1, double[] o2) { + return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0; + } + }); + double totalProbability = 0.0, totalValues = 0.0; + double totalInterquartileProbability = 0.0, + totalInterquartileValues = 0.0; + Double weightedMedian = null; + for (double[] extrapolation : singleRelayExtrapolations) { + totalValues += extrapolation[1]; + totalProbability += extrapolation[2]; + if (weightedMedian == null && + totalProbability > totalReportingProbability * 0.5) { + weightedMedian = extrapolation[0]; + } + if (totalProbability > totalReportingProbability * 0.25 && + totalProbability < totalReportingProbability * 0.75) { + totalInterquartileValues += extrapolation[1]; + totalInterquartileProbability += extrapolation[2]; + } + } + sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction, + totalValues / totalProbability, weightedMedian, + totalInterquartileValues / totalInterquartileProbability)); + } + return sb.toString(); + } + + private static String simulateOnions(final int run) { + + /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */ + final int numberHsDirs = 3000; + SortedSet<Double> hsDirFingerprints = new TreeSet<Double>(); + for (int i = 0; i < numberHsDirs; i++) { + hsDirFingerprints.add(rnd.nextDouble()); + } + + /* Compute fractions of observed descriptor space. */ + SortedSet<Double> ring = + new TreeSet<Double>(Collections.reverseOrder()); + for (double fingerprint : hsDirFingerprints) { + ring.add(fingerprint); + ring.add(fingerprint - 1.0); + } + SortedMap<Double, Double> hsDirFractions = + new TreeMap<Double, Double>(); + for (double fingerprint : hsDirFingerprints) { + double start = fingerprint; + int positionsToGo = 3; + for (double prev : ring.tailSet(fingerprint)) { + start = prev; + if (positionsToGo-- <= 0) { + break; + } + } + hsDirFractions.put(fingerprint, fingerprint - start); + } + + /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */ + final int numberOnions = 40000; + final int replicas = 4; + final int storeOnDirs = 3; + SortedMap<Double, SortedSet<Integer>> storedDescs = + new TreeMap<Double, SortedSet<Integer>>(); + for (double fingerprint : hsDirFingerprints) { + storedDescs.put(fingerprint, new TreeSet<Integer>()); + } + for (int i = 0; i < numberOnions; i++) { + for (int j = 0; j < replicas; j++) { + int leftToStore = storeOnDirs; + for (double fingerprint : + hsDirFingerprints.tailSet(rnd.nextDouble())) { + storedDescs.get(fingerprint).add(i); + if (--leftToStore <= 0) { + break; + } + } + if (leftToStore > 0) { + for (double fingerprint : hsDirFingerprints) { + storedDescs.get(fingerprint).add(i); + if (--leftToStore <= 0) { + break; + } + } + } + } + } + + /* Obfuscate reports using binning and Laplace noise, and then attempt + * to remove noise again. */ + final long binSize = 8L; + final double b = 8.0 / 0.3; + SortedMap<Double, Long> reportedOnions = new TreeMap<Double, Long>(), + removedNoiseOnions = new TreeMap<Double, Long>(); + for (Map.Entry<Double, SortedSet<Integer>> e : + storedDescs.entrySet()) { + double fingerprint = e.getKey(); + long observed = (long) e.getValue().size(); + long afterBinning = ((observed + binSize - 1L) / binSize) * binSize; + double p = rnd.nextDouble(); + double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) * + Math.log(1.0 - 2.0 * Math.abs(p - 0.5)); + long reported = afterBinning + (long) laplaceNoise; + reportedOnions.put(fingerprint, reported); + long roundedToNearestRightSideOfTheBin = + ((reported + binSize / 2) / binSize) * binSize; + long subtractedHalfOfBinSize = + roundedToNearestRightSideOfTheBin - binSize / 2; + removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize); + } + + /* Perform extrapolations from random fractions of reports by + * probability to be selected as rendezvous point. */ + StringBuilder sb = new StringBuilder(); + double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, + 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 }; + for (double fraction : fractions) { + SortedSet<Double> nonReportingRelays = + new TreeSet<Double>(hsDirFractions.keySet()); + List<Double> shuffledRelays = new ArrayList<Double>( + nonReportingRelays); + Collections.shuffle(shuffledRelays); + SortedSet<Double> reportingRelays = new TreeSet<Double>(); + for (int j = 0; j < (int) ((double) hsDirFractions.size() + * fraction); j++) { + reportingRelays.add(shuffledRelays.get(j)); + nonReportingRelays.remove(shuffledRelays.get(j)); + } + List<double[]> singleRelayExtrapolations; + double totalReportingProbability; + do { + singleRelayExtrapolations = new ArrayList<double[]>(); + totalReportingProbability = 0.0; + for (double reportingRelay : reportingRelays) { + double probability = hsDirFractions.get(reportingRelay) / 3.0; + if (probability > 0.0) { + singleRelayExtrapolations.add( + new double[] { removedNoiseOnions.get(reportingRelay) + / probability, removedNoiseOnions.get(reportingRelay), + probability }); + } + totalReportingProbability += probability; + } + if (totalReportingProbability < fraction - 0.001) { + double addRelay = + new ArrayList<Double>(nonReportingRelays).get( + rnd.nextInt(nonReportingRelays.size())); + nonReportingRelays.remove(addRelay); + reportingRelays.add(addRelay); + } else if (totalReportingProbability > fraction + 0.001) { + double removeRelay = + new ArrayList<Double>(reportingRelays).get( + rnd.nextInt(reportingRelays.size())); + reportingRelays.remove(removeRelay); + nonReportingRelays.add(removeRelay); + } + } while (totalReportingProbability < fraction - 0.001 || + totalReportingProbability > fraction + 0.001); + Collections.sort(singleRelayExtrapolations, + new Comparator<double[]>() { + public int compare(double[] o1, double[] o2) { + return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0; + } + }); + double totalProbability = 0.0, totalValues = 0.0; + double totalInterquartileProbability = 0.0, + totalInterquartileValues = 0.0; + Double weightedMedian = null; + for (double[] extrapolation : singleRelayExtrapolations) { + totalValues += extrapolation[1]; + totalProbability += extrapolation[2]; + if (weightedMedian == null && + totalProbability > totalReportingProbability * 0.5) { + weightedMedian = extrapolation[0]; + } + if (totalProbability > totalReportingProbability * 0.25 && + totalProbability < totalReportingProbability * 0.75) { + totalInterquartileValues += extrapolation[1]; + totalInterquartileProbability += extrapolation[2]; + } + } + sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction, + totalValues / totalProbability, weightedMedian, + totalInterquartileValues / totalInterquartileProbability)); + } + return sb.toString(); + } +}
tor-commits@lists.torproject.org