[tor-commits] [metrics-tasks/master] Add hidserv-stats extrapolation code (#13192).

karsten at torproject.org karsten at torproject.org
Fri Feb 6 18:31:26 UTC 2015


commit 968def62d5872fb23279a35c2474db276ae455e7
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Sat Jan 17 17:53:03 2015 +0100

    Add hidserv-stats extrapolation code (#13192).
---
 task-13192/.gitignore                            |    7 +
 task-13192/README.md                             |   24 +
 task-13192/src/R/plot.R                          |  246 ++++++++
 task-13192/src/java/ExtrapolateHidServStats.java |  722 ++++++++++++++++++++++
 4 files changed, 999 insertions(+)

diff --git a/task-13192/.gitignore b/task-13192/.gitignore
new file mode 100644
index 0000000..7e8bf3b
--- /dev/null
+++ b/task-13192/.gitignore
@@ -0,0 +1,7 @@
+in/
+.classpath
+.project
+src/bash/
+src/bin/
+out/
+
diff --git a/task-13192/README.md b/task-13192/README.md
new file mode 100644
index 0000000..c6ba2c8
--- /dev/null
+++ b/task-13192/README.md
@@ -0,0 +1,24 @@
+Extrapolating network totals from hidden-service statistics
+===========================================================
+
+Fetch tarballs and/or single files from CollecTor, decompress the
+tarballs (without unpacking their contents), and store everything in the
+following directories:
+
+  in/collector/archive/relay-descriptors/extra-infos/
+  in/collector/archive/relay-descriptors/consensuses/
+  in/collector/recent/relay-descriptors/extra-infos/
+  in/collector/recent/relay-descriptors/consensuses/
+
+Fetch the latest bandwidth.csv file from Metrics and store it at the
+following path:
+
+  in/metrics/bandwidth.csv
+
+Add metrics-lib to the classpath and compile the classes in src/java/.
+
+Run Java class ExtrapolateHidServStats.
+
+Run the R script:
+
+  R --slave -f src/R/plot.R
+
diff --git a/task-13192/src/R/plot.R b/task-13192/src/R/plot.R
new file mode 100644
index 0000000..991928b
--- /dev/null
+++ b/task-13192/src/R/plot.R
@@ -0,0 +1,246 @@
+# Load required libraries.  Use library() rather than require() so that
+# a missing package aborts the script immediately instead of failing
+# later with a confusing "could not find function" error.
+library(ggplot2, warn.conflicts = FALSE, quietly = TRUE)
+library(scales, warn.conflicts = FALSE, quietly = TRUE)
+library(reshape, warn.conflicts = FALSE, quietly = TRUE)
+library(splines, warn.conflicts = FALSE, quietly = TRUE)
+library(Hmisc, warn.conflicts = FALSE, quietly = TRUE)
+
+# Avoid scientific notation in printed numbers and axis labels.
+options(scipen = 15)
+
+# Read the .csv file written by the Java class ExtrapolateHidServStats.
+h <- read.csv("out/csv/hidserv-stats.csv", stringsAsFactors = FALSE)
+
+# Create output directories for report (PDF) and slide (PNG) graphs.
+dir.create(file.path("out", "graphs", "report"), showWarnings = FALSE,
+  recursive = TRUE)
+dir.create(file.path("out", "graphs", "slides"), showWarnings = FALSE,
+  recursive = TRUE)
+
+# Cut off last two days, because stats might be incomplete for those.
+h <- h[as.Date(h$stats_end) < max(as.Date(h$stats_end)) - 1, ]
+
+# Graph the number of reported stats by day.  Each row in h is one
+# statistics report, so a bar chart with one-day bins counts reports
+# per day.
+h7 <- data.frame(date = as.Date(h$stats_end), reports = 1)
+ggplot(h7, aes(x = date)) +
+geom_bar(colour = 'lightgray', width = .7, binwidth = 1) +
+scale_x_date("") +
+scale_y_continuous("")
+# ggsave() writes the most recently drawn plot; the same graph goes
+# into both the report (PDF) and the slide deck (PNG).
+ggsave("out/graphs/report/num-reported-stats.pdf", width = 10, height = 3,
+  dpi = 100)
+ggsave("out/graphs/slides/hidserv-12.png", width = 8, height = 3,
+  dpi = 100)
+
+# Graph distributions of reported values by day.  Convert relayed cells
+# to MB/s: cells * 512 bytes per cell, divided by 86400 seconds per day
+# and 10^6 bytes per MB.
+h1 <- data.frame(date = as.Date(h$stats_end),
+  traffic = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
+  services = h$hidserv_dir_onions_seen)
+# Reshape to long format so both variables can share one faceted plot.
+h1 <- melt(h1, "date")
+h1 <- data.frame(date = h1$date,
+  variable = ifelse(h1$variable == "traffic", "traffic in MB/s",
+  ".onion addresses"), value = h1$value)
+# One boxplot per day, faceted by variable with independent y scales.
+ggplot(h1, aes(x = date, y = value, group = date)) +
+geom_boxplot() +
+facet_grid(variable ~ ., scales = "free_y") +
+scale_x_date("") +
+scale_y_continuous("Statistics reported by single relays\n")
+ggsave("out/graphs/report/stats-by-day.pdf", width = 10, height = 5,
+  dpi = 100)
+
+# Graph distributions of calculated fractions by day.  frac_hsdesc sums
+# to 3.0 over all HSDirs in a consensus (three directories are
+# responsible for each descriptor), so divide by 3.0 for a probability.
+h2 <- data.frame(date = as.Date(h$stats_end),
+  prob_rend_point = h$prob_rend_point,
+  x_frac_hsdesc = h$frac_hsdesc / 3.0)
+h2 <- melt(h2, "date")
+h2 <- data.frame(date = h2$date,
+  variable = ifelse(h2$variable == "prob_rend_point",
+  "selected as rendezvous point", "responsible for a descriptor"),
+  value = h2$value)
+ggplot(h2, aes(x = date, y = value, group = date)) +
+geom_boxplot() +
+facet_grid(variable ~ ., scales = "free_y") +
+scale_x_date("") +
+scale_y_continuous("Calculated probabilities\n", labels = percent)
+ggsave("out/graphs/report/probs-by-relay.pdf", width = 10, height = 5,
+  dpi = 100)
+
+# Graph ECDF of cells reported by relays with rend point probability of 0.
+# Such relays should have observed nothing, so their reports consist of
+# binning/noise artifacts only; overlaying the expected Laplace CDF
+# (blue) checks whether the reported noise matches it.
+h8 <- h[h$prob_rend_point == 0,
+        "hidserv_rend_relayed_cells" ]
+h8 <- sort(h8)
+# Empirical CDF: the i-th smallest value gets cumulative probability i/n.
+h8 <- data.frame(x = h8, y = (1:length(h8)) / length(h8))
+# CDF of a zero-centered Laplace distribution with scale b = 2048 / 0.3,
+# the same parameters used for cell counts in the Java simulation.
+laplace_cells <- function(x) {
+  0.5 + 0.5 * sign(x) * (1 - exp(abs(x) / (-2048/0.3)))
+}
+ggplot(h8, aes(x = x, y = y)) +
+geom_line() +
+stat_function(fun = laplace_cells, colour = "blue") +
+scale_x_continuous("\nReported cells on rendezvous circuits") +
+scale_y_continuous("Cumulative probability\n")
+ggsave("out/graphs/report/zero-prob-cells.pdf", width = 5, height = 3,
+  dpi = 100)
+
+# Graph ECDF of .onions reported by relays with HSDir probability of 0,
+# analogous to the cell graph above but with scale b = 8 / 0.3.
+h9 <- h[h$frac_hsdesc == 0, "hidserv_dir_onions_seen"]
+h9 <- sort(h9)
+h9 <- data.frame(x = h9, y = (1:length(h9)) / length(h9))
+laplace_onions <- function(x) {
+  0.5 + 0.5 * sign(x) * (1 - exp(abs(x) / (-8/0.3)))
+}
+ggplot(h9, aes(x = x, y = y)) +
+geom_line() +
+stat_function(fun = laplace_onions, colour = "blue") +
+scale_x_continuous("\nReported .onion addresses") +
+scale_y_continuous("Cumulative probability\n")
+ggsave("out/graphs/report/zero-prob-onions.pdf", width = 5, height = 3,
+  dpi = 100)
+
+# Graph correlation between reports and fractions per relay.  Relays
+# with probability 0 are mapped to NA so they are excluded from the
+# scatter plots and linear fits below.
+h3 <- rbind(
+  data.frame(x = h$frac_hsdesc / 3.0,
+    y = ifelse(h$frac_hsdesc == 0, NA, h$hidserv_dir_onions_seen),
+    facet = ".onion addresses"),
+  data.frame(x = h$prob_rend_point,
+    y = ifelse(h$prob_rend_point == 0, NA,
+      h$hidserv_rend_relayed_cells * 512 / (86400 * 1000)),
+    facet = "traffic in kB/s"))
+# Scatter plot with a linear regression line, one file per facet.
+ggplot(h3[h3$facet == ".onion addresses", ], aes(x = x, y = y)) +
+geom_point(alpha = 0.5) +
+stat_smooth(method = "lm") +
+scale_x_continuous(name = "\nProbability", labels = percent) +
+scale_y_continuous(name = "Reported .onion addresses\n")
+ggsave("out/graphs/report/corr-probs-onions-by-relay.pdf", width = 5,
+  height = 3, dpi = 100)
+ggplot(h3[h3$facet == "traffic in kB/s", ], aes(x = x, y = y)) +
+geom_point(alpha = 0.5) +
+stat_smooth(method = "lm") +
+scale_x_continuous(name = "\nProbability", labels = percent) +
+scale_y_continuous(name = "Reported traffic in kB/s\n")
+ggsave("out/graphs/report/corr-probs-cells-by-relay.pdf", width = 5,
+  height = 3, dpi = 100)
+
+# Graph correlation between reports and fractions per day: sum reported
+# values and probabilities over all relays reporting on a given day.
+h5 <- rbind(
+  data.frame(date = as.Date(h$stats_end),
+    prob = ifelse(h$frac_hsdesc == 0, NA, h$frac_hsdesc / 3.0),
+    reported = h$hidserv_dir_onions_seen, facet = "published descriptor"),
+  data.frame(date = as.Date(h$stats_end),
+    prob = ifelse(h$prob_rend_point == 0, NA, h$prob_rend_point),
+    reported = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
+    facet = "traffic in MB/s"))
+h5 <- na.omit(h5)
+h5 <- aggregate(list(prob = h5$prob, reported = h5$reported),
+  by = list(date = h5$date, facet = h5$facet), FUN = sum)
+# The dashed vertical line marks the 1% total-probability threshold used
+# for the extrapolation further below.
+ggplot(h5[h5$facet == "traffic in MB/s", ], aes(x = prob, y = reported)) +
+geom_point(alpha = 0.5) +
+scale_x_continuous(name = "\nTotal probability", labels = percent) +
+scale_y_continuous(name = "Total traffic in MB/s\n") +
+stat_smooth(method = "lm") +
+geom_vline(xintercept = 0.01, linetype = 2)
+ggsave("out/graphs/report/corr-probs-cells-by-day.pdf", width = 5,
+  height = 3, dpi = 100)
+ggplot(h5[h5$facet == "published descriptor", ],
+  aes(x = prob, y = reported)) +
+geom_point(alpha = 0.5) +
+scale_x_continuous(name = "\nTotal probability", labels = percent) +
+scale_y_continuous(name = "Total reported .onion addresses\n") +
+stat_smooth(method = "lm") +
+geom_vline(xintercept = 0.01, linetype = 2)
+ggsave("out/graphs/report/corr-probs-onions-by-day.pdf", width = 5,
+  height = 3, dpi = 100)
+
+# Graph extrapolated network totals.  Per day, divide the sum of
+# reported values by the sum of the reporting relays' probabilities;
+# days where the summed probability stays below 1% are zeroed out and
+# dropped as too unreliable.
+# NOTE(review): frac_hsdesc is scaled by 4.0 here while the threshold
+# below divides prob_onion by 12.0 (i.e. compares frac_hsdesc / 3.0
+# against 1%) — confirm these descriptor-replica scaling factors.
+h6 <- data.frame(date = as.Date(h$stats_end),
+  traffic = ifelse(h$prob_rend_point == 0, 0,
+    h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000)),
+  prob_rend_point = h$prob_rend_point,
+  onions = ifelse(h$frac_hsdesc == 0, 0, h$hidserv_dir_onions_seen),
+  prob_onion = h$frac_hsdesc * 4.0)
+h6 <- aggregate(list(traffic = h6$traffic, 
+  prob_rend_point = h6$prob_rend_point,
+  onions = h6$onions,
+  prob_onion = h6$prob_onion), by = list(date = h6$date), FUN = sum)
+h6 <- data.frame(date = h6$date,
+  traffic = ifelse(h6$prob_rend_point < 0.01, 0,
+    h6$traffic / h6$prob_rend_point),
+  onions = ifelse(h6$prob_onion / 12.0 < 0.01, 0,
+    h6$onions / h6$prob_onion))
+h6 <- melt(h6, "date")
+h6 <- h6[h6$value > 0, ]
+# Append one zero-valued dummy row per facet, presumably so that both
+# y axes start at zero — TODO confirm.
+h6 <- rbind(h6, data.frame(date = NA, variable = c('traffic', 'onions'),
+  value = 0))
+h6 <- data.frame(date = h6$date,
+  variable = ifelse(h6$variable == "traffic", "total traffic in MB/s",
+    ".onion addresses"), value = h6$value)
+ggplot(h6, aes(date, value)) +
+facet_grid(variable ~ ., scales = "free_y") +
+geom_point() +
+stat_smooth() +
+scale_x_date(name = "") +
+scale_y_continuous(name = "Extrapolated network totals\n")
+ggsave("out/graphs/report/extrapolated-network-totals.pdf", width = 10,
+  height = 5, dpi = 100)
+
+# Graph extrapolated number of .onion addresses alone for the slides.
+h11 <- h6[h6$variable == ".onion addresses", ]
+ggplot(h11, aes(x = date, y = value)) +
+geom_point() +
+stat_smooth() +
+scale_x_date(name = "") +
+scale_y_continuous(name = "")
+ggsave("out/graphs/slides/hidserv-13.png", width = 8, height = 3,
+  dpi = 100)
+
+# Graph extrapolated fraction of hidden-service traffic relative to
+# total network bandwidth from Metrics' bandwidth.csv.
+b <- read.csv("in/metrics/bandwidth.csv", stringsAsFactors = FALSE)
+# Rows with empty isexit/isguard presumably carry network-wide totals
+# rather than per-flag subsets — verify against the bandwidth.csv
+# specification.  Only dates with hidserv-stats coverage are kept.
+b <- b[b$isexit == '' & b$isguard == '' & b$date > '2014-12-20', ]
+# Extrapolate daily hidden-service traffic as in the totals graph above.
+h10 <- data.frame(date = as.Date(h$stats_end),
+  traffic = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
+  prob_rend_point = h$prob_rend_point)
+h10 <- aggregate(list(traffic = h10$traffic, 
+  prob_rend_point = h10$prob_rend_point), by = list(date = h10$date),
+  FUN = sum)
+h10 <- data.frame(date = h10$date,
+  traffic = ifelse(h10$prob_rend_point < 0.01, 0,
+    h10$traffic / h10$prob_rend_point))
+h10 <- melt(h10, "date")
+h10 <- h10[h10$value > 0, ]
+h10 <- rbind(h10, data.frame(date = as.Date(b$date), variable = "bw",
+  value = b$bwread + b$bwwrite))
+# Pivot to wide format so traffic and bandwidth line up by date; days
+# missing either series are dropped.
+h10 <- cast(h10, date ~ variable, value = "value")
+h10 <- na.omit(h10)
+# Fraction of total bandwidth: traffic is in MB/s, bw presumably in
+# bytes/s (hence the * 1000 * 1000) — TODO confirm units.
+h10 <- data.frame(date = h10$date,
+  value = h10$traffic * 1000 * 1000 / h10$bw)
+h10 <- rbind(h10, data.frame(date = NA, value = 0))
+ggplot(h10, aes(x = date, y = value)) +
+geom_point() +
+scale_x_date(name = "") +
+scale_y_continuous(name = "", labels = percent) +
+stat_smooth()
+ggsave("out/graphs/slides/hidserv-14.png", width = 8, height = 3,
+  dpi = 100)
+
+# Graph simulation results for cells on rendezvous circuits.  The Java
+# simulation distributes 10^10 cells, so deviations of the median (p500)
+# and the 2.5/97.5 percentiles are plotted relative to 1e10.
+s <- read.csv("out/csv/sim-cells.csv")
+ggplot(s, aes(x = frac, y = (p500 - 1e10) / 1e10,
+  ymin = (p025 - 1e10) / 1e10, ymax = (p975 - 1e10) / 1e10)) +
+geom_line() +
+geom_ribbon(alpha = 0.2) +
+scale_x_continuous("\nRendezvous points included in extrapolation",
+  labels = percent) +
+scale_y_continuous("Deviation from network totals\n", labels = percent)
+ggsave("out/graphs/report/sim-cells.pdf", width = 5, height = 3,
+  dpi = 100)
+
+# Graph simulation results for .onion addresses, relative to 40000 —
+# presumably the simulated true number of services in simulateOnions();
+# confirm against the Java code.
+o <- read.csv("out/csv/sim-onions.csv")
+ggplot(o, aes(x = frac, y = (p500 - 40000) / 40000,
+  ymin = (p025 - 40000) / 40000, ymax = (p975 - 40000) / 40000)) +
+geom_line() +
+geom_ribbon(alpha = 0.2) +
+scale_x_continuous("\nDirectories included in extrapolation",
+  labels = percent) +
+scale_y_continuous("Deviation from network totals\n", labels = percent)
+ggsave("out/graphs/report/sim-onions.pdf", width = 5, height = 3,
+  dpi = 100)
+
diff --git a/task-13192/src/java/ExtrapolateHidServStats.java b/task-13192/src/java/ExtrapolateHidServStats.java
new file mode 100644
index 0000000..100520d
--- /dev/null
+++ b/task-13192/src/java/ExtrapolateHidServStats.java
@@ -0,0 +1,722 @@
+import java.io.BufferedWriter;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileWriter;
+import java.math.BigInteger;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Scanner;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+
+public class ExtrapolateHidServStats {
+
+  /* Input directories with relay descriptors fetched from CollecTor,
+   * as described in README.md. */
+  private static File archiveExtraInfosDirectory =
+      new File("in/collector/archive/relay-descriptors/extra-infos/");
+
+  private static File recentExtraInfosDirectory =
+      new File("in/collector/recent/relay-descriptors/extra-infos/");
+
+  private static File archiveConsensuses =
+      new File("in/collector/archive/relay-descriptors/consensuses/");
+
+  private static File recentConsensuses =
+      new File("in/collector/recent/relay-descriptors/consensuses/");
+
+  /* Output .csv files written by this tool and read by src/R/plot.R. */
+  private static File hidservStatsCsvFile =
+      new File("out/csv/hidserv-stats.csv");
+
+  private static File simCellsCsvFile =
+      new File("out/csv/sim-cells.csv");
+
+  private static File simOnionsCsvFile =
+      new File("out/csv/sim-onions.csv");
+
+  /* Entry point: extracts hidserv-* statistics and consensus fractions,
+   * extrapolates network totals into out/csv/hidserv-stats.csv, and
+   * runs two simulations that evaluate the extrapolation approach. */
+  public static void main(String[] args) throws Exception {
+    System.out.println("Extracting hidserv-* lines from extra-info "
+        + "descriptors...");
+    SortedMap<String, SortedSet<HidServStats>> hidServStats =
+        extractHidServStats();
+    System.out.println("Extracting fractions from consensuses...");
+    /* Only fractions of relays that actually reported statistics are
+     * needed, hence only those fingerprints are passed in. */
+    SortedMap<String, SortedSet<ConsensusFraction>> consensusFractions =
+        extractConsensusFractions(hidServStats.keySet());
+    System.out.println("Extrapolating statistics...");
+    extrapolateHidServStats(hidServStats, consensusFractions);
+    System.out.println("Simulating extrapolation of rendezvous cells...");
+    simulateCells();
+    System.out.println("Simulating extrapolation of .onions...");
+    simulateOnions();
+    System.out.println("Terminating.");
+  }
+
+  /* Shared formatter for "yyyy-MM-dd HH:mm:ss" timestamps in UTC,
+   * matching the timestamp format in descriptors and in the output
+   * .csv files.  NOTE(review): SimpleDateFormat is not thread-safe;
+   * fine as long as this tool stays single-threaded. */
+  private static final DateFormat DATE_TIME_FORMAT;
+
+  static {
+    DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    DATE_TIME_FORMAT.setLenient(false);
+    DATE_TIME_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
+  }
+
+  /* One hidserv-stats report published by a single relay, with the
+   * reported values already de-obfuscated by removeNoise(). */
+  private static class HidServStats implements Comparable<HidServStats> {
+
+    /* Hidden-service statistics end timestamp in milliseconds. */
+    private long statsEndMillis;
+
+    /* Statistics interval length in seconds. */
+    private long statsIntervalSeconds;
+
+    /* Number of relayed cells reported by the relay and adjusted by
+     * rounding to the nearest right side of a bin and subtracting half of
+     * the bin size. */
+    private long rendRelayedCells;
+
+    /* Number of .onions reported by the relay and adjusted by rounding to
+     * the nearest right side of a bin and subtracting half of the bin
+     * size. */
+    private long dirOnionsSeen;
+
+    private HidServStats(long statsEndMillis, long statsIntervalSeconds,
+        long rendRelayedCells, long dirOnionsSeen) {
+      this.statsEndMillis = statsEndMillis;
+      this.statsIntervalSeconds = statsIntervalSeconds;
+      this.rendRelayedCells = rendRelayedCells;
+      this.dirOnionsSeen = dirOnionsSeen;
+    }
+
+    @Override
+    public boolean equals(Object otherObject) {
+      if (!(otherObject instanceof HidServStats)) {
+        return false;
+      }
+      HidServStats other = (HidServStats) otherObject;
+      return this.statsEndMillis == other.statsEndMillis &&
+          this.statsIntervalSeconds == other.statsIntervalSeconds &&
+          this.rendRelayedCells == other.rendRelayedCells &&
+          this.dirOnionsSeen == other.dirOnionsSeen;
+    }
+
+    /* Keep hashCode() consistent with equals(), as required by the
+     * java.lang.Object contract. */
+    @Override
+    public int hashCode() {
+      return Long.valueOf(this.statsEndMillis).hashCode()
+          ^ Long.valueOf(this.statsIntervalSeconds).hashCode()
+          ^ Long.valueOf(this.rendRelayedCells).hashCode()
+          ^ Long.valueOf(this.dirOnionsSeen).hashCode();
+    }
+
+    /* Order by stats end time, breaking ties on the remaining fields so
+     * that compareTo() == 0 implies equals().  Without the tie-break,
+     * two different reports with the same end time would silently
+     * collapse into one element of the TreeSet built in
+     * extractHidServStats(). */
+    @Override
+    public int compareTo(HidServStats other) {
+      if (this.statsEndMillis != other.statsEndMillis) {
+        return this.statsEndMillis < other.statsEndMillis ? -1 : 1;
+      } else if (this.statsIntervalSeconds
+          != other.statsIntervalSeconds) {
+        return this.statsIntervalSeconds < other.statsIntervalSeconds
+            ? -1 : 1;
+      } else if (this.rendRelayedCells != other.rendRelayedCells) {
+        return this.rendRelayedCells < other.rendRelayedCells ? -1 : 1;
+      } else if (this.dirOnionsSeen != other.dirOnionsSeen) {
+        return this.dirOnionsSeen < other.dirOnionsSeen ? -1 : 1;
+      } else {
+        return 0;
+      }
+    }
+  }
+
+  /* Extract fingerprint and hidserv-* lines from extra-info descriptors
+   * located in in/{archive,recent}/relay-descriptors/extra-infos/.
+   * Returns a map from relay fingerprint to the set of complete
+   * statistics reports published by that relay.  Descriptors containing
+   * only some of the three expected hidserv-* lines are warned about
+   * and skipped. */
+  private static SortedMap<String, SortedSet<HidServStats>>
+      extractHidServStats() {
+    SortedMap<String, SortedSet<HidServStats>> extractedHidServStats =
+        new TreeMap<String, SortedSet<HidServStats>>();
+    DescriptorReader descriptorReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    descriptorReader.addDirectory(archiveExtraInfosDirectory);
+    descriptorReader.addDirectory(recentExtraInfosDirectory);
+    Iterator<DescriptorFile> descriptorFiles =
+        descriptorReader.readDescriptors();
+    while (descriptorFiles.hasNext()) {
+      DescriptorFile descriptorFile = descriptorFiles.next();
+      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+        if (!(descriptor instanceof ExtraInfoDescriptor)) {
+          continue;
+        }
+        String fingerprint =
+            ((ExtraInfoDescriptor) descriptor).getFingerprint();
+        Scanner scanner = new Scanner(new ByteArrayInputStream(
+            descriptor.getRawDescriptorBytes()));
+        Long statsEndMillis = null, statsIntervalSeconds = null,
+            rendRelayedCells = null, dirOnionsSeen = null;
+        try {
+          while (scanner.hasNext()) {
+            String line = scanner.nextLine();
+            if (line.startsWith("hidserv-")) {
+              String[] parts = line.split(" ");
+              if (parts[0].equals("hidserv-stats-end")) {
+                if (parts.length != 5 || !parts[3].startsWith("(") ||
+                    !parts[4].equals("s)")) {
+                  /* Will warn below, because statsEndMillis and
+                   * statsIntervalSeconds are still null. */
+                  continue;
+                }
+                statsEndMillis = DATE_TIME_FORMAT.parse(
+                    parts[1] + " " + parts[2]).getTime();
+                statsIntervalSeconds =
+                    Long.parseLong(parts[3].substring(1));
+              } else if (parts[0].equals("hidserv-rend-relayed-cells")) {
+                if (parts.length != 5 ||
+                    !parts[4].startsWith("bin_size=")) {
+                  /* Will warn below, because rendRelayedCells is still
+                   * null. */
+                  continue;
+                }
+                rendRelayedCells = removeNoise(Long.parseLong(parts[1]),
+                    Long.parseLong(parts[4].substring(9)));
+              } else if (parts[0].equals("hidserv-dir-onions-seen")) {
+                if (parts.length != 5 ||
+                    !parts[4].startsWith("bin_size=")) {
+                  /* Will warn below, because dirOnionsSeen is still
+                   * null. */
+                  continue;
+                }
+                dirOnionsSeen = removeNoise(Long.parseLong(parts[1]),
+                    Long.parseLong(parts[4].substring(9)));
+              }
+            }
+          }
+        } catch (ParseException e) {
+          e.printStackTrace();
+          continue;
+        } catch (NumberFormatException e) {
+          e.printStackTrace();
+          continue;
+        } finally {
+          /* Always release the scanner, also when parsing fails. */
+          scanner.close();
+        }
+        if (statsEndMillis == null && statsIntervalSeconds == null &&
+            rendRelayedCells == null && dirOnionsSeen == null) {
+          /* Descriptor without hidserv-stats; the common case. */
+          continue;
+        } else if (statsEndMillis != null && statsIntervalSeconds != null
+            && rendRelayedCells != null && dirOnionsSeen != null) {
+          if (!extractedHidServStats.containsKey(fingerprint)) {
+            extractedHidServStats.put(fingerprint,
+                new TreeSet<HidServStats>());
+          }
+          extractedHidServStats.get(fingerprint).add(new HidServStats(
+              statsEndMillis, statsIntervalSeconds, rendRelayedCells,
+              dirOnionsSeen));
+        } else {
+          System.err.println("Relay " + fingerprint + " published "
+              + "incomplete hidserv-stats.  Ignoring.");
+        }
+      }
+    }
+    return extractedHidServStats;
+  }
+
+  /* Undo the obfuscation applied to a reported statistic: round the
+   * reported value to the nearest right side of a bin of the given
+   * size, then subtract half of the bin size as the expected value
+   * within the bin.  May return a negative value for small reports. */
+  private static long removeNoise(long reportedNumber, long binSize) {
+    long nearestRightBinEdge =
+        ((reportedNumber + binSize / 2) / binSize) * binSize;
+    return nearestRightBinEdge - binSize / 2;
+  }
+
+  /* Fractions of the network that one relay represented in one
+   * consensus, used to weight that relay's reported statistics. */
+  private static class ConsensusFraction
+      implements Comparable<ConsensusFraction> {
+
+    /* Valid-after timestamp of the consensus in milliseconds. */
+    private long validAfterMillis;
+
+    /* Fresh-until timestamp of the consensus in milliseconds. */
+    private long freshUntilMillis;
+
+    /* Fraction of consensus weight in [0.0, 1.0] of this relay. */
+    private double fractionConsensusWeight;
+
+    /* Probability for being selected by clients as rendezvous point. */
+    private double probabilityRendezvousPoint;
+
+    /* Fraction of descriptor identifiers in [0.0, 1.0] that this relay
+     * has been responsible for.  This is the "distance" from the
+     * fingerprint of the relay three HSDir positions earlier in the ring
+     * to the fingerprint of this relay.  Fractions of all HSDirs in a
+     * consensus add up to 3.0, not 1.0. */
+    private double fractionResponsibleDescriptors;
+
+    private ConsensusFraction(long validAfterMillis,
+        long freshUntilMillis,
+        double fractionConsensusWeight,
+        double probabilityRendezvousPoint,
+        double fractionResponsibleDescriptors) {
+      this.validAfterMillis = validAfterMillis;
+      this.freshUntilMillis = freshUntilMillis;
+      this.fractionConsensusWeight = fractionConsensusWeight;
+      this.probabilityRendezvousPoint = probabilityRendezvousPoint;
+      this.fractionResponsibleDescriptors =
+          fractionResponsibleDescriptors;
+    }
+
+    @Override
+    public boolean equals(Object otherObject) {
+      if (!(otherObject instanceof ConsensusFraction)) {
+        return false;
+      }
+      ConsensusFraction other = (ConsensusFraction) otherObject;
+      return this.validAfterMillis == other.validAfterMillis &&
+          this.freshUntilMillis == other.freshUntilMillis &&
+          this.fractionResponsibleDescriptors ==
+          other.fractionResponsibleDescriptors &&
+          this.fractionConsensusWeight == other.fractionConsensusWeight &&
+          this.probabilityRendezvousPoint ==
+          other.probabilityRendezvousPoint;
+    }
+
+    /* Keep hashCode() consistent with equals(), as required by the
+     * java.lang.Object contract. */
+    @Override
+    public int hashCode() {
+      return Long.valueOf(this.validAfterMillis).hashCode()
+          ^ Long.valueOf(this.freshUntilMillis).hashCode()
+          ^ Double.valueOf(this.fractionConsensusWeight).hashCode()
+          ^ Double.valueOf(this.probabilityRendezvousPoint).hashCode()
+          ^ Double.valueOf(this.fractionResponsibleDescriptors)
+              .hashCode();
+    }
+
+    /* Order by valid-after time only.  Per fingerprint there is at most
+     * one entry per consensus, so ties (which compareTo() treats as
+     * duplicates in a TreeSet) should not occur in practice. */
+    @Override
+    public int compareTo(ConsensusFraction other) {
+      return this.validAfterMillis < other.validAfterMillis ? -1 :
+          this.validAfterMillis > other.validAfterMillis ? 1 : 0;
+    }
+  }
+
+  /* Extract fractions that relays were responsible for from consensuses
+   * located in in/{archive,recent}/relay-descriptors/consensuses/.
+   * For every consensus and every given fingerprint this computes the
+   * relay's consensus-weight fraction, its probability of being picked
+   * as rendezvous point, and the fraction of descriptor identifiers it
+   * was responsible for as HSDir. */
+  private static SortedMap<String, SortedSet<ConsensusFraction>>
+      extractConsensusFractions(Collection<String> fingerprints) {
+    SortedMap<String, SortedSet<ConsensusFraction>>
+        extractedConsensusFractions =
+        new TreeMap<String, SortedSet<ConsensusFraction>>();
+    DescriptorReader descriptorReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    descriptorReader.addDirectory(archiveConsensuses);
+    descriptorReader.addDirectory(recentConsensuses);
+    Iterator<DescriptorFile> descriptorFiles =
+        descriptorReader.readDescriptors();
+    while (descriptorFiles.hasNext()) {
+      DescriptorFile descriptorFile = descriptorFiles.next();
+      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+        if (!(descriptor instanceof RelayNetworkStatusConsensus)) {
+          continue;
+        }
+        RelayNetworkStatusConsensus consensus =
+            (RelayNetworkStatusConsensus) descriptor;
+        /* All four middle-position bandwidth weights are required to
+         * compute rendezvous-point probabilities below. */
+        SortedSet<String> weightKeys = new TreeSet<String>(Arrays.asList(
+            "Wmg,Wmm,Wme,Wmd".split(",")));
+        weightKeys.removeAll(consensus.getBandwidthWeights().keySet());
+        if (!weightKeys.isEmpty()) {
+          System.err.println("Consensus with valid-after time "
+              + DATE_TIME_FORMAT.format(consensus.getValidAfterMillis())
+              + " doesn't contain expected Wmx weights.  Skipping.");
+          continue;
+        }
+        /* Consensus bandwidth weights are scaled by 10000. */
+        double wmg = ((double) consensus.getBandwidthWeights().get("Wmg"))
+            / 10000.0;
+        double wmm = ((double) consensus.getBandwidthWeights().get("Wmm"))
+            / 10000.0;
+        double wme = ((double) consensus.getBandwidthWeights().get("Wme"))
+            / 10000.0;
+        double wmd = ((double) consensus.getBandwidthWeights().get("Wmd"))
+            / 10000.0;
+        /* HSDir fingerprints in descending order, which determines the
+         * direction of the ring traversal further below. */
+        SortedSet<String> hsDirs = new TreeSet<String>(
+            Collections.reverseOrder());
+        long totalConsensusWeight = 0L;
+        double totalWeightsRendezvousPoint = 0.0;
+        SortedMap<String, Double> weightsRendezvousPoint =
+            new TreeMap<String, Double>();
+        /* First pass over all status entries: collect HSDirs, total
+         * consensus weight, and per-relay rendezvous-point weights. */
+        for (Map.Entry<String, NetworkStatusEntry> e :
+            consensus.getStatusEntries().entrySet()) {
+          String fingerprint = e.getKey();
+          NetworkStatusEntry statusEntry = e.getValue();
+          SortedSet<String> flags = statusEntry.getFlags();
+          if (flags.contains("HSDir")) {
+            hsDirs.add(statusEntry.getFingerprint());
+          }
+          totalConsensusWeight += statusEntry.getBandwidth();
+          /* Only Fast relays are eligible as rendezvous points; the
+           * middle-position weight depends on the Guard/Exit flags. */
+          double weightRendezvousPoint = 0.0;
+          if (flags.contains("Fast")) {
+            weightRendezvousPoint = (double) statusEntry.getBandwidth();
+            if (flags.contains("Guard") && flags.contains("Exit")) {
+              weightRendezvousPoint *= wmd;
+            } else if (flags.contains("Guard")) {
+              weightRendezvousPoint *= wmg;
+            } else if (flags.contains("Exit")) {
+              weightRendezvousPoint *= wme;
+            } else {
+              weightRendezvousPoint *= wmm;
+            }
+          }
+          weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint);
+          totalWeightsRendezvousPoint += weightRendezvousPoint;
+        }
+        /* Add all HSDir fingerprints with leading "0" and "1" to
+         * simplify the logic to traverse the ring start. */
+        SortedSet<String> hsDirsCopy = new TreeSet<String>(hsDirs);
+        hsDirs.clear();
+        for (String fingerprint : hsDirsCopy) {
+          hsDirs.add("0" + fingerprint);
+          hsDirs.add("1" + fingerprint);
+        }
+        /* Size of the identifier ring: 16^40 = 2^160, i.e. the number
+         * of possible 40-hex-digit fingerprints. */
+        final double RING_SIZE = new BigInteger(
+            "10000000000000000000000000000000000000000",
+            16).doubleValue();
+        /* Second pass: compute the three fractions for each requested
+         * fingerprint; relays absent from this consensus get 0.0. */
+        for (String fingerprint : fingerprints) {
+          double probabilityRendezvousPoint = 0.0,
+              fractionResponsibleDescriptors = 0.0,
+              fractionConsensusWeight = 0.0;
+          NetworkStatusEntry statusEntry =
+              consensus.getStatusEntry(fingerprint);
+          if (statusEntry != null) {
+            if (hsDirs.contains("1" + fingerprint)) {
+              /* hsDirs is sorted in reverse order, so tailSet()
+               * iterates downward through the ring; after the loop,
+               * startResponsible is the fingerprint three HSDir
+               * positions earlier. */
+              String startResponsible = fingerprint;
+              int positionsToGo = 3;
+              for (String hsDirFingerprint :
+                  hsDirs.tailSet("1" + fingerprint)) {
+                startResponsible = hsDirFingerprint;
+                if (positionsToGo-- <= 0) {
+                  break;
+                }
+              }
+              fractionResponsibleDescriptors =
+                  new BigInteger("1" + fingerprint, 16).subtract(
+                  new BigInteger(startResponsible, 16)).doubleValue()
+                  / RING_SIZE;
+            }
+            fractionConsensusWeight =
+                ((double) statusEntry.getBandwidth())
+                / ((double) totalConsensusWeight);
+            /* NOTE(review): if totalWeightsRendezvousPoint were 0.0,
+             * this division would yield NaN — confirm that consensuses
+             * with Wmx weights always contain Fast relays. */
+            probabilityRendezvousPoint =
+                weightsRendezvousPoint.get(fingerprint)
+                / totalWeightsRendezvousPoint;
+          }
+          if (!extractedConsensusFractions.containsKey(fingerprint)) {
+            extractedConsensusFractions.put(fingerprint,
+                new TreeSet<ConsensusFraction>());
+          }
+          extractedConsensusFractions.get(fingerprint).add(
+              new ConsensusFraction(consensus.getValidAfterMillis(),
+              consensus.getFreshUntilMillis(), fractionConsensusWeight,
+              probabilityRendezvousPoint,
+              fractionResponsibleDescriptors));
+        }
+      }
+    }
+    return extractedConsensusFractions;
+  }
+
+  /* Write one line per relay and statistics interval to
+   * out/csv/hidserv-stats.csv, containing the reported values together
+   * with the relay's probabilities averaged over all consensuses whose
+   * valid-after time falls into the statistics interval. */
+  private static void extrapolateHidServStats(
+      SortedMap<String, SortedSet<HidServStats>> hidServStats,
+      SortedMap<String, SortedSet<ConsensusFraction>>
+      consensusFractions) throws Exception {
+    hidservStatsCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(
+        new FileWriter(hidservStatsCsvFile));
+    bw.write("fingerprint,stats_start,stats_end,"
+        + "hidserv_rend_relayed_cells,hidserv_dir_onions_seen,"
+        + "prob_rend_point,frac_hsdesc\n");
+    for (Map.Entry<String, SortedSet<HidServStats>> e :
+      hidServStats.entrySet()) {
+      String fingerprint = e.getKey();
+      if (!consensusFractions.containsKey(fingerprint)) {
+        System.err.println("We have hidserv-stats but no consensus "
+            + "fractions for " + fingerprint + ".  Skipping.");
+        continue;
+      }
+      for (HidServStats stats : e.getValue()) {
+        long statsStartMillis = stats.statsEndMillis
+            - stats.statsIntervalSeconds * 1000L;
+        double sumProbabilityRendezvousPoint = 0.0,
+            sumResponsibleDescriptors = 0.0;
+        int statusEntries = 0;
+        /* Sum fractions of all consensuses with valid-after times in
+         * [statsStartMillis, statsEndMillis). */
+        for (ConsensusFraction frac :
+            consensusFractions.get(fingerprint)) {
+          if (statsStartMillis <= frac.validAfterMillis &&
+              frac.validAfterMillis < stats.statsEndMillis) {
+            sumProbabilityRendezvousPoint +=
+                frac.probabilityRendezvousPoint;
+            sumResponsibleDescriptors +=
+                frac.fractionResponsibleDescriptors;
+            statusEntries++;
+          }
+        }
+        if (statusEntries == 0) {
+          /* Without this guard we would divide by zero below and write
+           * NaN values to the .csv file.  Skip the interval instead. */
+          System.err.println("No consensus with valid-after time in the "
+              + "stats interval ending at " + DATE_TIME_FORMAT.format(
+              stats.statsEndMillis) + " for " + fingerprint
+              + ".  Skipping.");
+          continue;
+        }
+        bw.write(String.format("%s,%s,%s,%d,%d,%.8f,%.8f%n", fingerprint,
+            DATE_TIME_FORMAT.format(statsStartMillis),
+            DATE_TIME_FORMAT.format(stats.statsEndMillis),
+            stats.rendRelayedCells, stats.dirOnionsSeen,
+            sumProbabilityRendezvousPoint / statusEntries,
+            sumResponsibleDescriptors / statusEntries));
+      }
+    }
+    bw.close();
+  }
+
+  /* Fixed seed makes simulation runs deterministic and reproducible. */
+  private static Random rnd = new Random(3);
+
+  /* Simulates relays observing rendezvous cells, obfuscates the
+   * per-relay counts with binning and Laplace noise, and extrapolates
+   * the network total from random subsets ("reporting fractions") of
+   * the obfuscated reports.  Writes one CSV row per fraction with the
+   * 2.5th, 50th, and 97.5th percentiles of 10,000 extrapolations to
+   * simCellsCsvFile. */
+  private static void simulateCells() throws Exception {
+
+    /* Generate consensus weights following an exponential distribution
+     * with lambda = 1 for 3000 potential rendezvous points. */
+    final int numberRendPoints = 3000;
+    double[] consensusWeights = new double[numberRendPoints];
+    double totalConsensusWeight = 0.0;
+    for (int i = 0; i < numberRendPoints; i++) {
+      /* Inverse-CDF sampling: -log(1 - U) with U uniform in [0, 1) is
+       * Exp(1)-distributed. */
+      double consensusWeight = -Math.log(1.0 - rnd.nextDouble());
+      consensusWeights[i] = consensusWeight;
+      totalConsensusWeight += consensusWeight;
+    }
+
+    /* Compute probabilities for being selected as rendezvous point,
+     * proportional to consensus weight. */
+    double[] probRendPoint = new double[numberRendPoints];
+    for (int i = 0; i < numberRendPoints; i++) {
+      probRendPoint[i] = consensusWeights[i] / totalConsensusWeight;
+    }
+
+    /* Generate 10,000,000,000 (roughly 60 MiB/s) cells in chunks
+     * following an exponential distribution with lambda = 0.00001 and
+     * randomly assign them to a rendezvous point to report them later. */
+    long cellsLeft = 10000000000L;
+    final double cellsLambda = 0.00001;
+    long[] observedCells = new long[numberRendPoints];
+    while (cellsLeft > 0) {
+      /* Chunk size drawn from Exp(cellsLambda), i.e. 100,000 cells on
+       * average per chunk. */
+      long cells = (long) (-Math.log(1.0 - rnd.nextDouble())
+          / cellsLambda);
+      /* Pick a rendezvous point by walking the cumulative probability
+       * distribution until the uniform draw is used up. */
+      double selectRendPoint = rnd.nextDouble();
+      for (int i = 0; i < probRendPoint.length; i++) {
+        selectRendPoint -= probRendPoint[i];
+        if (selectRendPoint <= 0.0) {
+          observedCells[i] += cells;
+          break;
+        }
+      }
+      /* NOTE(review): the final chunk may overshoot cellsLeft, so the
+       * total assigned can slightly exceed 10^10 cells. */
+      cellsLeft -= cells;
+    }
+
+    /* Obfuscate reports using binning and Laplace noise, and then attempt
+     * to remove noise again. */
+    final long binSize = 1024L;
+    /* Laplace scale parameter; presumably sensitivity 2048 divided by
+     * epsilon 0.3 -- TODO confirm against the stats design. */
+    final double b = 2048.0 / 0.3;
+    long[] reportedCells = new long[numberRendPoints];
+    long[] removedNoiseCells = new long[numberRendPoints];
+    for (int i = 0; i < numberRendPoints; i++) {
+      long observed = observedCells[i];
+      /* Round up to the next multiple of binSize. */
+      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+      /* Sample Laplace(0, b) noise via the inverse CDF of a uniform
+       * draw. */
+      double p = rnd.nextDouble();
+      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+      long reported = afterBinning + (long) laplaceNoise;
+      reportedCells[i] = reported;
+      /* Attempt to undo the obfuscation: round to the nearest multiple
+       * of binSize (the expected bin boundary once zero-mean noise is
+       * averaged out), then subtract half a bin to compensate for the
+       * round-up during binning. */
+      long roundedToNearestRightSideOfTheBin =
+          ((reported + binSize / 2) / binSize) * binSize;
+      long subtractedHalfOfBinSize =
+          roundedToNearestRightSideOfTheBin - binSize / 2;
+      removedNoiseCells[i] = subtractedHalfOfBinSize;
+    }
+
+    /* Perform 10,000 extrapolations from random fractions of reports by
+     * probability to be selected as rendezvous point. */
+    simCellsCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        simCellsCsvFile));
+    bw.write("frac,p025,p500,p975\n");
+    double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
+        0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+    final int numberOfExtrapolations = 10000;
+    for (double fraction : fractions) {
+      List<Long> extrapolations = new ArrayList<Long>();
+      for (int i = 0; i < numberOfExtrapolations; i++) {
+        /* Start from a random subset containing the target fraction of
+         * relays by count (not yet by probability mass). */
+        SortedSet<Integer> nonReportingRelays = new TreeSet<Integer>();
+        for (int j = 0; j < numberRendPoints; j++) {
+          nonReportingRelays.add(j);
+        }
+        List<Integer> shuffledRelays = new ArrayList<Integer>(
+            nonReportingRelays);
+        Collections.shuffle(shuffledRelays);
+        SortedSet<Integer> reportingRelays = new TreeSet<Integer>();
+        for (int j = 0; j < (int) ((double) numberRendPoints * fraction);
+            j++) {
+          reportingRelays.add(shuffledRelays.get(j));
+          nonReportingRelays.remove(shuffledRelays.get(j));
+        }
+        /* Add or remove random relays until the subset's total selection
+         * probability is within +/- 0.001 of the target fraction. */
+        double reportingProbability;
+        long totalReports;
+        do {
+          reportingProbability = 0.0;
+          totalReports = 0L;
+          for (int reportingRelay : reportingRelays) {
+            reportingProbability += probRendPoint[reportingRelay];
+            totalReports += removedNoiseCells[reportingRelay];
+          }
+          if (reportingProbability < fraction - 0.001) {
+            int addRelay = new ArrayList<Integer>(nonReportingRelays).get(
+                rnd.nextInt(nonReportingRelays.size()));
+            nonReportingRelays.remove(addRelay);
+            reportingRelays.add(addRelay);
+          } else if (reportingProbability > fraction + 0.001) {
+            int removeRelay = new ArrayList<Integer>(reportingRelays).get(
+                rnd.nextInt(reportingRelays.size()));
+            reportingRelays.remove(removeRelay);
+            nonReportingRelays.add(removeRelay);
+          }
+        } while (reportingProbability < fraction - 0.001 ||
+            reportingProbability > fraction + 0.001);
+        /* Extrapolate the network total by dividing the subset's
+         * reported cells by the probability mass it covers. */
+        extrapolations.add((long) ((double) totalReports
+            / reportingProbability));
+      }
+      /* Write the 2.5th, 50th, and 97.5th percentiles of the sorted
+       * extrapolations for this fraction. */
+      Collections.sort(extrapolations);
+      long p025 = extrapolations.get((extrapolations.size() * 25) / 1000),
+          p500 = extrapolations.get((extrapolations.size() * 500) / 1000),
+          p975 = extrapolations.get((extrapolations.size() * 975) / 1000);
+      bw.write(String.format("%.2f,%d,%d,%d%n", fraction, p025, p500,
+          p975));
+    }
+    bw.close();
+  }
+
+  /* Simulates hidden-service directories (HSDirs) observing .onion
+   * descriptors, obfuscates the per-HSDir counts with binning and
+   * Laplace noise, and extrapolates the total number of .onions from
+   * random subsets of the obfuscated reports.  Writes one CSV row per
+   * reporting fraction with the 2.5th, 50th, and 97.5th percentiles of
+   * 10,000 extrapolations to simOnionsCsvFile. */
+  private static void simulateOnions() throws Exception {
+
+    /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */
+    final int numberHsDirs = 3000;
+    SortedSet<Double> hsDirFingerprints = new TreeSet<Double>();
+    for (int i = 0; i < numberHsDirs; i++) {
+      hsDirFingerprints.add(rnd.nextDouble());
+    }
+
+    /* Compute fractions of observed descriptor space.  The ring holds
+     * every fingerprint twice, once shifted by -1.0, so that
+     * predecessor lookups wrap around the 0.0/1.0 boundary; it is
+     * reverse-ordered so that tailSet() walks towards predecessors. */
+    SortedSet<Double> ring =
+        new TreeSet<Double>(Collections.reverseOrder());
+    for (double fingerprint : hsDirFingerprints) {
+      ring.add(fingerprint);
+      ring.add(fingerprint - 1.0);
+    }
+    SortedMap<Double, Double> hsDirFractions =
+        new TreeMap<Double, Double>();
+    for (double fingerprint : hsDirFingerprints) {
+      /* Walk back three ring positions: an HSDir observes descriptors
+       * whose IDs fall between its third predecessor and itself,
+       * because each descriptor is stored on the three HSDirs
+       * following its ID. */
+      double start = fingerprint;
+      int positionsToGo = 3;
+      for (double prev : ring.tailSet(fingerprint)) {
+        start = prev;
+        if (positionsToGo-- <= 0) {
+          break;
+        }
+      }
+      hsDirFractions.put(fingerprint, fingerprint - start);
+    }
+
+    /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */
+    final int numberOnions = 40000;
+    final int replicas = 4;
+    final int storeOnDirs = 3;
+    SortedMap<Double, SortedSet<Integer>> storedDescs =
+        new TreeMap<Double, SortedSet<Integer>>();
+    for (double fingerprint : hsDirFingerprints) {
+      storedDescs.put(fingerprint, new TreeSet<Integer>());
+    }
+    for (int i = 0; i < numberOnions; i++) {
+      for (int j = 0; j < replicas; j++) {
+        /* Store this replica's descriptor (a random ID in [0, 1)) on
+         * the storeOnDirs HSDirs with the next-higher fingerprints. */
+        int leftToStore = storeOnDirs;
+        for (double fingerprint :
+            hsDirFingerprints.tailSet(rnd.nextDouble())) {
+          storedDescs.get(fingerprint).add(i);
+          if (--leftToStore <= 0) {
+            break;
+          }
+        }
+        /* Wrap around to the smallest fingerprints if the descriptor
+         * ID fell near the end of the ring. */
+        if (leftToStore > 0) {
+          for (double fingerprint : hsDirFingerprints) {
+            storedDescs.get(fingerprint).add(i);
+            if (--leftToStore <= 0) {
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    /* Obfuscate reports using binning and Laplace noise, and then attempt
+     * to remove noise again. */
+    final long binSize = 8L;
+    /* Laplace scale parameter; presumably sensitivity 8 divided by
+     * epsilon 0.3 -- TODO confirm against the stats design. */
+    final double b = 8.0 / 0.3;
+    SortedMap<Double, Long> reportedOnions = new TreeMap<Double, Long>(),
+        removedNoiseOnions = new TreeMap<Double, Long>();
+    for (Map.Entry<Double, SortedSet<Integer>> e :
+      storedDescs.entrySet()) {
+      double fingerprint = e.getKey();
+      /* Observed value is the number of distinct .onions stored on
+       * this HSDir. */
+      long observed = (long) e.getValue().size();
+      /* Round up to the next multiple of binSize. */
+      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+      /* Sample Laplace(0, b) noise via the inverse CDF of a uniform
+       * draw. */
+      double p = rnd.nextDouble();
+      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+      long reported = afterBinning + (long) laplaceNoise;
+      reportedOnions.put(fingerprint, reported);
+      /* Attempt to undo the obfuscation: round to the nearest multiple
+       * of binSize, then subtract half a bin to compensate for the
+       * round-up during binning. */
+      long roundedToNearestRightSideOfTheBin =
+          ((reported + binSize / 2) / binSize) * binSize;
+      long subtractedHalfOfBinSize =
+          roundedToNearestRightSideOfTheBin - binSize / 2;
+      removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize);
+    }
+
+    /* Perform 10,000 extrapolations from random fractions of reports,
+     * weighted by fraction of descriptor space observed.  (The original
+     * comment said "rendezvous point", copy-pasted from
+     * simulateCells(); this method deals with HSDirs.) */
+    simOnionsCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        simOnionsCsvFile));
+    bw.write("frac,p025,p500,p975\n");
+    double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
+        0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+    final int numberOfExtrapolations = 10000;
+    for (double fraction : fractions) {
+      List<Long> extrapolationsTwo = new ArrayList<Long>();
+      for (int i = 0; i < numberOfExtrapolations; i++) {
+        /* Start from a random subset containing the target fraction of
+         * HSDirs by count (not yet by descriptor-space fraction). */
+        SortedSet<Double> nonReportingRelays =
+            new TreeSet<Double>(hsDirFractions.keySet());
+        List<Double> shuffledRelays = new ArrayList<Double>(
+            nonReportingRelays);
+        Collections.shuffle(shuffledRelays);
+        SortedSet<Double> reportingRelays = new TreeSet<Double>();
+        for (int j = 0; j < (int) ((double) hsDirFractions.size()
+            * fraction); j++) {
+          reportingRelays.add(shuffledRelays.get(j));
+          nonReportingRelays.remove(shuffledRelays.get(j));
+        }
+        /* Add or remove random HSDirs until the subset's normalized
+         * descriptor-space fraction is within +/- 0.001 of the target
+         * fraction. */
+        double reportingProbability;
+        long totalReports;
+        do {
+          reportingProbability = 0.0;
+          totalReports = 0L;
+          for (double reportingRelay : reportingRelays) {
+            /* hsDirFractions sums to 3.0 over all HSDirs (each ring
+             * gap is covered by three HSDirs), so divide by 3.0 to
+             * normalize the total mass to 1.0. */
+            reportingProbability += hsDirFractions.get(reportingRelay)
+                / 3.0;
+            totalReports += removedNoiseOnions.get(reportingRelay);
+          }
+          if (reportingProbability < fraction - 0.001) {
+            double addRelay =
+                new ArrayList<Double>(nonReportingRelays).get(
+                rnd.nextInt(nonReportingRelays.size()));
+            nonReportingRelays.remove(addRelay);
+            reportingRelays.add(addRelay);
+          } else if (reportingProbability > fraction + 0.001) {
+            double removeRelay =
+                new ArrayList<Double>(reportingRelays).get(
+                rnd.nextInt(reportingRelays.size()));
+            reportingRelays.remove(removeRelay);
+            nonReportingRelays.add(removeRelay);
+          }
+        } while (reportingProbability < fraction - 0.001 ||
+            reportingProbability > fraction + 0.001);
+        /* Extrapolate: divide the subset's reports by the fraction of
+         * all descriptor placements it sees.  The 4.0 presumably
+         * equals replicas, because each .onion is stored under four
+         * descriptor IDs -- TODO confirm the intended estimator. */
+        double totalFraction = 0.0;
+        for (double fingerprint : reportingRelays) {
+          totalFraction += hsDirFractions.get(fingerprint) * 4.0;
+        }
+        extrapolationsTwo.add((long) ((double) totalReports
+            / totalFraction));
+      }
+      /* Write the 2.5th, 50th, and 97.5th percentiles of the sorted
+       * extrapolations for this fraction. */
+      Collections.sort(extrapolationsTwo);
+      long pTwo025 = extrapolationsTwo.get(
+          (extrapolationsTwo.size() * 25) / 1000),
+          pTwo500 = extrapolationsTwo.get(
+          (extrapolationsTwo.size() * 500) / 1000),
+          pTwo975 = extrapolationsTwo.get(
+          (extrapolationsTwo.size() * 975) / 1000);
+      bw.write(String.format("%.2f,%d,%d,%d%n", fraction, pTwo025,
+          pTwo500, pTwo975));
+    }
+    bw.close();
+  }
+}
+





More information about the tor-commits mailing list