[tor-commits] [metrics-web/master] Add hidserv-stats extrapolation code.

karsten at torproject.org karsten at torproject.org
Thu Mar 12 15:21:32 UTC 2015


commit fefb0f946aa5018639415cb67da7f35d35ff721b
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Mar 11 15:25:38 2015 +0100

    Add hidserv-stats extrapolation code.
---
 modules/hidserv/.gitignore                         |    4 +
 modules/hidserv/build.xml                          |   44 ++
 .../org/torproject/metrics/hidserv/Aggregator.java |  191 ++++++++
 .../metrics/hidserv/ComputedNetworkFractions.java  |  141 ++++++
 .../torproject/metrics/hidserv/DateTimeHelper.java |   95 ++++
 .../org/torproject/metrics/hidserv/Document.java   |   19 +
 .../torproject/metrics/hidserv/DocumentStore.java  |  157 +++++++
 .../metrics/hidserv/ExtrapolatedHidServStats.java  |  156 +++++++
 .../torproject/metrics/hidserv/Extrapolator.java   |  251 ++++++++++
 .../src/org/torproject/metrics/hidserv/Main.java   |   91 ++++
 .../src/org/torproject/metrics/hidserv/Parser.java |  484 ++++++++++++++++++++
 .../metrics/hidserv/ReportedHidServStats.java      |  130 ++++++
 .../org/torproject/metrics/hidserv/Simulate.java   |  360 +++++++++++++++
 shared/bin/70-run-hidserv-stats.sh                 |    5 +
 shared/bin/99-copy-stats-files.sh                  |    1 +
 15 files changed, 2129 insertions(+)

diff --git a/modules/hidserv/.gitignore b/modules/hidserv/.gitignore
new file mode 100644
index 0000000..4bb76a5
--- /dev/null
+++ b/modules/hidserv/.gitignore
@@ -0,0 +1,4 @@
+classes/
+stats/
+status/
+
diff --git a/modules/hidserv/build.xml b/modules/hidserv/build.xml
new file mode 100644
index 0000000..7480b8c
--- /dev/null
+++ b/modules/hidserv/build.xml
@@ -0,0 +1,44 @@
<!-- Ant build file for the hidserv module: compiles the hidden-service
     statistics code against system-wide commons jars and the local
     metrics-lib checkout, then runs its main class. -->
<project default="run" name="hidserv" basedir=".">

  <!-- Source and output directories. -->
  <property name="sources" value="src"/>
  <property name="classes" value="classes"/>
  <!-- Compile/run classpath: own classes, Debian-packaged commons jars,
       and the descriptor.jar built from the metrics-lib dependency. -->
  <path id="classpath">
    <pathelement path="${classes}"/>
    <fileset dir="/usr/share/java">
      <include name="commons-codec-1.6.jar"/>
      <include name="commons-compress-1.4.1.jar"/>
      <include name="commons-lang-2.6.jar"/>
    </fileset>
    <fileset dir="../../deps/metrics-lib">
      <include name="descriptor.jar"/>
    </fileset>
  </path>

  <!-- Delegate to the metrics-lib build to make sure descriptor.jar
       exists before compiling. -->
  <target name="metrics-lib">
    <ant dir="../../deps/metrics-lib"/>
  </target>

  <!-- Compile all sources with Java 6 compatibility. -->
  <target name="compile" depends="metrics-lib">
    <mkdir dir="${classes}"/>
    <javac destdir="${classes}"
           srcdir="${sources}"
           source="1.6"
           target="1.6"
           debug="true"
           deprecation="true"
           optimize="false"
           failonerror="true"
           includeantruntime="false">
      <classpath refid="classpath"/>
    </javac>
  </target>

  <!-- Default target: compile, then run the module's main class in a
       forked JVM with a 1 GiB heap. -->
  <target name="run" depends="compile">
    <java fork="true"
          maxmemory="1024m"
          classname="org.torproject.metrics.hidserv.Main">
      <classpath refid="classpath"/>
    </java>
  </target>
</project>
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java
new file mode 100644
index 0000000..192a342
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java
@@ -0,0 +1,191 @@
+package org.torproject.metrics.hidserv;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/* Aggregate extrapolated network totals of hidden-service statistics by
+ * calculating statistics like the daily weighted interquartile mean.
+ * Also calculate simpler statistics like the number of reported
+ * statistics and the total network fraction of reporting relays. */
+public class Aggregator {
+
+  /* Document file containing extrapolated hidden-service statistics. */
+  private File extrapolatedHidServStatsFile;
+
+  /* Document store for storing and retrieving extrapolated hidden-service
+   * statistics. */
+  private DocumentStore<ExtrapolatedHidServStats>
+      extrapolatedHidServStatsStore;
+
+  /* Output file for writing aggregated statistics. */
+  private File hidservStatsCsvFile;
+
+  /* Initialize a new aggregator object using the given directory,
+   * document store, and output file for results. */
+  public Aggregator(File statusDirectory,
+      DocumentStore<ExtrapolatedHidServStats>
+      extrapolatedHidServStatsStore, File hidservStatsCsvFile) {
+
+    /* Create a File instance for the document file containing
+     * extrapolated network totals. */
+    this.extrapolatedHidServStatsFile = new File(statusDirectory,
+        "extrapolated-hidserv-stats");
+
+    /* Store references to the provided document store and output file. */
+    this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore;
+    this.hidservStatsCsvFile = hidservStatsCsvFile;
+  }
+
+  /* Calculate aggregates for all extrapolated hidden-service statistics
+   * and write them to the output file. */
+  public void aggregateHidServStats() {
+
+    /* Retrieve previously extrapolated network totals. */
+    Set<ExtrapolatedHidServStats> extrapolatedStats =
+        this.extrapolatedHidServStatsStore.retrieve(
+        this.extrapolatedHidServStatsFile);
+    if (extrapolatedStats == null) {
+      System.err.printf("Unable to retrieve extrapolated hidden-service "
+          + "statistics from file %s.  Skipping aggregation step.%n",
+          this.extrapolatedHidServStatsFile.getAbsolutePath());
+      return;
+    }
+
+    /* Re-arrange extrapolated network totals by statistics interval end
+     * date, and include the computed network total as weight for the
+     * extrapolated value.  More precisely, map keys are ISO-formatted
+     * dates, map values are double[] arrays with the extrapolated network
+     * total as first element and the corresponding computed network
+     * fraction as second element. */
+    SortedMap<String, List<double[]>>
+        extrapolatedCells = new TreeMap<String, List<double[]>>(),
+        extrapolatedOnions = new TreeMap<String, List<double[]>>();
+    for (ExtrapolatedHidServStats extrapolated : extrapolatedStats) {
+      String date = DateTimeHelper.format(
+          extrapolated.getStatsDateMillis(),
+          DateTimeHelper.ISO_DATE_FORMAT);
+      if (extrapolated.getFractionRendRelayedCells() > 0.0) {
+        if (!extrapolatedCells.containsKey(date)) {
+          extrapolatedCells.put(date, new ArrayList<double[]>());
+        }
+        extrapolatedCells.get(date).add(new double[] {
+            extrapolated.getExtrapolatedRendRelayedCells(),
+            extrapolated.getFractionRendRelayedCells() });
+      }
+      if (extrapolated.getFractionDirOnionsSeen() > 0.0) {
+        if (!extrapolatedOnions.containsKey(date)) {
+          extrapolatedOnions.put(date, new ArrayList<double[]>());
+        }
+        extrapolatedOnions.get(date).add(new double[] {
+            extrapolated.getExtrapolatedDirOnionsSeen(),
+            extrapolated.getFractionDirOnionsSeen() });
+      }
+    }
+
+    /* Write all results to a string builder that will later be written to
+     * the output file.  Each line contains an ISO-formatted "date", a
+     * string identifier for the "type" of statistic, the weighted mean
+     * ("wmean"), weighted median ("wmedian"), weighted interquartile mean
+     * ("wiqm"), the total network "frac"tion, and the number of reported
+     * "stats" with non-zero computed network fraction. */
+    StringBuilder sb = new StringBuilder();
+    sb.append("date,type,wmean,wmedian,wiqm,frac,stats\n");
+
+    /* Repeat all aggregation steps for both types of statistics. */
+    for (int i = 0; i < 2; i++) {
+      String type = i == 0 ? "rend-relayed-cells" : "dir-onions-seen";
+      SortedMap<String, List<double[]>> extrapolated = i == 0
+          ? extrapolatedCells : extrapolatedOnions;
+
+      /* Go through all dates. */
+      for (Map.Entry<String, List<double[]>> e :
+          extrapolated.entrySet()) {
+        String date = e.getKey();
+        List<double[]> weightedValues = e.getValue();
+        int numStats = weightedValues.size();
+
+        /* Sort extrapolated network totals contained in the first array
+         * element.  (The second array element contains the computed
+         * network fraction as weight.) */
+        Collections.sort(weightedValues,
+            new Comparator<double[]>() {
+          public int compare(double[] o1, double[] o2) {
+            return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
+          }
+        });
+
+        /* For the weighted mean, sum up all previously extrapolated
+         * values weighted with their network fractions (which happens to
+         * be the values that relays reported), and sum up all network
+         * fractions.  Once we have those two sums, we can divide the sum
+         * of weighted extrapolated values by the sum of network fractions
+         * to obtain the weighted mean of extrapolated values. */
+        double sumReported = 0.0, sumFraction = 0.0;
+        for (double[] d : weightedValues) {
+          sumReported += d[0] * d[1];
+          sumFraction += d[1];
+        }
+        double weightedMean = sumReported / sumFraction;
+
+        /* For the weighted median and weighted interquartile mean, go
+         * through all values once again.  The weighted median is the
+         * first extrapolated value with weight interval end greater than
+         * 50% of reported network fractions.  For the weighted
+         * interquartile mean, sum up extrapolated values multiplied with
+         * network fractions and network fractions falling into the 25% to
+         * 75% range and later compute the weighted mean of those. */
+        double weightIntervalEnd = 0.0;
+        Double weightedMedian = null;
+        double sumFractionInterquartile = 0.0,
+            sumReportedInterquartile = 0.0;
+        for (double[] d : weightedValues) {
+          double extrapolatedValue = d[0], computedFraction = d[1];
+          double weightIntervalStart = weightIntervalEnd;
+          weightIntervalEnd += computedFraction;
+          if (weightedMedian == null &&
+              weightIntervalEnd > sumFraction * 0.5) {
+            weightedMedian = extrapolatedValue;
+          }
+          if (weightIntervalEnd >= sumFraction * 0.25 &&
+              weightIntervalStart <= sumFraction * 0.75) {
+            double fractionBetweenQuartiles =
+                Math.min(weightIntervalEnd, sumFraction * 0.75)
+                - Math.max(weightIntervalStart, sumFraction * 0.25);
+            sumReportedInterquartile += extrapolatedValue
+                * fractionBetweenQuartiles;
+            sumFractionInterquartile += fractionBetweenQuartiles;
+          }
+        }
+        double weightedInterquartileMean =
+            sumReportedInterquartile / sumFractionInterquartile;
+
+        /* Put together all aggregated values in a single line. */
+        sb.append(String.format("%s,%s,%.0f,%.0f,%.0f,%.8f,%d%n", date,
+            type, weightedMean, weightedMedian, weightedInterquartileMean,
+            sumFraction, numStats));
+      }
+    }
+
+    /* Write all aggregated results to the output file. */
+    try {
+      this.hidservStatsCsvFile.getParentFile().mkdirs();
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          this.hidservStatsCsvFile));
+      bw.write(sb.toString());
+      bw.close();
+    } catch (IOException e) {
+      System.err.printf("Unable to write results to %s.  Ignoring.");
+    }
+  }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java
new file mode 100644
index 0000000..1fe0020
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java
@@ -0,0 +1,141 @@
+package org.torproject.metrics.hidserv;
+
+/* Computed fraction of hidden-service activity that a single relay is
+ * assumed to observe in the network.  These fractions are computed from
+ * status entries and bandwidth weights in a network status consensus. */
+public class ComputedNetworkFractions implements Document {
+
+  /* Relay fingerprint consisting of 40 upper-case hex characters. */
+  private String fingerprint;
+  public String getFingerprint() {
+    return this.fingerprint;
+  }
+
+  /* Valid-after timestamp of the consensus in milliseconds. */
+  private long validAfterMillis;
+  public long getValidAfterMillis() {
+    return this.validAfterMillis;
+  }
+
+  /* Fraction of cells on rendezvous circuits that this relay is assumed
+   * to observe in the network. */
+  private double fractionRendRelayedCells;
+  public void setFractionRendRelayedCells(
+      double fractionRendRelayedCells) {
+    this.fractionRendRelayedCells = fractionRendRelayedCells;
+  }
+  public double getFractionRendRelayedCells() {
+    return this.fractionRendRelayedCells;
+  }
+
+  /* Fraction of descriptors that this relay is assumed to observe in the
+   * network.  This is calculated as the fraction of descriptors
+   * identifiers that this relay was responsible for, divided by 3,
+   * because each descriptor that is published to this directory is also
+   * published to two other directories. */
+  private double fractionDirOnionsSeen;
+  public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) {
+    this.fractionDirOnionsSeen = fractionDirOnionsSeen;
+  }
+  public double getFractionDirOnionsSeen() {
+    return this.fractionDirOnionsSeen;
+  }
+
+  /* Instantiate a new fractions object using fingerprint and consensus
+   * valid-after time which together uniquely identify the object. */
+  public ComputedNetworkFractions(String fingerprint,
+      long validAfterMillis) {
+    this.fingerprint = fingerprint;
+    this.validAfterMillis = validAfterMillis;
+  }
+
+  /* Return whether this object contains the same fingerprint and
+   * consensus valid-after time as the passed object. */
+  @Override
+  public boolean equals(Object otherObject) {
+    if (!(otherObject instanceof ComputedNetworkFractions)) {
+      return false;
+    }
+    ComputedNetworkFractions other =
+        (ComputedNetworkFractions) otherObject;
+    return this.fingerprint.equals(other.fingerprint) &&
+        this.validAfterMillis == other.validAfterMillis;
+  }
+
+  /* Return a (hopefully unique) hash code based on this object's
+   * fingerprint and consensus valid-after time. */
+  @Override
+  public int hashCode() {
+    return this.fingerprint.hashCode() +
+        (int) this.validAfterMillis;
+  }
+
+  /* Return a string representation of this object, consisting of two
+   * strings: the first string contains fingerprint and valid-after date,
+   * the second string contains the concatenation of all other
+   * attributes. */
+  @Override
+  public String[] format() {
+    String first = String.format("%s,%s", this.fingerprint,
+        DateTimeHelper.format(this.validAfterMillis,
+        DateTimeHelper.ISO_DATE_FORMAT));
+    String second = DateTimeHelper.format(this.validAfterMillis,
+        DateTimeHelper.ISO_HOUR_FORMAT)
+        + (this.fractionRendRelayedCells == 0.0 ? ","
+            : String.format(",%f", this.fractionRendRelayedCells))
+        + (this.fractionDirOnionsSeen == 0.0 ? ","
+            : String.format(",%f", this.fractionDirOnionsSeen));
+    return new String[] { first, second };
+  }
+
+  /* Instantiate an empty fractions object that will be initialized more
+   * by the parse method. */
+  ComputedNetworkFractions() {
+  }
+
+  /* Initialize this fractions object using the two provided strings that
+   * have been produced by the format method earlier.  Return whether this
+   * operation was successful. */
+  @Override
+  public boolean parse(String[] formattedStrings) {
+    if (formattedStrings.length != 2) {
+      System.err.printf("Invalid number of formatted strings.  "
+          + "Skipping.%n", formattedStrings.length);
+      return false;
+    }
+    String[] firstParts = formattedStrings[0].split(",", 2);
+    if (firstParts.length != 2) {
+      System.err.printf("Invalid number of comma-separated values.  "
+          + "Skipping.%n");
+      return false;
+    }
+    String fingerprint = firstParts[0];
+    String[] secondParts = formattedStrings[1].split(",", 3);
+    if (secondParts.length != 3) {
+      System.err.printf("Invalid number of comma-separated values.  "
+          + "Skipping.%n");
+      return false;
+    }
+    long validAfterMillis = DateTimeHelper.parse(firstParts[1] + " "
+        + secondParts[0], DateTimeHelper.ISO_DATE_HOUR_FORMAT);
+    if (validAfterMillis == DateTimeHelper.NO_TIME_AVAILABLE) {
+      System.err.printf("Invalid date/hour format.  Skipping.%n");
+      return false;
+    }
+    try {
+      double fractionRendRelayedCells = secondParts[1].equals("")
+          ? 0.0 : Double.parseDouble(secondParts[1]);
+      double fractionDirOnionsSeen = secondParts[2].equals("")
+          ? 0.0 : Double.parseDouble(secondParts[2]);
+      this.fingerprint = fingerprint;
+      this.validAfterMillis = validAfterMillis;
+      this.fractionRendRelayedCells = fractionRendRelayedCells;
+      this.fractionDirOnionsSeen = fractionDirOnionsSeen;
+      return true;
+    } catch (NumberFormatException e) {
+      System.err.printf("Invalid number format.  Skipping.%n");
+      return false;
+    }
+  }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/DateTimeHelper.java b/modules/hidserv/src/org/torproject/metrics/hidserv/DateTimeHelper.java
new file mode 100644
index 0000000..c33a50d
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/DateTimeHelper.java
@@ -0,0 +1,95 @@
+package org.torproject.metrics.hidserv;
+
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
/* Utility class to format and parse dates and timestamps.  All methods
 * use the UTC time zone and non-lenient parsing. */
public class DateTimeHelper {

  /* This class is not supposed to be instantiated, which is why its
   * constructor has private visibility. */
  private DateTimeHelper() {
  }

  /* Some useful time constants, all in milliseconds. */
  public static final long
      ONE_SECOND = 1000L,
      ONE_MINUTE = 60L * ONE_SECOND,
      ONE_HOUR = 60L * ONE_MINUTE,
      ONE_DAY = 24L * ONE_HOUR;

  /* Some useful date/time formats. */
  public static final String
      ISO_DATETIME_FORMAT = "yyyy-MM-dd HH:mm:ss",
      ISO_DATE_HOUR_FORMAT = "yyyy-MM-dd HH",
      ISO_DATE_FORMAT = "yyyy-MM-dd",
      ISO_HOUR_FORMAT = "HH";

  /* Map of DateFormat instances for parsing and formatting dates and
   * timestamps, protected using ThreadLocal to ensure that each thread
   * uses its own instances, because SimpleDateFormat is not thread-safe.
   * Only initialValue needs to be overridden; the previous no-op
   * overrides of get/set/remove that merely delegated to super have been
   * removed as dead code. */
  private static ThreadLocal<Map<String, DateFormat>> dateFormats =
      new ThreadLocal<Map<String, DateFormat>>() {
    protected Map<String, DateFormat> initialValue() {
      return new HashMap<String, DateFormat>();
    }
  };

  /* Return an instance of DateFormat for the given format.  If no such
   * instance exists for the current thread, create one, configure it for
   * strict UTC parsing, and put it in the thread-local map. */
  private static DateFormat getDateFormat(String format) {
    Map<String, DateFormat> threadDateFormats = dateFormats.get();
    if (!threadDateFormats.containsKey(format)) {
      DateFormat dateFormat = new SimpleDateFormat(format);
      dateFormat.setLenient(false);
      dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
      threadDateFormats.put(format, dateFormat);
    }
    return threadDateFormats.get(format);
  }

  /* Format the given time in milliseconds using the given format. */
  public static String format(long millis, String format) {
    return getDateFormat(format).format(millis);
  }

  /* Format the given time in milliseconds using ISO date/time format. */
  public static String format(long millis) {
    return format(millis, ISO_DATETIME_FORMAT);
  }

  /* Default result of the parse methods if the provided time could not be
   * parsed. */
  public final static long NO_TIME_AVAILABLE = -1L;

  /* Parse the given string using the given format, returning
   * NO_TIME_AVAILABLE if the string is null or cannot be parsed. */
  public static long parse(String string, String format) {
    if (null == string) {
      return NO_TIME_AVAILABLE;
    }
    try {
      return getDateFormat(format).parse(string).getTime();
    } catch (ParseException e) {
      return NO_TIME_AVAILABLE;
    }
  }

  /* Parse the given string using ISO date/time format. */
  public static long parse(String string) {
    return parse(string, ISO_DATETIME_FORMAT);
  }
}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Document.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Document.java
new file mode 100644
index 0000000..47614f3
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Document.java
@@ -0,0 +1,19 @@
+package org.torproject.metrics.hidserv;
+
/* Common interface of documents that are supposed to be serialized and
 * stored in document files and later retrieved and de-serialized.
 * Implementations are expected to provide a no-argument constructor so
 * that DocumentStore can instantiate them reflectively — TODO confirm
 * against DocumentStore's use of Class#newInstance. */
public interface Document {

  /* Return an array of two strings with a string representation of this
   * document.  The first string will be used to start a group of
   * documents, the second string will be used to represent a single
   * document in that group.  Ideally, the first string is equivalent for
   * many documents stored in the same file, and the second string is
   * different for those documents. */
  public String[] format();

  /* Initialize an object using the given array of two strings.  These are
   * the same two strings that the format method provides.  Return whether
   * initialization was successful. */
  public boolean parse(String[] formattedStrings);
}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java b/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java
new file mode 100644
index 0000000..3266df5
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java
@@ -0,0 +1,157 @@
+package org.torproject.metrics.hidserv;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/* Utility class to store serialized objects implementing the Document
+ * interface to a file and later to retrieve them. */
+public class DocumentStore<T extends Document> {
+
+  /* Document class, needed to create new instances when retrieving
+   * documents. */
+  private Class<T> clazz;
+
+  /* Initialize a new store object for the given type of documents. */
+  DocumentStore(Class<T> clazz) {
+    this.clazz = clazz;
+  }
+
+  /* Store the provided documents in the given file and return whether the
+   * storage operation was successful.  If the file already existed and if
+   * it contains documents, merge the new documents with the existing
+   * ones. */
+  public boolean store(File documentFile, Set<T> documentsToStore) {
+
+    /* Retrieve existing documents. */
+    Set<T> retrievedDocuments = this.retrieve(documentFile);
+    if (retrievedDocuments == null) {
+      System.err.printf("Unable to read and update %s.  Not storing "
+          + "documents.%n", documentFile.getAbsoluteFile());
+      return false;
+    }
+
+    /* Merge new documents with existing ones. */
+    retrievedDocuments.addAll(documentsToStore);
+
+    /* Serialize documents. */
+    SortedMap<String, SortedSet<String>> formattedDocuments =
+        new TreeMap<String, SortedSet<String>>();
+    for (T retrieveDocument : retrievedDocuments) {
+      String[] formattedDocument = retrieveDocument.format();
+      if (!formattedDocuments.containsKey(formattedDocument[0])) {
+        formattedDocuments.put(formattedDocument[0],
+            new TreeSet<String>());
+      }
+      formattedDocuments.get(formattedDocument[0]).add(
+          formattedDocument[1]);
+    }
+
+    /* Check if a temporary file exists from the previous execution. */
+    File documentTempFile = new File(documentFile.getAbsoluteFile()
+        + ".tmp");
+    if (documentTempFile.exists()) {
+      System.err.printf("Temporary document file %s still exists, "
+          + "indicating that a previous execution did not terminate "
+          + "cleanly.  Not storing documents.%n",
+          documentTempFile.getAbsoluteFile());
+      return false;
+    }
+
+    /* Write to a new temporary file, then move it into place, possibly
+     * overwriting an existing file. */
+    try {
+      documentTempFile.getParentFile().mkdirs();
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          documentTempFile));
+      for (Map.Entry<String, SortedSet<String>> e :
+          formattedDocuments.entrySet()) {
+        bw.write(e.getKey() + "\n");
+        for (String s : e.getValue()) {
+          bw.write(" " + s + "\n");
+        }
+      }
+      bw.close();
+      documentFile.delete();
+      documentTempFile.renameTo(documentFile);
+    } catch (IOException e) {
+      System.err.printf("Unable to write %s.  Not storing documents.%n",
+          documentFile.getAbsolutePath());
+      return false;
+    }
+
+    /* Return success. */
+    return true;
+  }
+
+  /* Retrieve all previously stored documents from the given file. */
+  public Set<T> retrieve(File documentFile) {
+
+    /* Check if the document file exists, and if not, return an empty set.
+     * This is not an error case. */
+    Set<T> result = new HashSet<T>();
+    if (!documentFile.exists()) {
+      return result;
+    }
+
+    /* Parse the document file line by line and de-serialize contained
+     * documents. */
+    try {
+      LineNumberReader lnr = new LineNumberReader(new BufferedReader(
+          new FileReader(documentFile)));
+      String line, formattedString0 = null;
+      while ((line = lnr.readLine()) != null) {
+        if (!line.startsWith(" ")) {
+          formattedString0 = line;
+        } else if (formattedString0 == null) {
+          System.err.printf("First line in %s must not start with a "
+              + "space.  Not retrieving any previously stored "
+              + "documents.%n", documentFile.getAbsolutePath());
+          lnr.close();
+          return null;
+        } else {
+          T document = this.clazz.newInstance();
+          if (!document.parse(new String[] { formattedString0,
+              line.substring(1) })) {
+            System.err.printf("Unable to read line %d from %s.  Not "
+                + "retrieving any previously stored documents.%n",
+                lnr.getLineNumber(), documentFile.getAbsolutePath());
+            lnr.close();
+            return null;
+          }
+          result.add(document);
+        }
+      }
+      lnr.close();
+    } catch (IOException e) {
+      System.err.printf("Unable to read %s.  Not retrieving any "
+          + "previously stored documents.%n",
+          documentFile.getAbsolutePath());
+      e.printStackTrace();
+      return null;
+    } catch (InstantiationException e) {
+      System.err.printf("Unable to read %s.  Cannot instantiate document "
+          + "object.%n", documentFile.getAbsolutePath());
+      e.printStackTrace();
+      return null;
+    } catch (IllegalAccessException e) {
+      System.err.printf("Unable to read %s.  Cannot instantiate document "
+          + "object.%n", documentFile.getAbsolutePath());
+      e.printStackTrace();
+      return null;
+    }
+    return result;
+  }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java
new file mode 100644
index 0000000..52357d4
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java
@@ -0,0 +1,156 @@
+package org.torproject.metrics.hidserv;
+
+/* Extrapolated network totals of hidden-service statistics reported by a
+ * single relay.  Extrapolated values are based on reported statistics and
+ * computed network fractions in the statistics interval. */
+public class ExtrapolatedHidServStats implements Document {
+
+  /* Date of statistics interval end in milliseconds. */
+  private long statsDateMillis;
+  public long getStatsDateMillis() {
+    return this.statsDateMillis;
+  }
+
+  /* Relay fingerprint consisting of 40 upper-case hex characters. */
+  private String fingerprint;
+  public String getFingerprint() {
+    return this.fingerprint;
+  }
+
+  /* Extrapolated number of cells on rendezvous circuits in the
+   * network. */
+  private double extrapolatedRendRelayedCells;
+  public void setExtrapolatedRendRelayedCells(
+      double extrapolatedRendRelayedCells) {
+    this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells;
+  }
+  public double getExtrapolatedRendRelayedCells() {
+    return this.extrapolatedRendRelayedCells;
+  }
+
+  /* Computed fraction of observed cells on rendezvous circuits in the
+   * network, used to weight this relay's extrapolated network total in
+   * the aggregation step. */
+  private double fractionRendRelayedCells;
+  public void setFractionRendRelayedCells(
+      double fractionRendRelayedCells) {
+    this.fractionRendRelayedCells = fractionRendRelayedCells;
+  }
+  public double getFractionRendRelayedCells() {
+    return this.fractionRendRelayedCells;
+  }
+
+  /* Extrapolated number of .onions in the network. */
+  private double extrapolatedDirOnionsSeen;
+  public void setExtrapolatedDirOnionsSeen(
+      double extrapolatedDirOnionsSeen) {
+    this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen;
+  }
+  public double getExtrapolatedDirOnionsSeen() {
+    return this.extrapolatedDirOnionsSeen;
+  }
+
+  /* Computed fraction of observed .onions in the network, used to weight
+   * this relay's extrapolated network total in the aggregation step. */
+  private double fractionDirOnionsSeen;
+  public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) {
+    this.fractionDirOnionsSeen = fractionDirOnionsSeen;
+  }
+  public double getFractionDirOnionsSeen() {
+    return this.fractionDirOnionsSeen;
+  }
+
+  /* Instantiate a new stats object using fingerprint and statistics
+   * interval end date which together uniquely identify the object. */
+  public ExtrapolatedHidServStats(long statsDateMillis,
+      String fingerprint) {
+    this.statsDateMillis = statsDateMillis;
+    this.fingerprint = fingerprint;
+  }
+
+  /* Return whether this object contains the same fingerprint and
+   * statistics interval end date as the passed object. */
+  @Override
+  public boolean equals(Object otherObject) {
+    if (!(otherObject instanceof ExtrapolatedHidServStats)) {
+      return false;
+    }
+    ExtrapolatedHidServStats other =
+        (ExtrapolatedHidServStats) otherObject;
+    return this.fingerprint.equals(other.fingerprint) &&
+        this.statsDateMillis == other.statsDateMillis;
+  }
+
+  /* Return a (hopefully unique) hash code based on this object's
+   * fingerprint and statistics interval end date. */
+  @Override
+  public int hashCode() {
+    return this.fingerprint.hashCode() + (int) this.statsDateMillis;
+  }
+
+  /* Return a string representation of this object, consisting of the
+   * statistics interval end date and the concatenation of all other
+   * attributes. */
+  @Override
+  public String[] format() {
+    String first = DateTimeHelper.format(this.statsDateMillis,
+        DateTimeHelper.ISO_DATE_FORMAT);
+    String second = this.fingerprint +
+        (this.fractionRendRelayedCells == 0.0 ? ",,"
+        : String.format(",%.0f,%f", this.extrapolatedRendRelayedCells,
+        this.fractionRendRelayedCells)) +
+        (this.fractionDirOnionsSeen == 0.0 ? ",,"
+        : String.format(",%.0f,%f", this.extrapolatedDirOnionsSeen,
+        this.fractionDirOnionsSeen));
+    return new String[] { first, second };
+  }
+
+  /* Instantiate an empty stats object that will be initialized more by
+   * the parse method. */
+  ExtrapolatedHidServStats() {
+  }
+
+  /* Initialize this stats object using the two provided strings that have
+   * been produced by the format method earlier.  Return whether this
+   * operation was successful. */
+  @Override
+  public boolean parse(String[] formattedStrings) {
+    if (formattedStrings.length != 2) {
+      System.err.printf("Invalid number of formatted strings.  "
+          + "Skipping.%n", formattedStrings.length);
+      return false;
+    }
+    long statsDateMillis = DateTimeHelper.parse(formattedStrings[0],
+        DateTimeHelper.ISO_DATE_FORMAT);
+    String[] secondParts = formattedStrings[1].split(",", 5);
+    if (secondParts.length != 5) {
+      System.err.printf("Invalid number of comma-separated values.  "
+          + "Skipping.%n");
+      return false;
+    }
+    String fingerprint = secondParts[0];
+    double extrapolatedRendRelayedCells = 0.0,
+        fractionRendRelayedCells = 0.0, extrapolatedDirOnionsSeen = 0.0,
+        fractionDirOnionsSeen = 0.0;
+    try {
+      extrapolatedRendRelayedCells = secondParts[1].equals("") ? 0.0
+          : Double.parseDouble(secondParts[1]);
+      fractionRendRelayedCells = secondParts[2].equals("") ? 0.0
+          : Double.parseDouble(secondParts[2]);
+      extrapolatedDirOnionsSeen = secondParts[3].equals("") ? 0.0
+          : Double.parseDouble(secondParts[3]);
+      fractionDirOnionsSeen = secondParts[4].equals("") ? 0.0
+          : Double.parseDouble(secondParts[4]);
+    } catch (NumberFormatException e) {
+      return false;
+    }
+    this.statsDateMillis = statsDateMillis;
+    this.fingerprint = fingerprint;
+    this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells;
+    this.fractionRendRelayedCells = fractionRendRelayedCells;
+    this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen;
+    this.fractionDirOnionsSeen = fractionDirOnionsSeen;
+    return true;
+  }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java
new file mode 100644
index 0000000..a1ff075
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java
@@ -0,0 +1,251 @@
+package org.torproject.metrics.hidserv;
+
+import java.io.File;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/* Extrapolate hidden-service statistics reported by single relays by
+ * dividing them by the computed fraction of hidden-service activity
+ * observed by the relay. */
+public class Extrapolator {
+
+  /* Document file containing previously parsed reported hidden-service
+   * statistics. */
+  private File reportedHidServStatsFile;
+
+  /* Document store for storing and retrieving reported hidden-service
+   * statistics. */
+  private DocumentStore<ReportedHidServStats> reportedHidServStatsStore;
+
+  /* Directory containing document files with previously computed network
+   * fractions. */
+  private File computedNetworkFractionsDirectory;
+
+  /* Document store for storing and retrieving computed network
+   * fractions. */
+  private DocumentStore<ComputedNetworkFractions>
+      computedNetworkFractionsStore;
+
+  /* Document file containing extrapolated hidden-service statistics. */
+  private File extrapolatedHidServStatsFile;
+
+  /* Document store for storing and retrieving extrapolated hidden-service
+   * statistics. */
+  private DocumentStore<ExtrapolatedHidServStats>
+      extrapolatedHidServStatsStore;
+
+  /* Initialize a new extrapolator object using the given directory and
+   * document stores. */
+  public Extrapolator(File statusDirectory,
+      DocumentStore<ReportedHidServStats> reportedHidServStatsStore,
+      DocumentStore<ComputedNetworkFractions>
+      computedNetworkFractionsStore,
+      DocumentStore<ExtrapolatedHidServStats>
+      extrapolatedHidServStatsStore) {
+
+    /* Create File instances for the files and directories in the provided
+     * status directory. */
+    this.reportedHidServStatsFile = new File(statusDirectory,
+        "reported-hidserv-stats");
+    this.computedNetworkFractionsDirectory =
+        new File(statusDirectory, "computed-network-fractions");
+    this.extrapolatedHidServStatsFile = new File(statusDirectory,
+        "extrapolated-hidserv-stats");
+
+    /* Store references to the provided document stores. */
+    this.reportedHidServStatsStore = reportedHidServStatsStore;
+    this.computedNetworkFractionsStore = computedNetworkFractionsStore;
+    this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore;
+  }
+
+  /* Iterate over all reported stats and extrapolate network totals for
+   * those that have not been extrapolated before. */
+  public boolean extrapolateHidServStats() {
+
+    /* Retrieve previously extrapolated stats to avoid extrapolating them
+     * again. */
+    Set<ExtrapolatedHidServStats> extrapolatedStats =
+        this.extrapolatedHidServStatsStore.retrieve(
+        this.extrapolatedHidServStatsFile);
+
+    /* Retrieve all reported stats, even including those that have already
+     * been extrapolated. */
+    Set<ReportedHidServStats> reportedStats =
+        this.reportedHidServStatsStore.retrieve(
+        this.reportedHidServStatsFile);
+
+    /* Make sure that all documents could be retrieved correctly. */
+    if (extrapolatedStats == null || reportedStats == null) {
+      System.err.printf("Could not read previously parsed or "
+          + "extrapolated hidserv-stats.  Skipping.");
+      return false;
+    }
+
+    /* Re-arrange reported stats by fingerprint. */
+    SortedMap<String, Set<ReportedHidServStats>>
+        parsedStatsByFingerprint =
+        new TreeMap<String, Set<ReportedHidServStats>>();
+    for (ReportedHidServStats stat : reportedStats) {
+      String fingerprint = stat.getFingerprint();
+      if (!parsedStatsByFingerprint.containsKey(fingerprint)) {
+        parsedStatsByFingerprint.put(fingerprint,
+            new HashSet<ReportedHidServStats>());
+      }
+      parsedStatsByFingerprint.get(fingerprint).add(stat);
+    }
+
+    /* Go through reported stats by fingerprint. */
+    for (Map.Entry<String, Set<ReportedHidServStats>> e :
+        parsedStatsByFingerprint.entrySet()) {
+      String fingerprint = e.getKey();
+
+      /* Iterate over all stats reported by this relay and make a list of
+       * those that still need to be extrapolated.  Also make a list of
+       * all dates for which we need to retrieve computed network
+       * fractions. */
+      Set<ReportedHidServStats> newReportedStats =
+          new HashSet<ReportedHidServStats>();
+      SortedSet<String> retrieveFractionDates = new TreeSet<String>();
+      for (ReportedHidServStats stats : e.getValue()) {
+
+        /* Check whether extrapolated stats already contain an object with
+         * the same statistics interval end date and fingerprint. */
+        long statsDateMillis = (stats.getStatsEndMillis()
+            / DateTimeHelper.ONE_DAY) * DateTimeHelper.ONE_DAY;
+        if (extrapolatedStats.contains(
+            new ExtrapolatedHidServStats(statsDateMillis, fingerprint))) {
+          continue;
+        }
+
+        /* Add the reported stats to the list of stats we still need to
+         * extrapolate. */
+        newReportedStats.add(stats);
+
+        /* Add all dates between statistics interval start and end to a
+         * list. */
+        long statsEndMillis = stats.getStatsEndMillis();
+        long statsStartMillis = statsEndMillis
+            - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND;
+        for (long millis = statsStartMillis; millis <= statsEndMillis;
+            millis += DateTimeHelper.ONE_DAY) {
+          String date = DateTimeHelper.format(millis,
+              DateTimeHelper.ISO_DATE_FORMAT);
+          retrieveFractionDates.add(date);
+        }
+      }
+
+      /* Retrieve all computed network fractions that might be needed to
+       * extrapolate new statistics.  Keep a list of all known consensus
+       * valid-after times, and keep a map of fractions also by consensus
+       * valid-after time.  (It's not sufficient to only keep the latter,
+       * because we need to count known consensuses even if the relay was
+       * not contained in a consensus or had a network fraction of exactly
+       * zero.) */
+      SortedSet<Long> knownConsensuses = new TreeSet<Long>();
+      SortedMap<Long, ComputedNetworkFractions> computedNetworkFractions =
+          new TreeMap<Long, ComputedNetworkFractions>();
+      for (String date : retrieveFractionDates) {
+        File documentFile = new File(
+            this.computedNetworkFractionsDirectory, date);
+        Set<ComputedNetworkFractions> fractions
+            = this.computedNetworkFractionsStore.retrieve(documentFile);
+        for (ComputedNetworkFractions fraction : fractions) {
+          knownConsensuses.add(fraction.getValidAfterMillis());
+          if (fraction.getFingerprint().equals(fingerprint)) {
+            computedNetworkFractions.put(fraction.getValidAfterMillis(),
+                fraction);
+          }
+        }
+      }
+
+      /* Go through newly reported stats, match them with computed network
+       * fractions, and extrapolate network totals. */
+      for (ReportedHidServStats stats : newReportedStats) {
+        long statsEndMillis = stats.getStatsEndMillis();
+        long statsDateMillis = (statsEndMillis / DateTimeHelper.ONE_DAY)
+            * DateTimeHelper.ONE_DAY;
+        long statsStartMillis = statsEndMillis
+            - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND;
+
+        /* Sum up computed network fractions and count known consensus in
+         * the relevant interval, so that we can later compute means of
+         * network fractions. */
+        double sumFractionRendRelayedCells = 0.0,
+            sumFractionDirOnionsSeen = 0.0;
+        int consensuses = 0;
+        for (long validAfterMillis : knownConsensuses) {
+          if (statsStartMillis <= validAfterMillis &&
+              validAfterMillis < statsEndMillis) {
+            if (computedNetworkFractions.containsKey(validAfterMillis)) {
+              ComputedNetworkFractions frac =
+                  computedNetworkFractions.get(validAfterMillis);
+              sumFractionRendRelayedCells +=
+                  frac.getFractionRendRelayedCells();
+              sumFractionDirOnionsSeen +=
+                  frac.getFractionDirOnionsSeen();
+            }
+            consensuses++;
+          }
+        }
+
+        /* If we don't know a single consensus with valid-after time in
+         * the statistics interval, skip this stat. */
+        if (consensuses == 0) {
+          continue;
+        }
+
+        /* Compute means of network fractions. */
+        double fractionRendRelayedCells =
+            sumFractionRendRelayedCells / consensuses;
+        double fractionDirOnionsSeen =
+            sumFractionDirOnionsSeen / consensuses;
+
+        /* If at least one fraction is positive, extrapolate network
+         * totals. */
+        if (fractionRendRelayedCells > 0.0 ||
+            fractionDirOnionsSeen > 0.0) {
+          ExtrapolatedHidServStats extrapolated =
+              new ExtrapolatedHidServStats(
+              statsDateMillis, fingerprint);
+          if (fractionRendRelayedCells > 0.0) {
+            extrapolated.setFractionRendRelayedCells(
+                fractionRendRelayedCells);
+            /* Extrapolating cells on rendezvous circuits is as easy as
+             * dividing the reported number by the computed network
+             * fraction. */
+            double extrapolatedRendRelayedCells =
+                stats.getRendRelayedCells() / fractionRendRelayedCells;
+            extrapolated.setExtrapolatedRendRelayedCells(
+                extrapolatedRendRelayedCells);
+          }
+          if (fractionDirOnionsSeen > 0.0) {
+            extrapolated.setFractionDirOnionsSeen(
+                fractionDirOnionsSeen);
+            /* Extrapolating reported unique .onion addresses to the
+             * total number in the network is more difficult.  In short,
+             * each descriptor is stored to 12 (likely) different
+             * directories, so we'll have to divide the reported number by
+             * 12 and then by the computed network fraction of this
+             * directory. */
+            double extrapolatedDirOnionsSeen =
+                stats.getDirOnionsSeen() / (12.0 * fractionDirOnionsSeen);
+            extrapolated.setExtrapolatedDirOnionsSeen(
+                extrapolatedDirOnionsSeen);
+          }
+          extrapolatedStats.add(extrapolated);
+        }
+      }
+    }
+
+    /* Store all extrapolated network totals to disk with help of the
+     * document store. */
+    return this.extrapolatedHidServStatsStore.store(
+        this.extrapolatedHidServStatsFile, extrapolatedStats);
+  }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java
new file mode 100644
index 0000000..1e53bd0
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java
@@ -0,0 +1,91 @@
+package org.torproject.metrics.hidserv;
+
+import java.io.File;
+import java.util.HashSet;
+import java.util.Set;
+
+/* Main class for updating extrapolated network totals of hidden-service
+ * statistics.  The main method of this class can be executed as often as
+ * new statistics are needed, though callers must ensure that executions
+ * do not overlap. */
+public class Main {
+
+  /* Parse new descriptors, extrapolate contained statistics using
+   * computed network fractions, aggregate results, and write results to
+   * disk.  All file paths below are relative to the module's working
+   * directory. */
+  public static void main(String[] args) {
+
+    /* Initialize directories and file paths. */
+    Set<File> inDirectories = new HashSet<File>();
+    inDirectories.add(
+        new File("../../shared/in/relay-descriptors/consensuses"));
+    inDirectories.add(
+        new File("../../shared/in/relay-descriptors/extra-infos"));
+    File statusDirectory = new File("status");
+    File hidservStatsExtrapolatedCsvFile = new File("stats/hidserv.csv");
+
+    /* Initialize document stores that will handle writing documents to
+     * files.  Each store is typed by the document class it handles. */
+    DocumentStore<ReportedHidServStats> reportedHidServStatsStore =
+        new DocumentStore<ReportedHidServStats>(
+        ReportedHidServStats.class);
+    DocumentStore<ComputedNetworkFractions>
+        computedNetworkFractionsStore =
+        new DocumentStore<ComputedNetworkFractions>(
+        ComputedNetworkFractions.class);
+    DocumentStore<ExtrapolatedHidServStats> extrapolatedHidServStatsStore
+        = new DocumentStore<ExtrapolatedHidServStats>(
+        ExtrapolatedHidServStats.class);
+
+    /* Initialize parser and read parse history to avoid parsing
+     * descriptor files that haven't changed since the last execution. */
+    System.out.println("Initializing parser and reading parse "
+        + "history...");
+    Parser parser = new Parser(inDirectories, statusDirectory,
+        reportedHidServStatsStore, computedNetworkFractionsStore);
+    parser.readParseHistory();
+
+    /* Parse new descriptors and store their contents using the document
+     * stores. */
+    System.out.println("Parsing descriptors...");
+    if (!parser.parseDescriptors()) {
+      System.err.println("Could not store parsed descriptors.  "
+          + "Terminating.");
+      return;
+    }
+
+    /* Write the parse history to avoid parsing descriptor files again
+     * next time.  It's okay to do this now and not at the end of the
+     * execution, because even if something breaks apart below, it's safe
+     * not to parse descriptor files again. */
+    System.out.println("Writing parse history...");
+    parser.writeParseHistory();
+
+    /* Extrapolate reported statistics using computed network fractions
+     * and write the result to disk using a document store.  The result is
+     * a single file with extrapolated network totals based on reports by
+     * single relays. */
+    System.out.println("Extrapolating statistics...");
+    Extrapolator extrapolator = new Extrapolator(statusDirectory,
+        reportedHidServStatsStore, computedNetworkFractionsStore,
+        extrapolatedHidServStatsStore);
+    if (!extrapolator.extrapolateHidServStats()) {
+      System.err.println("Could not extrapolate statistics.  "
+          + "Terminating.");
+      return;
+    }
+
+    /* Go through all extrapolated network totals and aggregate them.
+     * This includes calculating daily weighted interquartile means, among
+     * other statistics.  Write the result to a .csv file that can be
+     * processed by other tools.  NOTE(review): unlike the parse and
+     * extrapolate steps above, this step's outcome is not checked here —
+     * confirm that failures are reported inside the aggregator. */
+    System.out.println("Aggregating statistics...");
+    Aggregator aggregator = new Aggregator(statusDirectory,
+        extrapolatedHidServStatsStore, hidservStatsExtrapolatedCsvFile);
+    aggregator.aggregateHidServStats();
+
+    /* End this execution. */
+    System.out.println("Terminating.");
+  }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java
new file mode 100644
index 0000000..85f7d91
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java
@@ -0,0 +1,484 @@
+package org.torproject.metrics.hidserv;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+
+/* Parse hidden-service statistics from extra-info descriptors, compute
+ * network fractions from consensuses, and write parsed contents to
+ * document files for later use. */
+public class Parser {
+
+  /* File containing tuples of last-modified times and file names of
+   * descriptor files parsed in the previous execution. */
+  private File parseHistoryFile;
+
+  /* Descriptor reader to provide parsed extra-info descriptors and
+   * consensuses. */
+  private DescriptorReader descriptorReader;
+
+  /* Document file containing previously parsed reported hidden-service
+   * statistics. */
+  private File reportedHidServStatsFile;
+
+  /* Document store for storing and retrieving reported hidden-service
+   * statistics. */
+  private DocumentStore<ReportedHidServStats> reportedHidServStatsStore;
+
+  /* Directory containing document files with previously computed network
+   * fractions. */
+  private File computedNetworkFractionsDirectory;
+
+  /* Document store for storing and retrieving computed network
+   * fractions. */
+  private DocumentStore<ComputedNetworkFractions>
+      computedNetworkFractionsStore;
+
+  /* Initialize a new parser object using the given directories and
+   * document stores. */
+  public Parser(Set<File> inDirectories, File statusDirectory,
+      DocumentStore<ReportedHidServStats> reportedHidServStatsStore,
+      DocumentStore<ComputedNetworkFractions>
+      computedNetworkFractionsStore) {
+
+    /* Create a new descriptor reader for reading descriptors in the given
+     * in directory.  Configure the reader to avoid having more than five
+     * parsed descriptors in the queue, rather than the default one
+     * hundred.  Five is a compromise between very large consensuses and
+     * rather small extra-info descriptors. */
+    this.descriptorReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    for (File inDirectory : inDirectories) {
+      this.descriptorReader.addDirectory(inDirectory);
+    }
+    this.descriptorReader.setMaxDescriptorFilesInQueue(5);
+
+    /* Create File instances for the files and directories in the provided
+     * status directory.  The file and directory names here must match
+     * those used by the Extrapolator for the same status directory. */
+    this.parseHistoryFile = new File(statusDirectory, "parse-history");
+    this.reportedHidServStatsFile = new File(statusDirectory,
+        "reported-hidserv-stats");
+    this.computedNetworkFractionsDirectory =
+        new File(statusDirectory, "computed-network-fractions");
+
+    /* Store references to the provided document stores. */
+    this.reportedHidServStatsStore = reportedHidServStatsStore;
+    this.computedNetworkFractionsStore = computedNetworkFractionsStore;
+  }
+
+  /* Read the parse history file to avoid parsing descriptor files that
+   * have not changed since the previous execution. */
+  public void readParseHistory() {
+    if (this.parseHistoryFile.exists() &&
+        this.parseHistoryFile.isFile()) {
+      SortedMap<String, Long> excludedFiles =
+          new TreeMap<String, Long>();
+      BufferedReader br = null;
+      try {
+        br = new BufferedReader(new FileReader(
+            this.parseHistoryFile));
+        String line;
+        while ((line = br.readLine()) != null) {
+          /* Each line is supposed to contain the last-modified time and
+           * absolute path of a descriptor file, separated by a space. */
+          String[] parts = line.split(" ", 2);
+          if (parts.length != 2) {
+            /* Guard against lines without a separating space, which
+             * would otherwise throw an unchecked
+             * ArrayIndexOutOfBoundsException below. */
+            System.err.printf("Illegal line '%s' in parse history.  "
+                + "Skipping line.%n", line);
+            continue;
+          }
+          try {
+            excludedFiles.put(parts[1], Long.parseLong(parts[0]));
+          } catch (NumberFormatException e) {
+            System.err.printf("Illegal line '%s' in parse history.  "
+                + "Skipping line.%n", line);
+          }
+        }
+      } catch (IOException e) {
+        System.err.printf("Could not read history file '%s'.  Not "
+            + "excluding descriptors in this execution.%n",
+            this.parseHistoryFile.getAbsolutePath());
+      } finally {
+        /* Always release the file handle, even if reading failed. */
+        if (br != null) {
+          try {
+            br.close();
+          } catch (IOException e) {
+            /* Nothing left to do about a failed close. */
+          }
+        }
+      }
+
+      /* Tell the descriptor reader to exclude the files contained in the
+       * parse history file. */
+      this.descriptorReader.setExcludedFiles(excludedFiles);
+    }
+  }
+
+  /* Write parsed or skipped descriptor files with last-modified times and
+   * absolute paths to the parse history file to avoid parsing these files
+   * again, unless they change until the next execution. */
+  public void writeParseHistory() {
+
+    /* Obtain the list of descriptor files that were either parsed now or
+     * that were skipped in this execution from the descriptor reader. */
+    SortedMap<String, Long> excludedAndParsedFiles =
+        new TreeMap<String, Long>();
+    excludedAndParsedFiles.putAll(
+        this.descriptorReader.getExcludedFiles());
+    excludedAndParsedFiles.putAll(this.descriptorReader.getParsedFiles());
+    BufferedWriter bw = null;
+    try {
+      this.parseHistoryFile.getParentFile().mkdirs();
+      bw = new BufferedWriter(new FileWriter(
+          this.parseHistoryFile));
+      for (Map.Entry<String, Long> e :
+          excludedAndParsedFiles.entrySet()) {
+        /* Each line starts with the last-modified time of the descriptor
+         * file, followed by its absolute path. */
+        String absolutePath = e.getKey();
+        long lastModifiedMillis = e.getValue();
+        bw.write(String.valueOf(lastModifiedMillis) + " " + absolutePath
+            + "\n");
+      }
+    } catch (IOException e) {
+      System.err.printf("Could not write history file '%s'.  Not "
+          + "excluding descriptors in next execution.%n",
+          this.parseHistoryFile.getAbsolutePath());
+    } finally {
+      /* Always close the writer so that buffered lines are flushed and
+       * the file handle is released, even after a write error. */
+      if (bw != null) {
+        try {
+          bw.close();
+        } catch (IOException e) {
+          System.err.printf("Could not close history file '%s'.%n",
+              this.parseHistoryFile.getAbsolutePath());
+        }
+      }
+    }
+  }
+
+  /* Set of all reported hidden-service statistics.  To date, these
+   * objects are small, and keeping them all in memory is easy.  But if
+   * this ever changes, e.g., when more and more statistics are added,
+   * this may not scale. */
+  private Set<ReportedHidServStats> reportedHidServStats =
+      new HashSet<ReportedHidServStats>();
+
+  /* Instruct the descriptor reader to parse descriptor files, and handle
+   * the resulting parsed descriptors if they are either extra-info
+   * descriptors or consensuses. */
+  public boolean parseDescriptors() {
+    Iterator<DescriptorFile> descriptorFiles =
+        this.descriptorReader.readDescriptors();
+    while (descriptorFiles.hasNext()) {
+      DescriptorFile descriptorFile = descriptorFiles.next();
+      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+        if (descriptor instanceof ExtraInfoDescriptor) {
+          /* Extra-info descriptors are collected in memory; problems
+           * with a single descriptor are handled inside and do not
+           * abort parsing. */
+          this.parseExtraInfoDescriptor((ExtraInfoDescriptor) descriptor);
+        } else if (descriptor instanceof RelayNetworkStatusConsensus) {
+          /* Consensus processing can signal failure, in which case we
+           * abort the entire parse run. */
+          if (!this.parseRelayNetworkStatusConsensus(
+              (RelayNetworkStatusConsensus) descriptor)) {
+            return false;
+          }
+        }
+      }
+    }
+
+    /* Store reported hidden-service statistics to their document file.
+     * It's more efficient to only do this once after processing all
+     * descriptors.  In contrast, sets of computed network fractions are
+     * stored immediately after processing the consensus they are based
+     * on.  Returns whether storing succeeded. */
+    return this.reportedHidServStatsStore.store(
+        this.reportedHidServStatsFile, this.reportedHidServStats);
+  }
+
+  /* Parse the given extra-info descriptor by extracting its fingerprint
+   * and contained hidserv-* lines.  If a valid set of hidserv-stats can
+   * be extracted, create a new stats object that will later be stored to
+   * a document file. */
+  private void parseExtraInfoDescriptor(
+      ExtraInfoDescriptor extraInfoDescriptor) {
+
+    /* Extract the fingerprint from the parsed descriptor. */
+    String fingerprint = extraInfoDescriptor.getFingerprint();
+
+    /* Parse the descriptor once more to extract any hidserv-* lines.
+     * This is necessary, because these lines are not yet supported by the
+     * descriptor-parsing library. */
+    Scanner scanner = new Scanner(new ByteArrayInputStream(
+        extraInfoDescriptor.getRawDescriptorBytes()));
+    Long statsEndMillis = null, statsIntervalSeconds = null,
+        rendRelayedCells = null, rendRelayedCellsBinSize = null,
+        dirOnionsSeen = null, dirOnionsSeenBinSize = null;
+    try {
+      while (scanner.hasNext()) {
+        String line = scanner.nextLine();
+        if (line.startsWith("hidserv-")) {
+          String[] parts = line.split(" ");
+          if (parts[0].equals("hidserv-stats-end")) {
+            /* Parse statistics end and statistics interval length. */
+            if (parts.length != 5 || !parts[3].startsWith("(") ||
+                !parts[4].equals("s)")) {
+              /* Will warn below, because statsEndMillis is still null. */
+              continue;
+            }
+            statsEndMillis = DateTimeHelper.parse(parts[1] + " "
+                + parts[2]);
+            statsIntervalSeconds = Long.parseLong(parts[3].substring(1));
+          } else if (parts[0].equals("hidserv-rend-relayed-cells")) {
+            /* Parse the reported number of cells on rendezvous circuits
+             * and the bin size used by the relay to obfuscate that
+             * number. */
+            if (parts.length != 5 ||
+                !parts[4].startsWith("bin_size=")) {
+              /* Will warn below, because rendRelayedCells is still
+               * null. */
+              continue;
+            }
+            rendRelayedCells = Long.parseLong(parts[1]);
+            rendRelayedCellsBinSize =
+                Long.parseLong(parts[4].substring(9));
+          } else if (parts[0].equals("hidserv-dir-onions-seen")) {
+            /* Parse the reported number of distinct .onion addresses and
+             * the bin size used by the relay to obfuscate that number. */
+            if (parts.length != 5 ||
+                !parts[4].startsWith("bin_size=")) {
+              /* Will warn below, because dirOnionsSeen is still null. */
+              continue;
+            }
+            dirOnionsSeen = Long.parseLong(parts[1]);
+            dirOnionsSeenBinSize = Long.parseLong(parts[4].substring(9));
+          }
+        }
+      }
+    } catch (NumberFormatException e) {
+      e.printStackTrace();
+      return;
+    } finally {
+      /* Close the scanner and its underlying stream in all cases; the
+       * scanner was previously never closed. */
+      scanner.close();
+    }
+
+    /* If the descriptor did not contain any of the expected hidserv-*
+     * lines, don't do anything.  This applies to the majority of
+     * descriptors, at least as long as only a minority of relays reports
+     * these statistics. */
+    if (statsEndMillis == null && rendRelayedCells == null &&
+        dirOnionsSeen == null) {
+      return;
+
+    /* If the descriptor contained all expected hidserv-* lines, create a
+     * new stats object and put it in the local map, so that it will later
+     * be written to a document file. */
+    } else if (statsEndMillis != null &&
+        statsEndMillis != DateTimeHelper.NO_TIME_AVAILABLE &&
+        statsIntervalSeconds != null && rendRelayedCells != null &&
+        dirOnionsSeen != null) {
+      ReportedHidServStats reportedStats = new ReportedHidServStats(
+          fingerprint, statsEndMillis);
+      reportedStats.setStatsIntervalSeconds(statsIntervalSeconds);
+      reportedStats.setRendRelayedCells(this.removeNoise(rendRelayedCells,
+          rendRelayedCellsBinSize));
+      reportedStats.setDirOnionsSeen(this.removeNoise(dirOnionsSeen,
+          dirOnionsSeenBinSize));
+      this.reportedHidServStats.add(reportedStats);
+
+    /* If the descriptor contained some but not all hidserv-* lines, print
+     * out a warning.  This case does not warrant any further action,
+     * because relays can in theory write anything in their extra-info
+     * descriptors.  But maybe we'll want to know. */
+    } else {
+      System.err.println("Relay " + fingerprint + " published "
+          + "incomplete hidserv-stats.  Ignoring.");
+    }
+  }
+
+  /* Remove noise from a reported stats value by rounding to the nearest
+   * right side of a bin and subtracting half of the bin size. */
+  private long removeNoise(long reportedNumber, long binSize) {
+    if (binSize <= 0L) {
+      /* Guard against division by zero: a relay could report
+       * bin_size=0, which is parsed without validation.  In that case
+       * there is no noise to remove, so return the number unchanged. */
+      return reportedNumber;
+    }
+    /* Round to the nearest right side of a bin, then subtract half of
+     * the bin size to obtain the expected value of the obfuscated
+     * number. */
+    long roundedToNearestRightSideOfTheBin =
+        ((reportedNumber + binSize / 2) / binSize) * binSize;
+    long subtractedHalfOfBinSize =
+        roundedToNearestRightSideOfTheBin - binSize / 2;
+    return subtractedHalfOfBinSize;
+  }
+
+  public boolean parseRelayNetworkStatusConsensus(
+      RelayNetworkStatusConsensus consensus) {
+
+    /* Make sure that the consensus contains Wxx weights. */
+    SortedMap<String, Integer> bandwidthWeights =
+        consensus.getBandwidthWeights();
+    if (bandwidthWeights == null) {
+      System.err.printf("Consensus with valid-after time %s doesn't "
+          + "contain any Wxx weights.  Skipping.%n",
+          DateTimeHelper.format(consensus.getValidAfterMillis()));
+      return false;
+    }
+
+    /* More precisely, make sure that it contains Wmx weights, and then
+     * parse them. */
+    SortedSet<String> expectedWeightKeys =
+        new TreeSet<String>(Arrays.asList("Wmg,Wmm,Wme,Wmd".split(",")));
+    expectedWeightKeys.removeAll(bandwidthWeights.keySet());
+    if (!expectedWeightKeys.isEmpty()) {
+      System.err.printf("Consensus with valid-after time %s doesn't "
+          + "contain expected Wmx weights.  Skipping.%n",
+          DateTimeHelper.format(consensus.getValidAfterMillis()));
+      return false;
+    }
+    double wmg = ((double) bandwidthWeights.get("Wmg")) / 10000.0;
+    double wmm = ((double) bandwidthWeights.get("Wmm")) / 10000.0;
+    double wme = ((double) bandwidthWeights.get("Wme")) / 10000.0;
+    double wmd = ((double) bandwidthWeights.get("Wmd")) / 10000.0;
+
+    /* Keep a sorted set with the fingerprints of all hidden-service
+     * directories, in reverse order, so that we can later determine the
+     * fingerprint distance between a directory and the directory
+     * preceding it by three positions in the descriptor ring. */
+    SortedSet<String> hsDirs = new TreeSet<String>(
+        Collections.reverseOrder());
+
+    /* Prepare for computing the weights of all relays with the Fast flag
+     * for being selected in the middle position. */
+    double totalWeightsRendezvousPoint = 0.0;
+    SortedMap<String, Double> weightsRendezvousPoint =
+        new TreeMap<String, Double>();
+
+    /* Go through all status entries contained in the consensus. */
+    for (Map.Entry<String, NetworkStatusEntry> e :
+        consensus.getStatusEntries().entrySet()) {
+      String fingerprint = e.getKey();
+      NetworkStatusEntry statusEntry = e.getValue();
+      SortedSet<String> flags = statusEntry.getFlags();
+
+      /* Add the relay to the set of hidden-service directories if it has
+       * the HSDir flag. */
+      if (flags.contains("HSDir")) {
+        hsDirs.add(statusEntry.getFingerprint());
+      }
+
+      /* Compute the probability for being selected as rendezvous point.
+       * If the relay has the Fast flag, multiply its consensus weight
+       * with the correct Wmx weight, depending on whether the relay has
+       * the Guard and/or Exit flag. */
+      double weightRendezvousPoint = 0.0;
+      if (flags.contains("Fast")) {
+        weightRendezvousPoint = (double) statusEntry.getBandwidth();
+        if (flags.contains("Guard") && flags.contains("Exit")) {
+          weightRendezvousPoint *= wmd;
+        } else if (flags.contains("Guard")) {
+          weightRendezvousPoint *= wmg;
+        } else if (flags.contains("Exit")) {
+          weightRendezvousPoint *= wme;
+        } else {
+          weightRendezvousPoint *= wmm;
+        }
+      }
+      weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint);
+      totalWeightsRendezvousPoint += weightRendezvousPoint;
+    }
+
+    /* Store all computed network fractions based on this consensus in a
+     * set, which will then be written to disk in a single store
+     * operation. */
+    Set<ComputedNetworkFractions> computedNetworkFractions =
+        new HashSet<ComputedNetworkFractions>();
+
+    /* Remove all previously added directory fingerprints and re-add them
+     * twice, once with a leading "0" and once with a leading "1".  The
+     * purpose is to simplify the logic for moving from one fingerprint to
+     * the previous one, even if that would mean traversing the ring
+     * start.  For example, the fingerprint preceding "1""00..0000" with
+     * the first "1" being added here could be "0""FF..FFFF". */
+    SortedSet<String> hsDirsCopy = new TreeSet<String>(hsDirs);
+    hsDirs.clear();
+    for (String fingerprint : hsDirsCopy) {
+      hsDirs.add("0" + fingerprint);
+      hsDirs.add("1" + fingerprint);
+    }
+
+    /* Define the total ring size to compute fractions below.  This is
+     * 16^40 or 2^160. */
+    final double RING_SIZE = new BigInteger(
+        "10000000000000000000000000000000000000000",
+        16).doubleValue();
+
+    /* Go through all status entries again, this time computing network
+     * fractions. */
+    for (Map.Entry<String, NetworkStatusEntry> e :
+        consensus.getStatusEntries().entrySet()) {
+      String fingerprint = e.getKey();
+      NetworkStatusEntry statusEntry = e.getValue();
+      double fractionRendRelayedCells = 0.0,
+          fractionDirOnionsSeen = 0.0;
+      if (statusEntry != null) {
+
+        /* Check if the relay is a hidden-service directory by looking up
+         * its fingerprint, preceded by "1", in the sorted set that we
+         * populated above. */
+        String fingerprintPrecededByOne = "1" + fingerprint;
+        if (hsDirs.contains(fingerprintPrecededByOne)) {
+
+          /* Move three positions in the sorted set, which is in reverse
+           * order, to learn the fingerprint of the directory preceding
+           * this directory by three positions. */
+          String startResponsible = fingerprint;
+          int positionsToGo = 3;
+          for (String hsDirFingerprint :
+              hsDirs.tailSet(fingerprintPrecededByOne)) {
+            startResponsible = hsDirFingerprint;
+            if (positionsToGo-- <= 0) {
+              break;
+            }
+          }
+
+          /* Compute the fraction of descriptor space that this relay is
+           * responsible for as difference between the two fingerprints
+           * divided by the ring size. */
+          fractionDirOnionsSeen =
+              new BigInteger(fingerprintPrecededByOne, 16).subtract(
+              new BigInteger(startResponsible, 16)).doubleValue()
+              / RING_SIZE;
+
+          /* Divide this fraction by three to obtain the fraction of
+           * descriptors that this directory has seen.  This step is
+           * necessary, because each descriptor that is published to this
+           * directory is also published to two other directories. */
+          fractionDirOnionsSeen /= 3.0;
+        }
+
+        /* Compute the fraction of cells on rendezvous circuits that this
+         * relay has seen by dividing its previously calculated weight by
+         * the sum of all such weights. */
+        fractionRendRelayedCells = weightsRendezvousPoint.get(fingerprint)
+            / totalWeightsRendezvousPoint;
+      }
+
+      /* If at least one of the computed fractions is non-zero, create a
+       * new fractions object. */
+      if (fractionRendRelayedCells > 0.0 || fractionDirOnionsSeen > 0.0) {
+        ComputedNetworkFractions fractions = new ComputedNetworkFractions(
+            fingerprint, consensus.getValidAfterMillis());
+        fractions.setFractionRendRelayedCells(fractionRendRelayedCells);
+        fractions.setFractionDirOnionsSeen(fractionDirOnionsSeen);
+        computedNetworkFractions.add(fractions);
+      }
+    }
+
+    /* Store all newly computed network fractions to a documents file.
+     * The same file also contains computed network fractions from other
+     * consensuses that were valid on the same day.  This is in contrast
+     * to the other documents which are all stored in a single file, which
+     * would not scale for computed network fractions. */
+    String date = DateTimeHelper.format(consensus.getValidAfterMillis(),
+        DateTimeHelper.ISO_DATE_FORMAT);
+    File documentFile = new File(this.computedNetworkFractionsDirectory,
+        date);
+    if (!this.computedNetworkFractionsStore.store(documentFile,
+        computedNetworkFractions)) {
+      return false;
+    }
+    return true;
+  }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java
new file mode 100644
index 0000000..996a70a
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java
@@ -0,0 +1,130 @@
+package org.torproject.metrics.hidserv;
+
+/* Hidden-service statistics reported by a single relay covering a single
+ * statistics interval of usually 24 hours.  These statistics are reported
+ * by the relay in the "hidserv-" lines of its extra-info descriptor.
+ *
+ * Objects are identified by fingerprint and stats interval end; the
+ * remaining attributes are filled in via setters after construction. */
+public class ReportedHidServStats implements Document {
+
+  /* Relay fingerprint consisting of 40 upper-case hex characters. */
+  private String fingerprint;
+  public String getFingerprint() {
+    return this.fingerprint;
+  }
+
+  /* Hidden-service statistics end timestamp in milliseconds. */
+  private long statsEndMillis;
+  public long getStatsEndMillis() {
+    return this.statsEndMillis;
+  }
+
+  /* Statistics interval length in seconds. */
+  private long statsIntervalSeconds;
+  public void setStatsIntervalSeconds(long statsIntervalSeconds) {
+    this.statsIntervalSeconds = statsIntervalSeconds;
+  }
+  public long getStatsIntervalSeconds() {
+    return this.statsIntervalSeconds;
+  }
+
+  /* Number of relayed cells on rendezvous circuits as reported by the
+   * relay and adjusted by rounding to the nearest right side of a bin and
+   * subtracting half of the bin size. */
+  private long rendRelayedCells;
+  public void setRendRelayedCells(long rendRelayedCells) {
+    this.rendRelayedCells = rendRelayedCells;
+  }
+  public long getRendRelayedCells() {
+    return this.rendRelayedCells;
+  }
+
+  /* Number of distinct .onion addresses as reported by the relay and
+   * adjusted by rounding to the nearest right side of a bin and
+   * subtracting half of the bin size. */
+  private long dirOnionsSeen;
+  public void setDirOnionsSeen(long dirOnionsSeen) {
+    this.dirOnionsSeen = dirOnionsSeen;
+  }
+  public long getDirOnionsSeen() {
+    return this.dirOnionsSeen;
+  }
+
+  /* Instantiate a new stats object using fingerprint and stats interval
+   * end which together uniquely identify the object. */
+  public ReportedHidServStats(String fingerprint, long statsEndMillis) {
+    this.fingerprint = fingerprint;
+    this.statsEndMillis = statsEndMillis;
+  }
+
+  /* Return whether this object contains the same fingerprint and stats
+   * interval end as the passed object.  Only the identifying attributes
+   * are compared, consistent with hashCode below. */
+  @Override
+  public boolean equals(Object otherObject) {
+    if (!(otherObject instanceof ReportedHidServStats)) {
+      return false;
+    }
+    ReportedHidServStats other = (ReportedHidServStats) otherObject;
+    return this.fingerprint.equals(other.fingerprint) &&
+        this.statsEndMillis == other.statsEndMillis;
+  }
+
+  /* Return a (hopefully unique) hash code based on this object's
+   * fingerprint and stats interval end. */
+  @Override
+  public int hashCode() {
+    return this.fingerprint.hashCode() + (int) this.statsEndMillis;
+  }
+
+  /* Return a string representation of this object, consisting of
+   * fingerprint and the concatenation of all other attributes. */
+  @Override
+  public String[] format() {
+    String first = this.fingerprint;
+    String second = String.format("%s,%d,%d,%d",
+        DateTimeHelper.format(this.statsEndMillis),
+        this.statsIntervalSeconds, this.rendRelayedCells,
+        this.dirOnionsSeen);
+    return new String[] { first, second };
+  }
+
+  /* Instantiate an empty stats object that will be initialized more by
+   * the parse method. */
+  ReportedHidServStats() {
+  }
+
+  /* Initialize this stats object using the two provided strings that have
+   * been produced by the format method earlier.  Return whether this
+   * operation was successful.  Fields are only assigned once all parts
+   * have been validated, so a failed parse leaves the object unchanged. */
+  @Override
+  public boolean parse(String[] formattedStrings) {
+    if (formattedStrings.length != 2) {
+      /* Include the actual count in the message; the original format
+       * string contained no conversion for the passed argument. */
+      System.err.printf("Invalid number of formatted strings (%d).  "
+          + "Skipping.%n", formattedStrings.length);
+      return false;
+    }
+    String fingerprint = formattedStrings[0];
+    String[] secondParts = formattedStrings[1].split(",", 4);
+    if (secondParts.length != 4) {
+      return false;
+    }
+    long statsEndMillis = DateTimeHelper.parse(secondParts[0]);
+    if (statsEndMillis == DateTimeHelper.NO_TIME_AVAILABLE) {
+      return false;
+    }
+    long statsIntervalSeconds = -1L, rendRelayedCells = -1L,
+        dirOnionsSeen = -1L;
+    try {
+      statsIntervalSeconds = Long.parseLong(secondParts[1]);
+      rendRelayedCells = Long.parseLong(secondParts[2]);
+      dirOnionsSeen = Long.parseLong(secondParts[3]);
+    } catch (NumberFormatException e) {
+      return false;
+    }
+    this.fingerprint = fingerprint;
+    this.statsEndMillis = statsEndMillis;
+    this.statsIntervalSeconds = statsIntervalSeconds;
+    this.rendRelayedCells = rendRelayedCells;
+    this.dirOnionsSeen = dirOnionsSeen;
+    return true;
+  }
+}
+
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java
new file mode 100644
index 0000000..db7d065
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java
@@ -0,0 +1,360 @@
+package org.torproject.metrics.hidserv;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/* NOTE: This class is not required for running the Main class!  (It
+ * contains its own main method.)
+ *
+ * Simulates the extrapolation of reported hidden-service statistics
+ * (rendezvous cells and .onion addresses) from synthetic relays, writing
+ * one CSV line per (run, reporting fraction) pair so the quality of
+ * weighted mean/median/interquartile-mean estimators can be compared. */
+public class Simulate {
+  /* Output file for the rendezvous-cell simulation results. */
+  private static File simCellsCsvFile =
+      new File("out/csv/sim-cells.csv");
+
+  /* Output file for the .onion-address simulation results. */
+  private static File simOnionsCsvFile =
+      new File("out/csv/sim-onions.csv");
+
+  public static void main(String[] args) throws Exception {
+    System.out.print("Simulating extrapolation of rendezvous cells");
+    simulateManyCells();
+    System.out.print("\nSimulating extrapolation of .onions");
+    simulateManyOnions();
+    System.out.println("\nTerminating.");
+  }
+
+  /* Shared PRNG used by both simulations.  NOTE(review): not explicitly
+   * seeded, so individual runs are not reproducible — consider a fixed
+   * seed if reproducibility is wanted. */
+  private static Random rnd = new Random();
+
+  /* Run 1000 independent cell-extrapolation simulations and write the
+   * results (columns: run id, reporting fraction, weighted mean, weighted
+   * median, weighted interquartile mean) to sim-cells.csv. */
+  private static void simulateManyCells() throws Exception {
+    simCellsCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        simCellsCsvFile));
+    bw.write("run,frac,wmean,wmedian,wiqm\n");
+    final int numberOfExtrapolations = 1000;
+    for (int i = 0; i < numberOfExtrapolations; i++) {
+      bw.write(simulateCells(i));
+      System.out.print(".");
+    }
+    bw.close();
+  }
+
+  /* Run 1000 independent .onion-extrapolation simulations and write the
+   * results (same columns as above) to sim-onions.csv. */
+  private static void simulateManyOnions() throws Exception {
+    simOnionsCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        simOnionsCsvFile));
+    bw.write("run,frac,wmean,wmedian,wiqm\n");
+    final int numberOfExtrapolations = 1000;
+    for (int i = 0; i < numberOfExtrapolations; i++) {
+      bw.write(simulateOnions(i));
+      System.out.print(".");
+    }
+    bw.close();
+  }
+
+  /* Simulate a single run of observing, obfuscating, and extrapolating
+   * rendezvous-cell counts, returning one CSV line per reporting
+   * fraction. */
+  private static String simulateCells(int run) {
+
+    /* Generate consensus weights following an exponential distribution
+     * with lambda = 1 for 3000 potential rendezvous points.  The
+     * expression -log(1 - u) is inverse-CDF sampling of Exp(1). */
+    final int numberRendPoints = 3000;
+    double[] consensusWeights = new double[numberRendPoints];
+    double totalConsensusWeight = 0.0;
+    for (int i = 0; i < numberRendPoints; i++) {
+      double consensusWeight = -Math.log(1.0 - rnd.nextDouble());
+      consensusWeights[i] = consensusWeight;
+      totalConsensusWeight += consensusWeight;
+    }
+
+    /* Compute probabilities for being selected as rendezvous point. */
+    double[] probRendPoint = new double[numberRendPoints];
+    for (int i = 0; i < numberRendPoints; i++) {
+      probRendPoint[i] = consensusWeights[i] / totalConsensusWeight;
+    }
+
+    /* Generate 10,000,000,000 cells (474 Mbit/s) in chunks following an
+     * exponential distribution with lambda = 0.0001, so on average
+     * 10,000 cells per chunk, and randomly assign them to a rendezvous
+     * point to report them later. */
+    long cellsLeft = 10000000000L;
+    final double cellsLambda = 0.0001;
+    long[] observedCells = new long[numberRendPoints];
+    while (cellsLeft > 0) {
+      long cells = Math.min(cellsLeft,
+          (long) (-Math.log(1.0 - rnd.nextDouble()) / cellsLambda));
+      /* Pick the chunk's rendezvous point by walking the cumulative
+       * selection probabilities. */
+      double selectRendPoint = rnd.nextDouble();
+      for (int i = 0; i < probRendPoint.length; i++) {
+        selectRendPoint -= probRendPoint[i];
+        if (selectRendPoint <= 0.0) {
+          observedCells[i] += cells;
+          break;
+        }
+      }
+      cellsLeft -= cells;
+    }
+
+    /* Obfuscate reports using binning and Laplace noise, and then attempt
+     * to remove noise again.  b is the Laplace scale parameter,
+     * presumably sensitivity/epsilon = 2048/0.3 from the hidserv-stats
+     * design — confirm against the stats specification. */
+    final long binSize = 1024L;
+    final double b = 2048.0 / 0.3;
+    long[] reportedCells = new long[numberRendPoints];
+    long[] removedNoiseCells = new long[numberRendPoints];
+    for (int i = 0; i < numberRendPoints; i++) {
+      long observed = observedCells[i];
+      /* Round up to the next multiple of the bin size. */
+      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+      /* Sample Laplace(0, b) noise via inverse-CDF sampling. */
+      double p = rnd.nextDouble();
+      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+      long reported = afterBinning + (long) laplaceNoise;
+      /* NOTE(review): reportedCells is written but never read again in
+       * this method. */
+      reportedCells[i] = reported;
+      /* Undo the noise: round to the nearest right side of a bin, then
+       * subtract half of the bin size. */
+      long roundedToNearestRightSideOfTheBin =
+          ((reported + binSize / 2) / binSize) * binSize;
+      long subtractedHalfOfBinSize =
+          roundedToNearestRightSideOfTheBin - binSize / 2;
+      removedNoiseCells[i] = subtractedHalfOfBinSize;
+    }
+
+    /* Perform extrapolations from random fractions of reports by
+     * probability to be selected as rendezvous point. */
+    StringBuilder sb = new StringBuilder();
+    double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
+        0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+    for (double fraction : fractions) {
+      /* Randomly pick an initial set of reporting relays of roughly the
+       * requested size. */
+      SortedSet<Integer> nonReportingRelays = new TreeSet<Integer>();
+      for (int j = 0; j < numberRendPoints; j++) {
+        nonReportingRelays.add(j);
+      }
+      List<Integer> shuffledRelays = new ArrayList<Integer>(
+          nonReportingRelays);
+      Collections.shuffle(shuffledRelays);
+      SortedSet<Integer> reportingRelays = new TreeSet<Integer>();
+      for (int j = 0; j < (int) ((double) numberRendPoints * fraction);
+          j++) {
+        reportingRelays.add(shuffledRelays.get(j));
+        nonReportingRelays.remove(shuffledRelays.get(j));
+      }
+      /* Each entry holds { extrapolated total, reported value, selection
+       * probability } for one reporting relay. */
+      List<double[]> singleRelayExtrapolations;
+      double totalReportingProbability;
+      /* Add or remove random relays until the total selection probability
+       * of the reporting set is within +/- 0.001 of the target
+       * fraction. */
+      do {
+        singleRelayExtrapolations = new ArrayList<double[]>();
+        totalReportingProbability = 0.0;
+        for (int reportingRelay : reportingRelays) {
+          double probability = probRendPoint[reportingRelay];
+          if (probability > 0.0) {
+            singleRelayExtrapolations.add(
+                new double[] {
+                    removedNoiseCells[reportingRelay] / probability,
+                    removedNoiseCells[reportingRelay],
+                    probability });
+          }
+          totalReportingProbability += probability;
+        }
+        if (totalReportingProbability < fraction - 0.001) {
+          int addRelay = new ArrayList<Integer>(nonReportingRelays).get(
+              rnd.nextInt(nonReportingRelays.size()));
+          nonReportingRelays.remove(addRelay);
+          reportingRelays.add(addRelay);
+        } else if (totalReportingProbability > fraction + 0.001) {
+          int removeRelay = new ArrayList<Integer>(reportingRelays).get(
+              rnd.nextInt(reportingRelays.size()));
+          reportingRelays.remove(removeRelay);
+          nonReportingRelays.add(removeRelay);
+        }
+      } while (totalReportingProbability < fraction - 0.001 ||
+          totalReportingProbability > fraction + 0.001);
+      /* Sort by extrapolated total, which the weighted median and
+       * interquartile mean below rely on. */
+      Collections.sort(singleRelayExtrapolations,
+          new Comparator<double[]>() {
+        public int compare(double[] o1, double[] o2) {
+          return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
+        }
+      });
+      /* Weighted mean = sum of reported values / sum of probabilities;
+       * weighted median = first extrapolation where the cumulative
+       * probability exceeds half the total; the interquartile mean uses
+       * only the middle 50% by cumulative probability. */
+      double totalProbability = 0.0, totalValues = 0.0;
+      double totalInterquartileProbability = 0.0,
+          totalInterquartileValues = 0.0;
+      Double weightedMedian = null;
+      for (double[] extrapolation : singleRelayExtrapolations) {
+        totalValues += extrapolation[1];
+        totalProbability += extrapolation[2];
+        if (weightedMedian == null &&
+            totalProbability > totalReportingProbability * 0.5) {
+          weightedMedian = extrapolation[0];
+        }
+        if (totalProbability > totalReportingProbability * 0.25 &&
+            totalProbability < totalReportingProbability * 0.75) {
+          totalInterquartileValues += extrapolation[1];
+          totalInterquartileProbability += extrapolation[2];
+        }
+      }
+      sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
+          totalValues / totalProbability, weightedMedian,
+          totalInterquartileValues / totalInterquartileProbability));
+    }
+    return sb.toString();
+  }
+
+  /* Simulate a single run of storing, obfuscating, and extrapolating
+   * .onion-address counts observed by hidden-service directories,
+   * returning one CSV line per reporting fraction. */
+  private static String simulateOnions(final int run) {
+
+    /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */
+    final int numberHsDirs = 3000;
+    SortedSet<Double> hsDirFingerprints = new TreeSet<Double>();
+    for (int i = 0; i < numberHsDirs; i++) {
+      hsDirFingerprints.add(rnd.nextDouble());
+    }
+
+    /* Compute fractions of observed descriptor space.  Each fingerprint
+     * is also added shifted by -1.0 so that walking backwards can wrap
+     * around the start of the ring. */
+    SortedSet<Double> ring =
+        new TreeSet<Double>(Collections.reverseOrder());
+    for (double fingerprint : hsDirFingerprints) {
+      ring.add(fingerprint);
+      ring.add(fingerprint - 1.0);
+    }
+    /* The fraction a directory is responsible for is the distance to the
+     * directory three positions before it on the ring. */
+    SortedMap<Double, Double> hsDirFractions =
+        new TreeMap<Double, Double>();
+    for (double fingerprint : hsDirFingerprints) {
+      double start = fingerprint;
+      int positionsToGo = 3;
+      for (double prev : ring.tailSet(fingerprint)) {
+        start = prev;
+        if (positionsToGo-- <= 0) {
+          break;
+        }
+      }
+      hsDirFractions.put(fingerprint, fingerprint - start);
+    }
+
+    /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */
+    final int numberOnions = 40000;
+    final int replicas = 4;
+    final int storeOnDirs = 3;
+    SortedMap<Double, SortedSet<Integer>> storedDescs =
+        new TreeMap<Double, SortedSet<Integer>>();
+    for (double fingerprint : hsDirFingerprints) {
+      storedDescs.put(fingerprint, new TreeSet<Integer>());
+    }
+    for (int i = 0; i < numberOnions; i++) {
+      for (int j = 0; j < replicas; j++) {
+        /* Store the descriptor on the three directories following a
+         * random point on the ring. */
+        int leftToStore = storeOnDirs;
+        for (double fingerprint :
+            hsDirFingerprints.tailSet(rnd.nextDouble())) {
+          storedDescs.get(fingerprint).add(i);
+          if (--leftToStore <= 0) {
+            break;
+          }
+        }
+        /* If the random point was near the end of the ring, wrap around
+         * to the lowest fingerprints. */
+        if (leftToStore > 0) {
+          for (double fingerprint : hsDirFingerprints) {
+            storedDescs.get(fingerprint).add(i);
+            if (--leftToStore <= 0) {
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    /* Obfuscate reports using binning and Laplace noise, and then attempt
+     * to remove noise again.  b is the Laplace scale parameter,
+     * presumably sensitivity/epsilon = 8/0.3 — confirm against the stats
+     * specification. */
+    final long binSize = 8L;
+    final double b = 8.0 / 0.3;
+    SortedMap<Double, Long> reportedOnions = new TreeMap<Double, Long>(),
+        removedNoiseOnions = new TreeMap<Double, Long>();
+    for (Map.Entry<Double, SortedSet<Integer>> e :
+      storedDescs.entrySet()) {
+      double fingerprint = e.getKey();
+      long observed = (long) e.getValue().size();
+      /* Round up to the next multiple of the bin size. */
+      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+      /* Sample Laplace(0, b) noise via inverse-CDF sampling. */
+      double p = rnd.nextDouble();
+      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+      long reported = afterBinning + (long) laplaceNoise;
+      /* NOTE(review): reportedOnions is written but never read again in
+       * this method. */
+      reportedOnions.put(fingerprint, reported);
+      /* Undo the noise: round to the nearest right side of a bin, then
+       * subtract half of the bin size. */
+      long roundedToNearestRightSideOfTheBin =
+          ((reported + binSize / 2) / binSize) * binSize;
+      long subtractedHalfOfBinSize =
+          roundedToNearestRightSideOfTheBin - binSize / 2;
+      removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize);
+    }
+
+    /* Perform extrapolations from random fractions of reports by the
+     * fraction of descriptor space each hidden-service directory is
+     * responsible for. */
+    StringBuilder sb = new StringBuilder();
+    double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
+        0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+    for (double fraction : fractions) {
+      /* Randomly pick an initial set of reporting directories of roughly
+       * the requested size. */
+      SortedSet<Double> nonReportingRelays =
+          new TreeSet<Double>(hsDirFractions.keySet());
+      List<Double> shuffledRelays = new ArrayList<Double>(
+          nonReportingRelays);
+      Collections.shuffle(shuffledRelays);
+      SortedSet<Double> reportingRelays = new TreeSet<Double>();
+      for (int j = 0; j < (int) ((double) hsDirFractions.size()
+          * fraction); j++) {
+        reportingRelays.add(shuffledRelays.get(j));
+        nonReportingRelays.remove(shuffledRelays.get(j));
+      }
+      /* Each entry holds { extrapolated total, reported value, fraction
+       * of descriptors seen } for one reporting directory. */
+      List<double[]> singleRelayExtrapolations;
+      double totalReportingProbability;
+      /* Add or remove random directories until the total fraction of the
+       * reporting set is within +/- 0.001 of the target fraction. */
+      do {
+        singleRelayExtrapolations = new ArrayList<double[]>();
+        totalReportingProbability = 0.0;
+        for (double reportingRelay : reportingRelays) {
+          /* Divide by three because each descriptor is stored on three
+           * directories. */
+          double probability = hsDirFractions.get(reportingRelay) / 3.0;
+          if (probability > 0.0) {
+            singleRelayExtrapolations.add(
+                new double[] { removedNoiseOnions.get(reportingRelay)
+                    / probability, removedNoiseOnions.get(reportingRelay),
+                    probability });
+          }
+          totalReportingProbability += probability;
+        }
+        if (totalReportingProbability < fraction - 0.001) {
+          double addRelay =
+              new ArrayList<Double>(nonReportingRelays).get(
+              rnd.nextInt(nonReportingRelays.size()));
+          nonReportingRelays.remove(addRelay);
+          reportingRelays.add(addRelay);
+        } else if (totalReportingProbability > fraction + 0.001) {
+          double removeRelay =
+              new ArrayList<Double>(reportingRelays).get(
+              rnd.nextInt(reportingRelays.size()));
+          reportingRelays.remove(removeRelay);
+          nonReportingRelays.add(removeRelay);
+        }
+      } while (totalReportingProbability < fraction - 0.001 ||
+          totalReportingProbability > fraction + 0.001);
+      /* Sort by extrapolated total, which the weighted median and
+       * interquartile mean below rely on. */
+      Collections.sort(singleRelayExtrapolations,
+          new Comparator<double[]>() {
+        public int compare(double[] o1, double[] o2) {
+          return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
+        }
+      });
+      /* Weighted mean = sum of reported values / sum of fractions;
+       * weighted median = first extrapolation where the cumulative
+       * fraction exceeds half the total; the interquartile mean uses
+       * only the middle 50% by cumulative fraction. */
+      double totalProbability = 0.0, totalValues = 0.0;
+      double totalInterquartileProbability = 0.0,
+          totalInterquartileValues = 0.0;
+      Double weightedMedian = null;
+      for (double[] extrapolation : singleRelayExtrapolations) {
+        totalValues += extrapolation[1];
+        totalProbability += extrapolation[2];
+        if (weightedMedian == null &&
+            totalProbability > totalReportingProbability * 0.5) {
+          weightedMedian = extrapolation[0];
+        }
+        if (totalProbability > totalReportingProbability * 0.25 &&
+            totalProbability < totalReportingProbability * 0.75) {
+          totalInterquartileValues += extrapolation[1];
+          totalInterquartileProbability += extrapolation[2];
+        }
+      }
+      sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
+          totalValues / totalProbability, weightedMedian,
+          totalInterquartileValues / totalInterquartileProbability));
+    }
+    return sb.toString();
+  }
+}
diff --git a/shared/bin/70-run-hidserv-stats.sh b/shared/bin/70-run-hidserv-stats.sh
new file mode 100755
index 0000000..a924f31
--- /dev/null
+++ b/shared/bin/70-run-hidserv-stats.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+cd modules/hidserv/
+ant | grep "\[java\]"
+cd ../../
+
diff --git a/shared/bin/99-copy-stats-files.sh b/shared/bin/99-copy-stats-files.sh
index 2292da2..5493cf8 100755
--- a/shared/bin/99-copy-stats-files.sh
+++ b/shared/bin/99-copy-stats-files.sh
@@ -2,4 +2,5 @@
 mkdir -p shared/stats
 cp -a modules/legacy/stats/*.csv shared/stats/
 cp -a modules/advbwdist/stats/advbwdist.csv shared/stats/
+cp -a modules/hidserv/stats/hidserv.csv shared/stats/
 





More information about the tor-commits mailing list