commit fefb0f946aa5018639415cb67da7f35d35ff721b
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Mar 11 15:25:38 2015 +0100
Add hidserv-stats extrapolation code.
---
modules/hidserv/.gitignore | 4 +
modules/hidserv/build.xml | 44 ++
.../org/torproject/metrics/hidserv/Aggregator.java | 191 ++++++++
.../metrics/hidserv/ComputedNetworkFractions.java | 141 ++++++
.../torproject/metrics/hidserv/DateTimeHelper.java | 95 ++++
.../org/torproject/metrics/hidserv/Document.java | 19 +
.../torproject/metrics/hidserv/DocumentStore.java | 157 +++++++
.../metrics/hidserv/ExtrapolatedHidServStats.java | 156 +++++++
.../torproject/metrics/hidserv/Extrapolator.java | 251 ++++++++++
.../src/org/torproject/metrics/hidserv/Main.java | 91 ++++
.../src/org/torproject/metrics/hidserv/Parser.java | 484 ++++++++++++++++++++
.../metrics/hidserv/ReportedHidServStats.java | 130 ++++++
.../org/torproject/metrics/hidserv/Simulate.java | 360 +++++++++++++++
shared/bin/70-run-hidserv-stats.sh | 5 +
shared/bin/99-copy-stats-files.sh | 1 +
15 files changed, 2129 insertions(+)
diff --git a/modules/hidserv/.gitignore b/modules/hidserv/.gitignore
new file mode 100644
index 0000000..4bb76a5
--- /dev/null
+++ b/modules/hidserv/.gitignore
@@ -0,0 +1,4 @@
+classes/
+stats/
+status/
+
diff --git a/modules/hidserv/build.xml b/modules/hidserv/build.xml
new file mode 100644
index 0000000..7480b8c
--- /dev/null
+++ b/modules/hidserv/build.xml
@@ -0,0 +1,44 @@
+<project default="run" name="hidserv" basedir=".">
+ <!-- Ant build of the hidserv module; the default target "run" depends on "compile". -->
+ <property name="sources" value="src"/>
+ <property name="classes" value="classes"/>
+ <path id="classpath">
+ <pathelement path="${classes}"/>
+ <fileset dir="/usr/share/java">
+ <include name="commons-codec-1.6.jar"/>
+ <include name="commons-compress-1.4.1.jar"/>
+ <include name="commons-lang-2.6.jar"/>
+ </fileset>
+ <fileset dir="../../deps/metrics-lib">
+ <include name="descriptor.jar"/>
+ </fileset>
+ </path>
+ <!-- Run the default target of the metrics-lib build; presumably this produces descriptor.jar (confirm there). -->
+ <target name="metrics-lib">
+ <ant dir="../../deps/metrics-lib"/>
+ </target>
+ <!-- Compile everything below ${sources} into ${classes} with Java 1.6 source and target levels. -->
+ <target name="compile" depends="metrics-lib">
+ <mkdir dir="${classes}"/>
+ <javac destdir="${classes}"
+ srcdir="${sources}"
+ source="1.6"
+ target="1.6"
+ debug="true"
+ deprecation="true"
+ optimize="false"
+ failonerror="true"
+ includeantruntime="false">
+ <classpath refid="classpath"/>
+ </javac>
+ </target>
+ <!-- Run the module's Main class in a forked JVM with a maximum heap of 1024m. -->
+ <target name="run" depends="compile">
+ <java fork="true"
+ maxmemory="1024m"
+ classname="org.torproject.metrics.hidserv.Main">
+ <classpath refid="classpath"/>
+ </java>
+ </target>
+</project>
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java
new file mode 100644
index 0000000..192a342
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java
@@ -0,0 +1,191 @@
+package org.torproject.metrics.hidserv;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/* Aggregate extrapolated network totals of hidden-service statistics by
+ * calculating statistics like the daily weighted interquartile mean.
+ * Also calculate simpler statistics like the number of reported
+ * statistics and the total network fraction of reporting relays. */
+public class Aggregator {
+
+  /* Document file containing extrapolated hidden-service statistics. */
+  private File extrapolatedHidServStatsFile;
+
+  /* Document store for storing and retrieving extrapolated hidden-service
+   * statistics. */
+  private DocumentStore<ExtrapolatedHidServStats>
+      extrapolatedHidServStatsStore;
+
+  /* Output file for writing aggregated statistics. */
+  private File hidservStatsCsvFile;
+
+  /* Initialize a new aggregator object using the given directory,
+   * document store, and output file for results. */
+  public Aggregator(File statusDirectory,
+      DocumentStore<ExtrapolatedHidServStats>
+      extrapolatedHidServStatsStore, File hidservStatsCsvFile) {
+
+    /* Create a File instance for the document file containing
+     * extrapolated network totals. */
+    this.extrapolatedHidServStatsFile = new File(statusDirectory,
+        "extrapolated-hidserv-stats");
+
+    /* Store references to the provided document store and output file. */
+    this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore;
+    this.hidservStatsCsvFile = hidservStatsCsvFile;
+  }
+
+  /* Calculate aggregates for all extrapolated hidden-service statistics
+   * and write them to the output file. */
+  public void aggregateHidServStats() {
+    /* Retrieve previously extrapolated network totals. */
+    Set<ExtrapolatedHidServStats> extrapolatedStats =
+        this.extrapolatedHidServStatsStore.retrieve(
+        this.extrapolatedHidServStatsFile);
+    if (extrapolatedStats == null) {
+      System.err.printf("Unable to retrieve extrapolated hidden-service "
+          + "statistics from file %s. Skipping aggregation step.%n",
+          this.extrapolatedHidServStatsFile.getAbsolutePath());
+      return;
+    }
+
+    /* Re-arrange extrapolated network totals by statistics interval end
+     * date.  Map keys are ISO-formatted dates, map values are lists of
+     * double[] arrays with the extrapolated network total as first
+     * element and the corresponding computed network fraction, which is
+     * later used as weight, as second element. */
+    SortedMap<String, List<double[]>>
+        extrapolatedCells = new TreeMap<String, List<double[]>>(),
+        extrapolatedOnions = new TreeMap<String, List<double[]>>();
+    for (ExtrapolatedHidServStats stats : extrapolatedStats) {
+      String date = DateTimeHelper.format(stats.getStatsDateMillis(),
+          DateTimeHelper.ISO_DATE_FORMAT);
+      if (stats.getFractionRendRelayedCells() > 0.0) {
+        if (!extrapolatedCells.containsKey(date)) {
+          extrapolatedCells.put(date, new ArrayList<double[]>());
+        }
+        extrapolatedCells.get(date).add(new double[] {
+            stats.getExtrapolatedRendRelayedCells(),
+            stats.getFractionRendRelayedCells() });
+      }
+      if (stats.getFractionDirOnionsSeen() > 0.0) {
+        if (!extrapolatedOnions.containsKey(date)) {
+          extrapolatedOnions.put(date, new ArrayList<double[]>());
+        }
+        extrapolatedOnions.get(date).add(new double[] {
+            stats.getExtrapolatedDirOnionsSeen(),
+            stats.getFractionDirOnionsSeen() });
+      }
+    }
+
+    /* Collect all output lines in a string builder.  Each line contains
+     * an ISO-formatted "date", a string identifier for the "type" of
+     * statistic, the weighted mean ("wmean"), weighted median ("wmedian"),
+     * weighted interquartile mean ("wiqm"), the total network "frac"tion,
+     * and the number of "stats" with non-zero computed fraction. */
+    StringBuilder sb = new StringBuilder();
+    sb.append("date,type,wmean,wmedian,wiqm,frac,stats\n");
+
+    /* Repeat all aggregation steps for both types of statistics. */
+    for (int i = 0; i < 2; i++) {
+      String type = i == 0 ? "rend-relayed-cells" : "dir-onions-seen";
+      SortedMap<String, List<double[]>> extrapolated = i == 0
+          ? extrapolatedCells : extrapolatedOnions;
+      /* Go through all dates. */
+      for (Map.Entry<String, List<double[]>> e :
+          extrapolated.entrySet()) {
+        String date = e.getKey();
+        List<double[]> weightedValues = e.getValue();
+        int numStats = weightedValues.size();
+
+        /* Sort by extrapolated network total (first array element); the
+         * second array element is the computed network fraction. */
+        Collections.sort(weightedValues, new Comparator<double[]>() {
+          public int compare(double[] o1, double[] o2) {
+            return Double.compare(o1[0], o2[0]);
+          }
+        });
+
+        /* For the weighted mean, sum up all extrapolated values weighted
+         * with their network fractions (which happens to be the values
+         * that relays reported), and sum up all network fractions.  The
+         * weighted mean is the first sum divided by the second, which is
+         * positive because only positive fractions were collected. */
+        double sumReported = 0.0, sumFraction = 0.0;
+        for (double[] d : weightedValues) {
+          sumReported += d[0] * d[1];
+          sumFraction += d[1];
+        }
+        double weightedMean = sumReported / sumFraction;
+
+        /* For the weighted median and weighted interquartile mean, go
+         * through all values once again.  The weighted median is the
+         * first extrapolated value with weight interval end greater than
+         * 50% of reported network fractions.  For the interquartile mean,
+         * sum up extrapolated values weighted with the parts of their
+         * weight intervals falling into the 25% to 75% range. */
+        double weightIntervalEnd = 0.0;
+        Double weightedMedian = null;
+        double sumFractionInterquartile = 0.0,
+            sumReportedInterquartile = 0.0;
+        for (double[] d : weightedValues) {
+          double extrapolatedValue = d[0], computedFraction = d[1];
+          double weightIntervalStart = weightIntervalEnd;
+          weightIntervalEnd += computedFraction;
+          if (weightedMedian == null &&
+              weightIntervalEnd > sumFraction * 0.5) {
+            weightedMedian = extrapolatedValue;
+          }
+          if (weightIntervalEnd >= sumFraction * 0.25 &&
+              weightIntervalStart <= sumFraction * 0.75) {
+            double fractionBetweenQuartiles =
+                Math.min(weightIntervalEnd, sumFraction * 0.75)
+                - Math.max(weightIntervalStart, sumFraction * 0.25);
+            sumReportedInterquartile += extrapolatedValue
+                * fractionBetweenQuartiles;
+            sumFractionInterquartile += fractionBetweenQuartiles;
+          }
+        }
+        double weightedInterquartileMean =
+            sumReportedInterquartile / sumFractionInterquartile;
+
+        /* Put together all aggregated values in a single line, using a
+         * fixed locale to keep '.' as decimal separator in the CSV. */
+        sb.append(String.format(java.util.Locale.US,
+            "%s,%s,%.0f,%.0f,%.0f,%.8f,%d%n", date, type, weightedMean,
+            weightedMedian, weightedInterquartileMean, sumFraction,
+            numStats));
+      }
+    }
+
+    /* Write all aggregated results to the output file. */
+    File parentDirectory = this.hidservStatsCsvFile.getParentFile();
+    if (parentDirectory != null) {
+      parentDirectory.mkdirs();
+    }
+    try {
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          this.hidservStatsCsvFile));
+      try {
+        bw.write(sb.toString());
+      } finally {
+        bw.close();
+      }
+    } catch (IOException e) {
+      System.err.printf("Unable to write results to %s. Ignoring.%n",
+          this.hidservStatsCsvFile.getAbsolutePath());
+    }
+  }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java
new file mode 100644
index 0000000..1fe0020
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java
@@ -0,0 +1,141 @@
+package org.torproject.metrics.hidserv;
+
+/* Computed fraction of hidden-service activity that a single relay is
+ * assumed to observe in the network.  These fractions are computed from
+ * status entries and bandwidth weights in a network status consensus. */
+public class ComputedNetworkFractions implements Document {
+
+  /* Relay fingerprint consisting of 40 upper-case hex characters. */
+  private String fingerprint;
+  public String getFingerprint() {
+    return this.fingerprint;
+  }
+
+  /* Valid-after timestamp of the consensus in milliseconds. */
+  private long validAfterMillis;
+  public long getValidAfterMillis() {
+    return this.validAfterMillis;
+  }
+
+  /* Fraction of cells on rendezvous circuits that this relay is assumed
+   * to observe in the network. */
+  private double fractionRendRelayedCells;
+  public void setFractionRendRelayedCells(
+      double fractionRendRelayedCells) {
+    this.fractionRendRelayedCells = fractionRendRelayedCells;
+  }
+  public double getFractionRendRelayedCells() {
+    return this.fractionRendRelayedCells;
+  }
+
+  /* Fraction of descriptors that this relay is assumed to observe in the
+   * network.  This is calculated as the fraction of descriptor
+   * identifiers that this relay was responsible for, divided by 3,
+   * because each descriptor that is published to this directory is also
+   * published to two other directories. */
+  private double fractionDirOnionsSeen;
+  public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) {
+    this.fractionDirOnionsSeen = fractionDirOnionsSeen;
+  }
+  public double getFractionDirOnionsSeen() {
+    return this.fractionDirOnionsSeen;
+  }
+
+  /* Instantiate a new fractions object using fingerprint and consensus
+   * valid-after time which together uniquely identify the object. */
+  public ComputedNetworkFractions(String fingerprint,
+      long validAfterMillis) {
+    this.fingerprint = fingerprint;
+    this.validAfterMillis = validAfterMillis;
+  }
+
+  /* Return whether this object contains the same fingerprint and
+   * consensus valid-after time as the passed object. */
+  @Override
+  public boolean equals(Object otherObject) {
+    if (!(otherObject instanceof ComputedNetworkFractions)) {
+      return false;
+    }
+    ComputedNetworkFractions other =
+        (ComputedNetworkFractions) otherObject;
+    return this.fingerprint.equals(other.fingerprint) &&
+        this.validAfterMillis == other.validAfterMillis;
+  }
+
+  /* Return a (hopefully unique) hash code based on this object's
+   * fingerprint and consensus valid-after time. */
+  @Override
+  public int hashCode() {
+    return this.fingerprint.hashCode() +
+        (int) this.validAfterMillis;
+  }
+
+  /* Return a string representation of this object, consisting of two
+   * strings: the first string contains fingerprint and valid-after date,
+   * the second string contains the concatenation of all other
+   * attributes.  Fractions are formatted using a fixed locale, so that
+   * they always contain '.' as decimal separator and can be parsed
+   * again; fractions of 0.0 are written as empty strings. */
+  @Override
+  public String[] format() {
+    String first = String.format("%s,%s", this.fingerprint,
+        DateTimeHelper.format(this.validAfterMillis,
+        DateTimeHelper.ISO_DATE_FORMAT));
+    String second = DateTimeHelper.format(this.validAfterMillis,
+        DateTimeHelper.ISO_HOUR_FORMAT)
+        + (this.fractionRendRelayedCells == 0.0 ? "," : String.format(
+        java.util.Locale.US, ",%f", this.fractionRendRelayedCells))
+        + (this.fractionDirOnionsSeen == 0.0 ? "," : String.format(
+        java.util.Locale.US, ",%f", this.fractionDirOnionsSeen));
+    return new String[] { first, second };
+  }
+
+  /* Instantiate an empty fractions object that will be initialized more
+   * by the parse method. */
+  ComputedNetworkFractions() {
+  }
+
+  /* Initialize this fractions object using the two provided strings that
+   * have been produced by the format method earlier.  Return whether this
+   * operation was successful; if it was not, this object is left
+   * unchanged. */
+  @Override
+  public boolean parse(String[] formattedStrings) {
+    if (formattedStrings == null || formattedStrings.length != 2) {
+      System.err.printf("Invalid number of formatted strings. "
+          + "Skipping.%n");
+      return false;
+    }
+    String[] firstParts = formattedStrings[0].split(",", 2);
+    String[] secondParts = formattedStrings[1].split(",", 3);
+    if (firstParts.length != 2 || secondParts.length != 3) {
+      System.err.printf("Invalid number of comma-separated values. "
+          + "Skipping.%n");
+      return false;
+    }
+    String fingerprint = firstParts[0];
+    long validAfterMillis = DateTimeHelper.parse(firstParts[1] + " "
+        + secondParts[0], DateTimeHelper.ISO_DATE_HOUR_FORMAT);
+    if (validAfterMillis == DateTimeHelper.NO_TIME_AVAILABLE) {
+      System.err.printf("Invalid date/hour format. Skipping.%n");
+      return false;
+    }
+    /* Empty strings stand for fractions of 0.0, which is how the format
+     * method writes them. */
+    try {
+      double fractionRendRelayedCells = secondParts[1].equals("")
+          ? 0.0 : Double.parseDouble(secondParts[1]);
+      double fractionDirOnionsSeen = secondParts[2].equals("")
+          ? 0.0 : Double.parseDouble(secondParts[2]);
+      this.fingerprint = fingerprint;
+      this.validAfterMillis = validAfterMillis;
+      this.fractionRendRelayedCells = fractionRendRelayedCells;
+      this.fractionDirOnionsSeen = fractionDirOnionsSeen;
+      return true;
+    } catch (NumberFormatException e) {
+      System.err.printf("Invalid number format. Skipping.%n");
+      return false;
+    }
+  }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/DateTimeHelper.java b/modules/hidserv/src/org/torproject/metrics/hidserv/DateTimeHelper.java
new file mode 100644
index 0000000..c33a50d
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/DateTimeHelper.java
@@ -0,0 +1,95 @@
+package org.torproject.metrics.hidserv;
+
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+/* Utility class to format and parse dates and timestamps. */
+public class DateTimeHelper {
+
+ /* This class is not supposed to be instantiated, which is why its
+ * constructor has private visibility. */
+ private DateTimeHelper() {
+ }
+
+ /* Some useful time constant. */
+ public static final long
+ ONE_SECOND = 1000L,
+ ONE_MINUTE = 60L * ONE_SECOND,
+ ONE_HOUR = 60L * ONE_MINUTE,
+ ONE_DAY = 24L * ONE_HOUR;
+
+ /* Some useful date/time formats. */
+ public static final String
+ ISO_DATETIME_FORMAT = "yyyy-MM-dd HH:mm:ss",
+ ISO_DATE_HOUR_FORMAT = "yyyy-MM-dd HH",
+ ISO_DATE_FORMAT = "yyyy-MM-dd",
+ ISO_HOUR_FORMAT = "HH";
+
+ /* Map of DateFormat instances for parsing and formatting dates and
+ * timestamps, protected using ThreadLocal to ensure that each thread
+ * uses its own instances. */
+ private static ThreadLocal<Map<String, DateFormat>> dateFormats =
+ new ThreadLocal<Map<String, DateFormat>> () {
+ public Map<String, DateFormat> get() {
+ return super.get();
+ }
+ protected Map<String, DateFormat> initialValue() {
+ return new HashMap<String, DateFormat>();
+ }
+ public void remove() {
+ super.remove();
+ }
+ public void set(Map<String, DateFormat> value) {
+ super.set(value);
+ }
+ };
+
+ /* Return an instance of DateFormat for the given format. If no such
+ * instance exists, create one and put it in the map. */
+ private static DateFormat getDateFormat(String format) {
+ Map<String, DateFormat> threadDateFormats = dateFormats.get();
+ if (!threadDateFormats.containsKey(format)) {
+ DateFormat dateFormat = new SimpleDateFormat(format);
+ dateFormat.setLenient(false);
+ dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ threadDateFormats.put(format, dateFormat);
+ }
+ return threadDateFormats.get(format);
+ }
+
+ /* Format the given time in milliseconds using the given format. */
+ public static String format(long millis, String format) {
+ return getDateFormat(format).format(millis);
+ }
+
+ /* Format the given time in milliseconds using ISO date/time format. */
+ public static String format(long millis) {
+ return format(millis, ISO_DATETIME_FORMAT);
+ }
+
+ /* Default result of the parse methods if the provided time could not be
+ * parsed. */
+ public final static long NO_TIME_AVAILABLE = -1L;
+
+ /* Parse the given string using the given format. */
+ public static long parse(String string, String format) {
+ if (null == string) {
+ return NO_TIME_AVAILABLE;
+ }
+ try {
+ return getDateFormat(format).parse(string).getTime();
+ } catch (ParseException e) {
+ return NO_TIME_AVAILABLE;
+ }
+ }
+
+ /* Parse the given string using ISO date/time format. */
+ public static long parse(String string) {
+ return parse(string, ISO_DATETIME_FORMAT);
+ }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Document.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Document.java
new file mode 100644
index 0000000..47614f3
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Document.java
@@ -0,0 +1,19 @@
+package org.torproject.metrics.hidserv;
+
+/* Common interface of documents that can be serialized to two strings,
+ * stored in document files, and later retrieved and de-serialized. */
+public interface Document {
+
+  /* Serialize this document to an array of two strings.  The first
+   * string starts a group of documents in a document file, the second
+   * string represents this single document within that group.  Ideally,
+   * many documents stored in the same file share the same first string
+   * and differ only in the second string. */
+  String[] format();
+
+  /* Initialize this document from the given array of two strings, which
+   * are the same two strings that the format method provides.  Return
+   * whether this operation was successful. */
+  boolean parse(String[] formattedStrings);
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java b/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java
new file mode 100644
index 0000000..3266df5
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java
@@ -0,0 +1,157 @@
+package org.torproject.metrics.hidserv;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/* Utility class to store serialized objects implementing the Document
+ * interface to a file and later to retrieve them. */
+public class DocumentStore<T extends Document> {
+
+ /* Document class, needed to create new instances when retrieving
+ * documents. */
+ private Class<T> clazz;
+
+ /* Initialize a new store object for the given type of documents. */
+ DocumentStore(Class<T> clazz) {
+ this.clazz = clazz;
+ }
+
+ /* Store the provided documents in the given file and return whether the
+ * storage operation was successful. If the file already existed and if
+ * it contains documents, merge the new documents with the existing
+ * ones. */
+ public boolean store(File documentFile, Set<T> documentsToStore) {
+
+ /* Retrieve existing documents. */
+ Set<T> retrievedDocuments = this.retrieve(documentFile);
+ if (retrievedDocuments == null) {
+ System.err.printf("Unable to read and update %s. Not storing "
+ + "documents.%n", documentFile.getAbsoluteFile());
+ return false;
+ }
+
+ /* Merge new documents with existing ones. */
+ retrievedDocuments.addAll(documentsToStore);
+
+ /* Serialize documents. */
+ SortedMap<String, SortedSet<String>> formattedDocuments =
+ new TreeMap<String, SortedSet<String>>();
+ for (T retrieveDocument : retrievedDocuments) {
+ String[] formattedDocument = retrieveDocument.format();
+ if (!formattedDocuments.containsKey(formattedDocument[0])) {
+ formattedDocuments.put(formattedDocument[0],
+ new TreeSet<String>());
+ }
+ formattedDocuments.get(formattedDocument[0]).add(
+ formattedDocument[1]);
+ }
+
+ /* Check if a temporary file exists from the previous execution. */
+ File documentTempFile = new File(documentFile.getAbsoluteFile()
+ + ".tmp");
+ if (documentTempFile.exists()) {
+ System.err.printf("Temporary document file %s still exists, "
+ + "indicating that a previous execution did not terminate "
+ + "cleanly. Not storing documents.%n",
+ documentTempFile.getAbsoluteFile());
+ return false;
+ }
+
+ /* Write to a new temporary file, then move it into place, possibly
+ * overwriting an existing file. */
+ try {
+ documentTempFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ documentTempFile));
+ for (Map.Entry<String, SortedSet<String>> e :
+ formattedDocuments.entrySet()) {
+ bw.write(e.getKey() + "\n");
+ for (String s : e.getValue()) {
+ bw.write(" " + s + "\n");
+ }
+ }
+ bw.close();
+ documentFile.delete();
+ documentTempFile.renameTo(documentFile);
+ } catch (IOException e) {
+ System.err.printf("Unable to write %s. Not storing documents.%n",
+ documentFile.getAbsolutePath());
+ return false;
+ }
+
+ /* Return success. */
+ return true;
+ }
+
+ /* Retrieve all previously stored documents from the given file. */
+ public Set<T> retrieve(File documentFile) {
+
+ /* Check if the document file exists, and if not, return an empty set.
+ * This is not an error case. */
+ Set<T> result = new HashSet<T>();
+ if (!documentFile.exists()) {
+ return result;
+ }
+
+ /* Parse the document file line by line and de-serialize contained
+ * documents. */
+ try {
+ LineNumberReader lnr = new LineNumberReader(new BufferedReader(
+ new FileReader(documentFile)));
+ String line, formattedString0 = null;
+ while ((line = lnr.readLine()) != null) {
+ if (!line.startsWith(" ")) {
+ formattedString0 = line;
+ } else if (formattedString0 == null) {
+ System.err.printf("First line in %s must not start with a "
+ + "space. Not retrieving any previously stored "
+ + "documents.%n", documentFile.getAbsolutePath());
+ lnr.close();
+ return null;
+ } else {
+ T document = this.clazz.newInstance();
+ if (!document.parse(new String[] { formattedString0,
+ line.substring(1) })) {
+ System.err.printf("Unable to read line %d from %s. Not "
+ + "retrieving any previously stored documents.%n",
+ lnr.getLineNumber(), documentFile.getAbsolutePath());
+ lnr.close();
+ return null;
+ }
+ result.add(document);
+ }
+ }
+ lnr.close();
+ } catch (IOException e) {
+ System.err.printf("Unable to read %s. Not retrieving any "
+ + "previously stored documents.%n",
+ documentFile.getAbsolutePath());
+ e.printStackTrace();
+ return null;
+ } catch (InstantiationException e) {
+ System.err.printf("Unable to read %s. Cannot instantiate document "
+ + "object.%n", documentFile.getAbsolutePath());
+ e.printStackTrace();
+ return null;
+ } catch (IllegalAccessException e) {
+ System.err.printf("Unable to read %s. Cannot instantiate document "
+ + "object.%n", documentFile.getAbsolutePath());
+ e.printStackTrace();
+ return null;
+ }
+ return result;
+ }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java
new file mode 100644
index 0000000..52357d4
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java
@@ -0,0 +1,156 @@
+package org.torproject.metrics.hidserv;
+
+/* Extrapolated network totals of hidden-service statistics reported by a
+ * single relay.  Extrapolated values are based on reported statistics and
+ * computed network fractions in the statistics interval. */
+public class ExtrapolatedHidServStats implements Document {
+
+  /* Date of statistics interval end in milliseconds. */
+  private long statsDateMillis;
+  public long getStatsDateMillis() {
+    return this.statsDateMillis;
+  }
+
+  /* Relay fingerprint consisting of 40 upper-case hex characters. */
+  private String fingerprint;
+  public String getFingerprint() {
+    return this.fingerprint;
+  }
+
+  /* Extrapolated number of cells on rendezvous circuits in the
+   * network. */
+  private double extrapolatedRendRelayedCells;
+  public void setExtrapolatedRendRelayedCells(
+      double extrapolatedRendRelayedCells) {
+    this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells;
+  }
+  public double getExtrapolatedRendRelayedCells() {
+    return this.extrapolatedRendRelayedCells;
+  }
+
+  /* Computed fraction of observed cells on rendezvous circuits in the
+   * network, used to weight this relay's extrapolated network total in
+   * the aggregation step. */
+  private double fractionRendRelayedCells;
+  public void setFractionRendRelayedCells(
+      double fractionRendRelayedCells) {
+    this.fractionRendRelayedCells = fractionRendRelayedCells;
+  }
+  public double getFractionRendRelayedCells() {
+    return this.fractionRendRelayedCells;
+  }
+
+  /* Extrapolated number of .onions in the network. */
+  private double extrapolatedDirOnionsSeen;
+  public void setExtrapolatedDirOnionsSeen(
+      double extrapolatedDirOnionsSeen) {
+    this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen;
+  }
+  public double getExtrapolatedDirOnionsSeen() {
+    return this.extrapolatedDirOnionsSeen;
+  }
+
+  /* Computed fraction of observed .onions in the network, used to weight
+   * this relay's extrapolated network total in the aggregation step. */
+  private double fractionDirOnionsSeen;
+  public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) {
+    this.fractionDirOnionsSeen = fractionDirOnionsSeen;
+  }
+  public double getFractionDirOnionsSeen() {
+    return this.fractionDirOnionsSeen;
+  }
+
+  /* Instantiate a new stats object using fingerprint and statistics
+   * interval end date which together uniquely identify the object. */
+  public ExtrapolatedHidServStats(long statsDateMillis,
+      String fingerprint) {
+    this.statsDateMillis = statsDateMillis;
+    this.fingerprint = fingerprint;
+  }
+
+  /* Return whether this object contains the same fingerprint and
+   * statistics interval end date as the passed object. */
+  @Override
+  public boolean equals(Object otherObject) {
+    if (!(otherObject instanceof ExtrapolatedHidServStats)) {
+      return false;
+    }
+    ExtrapolatedHidServStats other =
+        (ExtrapolatedHidServStats) otherObject;
+    return this.fingerprint.equals(other.fingerprint) &&
+        this.statsDateMillis == other.statsDateMillis;
+  }
+
+  /* Return a (hopefully unique) hash code based on this object's
+   * fingerprint and statistics interval end date. */
+  @Override
+  public int hashCode() {
+    return this.fingerprint.hashCode() + (int) this.statsDateMillis;
+  }
+
+  /* Return a string representation of this object, consisting of the
+   * statistics interval end date and the concatenation of all other
+   * attributes, with numbers always using '.' as decimal separator. */
+  @Override
+  public String[] format() {
+    String first = DateTimeHelper.format(this.statsDateMillis,
+        DateTimeHelper.ISO_DATE_FORMAT);
+    String second = this.fingerprint +
+        (this.fractionRendRelayedCells == 0.0 ? ",,"
+        : String.format(java.util.Locale.US, ",%.0f,%f",
+        this.extrapolatedRendRelayedCells,
+        this.fractionRendRelayedCells)) +
+        (this.fractionDirOnionsSeen == 0.0 ? ",,"
+        : String.format(java.util.Locale.US, ",%.0f,%f",
+        this.extrapolatedDirOnionsSeen, this.fractionDirOnionsSeen));
+    return new String[] { first, second };
+  }
+
+  /* Instantiate an empty stats object that will be initialized more by
+   * the parse method. */
+  ExtrapolatedHidServStats() {
+  }
+
+  /* Initialize this stats object using the two strings produced by the
+   * format method earlier.  Return whether this operation was successful;
+   * if it was not, this object is left unchanged. */
+  @Override
+  public boolean parse(String[] formattedStrings) {
+    if (formattedStrings == null || formattedStrings.length != 2) {
+      System.err.printf("Invalid number of formatted strings. "
+          + "Skipping.%n");
+      return false;
+    }
+    long statsDateMillis = DateTimeHelper.parse(formattedStrings[0],
+        DateTimeHelper.ISO_DATE_FORMAT);
+    if (statsDateMillis == DateTimeHelper.NO_TIME_AVAILABLE) {
+      System.err.printf("Invalid date format. Skipping.%n");
+      return false;
+    }
+    String[] secondParts = formattedStrings[1].split(",", 5);
+    if (secondParts.length != 5) {
+      System.err.printf("Invalid number of comma-separated values. "
+          + "Skipping.%n");
+      return false;
+    }
+    /* Parse the four numbers; empty strings stand for 0.0. */
+    double[] values = new double[4];
+    try {
+      for (int i = 0; i < values.length; i++) {
+        values[i] = secondParts[i + 1].equals("") ? 0.0
+            : Double.parseDouble(secondParts[i + 1]);
+      }
+    } catch (NumberFormatException e) {
+      System.err.printf("Invalid number format. Skipping.%n");
+      return false;
+    }
+    this.statsDateMillis = statsDateMillis;
+    this.fingerprint = secondParts[0];
+    this.extrapolatedRendRelayedCells = values[0];
+    this.fractionRendRelayedCells = values[1];
+    this.extrapolatedDirOnionsSeen = values[2];
+    this.fractionDirOnionsSeen = values[3];
+    return true;
+  }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java
new file mode 100644
index 0000000..a1ff075
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java
@@ -0,0 +1,251 @@
+package org.torproject.metrics.hidserv;
+
+import java.io.File;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/* Extrapolate hidden-service statistics reported by single relays by
+ * dividing them by the computed fraction of hidden-service activity
+ * observed by the relay. */
+public class Extrapolator {
+
+ /* Document file containing previously parsed reported hidden-service
+ * statistics. */
+ private File reportedHidServStatsFile;
+
+ /* Document store for storing and retrieving reported hidden-service
+ * statistics. */
+ private DocumentStore<ReportedHidServStats> reportedHidServStatsStore;
+
+ /* Directory containing document files with previously computed network
+ * fractions. */
+ private File computedNetworkFractionsDirectory;
+
+ /* Document store for storing and retrieving computed network
+ * fractions. */
+ private DocumentStore<ComputedNetworkFractions>
+ computedNetworkFractionsStore;
+
+ /* Document file containing extrapolated hidden-service statistics. */
+ private File extrapolatedHidServStatsFile;
+
+ /* Document store for storing and retrieving extrapolated hidden-service
+ * statistics. */
+ private DocumentStore<ExtrapolatedHidServStats>
+ extrapolatedHidServStatsStore;
+
+ /* Set up a new extrapolator that keeps its document files below the
+  * given status directory and that reads and writes documents through
+  * the given document stores. */
+ public Extrapolator(File statusDirectory,
+     DocumentStore<ReportedHidServStats> reportedHidServStatsStore,
+     DocumentStore<ComputedNetworkFractions>
+     computedNetworkFractionsStore,
+     DocumentStore<ExtrapolatedHidServStats>
+     extrapolatedHidServStatsStore) {
+
+   /* Remember the document stores handed in by the caller. */
+   this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore;
+   this.computedNetworkFractionsStore = computedNetworkFractionsStore;
+   this.reportedHidServStatsStore = reportedHidServStatsStore;
+
+   /* Derive the locations of all files and directories used by this
+    * class from the status directory. */
+   this.extrapolatedHidServStatsFile = new File(statusDirectory,
+       "extrapolated-hidserv-stats");
+   this.computedNetworkFractionsDirectory = new File(statusDirectory,
+       "computed-network-fractions");
+   this.reportedHidServStatsFile = new File(statusDirectory,
+       "reported-hidserv-stats");
+ }
+
+ /* Iterate over all reported stats and extrapolate network totals for
+  * those that have not been extrapolated before. Return false if any
+  * previously stored document could not be retrieved; otherwise return
+  * the result of storing the extrapolated stats. */
+ public boolean extrapolateHidServStats() {
+
+   /* Retrieve previously extrapolated stats to avoid extrapolating them
+    * again. */
+   Set<ExtrapolatedHidServStats> extrapolatedStats =
+       this.extrapolatedHidServStatsStore.retrieve(
+       this.extrapolatedHidServStatsFile);
+
+   /* Retrieve all reported stats, even including those that have already
+    * been extrapolated. */
+   Set<ReportedHidServStats> reportedStats =
+       this.reportedHidServStatsStore.retrieve(
+       this.reportedHidServStatsFile);
+
+   /* Make sure that all documents could be retrieved correctly. */
+   if (extrapolatedStats == null || reportedStats == null) {
+     System.err.printf("Could not read previously parsed or "
+         + "extrapolated hidserv-stats. Skipping.%n");
+     return false;
+   }
+
+   /* Re-arrange reported stats by fingerprint. */
+   SortedMap<String, Set<ReportedHidServStats>>
+       parsedStatsByFingerprint =
+       new TreeMap<String, Set<ReportedHidServStats>>();
+   for (ReportedHidServStats stat : reportedStats) {
+     String fingerprint = stat.getFingerprint();
+     if (!parsedStatsByFingerprint.containsKey(fingerprint)) {
+       parsedStatsByFingerprint.put(fingerprint,
+           new HashSet<ReportedHidServStats>());
+     }
+     parsedStatsByFingerprint.get(fingerprint).add(stat);
+   }
+
+   /* Go through reported stats by fingerprint. */
+   for (Map.Entry<String, Set<ReportedHidServStats>> e :
+       parsedStatsByFingerprint.entrySet()) {
+     String fingerprint = e.getKey();
+
+     /* Iterate over all stats reported by this relay and make a list of
+      * those that still need to be extrapolated. Also make a list of
+      * all dates for which we need to retrieve computed network
+      * fractions. */
+     Set<ReportedHidServStats> newReportedStats =
+         new HashSet<ReportedHidServStats>();
+     SortedSet<String> retrieveFractionDates = new TreeSet<String>();
+     for (ReportedHidServStats stats : e.getValue()) {
+
+       /* Check whether extrapolated stats already contain an object with
+        * the same statistics interval end date and fingerprint. */
+       long statsEndMillis = stats.getStatsEndMillis();
+       long statsDateMillis = (statsEndMillis / DateTimeHelper.ONE_DAY)
+           * DateTimeHelper.ONE_DAY;
+       if (extrapolatedStats.contains(
+           new ExtrapolatedHidServStats(statsDateMillis, fingerprint))) {
+         continue;
+       }
+
+       /* Add the reported stats to the list of stats we still need to
+        * extrapolate. */
+       newReportedStats.add(stats);
+
+       /* Add all dates touched by the statistics interval to a list.
+        * Start at the beginning of the day containing the interval
+        * start: stepping in whole days from the start time itself would
+        * skip the end date whenever the interval is not a multiple of
+        * one day but still crosses midnight. */
+       long statsStartMillis = statsEndMillis
+           - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND;
+       long firstDayMillis = (statsStartMillis / DateTimeHelper.ONE_DAY)
+           * DateTimeHelper.ONE_DAY;
+       for (long millis = firstDayMillis; millis <= statsEndMillis;
+           millis += DateTimeHelper.ONE_DAY) {
+         retrieveFractionDates.add(DateTimeHelper.format(millis,
+             DateTimeHelper.ISO_DATE_FORMAT));
+       }
+     }
+
+     /* Retrieve all computed network fractions that might be needed to
+      * extrapolate new statistics. Keep a list of all known consensus
+      * valid-after times, and keep a map of fractions also by consensus
+      * valid-after time. (It's not sufficient to only keep the latter,
+      * because we need to count known consensuses even if the relay was
+      * not contained in a consensus or had a network fraction of exactly
+      * zero.) */
+     SortedSet<Long> knownConsensuses = new TreeSet<Long>();
+     SortedMap<Long, ComputedNetworkFractions> computedNetworkFractions =
+         new TreeMap<Long, ComputedNetworkFractions>();
+     for (String date : retrieveFractionDates) {
+       File documentFile = new File(
+           this.computedNetworkFractionsDirectory, date);
+       Set<ComputedNetworkFractions> fractions
+           = this.computedNetworkFractionsStore.retrieve(documentFile);
+
+       /* Give up if fractions could not be read, just like for the other
+        * documents above. Continuing without them would undercount
+        * known consensuses and distort the extrapolation. */
+       if (fractions == null) {
+         System.err.printf("Could not read computed network fractions "
+             + "from '%s'. Skipping.%n", documentFile.getAbsolutePath());
+         return false;
+       }
+       for (ComputedNetworkFractions fraction : fractions) {
+         knownConsensuses.add(fraction.getValidAfterMillis());
+         if (fraction.getFingerprint().equals(fingerprint)) {
+           computedNetworkFractions.put(fraction.getValidAfterMillis(),
+               fraction);
+         }
+       }
+     }
+
+     /* Go through newly reported stats, match them with computed network
+      * fractions, and extrapolate network totals. */
+     for (ReportedHidServStats stats : newReportedStats) {
+       long statsEndMillis = stats.getStatsEndMillis();
+       long statsDateMillis = (statsEndMillis / DateTimeHelper.ONE_DAY)
+           * DateTimeHelper.ONE_DAY;
+       long statsStartMillis = statsEndMillis
+           - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND;
+
+       /* Sum up computed network fractions and count known consensus in
+        * the relevant interval, so that we can later compute means of
+        * network fractions. */
+       double sumFractionRendRelayedCells = 0.0,
+           sumFractionDirOnionsSeen = 0.0;
+       int consensuses = 0;
+       for (long validAfterMillis : knownConsensuses) {
+         if (statsStartMillis <= validAfterMillis &&
+             validAfterMillis < statsEndMillis) {
+           if (computedNetworkFractions.containsKey(validAfterMillis)) {
+             ComputedNetworkFractions frac =
+                 computedNetworkFractions.get(validAfterMillis);
+             sumFractionRendRelayedCells +=
+                 frac.getFractionRendRelayedCells();
+             sumFractionDirOnionsSeen +=
+                 frac.getFractionDirOnionsSeen();
+           }
+           consensuses++;
+         }
+       }
+
+       /* If we don't know a single consensus with valid-after time in
+        * the statistics interval, skip this stat. */
+       if (consensuses == 0) {
+         continue;
+       }
+
+       /* Compute means of network fractions. */
+       double fractionRendRelayedCells =
+           sumFractionRendRelayedCells / consensuses;
+       double fractionDirOnionsSeen =
+           sumFractionDirOnionsSeen / consensuses;
+
+       /* If at least one fraction is positive, extrapolate network
+        * totals. */
+       if (fractionRendRelayedCells > 0.0 ||
+           fractionDirOnionsSeen > 0.0) {
+         ExtrapolatedHidServStats extrapolated =
+             new ExtrapolatedHidServStats(
+             statsDateMillis, fingerprint);
+         if (fractionRendRelayedCells > 0.0) {
+           extrapolated.setFractionRendRelayedCells(
+               fractionRendRelayedCells);
+           /* Extrapolating cells on rendezvous circuits is as easy as
+            * dividing the reported number by the computed network
+            * fraction. */
+           double extrapolatedRendRelayedCells =
+               stats.getRendRelayedCells() / fractionRendRelayedCells;
+           extrapolated.setExtrapolatedRendRelayedCells(
+               extrapolatedRendRelayedCells);
+         }
+         if (fractionDirOnionsSeen > 0.0) {
+           extrapolated.setFractionDirOnionsSeen(
+               fractionDirOnionsSeen);
+           /* Extrapolating reported unique .onion addresses to the
+            * total number in the network is more difficult. In short,
+            * each descriptor is stored to 12 (likely) different
+            * directories, so we'll have to divide the reported number by
+            * 12 and then by the computed network fraction of this
+            * directory. */
+           double extrapolatedDirOnionsSeen =
+               stats.getDirOnionsSeen() / (12.0 * fractionDirOnionsSeen);
+           extrapolated.setExtrapolatedDirOnionsSeen(
+               extrapolatedDirOnionsSeen);
+         }
+         extrapolatedStats.add(extrapolated);
+       }
+     }
+   }
+
+   /* Store all extrapolated network totals to disk with help of the
+    * document store. */
+   return this.extrapolatedHidServStatsStore.store(
+       this.extrapolatedHidServStatsFile, extrapolatedStats);
+ }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java
new file mode 100644
index 0000000..1e53bd0
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java
@@ -0,0 +1,91 @@
+package org.torproject.metrics.hidserv;
+
+import java.io.File;
+import java.util.HashSet;
+import java.util.Set;
+
+/* Main class for updating extrapolated network totals of hidden-service
+ * statistics. The main method of this class can be executed as often as
+ * new statistics are needed, though callers must ensure that executions
+ * do not overlap. */
+public class Main {
+
+ /* Parse new descriptors, extrapolate contained statistics using
+ * computed network fractions, aggregate results, and write results to
+ * disk. */
+ public static void main(String[] args) {
+
+ /* Initialize directories and file paths. */
+ Set<File> inDirectories = new HashSet<File>();
+ inDirectories.add(
+ new File("../../shared/in/relay-descriptors/consensuses"));
+ inDirectories.add(
+ new File("../../shared/in/relay-descriptors/extra-infos"));
+ File statusDirectory = new File("status");
+ File hidservStatsExtrapolatedCsvFile = new File("stats/hidserv.csv");
+
+ /* Initialize document stores that will handle writing documents to
+ * files. */
+ DocumentStore<ReportedHidServStats> reportedHidServStatsStore =
+ new DocumentStore<ReportedHidServStats>(
+ ReportedHidServStats.class);
+ DocumentStore<ComputedNetworkFractions>
+ computedNetworkFractionsStore =
+ new DocumentStore<ComputedNetworkFractions>(
+ ComputedNetworkFractions.class);
+ DocumentStore<ExtrapolatedHidServStats> extrapolatedHidServStatsStore
+ = new DocumentStore<ExtrapolatedHidServStats>(
+ ExtrapolatedHidServStats.class);
+
+ /* Initialize parser and read parse history to avoid parsing
+ * descriptor files that haven't changed since the last execution. */
+ System.out.println("Initializing parser and reading parse "
+ + "history...");
+ Parser parser = new Parser(inDirectories, statusDirectory,
+ reportedHidServStatsStore, computedNetworkFractionsStore);
+ parser.readParseHistory();
+
+ /* Parse new descriptors and store their contents using the document
+ * stores. */
+ System.out.println("Parsing descriptors...");
+ if (!parser.parseDescriptors()) {
+ System.err.println("Could not store parsed descriptors. "
+ + "Terminating.");
+ return;
+ }
+
+ /* Write the parse history to avoid parsing descriptor files again
+ * next time. It's okay to do this now and not at the end of the
+ * execution, because even if something breaks apart below, it's safe
+ * not to parse descriptor files again. */
+ System.out.println("Writing parse history...");
+ parser.writeParseHistory();
+
+ /* Extrapolate reported statistics using computed network fractions
+ * and write the result to disk using a document store. The result is
+ * a single file with extrapolated network totals based on reports by
+ * single relays. */
+ System.out.println("Extrapolating statistics...");
+ Extrapolator extrapolator = new Extrapolator(statusDirectory,
+ reportedHidServStatsStore, computedNetworkFractionsStore,
+ extrapolatedHidServStatsStore);
+ if (!extrapolator.extrapolateHidServStats()) {
+ System.err.println("Could not extrapolate statistics. "
+ + "Terminating.");
+ return;
+ }
+
+ /* Go through all extrapolated network totals and aggregate them.
+ * This includes calculating daily weighted interquartile means, among
+ * other statistics. Write the result to a .csv file that can be
+ * processed by other tools. */
+ System.out.println("Aggregating statistics...");
+ Aggregator aggregator = new Aggregator(statusDirectory,
+ extrapolatedHidServStatsStore, hidservStatsExtrapolatedCsvFile);
+ aggregator.aggregateHidServStats();
+
+ /* End this execution. */
+ System.out.println("Terminating.");
+ }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java
new file mode 100644
index 0000000..85f7d91
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java
@@ -0,0 +1,484 @@
+package org.torproject.metrics.hidserv;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+
+/* Parse hidden-service statistics from extra-info descriptors, compute
+ * network fractions from consensuses, and write parsed contents to
+ * document files for later use. */
+public class Parser {
+
+ /* File containing tuples of last-modified times and file names of
+ * descriptor files parsed in the previous execution. */
+ private File parseHistoryFile;
+
+ /* Descriptor reader to provide parsed extra-info descriptors and
+ * consensuses. */
+ private DescriptorReader descriptorReader;
+
+ /* Document file containing previously parsed reported hidden-service
+ * statistics. */
+ private File reportedHidServStatsFile;
+
+ /* Document store for storing and retrieving reported hidden-service
+ * statistics. */
+ private DocumentStore<ReportedHidServStats> reportedHidServStatsStore;
+
+ /* Directory containing document files with previously computed network
+ * fractions. */
+ private File computedNetworkFractionsDirectory;
+
+ /* Document store for storing and retrieving computed network
+ * fractions. */
+ private DocumentStore<ComputedNetworkFractions>
+ computedNetworkFractionsStore;
+
+ /* Set up a new parser that reads descriptors from the given in
+  * directories, keeps its files below the given status directory, and
+  * writes documents through the given document stores. */
+ public Parser(Set<File> inDirectories, File statusDirectory,
+     DocumentStore<ReportedHidServStats> reportedHidServStatsStore,
+     DocumentStore<ComputedNetworkFractions>
+     computedNetworkFractionsStore) {
+
+   /* Remember the document stores handed in by the caller. */
+   this.computedNetworkFractionsStore = computedNetworkFractionsStore;
+   this.reportedHidServStatsStore = reportedHidServStatsStore;
+
+   /* Derive the locations of all files and directories used by this
+    * class from the status directory. */
+   this.computedNetworkFractionsDirectory =
+       new File(statusDirectory, "computed-network-fractions");
+   this.reportedHidServStatsFile = new File(statusDirectory,
+       "reported-hidserv-stats");
+   this.parseHistoryFile = new File(statusDirectory, "parse-history");
+
+   /* Obtain a descriptor reader and point it to all in directories.
+    * Limit its queue to five parsed descriptor files instead of the
+    * default one hundred, as a compromise between very large consensuses
+    * and rather small extra-info descriptors. */
+   DescriptorReader reader =
+       DescriptorSourceFactory.createDescriptorReader();
+   for (File directory : inDirectories) {
+     reader.addDirectory(directory);
+   }
+   reader.setMaxDescriptorFilesInQueue(5);
+   this.descriptorReader = reader;
+ }
+
+ /* Read the parse history file to avoid parsing descriptor files that
+  * have not changed since the previous execution. Each line is expected
+  * to consist of a last-modified time, a space, and the path of a
+  * descriptor file; lines not matching this format are skipped with a
+  * warning. Does nothing if the history file does not exist. */
+ public void readParseHistory() {
+   if (!this.parseHistoryFile.exists() ||
+       !this.parseHistoryFile.isFile()) {
+     return;
+   }
+   SortedMap<String, Long> excludedFiles = new TreeMap<String, Long>();
+   try {
+     BufferedReader br = new BufferedReader(new FileReader(
+         this.parseHistoryFile));
+     try {
+       String line;
+       while ((line = br.readLine()) != null) {
+
+         /* Split into last-modified time and path. A line without a
+          * space yields only a single part and must not be indexed
+          * beyond that. */
+         String[] parts = line.split(" ", 2);
+         boolean valid = parts.length == 2;
+         if (valid) {
+           try {
+             excludedFiles.put(parts[1], Long.parseLong(parts[0]));
+           } catch (NumberFormatException e) {
+             valid = false;
+           }
+         }
+         if (!valid) {
+           System.err.printf("Illegal line '%s' in parse history. "
+               + "Skipping line.%n", line);
+         }
+       }
+     } finally {
+
+       /* Release the file handle even if reading fails half-way. */
+       br.close();
+     }
+   } catch (IOException e) {
+     System.err.printf("Could not read history file '%s'. Not "
+         + "excluding descriptors in this execution.%n",
+         this.parseHistoryFile.getAbsolutePath());
+   }
+
+   /* Tell the descriptor reader to exclude the files contained in the
+    * parse history file. */
+   this.descriptorReader.setExcludedFiles(excludedFiles);
+ }
+
+ /* Write parsed or skipped descriptor files with last-modified times and
+  * paths to the parse history file to avoid parsing these files again,
+  * unless they change until the next execution. Failing to write the
+  * file only produces a warning. */
+ public void writeParseHistory() {
+
+   /* Obtain the list of descriptor files that were either parsed now or
+    * that were skipped in this execution from the descriptor reader. */
+   SortedMap<String, Long> excludedAndParsedFiles =
+       new TreeMap<String, Long>();
+   excludedAndParsedFiles.putAll(
+       this.descriptorReader.getExcludedFiles());
+   excludedAndParsedFiles.putAll(this.descriptorReader.getParsedFiles());
+   try {
+     this.parseHistoryFile.getParentFile().mkdirs();
+     BufferedWriter bw = new BufferedWriter(new FileWriter(
+         this.parseHistoryFile));
+     try {
+       for (Map.Entry<String, Long> e :
+           excludedAndParsedFiles.entrySet()) {
+         /* Each line starts with the last-modified time of the
+          * descriptor file, followed by its path. */
+         String absolutePath = e.getKey();
+         long lastModifiedMillis = e.getValue();
+         bw.write(String.valueOf(lastModifiedMillis) + " " + absolutePath
+             + "\n");
+       }
+     } finally {
+
+       /* Release the file handle even if writing fails half-way. */
+       bw.close();
+     }
+   } catch (IOException e) {
+     System.err.printf("Could not write history file '%s'. Not "
+         + "excluding descriptors in next execution.%n",
+         this.parseHistoryFile.getAbsolutePath());
+   }
+ }
+
+ /* Set of all reported hidden-service statistics. To date, these
+ * objects are small, and keeping them all in memory is easy. But if
+ * this ever changes, e.g., when more and more statistics are added,
+ * this may not scale. */
+ private Set<ReportedHidServStats> reportedHidServStats =
+ new HashSet<ReportedHidServStats>();
+
+ /* Instruct the descriptor reader to parse descriptor files, and handle
+ * the resulting parsed descriptors if they are either extra-info
+ * descriptors or consensuses. */
+ public boolean parseDescriptors() {
+ Iterator<DescriptorFile> descriptorFiles =
+ this.descriptorReader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (descriptor instanceof ExtraInfoDescriptor) {
+ this.parseExtraInfoDescriptor((ExtraInfoDescriptor) descriptor);
+ } else if (descriptor instanceof RelayNetworkStatusConsensus) {
+ if (!this.parseRelayNetworkStatusConsensus(
+ (RelayNetworkStatusConsensus) descriptor)) {
+ return false;
+ }
+ }
+ }
+ }
+
+ /* Store reported hidden-service statistics to their document file.
+ * It's more efficient to only do this once after processing all
+ * descriptors. In contrast, sets of computed network fractions are
+ * stored immediately after processing the consensus they are based
+ * on. */
+ return this.reportedHidServStatsStore.store(
+ this.reportedHidServStatsFile, this.reportedHidServStats);
+ }
+
+ /* Parse the given extra-info descriptor by extracting its fingerprint
+  * and contained hidserv-* lines. If a valid set of hidserv-stats can
+  * be extracted, create a new stats object that will later be stored to
+  * a document file. Descriptors from which none of the expected values
+  * could be extracted are ignored silently; descriptors with illegal
+  * numbers, non-positive bin sizes, or only some of the expected values
+  * are ignored with a warning. */
+ private void parseExtraInfoDescriptor(
+     ExtraInfoDescriptor extraInfoDescriptor) {
+
+   /* Extract the fingerprint from the parsed descriptor. */
+   String fingerprint = extraInfoDescriptor.getFingerprint();
+
+   /* Parse the descriptor once more to extract any hidserv-* lines.
+    * This is necessary, because these lines are not yet supported by the
+    * descriptor-parsing library. */
+   Scanner scanner = new Scanner(new ByteArrayInputStream(
+       extraInfoDescriptor.getRawDescriptorBytes()));
+   Long statsEndMillis = null, statsIntervalSeconds = null,
+       rendRelayedCells = null, rendRelayedCellsBinSize = null,
+       dirOnionsSeen = null, dirOnionsSeenBinSize = null;
+   try {
+     while (scanner.hasNextLine()) {
+       String line = scanner.nextLine();
+       if (!line.startsWith("hidserv-")) {
+         continue;
+       }
+       String[] parts = line.split(" ");
+       if (parts[0].equals("hidserv-stats-end")) {
+         /* Parse statistics end and statistics interval length. A
+          * malformed line leaves statsEndMillis at null, which is
+          * handled below. */
+         if (parts.length != 5 || !parts[3].startsWith("(") ||
+             !parts[4].equals("s)")) {
+           continue;
+         }
+         statsEndMillis = DateTimeHelper.parse(parts[1] + " "
+             + parts[2]);
+         statsIntervalSeconds = Long.parseLong(parts[3].substring(1));
+       } else if (parts[0].equals("hidserv-rend-relayed-cells")) {
+         /* Parse the reported number of cells on rendezvous circuits
+          * and the bin size used by the relay to obfuscate that
+          * number. A malformed line leaves rendRelayedCells at null,
+          * which is handled below. */
+         if (parts.length != 5 ||
+             !parts[4].startsWith("bin_size=")) {
+           continue;
+         }
+         rendRelayedCells = Long.parseLong(parts[1]);
+         rendRelayedCellsBinSize =
+             Long.parseLong(parts[4].substring(9));
+       } else if (parts[0].equals("hidserv-dir-onions-seen")) {
+         /* Parse the reported number of distinct .onion addresses and
+          * the bin size used by the relay to obfuscate that number. A
+          * malformed line leaves dirOnionsSeen at null, which is
+          * handled below. */
+         if (parts.length != 5 ||
+             !parts[4].startsWith("bin_size=")) {
+           continue;
+         }
+         dirOnionsSeen = Long.parseLong(parts[1]);
+         dirOnionsSeenBinSize = Long.parseLong(parts[4].substring(9));
+       }
+     }
+   } catch (NumberFormatException e) {
+     System.err.println("Relay " + fingerprint + " published "
+         + "hidserv-stats containing an illegal number. Ignoring.");
+     return;
+   } finally {
+     scanner.close();
+   }
+
+   /* If the descriptor did not contain any of the expected hidserv-*
+    * lines, don't do anything. This applies to the majority of
+    * descriptors, at least as long as only a minority of relays reports
+    * these statistics. */
+   if (statsEndMillis == null && rendRelayedCells == null &&
+       dirOnionsSeen == null) {
+     return;
+   }
+
+   /* If the descriptor contained some but not all hidserv-* lines, print
+    * out a warning. This case does not warrant any further action,
+    * because relays can in theory write anything in their extra-info
+    * descriptors. But maybe we'll want to know. */
+   if (statsEndMillis == null ||
+       statsEndMillis.longValue() == DateTimeHelper.NO_TIME_AVAILABLE ||
+       statsIntervalSeconds == null || rendRelayedCells == null ||
+       dirOnionsSeen == null) {
+     System.err.println("Relay " + fingerprint + " published "
+         + "incomplete hidserv-stats. Ignoring.");
+     return;
+   }
+
+   /* Bin sizes come straight from the relay. Removing noise divides by
+    * the bin size, so a bin size of zero would abort the whole run, and
+    * a negative one makes no sense either. (Each bin size is set
+    * whenever its value is, so neither can be null here.) */
+   if (rendRelayedCellsBinSize <= 0L || dirOnionsSeenBinSize <= 0L) {
+     System.err.println("Relay " + fingerprint + " published "
+         + "hidserv-stats with a non-positive bin size. Ignoring.");
+     return;
+   }
+
+   /* The descriptor contained all expected hidserv-* lines, so create a
+    * new stats object and put it in the local set, so that it will later
+    * be written to a document file. */
+   ReportedHidServStats reportedStats = new ReportedHidServStats(
+       fingerprint, statsEndMillis);
+   reportedStats.setStatsIntervalSeconds(statsIntervalSeconds);
+   reportedStats.setRendRelayedCells(this.removeNoise(rendRelayedCells,
+       rendRelayedCellsBinSize));
+   reportedStats.setDirOnionsSeen(this.removeNoise(dirOnionsSeen,
+       dirOnionsSeenBinSize));
+   this.reportedHidServStats.add(reportedStats);
+ }
+
+ /* Undo the binning of a reported stats value: round the value to the
+  * nearest right side of a bin, then subtract half of the bin size.
+  * All steps use integer arithmetic. */
+ private long removeNoise(long reportedNumber, long binSize) {
+   long halfBinSize = binSize / 2;
+   long binIndex = (reportedNumber + halfBinSize) / binSize;
+   return binIndex * binSize - halfBinSize;
+ }
+
+ /* Compute network fractions for all relays listed in the given
+ * consensus and store them to the document file for the consensus'
+ * valid-after date. For each relay, this is the fraction of cells on
+ * rendezvous circuits, derived from its weighted consensus bandwidth,
+ * and, for relays with the HSDir flag, the fraction of hidden-service
+ * descriptors seen, derived from the distance to the directory
+ * preceding it by three positions. Return false if the consensus does
+ * not contain the required Wmx weights or if storing fails, true
+ * otherwise. */
+ public boolean parseRelayNetworkStatusConsensus(
+ RelayNetworkStatusConsensus consensus) {
+
+ /* Make sure that the consensus contains Wxx weights. */
+ SortedMap<String, Integer> bandwidthWeights =
+ consensus.getBandwidthWeights();
+ if (bandwidthWeights == null) {
+ System.err.printf("Consensus with valid-after time %s doesn't "
+ + "contain any Wxx weights. Skipping.%n",
+ DateTimeHelper.format(consensus.getValidAfterMillis()));
+ return false;
+ }
+
+ /* More precisely, make sure that it contains Wmx weights, and then
+ * parse them. */
+ SortedSet<String> expectedWeightKeys =
+ new TreeSet<String>(Arrays.asList("Wmg,Wmm,Wme,Wmd".split(",")));
+ expectedWeightKeys.removeAll(bandwidthWeights.keySet());
+ if (!expectedWeightKeys.isEmpty()) {
+ System.err.printf("Consensus with valid-after time %s doesn't "
+ + "contain expected Wmx weights. Skipping.%n",
+ DateTimeHelper.format(consensus.getValidAfterMillis()));
+ return false;
+ }
+ /* Scale the integer weights down by 10000 to obtain the factors used
+ * below. */
+ double wmg = ((double) bandwidthWeights.get("Wmg")) / 10000.0;
+ double wmm = ((double) bandwidthWeights.get("Wmm")) / 10000.0;
+ double wme = ((double) bandwidthWeights.get("Wme")) / 10000.0;
+ double wmd = ((double) bandwidthWeights.get("Wmd")) / 10000.0;
+
+ /* Keep a sorted set with the fingerprints of all hidden-service
+ * directories, in reverse order, so that we can later determine the
+ * fingerprint distance between a directory and the directory
+ * preceding it by three positions in the descriptor ring. */
+ SortedSet<String> hsDirs = new TreeSet<String>(
+ Collections.reverseOrder());
+
+ /* Prepare for computing the weights of all relays with the Fast flag
+ * for being selected in the middle position. */
+ double totalWeightsRendezvousPoint = 0.0;
+ SortedMap<String, Double> weightsRendezvousPoint =
+ new TreeMap<String, Double>();
+
+ /* Go through all status entries contained in the consensus. */
+ for (Map.Entry<String, NetworkStatusEntry> e :
+ consensus.getStatusEntries().entrySet()) {
+ String fingerprint = e.getKey();
+ NetworkStatusEntry statusEntry = e.getValue();
+ SortedSet<String> flags = statusEntry.getFlags();
+
+ /* Add the relay to the set of hidden-service directories if it has
+ * the HSDir flag. */
+ if (flags.contains("HSDir")) {
+ hsDirs.add(statusEntry.getFingerprint());
+ }
+
+ /* Compute the probability for being selected as rendezvous point.
+ * If the relay has the Fast flag, multiply its consensus weight
+ * with the correct Wmx weight, depending on whether the relay has
+ * the Guard and/or Exit flag. */
+ double weightRendezvousPoint = 0.0;
+ if (flags.contains("Fast")) {
+ weightRendezvousPoint = (double) statusEntry.getBandwidth();
+ if (flags.contains("Guard") && flags.contains("Exit")) {
+ weightRendezvousPoint *= wmd;
+ } else if (flags.contains("Guard")) {
+ weightRendezvousPoint *= wmg;
+ } else if (flags.contains("Exit")) {
+ weightRendezvousPoint *= wme;
+ } else {
+ weightRendezvousPoint *= wmm;
+ }
+ }
+ /* Relays without the Fast flag are recorded with a weight of zero,
+ * so that every fingerprint has an entry for the lookup below. */
+ weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint);
+ totalWeightsRendezvousPoint += weightRendezvousPoint;
+ }
+
+ /* Store all computed network fractions based on this consensus in a
+ * set, which will then be written to disk in a single store
+ * operation. */
+ Set<ComputedNetworkFractions> computedNetworkFractions =
+ new HashSet<ComputedNetworkFractions>();
+
+ /* Remove all previously added directory fingerprints and re-add them
+ * twice, once with a leading "0" and once with a leading "1". The
+ * purpose is to simplify the logic for moving from one fingerprint to
+ * the previous one, even if that would mean traversing the ring
+ * start. For example, the fingerprint preceding "1""00..0000" with
+ * the first "1" being added here could be "0""FF..FFFF". */
+ SortedSet<String> hsDirsCopy = new TreeSet<String>(hsDirs);
+ hsDirs.clear();
+ for (String fingerprint : hsDirsCopy) {
+ hsDirs.add("0" + fingerprint);
+ hsDirs.add("1" + fingerprint);
+ }
+
+ /* Define the total ring size to compute fractions below. This is
+ * 16^40 or 2^160. */
+ final double RING_SIZE = new BigInteger(
+ "10000000000000000000000000000000000000000",
+ 16).doubleValue();
+
+ /* Go through all status entries again, this time computing network
+ * fractions. */
+ for (Map.Entry<String, NetworkStatusEntry> e :
+ consensus.getStatusEntries().entrySet()) {
+ String fingerprint = e.getKey();
+ NetworkStatusEntry statusEntry = e.getValue();
+ double fractionRendRelayedCells = 0.0,
+ fractionDirOnionsSeen = 0.0;
+ if (statusEntry != null) {
+
+ /* Check if the relay is a hidden-service directory by looking up
+ * its fingerprint, preceded by "1", in the sorted set that we
+ * populated above. */
+ String fingerprintPrecededByOne = "1" + fingerprint;
+ if (hsDirs.contains(fingerprintPrecededByOne)) {
+
+ /* Move three positions in the sorted set, which is in reverse
+ * order, to learn the fingerprint of the directory preceding
+ * this directory by three positions. The tail set starts with
+ * this directory's own entry, so the loop ends on the fourth
+ * element it visits, or earlier if the set holds fewer
+ * elements. */
+ String startResponsible = fingerprint;
+ int positionsToGo = 3;
+ for (String hsDirFingerprint :
+ hsDirs.tailSet(fingerprintPrecededByOne)) {
+ startResponsible = hsDirFingerprint;
+ if (positionsToGo-- <= 0) {
+ break;
+ }
+ }
+
+ /* Compute the fraction of descriptor space that this relay is
+ * responsible for as difference between the two fingerprints
+ * divided by the ring size. */
+ fractionDirOnionsSeen =
+ new BigInteger(fingerprintPrecededByOne, 16).subtract(
+ new BigInteger(startResponsible, 16)).doubleValue()
+ / RING_SIZE;
+
+ /* Divide this fraction by three to obtain the fraction of
+ * descriptors that this directory has seen. This step is
+ * necessary, because each descriptor that is published to this
+ * directory is also published to two other directories. */
+ fractionDirOnionsSeen /= 3.0;
+ }
+
+ /* Compute the fraction of cells on rendezvous circuits that this
+ * relay has seen by dividing its previously calculated weight by
+ * the sum of all such weights. If that sum is zero, the result is
+ * NaN, which does not pass the greater-than-zero check below. */
+ fractionRendRelayedCells = weightsRendezvousPoint.get(fingerprint)
+ / totalWeightsRendezvousPoint;
+ }
+
+ /* If at least one of the computed fractions is non-zero, create a
+ * new fractions object. */
+ if (fractionRendRelayedCells > 0.0 || fractionDirOnionsSeen > 0.0) {
+ ComputedNetworkFractions fractions = new ComputedNetworkFractions(
+ fingerprint, consensus.getValidAfterMillis());
+ fractions.setFractionRendRelayedCells(fractionRendRelayedCells);
+ fractions.setFractionDirOnionsSeen(fractionDirOnionsSeen);
+ computedNetworkFractions.add(fractions);
+ }
+ }
+
+ /* Store all newly computed network fractions to a documents file.
+ * The same file also contains computed network fractions from other
+ * consensuses that were valid on the same day. This is in contrast
+ * to the other documents which are all stored in a single file, which
+ * would not scale for computed network fractions. */
+ String date = DateTimeHelper.format(consensus.getValidAfterMillis(),
+ DateTimeHelper.ISO_DATE_FORMAT);
+ File documentFile = new File(this.computedNetworkFractionsDirectory,
+ date);
+ if (!this.computedNetworkFractionsStore.store(documentFile,
+ computedNetworkFractions)) {
+ return false;
+ }
+ return true;
+ }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java
new file mode 100644
index 0000000..996a70a
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java
@@ -0,0 +1,130 @@
+package org.torproject.metrics.hidserv;
+
+/* Hidden-service statistics reported by a single relay covering a single
+ * statistics interval of usually 24 hours. These statistics are reported
+ * by the relay in the "hidserv-" lines of its extra-info descriptor. */
+public class ReportedHidServStats implements Document {
+
+ /* Relay fingerprint consisting of 40 upper-case hex characters. */
+ private String fingerprint;
+ public String getFingerprint() {
+ return this.fingerprint;
+ }
+
+ /* Hidden-service statistics end timestamp in milliseconds. */
+ private long statsEndMillis;
+ public long getStatsEndMillis() {
+ return this.statsEndMillis;
+ }
+
+ /* Statistics interval length in seconds. */
+ private long statsIntervalSeconds;
+ public void setStatsIntervalSeconds(long statsIntervalSeconds) {
+ this.statsIntervalSeconds = statsIntervalSeconds;
+ }
+ public long getStatsIntervalSeconds() {
+ return this.statsIntervalSeconds;
+ }
+
+ /* Number of relayed cells on rendezvous circuits as reported by the
+ * relay and adjusted by rounding to the nearest right side of a bin and
+ * subtracting half of the bin size. */
+ private long rendRelayedCells;
+ public void setRendRelayedCells(long rendRelayedCells) {
+ this.rendRelayedCells = rendRelayedCells;
+ }
+ public long getRendRelayedCells() {
+ return this.rendRelayedCells;
+ }
+
+ /* Number of distinct .onion addresses as reported by the relay and
+ * adjusted by rounding to the nearest right side of a bin and
+ * subtracting half of the bin size. */
+ private long dirOnionsSeen;
+ public void setDirOnionsSeen(long dirOnionsSeen) {
+ this.dirOnionsSeen = dirOnionsSeen;
+ }
+ public long getDirOnionsSeen() {
+ return this.dirOnionsSeen;
+ }
+
+ /* Instantiate a new stats object using fingerprint and stats interval
+ * end which together uniquely identify the object. */
+ public ReportedHidServStats(String fingerprint, long statsEndMillis) {
+ this.fingerprint = fingerprint;
+ this.statsEndMillis = statsEndMillis;
+ }
+
+ /* Return whether this object contains the same fingerprint and stats
+ * interval end as the passed object. */
+ @Override
+ public boolean equals(Object otherObject) {
+ if (!(otherObject instanceof ReportedHidServStats)) {
+ return false;
+ }
+ ReportedHidServStats other = (ReportedHidServStats) otherObject;
+ return this.fingerprint.equals(other.fingerprint) &&
+ this.statsEndMillis == other.statsEndMillis;
+ }
+
+ /* Return a (hopefully unique) hash code based on this object's
+ * fingerprint and stats interval end. */
+ @Override
+ public int hashCode() {
+ return this.fingerprint.hashCode() + (int) this.statsEndMillis;
+ }
+
+ /* Return a string representation of this object, consisting of
+ * fingerprint and the concatenation of all other attributes. */
+ @Override
+ public String[] format() {
+ String first = this.fingerprint;
+ String second = String.format("%s,%d,%d,%d",
+ DateTimeHelper.format(this.statsEndMillis),
+ this.statsIntervalSeconds, this.rendRelayedCells,
+ this.dirOnionsSeen);
+ return new String[] { first, second };
+ }
+
+ /* Instantiate an empty stats object that will be initialized more by
+ * the parse method. */
+ ReportedHidServStats() {
+ }
+
+ /* Initialize this stats object using the two provided strings that have
+ * been produced by the format method earlier. Return whether this
+ * operation was successful. */
+ @Override
+ public boolean parse(String[] formattedStrings) {
+ if (formattedStrings.length != 2) {
+ System.err.printf("Invalid number of formatted strings. "
+ + "Skipping.%n", formattedStrings.length);
+ return false;
+ }
+ String fingerprint = formattedStrings[0];
+ String[] secondParts = formattedStrings[1].split(",", 4);
+ if (secondParts.length != 4) {
+ return false;
+ }
+ long statsEndMillis = DateTimeHelper.parse(secondParts[0]);
+ if (statsEndMillis == DateTimeHelper.NO_TIME_AVAILABLE) {
+ return false;
+ }
+ long statsIntervalSeconds = -1L, rendRelayedCells = -1L,
+ dirOnionsSeen = -1L;
+ try {
+ statsIntervalSeconds = Long.parseLong(secondParts[1]);
+ rendRelayedCells = Long.parseLong(secondParts[2]);
+ dirOnionsSeen = Long.parseLong(secondParts[3]);
+ } catch (NumberFormatException e) {
+ return false;
+ }
+ this.fingerprint = fingerprint;
+ this.statsEndMillis = statsEndMillis;
+ this.statsIntervalSeconds = statsIntervalSeconds;
+ this.rendRelayedCells = rendRelayedCells;
+ this.dirOnionsSeen = dirOnionsSeen;
+ return true;
+ }
+}
+
diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java
new file mode 100644
index 0000000..db7d065
--- /dev/null
+++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java
@@ -0,0 +1,360 @@
+package org.torproject.metrics.hidserv;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/* NOTE: This class is not required for running the Main class! Its own
+ * main method simulates extrapolations and writes the results as CSV. */
+public class Simulate {
+  private static File simCellsCsvFile =
+      new File("out/csv/sim-cells.csv");
+
+  private static File simOnionsCsvFile =
+      new File("out/csv/sim-onions.csv");
+
+  /* Run both simulations one after the other, printing progress. */
+  public static void main(String[] args) throws Exception {
+    System.out.print("Simulating extrapolation of rendezvous cells");
+    simulateManyCells();
+    System.out.print("\nSimulating extrapolation of .onions");
+    simulateManyOnions();
+    System.out.println("\nTerminating.");
+  }
+
+  private static Random rnd = new Random();
+  private static final int NUMBER_OF_EXTRAPOLATIONS = 1000;
+  private static final double[] FRACTIONS = new double[] { 0.01, 0.02,
+      0.03, 0.04, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+
+  /* Run all cell simulations and write their results to a CSV file. */
+  private static void simulateManyCells() throws Exception {
+    simCellsCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        simCellsCsvFile));
+    try {
+      bw.write("run,frac,wmean,wmedian,wiqm\n");
+      for (int i = 0; i < NUMBER_OF_EXTRAPOLATIONS; i++) {
+        bw.write(simulateCells(i));
+        System.out.print(".");
+      }
+    } finally {
+      bw.close();
+    }
+  }
+
+  /* Run all .onion simulations and write their results to a CSV file. */
+  private static void simulateManyOnions() throws Exception {
+    simOnionsCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        simOnionsCsvFile));
+    try {
+      bw.write("run,frac,wmean,wmedian,wiqm\n");
+      for (int i = 0; i < NUMBER_OF_EXTRAPOLATIONS; i++) {
+        bw.write(simulateOnions(i));
+        System.out.print(".");
+      }
+    } finally {
+      bw.close();
+    }
+  }
+
+  /* Simulate one run of extrapolating cells; return its CSV lines. */
+  private static String simulateCells(int run) {
+    /* Generate consensus weights following an exponential distribution
+     * with lambda = 1 for 3000 potential rendezvous points. */
+    final int numberRendPoints = 3000;
+    double[] consensusWeights = new double[numberRendPoints];
+    double totalConsensusWeight = 0.0;
+    for (int i = 0; i < numberRendPoints; i++) {
+      consensusWeights[i] = -Math.log(1.0 - rnd.nextDouble());
+      totalConsensusWeight += consensusWeights[i];
+    }
+
+    /* Compute probabilities for being selected as rendezvous point. */
+    double[] probRendPoint = new double[numberRendPoints];
+    for (int i = 0; i < numberRendPoints; i++) {
+      probRendPoint[i] = consensusWeights[i] / totalConsensusWeight;
+    }
+
+    /* Generate 10,000,000,000 cells (474 Mbit/s) in chunks following an
+     * exponential distribution with lambda = 0.0001, so on average
+     * 10,000 cells per chunk, and randomly assign them to a rendezvous
+     * point to report them later. */
+    long cellsLeft = 10000000000L;
+    final double cellsLambda = 0.0001;
+    long[] observedCells = new long[numberRendPoints];
+    while (cellsLeft > 0) {
+      long cells = Math.min(cellsLeft,
+          (long) (-Math.log(1.0 - rnd.nextDouble()) / cellsLambda));
+      double selectRendPoint = rnd.nextDouble();
+      for (int i = 0; i < probRendPoint.length; i++) {
+        selectRendPoint -= probRendPoint[i];
+        if (selectRendPoint <= 0.0) {
+          observedCells[i] += cells;
+          break;
+        }
+      }
+      cellsLeft -= cells;
+    }
+
+    /* Obfuscate reports using binning and Laplace noise, and then attempt
+     * to remove noise again. */
+    final long binSize = 1024L;
+    final double b = 2048.0 / 0.3;
+    long[] removedNoiseCells = new long[numberRendPoints];
+    for (int i = 0; i < numberRendPoints; i++) {
+      long observed = observedCells[i];
+      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+      double p = rnd.nextDouble();
+      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+      long reported = afterBinning + (long) laplaceNoise;
+      long roundedToNearestRightSideOfTheBin =
+          ((reported + binSize / 2) / binSize) * binSize;
+      removedNoiseCells[i] =
+          roundedToNearestRightSideOfTheBin - binSize / 2;
+    }
+
+    /* Perform extrapolations from random fractions of reports by
+     * probability to be selected as rendezvous point. */
+    StringBuilder sb = new StringBuilder();
+    for (double fraction : FRACTIONS) {
+      SortedSet<Integer> nonReportingRelays = new TreeSet<Integer>();
+      for (int j = 0; j < numberRendPoints; j++) {
+        nonReportingRelays.add(j);
+      }
+      List<Integer> shuffledRelays = new ArrayList<Integer>(
+          nonReportingRelays);
+      Collections.shuffle(shuffledRelays);
+      SortedSet<Integer> reportingRelays = new TreeSet<Integer>();
+      for (int j = 0; j < (int) ((double) numberRendPoints * fraction);
+          j++) {
+        reportingRelays.add(shuffledRelays.get(j));
+        nonReportingRelays.remove(shuffledRelays.get(j));
+      }
+      List<double[]> singleRelayExtrapolations;
+      double totalReportingProbability;
+      do {
+        singleRelayExtrapolations = new ArrayList<double[]>();
+        totalReportingProbability = 0.0;
+        for (int reportingRelay : reportingRelays) {
+          double probability = probRendPoint[reportingRelay];
+          if (probability > 0.0) {
+            singleRelayExtrapolations.add(
+                new double[] {
+                    removedNoiseCells[reportingRelay] / probability,
+                    removedNoiseCells[reportingRelay],
+                    probability });
+          }
+          totalReportingProbability += probability;
+        }
+        if (totalReportingProbability < fraction - 0.001) {
+          int addRelay = new ArrayList<Integer>(nonReportingRelays).get(
+              rnd.nextInt(nonReportingRelays.size()));
+          nonReportingRelays.remove(addRelay);
+          reportingRelays.add(addRelay);
+        } else if (totalReportingProbability > fraction + 0.001) {
+          int removeRelay = new ArrayList<Integer>(reportingRelays).get(
+              rnd.nextInt(reportingRelays.size()));
+          reportingRelays.remove(removeRelay);
+          nonReportingRelays.add(removeRelay);
+        }
+      } while (totalReportingProbability < fraction - 0.001 ||
+          totalReportingProbability > fraction + 0.001);
+      Collections.sort(singleRelayExtrapolations,
+          new Comparator<double[]>() {
+        public int compare(double[] o1, double[] o2) {
+          return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
+        }
+      });
+      double totalProbability = 0.0, totalValues = 0.0;
+      double totalInterquartileProbability = 0.0,
+          totalInterquartileValues = 0.0;
+      Double weightedMedian = null;
+      for (double[] extrapolation : singleRelayExtrapolations) {
+        totalValues += extrapolation[1];
+        totalProbability += extrapolation[2];
+        if (weightedMedian == null &&
+            totalProbability > totalReportingProbability * 0.5) {
+          weightedMedian = extrapolation[0];
+        }
+        if (totalProbability > totalReportingProbability * 0.25 &&
+            totalProbability < totalReportingProbability * 0.75) {
+          totalInterquartileValues += extrapolation[1];
+          totalInterquartileProbability += extrapolation[2];
+        }
+      }
+      sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
+          totalValues / totalProbability, weightedMedian,
+          totalInterquartileValues / totalInterquartileProbability));
+    }
+    return sb.toString();
+  }
+
+  /* Simulate one run of extrapolating .onions; return its CSV lines. */
+  private static String simulateOnions(final int run) {
+    /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */
+    final int numberHsDirs = 3000;
+    SortedSet<Double> hsDirFingerprints = new TreeSet<Double>();
+    for (int i = 0; i < numberHsDirs; i++) {
+      hsDirFingerprints.add(rnd.nextDouble());
+    }
+
+    /* Compute fractions of observed descriptor space. */
+    SortedSet<Double> ring =
+        new TreeSet<Double>(Collections.reverseOrder());
+    for (double fingerprint : hsDirFingerprints) {
+      ring.add(fingerprint);
+      ring.add(fingerprint - 1.0);
+    }
+    SortedMap<Double, Double> hsDirFractions =
+        new TreeMap<Double, Double>();
+    for (double fingerprint : hsDirFingerprints) {
+      double start = fingerprint;
+      int positionsToGo = 3;
+      for (double prev : ring.tailSet(fingerprint)) {
+        start = prev;
+        if (positionsToGo-- <= 0) {
+          break;
+        }
+      }
+      hsDirFractions.put(fingerprint, fingerprint - start);
+    }
+
+    /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */
+    final int numberOnions = 40000;
+    final int replicas = 4;
+    final int storeOnDirs = 3;
+    SortedMap<Double, SortedSet<Integer>> storedDescs =
+        new TreeMap<Double, SortedSet<Integer>>();
+    for (double fingerprint : hsDirFingerprints) {
+      storedDescs.put(fingerprint, new TreeSet<Integer>());
+    }
+    for (int i = 0; i < numberOnions; i++) {
+      for (int j = 0; j < replicas; j++) {
+        int leftToStore = storeOnDirs;
+        for (double fingerprint :
+            hsDirFingerprints.tailSet(rnd.nextDouble())) {
+          storedDescs.get(fingerprint).add(i);
+          if (--leftToStore <= 0) {
+            break;
+          }
+        }
+        if (leftToStore > 0) {
+          for (double fingerprint : hsDirFingerprints) {
+            storedDescs.get(fingerprint).add(i);
+            if (--leftToStore <= 0) {
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    /* Obfuscate reports using binning and Laplace noise, and then attempt
+     * to remove noise again. */
+    final long binSize = 8L;
+    final double b = 8.0 / 0.3;
+    SortedMap<Double, Long> removedNoiseOnions =
+        new TreeMap<Double, Long>();
+    for (Map.Entry<Double, SortedSet<Integer>> e :
+        storedDescs.entrySet()) {
+      double fingerprint = e.getKey();
+      long observed = (long) e.getValue().size();
+      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+      double p = rnd.nextDouble();
+      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+      long reported = afterBinning + (long) laplaceNoise;
+      long roundedToNearestRightSideOfTheBin =
+          ((reported + binSize / 2) / binSize) * binSize;
+      removedNoiseOnions.put(fingerprint,
+          roundedToNearestRightSideOfTheBin - binSize / 2);
+    }
+
+    /* Perform extrapolations from random fractions of reports by
+     * fraction of descriptor space observed as HSDir, divided by 3. */
+    StringBuilder sb = new StringBuilder();
+    for (double fraction : FRACTIONS) {
+      SortedSet<Double> nonReportingRelays =
+          new TreeSet<Double>(hsDirFractions.keySet());
+      List<Double> shuffledRelays = new ArrayList<Double>(
+          nonReportingRelays);
+      Collections.shuffle(shuffledRelays);
+      SortedSet<Double> reportingRelays = new TreeSet<Double>();
+      for (int j = 0; j < (int) ((double) hsDirFractions.size()
+          * fraction); j++) {
+        reportingRelays.add(shuffledRelays.get(j));
+        nonReportingRelays.remove(shuffledRelays.get(j));
+      }
+      List<double[]> singleRelayExtrapolations;
+      double totalReportingProbability;
+      do {
+        singleRelayExtrapolations = new ArrayList<double[]>();
+        totalReportingProbability = 0.0;
+        for (double reportingRelay : reportingRelays) {
+          double probability = hsDirFractions.get(reportingRelay) / 3.0;
+          if (probability > 0.0) {
+            singleRelayExtrapolations.add(
+                new double[] { removedNoiseOnions.get(reportingRelay)
+                    / probability, removedNoiseOnions.get(reportingRelay),
+                    probability });
+          }
+          totalReportingProbability += probability;
+        }
+        if (totalReportingProbability < fraction - 0.001) {
+          double addRelay =
+              new ArrayList<Double>(nonReportingRelays).get(
+              rnd.nextInt(nonReportingRelays.size()));
+          nonReportingRelays.remove(addRelay);
+          reportingRelays.add(addRelay);
+        } else if (totalReportingProbability > fraction + 0.001) {
+          double removeRelay =
+              new ArrayList<Double>(reportingRelays).get(
+              rnd.nextInt(reportingRelays.size()));
+          reportingRelays.remove(removeRelay);
+          nonReportingRelays.add(removeRelay);
+        }
+      } while (totalReportingProbability < fraction - 0.001 ||
+          totalReportingProbability > fraction + 0.001);
+      Collections.sort(singleRelayExtrapolations,
+          new Comparator<double[]>() {
+        public int compare(double[] o1, double[] o2) {
+          return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
+        }
+      });
+      double totalProbability = 0.0, totalValues = 0.0;
+      double totalInterquartileProbability = 0.0,
+          totalInterquartileValues = 0.0;
+      Double weightedMedian = null;
+      for (double[] extrapolation : singleRelayExtrapolations) {
+        totalValues += extrapolation[1];
+        totalProbability += extrapolation[2];
+        if (weightedMedian == null &&
+            totalProbability > totalReportingProbability * 0.5) {
+          weightedMedian = extrapolation[0];
+        }
+        if (totalProbability > totalReportingProbability * 0.25 &&
+            totalProbability < totalReportingProbability * 0.75) {
+          totalInterquartileValues += extrapolation[1];
+          totalInterquartileProbability += extrapolation[2];
+        }
+      }
+      sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
+          totalValues / totalProbability, weightedMedian,
+          totalInterquartileValues / totalInterquartileProbability));
+    }
+    return sb.toString();
+  }
+}
diff --git a/shared/bin/70-run-hidserv-stats.sh b/shared/bin/70-run-hidserv-stats.sh
new file mode 100755
index 0000000..a924f31
--- /dev/null
+++ b/shared/bin/70-run-hidserv-stats.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+cd modules/hidserv/ || exit 1  # never run ant from the wrong directory
+ant | grep "\[java\]"  # keep only output lines containing "[java]"
+cd ../../
+
diff --git a/shared/bin/99-copy-stats-files.sh b/shared/bin/99-copy-stats-files.sh
index 2292da2..5493cf8 100755
--- a/shared/bin/99-copy-stats-files.sh
+++ b/shared/bin/99-copy-stats-files.sh
@@ -2,4 +2,5 @@
mkdir -p shared/stats
cp -a modules/legacy/stats/*.csv shared/stats/
cp -a modules/advbwdist/stats/advbwdist.csv shared/stats/
+cp -a modules/hidserv/stats/hidserv.csv shared/stats/