commit fefb0f946aa5018639415cb67da7f35d35ff721b Author: Karsten Loesing karsten.loesing@gmx.net Date: Wed Mar 11 15:25:38 2015 +0100
Add hidserv-stats extrapolation code. --- modules/hidserv/.gitignore | 4 + modules/hidserv/build.xml | 44 ++ .../org/torproject/metrics/hidserv/Aggregator.java | 191 ++++++++ .../metrics/hidserv/ComputedNetworkFractions.java | 141 ++++++ .../torproject/metrics/hidserv/DateTimeHelper.java | 95 ++++ .../org/torproject/metrics/hidserv/Document.java | 19 + .../torproject/metrics/hidserv/DocumentStore.java | 157 +++++++ .../metrics/hidserv/ExtrapolatedHidServStats.java | 156 +++++++ .../torproject/metrics/hidserv/Extrapolator.java | 251 ++++++++++ .../src/org/torproject/metrics/hidserv/Main.java | 91 ++++ .../src/org/torproject/metrics/hidserv/Parser.java | 484 ++++++++++++++++++++ .../metrics/hidserv/ReportedHidServStats.java | 130 ++++++ .../org/torproject/metrics/hidserv/Simulate.java | 360 +++++++++++++++ shared/bin/70-run-hidserv-stats.sh | 5 + shared/bin/99-copy-stats-files.sh | 1 + 15 files changed, 2129 insertions(+)
diff --git a/modules/hidserv/.gitignore b/modules/hidserv/.gitignore new file mode 100644 index 0000000..4bb76a5 --- /dev/null +++ b/modules/hidserv/.gitignore @@ -0,0 +1,4 @@ +classes/ +stats/ +status/ + diff --git a/modules/hidserv/build.xml b/modules/hidserv/build.xml new file mode 100644 index 0000000..7480b8c --- /dev/null +++ b/modules/hidserv/build.xml @@ -0,0 +1,44 @@ +<project default="run" name="hidserv" basedir="."> + + <property name="sources" value="src"/> + <property name="classes" value="classes"/> + <path id="classpath"> + <pathelement path="${classes}"/> + <fileset dir="/usr/share/java"> + <include name="commons-codec-1.6.jar"/> + <include name="commons-compress-1.4.1.jar"/> + <include name="commons-lang-2.6.jar"/> + </fileset> + <fileset dir="../../deps/metrics-lib"> + <include name="descriptor.jar"/> + </fileset> + </path> + + <target name="metrics-lib"> + <ant dir="../../deps/metrics-lib"/> + </target> + + <target name="compile" depends="metrics-lib"> + <mkdir dir="${classes}"/> + <javac destdir="${classes}" + srcdir="${sources}" + source="1.6" + target="1.6" + debug="true" + deprecation="true" + optimize="false" + failonerror="true" + includeantruntime="false"> + <classpath refid="classpath"/> + </javac> + </target> + + <target name="run" depends="compile"> + <java fork="true" + maxmemory="1024m" + classname="org.torproject.metrics.hidserv.Main"> + <classpath refid="classpath"/> + </java> + </target> +</project> + diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java new file mode 100644 index 0000000..192a342 --- /dev/null +++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java @@ -0,0 +1,191 @@ +package org.torproject.metrics.hidserv; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import 
java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +/* Aggregate extrapolated network totals of hidden-service statistics by + * calculating statistics like the daily weighted interquartile mean. + * Also calculate simpler statistics like the number of reported + * statistics and the total network fraction of reporting relays. */ +public class Aggregator { + + /* Document file containing extrapolated hidden-service statistics. */ + private File extrapolatedHidServStatsFile; + + /* Document store for storing and retrieving extrapolated hidden-service + * statistics. */ + private DocumentStore<ExtrapolatedHidServStats> + extrapolatedHidServStatsStore; + + /* Output file for writing aggregated statistics. */ + private File hidservStatsCsvFile; + + /* Initialize a new aggregator object using the given directory, + * document store, and output file for results. */ + public Aggregator(File statusDirectory, + DocumentStore<ExtrapolatedHidServStats> + extrapolatedHidServStatsStore, File hidservStatsCsvFile) { + + /* Create a File instance for the document file containing + * extrapolated network totals. */ + this.extrapolatedHidServStatsFile = new File(statusDirectory, + "extrapolated-hidserv-stats"); + + /* Store references to the provided document store and output file. */ + this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore; + this.hidservStatsCsvFile = hidservStatsCsvFile; + } + + /* Calculate aggregates for all extrapolated hidden-service statistics + * and write them to the output file. */ + public void aggregateHidServStats() { + + /* Retrieve previously extrapolated network totals. */ + Set<ExtrapolatedHidServStats> extrapolatedStats = + this.extrapolatedHidServStatsStore.retrieve( + this.extrapolatedHidServStatsFile); + if (extrapolatedStats == null) { + System.err.printf("Unable to retrieve extrapolated hidden-service " + + "statistics from file %s. 
Skipping aggregation step.%n", + this.extrapolatedHidServStatsFile.getAbsolutePath()); + return; + } + + /* Re-arrange extrapolated network totals by statistics interval end + * date, and include the computed network total as weight for the + * extrapolated value. More precisely, map keys are ISO-formatted + * dates, map values are double[] arrays with the extrapolated network + * total as first element and the corresponding computed network + * fraction as second element. */ + SortedMap<String, List<double[]>> + extrapolatedCells = new TreeMap<String, List<double[]>>(), + extrapolatedOnions = new TreeMap<String, List<double[]>>(); + for (ExtrapolatedHidServStats extrapolated : extrapolatedStats) { + String date = DateTimeHelper.format( + extrapolated.getStatsDateMillis(), + DateTimeHelper.ISO_DATE_FORMAT); + if (extrapolated.getFractionRendRelayedCells() > 0.0) { + if (!extrapolatedCells.containsKey(date)) { + extrapolatedCells.put(date, new ArrayList<double[]>()); + } + extrapolatedCells.get(date).add(new double[] { + extrapolated.getExtrapolatedRendRelayedCells(), + extrapolated.getFractionRendRelayedCells() }); + } + if (extrapolated.getFractionDirOnionsSeen() > 0.0) { + if (!extrapolatedOnions.containsKey(date)) { + extrapolatedOnions.put(date, new ArrayList<double[]>()); + } + extrapolatedOnions.get(date).add(new double[] { + extrapolated.getExtrapolatedDirOnionsSeen(), + extrapolated.getFractionDirOnionsSeen() }); + } + } + + /* Write all results to a string builder that will later be written to + * the output file. Each line contains an ISO-formatted "date", a + * string identifier for the "type" of statistic, the weighted mean + * ("wmean"), weighted median ("wmedian"), weighted interquartile mean + * ("wiqm"), the total network "frac"tion, and the number of reported + * "stats" with non-zero computed network fraction. 
*/ + StringBuilder sb = new StringBuilder(); + sb.append("date,type,wmean,wmedian,wiqm,frac,stats\n"); + + /* Repeat all aggregation steps for both types of statistics. */ + for (int i = 0; i < 2; i++) { + String type = i == 0 ? "rend-relayed-cells" : "dir-onions-seen"; + SortedMap<String, List<double[]>> extrapolated = i == 0 + ? extrapolatedCells : extrapolatedOnions; + + /* Go through all dates. */ + for (Map.Entry<String, List<double[]>> e : + extrapolated.entrySet()) { + String date = e.getKey(); + List<double[]> weightedValues = e.getValue(); + int numStats = weightedValues.size(); + + /* Sort extrapolated network totals contained in the first array + * element. (The second array element contains the computed + * network fraction as weight.) */ + Collections.sort(weightedValues, + new Comparator<double[]>() { + public int compare(double[] o1, double[] o2) { + return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0; + } + }); + + /* For the weighted mean, sum up all previously extrapolated + * values weighted with their network fractions (which happens to + * be the values that relays reported), and sum up all network + * fractions. Once we have those two sums, we can divide the sum + * of weighted extrapolated values by the sum of network fractions + * to obtain the weighted mean of extrapolated values. */ + double sumReported = 0.0, sumFraction = 0.0; + for (double[] d : weightedValues) { + sumReported += d[0] * d[1]; + sumFraction += d[1]; + } + double weightedMean = sumReported / sumFraction; + + /* For the weighted median and weighted interquartile mean, go + * through all values once again. The weighted median is the + * first extrapolated value with weight interval end greater than + * 50% of reported network fractions. For the weighted + * interquartile mean, sum up extrapolated values multiplied with + * network fractions and network fractions falling into the 25% to + * 75% range and later compute the weighted mean of those. 
*/ + double weightIntervalEnd = 0.0; + Double weightedMedian = null; + double sumFractionInterquartile = 0.0, + sumReportedInterquartile = 0.0; + for (double[] d : weightedValues) { + double extrapolatedValue = d[0], computedFraction = d[1]; + double weightIntervalStart = weightIntervalEnd; + weightIntervalEnd += computedFraction; + if (weightedMedian == null && + weightIntervalEnd > sumFraction * 0.5) { + weightedMedian = extrapolatedValue; + } + if (weightIntervalEnd >= sumFraction * 0.25 && + weightIntervalStart <= sumFraction * 0.75) { + double fractionBetweenQuartiles = + Math.min(weightIntervalEnd, sumFraction * 0.75) + - Math.max(weightIntervalStart, sumFraction * 0.25); + sumReportedInterquartile += extrapolatedValue + * fractionBetweenQuartiles; + sumFractionInterquartile += fractionBetweenQuartiles; + } + } + double weightedInterquartileMean = + sumReportedInterquartile / sumFractionInterquartile; + + /* Put together all aggregated values in a single line. */ + sb.append(String.format("%s,%s,%.0f,%.0f,%.0f,%.8f,%d%n", date, + type, weightedMean, weightedMedian, weightedInterquartileMean, + sumFraction, numStats)); + } + } + + /* Write all aggregated results to the output file. */ + try { + this.hidservStatsCsvFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.hidservStatsCsvFile)); + bw.write(sb.toString()); + bw.close(); + } catch (IOException e) { + System.err.printf("Unable to write results to %s. Ignoring."); + } + } +} + diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java new file mode 100644 index 0000000..1fe0020 --- /dev/null +++ b/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java @@ -0,0 +1,141 @@ +package org.torproject.metrics.hidserv; + +/* Computed fraction of hidden-service activity that a single relay is + * assumed to observe in the network. 
These fractions are computed from + * status entries and bandwidth weights in a network status consensus. */ +public class ComputedNetworkFractions implements Document { + + /* Relay fingerprint consisting of 40 upper-case hex characters. */ + private String fingerprint; + public String getFingerprint() { + return this.fingerprint; + } + + /* Valid-after timestamp of the consensus in milliseconds. */ + private long validAfterMillis; + public long getValidAfterMillis() { + return this.validAfterMillis; + } + + /* Fraction of cells on rendezvous circuits that this relay is assumed + * to observe in the network. */ + private double fractionRendRelayedCells; + public void setFractionRendRelayedCells( + double fractionRendRelayedCells) { + this.fractionRendRelayedCells = fractionRendRelayedCells; + } + public double getFractionRendRelayedCells() { + return this.fractionRendRelayedCells; + } + + /* Fraction of descriptors that this relay is assumed to observe in the + * network. This is calculated as the fraction of descriptors + * identifiers that this relay was responsible for, divided by 3, + * because each descriptor that is published to this directory is also + * published to two other directories. */ + private double fractionDirOnionsSeen; + public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) { + this.fractionDirOnionsSeen = fractionDirOnionsSeen; + } + public double getFractionDirOnionsSeen() { + return this.fractionDirOnionsSeen; + } + + /* Instantiate a new fractions object using fingerprint and consensus + * valid-after time which together uniquely identify the object. */ + public ComputedNetworkFractions(String fingerprint, + long validAfterMillis) { + this.fingerprint = fingerprint; + this.validAfterMillis = validAfterMillis; + } + + /* Return whether this object contains the same fingerprint and + * consensus valid-after time as the passed object. 
*/ + @Override + public boolean equals(Object otherObject) { + if (!(otherObject instanceof ComputedNetworkFractions)) { + return false; + } + ComputedNetworkFractions other = + (ComputedNetworkFractions) otherObject; + return this.fingerprint.equals(other.fingerprint) && + this.validAfterMillis == other.validAfterMillis; + } + + /* Return a (hopefully unique) hash code based on this object's + * fingerprint and consensus valid-after time. */ + @Override + public int hashCode() { + return this.fingerprint.hashCode() + + (int) this.validAfterMillis; + } + + /* Return a string representation of this object, consisting of two + * strings: the first string contains fingerprint and valid-after date, + * the second string contains the concatenation of all other + * attributes. */ + @Override + public String[] format() { + String first = String.format("%s,%s", this.fingerprint, + DateTimeHelper.format(this.validAfterMillis, + DateTimeHelper.ISO_DATE_FORMAT)); + String second = DateTimeHelper.format(this.validAfterMillis, + DateTimeHelper.ISO_HOUR_FORMAT) + + (this.fractionRendRelayedCells == 0.0 ? "," + : String.format(",%f", this.fractionRendRelayedCells)) + + (this.fractionDirOnionsSeen == 0.0 ? "," + : String.format(",%f", this.fractionDirOnionsSeen)); + return new String[] { first, second }; + } + + /* Instantiate an empty fractions object that will be initialized more + * by the parse method. */ + ComputedNetworkFractions() { + } + + /* Initialize this fractions object using the two provided strings that + * have been produced by the format method earlier. Return whether this + * operation was successful. */ + @Override + public boolean parse(String[] formattedStrings) { + if (formattedStrings.length != 2) { + System.err.printf("Invalid number of formatted strings. 
" + + "Skipping.%n", formattedStrings.length); + return false; + } + String[] firstParts = formattedStrings[0].split(",", 2); + if (firstParts.length != 2) { + System.err.printf("Invalid number of comma-separated values. " + + "Skipping.%n"); + return false; + } + String fingerprint = firstParts[0]; + String[] secondParts = formattedStrings[1].split(",", 3); + if (secondParts.length != 3) { + System.err.printf("Invalid number of comma-separated values. " + + "Skipping.%n"); + return false; + } + long validAfterMillis = DateTimeHelper.parse(firstParts[1] + " " + + secondParts[0], DateTimeHelper.ISO_DATE_HOUR_FORMAT); + if (validAfterMillis == DateTimeHelper.NO_TIME_AVAILABLE) { + System.err.printf("Invalid date/hour format. Skipping.%n"); + return false; + } + try { + double fractionRendRelayedCells = secondParts[1].equals("") + ? 0.0 : Double.parseDouble(secondParts[1]); + double fractionDirOnionsSeen = secondParts[2].equals("") + ? 0.0 : Double.parseDouble(secondParts[2]); + this.fingerprint = fingerprint; + this.validAfterMillis = validAfterMillis; + this.fractionRendRelayedCells = fractionRendRelayedCells; + this.fractionDirOnionsSeen = fractionDirOnionsSeen; + return true; + } catch (NumberFormatException e) { + System.err.printf("Invalid number format. Skipping.%n"); + return false; + } + } +} + diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/DateTimeHelper.java b/modules/hidserv/src/org/torproject/metrics/hidserv/DateTimeHelper.java new file mode 100644 index 0000000..c33a50d --- /dev/null +++ b/modules/hidserv/src/org/torproject/metrics/hidserv/DateTimeHelper.java @@ -0,0 +1,95 @@ +package org.torproject.metrics.hidserv; + +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +/* Utility class to format and parse dates and timestamps. 
*/ +public class DateTimeHelper { + + /* This class is not supposed to be instantiated, which is why its + * constructor has private visibility. */ + private DateTimeHelper() { + } + + /* Some useful time constant. */ + public static final long + ONE_SECOND = 1000L, + ONE_MINUTE = 60L * ONE_SECOND, + ONE_HOUR = 60L * ONE_MINUTE, + ONE_DAY = 24L * ONE_HOUR; + + /* Some useful date/time formats. */ + public static final String + ISO_DATETIME_FORMAT = "yyyy-MM-dd HH:mm:ss", + ISO_DATE_HOUR_FORMAT = "yyyy-MM-dd HH", + ISO_DATE_FORMAT = "yyyy-MM-dd", + ISO_HOUR_FORMAT = "HH"; + + /* Map of DateFormat instances for parsing and formatting dates and + * timestamps, protected using ThreadLocal to ensure that each thread + * uses its own instances. */ + private static ThreadLocal<Map<String, DateFormat>> dateFormats = + new ThreadLocal<Map<String, DateFormat>> () { + public Map<String, DateFormat> get() { + return super.get(); + } + protected Map<String, DateFormat> initialValue() { + return new HashMap<String, DateFormat>(); + } + public void remove() { + super.remove(); + } + public void set(Map<String, DateFormat> value) { + super.set(value); + } + }; + + /* Return an instance of DateFormat for the given format. If no such + * instance exists, create one and put it in the map. */ + private static DateFormat getDateFormat(String format) { + Map<String, DateFormat> threadDateFormats = dateFormats.get(); + if (!threadDateFormats.containsKey(format)) { + DateFormat dateFormat = new SimpleDateFormat(format); + dateFormat.setLenient(false); + dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + threadDateFormats.put(format, dateFormat); + } + return threadDateFormats.get(format); + } + + /* Format the given time in milliseconds using the given format. */ + public static String format(long millis, String format) { + return getDateFormat(format).format(millis); + } + + /* Format the given time in milliseconds using ISO date/time format. 
*/ + public static String format(long millis) { + return format(millis, ISO_DATETIME_FORMAT); + } + + /* Default result of the parse methods if the provided time could not be + * parsed. */ + public final static long NO_TIME_AVAILABLE = -1L; + + /* Parse the given string using the given format. */ + public static long parse(String string, String format) { + if (null == string) { + return NO_TIME_AVAILABLE; + } + try { + return getDateFormat(format).parse(string).getTime(); + } catch (ParseException e) { + return NO_TIME_AVAILABLE; + } + } + + /* Parse the given string using ISO date/time format. */ + public static long parse(String string) { + return parse(string, ISO_DATETIME_FORMAT); + } +} + diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Document.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Document.java new file mode 100644 index 0000000..47614f3 --- /dev/null +++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Document.java @@ -0,0 +1,19 @@ +package org.torproject.metrics.hidserv; + +/* Common interface of documents that are supposed to be serialized and + * stored in document files and later retrieved and de-serialized. */ +public interface Document { + + /* Return an array of two strings with a string representation of this + * document. The first string will be used to start a group of + * documents, the second string will be used to represent a single + * document in that group. Ideally, the first string is equivalent for + * many documents stored in the same file, and the second string is + * different for those documents. */ + public String[] format(); + + /* Initialize an object using the given array of two strings. These are + * the same two strings that the format method provides. 
*/ + public boolean parse(String[] formattedStrings); +} + diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java b/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java new file mode 100644 index 0000000..3266df5 --- /dev/null +++ b/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java @@ -0,0 +1,157 @@ +package org.torproject.metrics.hidserv; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.LineNumberReader; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +/* Utility class to store serialized objects implementing the Document + * interface to a file and later to retrieve them. */ +public class DocumentStore<T extends Document> { + + /* Document class, needed to create new instances when retrieving + * documents. */ + private Class<T> clazz; + + /* Initialize a new store object for the given type of documents. */ + DocumentStore(Class<T> clazz) { + this.clazz = clazz; + } + + /* Store the provided documents in the given file and return whether the + * storage operation was successful. If the file already existed and if + * it contains documents, merge the new documents with the existing + * ones. */ + public boolean store(File documentFile, Set<T> documentsToStore) { + + /* Retrieve existing documents. */ + Set<T> retrievedDocuments = this.retrieve(documentFile); + if (retrievedDocuments == null) { + System.err.printf("Unable to read and update %s. Not storing " + + "documents.%n", documentFile.getAbsoluteFile()); + return false; + } + + /* Merge new documents with existing ones. */ + retrievedDocuments.addAll(documentsToStore); + + /* Serialize documents. 
*/ + SortedMap<String, SortedSet<String>> formattedDocuments = + new TreeMap<String, SortedSet<String>>(); + for (T retrieveDocument : retrievedDocuments) { + String[] formattedDocument = retrieveDocument.format(); + if (!formattedDocuments.containsKey(formattedDocument[0])) { + formattedDocuments.put(formattedDocument[0], + new TreeSet<String>()); + } + formattedDocuments.get(formattedDocument[0]).add( + formattedDocument[1]); + } + + /* Check if a temporary file exists from the previous execution. */ + File documentTempFile = new File(documentFile.getAbsoluteFile() + + ".tmp"); + if (documentTempFile.exists()) { + System.err.printf("Temporary document file %s still exists, " + + "indicating that a previous execution did not terminate " + + "cleanly. Not storing documents.%n", + documentTempFile.getAbsoluteFile()); + return false; + } + + /* Write to a new temporary file, then move it into place, possibly + * overwriting an existing file. */ + try { + documentTempFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + documentTempFile)); + for (Map.Entry<String, SortedSet<String>> e : + formattedDocuments.entrySet()) { + bw.write(e.getKey() + "\n"); + for (String s : e.getValue()) { + bw.write(" " + s + "\n"); + } + } + bw.close(); + documentFile.delete(); + documentTempFile.renameTo(documentFile); + } catch (IOException e) { + System.err.printf("Unable to write %s. Not storing documents.%n", + documentFile.getAbsolutePath()); + return false; + } + + /* Return success. */ + return true; + } + + /* Retrieve all previously stored documents from the given file. */ + public Set<T> retrieve(File documentFile) { + + /* Check if the document file exists, and if not, return an empty set. + * This is not an error case. */ + Set<T> result = new HashSet<T>(); + if (!documentFile.exists()) { + return result; + } + + /* Parse the document file line by line and de-serialize contained + * documents. 
*/ + try { + LineNumberReader lnr = new LineNumberReader(new BufferedReader( + new FileReader(documentFile))); + String line, formattedString0 = null; + while ((line = lnr.readLine()) != null) { + if (!line.startsWith(" ")) { + formattedString0 = line; + } else if (formattedString0 == null) { + System.err.printf("First line in %s must not start with a " + + "space. Not retrieving any previously stored " + + "documents.%n", documentFile.getAbsolutePath()); + lnr.close(); + return null; + } else { + T document = this.clazz.newInstance(); + if (!document.parse(new String[] { formattedString0, + line.substring(1) })) { + System.err.printf("Unable to read line %d from %s. Not " + + "retrieving any previously stored documents.%n", + lnr.getLineNumber(), documentFile.getAbsolutePath()); + lnr.close(); + return null; + } + result.add(document); + } + } + lnr.close(); + } catch (IOException e) { + System.err.printf("Unable to read %s. Not retrieving any " + + "previously stored documents.%n", + documentFile.getAbsolutePath()); + e.printStackTrace(); + return null; + } catch (InstantiationException e) { + System.err.printf("Unable to read %s. Cannot instantiate document " + + "object.%n", documentFile.getAbsolutePath()); + e.printStackTrace(); + return null; + } catch (IllegalAccessException e) { + System.err.printf("Unable to read %s. Cannot instantiate document " + + "object.%n", documentFile.getAbsolutePath()); + e.printStackTrace(); + return null; + } + return result; + } +} + diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java new file mode 100644 index 0000000..52357d4 --- /dev/null +++ b/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java @@ -0,0 +1,156 @@ +package org.torproject.metrics.hidserv; + +/* Extrapolated network totals of hidden-service statistics reported by a + * single relay. 
Extrapolated values are based on reported statistics and + * computed network fractions in the statistics interval. */ +public class ExtrapolatedHidServStats implements Document { + + /* Date of statistics interval end in milliseconds. */ + private long statsDateMillis; + public long getStatsDateMillis() { + return this.statsDateMillis; + } + + /* Relay fingerprint consisting of 40 upper-case hex characters. */ + private String fingerprint; + public String getFingerprint() { + return this.fingerprint; + } + + /* Extrapolated number of cells on rendezvous circuits in the + * network. */ + private double extrapolatedRendRelayedCells; + public void setExtrapolatedRendRelayedCells( + double extrapolatedRendRelayedCells) { + this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells; + } + public double getExtrapolatedRendRelayedCells() { + return this.extrapolatedRendRelayedCells; + } + + /* Computed fraction of observed cells on rendezvous circuits in the + * network, used to weight this relay's extrapolated network total in + * the aggregation step. */ + private double fractionRendRelayedCells; + public void setFractionRendRelayedCells( + double fractionRendRelayedCells) { + this.fractionRendRelayedCells = fractionRendRelayedCells; + } + public double getFractionRendRelayedCells() { + return this.fractionRendRelayedCells; + } + + /* Extrapolated number of .onions in the network. */ + private double extrapolatedDirOnionsSeen; + public void setExtrapolatedDirOnionsSeen( + double extrapolatedDirOnionsSeen) { + this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen; + } + public double getExtrapolatedDirOnionsSeen() { + return this.extrapolatedDirOnionsSeen; + } + + /* Computed fraction of observed .onions in the network, used to weight + * this relay's extrapolated network total in the aggregation step. 
*/ + private double fractionDirOnionsSeen; + public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) { + this.fractionDirOnionsSeen = fractionDirOnionsSeen; + } + public double getFractionDirOnionsSeen() { + return this.fractionDirOnionsSeen; + } + + /* Instantiate a new stats object using fingerprint and statistics + * interval end date which together uniquely identify the object. */ + public ExtrapolatedHidServStats(long statsDateMillis, + String fingerprint) { + this.statsDateMillis = statsDateMillis; + this.fingerprint = fingerprint; + } + + /* Return whether this object contains the same fingerprint and + * statistics interval end date as the passed object. */ + @Override + public boolean equals(Object otherObject) { + if (!(otherObject instanceof ExtrapolatedHidServStats)) { + return false; + } + ExtrapolatedHidServStats other = + (ExtrapolatedHidServStats) otherObject; + return this.fingerprint.equals(other.fingerprint) && + this.statsDateMillis == other.statsDateMillis; + } + + /* Return a (hopefully unique) hash code based on this object's + * fingerprint and statistics interval end date. */ + @Override + public int hashCode() { + return this.fingerprint.hashCode() + (int) this.statsDateMillis; + } + + /* Return a string representation of this object, consisting of the + * statistics interval end date and the concatenation of all other + * attributes. */ + @Override + public String[] format() { + String first = DateTimeHelper.format(this.statsDateMillis, + DateTimeHelper.ISO_DATE_FORMAT); + String second = this.fingerprint + + (this.fractionRendRelayedCells == 0.0 ? ",," + : String.format(",%.0f,%f", this.extrapolatedRendRelayedCells, + this.fractionRendRelayedCells)) + + (this.fractionDirOnionsSeen == 0.0 ? ",," + : String.format(",%.0f,%f", this.extrapolatedDirOnionsSeen, + this.fractionDirOnionsSeen)); + return new String[] { first, second }; + } + + /* Instantiate an empty stats object that will be initialized more by + * the parse method. 
*/ + ExtrapolatedHidServStats() { + } + + /* Initialize this stats object using the two provided strings that have + * been produced by the format method earlier. Return whether this + * operation was successful. */ + @Override + public boolean parse(String[] formattedStrings) { + if (formattedStrings.length != 2) { + System.err.printf("Invalid number of formatted strings. " + + "Skipping.%n", formattedStrings.length); + return false; + } + long statsDateMillis = DateTimeHelper.parse(formattedStrings[0], + DateTimeHelper.ISO_DATE_FORMAT); + String[] secondParts = formattedStrings[1].split(",", 5); + if (secondParts.length != 5) { + System.err.printf("Invalid number of comma-separated values. " + + "Skipping.%n"); + return false; + } + String fingerprint = secondParts[0]; + double extrapolatedRendRelayedCells = 0.0, + fractionRendRelayedCells = 0.0, extrapolatedDirOnionsSeen = 0.0, + fractionDirOnionsSeen = 0.0; + try { + extrapolatedRendRelayedCells = secondParts[1].equals("") ? 0.0 + : Double.parseDouble(secondParts[1]); + fractionRendRelayedCells = secondParts[2].equals("") ? 0.0 + : Double.parseDouble(secondParts[2]); + extrapolatedDirOnionsSeen = secondParts[3].equals("") ? 0.0 + : Double.parseDouble(secondParts[3]); + fractionDirOnionsSeen = secondParts[4].equals("") ? 
0.0 + : Double.parseDouble(secondParts[4]); + } catch (NumberFormatException e) { + return false; + } + this.statsDateMillis = statsDateMillis; + this.fingerprint = fingerprint; + this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells; + this.fractionRendRelayedCells = fractionRendRelayedCells; + this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen; + this.fractionDirOnionsSeen = fractionDirOnionsSeen; + return true; + } +} + diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java new file mode 100644 index 0000000..a1ff075 --- /dev/null +++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java @@ -0,0 +1,251 @@ +package org.torproject.metrics.hidserv; + +import java.io.File; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +/* Extrapolate hidden-service statistics reported by single relays by + * dividing them by the computed fraction of hidden-service activity + * observed by the relay. */ +public class Extrapolator { + + /* Document file containing previously parsed reported hidden-service + * statistics. */ + private File reportedHidServStatsFile; + + /* Document store for storing and retrieving reported hidden-service + * statistics. */ + private DocumentStore<ReportedHidServStats> reportedHidServStatsStore; + + /* Directory containing document files with previously computed network + * fractions. */ + private File computedNetworkFractionsDirectory; + + /* Document store for storing and retrieving computed network + * fractions. */ + private DocumentStore<ComputedNetworkFractions> + computedNetworkFractionsStore; + + /* Document file containing extrapolated hidden-service statistics. 
*/ + private File extrapolatedHidServStatsFile; + + /* Document store for storing and retrieving extrapolated hidden-service + * statistics. */ + private DocumentStore<ExtrapolatedHidServStats> + extrapolatedHidServStatsStore; + + /* Initialize a new extrapolator object using the given directory and + * document stores. */ + public Extrapolator(File statusDirectory, + DocumentStore<ReportedHidServStats> reportedHidServStatsStore, + DocumentStore<ComputedNetworkFractions> + computedNetworkFractionsStore, + DocumentStore<ExtrapolatedHidServStats> + extrapolatedHidServStatsStore) { + + /* Create File instances for the files and directories in the provided + * status directory. */ + this.reportedHidServStatsFile = new File(statusDirectory, + "reported-hidserv-stats"); + this.computedNetworkFractionsDirectory = + new File(statusDirectory, "computed-network-fractions"); + this.extrapolatedHidServStatsFile = new File(statusDirectory, + "extrapolated-hidserv-stats"); + + /* Store references to the provided document stores. */ + this.reportedHidServStatsStore = reportedHidServStatsStore; + this.computedNetworkFractionsStore = computedNetworkFractionsStore; + this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore; + } + + /* Iterate over all reported stats and extrapolate network totals for + * those that have not been extrapolated before. */ + public boolean extrapolateHidServStats() { + + /* Retrieve previously extrapolated stats to avoid extrapolating them + * again. */ + Set<ExtrapolatedHidServStats> extrapolatedStats = + this.extrapolatedHidServStatsStore.retrieve( + this.extrapolatedHidServStatsFile); + + /* Retrieve all reported stats, even including those that have already + * been extrapolated. */ + Set<ReportedHidServStats> reportedStats = + this.reportedHidServStatsStore.retrieve( + this.reportedHidServStatsFile); + + /* Make sure that all documents could be retrieved correctly. 
*/ + if (extrapolatedStats == null || reportedStats == null) { + System.err.printf("Could not read previously parsed or " + + "extrapolated hidserv-stats. Skipping."); + return false; + } + + /* Re-arrange reported stats by fingerprint. */ + SortedMap<String, Set<ReportedHidServStats>> + parsedStatsByFingerprint = + new TreeMap<String, Set<ReportedHidServStats>>(); + for (ReportedHidServStats stat : reportedStats) { + String fingerprint = stat.getFingerprint(); + if (!parsedStatsByFingerprint.containsKey(fingerprint)) { + parsedStatsByFingerprint.put(fingerprint, + new HashSet<ReportedHidServStats>()); + } + parsedStatsByFingerprint.get(fingerprint).add(stat); + } + + /* Go through reported stats by fingerprint. */ + for (Map.Entry<String, Set<ReportedHidServStats>> e : + parsedStatsByFingerprint.entrySet()) { + String fingerprint = e.getKey(); + + /* Iterate over all stats reported by this relay and make a list of + * those that still need to be extrapolated. Also make a list of + * all dates for which we need to retrieve computed network + * fractions. */ + Set<ReportedHidServStats> newReportedStats = + new HashSet<ReportedHidServStats>(); + SortedSet<String> retrieveFractionDates = new TreeSet<String>(); + for (ReportedHidServStats stats : e.getValue()) { + + /* Check whether extrapolated stats already contain an object with + * the same statistics interval end date and fingerprint. */ + long statsDateMillis = (stats.getStatsEndMillis() + / DateTimeHelper.ONE_DAY) * DateTimeHelper.ONE_DAY; + if (extrapolatedStats.contains( + new ExtrapolatedHidServStats(statsDateMillis, fingerprint))) { + continue; + } + + /* Add the reported stats to the list of stats we still need to + * extrapolate. */ + newReportedStats.add(stats); + + /* Add all dates between statistics interval start and end to a + * list. 
*/ + long statsEndMillis = stats.getStatsEndMillis(); + long statsStartMillis = statsEndMillis + - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND; + for (long millis = statsStartMillis; millis <= statsEndMillis; + millis += DateTimeHelper.ONE_DAY) { + String date = DateTimeHelper.format(millis, + DateTimeHelper.ISO_DATE_FORMAT); + retrieveFractionDates.add(date); + } + } + + /* Retrieve all computed network fractions that might be needed to + * extrapolate new statistics. Keep a list of all known consensus + * valid-after times, and keep a map of fractions also by consensus + * valid-after time. (It's not sufficient to only keep the latter, + * because we need to count known consensuses even if the relay was + * not contained in a consensus or had a network fraction of exactly + * zero.) */ + SortedSet<Long> knownConsensuses = new TreeSet<Long>(); + SortedMap<Long, ComputedNetworkFractions> computedNetworkFractions = + new TreeMap<Long, ComputedNetworkFractions>(); + for (String date : retrieveFractionDates) { + File documentFile = new File( + this.computedNetworkFractionsDirectory, date); + Set<ComputedNetworkFractions> fractions + = this.computedNetworkFractionsStore.retrieve(documentFile); + for (ComputedNetworkFractions fraction : fractions) { + knownConsensuses.add(fraction.getValidAfterMillis()); + if (fraction.getFingerprint().equals(fingerprint)) { + computedNetworkFractions.put(fraction.getValidAfterMillis(), + fraction); + } + } + } + + /* Go through newly reported stats, match them with computed network + * fractions, and extrapolate network totals. 
*/ + for (ReportedHidServStats stats : newReportedStats) { + long statsEndMillis = stats.getStatsEndMillis(); + long statsDateMillis = (statsEndMillis / DateTimeHelper.ONE_DAY) + * DateTimeHelper.ONE_DAY; + long statsStartMillis = statsEndMillis + - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND; + + /* Sum up computed network fractions and count known consensus in + * the relevant interval, so that we can later compute means of + * network fractions. */ + double sumFractionRendRelayedCells = 0.0, + sumFractionDirOnionsSeen = 0.0; + int consensuses = 0; + for (long validAfterMillis : knownConsensuses) { + if (statsStartMillis <= validAfterMillis && + validAfterMillis < statsEndMillis) { + if (computedNetworkFractions.containsKey(validAfterMillis)) { + ComputedNetworkFractions frac = + computedNetworkFractions.get(validAfterMillis); + sumFractionRendRelayedCells += + frac.getFractionRendRelayedCells(); + sumFractionDirOnionsSeen += + frac.getFractionDirOnionsSeen(); + } + consensuses++; + } + } + + /* If we don't know a single consensus with valid-after time in + * the statistics interval, skip this stat. */ + if (consensuses == 0) { + continue; + } + + /* Compute means of network fractions. */ + double fractionRendRelayedCells = + sumFractionRendRelayedCells / consensuses; + double fractionDirOnionsSeen = + sumFractionDirOnionsSeen / consensuses; + + /* If at least one fraction is positive, extrapolate network + * totals. */ + if (fractionRendRelayedCells > 0.0 || + fractionDirOnionsSeen > 0.0) { + ExtrapolatedHidServStats extrapolated = + new ExtrapolatedHidServStats( + statsDateMillis, fingerprint); + if (fractionRendRelayedCells > 0.0) { + extrapolated.setFractionRendRelayedCells( + fractionRendRelayedCells); + /* Extrapolating cells on rendezvous circuits is as easy as + * dividing the reported number by the computed network + * fraction. 
*/ + double extrapolatedRendRelayedCells = + stats.getRendRelayedCells() / fractionRendRelayedCells; + extrapolated.setExtrapolatedRendRelayedCells( + extrapolatedRendRelayedCells); + } + if (fractionDirOnionsSeen > 0.0) { + extrapolated.setFractionDirOnionsSeen( + fractionDirOnionsSeen); + /* Extrapolating reported unique .onion addresses to the + * total number in the network is more difficult. In short, + * each descriptor is stored to 12 (likely) different + * directories, so we'll have to divide the reported number by + * 12 and then by the computed network fraction of this + * directory. */ + double extrapolatedDirOnionsSeen = + stats.getDirOnionsSeen() / (12.0 * fractionDirOnionsSeen); + extrapolated.setExtrapolatedDirOnionsSeen( + extrapolatedDirOnionsSeen); + } + extrapolatedStats.add(extrapolated); + } + } + } + + /* Store all extrapolated network totals to disk with help of the + * document store. */ + return this.extrapolatedHidServStatsStore.store( + this.extrapolatedHidServStatsFile, extrapolatedStats); + } +} + diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java new file mode 100644 index 0000000..1e53bd0 --- /dev/null +++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java @@ -0,0 +1,91 @@ +package org.torproject.metrics.hidserv; + +import java.io.File; +import java.util.HashSet; +import java.util.Set; + +/* Main class for updating extrapolated network totals of hidden-service + * statistics. The main method of this class can be executed as often as + * new statistics are needed, though callers must ensure that executions + * do not overlap. */ +public class Main { + + /* Parse new descriptors, extrapolate contained statistics using + * computed network fractions, aggregate results, and write results to + * disk. */ + public static void main(String[] args) { + + /* Initialize directories and file paths. 
*/ + Set<File> inDirectories = new HashSet<File>(); + inDirectories.add( + new File("../../shared/in/relay-descriptors/consensuses")); + inDirectories.add( + new File("../../shared/in/relay-descriptors/extra-infos")); + File statusDirectory = new File("status"); + File hidservStatsExtrapolatedCsvFile = new File("stats/hidserv.csv"); + + /* Initialize document stores that will handle writing documents to + * files. */ + DocumentStore<ReportedHidServStats> reportedHidServStatsStore = + new DocumentStore<ReportedHidServStats>( + ReportedHidServStats.class); + DocumentStore<ComputedNetworkFractions> + computedNetworkFractionsStore = + new DocumentStore<ComputedNetworkFractions>( + ComputedNetworkFractions.class); + DocumentStore<ExtrapolatedHidServStats> extrapolatedHidServStatsStore + = new DocumentStore<ExtrapolatedHidServStats>( + ExtrapolatedHidServStats.class); + + /* Initialize parser and read parse history to avoid parsing + * descriptor files that haven't changed since the last execution. */ + System.out.println("Initializing parser and reading parse " + + "history..."); + Parser parser = new Parser(inDirectories, statusDirectory, + reportedHidServStatsStore, computedNetworkFractionsStore); + parser.readParseHistory(); + + /* Parse new descriptors and store their contents using the document + * stores. */ + System.out.println("Parsing descriptors..."); + if (!parser.parseDescriptors()) { + System.err.println("Could not store parsed descriptors. " + + "Terminating."); + return; + } + + /* Write the parse history to avoid parsing descriptor files again + * next time. It's okay to do this now and not at the end of the + * execution, because even if something breaks apart below, it's safe + * not to parse descriptor files again. */ + System.out.println("Writing parse history..."); + parser.writeParseHistory(); + + /* Extrapolate reported statistics using computed network fractions + * and write the result to disk using a document store. 
The result is + * a single file with extrapolated network totals based on reports by + * single relays. */ + System.out.println("Extrapolating statistics..."); + Extrapolator extrapolator = new Extrapolator(statusDirectory, + reportedHidServStatsStore, computedNetworkFractionsStore, + extrapolatedHidServStatsStore); + if (!extrapolator.extrapolateHidServStats()) { + System.err.println("Could not extrapolate statistics. " + + "Terminating."); + return; + } + + /* Go through all extrapolated network totals and aggregate them. + * This includes calculating daily weighted interquartile means, among + * other statistics. Write the result to a .csv file that can be + * processed by other tools. */ + System.out.println("Aggregating statistics..."); + Aggregator aggregator = new Aggregator(statusDirectory, + extrapolatedHidServStatsStore, hidservStatsExtrapolatedCsvFile); + aggregator.aggregateHidServStats(); + + /* End this execution. */ + System.out.println("Terminating."); + } +} + diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java new file mode 100644 index 0000000..85f7d91 --- /dev/null +++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java @@ -0,0 +1,484 @@ +package org.torproject.metrics.hidserv; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.math.BigInteger; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Scanner; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import 
org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.ExtraInfoDescriptor; +import org.torproject.descriptor.NetworkStatusEntry; +import org.torproject.descriptor.RelayNetworkStatusConsensus; + +/* Parse hidden-service statistics from extra-info descriptors, compute + * network fractions from consensuses, and write parsed contents to + * document files for later use. */ +public class Parser { + + /* File containing tuples of last-modified times and file names of + * descriptor files parsed in the previous execution. */ + private File parseHistoryFile; + + /* Descriptor reader to provide parsed extra-info descriptors and + * consensuses. */ + private DescriptorReader descriptorReader; + + /* Document file containing previously parsed reported hidden-service + * statistics. */ + private File reportedHidServStatsFile; + + /* Document store for storing and retrieving reported hidden-service + * statistics. */ + private DocumentStore<ReportedHidServStats> reportedHidServStatsStore; + + /* Directory containing document files with previously computed network + * fractions. */ + private File computedNetworkFractionsDirectory; + + /* Document store for storing and retrieving computed network + * fractions. */ + private DocumentStore<ComputedNetworkFractions> + computedNetworkFractionsStore; + + /* Initialize a new parser object using the given directories and + * document stores. */ + public Parser(Set<File> inDirectories, File statusDirectory, + DocumentStore<ReportedHidServStats> reportedHidServStatsStore, + DocumentStore<ComputedNetworkFractions> + computedNetworkFractionsStore) { + + /* Create a new descriptor reader for reading descriptors in the given + * in directory. Configure the reader to avoid having more than five + * parsed descriptors in the queue, rather than the default one + * hundred. 
Five is a compromise between very large consensuses and + * rather small extra-info descriptors. */ + this.descriptorReader = + DescriptorSourceFactory.createDescriptorReader(); + for (File inDirectory : inDirectories) { + this.descriptorReader.addDirectory(inDirectory); + } + this.descriptorReader.setMaxDescriptorFilesInQueue(5); + + /* Create File instances for the files and directories in the provided + * status directory. */ + this.parseHistoryFile = new File(statusDirectory, "parse-history"); + this.reportedHidServStatsFile = new File(statusDirectory, + "reported-hidserv-stats"); + this.computedNetworkFractionsDirectory = + new File(statusDirectory, "computed-network-fractions"); + + /* Store references to the provided document stores. */ + this.reportedHidServStatsStore = reportedHidServStatsStore; + this.computedNetworkFractionsStore = computedNetworkFractionsStore; + } + + /* Read the parse history file to avoid parsing descriptor files that + * have not changed since the previous execution. */ + public void readParseHistory() { + if (this.parseHistoryFile.exists() && + this.parseHistoryFile.isFile()) { + SortedMap<String, Long> excludedFiles = + new TreeMap<String, Long>(); + try { + BufferedReader br = new BufferedReader(new FileReader( + this.parseHistoryFile)); + String line; + while ((line = br.readLine()) != null) { + try { + /* Each line is supposed to contain the last-modified time and + * absolute path of a descriptor file. */ + String[] parts = line.split(" ", 2); + excludedFiles.put(parts[1], Long.parseLong(parts[0])); + } catch (NumberFormatException e) { + System.err.printf("Illegal line '%s' in parse history. " + + "Skipping line.%n", line); + } + } + br.close(); + } catch (IOException e) { + System.err.printf("Could not read history file '%s'. Not " + + "excluding descriptors in this execution.", + this.parseHistoryFile.getAbsolutePath()); + } + + /* Tell the descriptor reader to exclude the files contained in the + * parse history file. 
*/ + this.descriptorReader.setExcludedFiles(excludedFiles); + } + } + + /* Write parsed or skipped descriptor files with last-modified times and + * absolute paths to the parse history file to avoid parsing these files + * again, unless they change until the next execution. */ + public void writeParseHistory() { + + /* Obtain the list of descriptor files that were either parsed now or + * that were skipped in this execution from the descriptor reader. */ + SortedMap<String, Long> excludedAndParsedFiles = + new TreeMap<String, Long>(); + excludedAndParsedFiles.putAll( + this.descriptorReader.getExcludedFiles()); + excludedAndParsedFiles.putAll(this.descriptorReader.getParsedFiles()); + try { + this.parseHistoryFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.parseHistoryFile)); + for (Map.Entry<String, Long> e : + excludedAndParsedFiles.entrySet()) { + /* Each line starts with the last-modified time of the descriptor + * file, followed by its absolute path. */ + String absolutePath = e.getKey(); + long lastModifiedMillis = e.getValue(); + bw.write(String.valueOf(lastModifiedMillis) + " " + absolutePath + + "\n"); + } + bw.close(); + } catch (IOException e) { + System.err.printf("Could not write history file '%s'. Not " + + "excluding descriptors in next execution.", + this.parseHistoryFile.getAbsolutePath()); + } + } + + /* Set of all reported hidden-service statistics. To date, these + * objects are small, and keeping them all in memory is easy. But if + * this ever changes, e.g., when more and more statistics are added, + * this may not scale. */ + private Set<ReportedHidServStats> reportedHidServStats = + new HashSet<ReportedHidServStats>(); + + /* Instruct the descriptor reader to parse descriptor files, and handle + * the resulting parsed descriptors if they are either extra-info + * descriptors or consensuses. 
*/ + public boolean parseDescriptors() { + Iterator<DescriptorFile> descriptorFiles = + this.descriptorReader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (descriptor instanceof ExtraInfoDescriptor) { + this.parseExtraInfoDescriptor((ExtraInfoDescriptor) descriptor); + } else if (descriptor instanceof RelayNetworkStatusConsensus) { + if (!this.parseRelayNetworkStatusConsensus( + (RelayNetworkStatusConsensus) descriptor)) { + return false; + } + } + } + } + + /* Store reported hidden-service statistics to their document file. + * It's more efficient to only do this once after processing all + * descriptors. In contrast, sets of computed network fractions are + * stored immediately after processing the consensus they are based + * on. */ + return this.reportedHidServStatsStore.store( + this.reportedHidServStatsFile, this.reportedHidServStats); + } + + /* Parse the given extra-info descriptor by extracting its fingerprint + * and contained hidserv-* lines. If a valid set of hidserv-stats can + * be extracted, create a new stats object that will later be stored to + * a document file. */ + private void parseExtraInfoDescriptor( + ExtraInfoDescriptor extraInfoDescriptor) { + + /* Extract the fingerprint from the parsed descriptor. */ + String fingerprint = extraInfoDescriptor.getFingerprint(); + + /* Parse the descriptor once more to extract any hidserv-* lines. + * This is necessary, because these lines are not yet supported by the + * descriptor-parsing library. 
*/ + Scanner scanner = new Scanner(new ByteArrayInputStream( + extraInfoDescriptor.getRawDescriptorBytes())); + Long statsEndMillis = null, statsIntervalSeconds = null, + rendRelayedCells = null, rendRelayedCellsBinSize = null, + dirOnionsSeen = null, dirOnionsSeenBinSize = null; + try { + while (scanner.hasNext()) { + String line = scanner.nextLine(); + if (line.startsWith("hidserv-")) { + String[] parts = line.split(" "); + if (parts[0].equals("hidserv-stats-end")) { + /* Parse statistics end and statistics interval length. */ + if (parts.length != 5 || !parts[3].startsWith("(") || + !parts[4].equals("s)")) { + /* Will warn below, because statsEndMillis is still null. */ + continue; + } + statsEndMillis = DateTimeHelper.parse(parts[1] + " " + + parts[2]); + statsIntervalSeconds = Long.parseLong(parts[3].substring(1)); + } else if (parts[0].equals("hidserv-rend-relayed-cells")) { + /* Parse the reported number of cells on rendezvous circuits + * and the bin size used by the relay to obfuscate that + * number. */ + if (parts.length != 5 || + !parts[4].startsWith("bin_size=")) { + /* Will warn below, because rendRelayedCells is still + * null. */ + continue; + } + rendRelayedCells = Long.parseLong(parts[1]); + rendRelayedCellsBinSize = + Long.parseLong(parts[4].substring(9)); + } else if (parts[0].equals("hidserv-dir-onions-seen")) { + /* Parse the reported number of distinct .onion addresses and + * the bin size used by the relay to obfuscate that number. */ + if (parts.length != 5 || + !parts[4].startsWith("bin_size=")) { + /* Will warn below, because dirOnionsSeen is still null. */ + continue; + } + dirOnionsSeen = Long.parseLong(parts[1]); + dirOnionsSeenBinSize = Long.parseLong(parts[4].substring(9)); + } + } + } + } catch (NumberFormatException e) { + e.printStackTrace(); + return; + } + + /* If the descriptor did not contain any of the expected hidserv-* + * lines, don't do anything. 
This applies to the majority of + * descriptors, at least as long as only a minority of relays reports + * these statistics. */ + if (statsEndMillis == null && rendRelayedCells == null && + dirOnionsSeen == null) { + return; + + /* If the descriptor contained all expected hidserv-* lines, create a + * new stats object and put it in the local map, so that it will later + * be written to a document file. */ + } else if (statsEndMillis != null && + statsEndMillis != DateTimeHelper.NO_TIME_AVAILABLE && + statsIntervalSeconds != null && rendRelayedCells != null && + dirOnionsSeen != null) { + ReportedHidServStats reportedStats = new ReportedHidServStats( + fingerprint, statsEndMillis); + reportedStats.setStatsIntervalSeconds(statsIntervalSeconds); + reportedStats.setRendRelayedCells(this.removeNoise(rendRelayedCells, + rendRelayedCellsBinSize)); + reportedStats.setDirOnionsSeen(this.removeNoise(dirOnionsSeen, + dirOnionsSeenBinSize)); + this.reportedHidServStats.add(reportedStats); + + /* If the descriptor contained some but not all hidserv-* lines, print + * out a warning. This case does not warrant any further action, + * because relays can in theory write anything in their extra-info + * descriptors. But maybe we'll want to know. */ + } else { + System.err.println("Relay " + fingerprint + " published " + + "incomplete hidserv-stats. Ignoring."); + } + } + + /* Remove noise from a reported stats value by rounding to the nearest + * right side of a bin and subtracting half of the bin size. */ + private long removeNoise(long reportedNumber, long binSize) { + long roundedToNearestRightSideOfTheBin = + ((reportedNumber + binSize / 2) / binSize) * binSize; + long subtractedHalfOfBinSize = + roundedToNearestRightSideOfTheBin - binSize / 2; + return subtractedHalfOfBinSize; + } + + public boolean parseRelayNetworkStatusConsensus( + RelayNetworkStatusConsensus consensus) { + + /* Make sure that the consensus contains Wxx weights. 
*/ + SortedMap<String, Integer> bandwidthWeights = + consensus.getBandwidthWeights(); + if (bandwidthWeights == null) { + System.err.printf("Consensus with valid-after time %s doesn't " + + "contain any Wxx weights. Skipping.%n", + DateTimeHelper.format(consensus.getValidAfterMillis())); + return false; + } + + /* More precisely, make sure that it contains Wmx weights, and then + * parse them. */ + SortedSet<String> expectedWeightKeys = + new TreeSet<String>(Arrays.asList("Wmg,Wmm,Wme,Wmd".split(","))); + expectedWeightKeys.removeAll(bandwidthWeights.keySet()); + if (!expectedWeightKeys.isEmpty()) { + System.err.printf("Consensus with valid-after time %s doesn't " + + "contain expected Wmx weights. Skipping.%n", + DateTimeHelper.format(consensus.getValidAfterMillis())); + return false; + } + double wmg = ((double) bandwidthWeights.get("Wmg")) / 10000.0; + double wmm = ((double) bandwidthWeights.get("Wmm")) / 10000.0; + double wme = ((double) bandwidthWeights.get("Wme")) / 10000.0; + double wmd = ((double) bandwidthWeights.get("Wmd")) / 10000.0; + + /* Keep a sorted set with the fingerprints of all hidden-service + * directories, in reverse order, so that we can later determine the + * fingerprint distance between a directory and the directory + * preceding it by three positions in the descriptor ring. */ + SortedSet<String> hsDirs = new TreeSet<String>( + Collections.reverseOrder()); + + /* Prepare for computing the weights of all relays with the Fast flag + * for being selected in the middle position. */ + double totalWeightsRendezvousPoint = 0.0; + SortedMap<String, Double> weightsRendezvousPoint = + new TreeMap<String, Double>(); + + /* Go through all status entries contained in the consensus. 
*/ + for (Map.Entry<String, NetworkStatusEntry> e : + consensus.getStatusEntries().entrySet()) { + String fingerprint = e.getKey(); + NetworkStatusEntry statusEntry = e.getValue(); + SortedSet<String> flags = statusEntry.getFlags(); + + /* Add the relay to the set of hidden-service directories if it has + * the HSDir flag. */ + if (flags.contains("HSDir")) { + hsDirs.add(statusEntry.getFingerprint()); + } + + /* Compute the probability for being selected as rendezvous point. + * If the relay has the Fast flag, multiply its consensus weight + * with the correct Wmx weight, depending on whether the relay has + * the Guard and/or Exit flag. */ + double weightRendezvousPoint = 0.0; + if (flags.contains("Fast")) { + weightRendezvousPoint = (double) statusEntry.getBandwidth(); + if (flags.contains("Guard") && flags.contains("Exit")) { + weightRendezvousPoint *= wmd; + } else if (flags.contains("Guard")) { + weightRendezvousPoint *= wmg; + } else if (flags.contains("Exit")) { + weightRendezvousPoint *= wme; + } else { + weightRendezvousPoint *= wmm; + } + } + weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint); + totalWeightsRendezvousPoint += weightRendezvousPoint; + } + + /* Store all computed network fractions based on this consensus in a + * set, which will then be written to disk in a single store + * operation. */ + Set<ComputedNetworkFractions> computedNetworkFractions = + new HashSet<ComputedNetworkFractions>(); + + /* Remove all previously added directory fingerprints and re-add them + * twice, once with a leading "0" and once with a leading "1". The + * purpose is to simplify the logic for moving from one fingerprint to + * the previous one, even if that would mean traversing the ring + * start. For example, the fingerprint preceding "1""00..0000" with + * the first "1" being added here could be "0""FF..FFFF". 
*/ + SortedSet<String> hsDirsCopy = new TreeSet<String>(hsDirs); + hsDirs.clear(); + for (String fingerprint : hsDirsCopy) { + hsDirs.add("0" + fingerprint); + hsDirs.add("1" + fingerprint); + } + + /* Define the total ring size to compute fractions below. This is + * 16^40 or 2^160. */ + final double RING_SIZE = new BigInteger( + "10000000000000000000000000000000000000000", + 16).doubleValue(); + + /* Go through all status entries again, this time computing network + * fractions. */ + for (Map.Entry<String, NetworkStatusEntry> e : + consensus.getStatusEntries().entrySet()) { + String fingerprint = e.getKey(); + NetworkStatusEntry statusEntry = e.getValue(); + double fractionRendRelayedCells = 0.0, + fractionDirOnionsSeen = 0.0; + if (statusEntry != null) { + + /* Check if the relay is a hidden-service directory by looking up + * its fingerprint, preceded by "1", in the sorted set that we + * populated above. */ + String fingerprintPrecededByOne = "1" + fingerprint; + if (hsDirs.contains(fingerprintPrecededByOne)) { + + /* Move three positions in the sorted set, which is in reverse + * order, to learn the fingerprint of the directory preceding + * this directory by three positions. */ + String startResponsible = fingerprint; + int positionsToGo = 3; + for (String hsDirFingerprint : + hsDirs.tailSet(fingerprintPrecededByOne)) { + startResponsible = hsDirFingerprint; + if (positionsToGo-- <= 0) { + break; + } + } + + /* Compute the fraction of descriptor space that this relay is + * responsible for as difference between the two fingerprints + * divided by the ring size. */ + fractionDirOnionsSeen = + new BigInteger(fingerprintPrecededByOne, 16).subtract( + new BigInteger(startResponsible, 16)).doubleValue() + / RING_SIZE; + + /* Divide this fraction by three to obtain the fraction of + * descriptors that this directory has seen. This step is + * necessary, because each descriptor that is published to this + * directory is also published to two other directories. 
*/ + fractionDirOnionsSeen /= 3.0; + } + + /* Compute the fraction of cells on rendezvous circuits that this + * relay has seen by dividing its previously calculated weight by + * the sum of all such weights. */ + fractionRendRelayedCells = weightsRendezvousPoint.get(fingerprint) + / totalWeightsRendezvousPoint; + } + + /* If at least one of the computed fractions is non-zero, create a + * new fractions object. */ + if (fractionRendRelayedCells > 0.0 || fractionDirOnionsSeen > 0.0) { + ComputedNetworkFractions fractions = new ComputedNetworkFractions( + fingerprint, consensus.getValidAfterMillis()); + fractions.setFractionRendRelayedCells(fractionRendRelayedCells); + fractions.setFractionDirOnionsSeen(fractionDirOnionsSeen); + computedNetworkFractions.add(fractions); + } + } + + /* Store all newly computed network fractions to a documents file. + * The same file also contains computed network fractions from other + * consensuses that were valid on the same day. This is in contrast + * to the other documents which are all stored in a single file, which + * would not scale for computed network fractions. */ + String date = DateTimeHelper.format(consensus.getValidAfterMillis(), + DateTimeHelper.ISO_DATE_FORMAT); + File documentFile = new File(this.computedNetworkFractionsDirectory, + date); + if (!this.computedNetworkFractionsStore.store(documentFile, + computedNetworkFractions)) { + return false; + } + return true; + } +} + diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java new file mode 100644 index 0000000..996a70a --- /dev/null +++ b/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java @@ -0,0 +1,130 @@ +package org.torproject.metrics.hidserv; + +/* Hidden-service statistics reported by a single relay covering a single + * statistics interval of usually 24 hours. 
These statistics are reported + * by the relay in the "hidserv-" lines of its extra-info descriptor. */ +public class ReportedHidServStats implements Document { + + /* Relay fingerprint consisting of 40 upper-case hex characters. */ + private String fingerprint; + public String getFingerprint() { + return this.fingerprint; + } + + /* Hidden-service statistics end timestamp in milliseconds. */ + private long statsEndMillis; + public long getStatsEndMillis() { + return this.statsEndMillis; + } + + /* Statistics interval length in seconds. */ + private long statsIntervalSeconds; + public void setStatsIntervalSeconds(long statsIntervalSeconds) { + this.statsIntervalSeconds = statsIntervalSeconds; + } + public long getStatsIntervalSeconds() { + return this.statsIntervalSeconds; + } + + /* Number of relayed cells on rendezvous circuits as reported by the + * relay and adjusted by rounding to the nearest right side of a bin and + * subtracting half of the bin size. */ + private long rendRelayedCells; + public void setRendRelayedCells(long rendRelayedCells) { + this.rendRelayedCells = rendRelayedCells; + } + public long getRendRelayedCells() { + return this.rendRelayedCells; + } + + /* Number of distinct .onion addresses as reported by the relay and + * adjusted by rounding to the nearest right side of a bin and + * subtracting half of the bin size. */ + private long dirOnionsSeen; + public void setDirOnionsSeen(long dirOnionsSeen) { + this.dirOnionsSeen = dirOnionsSeen; + } + public long getDirOnionsSeen() { + return this.dirOnionsSeen; + } + + /* Instantiate a new stats object using fingerprint and stats interval + * end which together uniquely identify the object. */ + public ReportedHidServStats(String fingerprint, long statsEndMillis) { + this.fingerprint = fingerprint; + this.statsEndMillis = statsEndMillis; + } + + /* Return whether this object contains the same fingerprint and stats + * interval end as the passed object. 
*/ + @Override + public boolean equals(Object otherObject) { + if (!(otherObject instanceof ReportedHidServStats)) { + return false; + } + ReportedHidServStats other = (ReportedHidServStats) otherObject; + return this.fingerprint.equals(other.fingerprint) && + this.statsEndMillis == other.statsEndMillis; + } + + /* Return a (hopefully unique) hash code based on this object's + * fingerprint and stats interval end. */ + @Override + public int hashCode() { + return this.fingerprint.hashCode() + (int) this.statsEndMillis; + } + + /* Return a string representation of this object, consisting of + * fingerprint and the concatenation of all other attributes. */ + @Override + public String[] format() { + String first = this.fingerprint; + String second = String.format("%s,%d,%d,%d", + DateTimeHelper.format(this.statsEndMillis), + this.statsIntervalSeconds, this.rendRelayedCells, + this.dirOnionsSeen); + return new String[] { first, second }; + } + + /* Instantiate an empty stats object that will be initialized more by + * the parse method. */ + ReportedHidServStats() { + } + + /* Initialize this stats object using the two provided strings that have + * been produced by the format method earlier. Return whether this + * operation was successful. */ + @Override + public boolean parse(String[] formattedStrings) { + if (formattedStrings.length != 2) { + System.err.printf("Invalid number of formatted strings. 
" + + "Skipping.%n", formattedStrings.length); + return false; + } + String fingerprint = formattedStrings[0]; + String[] secondParts = formattedStrings[1].split(",", 4); + if (secondParts.length != 4) { + return false; + } + long statsEndMillis = DateTimeHelper.parse(secondParts[0]); + if (statsEndMillis == DateTimeHelper.NO_TIME_AVAILABLE) { + return false; + } + long statsIntervalSeconds = -1L, rendRelayedCells = -1L, + dirOnionsSeen = -1L; + try { + statsIntervalSeconds = Long.parseLong(secondParts[1]); + rendRelayedCells = Long.parseLong(secondParts[2]); + dirOnionsSeen = Long.parseLong(secondParts[3]); + } catch (NumberFormatException e) { + return false; + } + this.fingerprint = fingerprint; + this.statsEndMillis = statsEndMillis; + this.statsIntervalSeconds = statsIntervalSeconds; + this.rendRelayedCells = rendRelayedCells; + this.dirOnionsSeen = dirOnionsSeen; + return true; + } +} + diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java new file mode 100644 index 0000000..db7d065 --- /dev/null +++ b/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java @@ -0,0 +1,360 @@ +package org.torproject.metrics.hidserv; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +/* NOTE: This class is not required for running the Main class! (It + * contains its own main method.) 
 */
public class Simulate {

  /* Output file for the rendezvous-cell extrapolation simulation. */
  private static File simCellsCsvFile =
      new File("out/csv/sim-cells.csv");

  /* Output file for the .onion-address extrapolation simulation. */
  private static File simOnionsCsvFile =
      new File("out/csv/sim-onions.csv");

  /* Run both simulations and write their results to the two CSV files
   * above. */
  public static void main(String[] args) throws Exception {
    System.out.print("Simulating extrapolation of rendezvous cells");
    simulateManyCells();
    System.out.print("\nSimulating extrapolation of .onions");
    simulateManyOnions();
    System.out.println("\nTerminating.");
  }

  /* Single unseeded random source shared by all simulation runs; results
   * are therefore not reproducible between invocations. */
  private static Random rnd = new Random();

  /* Run the rendezvous-cell simulation 1000 times and write one CSV line
   * per run and fraction.
   *
   * NOTE(review): the writer is not closed if a run throws; acceptable
   * for a stand-alone simulation tool, but worth confirming. */
  private static void simulateManyCells() throws Exception {
    simCellsCsvFile.getParentFile().mkdirs();
    BufferedWriter bw = new BufferedWriter(new FileWriter(
        simCellsCsvFile));
    bw.write("run,frac,wmean,wmedian,wiqm\n");
    final int numberOfExtrapolations = 1000;
    for (int i = 0; i < numberOfExtrapolations; i++) {
      bw.write(simulateCells(i));
      System.out.print(".");
    }
    bw.close();
  }

  /* Run the .onion-address simulation 1000 times and write one CSV line
   * per run and fraction. */
  private static void simulateManyOnions() throws Exception {
    simOnionsCsvFile.getParentFile().mkdirs();
    BufferedWriter bw = new BufferedWriter(new FileWriter(
        simOnionsCsvFile));
    bw.write("run,frac,wmean,wmedian,wiqm\n");
    final int numberOfExtrapolations = 1000;
    for (int i = 0; i < numberOfExtrapolations; i++) {
      bw.write(simulateOnions(i));
      System.out.print(".");
    }
    bw.close();
  }

  /* Simulate one run of observing, obfuscating, and extrapolating
   * rendezvous cells, and return the CSV lines (one per simulated
   * reporting fraction) for this run. */
  private static String simulateCells(int run) {

    /* Generate consensus weights following an exponential distribution
     * with lambda = 1 for 3000 potential rendezvous points.
     * (-log(1 - U) is the inverse-CDF draw for Exp(1).) */
    final int numberRendPoints = 3000;
    double[] consensusWeights = new double[numberRendPoints];
    double totalConsensusWeight = 0.0;
    for (int i = 0; i < numberRendPoints; i++) {
      double consensusWeight = -Math.log(1.0 - rnd.nextDouble());
      consensusWeights[i] = consensusWeight;
      totalConsensusWeight += consensusWeight;
    }

    /* Compute probabilities for being selected as rendezvous point. */
    double[] probRendPoint = new double[numberRendPoints];
    for (int i = 0; i < numberRendPoints; i++) {
      probRendPoint[i] = consensusWeights[i] / totalConsensusWeight;
    }

    /* Generate 10,000,000,000 cells (474 Mbit/s) in chunks following an
     * exponential distribution with lambda = 0.0001, so on average
     * 10,000 cells per chunk, and randomly assign them to a rendezvous
     * point to report them later. */
    long cellsLeft = 10000000000L;
    final double cellsLambda = 0.0001;
    long[] observedCells = new long[numberRendPoints];
    while (cellsLeft > 0) {
      long cells = Math.min(cellsLeft,
          (long) (-Math.log(1.0 - rnd.nextDouble()) / cellsLambda));
      /* Pick the rendezvous point by walking the cumulative probability
       * distribution. */
      double selectRendPoint = rnd.nextDouble();
      for (int i = 0; i < probRendPoint.length; i++) {
        selectRendPoint -= probRendPoint[i];
        if (selectRendPoint <= 0.0) {
          observedCells[i] += cells;
          break;
        }
      }
      cellsLeft -= cells;
    }

    /* Obfuscate reports using binning and Laplace noise, and then attempt
     * to remove noise again by rounding back to the nearest right side of
     * a bin and subtracting half of the bin size. */
    final long binSize = 1024L;
    final double b = 2048.0 / 0.3;
    /* reportedCells is stored for reference only; the extrapolation below
     * uses removedNoiseCells. */
    long[] reportedCells = new long[numberRendPoints];
    long[] removedNoiseCells = new long[numberRendPoints];
    for (int i = 0; i < numberRendPoints; i++) {
      long observed = observedCells[i];
      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
      /* Inverse-CDF sample from a Laplace distribution with scale b. */
      double p = rnd.nextDouble();
      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
      long reported = afterBinning + (long) laplaceNoise;
      reportedCells[i] = reported;
      long roundedToNearestRightSideOfTheBin =
          ((reported + binSize / 2) / binSize) * binSize;
      long subtractedHalfOfBinSize =
          roundedToNearestRightSideOfTheBin - binSize / 2;
      removedNoiseCells[i] = subtractedHalfOfBinSize;
    }

    /* Perform extrapolations from random fractions of reports by
     * probability to be selected as rendezvous point. */
    StringBuilder sb = new StringBuilder();
    double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
        0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
    for (double fraction : fractions) {
      /* Start with a random subset of relays whose count matches the
       * target fraction. */
      SortedSet<Integer> nonReportingRelays = new TreeSet<Integer>();
      for (int j = 0; j < numberRendPoints; j++) {
        nonReportingRelays.add(j);
      }
      List<Integer> shuffledRelays = new ArrayList<Integer>(
          nonReportingRelays);
      Collections.shuffle(shuffledRelays);
      SortedSet<Integer> reportingRelays = new TreeSet<Integer>();
      for (int j = 0; j < (int) ((double) numberRendPoints * fraction);
          j++) {
        reportingRelays.add(shuffledRelays.get(j));
        nonReportingRelays.remove(shuffledRelays.get(j));
      }
      List<double[]> singleRelayExtrapolations;
      double totalReportingProbability;
      /* Randomly add or remove relays until the subset's total selection
       * probability is within +/- 0.001 of the target fraction. */
      do {
        singleRelayExtrapolations = new ArrayList<double[]>();
        totalReportingProbability = 0.0;
        for (int reportingRelay : reportingRelays) {
          double probability = probRendPoint[reportingRelay];
          if (probability > 0.0) {
            /* Triple of: extrapolated network total, reported value,
             * selection probability. */
            singleRelayExtrapolations.add(
                new double[] {
                    removedNoiseCells[reportingRelay] / probability,
                    removedNoiseCells[reportingRelay],
                    probability });
          }
          totalReportingProbability += probability;
        }
        if (totalReportingProbability < fraction - 0.001) {
          int addRelay = new ArrayList<Integer>(nonReportingRelays).get(
              rnd.nextInt(nonReportingRelays.size()));
          nonReportingRelays.remove(addRelay);
          reportingRelays.add(addRelay);
        } else if (totalReportingProbability > fraction + 0.001) {
          int removeRelay = new ArrayList<Integer>(reportingRelays).get(
              rnd.nextInt(reportingRelays.size()));
          reportingRelays.remove(removeRelay);
          nonReportingRelays.add(removeRelay);
        }
      } while (totalReportingProbability < fraction - 0.001 ||
          totalReportingProbability > fraction + 0.001);
      /* Sort by extrapolated value to compute weighted median and
       * weighted interquartile mean. */
      Collections.sort(singleRelayExtrapolations,
          new Comparator<double[]>() {
        public int compare(double[] o1, double[] o2) {
          return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
        }
      });
      double totalProbability = 0.0, totalValues = 0.0;
      double totalInterquartileProbability = 0.0,
          totalInterquartileValues = 0.0;
      Double weightedMedian = null;
      for (double[] extrapolation : singleRelayExtrapolations) {
        totalValues += extrapolation[1];
        totalProbability += extrapolation[2];
        if (weightedMedian == null &&
            totalProbability > totalReportingProbability * 0.5) {
          weightedMedian = extrapolation[0];
        }
        if (totalProbability > totalReportingProbability * 0.25 &&
            totalProbability < totalReportingProbability * 0.75) {
          totalInterquartileValues += extrapolation[1];
          totalInterquartileProbability += extrapolation[2];
        }
      }
      /* Write run, fraction, weighted mean, weighted median, and weighted
       * interquartile mean as one CSV line. */
      sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
          totalValues / totalProbability, weightedMedian,
          totalInterquartileValues / totalInterquartileProbability));
    }
    return sb.toString();
  }

  /* Simulate one run of storing, obfuscating, and extrapolating .onion
   * address counts observed by HSDirs, and return the CSV lines (one per
   * simulated reporting fraction) for this run. */
  private static String simulateOnions(final int run) {

    /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */
    final int numberHsDirs = 3000;
    SortedSet<Double> hsDirFingerprints = new TreeSet<Double>();
    for (int i = 0; i < numberHsDirs; i++) {
      hsDirFingerprints.add(rnd.nextDouble());
    }

    /* Compute fractions of observed descriptor space: for each HSDir,
     * the distance back to the fingerprint three positions earlier on the
     * ring (duplicating fingerprints shifted by -1.0 makes the ring wrap
     * around). */
    SortedSet<Double> ring =
        new TreeSet<Double>(Collections.reverseOrder());
    for (double fingerprint : hsDirFingerprints) {
      ring.add(fingerprint);
      ring.add(fingerprint - 1.0);
    }
    SortedMap<Double, Double> hsDirFractions =
        new TreeMap<Double, Double>();
    for (double fingerprint : hsDirFingerprints) {
      double start = fingerprint;
      int positionsToGo = 3;
      for (double prev : ring.tailSet(fingerprint)) {
        start = prev;
        if (positionsToGo-- <= 0) {
          break;
        }
      }
      hsDirFractions.put(fingerprint, fingerprint - start);
    }

    /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs.
     * Each descriptor goes to the 3 HSDirs following a random point on
     * the ring, wrapping around to the smallest fingerprints if
     * necessary. */
    final int numberOnions = 40000;
    final int replicas = 4;
    final int storeOnDirs = 3;
    SortedMap<Double, SortedSet<Integer>> storedDescs =
        new TreeMap<Double, SortedSet<Integer>>();
    for (double fingerprint : hsDirFingerprints) {
      storedDescs.put(fingerprint, new TreeSet<Integer>());
    }
    for (int i = 0; i < numberOnions; i++) {
      for (int j = 0; j < replicas; j++) {
        int leftToStore = storeOnDirs;
        for (double fingerprint :
            hsDirFingerprints.tailSet(rnd.nextDouble())) {
          storedDescs.get(fingerprint).add(i);
          if (--leftToStore <= 0) {
            break;
          }
        }
        if (leftToStore > 0) {
          for (double fingerprint : hsDirFingerprints) {
            storedDescs.get(fingerprint).add(i);
            if (--leftToStore <= 0) {
              break;
            }
          }
        }
      }
    }

    /* Obfuscate reports using binning and Laplace noise, and then attempt
     * to remove noise again.  Same scheme as in simulateCells, with
     * smaller bin size and noise scale. */
    final long binSize = 8L;
    final double b = 8.0 / 0.3;
    /* reportedOnions is stored for reference only; the extrapolation
     * below uses removedNoiseOnions. */
    SortedMap<Double, Long> reportedOnions = new TreeMap<Double, Long>(),
        removedNoiseOnions = new TreeMap<Double, Long>();
    for (Map.Entry<Double, SortedSet<Integer>> e :
        storedDescs.entrySet()) {
      double fingerprint = e.getKey();
      long observed = (long) e.getValue().size();
      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
      double p = rnd.nextDouble();
      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
      long reported = afterBinning + (long) laplaceNoise;
      reportedOnions.put(fingerprint, reported);
      long roundedToNearestRightSideOfTheBin =
          ((reported + binSize / 2) / binSize) * binSize;
      long subtractedHalfOfBinSize =
          roundedToNearestRightSideOfTheBin - binSize / 2;
      removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize);
    }

    /* Perform extrapolations from random fractions of reports, weighting
     * each relay by the probability of observing a descriptor, which is
     * its descriptor-space fraction divided by 3.  (An earlier comment
     * here said "rendezvous point", copied from simulateCells; these are
     * HSDirs.) */
    StringBuilder sb = new StringBuilder();
    double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
        0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
    for (double fraction : fractions) {
      /* Start with a random subset of relays whose count matches the
       * target fraction. */
      SortedSet<Double> nonReportingRelays =
          new TreeSet<Double>(hsDirFractions.keySet());
      List<Double> shuffledRelays = new ArrayList<Double>(
          nonReportingRelays);
      Collections.shuffle(shuffledRelays);
      SortedSet<Double> reportingRelays = new TreeSet<Double>();
      for (int j = 0; j < (int) ((double) hsDirFractions.size()
          * fraction); j++) {
        reportingRelays.add(shuffledRelays.get(j));
        nonReportingRelays.remove(shuffledRelays.get(j));
      }
      List<double[]> singleRelayExtrapolations;
      double totalReportingProbability;
      /* Randomly add or remove relays until the subset's total observation
       * probability is within +/- 0.001 of the target fraction. */
      do {
        singleRelayExtrapolations = new ArrayList<double[]>();
        totalReportingProbability = 0.0;
        for (double reportingRelay : reportingRelays) {
          double probability = hsDirFractions.get(reportingRelay) / 3.0;
          if (probability > 0.0) {
            /* Triple of: extrapolated network total, reported value,
             * observation probability. */
            singleRelayExtrapolations.add(
                new double[] { removedNoiseOnions.get(reportingRelay)
                    / probability, removedNoiseOnions.get(reportingRelay),
                    probability });
          }
          totalReportingProbability += probability;
        }
        if (totalReportingProbability < fraction - 0.001) {
          double addRelay =
              new ArrayList<Double>(nonReportingRelays).get(
              rnd.nextInt(nonReportingRelays.size()));
          nonReportingRelays.remove(addRelay);
          reportingRelays.add(addRelay);
        } else if (totalReportingProbability > fraction + 0.001) {
          double removeRelay =
              new ArrayList<Double>(reportingRelays).get(
              rnd.nextInt(reportingRelays.size()));
          reportingRelays.remove(removeRelay);
          nonReportingRelays.add(removeRelay);
        }
      } while (totalReportingProbability < fraction - 0.001 ||
          totalReportingProbability > fraction + 0.001);
      /* Sort by extrapolated value to compute weighted median and
       * weighted interquartile mean. */
      Collections.sort(singleRelayExtrapolations,
          new Comparator<double[]>() {
        public int compare(double[] o1, double[] o2) {
          return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
        }
      });
      double totalProbability = 0.0, totalValues = 0.0;
      double totalInterquartileProbability = 0.0,
          totalInterquartileValues = 0.0;
      Double weightedMedian = null;
      for (double[] extrapolation : singleRelayExtrapolations) {
        totalValues += extrapolation[1];
        totalProbability += extrapolation[2];
        if (weightedMedian == null &&
            totalProbability > totalReportingProbability * 0.5) {
          weightedMedian = extrapolation[0];
        }
        if (totalProbability > totalReportingProbability * 0.25 &&
            totalProbability < totalReportingProbability * 0.75) {
          totalInterquartileValues += extrapolation[1];
          totalInterquartileProbability += extrapolation[2];
        }
      }
      /* Write run, fraction, weighted mean, weighted median, and weighted
       * interquartile mean as one CSV line. */
      sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
          totalValues / totalProbability, weightedMedian,
          totalInterquartileValues / totalInterquartileProbability));
    }
    return sb.toString();
  }
}
tor-commits@lists.torproject.org