commit 2f9099c12b6d3b91c9e2609e1ce9bc137085aba9 Author: Karsten Loesing karsten.loesing@gmx.net Date: Wed Aug 19 19:28:52 2015 +0200
Add new module to aggregate conn-bi-direct stats. --- modules/connbidirect/.gitignore | 3 + modules/connbidirect/build.xml | 68 +++ .../org/torproject/metrics/connbidirect/Main.java | 478 ++++++++++++++++++++ .../torproject/metrics/connbidirect/MainTest.java | 261 +++++++++++ shared/bin/10-run-connbidirect-stats.sh | 5 + shared/bin/99-copy-stats-files.sh | 1 + website/etc/web.xml | 1 + website/rserve/graphs.R | 31 +- .../org/torproject/metrics/web/IndexServlet.java | 3 + .../metrics/web/graphs/GraphsSubpagesServlet.java | 2 + .../metrics/web/research/ResearchStatsServlet.java | 1 + website/web/WEB-INF/connbidirect-data.jsp | 10 +- website/web/WEB-INF/connbidirect.jsp | 2 +- website/web/WEB-INF/connbidirect2-data.jsp | 67 +++ website/web/WEB-INF/performance.jsp | 2 +- website/web/WEB-INF/stats.jsp | 2 +- 16 files changed, 912 insertions(+), 25 deletions(-)
diff --git a/modules/connbidirect/.gitignore b/modules/connbidirect/.gitignore new file mode 100644 index 0000000..b359b59 --- /dev/null +++ b/modules/connbidirect/.gitignore @@ -0,0 +1,3 @@ +classes/ +stats/ + diff --git a/modules/connbidirect/build.xml b/modules/connbidirect/build.xml new file mode 100644 index 0000000..29629e8 --- /dev/null +++ b/modules/connbidirect/build.xml @@ -0,0 +1,68 @@ +<project default="run" name="connbidirect" basedir="."> + + <property name="connbidirect-sources" value="src/main/java"/> + <property name="connbidirect-tests" value="src/test/java"/> + <property name="connbidirect-classes" value="classes"/> + <path id="classpath"> + <pathelement path="${connbidirect-classes}"/> + <fileset dir="/usr/share/java"> + <include name="commons-codec-1.6.jar"/> + <include name="commons-compress-1.4.1.jar"/> + <include name="commons-lang-2.6.jar"/> + <include name="junit4.jar"/> + </fileset> + <fileset dir="../../deps/metrics-lib"> + <include name="descriptor.jar"/> + </fileset> + </path> + + <target name="metrics-lib"> + <ant dir="../../deps/metrics-lib"/> + </target> + + <target name="compile" depends="metrics-lib"> + <mkdir dir="${connbidirect-classes}"/> + <javac destdir="${connbidirect-classes}" + srcdir="${connbidirect-sources}" + source="1.6" + target="1.6" + debug="true" + deprecation="true" + optimize="false" + failonerror="true" + includeantruntime="false"> + <classpath refid="classpath"/> + </javac> + </target> + + <target name="test" depends="compile"> + <javac destdir="${connbidirect-classes}" + srcdir="${connbidirect-tests}" + source="1.6" + target="1.6" + debug="true" + deprecation="true" + optimize="false" + failonerror="true" + includeantruntime="false"> + <classpath refid="classpath"/> + </javac> + <junit fork="true" haltonfailure="true" printsummary="off"> + <classpath refid="classpath"/> + <formatter type="plain" usefile="false"/> + <batchtest> + <fileset dir="${connbidirect-classes}" + includes="**/*Test.class"/> + 
</batchtest> + </junit> + </target> + + <target name="run" depends="compile"> + <java fork="true" + maxmemory="2g" + classname="org.torproject.metrics.connbidirect.Main"> + <classpath refid="classpath"/> + </java> + </target> +</project> + diff --git a/modules/connbidirect/src/main/java/org/torproject/metrics/connbidirect/Main.java b/modules/connbidirect/src/main/java/org/torproject/metrics/connbidirect/Main.java new file mode 100644 index 0000000..190b3df --- /dev/null +++ b/modules/connbidirect/src/main/java/org/torproject/metrics/connbidirect/Main.java @@ -0,0 +1,478 @@ +/* Copyright 2015 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.metrics.connbidirect; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.StringReader; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.ExtraInfoDescriptor; + +public class Main { + + static class RawStat implements Comparable<RawStat> { + + /* Date when the statistics interval ended in days since the epoch. */ + long dateDays; + + /* Relay fingerprint, or <code>null</code> if this entry only + * indicates that outdated raw statistics have been discarded and + * hence new raw statistics for this date should not be aggregated. 
*/ + String fingerprint; + + /* Fraction of mostly reading connections as a value between 0 and + * 100. */ + short fractionRead; + + /* Fraction of mostly writing connections as a value between 0 and + * 100. */ + short fractionWrite; + + /* Fraction of both reading and writing connections as a value between + * 0 and 100. */ + short fractionBoth; + + RawStat(long dateDays, String fingerprint, short fractionRead, + short fractionWrite, short fractionBoth) { + this.dateDays = dateDays; + this.fingerprint = fingerprint; + this.fractionRead = fractionRead; + this.fractionWrite = fractionWrite; + this.fractionBoth = fractionBoth; + } + + static RawStat fromString(String string) { + try { + String[] parts = string.split(","); + if (parts.length == 5) { + long dateDays = Long.parseLong(parts[0]); + String fingerprint = parts[1]; + short fractionRead = Short.parseShort(parts[2]); + short fractionWrite = Short.parseShort(parts[3]); + short fractionBoth = Short.parseShort(parts[4]); + return new RawStat(dateDays, fingerprint, fractionRead, + fractionWrite, fractionBoth); + } else { + System.err.println("Could not deserialize raw statistic from " + + "string '" + string + "'."); + return null; + } + } catch (NumberFormatException e) { + System.err.println("Could not deserialize raw statistic from " + + "string '" + string + "'."); + return null; + } + } + + @Override + public String toString() { + if (this.fingerprint == null) { + return String.valueOf(this.dateDays); + } else { + return String.format("%d,%s,%d,%d,%d", this.dateDays, + this.fingerprint, this.fractionRead, this.fractionWrite, + this.fractionBoth); + } + } + + @Override + public int compareTo(RawStat other) { + if (this.dateDays != other.dateDays) { + return this.dateDays < other.dateDays ? 
-1 : 1; + } else if (this.fingerprint != null && other.fingerprint != null) { + return this.fingerprint.compareTo(other.fingerprint); + } else if (this.fingerprint != null) { + return -1; + } else if (other.fingerprint != null) { + return 1; + } else { + return 0; + } + } + + @Override + public boolean equals(Object otherObject) { + if (!(otherObject instanceof RawStat)) { + return false; + } + RawStat other = (RawStat) otherObject; + return this.dateDays == other.dateDays && + this.fingerprint.equals(other.fingerprint); + } + } + + static final long ONE_DAY_IN_MILLIS = 86400000L; + + public static void main(String[] args) throws IOException { + File parseHistoryFile = new File("stats/parse-history"); + File aggregateStatsFile = new File("stats/connbidirect2.csv"); + File rawStatsFile = new File("stats/raw-stats"); + File[] descriptorsDirectories = new File[] { + new File("../../shared/in/archive/relay-descriptors/extra-infos"), + new File("../../shared/in/recent/relay-descriptors/extra-infos")}; + SortedMap<String, Long> parseHistory = parseParseHistory( + readStringFromFile(parseHistoryFile)); + if (parseHistory == null) { + System.err.println("Could not parse " + + parseHistoryFile.getAbsolutePath() + ". Proceeding without " + + "parse history."); + } + SortedMap<String, Short> aggregateStats = parseAggregateStats( + readStringFromFile(aggregateStatsFile)); + if (aggregateStats == null) { + System.err.println("Could not parse previously aggregated " + + "statistics. Not proceeding, because we would otherwise " + + "lose previously aggregated values for which we don't have " + + "raw statistics anymore."); + return; + } + SortedSet<RawStat> newRawStats = new TreeSet<RawStat>(); + parseHistory = addRawStatsFromDescriptors(newRawStats, + descriptorsDirectories, parseHistory); + if (parseHistory == null) { + System.err.println("Could not parse raw statistics from " + + "descriptors. 
Not proceeding, because we would otherwise " + + "leave out those descriptors in future runs."); + return; + } + SortedSet<RawStat> rawStats = parseRawStats( + readStringFromFile(rawStatsFile)); + if (rawStats == null) { + System.err.println("Could not parse previously parsed raw " + + "statistics. Not proceeding, because we might otherwise " + + "leave out previously parsed statistics in the aggregates."); + return; + } + SortedSet<Long> conflictingDates = mergeRawStats(rawStats, + newRawStats); + if (!conflictingDates.isEmpty()) { + System.err.print("Could not update aggregate statistics, because " + + "we already aggregated statistics for at least one contained " + + "date and discarded the underlying raw statistics. Not " + + "proceeding. To fix this, you'll have to re-import " + + "statistics for the following dates:"); + DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); + dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + for (long conflictingDate : conflictingDates) { + System.err.print(" " + dateFormat.format(conflictingDate + * ONE_DAY_IN_MILLIS)); + } + System.err.println(); + return; + } + updateAggregateStats(aggregateStats, rawStats); + writeStringToFile(aggregateStatsFile, formatAggregateStats( + aggregateStats)); + writeStringToFile(rawStatsFile, formatRawStats(rawStats)); + writeStringToFile(parseHistoryFile, formatParseHistory(parseHistory)); + } + + /* Read the string contained in <code>file</code> and return it. 
/* Reconstructed from the mangled patch: the file-I/O helpers and the
 * parse-history / aggregate-statistics (de)serializers of Main.  Only this
 * subset of Main falls within these lines; the class wrapper is repeated so
 * the methods remain members of Main for their callers. */
class Main {

  /* Read the contents of file as UTF-8 and return them with every line
   * terminated by '\n'; return the empty string if file does not exist.
   * (Fix: FileReader read with the platform default charset, and the
   * reader leaked if readLine threw.) */
  private static String readStringFromFile(File file) throws IOException {
    StringBuilder sb = new StringBuilder();
    if (file.exists()) {
      BufferedReader br = new BufferedReader(new java.io.InputStreamReader(
          new java.io.FileInputStream(file), "UTF-8"));
      try {
        String line;
        while ((line = br.readLine()) != null) {
          sb.append(line).append('\n');
        }
      } finally {
        br.close();
      }
    }
    return sb.toString();
  }

  /* Write string to file by first creating its parent directory if that
   * doesn't exist yet, then writing a temporary sibling file as UTF-8, and
   * finally renaming the temporary file over the target.  (Fix: the result
   * of renameTo was ignored, so a failed rename lost the output silently;
   * the writer also leaked if write threw.) */
  private static void writeStringToFile(File file, String string)
      throws IOException {
    file.getParentFile().mkdirs();
    File tempFile = new File(file.getParentFile(), file.getName()
        + ".tmp");
    BufferedWriter bw = new BufferedWriter(new java.io.OutputStreamWriter(
        new java.io.FileOutputStream(tempFile), "UTF-8"));
    try {
      bw.write(string);
    } finally {
      bw.close();
    }
    if (!tempFile.renameTo(file)) {
      throw new IOException("Could not rename "
          + tempFile.getAbsolutePath() + " to " + file.getAbsolutePath()
          + ".");
    }
  }

  /* Format a parse history containing paths and last-modified times as one
   * "path,millis" pair per line. */
  static String formatParseHistory(SortedMap<String, Long> parseHistory) {
    StringBuilder sb = new StringBuilder();
    for (Map.Entry<String, Long> e : parseHistory.entrySet()) {
      sb.append(e.getKey()).append(',').append(e.getValue()).append('\n');
    }
    return sb.toString();
  }

  /* Parse a parse history as written by formatParseHistory.  Returns null
   * and logs to stderr on the first malformed line.  (Fix: split at the
   * LAST comma, because file paths may themselves contain commas while the
   * numeric last-modified time cannot; the original split at every comma
   * and failed to round-trip such paths.) */
  static SortedMap<String, Long> parseParseHistory(
      String formattedParseHistory) {
    SortedMap<String, Long> parsedParseHistory =
        new TreeMap<String, Long>();
    LineNumberReader lnr = new LineNumberReader(new StringReader(
        formattedParseHistory));
    String line = "";
    try {
      while ((line = lnr.readLine()) != null) {
        int separator = line.lastIndexOf(',');
        if (separator < 0) {
          System.err.println("Invalid line " + lnr.getLineNumber()
              + " in parse history: '" + line + "'.");
          return null;
        }
        parsedParseHistory.put(line.substring(0, separator),
            Long.parseLong(line.substring(separator + 1)));
      }
    } catch (IOException e) {
      System.err.println("Unexpected I/O exception while reading line "
          + lnr.getLineNumber() + " from parse history.");
      e.printStackTrace();
      return null;
    } catch (NumberFormatException e) {
      System.err.println("Invalid line " + lnr.getLineNumber()
          + " in parse history: '" + line + "'.");
      return null;
    }
    return parsedParseHistory;
  }

  private static final String AGGREGATE_STATS_HEADER =
      "date,direction,quantile,fraction";

  /* Format aggregate connbidirect stats containing a combined identifier
   * consisting of date (e.g., 2015-08-18), direction (both, read, write),
   * and quantile (0.25, 0.5, 0.75), and a fraction value between 0 and
   * 100. */
  static String formatAggregateStats(
      SortedMap<String, Short> aggregateStats) {
    StringBuilder sb = new StringBuilder();
    sb.append(AGGREGATE_STATS_HEADER).append('\n');
    for (Map.Entry<String, Short> e : aggregateStats.entrySet()) {
      sb.append(e.getKey()).append(',').append(e.getValue()).append('\n');
    }
    return sb.toString();
  }

  /* Parse aggregate connbidirect stats.  An empty input yields an empty
   * map; a missing header or malformed line yields null with a message on
   * stderr. */
  static SortedMap<String, Short> parseAggregateStats(
      String formattedAggregatedStats) {
    SortedMap<String, Short> parsedAggregateStats =
        new TreeMap<String, Short>();
    if (formattedAggregatedStats.length() < 1) {
      return parsedAggregateStats; /* Empty file. */
    }
    LineNumberReader lnr = new LineNumberReader(new StringReader(
        formattedAggregatedStats));
    String line = "";
    try {
      if (!AGGREGATE_STATS_HEADER.equals(lnr.readLine())) {
        System.err.println("First line of aggregate statistics does not "
            + "contain the header line. Is this the correct file?");
        return null;
      }
      while ((line = lnr.readLine()) != null) {
        String[] parts = line.split(",");
        if (parts.length != 4) {
          System.err.println("Invalid line " + lnr.getLineNumber()
              + " in aggregate statistics: '" + line + "'.");
          return null;
        }
        parsedAggregateStats.put(parts[0] + "," + parts[1] + ","
            + parts[2], Short.parseShort(parts[3]));
      }
    } catch (IOException e) {
      System.err.println("Unexpected I/O exception while reading line "
          + lnr.getLineNumber() + " from aggregate statistics.");
      e.printStackTrace();
      return null;
    } catch (NumberFormatException e) {
      System.err.println("Invalid line " + lnr.getLineNumber()
          + " in aggregate statistics: '" + line + "'.");
      return null;
    }
    return parsedAggregateStats;
  }
}
*/ + } + LineNumberReader lnr = new LineNumberReader(new StringReader( + formattedAggregatedStats)); + String line = ""; + try { + if (!AGGREGATE_STATS_HEADER.equals(lnr.readLine())) { + System.err.println("First line of aggregate statistics does not " + + "contain the header line. Is this the correct file?"); + return null; + } + while ((line = lnr.readLine()) != null) { + String[] parts = line.split(","); + if (parts.length != 4) { + System.err.println("Invalid line " + lnr.getLineNumber() + + " in aggregate statistics: '" + line + "'."); + return null; + } + parsedAggregateStats.put(parts[0] + "," + parts[1] + "," + + parts[2], Short.parseShort(parts[3])); + } + } catch (IOException e) { + System.err.println("Unexpected I/O exception while reading line " + + lnr.getLineNumber() + " from aggregate statistics."); + e.printStackTrace(); + return null; + } catch (NumberFormatException e) { + System.err.println("Invalid line " + lnr.getLineNumber() + + " in aggregate statistics: '" + line + "'."); + return null; + } + return parsedAggregateStats; + } + + /* Format raw statistics separated by newlines using the formatter in + * RawStats. */ + static String formatRawStats(SortedSet<RawStat> rawStats) { + StringBuilder sb = new StringBuilder(); + for (RawStat rawStat : rawStats) { + sb.append(rawStat.toString() + "\n"); + } + return sb.toString(); + } + + /* Parse raw statistics. 
*/ + static SortedSet<RawStat> parseRawStats(String formattedRawStats) { + SortedSet<RawStat> parsedRawStats = new TreeSet<RawStat>(); + LineNumberReader lnr = new LineNumberReader(new StringReader( + formattedRawStats)); + String line = ""; + try { + while ((line = lnr.readLine()) != null) { + RawStat rawStat = RawStat.fromString(line); + if (rawStat == null) { + System.err.println("Invalid line " + lnr.getLineNumber() + + " in raw statistics: '" + line + "'."); + return null; + } + parsedRawStats.add(rawStat); + } + } catch (IOException e) { + System.err.println("Unexpected I/O exception while reading line " + + lnr.getLineNumber() + " from raw statistics."); + e.printStackTrace(); + return null; + } catch (NumberFormatException e) { + System.err.println("Invalid line " + lnr.getLineNumber() + + " in raw statistics: '" + line + "'."); + return null; + } + return parsedRawStats; + } + + private static SortedMap<String, Long> addRawStatsFromDescriptors( + SortedSet<RawStat> rawStats, File[] descriptorsDirectories, + SortedMap<String, Long> parseHistory) { + DescriptorReader descriptorReader = + DescriptorSourceFactory.createDescriptorReader(); + for (File descriptorsDirectory : descriptorsDirectories) { + descriptorReader.addDirectory(descriptorsDirectory); + } + descriptorReader.setExcludedFiles(parseHistory); + Iterator<DescriptorFile> descriptorFiles = + descriptorReader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (!(descriptor instanceof ExtraInfoDescriptor)) { + continue; + } + RawStat rawStat = parseRawStatFromDescriptor( + (ExtraInfoDescriptor) descriptor); + if (rawStat != null) { + rawStats.add(rawStat); + } + } + } + parseHistory.clear(); + parseHistory.putAll(descriptorReader.getExcludedFiles()); + parseHistory.putAll(descriptorReader.getParsedFiles()); + return parseHistory; + } + + private static RawStat 
parseRawStatFromDescriptor( + ExtraInfoDescriptor extraInfo) { + if (extraInfo.getConnBiDirectStatsEndMillis() <= 0L) { + return null; + } + int below = extraInfo.getConnBiDirectBelow(), + read = extraInfo.getConnBiDirectRead(), + write = extraInfo.getConnBiDirectWrite(), + both = extraInfo.getConnBiDirectBoth(); + if (below < 0 || read < 0 || write < 0 || both < 0) { + System.err.println("Could not parse incomplete conn-bi-direct " + + "statistics. Skipping descriptor."); + return null; + } + long statsEndMillis = extraInfo.getConnBiDirectStatsEndMillis(); + String fingerprint = extraInfo.getFingerprint(); + return parseRawStatFromDescriptorContents(statsEndMillis, fingerprint, + below, read, write, both); + } + + static RawStat parseRawStatFromDescriptorContents(long statsEndMillis, + String fingerprint, int below, int read, int write, int both) { + int total = read + write + both; + if (below < 0 || read < 0 || write < 0 || both < 0 || total <= 0) { + return null; + } + long dateDays = statsEndMillis / ONE_DAY_IN_MILLIS; + short fractionRead = (short) ((read * 100) / total); + short fractionWrite = (short) ((write * 100) / total); + short fractionBoth = (short) ((both * 100) / total); + return new RawStat(dateDays, fingerprint, fractionRead, fractionWrite, + fractionBoth); + } + + static SortedSet<Long> mergeRawStats( + SortedSet<RawStat> rawStats, SortedSet<RawStat> newRawStats) { + rawStats.addAll(newRawStats); + SortedSet<Long> discardedRawStats = new TreeSet<Long>(), + availableRawStats = new TreeSet<Long>(); + for (RawStat rawStat : rawStats) { + if (rawStat.fingerprint != null) { + availableRawStats.add(rawStat.dateDays); + } else { + discardedRawStats.add(rawStat.dateDays); + } + } + discardedRawStats.retainAll(availableRawStats); + return discardedRawStats; + } + + static void updateAggregateStats( + SortedMap<String, Short> aggregateStats, + SortedSet<RawStat> rawStats) { + DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); + 
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + SortedMap<String, List<Short>> fractionsByDateAndDirection = + new TreeMap<String, List<Short>>(); + final String[] directions = new String[] { "read", "write", "both" }; + for (RawStat rawStat : rawStats) { + if (rawStat.fingerprint != null) { + String date = dateFormat.format(rawStat.dateDays + * ONE_DAY_IN_MILLIS); + short[] fractions = new short[] { rawStat.fractionRead, + rawStat.fractionWrite, rawStat.fractionBoth }; + for (int i = 0; i < directions.length; i++) { + String dateAndDirection = date + "," + directions[i]; + if (!fractionsByDateAndDirection.containsKey( + dateAndDirection)) { + fractionsByDateAndDirection.put(dateAndDirection, + new ArrayList<Short>()); + } + fractionsByDateAndDirection.get(dateAndDirection).add( + fractions[i]); + } + } + } + final String[] quantiles = new String[] { "0.25", "0.5", "0.75" }; + final int[] centiles = new int[] { 25, 50, 75 }; + for (Map.Entry<String, List<Short>> e : + fractionsByDateAndDirection.entrySet()) { + String dateAndDirection = e.getKey(); + List<Short> fractions = e.getValue(); + Collections.sort(fractions); + for (int i = 0; i < quantiles.length; i++) { + String dateDirectionAndQuantile = dateAndDirection + "," + + quantiles[i]; + short fraction = fractions.get((centiles[i] * fractions.size()) + / 100); + aggregateStats.put(dateDirectionAndQuantile, fraction); + } + } + } +} diff --git a/modules/connbidirect/src/test/java/org/torproject/metrics/connbidirect/MainTest.java b/modules/connbidirect/src/test/java/org/torproject/metrics/connbidirect/MainTest.java new file mode 100644 index 0000000..a490dd2 --- /dev/null +++ b/modules/connbidirect/src/test/java/org/torproject/metrics/connbidirect/MainTest.java @@ -0,0 +1,261 @@ +package org.torproject.metrics.connbidirect; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; + +import java.io.ByteArrayOutputStream; +import 
java.io.PrintStream; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.junit.Test; + +public class MainTest { + + private void assertParseHistoryCanBeSerializedAndDeserialized( + SortedMap<String, Long> parseHistory) { + String formattedParseHistory = Main.formatParseHistory(parseHistory); + SortedMap<String, Long> parsedParseHistory = Main.parseParseHistory( + formattedParseHistory); + assertEquals("Parse histories are not equal", parseHistory, + parsedParseHistory); + } + + @Test + public void testParseHistoryEmpty() { + assertParseHistoryCanBeSerializedAndDeserialized( + new TreeMap<String, Long>()); + } + + private final String PATH_A = "a", PATH_B = "/b"; + + private final long LASTMOD_A = 1L, LASTMOD_B = 2L; + + @Test + public void testParseHistoryOneEntry() { + SortedMap<String, Long> parseHistory = new TreeMap<String, Long>(); + parseHistory.put(PATH_A, LASTMOD_A); + assertParseHistoryCanBeSerializedAndDeserialized(parseHistory); + } + + @Test + public void testParseHistoryTwoEntries() { + SortedMap<String, Long> parseHistory = new TreeMap<String, Long>(); + parseHistory.put(PATH_A, LASTMOD_A); + parseHistory.put(PATH_B, LASTMOD_B); + assertParseHistoryCanBeSerializedAndDeserialized(parseHistory); + } + + private void assertParseHistoryCannotBeDeserialized( + String brokenParseHistory) { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + System.setErr(new PrintStream(baos)); + SortedMap<String, Long> parsedParseHistory = + Main.parseParseHistory(brokenParseHistory); + assertNull("Parsed parse history is supposed to be null", + parsedParseHistory); + } + + @Test + public void testParseHistoryNoLastModifiedTime() { + assertParseHistoryCannotBeDeserialized(String.format("%s%n", PATH_A)); + } + + @Test + public void testParseHistoryLastModifiedTimeNoNumber() { + assertParseHistoryCannotBeDeserialized(String.format("%s%s%n", + PATH_A, PATH_B)); + } + + private void 
assertAggregateStatsCanBeSerializedAndDeserialized( + SortedMap<String, Short> aggregateStats) { + String formattedAggregateStats = Main.formatAggregateStats( + aggregateStats); + SortedMap<String, Short> parsedParseHistory = + Main.parseAggregateStats(formattedAggregateStats); + assertEquals("Aggregate statistics are not equal", aggregateStats, + parsedParseHistory); + } + + @Test + public void testAggregateStatsEmpty() { + assertAggregateStatsCanBeSerializedAndDeserialized( + new TreeMap<String, Short>()); + } + + @Test + public void testAggregateStatsOneEntry() { + SortedMap<String, Short> aggregateStats = + new TreeMap<String, Short>(); + aggregateStats.put("2015-08-18,read,0.25", (short) 42); + assertAggregateStatsCanBeSerializedAndDeserialized(aggregateStats); + } + + @Test + public void testAggregateStatsThreeEntries() { + SortedMap<String, Short> aggregateStats = + new TreeMap<String, Short>(); + aggregateStats.put("2015-08-18,read,0.25", (short) 12); + aggregateStats.put("2015-08-18,read,0.5", (short) 24); + aggregateStats.put("2015-08-18,read,0.75", (short) 42); + assertAggregateStatsCanBeSerializedAndDeserialized(aggregateStats); + } + + private void assertRawStatsCanBeSerializedAndDeserialized( + SortedSet<Main.RawStat> rawStats) { + String formattedRawStats = Main.formatRawStats(rawStats); + SortedSet<Main.RawStat> parsedRawStats = Main.parseRawStats( + formattedRawStats); + assertEquals("Raw statistics are not equal", rawStats, + parsedRawStats); + } + + @Test + public void testRawStatsEmpty() { + assertRawStatsCanBeSerializedAndDeserialized( + new TreeSet<Main.RawStat>()); + } + + private static final long DATE_A = 16665, /* 2015-08-18 */ + DATE_B = 16680; /* 2015-09-02 */ + + private static final String + FPR_A = "1234567890123456789012345678901234567890", + FPR_B = "2345678901234567890123456789012345678901"; + + @Test + public void testRawStatsOneEntry() { + SortedSet<Main.RawStat> rawStats = new TreeSet<Main.RawStat>(); + rawStats.add(new 
Main.RawStat(DATE_A, FPR_A, (short) 40, (short) 30, + (short) 50)); + assertRawStatsCanBeSerializedAndDeserialized(rawStats); + } + + private void assertRawStatsCanBeMerged(SortedSet<Main.RawStat> rawStats, + SortedSet<Main.RawStat> newRawStats, boolean expectConflicts) { + SortedSet<Long> conflictingDays = Main.mergeRawStats(rawStats, + newRawStats); + assertSame("Expected merge conflicts differ from observed conflicts", + expectConflicts, !conflictingDays.isEmpty()); + } + + @Test + public void testMergeRawStatsAddNothing() { + SortedSet<Main.RawStat> rawStats = new TreeSet<Main.RawStat>(); + rawStats.add(new Main.RawStat(DATE_A, FPR_A, (short) 40, (short) 30, + (short) 50)); + assertRawStatsCanBeMerged(rawStats, new TreeSet<Main.RawStat>(), + false); + } + + @Test + public void testMergeRawStatsAddSame() { + SortedSet<Main.RawStat> rawStats = new TreeSet<Main.RawStat>(); + rawStats.add(new Main.RawStat(DATE_A, FPR_A, (short) 40, (short) 30, + (short) 50)); + SortedSet<Main.RawStat> newRawStats = new TreeSet<Main.RawStat>(); + newRawStats.add(new Main.RawStat(DATE_A, FPR_A, (short) 40, + (short) 30, (short) 50)); + assertRawStatsCanBeMerged(rawStats, newRawStats, false); + } + + @Test + public void testMergeRawStatsAddOther() { + SortedSet<Main.RawStat> rawStats = new TreeSet<Main.RawStat>(); + rawStats.add(new Main.RawStat(DATE_A, FPR_A, (short) 40, (short) 30, + (short) 50)); + SortedSet<Main.RawStat> newRawStats = new TreeSet<Main.RawStat>(); + newRawStats.add(new Main.RawStat(DATE_B, FPR_B, (short) 40, + (short) 30, (short) 50)); + assertRawStatsCanBeMerged(rawStats, newRawStats, false); + } + + @Test + public void testParseRawStatAllNegative() { + Main.RawStat rawStat = Main.parseRawStatFromDescriptorContents(DATE_A, + FPR_A, -1, -1, -1, -1); + assertNull(rawStat); + } + + @Test + public void testParseRawStatOneNegative() { + Main.RawStat rawStat = Main.parseRawStatFromDescriptorContents(DATE_A, + FPR_A, -1, 1, 1, 1); + assertNull(rawStat); + } + + @Test + 
public void testParseRawStatTotalZero() { + Main.RawStat rawStat = Main.parseRawStatFromDescriptorContents(DATE_A, + FPR_A, 0, 0, 0, 0); + assertNull(rawStat); + } + + @Test + public void testParseRawStatOneOfEach() { + Main.RawStat rawStat = Main.parseRawStatFromDescriptorContents(DATE_A, + FPR_A, 1, 1, 1, 2); + assertSame("Read fraction", (short) 25, rawStat.fractionRead); + assertSame("Write fraction", (short) 25, rawStat.fractionWrite); + assertSame("Both fraction", (short) 50, rawStat.fractionBoth); + } + + private void assertStatsCanBeAggregated( + SortedMap<String, Short> expectedAggregateStats, + SortedSet<Main.RawStat> rawStats) { + SortedMap<String, Short> updatedAggregateStats = + new TreeMap<String, Short>(); + Main.updateAggregateStats(updatedAggregateStats, rawStats); + assertEquals("Updated aggregate statistics don't match", + expectedAggregateStats, updatedAggregateStats); + } + + @Test + public void testUpdateAggregateStatsEmpty() { + assertStatsCanBeAggregated(new TreeMap<String, Short>(), + new TreeSet<Main.RawStat>()); + } + + @Test + public void testUpdateAggregateStatsSingleRawStat() { + SortedMap<String, Short> expectedAggregateStats = + new TreeMap<String, Short>(); + expectedAggregateStats.put("2015-08-18,read,0.25", (short) 42); + expectedAggregateStats.put("2015-08-18,read,0.5", (short) 42); + expectedAggregateStats.put("2015-08-18,read,0.75", (short) 42); + expectedAggregateStats.put("2015-08-18,write,0.25", (short) 32); + expectedAggregateStats.put("2015-08-18,write,0.5", (short) 32); + expectedAggregateStats.put("2015-08-18,write,0.75", (short) 32); + expectedAggregateStats.put("2015-08-18,both,0.25", (short) 22); + expectedAggregateStats.put("2015-08-18,both,0.5", (short) 22); + expectedAggregateStats.put("2015-08-18,both,0.75", (short) 22); + SortedSet<Main.RawStat> rawStats = new TreeSet<Main.RawStat>(); + rawStats.add(new Main.RawStat(DATE_A, FPR_A, (short) 42, (short) 32, + (short) 22)); + 
assertStatsCanBeAggregated(expectedAggregateStats, rawStats); + } + + @Test + public void testUpdateAggregateStatsTwoRawStat() { + SortedMap<String, Short> expectedAggregateStats = + new TreeMap<String, Short>(); + expectedAggregateStats.put("2015-08-18,read,0.25", (short) 32); + expectedAggregateStats.put("2015-08-18,read,0.5", (short) 42); + expectedAggregateStats.put("2015-08-18,read,0.75", (short) 42); + expectedAggregateStats.put("2015-08-18,write,0.25", (short) 22); + expectedAggregateStats.put("2015-08-18,write,0.5", (short) 32); + expectedAggregateStats.put("2015-08-18,write,0.75", (short) 32); + expectedAggregateStats.put("2015-08-18,both,0.25", (short) 12); + expectedAggregateStats.put("2015-08-18,both,0.5", (short) 22); + expectedAggregateStats.put("2015-08-18,both,0.75", (short) 22); + SortedSet<Main.RawStat> rawStats = new TreeSet<Main.RawStat>(); + rawStats.add(new Main.RawStat(DATE_A, FPR_A, (short) 32, (short) 22, + (short) 12)); + rawStats.add(new Main.RawStat(DATE_A, FPR_B, (short) 42, (short) 32, + (short) 22)); + assertStatsCanBeAggregated(expectedAggregateStats, rawStats); + } +} diff --git a/shared/bin/10-run-connbidirect-stats.sh b/shared/bin/10-run-connbidirect-stats.sh new file mode 100755 index 0000000..d59c315 --- /dev/null +++ b/shared/bin/10-run-connbidirect-stats.sh @@ -0,0 +1,5 @@ +#!/bin/sh +cd modules/connbidirect/ +ant | grep "[java]" +cd ../../ + diff --git a/shared/bin/99-copy-stats-files.sh b/shared/bin/99-copy-stats-files.sh index 4a30f24..22018ba 100755 --- a/shared/bin/99-copy-stats-files.sh +++ b/shared/bin/99-copy-stats-files.sh @@ -1,6 +1,7 @@ #!/bin/sh mkdir -p shared/stats cp -a modules/legacy/stats/*.csv shared/stats/ +cp -a modules/connbidirect/stats/connbidirect2.csv shared/stats/ cp -a modules/advbwdist/stats/advbwdist.csv shared/stats/ cp -a modules/hidserv/stats/hidserv.csv shared/stats/ cp -a modules/clients/stats/clients.csv shared/stats/ diff --git a/website/etc/web.xml b/website/etc/web.xml index 
e32f26b..87dec8f 100644 --- a/website/etc/web.xml +++ b/website/etc/web.xml @@ -64,6 +64,7 @@ <url-pattern>/clients-data.html</url-pattern> <url-pattern>/torperf-data.html</url-pattern> <url-pattern>/connbidirect-data.html</url-pattern> + <url-pattern>/connbidirect2-data.html</url-pattern> <url-pattern>/hidserv-data.html</url-pattern> <url-pattern>/hidserv-dir-onions-seen.html</url-pattern> <url-pattern>/hidserv-rend-relayed-cells.html</url-pattern> diff --git a/website/rserve/graphs.R b/website/rserve/graphs.R index 0e8e0a3..c35f1ec 100644 --- a/website/rserve/graphs.R +++ b/website/rserve/graphs.R @@ -674,30 +674,21 @@ plot_torperf_failures <- function(start, end, source, filesize, path) { plot_connbidirect <- function(start, end, path) { end <- min(end, as.character(Sys.Date() - 2)) c <- read.csv(paste("/srv/metrics.torproject.org/metrics/shared/stats/", - "connbidirect.csv", sep = ""), stringsAsFactors = FALSE) - c <- c[c$date >= start & c$date <= end & - c$read + c$write + c$both > 0, ] - c <- data.frame(date = as.Date(c$date, "%Y-%m-%d"), - both = c$both / (c$read + c$write + c$both), - read = c$read / (c$read + c$write + c$both), - write = c$write / (c$read + c$write + c$both)) - c <- aggregate(list(both = c$both, read = c$read, write = c$write), - by = list(date = c$date), quantile, - probs = c(0.25, 0.5, 0.75)) - c <- rbind( - data.frame(date = as.Date(c$date), data.frame(c$both), - variable = "both"), - data.frame(date = as.Date(c$date), data.frame(c$write), - variable = "write"), - data.frame(date = as.Date(c$date), data.frame(c$read), - variable = "read")) + "connbidirect2.csv", sep = ""), stringsAsFactors = FALSE) + c <- c[c$date >= start & c$date <= end, ] + c <- data.frame(date = as.Date(c$date), + direction = factor(c$direction, + levels = c("both", "write", "read")), + quantile = paste("X", c$quantile, sep = ""), + fraction = c$fraction / 100) + c <- cast(c, date + direction ~ quantile, value = "fraction") date_breaks <- date_breaks( 
as.numeric(max(as.Date(c$date, "%Y-%m-%d")) - min(as.Date(c$date, "%Y-%m-%d")))) - ggplot(c, aes(x = date, y = X50., colour = variable)) + + ggplot(c, aes(x = date, y = X0.5, colour = direction)) + geom_line(size = 0.75) + - geom_ribbon(aes(x = date, ymin = X25., ymax = X75., fill = variable), - alpha = 0.5, legend = FALSE) + + geom_ribbon(aes(x = date, ymin = X0.25, ymax = X0.75, + fill = direction), alpha = 0.5, legend = FALSE) + scale_x_date(name = paste("\nThe Tor Project - ", "https://metrics.torproject.org/", sep = ""), format = date_breaks$format, major = date_breaks$major, diff --git a/website/src/org/torproject/metrics/web/IndexServlet.java b/website/src/org/torproject/metrics/web/IndexServlet.java index 63b0da6..47cb4cd 100644 --- a/website/src/org/torproject/metrics/web/IndexServlet.java +++ b/website/src/org/torproject/metrics/web/IndexServlet.java @@ -149,6 +149,9 @@ public class IndexServlet extends HttpServlet { "Performance of downloading static files over Tor", new String[] { "Performance" }, "Data", "Advanced")); availableMetrics.add(new Metric("connbidirect-data.html", + "Fraction of connections used uni-/bidirectionally (deprecated)", + new String[] { "Performance" }, "Data", "Advanced")); + availableMetrics.add(new Metric("connbidirect2-data.html", "Fraction of connections used uni-/bidirectionally", new String[] { "Performance" }, "Data", "Advanced")); availableMetrics.add(new Metric("hidserv-dir-onions-seen.html", diff --git a/website/src/org/torproject/metrics/web/graphs/GraphsSubpagesServlet.java b/website/src/org/torproject/metrics/web/graphs/GraphsSubpagesServlet.java index 0192836..f484d8f 100644 --- a/website/src/org/torproject/metrics/web/graphs/GraphsSubpagesServlet.java +++ b/website/src/org/torproject/metrics/web/graphs/GraphsSubpagesServlet.java @@ -90,6 +90,8 @@ public class GraphsSubpagesServlet extends HttpServlet { "WEB-INF/torperf-data.jsp"); this.availableGraphsSubpages.put("connbidirect-data.html", 
"WEB-INF/connbidirect-data.jsp"); + this.availableGraphsSubpages.put("connbidirect2-data.html", + "WEB-INF/connbidirect2-data.jsp"); this.availableGraphsSubpages.put("hidserv-data.html", "WEB-INF/hidserv-data.jsp"); this.availableGraphsSubpages.put("hidserv-dir-onions-seen.html", diff --git a/website/src/org/torproject/metrics/web/research/ResearchStatsServlet.java b/website/src/org/torproject/metrics/web/research/ResearchStatsServlet.java index ea30c60..618d614 100644 --- a/website/src/org/torproject/metrics/web/research/ResearchStatsServlet.java +++ b/website/src/org/torproject/metrics/web/research/ResearchStatsServlet.java @@ -33,6 +33,7 @@ public class ResearchStatsServlet extends HttpServlet { this.availableStatisticsFiles.add("clients"); this.availableStatisticsFiles.add("torperf"); this.availableStatisticsFiles.add("connbidirect"); + this.availableStatisticsFiles.add("connbidirect2"); this.availableStatisticsFiles.add("advbwdist"); this.availableStatisticsFiles.add("hidserv"); } diff --git a/website/web/WEB-INF/connbidirect-data.jsp b/website/web/WEB-INF/connbidirect-data.jsp index 51c78a7..8b7d9ed 100644 --- a/website/web/WEB-INF/connbidirect-data.jsp +++ b/website/web/WEB-INF/connbidirect-data.jsp @@ -3,7 +3,7 @@ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <html> <head> - <title>Tor Metrics — Fraction of connections used uni-/bidirectionally</title> + <title>Tor Metrics — Fraction of connections used uni-/bidirectionally (deprecated)</title> <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"> <link href="/css/stylesheet-ltr.css" type="text/css" rel="stylesheet"> <link href="/images/favicon.ico" type="image/x-icon" rel="shortcut icon"> @@ -13,8 +13,14 @@ <%@ include file="banner.jsp"%> <div class="main-column">
-<h2><a href="/">Tor Metrics</a> — Fraction of connections used uni-/bidirectionally</h2> +<h2><a href="/">Tor Metrics</a> — Fraction of connections used uni-/bidirectionally (deprecated)</h2> <br> +<p><font color="red">As of August 25, 2015, this page and the linked data +file have been replaced by <a href="connbidirect2-data.html">this page and +the data file linked from there</a>. +This page and the linked data file will be removed in the +future.</font></p> + <p>The following data file contains statistics on the fraction of direct connections between a <a href="about.html#relay">relay</a> and other nodes in the network that are used uni- or bidirectionally. diff --git a/website/web/WEB-INF/connbidirect.jsp b/website/web/WEB-INF/connbidirect.jsp index affa329..6ed512b 100644 --- a/website/web/WEB-INF/connbidirect.jsp +++ b/website/web/WEB-INF/connbidirect.jsp @@ -55,7 +55,7 @@ fractions.</p>
<h4>Related metrics</h4> <ul> -<li><a href="connbidirect-data.html">Data: Fraction of connections used uni-/bidirectionally</a></li> +<li><a href="connbidirect2-data.html">Data: Fraction of connections used uni-/bidirectionally</a></li> </ul>
</div> diff --git a/website/web/WEB-INF/connbidirect2-data.jsp b/website/web/WEB-INF/connbidirect2-data.jsp new file mode 100644 index 0000000..f29373a --- /dev/null +++ b/website/web/WEB-INF/connbidirect2-data.jsp @@ -0,0 +1,67 @@ +<%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core" %> +<%@ taglib prefix="fn" uri="http://java.sun.com/jsp/jstl/functions" %> +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> +<html> +<head> + <title>Tor Metrics — Fraction of connections used uni-/bidirectionally</title> + <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"> + <link href="/css/stylesheet-ltr.css" type="text/css" rel="stylesheet"> + <link href="/images/favicon.ico" type="image/x-icon" rel="shortcut icon"> +</head> +<body> + <div class="center"> + <%@ include file="banner.jsp"%> + <div class="main-column"> + +<h2><a href="/">Tor Metrics</a> — Fraction of connections used uni-/bidirectionally</h2> +<br> +<p>The following data file contains statistics on the fraction of direct +connections between a <a href="about.html#relay">relay</a> and other nodes +in the network that are used uni- or bidirectionally. +Every 10 seconds, relays determine for every direct connection whether +they read and wrote less than a threshold of 20 KiB. +Connections below this threshold are excluded from the statistics file. +For the remaining connections, relays determine whether they read/wrote at +least 10 times as many bytes as they wrote/read. +If so, they classify a connection as "mostly reading" or "mostly writing", +respectively. +All other connections are classified as "both reading and writing". +After classifying connections, read and write counters are reset for the +next 10-second interval. 
+The data file contains daily medians and quartiles of reported +fractions.</p> + +<p><b>Download as <a href="stats/connbidirect2.csv">CSV file</a>.</b></p> + +<p>The statistics file contains the following columns:</p> +<ul> +<li><b>date:</b> UTC date (YYYY-MM-DD) for which statistics on +uni-/bidirectional connection usage were reported.</li> +<li><b>direction:</b> Direction of reported fraction, which can be +<b>"read"</b>, <b>"write"</b>, or <b>"both"</b> for connections classified +as "mostly reading", "mostly writing", or "both reading and writing". +Connections below the threshold have been removed from this statistics +file entirely.</li> +<li><b>quantile:</b> Quantile of the reported fraction when considering +all statistics reported for this date. +Examples are <b>"0.5"</b> for the median and <b>"0.25"</b> and +<b>"0.75"</b> for the lower and upper quartile.</li> +<li><b>fraction:</b> Fraction of connections in percent for the given +date, direction, and quantile. +For each daily statistic reported by a relay, fractions for the three +directions "read", "write", and "both" sum up to exactly 100.</li> +</ul> + +<h4>Related metrics</h4> +<ul> +<li><a href="connbidirect.html">Graph: Fraction of connections used uni-/bidirectionally</a></li> +</ul> + + </div> + </div> + <div class="bottom" id="bottom"> + <%@ include file="footer.jsp"%> + </div> +</body> +</html> + diff --git a/website/web/WEB-INF/performance.jsp b/website/web/WEB-INF/performance.jsp index 04231d3..37e05a1 100644 --- a/website/web/WEB-INF/performance.jsp +++ b/website/web/WEB-INF/performance.jsp @@ -25,7 +25,7 @@ Sorry for any inconvenience caused by this.</p> <li><a href="torperf-failures.html">Graph: Timeouts and failures of downloading files over Tor</a></li> <li><a href="connbidirect.html">Graph: Fraction of connections used uni-/bidirectionally</a></li> <li><a href="torperf-data.html">Data: Performance of downloading static files over Tor</a></li> -<li><a href="connbidirect-data.html">Data: 
Fraction of connections used uni-/bidirectionally</a></li> +<li><a href="connbidirect2-data.html">Data: Fraction of connections used uni-/bidirectionally</a></li> </ul>
</div> diff --git a/website/web/WEB-INF/stats.jsp b/website/web/WEB-INF/stats.jsp index 6a807ac..ef8319b 100644 --- a/website/web/WEB-INF/stats.jsp +++ b/website/web/WEB-INF/stats.jsp @@ -24,7 +24,7 @@ Sorry for any inconvenience caused by this.</p> <li><a href="advbwdist-data.html">Data: Advertised bandwidth distribution and n-th fastest relays</a></li> <li><a href="clients-data.html">Data: Estimated number of clients in the Tor network</a></li> <li><a href="torperf-data.html">Data: Performance of downloading static files over Tor</a></li> -<li><a href="connbidirect-data.html">Data: Fraction of connections used uni-/bidirectionally</a></li> +<li><a href="connbidirect2-data.html">Data: Fraction of connections used uni-/bidirectionally</a></li> </ul>
</div>
tor-commits@lists.torproject.org