commit 1da05983fe24ebda82c2bd9018eba846468d933e Author: iwakeh iwakeh@torproject.org Date: Thu Feb 23 13:30:26 2017 +0000
Make file layout comply with Metrics' standards a little more. Tweak build files. Set new descriptor/metrics-lib version. --- modules/advbwdist/build.xml | 3 - .../org/torproject/metrics/advbwdist/Main.java | 158 ++++ .../src/org/torproject/metrics/advbwdist/Main.java | 158 ---- .../java/org/torproject/metrics/clients/Main.java | 478 ++++++++++ .../src/org/torproject/metrics/clients/Main.java | 478 ---------- .../org/torproject/metrics/collectdescs/Main.java | 31 + .../org/torproject/metrics/collectdescs/Main.java | 31 - modules/connbidirect/build.xml | 61 +- modules/hidserv/build.xml | 3 - .../org/torproject/metrics/hidserv/Aggregator.java | 198 ++++ .../metrics/hidserv/ComputedNetworkFractions.java | 183 ++++ .../torproject/metrics/hidserv/DateTimeHelper.java | 107 +++ .../org/torproject/metrics/hidserv/Document.java | 26 + .../torproject/metrics/hidserv/DocumentStore.java | 176 ++++ .../metrics/hidserv/ExtrapolatedHidServStats.java | 170 ++++ .../torproject/metrics/hidserv/Extrapolator.java | 253 ++++++ .../java/org/torproject/metrics/hidserv/Main.java | 88 ++ .../org/torproject/metrics/hidserv/Parser.java | 440 +++++++++ .../metrics/hidserv/ReportedHidServStats.java | 141 +++ .../org/torproject/metrics/hidserv/Simulate.java | 365 ++++++++ .../org/torproject/metrics/hidserv/Aggregator.java | 198 ---- .../metrics/hidserv/ComputedNetworkFractions.java | 183 ---- .../torproject/metrics/hidserv/DateTimeHelper.java | 107 --- .../org/torproject/metrics/hidserv/Document.java | 26 - .../torproject/metrics/hidserv/DocumentStore.java | 176 ---- .../metrics/hidserv/ExtrapolatedHidServStats.java | 170 ---- .../torproject/metrics/hidserv/Extrapolator.java | 253 ------ .../src/org/torproject/metrics/hidserv/Main.java | 88 -- .../src/org/torproject/metrics/hidserv/Parser.java | 440 --------- .../metrics/hidserv/ReportedHidServStats.java | 141 --- .../org/torproject/metrics/hidserv/Simulate.java | 365 -------- modules/legacy/build.xml | 1 - 
.../org/torproject/ernie/cron/Configuration.java | 206 +++++ .../java/org/torproject/ernie/cron/LockFile.java | 58 ++ .../ernie/cron/LoggingConfiguration.java | 100 +++ .../main/java/org/torproject/ernie/cron/Main.java | 90 ++ .../cron/RelayDescriptorDatabaseImporter.java | 995 +++++++++++++++++++++ .../cron/network/ConsensusStatsFileHandler.java | 412 +++++++++ .../ernie/cron/performance/TorperfProcessor.java | 292 ++++++ .../org/torproject/ernie/cron/Configuration.java | 206 ----- .../src/org/torproject/ernie/cron/LockFile.java | 58 -- .../ernie/cron/LoggingConfiguration.java | 100 --- .../legacy/src/org/torproject/ernie/cron/Main.java | 90 -- .../cron/RelayDescriptorDatabaseImporter.java | 995 --------------------- .../cron/network/ConsensusStatsFileHandler.java | 412 --------- .../ernie/cron/performance/TorperfProcessor.java | 292 ------ modules/webstats/build.xml | 3 - shared/build-base.xml | 5 +- 48 files changed, 4978 insertions(+), 5032 deletions(-)
diff --git a/modules/advbwdist/build.xml b/modules/advbwdist/build.xml index 9aa187f..0493d8a 100644 --- a/modules/advbwdist/build.xml +++ b/modules/advbwdist/build.xml @@ -7,9 +7,6 @@ <path id="classpath"> <pathelement path="${classes}"/> <path refid="base.classpath" /> - <fileset dir="${libs}"> - <include name="commons-codec-1.9.jar"/> - </fileset> </path>
<target name="run" depends="compile"> diff --git a/modules/advbwdist/src/main/java/org/torproject/metrics/advbwdist/Main.java b/modules/advbwdist/src/main/java/org/torproject/metrics/advbwdist/Main.java new file mode 100644 index 0000000..7d7678d --- /dev/null +++ b/modules/advbwdist/src/main/java/org/torproject/metrics/advbwdist/Main.java @@ -0,0 +1,158 @@ +/* Copyright 2016--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.advbwdist; + +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.NetworkStatusEntry; +import org.torproject.descriptor.RelayNetworkStatusConsensus; +import org.torproject.descriptor.ServerDescriptor; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TimeZone; + +public class Main { + + /** Executes this data-processing module. */ + public static void main(String[] args) throws IOException { + + /* Parse server descriptors, not keeping a parse history, and memorize + * the advertised bandwidth for every server descriptor. 
*/ + DescriptorReader descriptorReader = + DescriptorSourceFactory.createDescriptorReader(); + descriptorReader.addDirectory( + new File("../../shared/in/recent/relay-descriptors/" + + "server-descriptors")); + Iterator<DescriptorFile> descriptorFiles = + descriptorReader.readDescriptors(); + Map<String, Long> serverDescriptors = new HashMap<>(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (!(descriptor instanceof ServerDescriptor)) { + continue; + } + ServerDescriptor serverDescriptor = (ServerDescriptor) descriptor; + String digest = serverDescriptor.getServerDescriptorDigest(); + long advertisedBandwidth = Math.min(Math.min( + serverDescriptor.getBandwidthRate(), + serverDescriptor.getBandwidthBurst()), + serverDescriptor.getBandwidthObserved()); + serverDescriptors.put(digest.toUpperCase(), advertisedBandwidth); + } + } + + /* Parse consensuses, keeping a parse history. 
*/ + descriptorReader = DescriptorSourceFactory.createDescriptorReader(); + descriptorReader.addDirectory( + new File("../../shared/in/recent/relay-descriptors/consensuses")); + descriptorReader.setExcludeFiles( + new File("status/parsed-consensuses")); + descriptorFiles = descriptorReader.readDescriptors(); + File resultsFile = new File("stats/advbwdist-validafter.csv"); + resultsFile.getParentFile().mkdirs(); + boolean writeHeader = !resultsFile.exists(); + BufferedWriter bw = new BufferedWriter(new FileWriter(resultsFile, + true)); + if (writeHeader) { + bw.write("valid_after,isexit,relay,percentile,advbw\n"); + } + SimpleDateFormat dateTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (!(descriptor instanceof RelayNetworkStatusConsensus)) { + continue; + } + + /* Parse server descriptor digests from consensus and look up + * advertised bandwidths. 
*/ + RelayNetworkStatusConsensus consensus = + (RelayNetworkStatusConsensus) descriptor; + String validAfter = dateTimeFormat.format( + consensus.getValidAfterMillis()); + List<Long> advertisedBandwidthsAllRelays = new ArrayList<>(); + List<Long> advertisedBandwidthsExitsOnly = new ArrayList<>(); + for (NetworkStatusEntry relay + : consensus.getStatusEntries().values()) { + if (!relay.getFlags().contains("Running")) { + continue; + } + String serverDescriptorDigest = relay.getDescriptor() + .toUpperCase(); + if (!serverDescriptors.containsKey(serverDescriptorDigest)) { + continue; + } + long advertisedBandwidth = serverDescriptors.get( + serverDescriptorDigest); + advertisedBandwidthsAllRelays.add(advertisedBandwidth); + if (relay.getFlags().contains("Exit") + && !relay.getFlags().contains("BadExit")) { + advertisedBandwidthsExitsOnly.add(advertisedBandwidth); + } + } + + /* Write advertised bandwidths of n-th fastest relays/exits. */ + Collections.sort(advertisedBandwidthsAllRelays, + Collections.reverseOrder()); + Collections.sort(advertisedBandwidthsExitsOnly, + Collections.reverseOrder()); + int[] fastestRelays = new int[] { 1, 2, 3, 5, 10, 20, 30, 50, 100, + 200, 300, 500, 1000, 2000, 3000, 5000 }; + for (int fastestRelay : fastestRelays) { + if (advertisedBandwidthsAllRelays.size() >= fastestRelay) { + bw.write(String.format("%s,,%d,,%d%n", validAfter, + fastestRelay, + advertisedBandwidthsAllRelays.get(fastestRelay - 1))); + } + } + for (int fastestRelay : fastestRelays) { + if (advertisedBandwidthsExitsOnly.size() >= fastestRelay) { + bw.write(String.format("%s,TRUE,%d,,%d%n", validAfter, + fastestRelay, + advertisedBandwidthsExitsOnly.get(fastestRelay - 1))); + } + } + + /* Write advertised bandwidth percentiles of relays/exits. 
*/ + Collections.sort(advertisedBandwidthsAllRelays); + Collections.sort(advertisedBandwidthsExitsOnly); + int[] percentiles = new int[] { 0, 1, 2, 3, 5, 9, 10, 20, 25, 30, + 40, 50, 60, 70, 75, 80, 90, 91, 95, 97, 98, 99, 100 }; + if (!advertisedBandwidthsAllRelays.isEmpty()) { + for (int percentile : percentiles) { + bw.write(String.format("%s,,,%d,%d%n", validAfter, + percentile, advertisedBandwidthsAllRelays.get( + ((advertisedBandwidthsAllRelays.size() - 1) + * percentile) / 100))); + } + } + if (!advertisedBandwidthsExitsOnly.isEmpty()) { + for (int percentile : percentiles) { + bw.write(String.format("%s,TRUE,,%d,%d%n", validAfter, + percentile, advertisedBandwidthsExitsOnly.get( + ((advertisedBandwidthsExitsOnly.size() - 1) + * percentile) / 100))); + } + } + } + } + bw.close(); + } +} + diff --git a/modules/advbwdist/src/org/torproject/metrics/advbwdist/Main.java b/modules/advbwdist/src/org/torproject/metrics/advbwdist/Main.java deleted file mode 100644 index 7d7678d..0000000 --- a/modules/advbwdist/src/org/torproject/metrics/advbwdist/Main.java +++ /dev/null @@ -1,158 +0,0 @@ -/* Copyright 2016--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.metrics.advbwdist; - -import org.torproject.descriptor.Descriptor; -import org.torproject.descriptor.DescriptorFile; -import org.torproject.descriptor.DescriptorReader; -import org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.NetworkStatusEntry; -import org.torproject.descriptor.RelayNetworkStatusConsensus; -import org.torproject.descriptor.ServerDescriptor; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.TimeZone; - -public class Main { - - /** Executes this 
data-processing module. */ - public static void main(String[] args) throws IOException { - - /* Parse server descriptors, not keeping a parse history, and memorize - * the advertised bandwidth for every server descriptor. */ - DescriptorReader descriptorReader = - DescriptorSourceFactory.createDescriptorReader(); - descriptorReader.addDirectory( - new File("../../shared/in/recent/relay-descriptors/" - + "server-descriptors")); - Iterator<DescriptorFile> descriptorFiles = - descriptorReader.readDescriptors(); - Map<String, Long> serverDescriptors = new HashMap<>(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (!(descriptor instanceof ServerDescriptor)) { - continue; - } - ServerDescriptor serverDescriptor = (ServerDescriptor) descriptor; - String digest = serverDescriptor.getServerDescriptorDigest(); - long advertisedBandwidth = Math.min(Math.min( - serverDescriptor.getBandwidthRate(), - serverDescriptor.getBandwidthBurst()), - serverDescriptor.getBandwidthObserved()); - serverDescriptors.put(digest.toUpperCase(), advertisedBandwidth); - } - } - - /* Parse consensuses, keeping a parse history. 
*/ - descriptorReader = DescriptorSourceFactory.createDescriptorReader(); - descriptorReader.addDirectory( - new File("../../shared/in/recent/relay-descriptors/consensuses")); - descriptorReader.setExcludeFiles( - new File("status/parsed-consensuses")); - descriptorFiles = descriptorReader.readDescriptors(); - File resultsFile = new File("stats/advbwdist-validafter.csv"); - resultsFile.getParentFile().mkdirs(); - boolean writeHeader = !resultsFile.exists(); - BufferedWriter bw = new BufferedWriter(new FileWriter(resultsFile, - true)); - if (writeHeader) { - bw.write("valid_after,isexit,relay,percentile,advbw\n"); - } - SimpleDateFormat dateTimeFormat = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (!(descriptor instanceof RelayNetworkStatusConsensus)) { - continue; - } - - /* Parse server descriptor digests from consensus and look up - * advertised bandwidths. 
*/ - RelayNetworkStatusConsensus consensus = - (RelayNetworkStatusConsensus) descriptor; - String validAfter = dateTimeFormat.format( - consensus.getValidAfterMillis()); - List<Long> advertisedBandwidthsAllRelays = new ArrayList<>(); - List<Long> advertisedBandwidthsExitsOnly = new ArrayList<>(); - for (NetworkStatusEntry relay - : consensus.getStatusEntries().values()) { - if (!relay.getFlags().contains("Running")) { - continue; - } - String serverDescriptorDigest = relay.getDescriptor() - .toUpperCase(); - if (!serverDescriptors.containsKey(serverDescriptorDigest)) { - continue; - } - long advertisedBandwidth = serverDescriptors.get( - serverDescriptorDigest); - advertisedBandwidthsAllRelays.add(advertisedBandwidth); - if (relay.getFlags().contains("Exit") - && !relay.getFlags().contains("BadExit")) { - advertisedBandwidthsExitsOnly.add(advertisedBandwidth); - } - } - - /* Write advertised bandwidths of n-th fastest relays/exits. */ - Collections.sort(advertisedBandwidthsAllRelays, - Collections.reverseOrder()); - Collections.sort(advertisedBandwidthsExitsOnly, - Collections.reverseOrder()); - int[] fastestRelays = new int[] { 1, 2, 3, 5, 10, 20, 30, 50, 100, - 200, 300, 500, 1000, 2000, 3000, 5000 }; - for (int fastestRelay : fastestRelays) { - if (advertisedBandwidthsAllRelays.size() >= fastestRelay) { - bw.write(String.format("%s,,%d,,%d%n", validAfter, - fastestRelay, - advertisedBandwidthsAllRelays.get(fastestRelay - 1))); - } - } - for (int fastestRelay : fastestRelays) { - if (advertisedBandwidthsExitsOnly.size() >= fastestRelay) { - bw.write(String.format("%s,TRUE,%d,,%d%n", validAfter, - fastestRelay, - advertisedBandwidthsExitsOnly.get(fastestRelay - 1))); - } - } - - /* Write advertised bandwidth percentiles of relays/exits. 
*/ - Collections.sort(advertisedBandwidthsAllRelays); - Collections.sort(advertisedBandwidthsExitsOnly); - int[] percentiles = new int[] { 0, 1, 2, 3, 5, 9, 10, 20, 25, 30, - 40, 50, 60, 70, 75, 80, 90, 91, 95, 97, 98, 99, 100 }; - if (!advertisedBandwidthsAllRelays.isEmpty()) { - for (int percentile : percentiles) { - bw.write(String.format("%s,,,%d,%d%n", validAfter, - percentile, advertisedBandwidthsAllRelays.get( - ((advertisedBandwidthsAllRelays.size() - 1) - * percentile) / 100))); - } - } - if (!advertisedBandwidthsExitsOnly.isEmpty()) { - for (int percentile : percentiles) { - bw.write(String.format("%s,TRUE,,%d,%d%n", validAfter, - percentile, advertisedBandwidthsExitsOnly.get( - ((advertisedBandwidthsExitsOnly.size() - 1) - * percentile) / 100))); - } - } - } - } - bw.close(); - } -} - diff --git a/modules/clients/src/main/java/org/torproject/metrics/clients/Main.java b/modules/clients/src/main/java/org/torproject/metrics/clients/Main.java new file mode 100644 index 0000000..dff73f7 --- /dev/null +++ b/modules/clients/src/main/java/org/torproject/metrics/clients/Main.java @@ -0,0 +1,478 @@ +/* Copyright 2013--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.clients; + +import org.torproject.descriptor.BandwidthHistory; +import org.torproject.descriptor.BridgeNetworkStatus; +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.ExtraInfoDescriptor; +import org.torproject.descriptor.NetworkStatusEntry; +import org.torproject.descriptor.RelayNetworkStatusConsensus; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.SortedMap; +import 
java.util.TimeZone; +import java.util.TreeMap; + +public class Main { + + /** Executes this data-processing module. */ + public static void main(String[] args) throws Exception { + parseArgs(args); + parseRelayDescriptors(); + parseBridgeDescriptors(); + closeOutputFiles(); + } + + private static boolean writeToSingleFile = true; + private static boolean byStatsDateNotByDescHour = false; + + private static void parseArgs(String[] args) { + if (args.length == 0) { + writeToSingleFile = true; + } else if (args.length == 1 && args[0].equals("--stats-date")) { + writeToSingleFile = false; + byStatsDateNotByDescHour = true; + } else if (args.length == 1 && args[0].equals("--desc-hour")) { + writeToSingleFile = false; + byStatsDateNotByDescHour = false; + } else { + System.err.println("Usage: java " + Main.class.getName() + + " [ --stats-date | --desc-hour ]"); + System.exit(1); + } + } + + private static final long ONE_HOUR_MILLIS = 60L * 60L * 1000L; + + private static final long ONE_DAY_MILLIS = 24L * ONE_HOUR_MILLIS; + + private static final long ONE_WEEK_MILLIS = 7L * ONE_DAY_MILLIS; + + private static void parseRelayDescriptors() throws Exception { + DescriptorReader descriptorReader = + DescriptorSourceFactory.createDescriptorReader(); + descriptorReader.setExcludeFiles(new File( + "status/relay-descriptors")); + descriptorReader.addDirectory(new File( + "../../shared/in/recent/relay-descriptors/consensuses")); + descriptorReader.addDirectory(new File( + "../../shared/in/recent/relay-descriptors/extra-infos")); + descriptorReader.addDirectory(new File( + "../../shared/in/archive/relay-descriptors/consensuses")); + descriptorReader.addDirectory(new File( + "../../shared/in/archive/relay-descriptors/extra-infos")); + Iterator<DescriptorFile> descriptorFiles = + descriptorReader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if 
(descriptor instanceof ExtraInfoDescriptor) { + parseRelayExtraInfoDescriptor((ExtraInfoDescriptor) descriptor); + } else if (descriptor instanceof RelayNetworkStatusConsensus) { + parseRelayNetworkStatusConsensus( + (RelayNetworkStatusConsensus) descriptor); + } + } + } + } + + private static void parseRelayExtraInfoDescriptor( + ExtraInfoDescriptor descriptor) throws IOException { + long publishedMillis = descriptor.getPublishedMillis(); + String fingerprint = descriptor.getFingerprint() + .toUpperCase(); + long dirreqStatsEndMillis = descriptor.getDirreqStatsEndMillis(); + long dirreqStatsIntervalLengthMillis = + descriptor.getDirreqStatsIntervalLength() * 1000L; + SortedMap<String, Integer> requests = descriptor.getDirreqV3Reqs(); + BandwidthHistory dirreqWriteHistory = + descriptor.getDirreqWriteHistory(); + parseRelayDirreqV3Reqs(fingerprint, publishedMillis, + dirreqStatsEndMillis, dirreqStatsIntervalLengthMillis, requests); + parseRelayDirreqWriteHistory(fingerprint, publishedMillis, + dirreqWriteHistory); + } + + private static void parseRelayDirreqV3Reqs(String fingerprint, + long publishedMillis, long dirreqStatsEndMillis, + long dirreqStatsIntervalLengthMillis, + SortedMap<String, Integer> requests) throws IOException { + if (requests == null + || publishedMillis - dirreqStatsEndMillis > ONE_WEEK_MILLIS + || dirreqStatsIntervalLengthMillis != ONE_DAY_MILLIS) { + /* Cut off all observations that are one week older than + * the descriptor publication time, or we'll have to update + * weeks of aggregate values every hour. */ + return; + } + long statsStartMillis = dirreqStatsEndMillis + - dirreqStatsIntervalLengthMillis; + long utcBreakMillis = (dirreqStatsEndMillis / ONE_DAY_MILLIS) + * ONE_DAY_MILLIS; + for (int i = 0; i < 2; i++) { + long fromMillis = i == 0 ? statsStartMillis + : utcBreakMillis; + long toMillis = i == 0 ? 
utcBreakMillis : dirreqStatsEndMillis; + if (fromMillis >= toMillis) { + continue; + } + double intervalFraction = ((double) (toMillis - fromMillis)) + / ((double) dirreqStatsIntervalLengthMillis); + double sum = 0L; + for (Map.Entry<String, Integer> e : requests.entrySet()) { + String country = e.getKey(); + double reqs = ((double) e.getValue()) - 4.0; + sum += reqs; + writeOutputLine(fingerprint, "relay", "responses", country, + "", "", fromMillis, toMillis, reqs * intervalFraction, + publishedMillis); + } + writeOutputLine(fingerprint, "relay", "responses", "", "", + "", fromMillis, toMillis, sum * intervalFraction, + publishedMillis); + } + } + + private static void parseRelayDirreqWriteHistory(String fingerprint, + long publishedMillis, BandwidthHistory dirreqWriteHistory) + throws IOException { + if (dirreqWriteHistory == null + || publishedMillis - dirreqWriteHistory.getHistoryEndMillis() + > ONE_WEEK_MILLIS) { + return; + /* Cut off all observations that are one week older than + * the descriptor publication time, or we'll have to update + * weeks of aggregate values every hour. 
*/ + } + long intervalLengthMillis = + dirreqWriteHistory.getIntervalLength() * 1000L; + for (Map.Entry<Long, Long> e + : dirreqWriteHistory.getBandwidthValues().entrySet()) { + long intervalEndMillis = e.getKey(); + long intervalStartMillis = + intervalEndMillis - intervalLengthMillis; + for (int i = 0; i < 2; i++) { + long fromMillis = intervalStartMillis; + long toMillis = intervalEndMillis; + double writtenBytes = (double) e.getValue(); + if (intervalStartMillis / ONE_DAY_MILLIS + < intervalEndMillis / ONE_DAY_MILLIS) { + long utcBreakMillis = (intervalEndMillis + / ONE_DAY_MILLIS) * ONE_DAY_MILLIS; + if (i == 0) { + toMillis = utcBreakMillis; + } else if (i == 1) { + fromMillis = utcBreakMillis; + } + double intervalFraction = ((double) (toMillis - fromMillis)) + / ((double) intervalLengthMillis); + writtenBytes *= intervalFraction; + } else if (i == 1) { + break; + } + writeOutputLine(fingerprint, "relay", "bytes", "", "", "", + fromMillis, toMillis, writtenBytes, publishedMillis); + } + } + } + + private static void parseRelayNetworkStatusConsensus( + RelayNetworkStatusConsensus consensus) throws IOException { + long fromMillis = consensus.getValidAfterMillis(); + long toMillis = consensus.getFreshUntilMillis(); + for (NetworkStatusEntry statusEntry + : consensus.getStatusEntries().values()) { + String fingerprint = statusEntry.getFingerprint() + .toUpperCase(); + if (statusEntry.getFlags().contains("Running")) { + writeOutputLine(fingerprint, "relay", "status", "", "", "", + fromMillis, toMillis, 0.0, fromMillis); + } + } + } + + private static void parseBridgeDescriptors() throws Exception { + DescriptorReader descriptorReader = + DescriptorSourceFactory.createDescriptorReader(); + descriptorReader.setExcludeFiles(new File( + "status/bridge-descriptors")); + descriptorReader.addDirectory(new File( + "../../shared/in/recent/bridge-descriptors")); + descriptorReader.addDirectory(new File( + "../../shared/in/archive/bridge-descriptors")); + 
Iterator<DescriptorFile> descriptorFiles = + descriptorReader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (descriptor instanceof ExtraInfoDescriptor) { + parseBridgeExtraInfoDescriptor( + (ExtraInfoDescriptor) descriptor); + } else if (descriptor instanceof BridgeNetworkStatus) { + parseBridgeNetworkStatus((BridgeNetworkStatus) descriptor); + } + } + } + } + + private static void parseBridgeExtraInfoDescriptor( + ExtraInfoDescriptor descriptor) throws IOException { + String fingerprint = descriptor.getFingerprint().toUpperCase(); + long publishedMillis = descriptor.getPublishedMillis(); + long dirreqStatsEndMillis = descriptor.getDirreqStatsEndMillis(); + long dirreqStatsIntervalLengthMillis = + descriptor.getDirreqStatsIntervalLength() * 1000L; + parseBridgeDirreqV3Resp(fingerprint, publishedMillis, + dirreqStatsEndMillis, dirreqStatsIntervalLengthMillis, + descriptor.getDirreqV3Resp(), + descriptor.getBridgeIps(), + descriptor.getBridgeIpTransports(), + descriptor.getBridgeIpVersions()); + + parseBridgeDirreqWriteHistory(fingerprint, publishedMillis, + descriptor.getDirreqWriteHistory()); + } + + private static void parseBridgeDirreqV3Resp(String fingerprint, + long publishedMillis, long dirreqStatsEndMillis, + long dirreqStatsIntervalLengthMillis, + SortedMap<String, Integer> responses, + SortedMap<String, Integer> bridgeIps, + SortedMap<String, Integer> bridgeIpTransports, + SortedMap<String, Integer> bridgeIpVersions) throws IOException { + if (responses == null + || publishedMillis - dirreqStatsEndMillis > ONE_WEEK_MILLIS + || dirreqStatsIntervalLengthMillis != ONE_DAY_MILLIS) { + /* Cut off all observations that are one week older than + * the descriptor publication time, or we'll have to update + * weeks of aggregate values every hour. 
*/ + return; + } + long statsStartMillis = dirreqStatsEndMillis + - dirreqStatsIntervalLengthMillis; + long utcBreakMillis = (dirreqStatsEndMillis / ONE_DAY_MILLIS) + * ONE_DAY_MILLIS; + double resp = ((double) responses.get("ok")) - 4.0; + if (resp > 0.0) { + for (int i = 0; i < 2; i++) { + long fromMillis = i == 0 ? statsStartMillis + : utcBreakMillis; + long toMillis = i == 0 ? utcBreakMillis : dirreqStatsEndMillis; + if (fromMillis >= toMillis) { + continue; + } + double intervalFraction = ((double) (toMillis - fromMillis)) + / ((double) dirreqStatsIntervalLengthMillis); + writeOutputLine(fingerprint, "bridge", "responses", "", "", + "", fromMillis, toMillis, resp * intervalFraction, + publishedMillis); + parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp, + dirreqStatsIntervalLengthMillis, "country", bridgeIps, + publishedMillis); + parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp, + dirreqStatsIntervalLengthMillis, "transport", + bridgeIpTransports, publishedMillis); + parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp, + dirreqStatsIntervalLengthMillis, "version", bridgeIpVersions, + publishedMillis); + } + } + } + + private static void parseBridgeRespByCategory(String fingerprint, + long fromMillis, long toMillis, double resp, + long dirreqStatsIntervalLengthMillis, String category, + SortedMap<String, Integer> frequencies, long publishedMillis) + throws IOException { + double total = 0.0; + SortedMap<String, Double> frequenciesCopy = new TreeMap<>(); + if (frequencies != null) { + for (Map.Entry<String, Integer> e : frequencies.entrySet()) { + if (e.getValue() < 4.0) { + continue; + } + double frequency = ((double) e.getValue()) - 4.0; + frequenciesCopy.put(e.getKey(), frequency); + total += frequency; + } + } + /* If we're not told any frequencies, or at least none of them are + * greater than 4, put in a default that we'll attribute all responses + * to. 
*/ + if (total == 0) { + if (category.equals("country")) { + frequenciesCopy.put("??", 4.0); + } else if (category.equals("transport")) { + frequenciesCopy.put("<OR>", 4.0); + } else if (category.equals("version")) { + frequenciesCopy.put("v4", 4.0); + } + total = 4.0; + } + for (Map.Entry<String, Double> e : frequenciesCopy.entrySet()) { + double intervalFraction = ((double) (toMillis - fromMillis)) + / ((double) dirreqStatsIntervalLengthMillis); + double val = resp * intervalFraction * e.getValue() / total; + if (category.equals("country")) { + writeOutputLine(fingerprint, "bridge", "responses", e.getKey(), + "", "", fromMillis, toMillis, val, publishedMillis); + } else if (category.equals("transport")) { + writeOutputLine(fingerprint, "bridge", "responses", "", + e.getKey(), "", fromMillis, toMillis, val, publishedMillis); + } else if (category.equals("version")) { + writeOutputLine(fingerprint, "bridge", "responses", "", "", + e.getKey(), fromMillis, toMillis, val, publishedMillis); + } + } + } + + private static void parseBridgeDirreqWriteHistory(String fingerprint, + long publishedMillis, BandwidthHistory dirreqWriteHistory) + throws IOException { + if (dirreqWriteHistory == null + || publishedMillis - dirreqWriteHistory.getHistoryEndMillis() + > ONE_WEEK_MILLIS) { + /* Cut off all observations that are one week older than + * the descriptor publication time, or we'll have to update + * weeks of aggregate values every hour. 
*/ + return; + } + long intervalLengthMillis = + dirreqWriteHistory.getIntervalLength() * 1000L; + for (Map.Entry<Long, Long> e + : dirreqWriteHistory.getBandwidthValues().entrySet()) { + long intervalEndMillis = e.getKey(); + long intervalStartMillis = + intervalEndMillis - intervalLengthMillis; + for (int i = 0; i < 2; i++) { + long fromMillis = intervalStartMillis; + long toMillis = intervalEndMillis; + double writtenBytes = (double) e.getValue(); + if (intervalStartMillis / ONE_DAY_MILLIS + < intervalEndMillis / ONE_DAY_MILLIS) { + long utcBreakMillis = (intervalEndMillis + / ONE_DAY_MILLIS) * ONE_DAY_MILLIS; + if (i == 0) { + toMillis = utcBreakMillis; + } else if (i == 1) { + fromMillis = utcBreakMillis; + } + double intervalFraction = ((double) (toMillis - fromMillis)) + / ((double) intervalLengthMillis); + writtenBytes *= intervalFraction; + } else if (i == 1) { + break; + } + writeOutputLine(fingerprint, "bridge", "bytes", "", + "", "", fromMillis, toMillis, writtenBytes, publishedMillis); + } + } + } + + private static void parseBridgeNetworkStatus(BridgeNetworkStatus status) + throws IOException { + long publishedMillis = status.getPublishedMillis(); + long fromMillis = (publishedMillis / ONE_HOUR_MILLIS) + * ONE_HOUR_MILLIS; + long toMillis = fromMillis + ONE_HOUR_MILLIS; + for (NetworkStatusEntry statusEntry + : status.getStatusEntries().values()) { + String fingerprint = statusEntry.getFingerprint() + .toUpperCase(); + if (statusEntry.getFlags().contains("Running")) { + writeOutputLine(fingerprint, "bridge", "status", "", "", "", + fromMillis, toMillis, 0.0, publishedMillis); + } + } + } + + private static Map<String, BufferedWriter> openOutputFiles = new HashMap<>(); + + private static void writeOutputLine(String fingerprint, String node, + String metric, String country, String transport, String version, + long fromMillis, long toMillis, double val, long publishedMillis) + throws IOException { + if (fromMillis > toMillis) { + return; + } + String 
fromDateTime = formatDateTimeMillis(fromMillis); + String toDateTime = formatDateTimeMillis(toMillis); + BufferedWriter bw = getOutputFile(fromDateTime, publishedMillis); + bw.write(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%.1f\n", + fingerprint, node, metric, country, transport, version, + fromDateTime, toDateTime, val)); + } + + private static SimpleDateFormat dateTimeFormat = null; + + private static String formatDateTimeMillis(long millis) { + if (dateTimeFormat == null) { + dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setLenient(false); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + } + return dateTimeFormat.format(millis); + } + + private static BufferedWriter getOutputFile(String fromDateTime, + long publishedMillis) throws IOException { + String outputFileName; + if (writeToSingleFile) { + outputFileName = "out/userstats.sql"; + } else if (byStatsDateNotByDescHour) { + outputFileName = "out/userstats-" + fromDateTime.substring(0, 10) + + ".sql"; + } else { + String publishedHourDateTime = formatDateTimeMillis( + (publishedMillis / ONE_HOUR_MILLIS) * ONE_HOUR_MILLIS); + outputFileName = "out/userstats-" + + publishedHourDateTime.substring(0, 10) + "-" + + publishedHourDateTime.substring(11, 13) + ".sql"; + } + BufferedWriter bw = openOutputFiles.get(outputFileName); + if (bw == null) { + bw = openOutputFile(outputFileName); + openOutputFiles.put(outputFileName, bw); + } + return bw; + } + + private static BufferedWriter openOutputFile(String outputFileName) + throws IOException { + File outputFile = new File(outputFileName); + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + outputFileName)); + bw.write("BEGIN;\n"); + bw.write("LOCK TABLE imported NOWAIT;\n"); + bw.write("COPY imported (fingerprint, node, metric, country, " + + "transport, version, stats_start, stats_end, val) FROM " + + "stdin;\n"); + return bw; + } + + private static void closeOutputFiles() 
throws IOException { + for (BufferedWriter bw : openOutputFiles.values()) { + bw.write("\.\n"); + bw.write("SELECT merge();\n"); + bw.write("SELECT aggregate();\n"); + bw.write("SELECT combine();\n"); + bw.write("TRUNCATE imported;\n"); + bw.write("COMMIT;\n"); + bw.close(); + } + } +} + diff --git a/modules/clients/src/org/torproject/metrics/clients/Main.java b/modules/clients/src/org/torproject/metrics/clients/Main.java deleted file mode 100644 index dff73f7..0000000 --- a/modules/clients/src/org/torproject/metrics/clients/Main.java +++ /dev/null @@ -1,478 +0,0 @@ -/* Copyright 2013--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.metrics.clients; - -import org.torproject.descriptor.BandwidthHistory; -import org.torproject.descriptor.BridgeNetworkStatus; -import org.torproject.descriptor.Descriptor; -import org.torproject.descriptor.DescriptorFile; -import org.torproject.descriptor.DescriptorReader; -import org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.ExtraInfoDescriptor; -import org.torproject.descriptor.NetworkStatusEntry; -import org.torproject.descriptor.RelayNetworkStatusConsensus; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.SortedMap; -import java.util.TimeZone; -import java.util.TreeMap; - -public class Main { - - /** Executes this data-processing module. 
*/ - public static void main(String[] args) throws Exception { - parseArgs(args); - parseRelayDescriptors(); - parseBridgeDescriptors(); - closeOutputFiles(); - } - - private static boolean writeToSingleFile = true; - private static boolean byStatsDateNotByDescHour = false; - - private static void parseArgs(String[] args) { - if (args.length == 0) { - writeToSingleFile = true; - } else if (args.length == 1 && args[0].equals("--stats-date")) { - writeToSingleFile = false; - byStatsDateNotByDescHour = true; - } else if (args.length == 1 && args[0].equals("--desc-hour")) { - writeToSingleFile = false; - byStatsDateNotByDescHour = false; - } else { - System.err.println("Usage: java " + Main.class.getName() - + " [ --stats-date | --desc-hour ]"); - System.exit(1); - } - } - - private static final long ONE_HOUR_MILLIS = 60L * 60L * 1000L; - - private static final long ONE_DAY_MILLIS = 24L * ONE_HOUR_MILLIS; - - private static final long ONE_WEEK_MILLIS = 7L * ONE_DAY_MILLIS; - - private static void parseRelayDescriptors() throws Exception { - DescriptorReader descriptorReader = - DescriptorSourceFactory.createDescriptorReader(); - descriptorReader.setExcludeFiles(new File( - "status/relay-descriptors")); - descriptorReader.addDirectory(new File( - "../../shared/in/recent/relay-descriptors/consensuses")); - descriptorReader.addDirectory(new File( - "../../shared/in/recent/relay-descriptors/extra-infos")); - descriptorReader.addDirectory(new File( - "../../shared/in/archive/relay-descriptors/consensuses")); - descriptorReader.addDirectory(new File( - "../../shared/in/archive/relay-descriptors/extra-infos")); - Iterator<DescriptorFile> descriptorFiles = - descriptorReader.readDescriptors(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (descriptor instanceof ExtraInfoDescriptor) { - parseRelayExtraInfoDescriptor((ExtraInfoDescriptor) descriptor); - } else 
if (descriptor instanceof RelayNetworkStatusConsensus) { - parseRelayNetworkStatusConsensus( - (RelayNetworkStatusConsensus) descriptor); - } - } - } - } - - private static void parseRelayExtraInfoDescriptor( - ExtraInfoDescriptor descriptor) throws IOException { - long publishedMillis = descriptor.getPublishedMillis(); - String fingerprint = descriptor.getFingerprint() - .toUpperCase(); - long dirreqStatsEndMillis = descriptor.getDirreqStatsEndMillis(); - long dirreqStatsIntervalLengthMillis = - descriptor.getDirreqStatsIntervalLength() * 1000L; - SortedMap<String, Integer> requests = descriptor.getDirreqV3Reqs(); - BandwidthHistory dirreqWriteHistory = - descriptor.getDirreqWriteHistory(); - parseRelayDirreqV3Reqs(fingerprint, publishedMillis, - dirreqStatsEndMillis, dirreqStatsIntervalLengthMillis, requests); - parseRelayDirreqWriteHistory(fingerprint, publishedMillis, - dirreqWriteHistory); - } - - private static void parseRelayDirreqV3Reqs(String fingerprint, - long publishedMillis, long dirreqStatsEndMillis, - long dirreqStatsIntervalLengthMillis, - SortedMap<String, Integer> requests) throws IOException { - if (requests == null - || publishedMillis - dirreqStatsEndMillis > ONE_WEEK_MILLIS - || dirreqStatsIntervalLengthMillis != ONE_DAY_MILLIS) { - /* Cut off all observations that are one week older than - * the descriptor publication time, or we'll have to update - * weeks of aggregate values every hour. */ - return; - } - long statsStartMillis = dirreqStatsEndMillis - - dirreqStatsIntervalLengthMillis; - long utcBreakMillis = (dirreqStatsEndMillis / ONE_DAY_MILLIS) - * ONE_DAY_MILLIS; - for (int i = 0; i < 2; i++) { - long fromMillis = i == 0 ? statsStartMillis - : utcBreakMillis; - long toMillis = i == 0 ? 
utcBreakMillis : dirreqStatsEndMillis; - if (fromMillis >= toMillis) { - continue; - } - double intervalFraction = ((double) (toMillis - fromMillis)) - / ((double) dirreqStatsIntervalLengthMillis); - double sum = 0L; - for (Map.Entry<String, Integer> e : requests.entrySet()) { - String country = e.getKey(); - double reqs = ((double) e.getValue()) - 4.0; - sum += reqs; - writeOutputLine(fingerprint, "relay", "responses", country, - "", "", fromMillis, toMillis, reqs * intervalFraction, - publishedMillis); - } - writeOutputLine(fingerprint, "relay", "responses", "", "", - "", fromMillis, toMillis, sum * intervalFraction, - publishedMillis); - } - } - - private static void parseRelayDirreqWriteHistory(String fingerprint, - long publishedMillis, BandwidthHistory dirreqWriteHistory) - throws IOException { - if (dirreqWriteHistory == null - || publishedMillis - dirreqWriteHistory.getHistoryEndMillis() - > ONE_WEEK_MILLIS) { - return; - /* Cut off all observations that are one week older than - * the descriptor publication time, or we'll have to update - * weeks of aggregate values every hour. 
*/ - } - long intervalLengthMillis = - dirreqWriteHistory.getIntervalLength() * 1000L; - for (Map.Entry<Long, Long> e - : dirreqWriteHistory.getBandwidthValues().entrySet()) { - long intervalEndMillis = e.getKey(); - long intervalStartMillis = - intervalEndMillis - intervalLengthMillis; - for (int i = 0; i < 2; i++) { - long fromMillis = intervalStartMillis; - long toMillis = intervalEndMillis; - double writtenBytes = (double) e.getValue(); - if (intervalStartMillis / ONE_DAY_MILLIS - < intervalEndMillis / ONE_DAY_MILLIS) { - long utcBreakMillis = (intervalEndMillis - / ONE_DAY_MILLIS) * ONE_DAY_MILLIS; - if (i == 0) { - toMillis = utcBreakMillis; - } else if (i == 1) { - fromMillis = utcBreakMillis; - } - double intervalFraction = ((double) (toMillis - fromMillis)) - / ((double) intervalLengthMillis); - writtenBytes *= intervalFraction; - } else if (i == 1) { - break; - } - writeOutputLine(fingerprint, "relay", "bytes", "", "", "", - fromMillis, toMillis, writtenBytes, publishedMillis); - } - } - } - - private static void parseRelayNetworkStatusConsensus( - RelayNetworkStatusConsensus consensus) throws IOException { - long fromMillis = consensus.getValidAfterMillis(); - long toMillis = consensus.getFreshUntilMillis(); - for (NetworkStatusEntry statusEntry - : consensus.getStatusEntries().values()) { - String fingerprint = statusEntry.getFingerprint() - .toUpperCase(); - if (statusEntry.getFlags().contains("Running")) { - writeOutputLine(fingerprint, "relay", "status", "", "", "", - fromMillis, toMillis, 0.0, fromMillis); - } - } - } - - private static void parseBridgeDescriptors() throws Exception { - DescriptorReader descriptorReader = - DescriptorSourceFactory.createDescriptorReader(); - descriptorReader.setExcludeFiles(new File( - "status/bridge-descriptors")); - descriptorReader.addDirectory(new File( - "../../shared/in/recent/bridge-descriptors")); - descriptorReader.addDirectory(new File( - "../../shared/in/archive/bridge-descriptors")); - 
Iterator<DescriptorFile> descriptorFiles = - descriptorReader.readDescriptors(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (descriptor instanceof ExtraInfoDescriptor) { - parseBridgeExtraInfoDescriptor( - (ExtraInfoDescriptor) descriptor); - } else if (descriptor instanceof BridgeNetworkStatus) { - parseBridgeNetworkStatus((BridgeNetworkStatus) descriptor); - } - } - } - } - - private static void parseBridgeExtraInfoDescriptor( - ExtraInfoDescriptor descriptor) throws IOException { - String fingerprint = descriptor.getFingerprint().toUpperCase(); - long publishedMillis = descriptor.getPublishedMillis(); - long dirreqStatsEndMillis = descriptor.getDirreqStatsEndMillis(); - long dirreqStatsIntervalLengthMillis = - descriptor.getDirreqStatsIntervalLength() * 1000L; - parseBridgeDirreqV3Resp(fingerprint, publishedMillis, - dirreqStatsEndMillis, dirreqStatsIntervalLengthMillis, - descriptor.getDirreqV3Resp(), - descriptor.getBridgeIps(), - descriptor.getBridgeIpTransports(), - descriptor.getBridgeIpVersions()); - - parseBridgeDirreqWriteHistory(fingerprint, publishedMillis, - descriptor.getDirreqWriteHistory()); - } - - private static void parseBridgeDirreqV3Resp(String fingerprint, - long publishedMillis, long dirreqStatsEndMillis, - long dirreqStatsIntervalLengthMillis, - SortedMap<String, Integer> responses, - SortedMap<String, Integer> bridgeIps, - SortedMap<String, Integer> bridgeIpTransports, - SortedMap<String, Integer> bridgeIpVersions) throws IOException { - if (responses == null - || publishedMillis - dirreqStatsEndMillis > ONE_WEEK_MILLIS - || dirreqStatsIntervalLengthMillis != ONE_DAY_MILLIS) { - /* Cut off all observations that are one week older than - * the descriptor publication time, or we'll have to update - * weeks of aggregate values every hour. 
*/ - return; - } - long statsStartMillis = dirreqStatsEndMillis - - dirreqStatsIntervalLengthMillis; - long utcBreakMillis = (dirreqStatsEndMillis / ONE_DAY_MILLIS) - * ONE_DAY_MILLIS; - double resp = ((double) responses.get("ok")) - 4.0; - if (resp > 0.0) { - for (int i = 0; i < 2; i++) { - long fromMillis = i == 0 ? statsStartMillis - : utcBreakMillis; - long toMillis = i == 0 ? utcBreakMillis : dirreqStatsEndMillis; - if (fromMillis >= toMillis) { - continue; - } - double intervalFraction = ((double) (toMillis - fromMillis)) - / ((double) dirreqStatsIntervalLengthMillis); - writeOutputLine(fingerprint, "bridge", "responses", "", "", - "", fromMillis, toMillis, resp * intervalFraction, - publishedMillis); - parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp, - dirreqStatsIntervalLengthMillis, "country", bridgeIps, - publishedMillis); - parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp, - dirreqStatsIntervalLengthMillis, "transport", - bridgeIpTransports, publishedMillis); - parseBridgeRespByCategory(fingerprint, fromMillis, toMillis, resp, - dirreqStatsIntervalLengthMillis, "version", bridgeIpVersions, - publishedMillis); - } - } - } - - private static void parseBridgeRespByCategory(String fingerprint, - long fromMillis, long toMillis, double resp, - long dirreqStatsIntervalLengthMillis, String category, - SortedMap<String, Integer> frequencies, long publishedMillis) - throws IOException { - double total = 0.0; - SortedMap<String, Double> frequenciesCopy = new TreeMap<>(); - if (frequencies != null) { - for (Map.Entry<String, Integer> e : frequencies.entrySet()) { - if (e.getValue() < 4.0) { - continue; - } - double frequency = ((double) e.getValue()) - 4.0; - frequenciesCopy.put(e.getKey(), frequency); - total += frequency; - } - } - /* If we're not told any frequencies, or at least none of them are - * greater than 4, put in a default that we'll attribute all responses - * to. 
*/ - if (total == 0) { - if (category.equals("country")) { - frequenciesCopy.put("??", 4.0); - } else if (category.equals("transport")) { - frequenciesCopy.put("<OR>", 4.0); - } else if (category.equals("version")) { - frequenciesCopy.put("v4", 4.0); - } - total = 4.0; - } - for (Map.Entry<String, Double> e : frequenciesCopy.entrySet()) { - double intervalFraction = ((double) (toMillis - fromMillis)) - / ((double) dirreqStatsIntervalLengthMillis); - double val = resp * intervalFraction * e.getValue() / total; - if (category.equals("country")) { - writeOutputLine(fingerprint, "bridge", "responses", e.getKey(), - "", "", fromMillis, toMillis, val, publishedMillis); - } else if (category.equals("transport")) { - writeOutputLine(fingerprint, "bridge", "responses", "", - e.getKey(), "", fromMillis, toMillis, val, publishedMillis); - } else if (category.equals("version")) { - writeOutputLine(fingerprint, "bridge", "responses", "", "", - e.getKey(), fromMillis, toMillis, val, publishedMillis); - } - } - } - - private static void parseBridgeDirreqWriteHistory(String fingerprint, - long publishedMillis, BandwidthHistory dirreqWriteHistory) - throws IOException { - if (dirreqWriteHistory == null - || publishedMillis - dirreqWriteHistory.getHistoryEndMillis() - > ONE_WEEK_MILLIS) { - /* Cut off all observations that are one week older than - * the descriptor publication time, or we'll have to update - * weeks of aggregate values every hour. 
*/ - return; - } - long intervalLengthMillis = - dirreqWriteHistory.getIntervalLength() * 1000L; - for (Map.Entry<Long, Long> e - : dirreqWriteHistory.getBandwidthValues().entrySet()) { - long intervalEndMillis = e.getKey(); - long intervalStartMillis = - intervalEndMillis - intervalLengthMillis; - for (int i = 0; i < 2; i++) { - long fromMillis = intervalStartMillis; - long toMillis = intervalEndMillis; - double writtenBytes = (double) e.getValue(); - if (intervalStartMillis / ONE_DAY_MILLIS - < intervalEndMillis / ONE_DAY_MILLIS) { - long utcBreakMillis = (intervalEndMillis - / ONE_DAY_MILLIS) * ONE_DAY_MILLIS; - if (i == 0) { - toMillis = utcBreakMillis; - } else if (i == 1) { - fromMillis = utcBreakMillis; - } - double intervalFraction = ((double) (toMillis - fromMillis)) - / ((double) intervalLengthMillis); - writtenBytes *= intervalFraction; - } else if (i == 1) { - break; - } - writeOutputLine(fingerprint, "bridge", "bytes", "", - "", "", fromMillis, toMillis, writtenBytes, publishedMillis); - } - } - } - - private static void parseBridgeNetworkStatus(BridgeNetworkStatus status) - throws IOException { - long publishedMillis = status.getPublishedMillis(); - long fromMillis = (publishedMillis / ONE_HOUR_MILLIS) - * ONE_HOUR_MILLIS; - long toMillis = fromMillis + ONE_HOUR_MILLIS; - for (NetworkStatusEntry statusEntry - : status.getStatusEntries().values()) { - String fingerprint = statusEntry.getFingerprint() - .toUpperCase(); - if (statusEntry.getFlags().contains("Running")) { - writeOutputLine(fingerprint, "bridge", "status", "", "", "", - fromMillis, toMillis, 0.0, publishedMillis); - } - } - } - - private static Map<String, BufferedWriter> openOutputFiles = new HashMap<>(); - - private static void writeOutputLine(String fingerprint, String node, - String metric, String country, String transport, String version, - long fromMillis, long toMillis, double val, long publishedMillis) - throws IOException { - if (fromMillis > toMillis) { - return; - } - String 
fromDateTime = formatDateTimeMillis(fromMillis); - String toDateTime = formatDateTimeMillis(toMillis); - BufferedWriter bw = getOutputFile(fromDateTime, publishedMillis); - bw.write(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%.1f\n", - fingerprint, node, metric, country, transport, version, - fromDateTime, toDateTime, val)); - } - - private static SimpleDateFormat dateTimeFormat = null; - - private static String formatDateTimeMillis(long millis) { - if (dateTimeFormat == null) { - dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setLenient(false); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - } - return dateTimeFormat.format(millis); - } - - private static BufferedWriter getOutputFile(String fromDateTime, - long publishedMillis) throws IOException { - String outputFileName; - if (writeToSingleFile) { - outputFileName = "out/userstats.sql"; - } else if (byStatsDateNotByDescHour) { - outputFileName = "out/userstats-" + fromDateTime.substring(0, 10) - + ".sql"; - } else { - String publishedHourDateTime = formatDateTimeMillis( - (publishedMillis / ONE_HOUR_MILLIS) * ONE_HOUR_MILLIS); - outputFileName = "out/userstats-" - + publishedHourDateTime.substring(0, 10) + "-" - + publishedHourDateTime.substring(11, 13) + ".sql"; - } - BufferedWriter bw = openOutputFiles.get(outputFileName); - if (bw == null) { - bw = openOutputFile(outputFileName); - openOutputFiles.put(outputFileName, bw); - } - return bw; - } - - private static BufferedWriter openOutputFile(String outputFileName) - throws IOException { - File outputFile = new File(outputFileName); - outputFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - outputFileName)); - bw.write("BEGIN;\n"); - bw.write("LOCK TABLE imported NOWAIT;\n"); - bw.write("COPY imported (fingerprint, node, metric, country, " - + "transport, version, stats_start, stats_end, val) FROM " - + "stdin;\n"); - return bw; - } - - private static void closeOutputFiles() 
throws IOException { - for (BufferedWriter bw : openOutputFiles.values()) { - bw.write("\.\n"); - bw.write("SELECT merge();\n"); - bw.write("SELECT aggregate();\n"); - bw.write("SELECT combine();\n"); - bw.write("TRUNCATE imported;\n"); - bw.write("COMMIT;\n"); - bw.close(); - } - } -} - diff --git a/modules/collectdescs/src/main/java/org/torproject/metrics/collectdescs/Main.java b/modules/collectdescs/src/main/java/org/torproject/metrics/collectdescs/Main.java new file mode 100644 index 0000000..499dff9 --- /dev/null +++ b/modules/collectdescs/src/main/java/org/torproject/metrics/collectdescs/Main.java @@ -0,0 +1,31 @@ +/* Copyright 2015--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.collectdescs; + +import org.torproject.descriptor.DescriptorCollector; +import org.torproject.descriptor.DescriptorSourceFactory; + +import java.io.File; + +public class Main { + + /** Executes this data-processing module. */ + public static void main(String[] args) { + /* Fetch recent descriptors from CollecTor. 
*/ + DescriptorCollector collector = + DescriptorSourceFactory.createDescriptorCollector(); + collector.collectDescriptors( + "https://collector.torproject.org", new String[] { + "/recent/bridge-descriptors/extra-infos/", + "/recent/bridge-descriptors/server-descriptors/", + "/recent/bridge-descriptors/statuses/", + "/recent/exit-lists/", + "/recent/relay-descriptors/consensuses/", + "/recent/relay-descriptors/extra-infos/", + "/recent/relay-descriptors/server-descriptors/", + "/recent/torperf/" + }, 0L, new File("../../shared/in"), true); + } +} + diff --git a/modules/collectdescs/src/org/torproject/metrics/collectdescs/Main.java b/modules/collectdescs/src/org/torproject/metrics/collectdescs/Main.java deleted file mode 100644 index 499dff9..0000000 --- a/modules/collectdescs/src/org/torproject/metrics/collectdescs/Main.java +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright 2015--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.metrics.collectdescs; - -import org.torproject.descriptor.DescriptorCollector; -import org.torproject.descriptor.DescriptorSourceFactory; - -import java.io.File; - -public class Main { - - /** Executes this data-processing module. */ - public static void main(String[] args) { - /* Fetch recent descriptors from CollecTor. 
*/ - DescriptorCollector collector = - DescriptorSourceFactory.createDescriptorCollector(); - collector.collectDescriptors( - "https://collector.torproject.org", new String[] { - "/recent/bridge-descriptors/extra-infos/", - "/recent/bridge-descriptors/server-descriptors/", - "/recent/bridge-descriptors/statuses/", - "/recent/exit-lists/", - "/recent/relay-descriptors/consensuses/", - "/recent/relay-descriptors/extra-infos/", - "/recent/relay-descriptors/server-descriptors/", - "/recent/torperf/" - }, 0L, new File("../../shared/in"), true); - } -} - diff --git a/modules/connbidirect/build.xml b/modules/connbidirect/build.xml index 72c028f..7bc1f32 100644 --- a/modules/connbidirect/build.xml +++ b/modules/connbidirect/build.xml @@ -1,61 +1,16 @@ <project default="run" name="connbidirect" basedir=".">
- <property name="connbidirect-sources" value="src/main/java"/> - <property name="connbidirect-tests" value="src/test/java"/> - <property name="connbidirect-libs" value="../../shared/lib"/> - <property name="connbidirect-classes" value="classes"/> + <include file="../../shared/build-base.xml" as="basetask"/> + <target name="clean" depends="basetask.clean"/> + <target name="compile" depends="basetask.compile"/> + <target name="testcompile" depends="basetask.testcompile"/> + <target name="test" depends="basetask.test"/> + <path id="classpath"> - <pathelement path="${connbidirect-classes}"/> - <fileset dir="${connbidirect-libs}"> - <include name="commons-codec-1.6.jar"/> - <include name="commons-compress-1.9.jar"/> - <include name="commons-lang-2.6.jar"/> - <include name="junit4-4.11.jar"/> - <include name="hamcrest-all-1.3.jar"/> - <include name="descriptor-1.4.0.jar"/> - <include name="slf4j-api-1.7.7.jar"/> - <include name="logback-core-1.1.2.jar"/> - <include name="logback-classic-1.1.2.jar"/> - </fileset> + <pathelement path="${classes}"/> + <path refid="base.classpath" /> </path>
- <target name="compile"> - <mkdir dir="${connbidirect-classes}"/> - <javac destdir="${connbidirect-classes}" - srcdir="${connbidirect-sources}" - source="1.7" - target="1.7" - debug="true" - deprecation="true" - optimize="false" - failonerror="true" - includeantruntime="false"> - <classpath refid="classpath"/> - </javac> - </target> - - <target name="test" depends="compile"> - <javac destdir="${connbidirect-classes}" - srcdir="${connbidirect-tests}" - source="1.7" - target="1.7" - debug="true" - deprecation="true" - optimize="false" - failonerror="true" - includeantruntime="false"> - <classpath refid="classpath"/> - </javac> - <junit fork="true" haltonfailure="true" printsummary="off"> - <classpath refid="classpath"/> - <formatter type="plain" usefile="false"/> - <batchtest> - <fileset dir="${connbidirect-classes}" - includes="**/*Test.class"/> - </batchtest> - </junit> - </target> - <target name="run" depends="compile"> <java fork="true" maxmemory="2g" diff --git a/modules/hidserv/build.xml b/modules/hidserv/build.xml index fe073a1..c997161 100644 --- a/modules/hidserv/build.xml +++ b/modules/hidserv/build.xml @@ -7,9 +7,6 @@ <path id="classpath"> <pathelement path="${classes}"/> <path refid="base.classpath" /> - <fileset dir="${libs}"> - <include name="commons-codec-1.9.jar"/> - </fileset> </path>
<target name="run" depends="basetask.compile"> diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Aggregator.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Aggregator.java new file mode 100644 index 0000000..ea09a78 --- /dev/null +++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Aggregator.java @@ -0,0 +1,198 @@ +/* Copyright 2016--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.hidserv; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +/** Aggregate extrapolated network totals of hidden-service statistics by + * calculating statistics like the daily weighted interquartile mean. + * Also calculate simpler statistics like the number of reported + * statistics and the total network fraction of reporting relays. */ +public class Aggregator { + + /** Document file containing extrapolated hidden-service statistics. */ + private File extrapolatedHidServStatsFile; + + /** Document store for storing and retrieving extrapolated hidden-service + * statistics. */ + private DocumentStore<ExtrapolatedHidServStats> + extrapolatedHidServStatsStore; + + /** Output file for writing aggregated statistics. */ + private File hidservStatsCsvFile; + + /** Initializes a new aggregator object using the given directory, + * document store, and output file for results. */ + public Aggregator(File statusDirectory, + DocumentStore<ExtrapolatedHidServStats> + extrapolatedHidServStatsStore, File hidservStatsCsvFile) { + + /* Create a File instance for the document file containing + * extrapolated network totals. 
*/ + this.extrapolatedHidServStatsFile = new File(statusDirectory, + "extrapolated-hidserv-stats"); + + /* Store references to the provided document store and output file. */ + this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore; + this.hidservStatsCsvFile = hidservStatsCsvFile; + } + + /** Calculates aggregates for all extrapolated hidden-service statistics + * and writes them to the output file. */ + public void aggregateHidServStats() { + + /* Retrieve previously extrapolated network totals. */ + Set<ExtrapolatedHidServStats> extrapolatedStats = + this.extrapolatedHidServStatsStore.retrieve( + this.extrapolatedHidServStatsFile); + if (extrapolatedStats == null) { + System.err.printf("Unable to retrieve extrapolated hidden-service " + + "statistics from file %s. Skipping aggregation step.%n", + this.extrapolatedHidServStatsFile.getAbsolutePath()); + return; + } + + /* Re-arrange extrapolated network totals by statistics interval end + * date, and include the computed network total as weight for the + * extrapolated value. More precisely, map keys are ISO-formatted + * dates, map values are double[] arrays with the extrapolated network + * total as first element and the corresponding computed network + * fraction as second element. 
*/ + SortedMap<String, List<double[]>> extrapolatedCells = new TreeMap<>(); + SortedMap<String, List<double[]>> extrapolatedOnions = new TreeMap<>(); + for (ExtrapolatedHidServStats extrapolated : extrapolatedStats) { + String date = DateTimeHelper.format( + extrapolated.getStatsDateMillis(), + DateTimeHelper.ISO_DATE_FORMAT); + if (extrapolated.getFractionRendRelayedCells() > 0.0) { + if (!extrapolatedCells.containsKey(date)) { + extrapolatedCells.put(date, new ArrayList<double[]>()); + } + extrapolatedCells.get(date).add(new double[] { + extrapolated.getExtrapolatedRendRelayedCells(), + extrapolated.getFractionRendRelayedCells() }); + } + if (extrapolated.getFractionDirOnionsSeen() > 0.0) { + if (!extrapolatedOnions.containsKey(date)) { + extrapolatedOnions.put(date, new ArrayList<double[]>()); + } + extrapolatedOnions.get(date).add(new double[] { + extrapolated.getExtrapolatedDirOnionsSeen(), + extrapolated.getFractionDirOnionsSeen() }); + } + } + + /* Write all results to a string builder that will later be written to + * the output file. Each line contains an ISO-formatted "date", a + * string identifier for the "type" of statistic, the weighted mean + * ("wmean"), weighted median ("wmedian"), weighted interquartile mean + * ("wiqm"), the total network "frac"tion, and the number of reported + * "stats" with non-zero computed network fraction. */ + StringBuilder sb = new StringBuilder(); + sb.append("date,type,wmean,wmedian,wiqm,frac,stats\n"); + + /* Repeat all aggregation steps for both types of statistics. */ + for (int i = 0; i < 2; i++) { + String type = i == 0 ? "rend-relayed-cells" : "dir-onions-seen"; + SortedMap<String, List<double[]>> extrapolated = i == 0 + ? extrapolatedCells : extrapolatedOnions; + + /* Go through all dates. */ + for (Map.Entry<String, List<double[]>> e + : extrapolated.entrySet()) { + List<double[]> weightedValues = e.getValue(); + + /* Sort extrapolated network totals contained in the first array + * element. 
(The second array element contains the computed + * network fraction as weight.) */ + Collections.sort(weightedValues, + new Comparator<double[]>() { + public int compare(double[] first, double[] second) { + return first[0] < second[0] ? -1 + : first[0] > second[0] ? 1 + : 0; + } + } + ); + + /* For the weighted mean, sum up all previously extrapolated + * values weighted with their network fractions (which happens to + * be the values that relays reported), and sum up all network + * fractions. Once we have those two sums, we can divide the sum + * of weighted extrapolated values by the sum of network fractions + * to obtain the weighted mean of extrapolated values. */ + double sumReported = 0.0; + double sumFraction = 0.0; + for (double[] d : weightedValues) { + sumReported += d[0] * d[1]; + sumFraction += d[1]; + } + double weightedMean = sumReported / sumFraction; + + /* For the weighted median and weighted interquartile mean, go + * through all values once again. The weighted median is the + * first extrapolated value with weight interval end greater than + * 50% of reported network fractions. For the weighted + * interquartile mean, sum up extrapolated values multiplied with + * network fractions and network fractions falling into the 25% to + * 75% range and later compute the weighted mean of those. 
*/ + double weightIntervalEnd = 0.0; + Double weightedMedian = null; + double sumFractionInterquartile = 0.0; + double sumReportedInterquartile = 0.0; + for (double[] d : weightedValues) { + double extrapolatedValue = d[0]; + double computedFraction = d[1]; + double weightIntervalStart = weightIntervalEnd; + weightIntervalEnd += computedFraction; + if (weightedMedian == null + && weightIntervalEnd > sumFraction * 0.5) { + weightedMedian = extrapolatedValue; + } + if (weightIntervalEnd >= sumFraction * 0.25 + && weightIntervalStart <= sumFraction * 0.75) { + double fractionBetweenQuartiles = + Math.min(weightIntervalEnd, sumFraction * 0.75) + - Math.max(weightIntervalStart, sumFraction * 0.25); + sumReportedInterquartile += extrapolatedValue + * fractionBetweenQuartiles; + sumFractionInterquartile += fractionBetweenQuartiles; + } + } + double weightedInterquartileMean = + sumReportedInterquartile / sumFractionInterquartile; + + /* Put together all aggregated values in a single line. */ + String date = e.getKey(); + int numStats = weightedValues.size(); + sb.append(String.format("%s,%s,%.0f,%.0f,%.0f,%.8f,%d%n", date, + type, weightedMean, weightedMedian, weightedInterquartileMean, + sumFraction, numStats)); + } + } + + /* Write all aggregated results to the output file. */ + try { + this.hidservStatsCsvFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.hidservStatsCsvFile)); + bw.write(sb.toString()); + bw.close(); + } catch (IOException e) { + System.err.printf("Unable to write results to %s. 
Ignoring."); + } + } +} + diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ComputedNetworkFractions.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ComputedNetworkFractions.java new file mode 100644 index 0000000..a403e48 --- /dev/null +++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ComputedNetworkFractions.java @@ -0,0 +1,183 @@ +/* Copyright 2016--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.hidserv; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** Computed fraction of hidden-service activity that a single relay is + * assumed to observe in the network. These fractions are computed from + * status entries and bandwidth weights in a network status consensus. */ +public class ComputedNetworkFractions implements Document { + + /** Relay fingerprint consisting of 40 upper-case hex characters. */ + private String fingerprint; + + public String getFingerprint() { + return this.fingerprint; + } + + /** Valid-after timestamp of the consensus in milliseconds. */ + private long validAfterMillis; + + public long getValidAfterMillis() { + return this.validAfterMillis; + } + + /** Fraction of cells on rendezvous circuits that this relay is assumed + * to observe in the network. */ + private double fractionRendRelayedCells; + + public void setFractionRendRelayedCells( + double fractionRendRelayedCells) { + this.fractionRendRelayedCells = fractionRendRelayedCells; + } + + public double getFractionRendRelayedCells() { + return this.fractionRendRelayedCells; + } + + /** Fraction of descriptors that this relay is assumed to observe in the + * network. This is calculated as the fraction of descriptors + * identifiers that this relay was responsible for, divided by 3, + * because each descriptor that is published to this directory is also + * published to two other directories. 
*/ + private double fractionDirOnionsSeen; + + public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) { + this.fractionDirOnionsSeen = fractionDirOnionsSeen; + } + + public double getFractionDirOnionsSeen() { + return this.fractionDirOnionsSeen; + } + + /** Instantiates a new fractions object using fingerprint and consensus + * valid-after time which together uniquely identify the object. */ + public ComputedNetworkFractions(String fingerprint, + long validAfterMillis) { + this.fingerprint = fingerprint; + this.validAfterMillis = validAfterMillis; + } + + /** Returns whether this object contains the same fingerprint and + * consensus valid-after time as the passed object. */ + @Override + public boolean equals(Object otherObject) { + if (!(otherObject instanceof ComputedNetworkFractions)) { + return false; + } + ComputedNetworkFractions other = + (ComputedNetworkFractions) otherObject; + return this.fingerprint.equals(other.fingerprint) + && this.validAfterMillis == other.validAfterMillis; + } + + /** Returns a (hopefully unique) hash code based on this object's + * fingerprint and consensus valid-after time. */ + @Override + public int hashCode() { + return this.fingerprint.hashCode() + + (int) this.validAfterMillis; + } + + private static Map<Long, String> previouslyFormattedDates = + Collections.synchronizedMap(new HashMap<Long, String>()); + + /** Returns a string representation of this object, consisting of two + * strings: the first string contains fingerprint and valid-after date, + * the second string contains the concatenation of all other + * attributes. 
*/ + @Override + public String[] format() { + long validAfterDateMillis = (this.validAfterMillis + / DateTimeHelper.ONE_DAY) * DateTimeHelper.ONE_DAY; + String validAfterDate; + if (previouslyFormattedDates.containsKey(validAfterDateMillis)) { + validAfterDate = previouslyFormattedDates.get(validAfterDateMillis); + } else { + validAfterDate = DateTimeHelper.format(validAfterDateMillis, + DateTimeHelper.ISO_DATE_FORMAT); + previouslyFormattedDates.put(validAfterDateMillis, validAfterDate); + } + long validAfterHourMillis = this.validAfterMillis + % DateTimeHelper.ONE_DAY; + String validAfterHour = String.format("%02d", + validAfterHourMillis / DateTimeHelper.ONE_HOUR); + String first = String.format("%s,%s", this.fingerprint, + validAfterDate); + String second = validAfterHour + + (this.fractionRendRelayedCells == 0.0 ? "," + : String.format(",%f", this.fractionRendRelayedCells)) + + (this.fractionDirOnionsSeen == 0.0 ? "," + : String.format(",%f", this.fractionDirOnionsSeen)); + return new String[] { first, second }; + } + + /** Instantiates an empty fractions object that will be initialized more + * by the parse method. */ + ComputedNetworkFractions() { + } + + private static Map<String, Long> previouslyParsedDates = + Collections.synchronizedMap(new HashMap<String, Long>()); + + /** Initializes this fractions object using the two provided strings + * that have been produced by the format method earlier and returns + * whether this operation was successful. */ + @Override + public boolean parse(String[] formattedStrings) { + if (formattedStrings.length != 2) { + System.err.printf("Invalid number of formatted strings. " + + "Skipping.%n", formattedStrings.length); + return false; + } + String[] firstParts = formattedStrings[0].split(",", 2); + if (firstParts.length != 2) { + System.err.printf("Invalid number of comma-separated values. 
" + + "Skipping.%n"); + return false; + } + String fingerprint = firstParts[0]; + String[] secondParts = formattedStrings[1].split(",", 3); + if (secondParts.length != 3) { + System.err.printf("Invalid number of comma-separated values. " + + "Skipping.%n"); + return false; + } + String validAfterDate = firstParts[1]; + String validAfterHour = secondParts[0]; + long validAfterDateMillis; + if (previouslyParsedDates.containsKey(validAfterDate)) { + validAfterDateMillis = previouslyParsedDates.get(validAfterDate); + } else { + validAfterDateMillis = DateTimeHelper.parse(validAfterDate, + DateTimeHelper.ISO_DATE_FORMAT); + previouslyParsedDates.put(validAfterDate, validAfterDateMillis); + } + long validAfterTimeMillis = Long.parseLong(validAfterHour) + * DateTimeHelper.ONE_HOUR; + if (validAfterDateMillis == DateTimeHelper.NO_TIME_AVAILABLE + || validAfterTimeMillis < 0L + || validAfterTimeMillis >= DateTimeHelper.ONE_DAY) { + System.err.printf("Invalid date/hour format. Skipping.%n"); + return false; + } + long validAfterMillis = validAfterDateMillis + validAfterTimeMillis; + try { + this.fingerprint = fingerprint; + this.validAfterMillis = validAfterMillis; + this.fractionRendRelayedCells = secondParts[1].equals("") + ? 0.0 : Double.parseDouble(secondParts[1]); + this.fractionDirOnionsSeen = secondParts[2].equals("") + ? 0.0 : Double.parseDouble(secondParts[2]); + return true; + } catch (NumberFormatException e) { + System.err.printf("Invalid number format. 
/** Utility class to format and parse dates and timestamps.
 *
 * <p>All formatting and parsing is done in UTC and with non-lenient
 * {@link SimpleDateFormat} instances that are kept per thread.</p> */
public class DateTimeHelper {

  /** This class is not supposed to be instantiated, which is why its
   * constructor has private visibility. */
  private DateTimeHelper() {
  }

  /* Some useful time constants, all in milliseconds. */
  public static final long ONE_SECOND = 1000L;

  public static final long ONE_MINUTE = 60L * ONE_SECOND;

  public static final long ONE_HOUR = 60L * ONE_MINUTE;

  public static final long ONE_DAY = 24L * ONE_HOUR;

  /* Some useful date/time formats. */
  public static final String ISO_DATETIME_FORMAT = "yyyy-MM-dd HH:mm:ss";

  public static final String ISO_DATE_HOUR_FORMAT = "yyyy-MM-dd HH";

  public static final String ISO_DATE_FORMAT = "yyyy-MM-dd";

  public static final String ISO_HOUR_FORMAT = "HH";

  /** Map of DateFormat instances for parsing and formatting dates and
   * timestamps, protected using ThreadLocal to ensure that each thread
   * uses its own instances. */
  private static final ThreadLocal<Map<String, DateFormat>> dateFormats =
      new ThreadLocal<Map<String, DateFormat>>() {
        @Override
        protected Map<String, DateFormat> initialValue() {
          return new HashMap<>();
        }
      };

  /** Returns an instance of DateFormat for the given format, and if no
   * such instance exists, creates one and puts it in the map. */
  private static DateFormat getDateFormat(String format) {
    Map<String, DateFormat> threadDateFormats = dateFormats.get();
    DateFormat dateFormat = threadDateFormats.get(format);
    if (dateFormat == null) {
      dateFormat = new SimpleDateFormat(format);
      /* Reject out-of-range fields such as February 30. */
      dateFormat.setLenient(false);
      dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
      threadDateFormats.put(format, dateFormat);
    }
    return dateFormat;
  }

  /** Formats the given time in milliseconds using the given format. */
  public static String format(long millis, String format) {
    return getDateFormat(format).format(millis);
  }

  /** Formats the given time in milliseconds using ISO date/time
   * format. */
  public static String format(long millis) {
    return format(millis, ISO_DATETIME_FORMAT);
  }

  /** Default result of the parse methods if the provided time could not
   * be parsed. */
  public static final long NO_TIME_AVAILABLE = -1L;

  /** Parses the given string using the given format and returns the
   * time in milliseconds, or {@link #NO_TIME_AVAILABLE} if the string is
   * {@code null} or cannot be parsed. */
  public static long parse(String string, String format) {
    if (null == string) {
      return NO_TIME_AVAILABLE;
    }
    try {
      return getDateFormat(format).parse(string).getTime();
    } catch (ParseException e) {
      return NO_TIME_AVAILABLE;
    }
  }

  /** Parses the given string using ISO date/time format. */
  public static long parse(String string) {
    return parse(string, ISO_DATETIME_FORMAT);
  }
}
/** Common interface of documents that are supposed to be serialized and
 * stored in document files and later retrieved and de-serialized. */
public interface Document {

  /** Returns an array of two strings with a string representation of this
   * document.
   *
   * <p>The first string will be used to start a group of documents, the
   * second string will be used to represent a single document in that
   * group.  Ideally, the first string is equivalent for many documents
   * stored in the same file, and the second string is different for those
   * documents.</p>
   *
   * @return array of exactly two strings: group key and document body */
  String[] format();

  /** Initializes an object using the given array of two strings.
   *
   * <p>These are the same two strings that the format method
   * provides.</p>
   *
   * @param formattedStrings the two strings produced by {@link #format()}
   * @return whether the strings could be parsed successfully */
  boolean parse(String[] formattedStrings);
}
+ * + * <p>These are the same two strings that the format method + * provides.</p> */ + public boolean parse(String[] formattedStrings); +} + diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/DocumentStore.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/DocumentStore.java new file mode 100644 index 0000000..2670cf4 --- /dev/null +++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/DocumentStore.java @@ -0,0 +1,176 @@ +/* Copyright 2016--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.hidserv; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.LineNumberReader; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +/** Utility class to store serialized objects implementing the Document + * interface to a file and later to retrieve them. */ +public class DocumentStore<T extends Document> { + + /** Document class, needed to create new instances when retrieving + * documents. */ + private Class<T> clazz; + + /** Initializes a new store object for the given type of documents. */ + DocumentStore(Class<T> clazz) { + this.clazz = clazz; + } + + /** Stores the provided documents in the given file and returns whether + * the storage operation was successful. + * + * <p>If the file already existed and if it contains documents, merge + * the new documents with the existing ones.</p> */ + public boolean store(File documentFile, Set<T> documentsToStore) { + + /* Retrieve existing documents. */ + Set<T> retrievedDocuments = this.retrieve(documentFile); + if (retrievedDocuments == null) { + System.err.printf("Unable to read and update %s. 
Not storing " + + "documents.%n", documentFile.getAbsoluteFile()); + return false; + } + + /* Merge new documents with existing ones. */ + retrievedDocuments.addAll(documentsToStore); + + /* Serialize documents. */ + SortedMap<String, SortedSet<String>> formattedDocuments = new TreeMap<>(); + for (T retrieveDocument : retrievedDocuments) { + String[] formattedDocument = retrieveDocument.format(); + if (!formattedDocuments.containsKey(formattedDocument[0])) { + formattedDocuments.put(formattedDocument[0], + new TreeSet<String>()); + } + formattedDocuments.get(formattedDocument[0]).add( + formattedDocument[1]); + } + + /* Check if a temporary file exists from the previous execution. */ + File documentTempFile = new File(documentFile.getAbsoluteFile() + + ".tmp"); + if (documentTempFile.exists()) { + System.err.printf("Temporary document file %s still exists, " + + "indicating that a previous execution did not terminate " + + "cleanly. Not storing documents.%n", + documentTempFile.getAbsoluteFile()); + return false; + } + + /* Write to a new temporary file, then move it into place, possibly + * overwriting an existing file. */ + try { + documentTempFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + documentTempFile)); + for (Map.Entry<String, SortedSet<String>> e + : formattedDocuments.entrySet()) { + bw.write(e.getKey() + "\n"); + for (String s : e.getValue()) { + bw.write(" " + s + "\n"); + } + } + bw.close(); + documentFile.delete(); + documentTempFile.renameTo(documentFile); + } catch (IOException e) { + System.err.printf("Unable to write %s. Not storing documents.%n", + documentFile.getAbsolutePath()); + return false; + } + + /* Return success. */ + return true; + } + + /** Retrieves all previously stored documents from the given file. 
*/ + public Set<T> retrieve(File documentFile) { + return this.retrieve(documentFile, ""); + } + + /** Retrieves previously stored documents from the given file that start + * with the given prefix. */ + public Set<T> retrieve(File documentFile, String prefix) { + + /* Check if the document file exists, and if not, return an empty set. + * This is not an error case. */ + Set<T> result = new HashSet<>(); + if (!documentFile.exists()) { + return result; + } + + /* Parse the document file line by line and de-serialize contained + * documents. */ + try { + LineNumberReader lnr = new LineNumberReader(new BufferedReader( + new FileReader(documentFile))); + String line; + String formattedString0 = null; + while ((line = lnr.readLine()) != null) { + if (!line.startsWith(" ")) { + formattedString0 = line; + } else if (formattedString0 == null) { + System.err.printf("First line in %s must not start with a " + + "space. Not retrieving any previously stored " + + "documents.%n", documentFile.getAbsolutePath()); + lnr.close(); + return null; + } else if (prefix.length() > formattedString0.length() + && !(formattedString0 + line.substring(1)) + .startsWith(prefix)) { + /* Skip combined line not starting with prefix. */ + continue; + } else if (prefix.length() > 0 + && !formattedString0.startsWith(prefix)) { + /* Skip line not starting with prefix. */ + continue; + } else { + T document = this.clazz.newInstance(); + if (!document.parse(new String[] { formattedString0, + line.substring(1) })) { + System.err.printf("Unable to read line %d from %s. Not " + + "retrieving any previously stored documents.%n", + lnr.getLineNumber(), documentFile.getAbsolutePath()); + lnr.close(); + return null; + } + result.add(document); + } + } + lnr.close(); + } catch (IOException e) { + System.err.printf("Unable to read %s. 
Not retrieving any " + + "previously stored documents.%n", + documentFile.getAbsolutePath()); + e.printStackTrace(); + return null; + } catch (InstantiationException e) { + System.err.printf("Unable to read %s. Cannot instantiate document " + + "object.%n", documentFile.getAbsolutePath()); + e.printStackTrace(); + return null; + } catch (IllegalAccessException e) { + System.err.printf("Unable to read %s. Cannot instantiate document " + + "object.%n", documentFile.getAbsolutePath()); + e.printStackTrace(); + return null; + } + return result; + } +} + diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java new file mode 100644 index 0000000..53bef71 --- /dev/null +++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java @@ -0,0 +1,170 @@ +/* Copyright 2016--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.hidserv; + +/** Extrapolated network totals of hidden-service statistics reported by a + * single relay. Extrapolated values are based on reported statistics and + * computed network fractions in the statistics interval. */ +public class ExtrapolatedHidServStats implements Document { + + /** Date of statistics interval end in milliseconds. */ + private long statsDateMillis; + + public long getStatsDateMillis() { + return this.statsDateMillis; + } + + /** Relay fingerprint consisting of 40 upper-case hex characters. */ + private String fingerprint; + + public String getFingerprint() { + return this.fingerprint; + } + + /** Extrapolated number of cells on rendezvous circuits in the + * network. 
*/ + private double extrapolatedRendRelayedCells; + + public void setExtrapolatedRendRelayedCells( + double extrapolatedRendRelayedCells) { + this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells; + } + + public double getExtrapolatedRendRelayedCells() { + return this.extrapolatedRendRelayedCells; + } + + /** Computed fraction of observed cells on rendezvous circuits in the + * network, used to weight this relay's extrapolated network total in + * the aggregation step. */ + private double fractionRendRelayedCells; + + public void setFractionRendRelayedCells( + double fractionRendRelayedCells) { + this.fractionRendRelayedCells = fractionRendRelayedCells; + } + + public double getFractionRendRelayedCells() { + return this.fractionRendRelayedCells; + } + + /** Extrapolated number of .onions in the network. */ + private double extrapolatedDirOnionsSeen; + + public void setExtrapolatedDirOnionsSeen( + double extrapolatedDirOnionsSeen) { + this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen; + } + + public double getExtrapolatedDirOnionsSeen() { + return this.extrapolatedDirOnionsSeen; + } + + /** Computed fraction of observed .onions in the network, used to weight + * this relay's extrapolated network total in the aggregation step. */ + private double fractionDirOnionsSeen; + + public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) { + this.fractionDirOnionsSeen = fractionDirOnionsSeen; + } + + public double getFractionDirOnionsSeen() { + return this.fractionDirOnionsSeen; + } + + /** Instantiates a new stats object using fingerprint and statistics + * interval end date which together uniquely identify the object. */ + public ExtrapolatedHidServStats(long statsDateMillis, + String fingerprint) { + this.statsDateMillis = statsDateMillis; + this.fingerprint = fingerprint; + } + + /** Returns whether this object contains the same fingerprint and + * statistics interval end date as the passed object. 
*/ + @Override + public boolean equals(Object otherObject) { + if (!(otherObject instanceof ExtrapolatedHidServStats)) { + return false; + } + ExtrapolatedHidServStats other = + (ExtrapolatedHidServStats) otherObject; + return this.fingerprint.equals(other.fingerprint) + && this.statsDateMillis == other.statsDateMillis; + } + + /** Returns a (hopefully unique) hash code based on this object's + * fingerprint and statistics interval end date. */ + @Override + public int hashCode() { + return this.fingerprint.hashCode() + (int) this.statsDateMillis; + } + + /** Returns a string representation of this object, consisting of the + * statistics interval end date and the concatenation of all other + * attributes. */ + @Override + public String[] format() { + String first = DateTimeHelper.format(this.statsDateMillis, + DateTimeHelper.ISO_DATE_FORMAT); + String second = this.fingerprint + + (this.fractionRendRelayedCells == 0.0 ? ",," + : String.format(",%.0f,%f", this.extrapolatedRendRelayedCells, + this.fractionRendRelayedCells)) + + (this.fractionDirOnionsSeen == 0.0 ? ",," + : String.format(",%.0f,%f", this.extrapolatedDirOnionsSeen, + this.fractionDirOnionsSeen)); + return new String[] { first, second }; + } + + /** Instantiates an empty stats object that will be initialized more by + * the parse method. */ + ExtrapolatedHidServStats() { + } + + /** Initializes this stats object using the two provided strings that + * have been produced by the format method earlier and returns whether + * this operation was successful. */ + @Override + public boolean parse(String[] formattedStrings) { + if (formattedStrings.length != 2) { + System.err.printf("Invalid number of formatted strings. 
" + + "Skipping.%n", formattedStrings.length); + return false; + } + long statsDateMillis = DateTimeHelper.parse(formattedStrings[0], + DateTimeHelper.ISO_DATE_FORMAT); + String[] secondParts = formattedStrings[1].split(",", 5); + if (secondParts.length != 5) { + System.err.printf("Invalid number of comma-separated values. " + + "Skipping.%n"); + return false; + } + String fingerprint = secondParts[0]; + double extrapolatedRendRelayedCells = 0.0; + double fractionRendRelayedCells = 0.0; + double extrapolatedDirOnionsSeen = 0.0; + double fractionDirOnionsSeen = 0.0; + try { + extrapolatedRendRelayedCells = secondParts[1].equals("") ? 0.0 + : Double.parseDouble(secondParts[1]); + fractionRendRelayedCells = secondParts[2].equals("") ? 0.0 + : Double.parseDouble(secondParts[2]); + extrapolatedDirOnionsSeen = secondParts[3].equals("") ? 0.0 + : Double.parseDouble(secondParts[3]); + fractionDirOnionsSeen = secondParts[4].equals("") ? 0.0 + : Double.parseDouble(secondParts[4]); + } catch (NumberFormatException e) { + return false; + } + this.statsDateMillis = statsDateMillis; + this.fingerprint = fingerprint; + this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells; + this.fractionRendRelayedCells = fractionRendRelayedCells; + this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen; + this.fractionDirOnionsSeen = fractionDirOnionsSeen; + return true; + } +} + diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Extrapolator.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Extrapolator.java new file mode 100644 index 0000000..262720a --- /dev/null +++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Extrapolator.java @@ -0,0 +1,253 @@ +/* Copyright 2016--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.hidserv; + +import java.io.File; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import 
java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +/** Extrapolate hidden-service statistics reported by single relays by + * dividing them by the computed fraction of hidden-service activity + * observed by the relay. */ +public class Extrapolator { + + /** Document file containing previously parsed reported hidden-service + * statistics. */ + private File reportedHidServStatsFile; + + /** Document store for storing and retrieving reported hidden-service + * statistics. */ + private DocumentStore<ReportedHidServStats> reportedHidServStatsStore; + + /** Directory containing document files with previously computed network + * fractions. */ + private File computedNetworkFractionsDirectory; + + /** Document store for storing and retrieving computed network + * fractions. */ + private DocumentStore<ComputedNetworkFractions> + computedNetworkFractionsStore; + + /** Document file containing extrapolated hidden-service statistics. */ + private File extrapolatedHidServStatsFile; + + /** Document store for storing and retrieving extrapolated hidden-service + * statistics. */ + private DocumentStore<ExtrapolatedHidServStats> + extrapolatedHidServStatsStore; + + /** Initializes a new extrapolator object using the given directory and + * document stores. */ + public Extrapolator(File statusDirectory, + DocumentStore<ReportedHidServStats> reportedHidServStatsStore, + DocumentStore<ComputedNetworkFractions> + computedNetworkFractionsStore, + DocumentStore<ExtrapolatedHidServStats> + extrapolatedHidServStatsStore) { + + /* Create File instances for the files and directories in the provided + * status directory. */ + this.reportedHidServStatsFile = new File(statusDirectory, + "reported-hidserv-stats"); + this.computedNetworkFractionsDirectory = + new File(statusDirectory, "computed-network-fractions"); + this.extrapolatedHidServStatsFile = new File(statusDirectory, + "extrapolated-hidserv-stats"); + + /* Store references to the provided document stores. 
*/ + this.reportedHidServStatsStore = reportedHidServStatsStore; + this.computedNetworkFractionsStore = computedNetworkFractionsStore; + this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore; + } + + /** Iterates over all reported stats and extrapolate network totals for + * those that have not been extrapolated before. */ + public boolean extrapolateHidServStats() { + + /* Retrieve previously extrapolated stats to avoid extrapolating them + * again. */ + Set<ExtrapolatedHidServStats> extrapolatedStats = + this.extrapolatedHidServStatsStore.retrieve( + this.extrapolatedHidServStatsFile); + + /* Retrieve all reported stats, even including those that have already + * been extrapolated. */ + Set<ReportedHidServStats> reportedStats = + this.reportedHidServStatsStore.retrieve( + this.reportedHidServStatsFile); + + /* Make sure that all documents could be retrieved correctly. */ + if (extrapolatedStats == null || reportedStats == null) { + System.err.printf("Could not read previously parsed or " + + "extrapolated hidserv-stats. Skipping."); + return false; + } + + /* Re-arrange reported stats by fingerprint. */ + SortedMap<String, Set<ReportedHidServStats>> parsedStatsByFingerprint = + new TreeMap<>(); + for (ReportedHidServStats stat : reportedStats) { + String fingerprint = stat.getFingerprint(); + if (!parsedStatsByFingerprint.containsKey(fingerprint)) { + parsedStatsByFingerprint.put(fingerprint, + new HashSet<ReportedHidServStats>()); + } + parsedStatsByFingerprint.get(fingerprint).add(stat); + } + + /* Go through reported stats by fingerprint. */ + for (Map.Entry<String, Set<ReportedHidServStats>> e + : parsedStatsByFingerprint.entrySet()) { + String fingerprint = e.getKey(); + + /* Iterate over all stats reported by this relay and make a list of + * those that still need to be extrapolated. Also make a list of + * all dates for which we need to retrieve computed network + * fractions. 
*/ + Set<ReportedHidServStats> newReportedStats = new HashSet<>(); + SortedSet<String> retrieveFractionDates = new TreeSet<>(); + for (ReportedHidServStats stats : e.getValue()) { + + /* Check whether extrapolated stats already contain an object with + * the same statistics interval end date and fingerprint. */ + long statsDateMillis = (stats.getStatsEndMillis() + / DateTimeHelper.ONE_DAY) * DateTimeHelper.ONE_DAY; + if (extrapolatedStats.contains( + new ExtrapolatedHidServStats(statsDateMillis, fingerprint))) { + continue; + } + + /* Add the reported stats to the list of stats we still need to + * extrapolate. */ + newReportedStats.add(stats); + + /* Add all dates between statistics interval start and end to a + * list. */ + long statsEndMillis = stats.getStatsEndMillis(); + long statsStartMillis = statsEndMillis + - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND; + for (long millis = statsStartMillis; millis <= statsEndMillis; + millis += DateTimeHelper.ONE_DAY) { + String date = DateTimeHelper.format(millis, + DateTimeHelper.ISO_DATE_FORMAT); + retrieveFractionDates.add(date); + } + } + + /* Retrieve all computed network fractions that might be needed to + * extrapolate new statistics. Keep a list of all known consensus + * valid-after times, and keep a map of fractions also by consensus + * valid-after time. (It's not sufficient to only keep the latter, + * because we need to count known consensuses even if the relay was + * not contained in a consensus or had a network fraction of exactly + * zero.) 
*/ + SortedSet<Long> knownConsensuses = new TreeSet<>(); + SortedMap<Long, ComputedNetworkFractions> computedNetworkFractions = + new TreeMap<>(); + for (String date : retrieveFractionDates) { + File documentFile = new File( + this.computedNetworkFractionsDirectory, date); + Set<ComputedNetworkFractions> fractions + = this.computedNetworkFractionsStore.retrieve(documentFile, + fingerprint); + for (ComputedNetworkFractions fraction : fractions) { + knownConsensuses.add(fraction.getValidAfterMillis()); + if (fraction.getFingerprint().equals(fingerprint)) { + computedNetworkFractions.put(fraction.getValidAfterMillis(), + fraction); + } + } + } + + /* Go through newly reported stats, match them with computed network + * fractions, and extrapolate network totals. */ + for (ReportedHidServStats stats : newReportedStats) { + long statsEndMillis = stats.getStatsEndMillis(); + long statsDateMillis = (statsEndMillis / DateTimeHelper.ONE_DAY) + * DateTimeHelper.ONE_DAY; + long statsStartMillis = statsEndMillis + - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND; + + /* Sum up computed network fractions and count known consensus in + * the relevant interval, so that we can later compute means of + * network fractions. */ + double sumFractionRendRelayedCells = 0.0; + double sumFractionDirOnionsSeen = 0.0; + int consensuses = 0; + for (long validAfterMillis : knownConsensuses) { + if (statsStartMillis <= validAfterMillis + && validAfterMillis < statsEndMillis) { + if (computedNetworkFractions.containsKey(validAfterMillis)) { + ComputedNetworkFractions frac = + computedNetworkFractions.get(validAfterMillis); + sumFractionRendRelayedCells += + frac.getFractionRendRelayedCells(); + sumFractionDirOnionsSeen += + frac.getFractionDirOnionsSeen(); + } + consensuses++; + } + } + + /* If we don't know a single consensus with valid-after time in + * the statistics interval, skip this stat. */ + if (consensuses == 0) { + continue; + } + + /* Compute means of network fractions. 
*/ + double fractionRendRelayedCells = + sumFractionRendRelayedCells / consensuses; + double fractionDirOnionsSeen = + sumFractionDirOnionsSeen / consensuses; + + /* If at least one fraction is positive, extrapolate network + * totals. */ + if (fractionRendRelayedCells > 0.0 + || fractionDirOnionsSeen > 0.0) { + ExtrapolatedHidServStats extrapolated = + new ExtrapolatedHidServStats( + statsDateMillis, fingerprint); + if (fractionRendRelayedCells > 0.0) { + extrapolated.setFractionRendRelayedCells( + fractionRendRelayedCells); + /* Extrapolating cells on rendezvous circuits is as easy as + * dividing the reported number by the computed network + * fraction. */ + double extrapolatedRendRelayedCells = + stats.getRendRelayedCells() / fractionRendRelayedCells; + extrapolated.setExtrapolatedRendRelayedCells( + extrapolatedRendRelayedCells); + } + if (fractionDirOnionsSeen > 0.0) { + extrapolated.setFractionDirOnionsSeen( + fractionDirOnionsSeen); + /* Extrapolating reported unique .onion addresses to the + * total number in the network is more difficult. In short, + * each descriptor is stored to 12 (likely) different + * directories, so we'll have to divide the reported number by + * 12 and then by the computed network fraction of this + * directory. */ + double extrapolatedDirOnionsSeen = + stats.getDirOnionsSeen() / (12.0 * fractionDirOnionsSeen); + extrapolated.setExtrapolatedDirOnionsSeen( + extrapolatedDirOnionsSeen); + } + extrapolatedStats.add(extrapolated); + } + } + } + + /* Store all extrapolated network totals to disk with help of the + * document store. 
*/ + return this.extrapolatedHidServStatsStore.store( + this.extrapolatedHidServStatsFile, extrapolatedStats); + } +} + diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Main.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Main.java new file mode 100644 index 0000000..ad0b415 --- /dev/null +++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Main.java @@ -0,0 +1,88 @@ +/* Copyright 2016--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.hidserv; + +import java.io.File; +import java.util.HashSet; +import java.util.Set; + +/** Main class for updating extrapolated network totals of hidden-service + * statistics. The main method of this class can be executed as often as + * new statistics are needed, though callers must ensure that executions + * do not overlap. */ +public class Main { + + /** Parses new descriptors, extrapolate contained statistics using + * computed network fractions, aggregate results, and writes results to + * disk. */ + public static void main(String[] args) { + + /* Initialize directories and file paths. */ + Set<File> inDirectories = new HashSet<>(); + inDirectories.add( + new File("../../shared/in/recent/relay-descriptors/consensuses")); + inDirectories.add( + new File("../../shared/in/recent/relay-descriptors/extra-infos")); + File statusDirectory = new File("status"); + + /* Initialize parser and read parse history to avoid parsing + * descriptor files that haven't changed since the last execution. 
*/ + System.out.println("Initializing parser and reading parse " + + "history..."); + DocumentStore<ReportedHidServStats> reportedHidServStatsStore = + new DocumentStore<>(ReportedHidServStats.class); + DocumentStore<ComputedNetworkFractions> + computedNetworkFractionsStore = new DocumentStore<>( + ComputedNetworkFractions.class); + Parser parser = new Parser(inDirectories, statusDirectory, + reportedHidServStatsStore, computedNetworkFractionsStore); + parser.readParseHistory(); + + /* Parse new descriptors and store their contents using the document + * stores. */ + System.out.println("Parsing descriptors..."); + if (!parser.parseDescriptors()) { + System.err.println("Could not store parsed descriptors. " + + "Terminating."); + return; + } + + /* Write the parse history to avoid parsing descriptor files again + * next time. It's okay to do this now and not at the end of the + * execution, because even if something breaks apart below, it's safe + * not to parse descriptor files again. */ + System.out.println("Writing parse history..."); + parser.writeParseHistory(); + + /* Extrapolate reported statistics using computed network fractions + * and write the result to disk using a document store. The result is + * a single file with extrapolated network totals based on reports by + * single relays. */ + System.out.println("Extrapolating statistics..."); + DocumentStore<ExtrapolatedHidServStats> extrapolatedHidServStatsStore + = new DocumentStore<>(ExtrapolatedHidServStats.class); + Extrapolator extrapolator = new Extrapolator(statusDirectory, + reportedHidServStatsStore, computedNetworkFractionsStore, + extrapolatedHidServStatsStore); + if (!extrapolator.extrapolateHidServStats()) { + System.err.println("Could not extrapolate statistics. " + + "Terminating."); + return; + } + + /* Go through all extrapolated network totals and aggregate them. + * This includes calculating daily weighted interquartile means, among + * other statistics. 
Write the result to a .csv file that can be + * processed by other tools. */ + System.out.println("Aggregating statistics..."); + File hidservStatsExtrapolatedCsvFile = new File("stats/hidserv.csv"); + Aggregator aggregator = new Aggregator(statusDirectory, + extrapolatedHidServStatsStore, hidservStatsExtrapolatedCsvFile); + aggregator.aggregateHidServStats(); + + /* End this execution. */ + System.out.println("Terminating."); + } +} + diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Parser.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Parser.java new file mode 100644 index 0000000..eccb0c0 --- /dev/null +++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Parser.java @@ -0,0 +1,440 @@ +/* Copyright 2016--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.hidserv; + +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.ExtraInfoDescriptor; +import org.torproject.descriptor.NetworkStatusEntry; +import org.torproject.descriptor.RelayNetworkStatusConsensus; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.math.BigInteger; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +/** Parse hidden-service statistics from extra-info descriptors, compute + * network fractions from consensuses, and write parsed contents to + * document files for later use. 
*/ +public class Parser { + + /** File containing tuples of last-modified times and file names of + * descriptor files parsed in the previous execution. */ + private File parseHistoryFile; + + /** Descriptor reader to provide parsed extra-info descriptors and + * consensuses. */ + private DescriptorReader descriptorReader; + + /** Document file containing previously parsed reported hidden-service + * statistics. */ + private File reportedHidServStatsFile; + + /** Document store for storing and retrieving reported hidden-service + * statistics. */ + private DocumentStore<ReportedHidServStats> reportedHidServStatsStore; + + /** Directory containing document files with previously computed network + * fractions. */ + private File computedNetworkFractionsDirectory; + + /** Document store for storing and retrieving computed network + * fractions. */ + private DocumentStore<ComputedNetworkFractions> + computedNetworkFractionsStore; + + /** Initializes a new parser object using the given directories and + * document stores. */ + public Parser(Set<File> inDirectories, File statusDirectory, + DocumentStore<ReportedHidServStats> reportedHidServStatsStore, + DocumentStore<ComputedNetworkFractions> + computedNetworkFractionsStore) { + + /* Create a new descriptor reader for reading descriptors in the given + * in directory. Configure the reader to avoid having more than five + * parsed descriptors in the queue, rather than the default one + * hundred. Five is a compromise between very large consensuses and + * rather small extra-info descriptors. */ + this.descriptorReader = + DescriptorSourceFactory.createDescriptorReader(); + for (File inDirectory : inDirectories) { + this.descriptorReader.addDirectory(inDirectory); + } + this.descriptorReader.setMaxDescriptorFilesInQueue(5); + + /* Create File instances for the files and directories in the provided + * status directory. 
*/ + this.parseHistoryFile = new File(statusDirectory, "parse-history"); + this.reportedHidServStatsFile = new File(statusDirectory, + "reported-hidserv-stats"); + this.computedNetworkFractionsDirectory = + new File(statusDirectory, "computed-network-fractions"); + + /* Store references to the provided document stores. */ + this.reportedHidServStatsStore = reportedHidServStatsStore; + this.computedNetworkFractionsStore = computedNetworkFractionsStore; + } + + /** Reads the parse history file to avoid parsing descriptor files that + * have not changed since the previous execution. */ + public void readParseHistory() { + if (this.parseHistoryFile.exists() + && this.parseHistoryFile.isFile()) { + SortedMap<String, Long> excludedFiles = new TreeMap<>(); + try { + BufferedReader br = new BufferedReader(new FileReader( + this.parseHistoryFile)); + String line; + while ((line = br.readLine()) != null) { + try { + /* Each line is supposed to contain the last-modified time and + * absolute path of a descriptor file. */ + String[] parts = line.split(" ", 2); + excludedFiles.put(parts[1], Long.parseLong(parts[0])); + } catch (NumberFormatException e) { + System.err.printf("Illegal line '%s' in parse history. " + + "Skipping line.%n", line); + } + } + br.close(); + } catch (IOException e) { + System.err.printf("Could not read history file '%s'. Not " + + "excluding descriptors in this execution.", + this.parseHistoryFile.getAbsolutePath()); + } + + /* Tell the descriptor reader to exclude the files contained in the + * parse history file. */ + this.descriptorReader.setExcludedFiles(excludedFiles); + } + } + + /** Writes parsed or skipped descriptor files with last-modified times + * and absolute paths to the parse history file to avoid parsing these + * files again, unless they change until the next execution. 
*/ + public void writeParseHistory() { + + /* Obtain the list of descriptor files that were either parsed now or + * that were skipped in this execution from the descriptor reader. */ + SortedMap<String, Long> excludedAndParsedFiles = new TreeMap<>(); + excludedAndParsedFiles.putAll( + this.descriptorReader.getExcludedFiles()); + excludedAndParsedFiles.putAll(this.descriptorReader.getParsedFiles()); + this.parseHistoryFile.getParentFile().mkdirs(); + + /* Use try-with-resources, so that the writer is closed even if + * writing fails halfway through the file. */ + try (BufferedWriter bw = new BufferedWriter(new FileWriter( + this.parseHistoryFile))) { + for (Map.Entry<String, Long> e + : excludedAndParsedFiles.entrySet()) { + /* Each line starts with the last-modified time of the descriptor + * file, followed by its absolute path. */ + String absolutePath = e.getKey(); + long lastModifiedMillis = e.getValue(); + bw.write(String.valueOf(lastModifiedMillis) + " " + absolutePath + + "\n"); + } + } catch (IOException e) { + System.err.printf("Could not write history file '%s'. Not " + + "excluding descriptors in next execution.%n", + this.parseHistoryFile.getAbsolutePath()); + } + } + + /** Set of all reported hidden-service statistics. + * + * <p>To date, these objects are small, and keeping them all in memory + * is easy. But if this ever changes, e.g., when more and more + * statistics are added, this may not scale.</p> */ + private Set<ReportedHidServStats> reportedHidServStats = new HashSet<>(); + + /** Instructs the descriptor reader to parse descriptor files, and + * handles the resulting parsed descriptors if they are either + * extra-info descriptors or consensuses. 
*/ + public boolean parseDescriptors() { + Iterator<DescriptorFile> descriptorFiles = + this.descriptorReader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (descriptor instanceof ExtraInfoDescriptor) { + this.parseExtraInfoDescriptor((ExtraInfoDescriptor) descriptor); + } else if (descriptor instanceof RelayNetworkStatusConsensus) { + if (!this.parseRelayNetworkStatusConsensus( + (RelayNetworkStatusConsensus) descriptor)) { + return false; + } + } + } + } + + /* Store reported hidden-service statistics to their document file. + * It's more efficient to only do this once after processing all + * descriptors. In contrast, sets of computed network fractions are + * stored immediately after processing the consensus they are based + * on. */ + return this.reportedHidServStatsStore.store( + this.reportedHidServStatsFile, this.reportedHidServStats); + } + + private static final String BIN_SIZE = "bin_size"; + + /** Parses the given extra-info descriptor by extracting its fingerprint + * and contained hidserv-* lines. + * + * <p>If a valid set of hidserv-stats can be extracted, create a new + * stats object that will later be stored to a document file.</p> */ + private void parseExtraInfoDescriptor( + ExtraInfoDescriptor extraInfoDescriptor) { + + /* Extract the fingerprint from the parsed descriptor. */ + String fingerprint = extraInfoDescriptor.getFingerprint(); + + /* If the descriptor did not contain any of the expected hidserv-* + * lines, don't do anything. This applies to the majority of + * descriptors, at least as long as only a minority of relays reports + * these statistics. 
*/ + if (extraInfoDescriptor.getHidservStatsEndMillis() < 0L + && extraInfoDescriptor.getHidservRendRelayedCells() == null + && extraInfoDescriptor.getHidservDirOnionsSeen() == null) { + return; + + /* If the descriptor contained all expected hidserv-* lines, create a + * new stats object and put it in the local set, so that it will later + * be written to a document file. Both bin sizes must be positive, + * because removing noise divides by them, and a bin size of zero + * would abort the execution with an ArithmeticException. */ + } else if (extraInfoDescriptor.getHidservStatsEndMillis() >= 0L + && extraInfoDescriptor.getHidservStatsIntervalLength() >= 0L + && extraInfoDescriptor.getHidservRendRelayedCells() != null + && extraInfoDescriptor.getHidservRendRelayedCellsParameters() != null + && extraInfoDescriptor.getHidservRendRelayedCellsParameters() + .containsKey(BIN_SIZE) + && extraInfoDescriptor.getHidservRendRelayedCellsParameters() + .get(BIN_SIZE).longValue() > 0L + && extraInfoDescriptor.getHidservDirOnionsSeen() != null + && extraInfoDescriptor.getHidservDirOnionsSeenParameters() != null + && extraInfoDescriptor.getHidservDirOnionsSeenParameters() + .containsKey(BIN_SIZE) + && extraInfoDescriptor.getHidservDirOnionsSeenParameters() + .get(BIN_SIZE).longValue() > 0L) { + ReportedHidServStats reportedStats = new ReportedHidServStats( + fingerprint, extraInfoDescriptor.getHidservStatsEndMillis()); + reportedStats.setStatsIntervalSeconds(extraInfoDescriptor + .getHidservStatsIntervalLength()); + reportedStats.setRendRelayedCells(this.removeNoise(extraInfoDescriptor + .getHidservRendRelayedCells().longValue(), extraInfoDescriptor + .getHidservRendRelayedCellsParameters().get(BIN_SIZE).longValue())); + reportedStats.setDirOnionsSeen(this.removeNoise(extraInfoDescriptor + .getHidservDirOnionsSeen().longValue(), extraInfoDescriptor + .getHidservDirOnionsSeenParameters().get(BIN_SIZE).longValue())); + this.reportedHidServStats.add(reportedStats); + + /* If the descriptor contained some but not all hidserv-* lines, or a + * bin size that is not positive, print out a warning. This case does + * not warrant any further action, because relays can in theory write + * anything in their extra-info descriptors. But maybe we'll want to + * know. */ + } else { + System.err.println("Relay " + fingerprint + " published " + + "incomplete hidserv-stats. 
Ignoring."); + } + } + + /** Removes noise from a reported stats value by rounding to the nearest + * right side of a bin and subtracting half of the bin size. + * + * <p>The bin size is used as divisor of an integer division and must + * therefore not be zero. NOTE(review): integer division truncates + * towards zero, so a negative sum of reported number and half of the + * bin size is rounded up rather than down to a bin boundary; confirm + * that this is intended for negative reported numbers.</p> */ + private long removeNoise(long reportedNumber, long binSize) { + long roundedToNearestRightSideOfTheBin = + ((reportedNumber + binSize / 2) / binSize) * binSize; + long subtractedHalfOfBinSize = + roundedToNearestRightSideOfTheBin - binSize / 2; + return subtractedHalfOfBinSize; + } + + /** Parses the given consensus by computing, for each contained relay, + * the fraction of cells on rendezvous circuits and the fraction of + * hidden-service descriptors attributed to that relay, and stores all + * non-zero fractions using the document store for computed network + * fractions. + * + * <p>Returns false if the consensus does not contain the required + * bandwidth weights or if storing the computed fractions fails. Note + * that a return value of false makes the caller in this class stop + * processing any further descriptors, even though the messages printed + * here say "Skipping".</p> */ + public boolean parseRelayNetworkStatusConsensus( + RelayNetworkStatusConsensus consensus) { + + /* Make sure that the consensus contains Wxx weights. */ + SortedMap<String, Integer> bandwidthWeights = + consensus.getBandwidthWeights(); + if (bandwidthWeights == null) { + System.err.printf("Consensus with valid-after time %s doesn't " + + "contain any Wxx weights. Skipping.%n", + DateTimeHelper.format(consensus.getValidAfterMillis())); + return false; + } + + /* More precisely, make sure that it contains Wmx weights, and then + * parse them. */ + SortedSet<String> expectedWeightKeys = + new TreeSet<String>(Arrays.asList("Wmg,Wmm,Wme,Wmd".split(","))); + expectedWeightKeys.removeAll(bandwidthWeights.keySet()); + if (!expectedWeightKeys.isEmpty()) { + System.err.printf("Consensus with valid-after time %s doesn't " + + "contain expected Wmx weights. Skipping.%n", + DateTimeHelper.format(consensus.getValidAfterMillis())); + return false; + } + double wmg = ((double) bandwidthWeights.get("Wmg")) / 10000.0; + double wmm = ((double) bandwidthWeights.get("Wmm")) / 10000.0; + double wme = ((double) bandwidthWeights.get("Wme")) / 10000.0; + double wmd = ((double) bandwidthWeights.get("Wmd")) / 10000.0; + + /* Keep a sorted set with the fingerprints of all hidden-service + * directories, in reverse order, so that we can later determine the + * fingerprint distance between a directory and the directory + * preceding it by three positions in the descriptor ring. 
*/ + SortedSet<String> hsDirs = new TreeSet<>(Collections.reverseOrder()); + + /* Prepare for computing the weights of all relays with the Fast flag + * for being selected in the middle position. */ + double totalWeightsRendezvousPoint = 0.0; + SortedMap<String, Double> weightsRendezvousPoint = new TreeMap<>(); + + /* Go through all status entries contained in the consensus. */ + for (Map.Entry<String, NetworkStatusEntry> e + : consensus.getStatusEntries().entrySet()) { + String fingerprint = e.getKey(); + NetworkStatusEntry statusEntry = e.getValue(); + SortedSet<String> flags = statusEntry.getFlags(); + + /* Add the relay to the set of hidden-service directories if it has + * the HSDir flag. */ + if (flags.contains("HSDir")) { + hsDirs.add(statusEntry.getFingerprint()); + } + + /* Compute the probability for being selected as rendezvous point. + * If the relay has the Fast flag, multiply its consensus weight + * with the correct Wmx weight, depending on whether the relay has + * the Guard and/or Exit flag. */ + double weightRendezvousPoint = 0.0; + if (flags.contains("Fast")) { + weightRendezvousPoint = (double) statusEntry.getBandwidth(); + if (flags.contains("Guard") && flags.contains("Exit")) { + weightRendezvousPoint *= wmd; + } else if (flags.contains("Guard")) { + weightRendezvousPoint *= wmg; + } else if (flags.contains("Exit")) { + weightRendezvousPoint *= wme; + } else { + weightRendezvousPoint *= wmm; + } + } + weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint); + totalWeightsRendezvousPoint += weightRendezvousPoint; + } + + /* Store all computed network fractions based on this consensus in a + * set, which will then be written to disk in a single store + * operation. */ + Set<ComputedNetworkFractions> computedNetworkFractions = new HashSet<>(); + + /* Remove all previously added directory fingerprints and re-add them + * twice, once with a leading "0" and once with a leading "1". 
The + * purpose is to simplify the logic for moving from one fingerprint to + * the previous one, even if that would mean traversing the ring + * start. For example, the fingerprint preceding "1""00..0000" with + * the first "1" being added here could be "0""FF..FFFF". */ + SortedSet<String> hsDirsCopy = new TreeSet<>(hsDirs); + hsDirs.clear(); + for (String fingerprint : hsDirsCopy) { + hsDirs.add("0" + fingerprint); + hsDirs.add("1" + fingerprint); + } + + /* Define the total ring size to compute fractions below. This is + * 16^40 or 2^160. */ + final double ringSize = new BigInteger( + "10000000000000000000000000000000000000000", + 16).doubleValue(); + + /* Go through all status entries again, this time computing network + * fractions. */ + for (Map.Entry<String, NetworkStatusEntry> e + : consensus.getStatusEntries().entrySet()) { + String fingerprint = e.getKey(); + NetworkStatusEntry statusEntry = e.getValue(); + double fractionRendRelayedCells = 0.0; + double fractionDirOnionsSeen = 0.0; + if (statusEntry != null) { + + /* Check if the relay is a hidden-service directory by looking up + * its fingerprint, preceded by "1", in the sorted set that we + * populated above. */ + String fingerprintPrecededByOne = "1" + fingerprint; + if (hsDirs.contains(fingerprintPrecededByOne)) { + + /* Move three positions in the sorted set, which is in reverse + * order, to learn the fingerprint of the directory preceding + * this directory by three positions. */ + String startResponsible = fingerprint; + int positionsToGo = 3; + for (String hsDirFingerprint + : hsDirs.tailSet(fingerprintPrecededByOne)) { + startResponsible = hsDirFingerprint; + if (positionsToGo-- <= 0) { + break; + } + } + + /* Compute the fraction of descriptor space that this relay is + * responsible for as difference between the two fingerprints + * divided by the ring size. 
*/ + fractionDirOnionsSeen = + new BigInteger(fingerprintPrecededByOne, 16).subtract( + new BigInteger(startResponsible, 16)).doubleValue() + / ringSize; + + /* Divide this fraction by three to obtain the fraction of + * descriptors that this directory has seen. This step is + * necessary, because each descriptor that is published to this + * directory is also published to two other directories. */ + fractionDirOnionsSeen /= 3.0; + } + + /* Compute the fraction of cells on rendezvous circuits that this + * relay has seen by dividing its previously calculated weight by + * the sum of all such weights. */ + fractionRendRelayedCells = weightsRendezvousPoint.get(fingerprint) + / totalWeightsRendezvousPoint; + } + + /* If at least one of the computed fractions is non-zero, create a + * new fractions object. */ + if (fractionRendRelayedCells > 0.0 || fractionDirOnionsSeen > 0.0) { + ComputedNetworkFractions fractions = new ComputedNetworkFractions( + fingerprint, consensus.getValidAfterMillis()); + fractions.setFractionRendRelayedCells(fractionRendRelayedCells); + fractions.setFractionDirOnionsSeen(fractionDirOnionsSeen); + computedNetworkFractions.add(fractions); + } + } + + /* Store all newly computed network fractions to a documents file. + * The same file also contains computed network fractions from other + * consensuses that were valid on the same day. This is in contrast + * to the other documents which are all stored in a single file, which + * would not scale for computed network fractions. 
*/ + String date = DateTimeHelper.format(consensus.getValidAfterMillis(), + DateTimeHelper.ISO_DATE_FORMAT); + File documentFile = new File(this.computedNetworkFractionsDirectory, + date); + if (!this.computedNetworkFractionsStore.store(documentFile, + computedNetworkFractions)) { + return false; + } + return true; + } +} + diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ReportedHidServStats.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ReportedHidServStats.java new file mode 100644 index 0000000..6d305d0 --- /dev/null +++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/ReportedHidServStats.java @@ -0,0 +1,141 @@ +/* Copyright 2016--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.hidserv; + +/** Hidden-service statistics reported by a single relay covering a single + * statistics interval of usually 24 hours. These statistics are reported + * by the relay in the "hidserv-" lines of its extra-info descriptor. */ +public class ReportedHidServStats implements Document { + + /** Relay fingerprint consisting of 40 upper-case hex characters. */ + private String fingerprint; + + public String getFingerprint() { + return this.fingerprint; + } + + /** Hidden-service statistics end timestamp in milliseconds. */ + private long statsEndMillis; + + public long getStatsEndMillis() { + return this.statsEndMillis; + } + + /** Statistics interval length in seconds. */ + private long statsIntervalSeconds; + + public void setStatsIntervalSeconds(long statsIntervalSeconds) { + this.statsIntervalSeconds = statsIntervalSeconds; + } + + public long getStatsIntervalSeconds() { + return this.statsIntervalSeconds; + } + + /** Number of relayed cells on rendezvous circuits as reported by the + * relay and adjusted by rounding to the nearest right side of a bin and + * subtracting half of the bin size. 
*/ + private long rendRelayedCells; + + public void setRendRelayedCells(long rendRelayedCells) { + this.rendRelayedCells = rendRelayedCells; + } + + public long getRendRelayedCells() { + return this.rendRelayedCells; + } + + /** Number of distinct .onion addresses as reported by the relay and + * adjusted by rounding to the nearest right side of a bin and + * subtracting half of the bin size. */ + private long dirOnionsSeen; + + public void setDirOnionsSeen(long dirOnionsSeen) { + this.dirOnionsSeen = dirOnionsSeen; + } + + public long getDirOnionsSeen() { + return this.dirOnionsSeen; + } + + /** Instantiate a new stats object using fingerprint and stats interval + * end which together uniquely identify the object. */ + public ReportedHidServStats(String fingerprint, long statsEndMillis) { + this.fingerprint = fingerprint; + this.statsEndMillis = statsEndMillis; + } + + /** Return whether this object contains the same fingerprint and stats + * interval end as the passed object. */ + @Override + public boolean equals(Object otherObject) { + if (!(otherObject instanceof ReportedHidServStats)) { + return false; + } + ReportedHidServStats other = (ReportedHidServStats) otherObject; + return this.fingerprint.equals(other.fingerprint) + && this.statsEndMillis == other.statsEndMillis; + } + + /** Return a (hopefully unique) hash code based on this object's + * fingerprint and stats interval end. */ + @Override + public int hashCode() { + return this.fingerprint.hashCode() + (int) this.statsEndMillis; + } + + /** Return this object formatted as two strings: the fingerprint, and + * the comma-separated concatenation of all other attributes. 
*/ + @Override + public String[] format() { + String first = this.fingerprint; + String second = String.format("%s,%d,%d,%d", + DateTimeHelper.format(this.statsEndMillis), + this.statsIntervalSeconds, this.rendRelayedCells, + this.dirOnionsSeen); + return new String[] { first, second }; + } + + /** Instantiate an empty stats object that will be initialized more by + * the parse method. */ + ReportedHidServStats() { + } + + /** Initialize this stats object using the two provided strings that have + * been produced by the format method earlier. Return whether this + * operation was successful. */ + @Override + public boolean parse(String[] formattedStrings) { + if (formattedStrings.length != 2) { + System.err.printf("Invalid number of formatted strings. " + + "Skipping.%n", formattedStrings.length); + return false; + } + String[] secondParts = formattedStrings[1].split(",", 4); + if (secondParts.length != 4) { + return false; + } + long statsEndMillis = DateTimeHelper.parse(secondParts[0]); + if (statsEndMillis == DateTimeHelper.NO_TIME_AVAILABLE) { + return false; + } + long statsIntervalSeconds = -1L; + long rendRelayedCells = -1L; + long dirOnionsSeen = -1L; + try { + statsIntervalSeconds = Long.parseLong(secondParts[1]); + rendRelayedCells = Long.parseLong(secondParts[2]); + dirOnionsSeen = Long.parseLong(secondParts[3]); + } catch (NumberFormatException e) { + return false; + } + this.fingerprint = formattedStrings[0]; + this.statsEndMillis = statsEndMillis; + this.statsIntervalSeconds = statsIntervalSeconds; + this.rendRelayedCells = rendRelayedCells; + this.dirOnionsSeen = dirOnionsSeen; + return true; + } +} + diff --git a/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Simulate.java b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Simulate.java new file mode 100644 index 0000000..207b4aa --- /dev/null +++ b/modules/hidserv/src/main/java/org/torproject/metrics/hidserv/Simulate.java @@ -0,0 +1,365 @@ +/* Copyright 2016--2017 The Tor 
Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.hidserv; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +/* NOTE: This class is not required for running the Main class! (It + * contains its own main method.) */ +public class Simulate { + private static File simCellsCsvFile = + new File("out/csv/sim-cells.csv"); + + private static File simOnionsCsvFile = + new File("out/csv/sim-onions.csv"); + + /** Runs two simulations to evaluate this data-processing module. */ + public static void main(String[] args) throws Exception { + System.out.print("Simulating extrapolation of rendezvous cells"); + simulateManyCells(); + System.out.print("\nSimulating extrapolation of .onions"); + simulateManyOnions(); + System.out.println("\nTerminating."); + } + + private static Random rnd = new Random(); + + private static void simulateManyCells() throws Exception { + simCellsCsvFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + simCellsCsvFile)); + bw.write("run,frac,wmean,wmedian,wiqm\n"); + final int numberOfExtrapolations = 1000; + for (int i = 0; i < numberOfExtrapolations; i++) { + bw.write(simulateCells(i)); + System.out.print("."); + } + bw.close(); + } + + private static void simulateManyOnions() throws Exception { + simOnionsCsvFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + simOnionsCsvFile)); + bw.write("run,frac,wmean,wmedian,wiqm\n"); + final int numberOfExtrapolations = 1000; + for (int i = 0; i < numberOfExtrapolations; i++) { + bw.write(simulateOnions(i)); + System.out.print("."); + } + bw.close(); + } + + private static String simulateCells(int 
run) { + + /* Generate consensus weights following an exponential distribution + * with lambda = 1 for 3000 potential rendezvous points. */ + final int numberRendPoints = 3000; + double[] consensusWeights = new double[numberRendPoints]; + double totalConsensusWeight = 0.0; + for (int i = 0; i < numberRendPoints; i++) { + double consensusWeight = -Math.log(1.0 - rnd.nextDouble()); + consensusWeights[i] = consensusWeight; + totalConsensusWeight += consensusWeight; + } + + /* Compute probabilities for being selected as rendezvous point. */ + double[] probRendPoint = new double[numberRendPoints]; + for (int i = 0; i < numberRendPoints; i++) { + probRendPoint[i] = consensusWeights[i] / totalConsensusWeight; + } + + /* Generate 10,000,000,000 cells (474 Mbit/s) in chunks following an + * exponential distribution with lambda = 0.0001, so on average + * 10,000 cells per chunk, and randomly assign them to a rendezvous + * point to report them later. */ + long cellsLeft = 10000000000L; + final double cellsLambda = 0.0001; + long[] observedCells = new long[numberRendPoints]; + while (cellsLeft > 0) { + long cells = Math.min(cellsLeft, + (long) (-Math.log(1.0 - rnd.nextDouble()) / cellsLambda)); + double selectRendPoint = rnd.nextDouble(); + for (int i = 0; i < probRendPoint.length; i++) { + selectRendPoint -= probRendPoint[i]; + if (selectRendPoint <= 0.0) { + observedCells[i] += cells; + break; + } + } + cellsLeft -= cells; + } + + /* Obfuscate reports using binning and Laplace noise, and then attempt + * to remove noise again. */ + final long binSize = 1024L; + final double b = 2048.0 / 0.3; + long[] reportedCells = new long[numberRendPoints]; + long[] removedNoiseCells = new long[numberRendPoints]; + for (int i = 0; i < numberRendPoints; i++) { + long observed = observedCells[i]; + long afterBinning = ((observed + binSize - 1L) / binSize) * binSize; + double randomDouble = rnd.nextDouble(); + double laplaceNoise = -b * (randomDouble > 0.5 ? 
1.0 : -1.0) + * Math.log(1.0 - 2.0 * Math.abs(randomDouble - 0.5)); + long reported = afterBinning + (long) laplaceNoise; + reportedCells[i] = reported; + long roundedToNearestRightSideOfTheBin = + ((reported + binSize / 2) / binSize) * binSize; + long subtractedHalfOfBinSize = + roundedToNearestRightSideOfTheBin - binSize / 2; + removedNoiseCells[i] = subtractedHalfOfBinSize; + } + + /* Perform extrapolations from random fractions of reports by + * probability to be selected as rendezvous point. */ + StringBuilder sb = new StringBuilder(); + double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, + 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 }; + for (double fraction : fractions) { + SortedSet<Integer> nonReportingRelays = new TreeSet<>(); + for (int j = 0; j < numberRendPoints; j++) { + nonReportingRelays.add(j); + } + List<Integer> shuffledRelays = new ArrayList<>(nonReportingRelays); + Collections.shuffle(shuffledRelays); + SortedSet<Integer> reportingRelays = new TreeSet<>(); + for (int j = 0; j < (int) ((double) numberRendPoints * fraction); + j++) { + reportingRelays.add(shuffledRelays.get(j)); + nonReportingRelays.remove(shuffledRelays.get(j)); + } + List<double[]> singleRelayExtrapolations; + double totalReportingProbability; + do { + singleRelayExtrapolations = new ArrayList<>(); + totalReportingProbability = 0.0; + for (int reportingRelay : reportingRelays) { + double probability = probRendPoint[reportingRelay]; + if (probability > 0.0) { + singleRelayExtrapolations.add( + new double[] { + removedNoiseCells[reportingRelay] / probability, + removedNoiseCells[reportingRelay], + probability }); + } + totalReportingProbability += probability; + } + if (totalReportingProbability < fraction - 0.001) { + int addRelay = new ArrayList<>(nonReportingRelays).get( + rnd.nextInt(nonReportingRelays.size())); + nonReportingRelays.remove(addRelay); + reportingRelays.add(addRelay); + } else if (totalReportingProbability > fraction + 0.001) { + int 
removeRelay = new ArrayList<>(reportingRelays).get( + rnd.nextInt(reportingRelays.size())); + reportingRelays.remove(removeRelay); + nonReportingRelays.add(removeRelay); + } + } while (totalReportingProbability < fraction - 0.001 + || totalReportingProbability > fraction + 0.001); + Collections.sort(singleRelayExtrapolations, + new Comparator<double[]>() { + public int compare(double[] o1, double[] o2) { + return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0; + } + } + ); + double totalProbability = 0.0; + double totalValues = 0.0; + double totalInterquartileProbability = 0.0; + double totalInterquartileValues = 0.0; + Double weightedMedian = null; + for (double[] extrapolation : singleRelayExtrapolations) { + totalValues += extrapolation[1]; + totalProbability += extrapolation[2]; + if (weightedMedian == null + && totalProbability > totalReportingProbability * 0.5) { + weightedMedian = extrapolation[0]; + } + if (totalProbability > totalReportingProbability * 0.25 + && totalProbability < totalReportingProbability * 0.75) { + totalInterquartileValues += extrapolation[1]; + totalInterquartileProbability += extrapolation[2]; + } + } + sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction, + totalValues / totalProbability, weightedMedian, + totalInterquartileValues / totalInterquartileProbability)); + } + return sb.toString(); + } + + private static String simulateOnions(final int run) { + + /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */ + final int numberHsDirs = 3000; + SortedSet<Double> hsDirFingerprints = new TreeSet<>(); + for (int i = 0; i < numberHsDirs; i++) { + hsDirFingerprints.add(rnd.nextDouble()); + } + + /* Compute fractions of observed descriptor space. 
*/ + SortedSet<Double> ring = + new TreeSet<>(Collections.reverseOrder()); + for (double fingerprint : hsDirFingerprints) { + ring.add(fingerprint); + ring.add(fingerprint - 1.0); + } + SortedMap<Double, Double> hsDirFractions = new TreeMap<>(); + for (double fingerprint : hsDirFingerprints) { + double start = fingerprint; + int positionsToGo = 3; + for (double prev : ring.tailSet(fingerprint)) { + start = prev; + if (positionsToGo-- <= 0) { + break; + } + } + hsDirFractions.put(fingerprint, fingerprint - start); + } + + /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */ + final int numberOnions = 40000; + final int replicas = 4; + final int storeOnDirs = 3; + SortedMap<Double, SortedSet<Integer>> storedDescs = new TreeMap<>(); + for (double fingerprint : hsDirFingerprints) { + storedDescs.put(fingerprint, new TreeSet<Integer>()); + } + for (int i = 0; i < numberOnions; i++) { + for (int j = 0; j < replicas; j++) { + int leftToStore = storeOnDirs; + for (double fingerprint + : hsDirFingerprints.tailSet(rnd.nextDouble())) { + storedDescs.get(fingerprint).add(i); + if (--leftToStore <= 0) { + break; + } + } + if (leftToStore > 0) { + for (double fingerprint : hsDirFingerprints) { + storedDescs.get(fingerprint).add(i); + if (--leftToStore <= 0) { + break; + } + } + } + } + } + + /* Obfuscate reports using binning and Laplace noise, and then attempt + * to remove noise again. */ + final long binSize = 8L; + final double b = 8.0 / 0.3; + SortedMap<Double, Long> reportedOnions = new TreeMap<>(); + SortedMap<Double, Long> removedNoiseOnions = new TreeMap<>(); + for (Map.Entry<Double, SortedSet<Integer>> e + : storedDescs.entrySet()) { + double fingerprint = e.getKey(); + long observed = (long) e.getValue().size(); + long afterBinning = ((observed + binSize - 1L) / binSize) * binSize; + double randomDouble = rnd.nextDouble(); + double laplaceNoise = -b * (randomDouble > 0.5 ? 
1.0 : -1.0) + * Math.log(1.0 - 2.0 * Math.abs(randomDouble - 0.5)); + long reported = afterBinning + (long) laplaceNoise; + reportedOnions.put(fingerprint, reported); + long roundedToNearestRightSideOfTheBin = + ((reported + binSize / 2) / binSize) * binSize; + long subtractedHalfOfBinSize = + roundedToNearestRightSideOfTheBin - binSize / 2; + removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize); + } + + /* Perform extrapolations from random fractions of reports by + * probability to be selected as rendezvous point. */ + StringBuilder sb = new StringBuilder(); + double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, + 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 }; + for (double fraction : fractions) { + SortedSet<Double> nonReportingRelays = + new TreeSet<>(hsDirFractions.keySet()); + List<Double> shuffledRelays = new ArrayList<>( + nonReportingRelays); + Collections.shuffle(shuffledRelays); + SortedSet<Double> reportingRelays = new TreeSet<>(); + for (int j = 0; j < (int) ((double) hsDirFractions.size() + * fraction); j++) { + reportingRelays.add(shuffledRelays.get(j)); + nonReportingRelays.remove(shuffledRelays.get(j)); + } + List<double[]> singleRelayExtrapolations; + double totalReportingProbability; + do { + singleRelayExtrapolations = new ArrayList<>(); + totalReportingProbability = 0.0; + for (double reportingRelay : reportingRelays) { + double probability = hsDirFractions.get(reportingRelay) / 3.0; + if (probability > 0.0) { + singleRelayExtrapolations.add( + new double[] { removedNoiseOnions.get(reportingRelay) + / probability, removedNoiseOnions.get(reportingRelay), + probability }); + } + totalReportingProbability += probability; + } + if (totalReportingProbability < fraction - 0.001) { + double addRelay = + new ArrayList<>(nonReportingRelays).get( + rnd.nextInt(nonReportingRelays.size())); + nonReportingRelays.remove(addRelay); + reportingRelays.add(addRelay); + } else if (totalReportingProbability > fraction + 0.001) { + double 
removeRelay = + new ArrayList<>(reportingRelays).get( + rnd.nextInt(reportingRelays.size())); + reportingRelays.remove(removeRelay); + nonReportingRelays.add(removeRelay); + } + } while (totalReportingProbability < fraction - 0.001 + || totalReportingProbability > fraction + 0.001); + Collections.sort(singleRelayExtrapolations, + new Comparator<double[]>() { + public int compare(double[] first, double[] second) { + return first[0] < second[0] ? -1 : first[0] > second[0] ? 1 : 0; + } + } + ); + double totalProbability = 0.0; + double totalValues = 0.0; + double totalInterquartileProbability = 0.0; + double totalInterquartileValues = 0.0; + Double weightedMedian = null; + for (double[] extrapolation : singleRelayExtrapolations) { + totalValues += extrapolation[1]; + totalProbability += extrapolation[2]; + if (weightedMedian == null + && totalProbability > totalReportingProbability * 0.5) { + weightedMedian = extrapolation[0]; + } + if (totalProbability > totalReportingProbability * 0.25 + && totalProbability < totalReportingProbability * 0.75) { + totalInterquartileValues += extrapolation[1]; + totalInterquartileProbability += extrapolation[2]; + } + } + sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction, + totalValues / totalProbability, weightedMedian, + totalInterquartileValues / totalInterquartileProbability)); + } + return sb.toString(); + } +} diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java deleted file mode 100644 index ea09a78..0000000 --- a/modules/hidserv/src/org/torproject/metrics/hidserv/Aggregator.java +++ /dev/null @@ -1,198 +0,0 @@ -/* Copyright 2016--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.metrics.hidserv; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import 
java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; - -/** Aggregate extrapolated network totals of hidden-service statistics by - * calculating statistics like the daily weighted interquartile mean. - * Also calculate simpler statistics like the number of reported - * statistics and the total network fraction of reporting relays. */ -public class Aggregator { - - /** Document file containing extrapolated hidden-service statistics. */ - private File extrapolatedHidServStatsFile; - - /** Document store for storing and retrieving extrapolated hidden-service - * statistics. */ - private DocumentStore<ExtrapolatedHidServStats> - extrapolatedHidServStatsStore; - - /** Output file for writing aggregated statistics. */ - private File hidservStatsCsvFile; - - /** Initializes a new aggregator object using the given directory, - * document store, and output file for results. */ - public Aggregator(File statusDirectory, - DocumentStore<ExtrapolatedHidServStats> - extrapolatedHidServStatsStore, File hidservStatsCsvFile) { - - /* Create a File instance for the document file containing - * extrapolated network totals. */ - this.extrapolatedHidServStatsFile = new File(statusDirectory, - "extrapolated-hidserv-stats"); - - /* Store references to the provided document store and output file. */ - this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore; - this.hidservStatsCsvFile = hidservStatsCsvFile; - } - - /** Calculates aggregates for all extrapolated hidden-service statistics - * and writes them to the output file. */ - public void aggregateHidServStats() { - - /* Retrieve previously extrapolated network totals. 
*/ - Set<ExtrapolatedHidServStats> extrapolatedStats = - this.extrapolatedHidServStatsStore.retrieve( - this.extrapolatedHidServStatsFile); - if (extrapolatedStats == null) { - System.err.printf("Unable to retrieve extrapolated hidden-service " - + "statistics from file %s. Skipping aggregation step.%n", - this.extrapolatedHidServStatsFile.getAbsolutePath()); - return; - } - - /* Re-arrange extrapolated network totals by statistics interval end - * date, and include the computed network total as weight for the - * extrapolated value. More precisely, map keys are ISO-formatted - * dates, map values are double[] arrays with the extrapolated network - * total as first element and the corresponding computed network - * fraction as second element. */ - SortedMap<String, List<double[]>> extrapolatedCells = new TreeMap<>(); - SortedMap<String, List<double[]>> extrapolatedOnions = new TreeMap<>(); - for (ExtrapolatedHidServStats extrapolated : extrapolatedStats) { - String date = DateTimeHelper.format( - extrapolated.getStatsDateMillis(), - DateTimeHelper.ISO_DATE_FORMAT); - if (extrapolated.getFractionRendRelayedCells() > 0.0) { - if (!extrapolatedCells.containsKey(date)) { - extrapolatedCells.put(date, new ArrayList<double[]>()); - } - extrapolatedCells.get(date).add(new double[] { - extrapolated.getExtrapolatedRendRelayedCells(), - extrapolated.getFractionRendRelayedCells() }); - } - if (extrapolated.getFractionDirOnionsSeen() > 0.0) { - if (!extrapolatedOnions.containsKey(date)) { - extrapolatedOnions.put(date, new ArrayList<double[]>()); - } - extrapolatedOnions.get(date).add(new double[] { - extrapolated.getExtrapolatedDirOnionsSeen(), - extrapolated.getFractionDirOnionsSeen() }); - } - } - - /* Write all results to a string builder that will later be written to - * the output file. 
Each line contains an ISO-formatted "date", a - * string identifier for the "type" of statistic, the weighted mean - * ("wmean"), weighted median ("wmedian"), weighted interquartile mean - * ("wiqm"), the total network "frac"tion, and the number of reported - * "stats" with non-zero computed network fraction. */ - StringBuilder sb = new StringBuilder(); - sb.append("date,type,wmean,wmedian,wiqm,frac,stats\n"); - - /* Repeat all aggregation steps for both types of statistics. */ - for (int i = 0; i < 2; i++) { - String type = i == 0 ? "rend-relayed-cells" : "dir-onions-seen"; - SortedMap<String, List<double[]>> extrapolated = i == 0 - ? extrapolatedCells : extrapolatedOnions; - - /* Go through all dates. */ - for (Map.Entry<String, List<double[]>> e - : extrapolated.entrySet()) { - List<double[]> weightedValues = e.getValue(); - - /* Sort extrapolated network totals contained in the first array - * element. (The second array element contains the computed - * network fraction as weight.) */ - Collections.sort(weightedValues, - new Comparator<double[]>() { - public int compare(double[] first, double[] second) { - return first[0] < second[0] ? -1 - : first[0] > second[0] ? 1 - : 0; - } - } - ); - - /* For the weighted mean, sum up all previously extrapolated - * values weighted with their network fractions (which happens to - * be the values that relays reported), and sum up all network - * fractions. Once we have those two sums, we can divide the sum - * of weighted extrapolated values by the sum of network fractions - * to obtain the weighted mean of extrapolated values. */ - double sumReported = 0.0; - double sumFraction = 0.0; - for (double[] d : weightedValues) { - sumReported += d[0] * d[1]; - sumFraction += d[1]; - } - double weightedMean = sumReported / sumFraction; - - /* For the weighted median and weighted interquartile mean, go - * through all values once again. 
The weighted median is the - * first extrapolated value with weight interval end greater than - * 50% of reported network fractions. For the weighted - * interquartile mean, sum up extrapolated values multiplied with - * network fractions and network fractions falling into the 25% to - * 75% range and later compute the weighted mean of those. */ - double weightIntervalEnd = 0.0; - Double weightedMedian = null; - double sumFractionInterquartile = 0.0; - double sumReportedInterquartile = 0.0; - for (double[] d : weightedValues) { - double extrapolatedValue = d[0]; - double computedFraction = d[1]; - double weightIntervalStart = weightIntervalEnd; - weightIntervalEnd += computedFraction; - if (weightedMedian == null - && weightIntervalEnd > sumFraction * 0.5) { - weightedMedian = extrapolatedValue; - } - if (weightIntervalEnd >= sumFraction * 0.25 - && weightIntervalStart <= sumFraction * 0.75) { - double fractionBetweenQuartiles = - Math.min(weightIntervalEnd, sumFraction * 0.75) - - Math.max(weightIntervalStart, sumFraction * 0.25); - sumReportedInterquartile += extrapolatedValue - * fractionBetweenQuartiles; - sumFractionInterquartile += fractionBetweenQuartiles; - } - } - double weightedInterquartileMean = - sumReportedInterquartile / sumFractionInterquartile; - - /* Put together all aggregated values in a single line. */ - String date = e.getKey(); - int numStats = weightedValues.size(); - sb.append(String.format("%s,%s,%.0f,%.0f,%.0f,%.8f,%d%n", date, - type, weightedMean, weightedMedian, weightedInterquartileMean, - sumFraction, numStats)); - } - } - - /* Write all aggregated results to the output file. */ - try { - this.hidservStatsCsvFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.hidservStatsCsvFile)); - bw.write(sb.toString()); - bw.close(); - } catch (IOException e) { - System.err.printf("Unable to write results to %s. 
Ignoring."); - } - } -} - diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java deleted file mode 100644 index a403e48..0000000 --- a/modules/hidserv/src/org/torproject/metrics/hidserv/ComputedNetworkFractions.java +++ /dev/null @@ -1,183 +0,0 @@ -/* Copyright 2016--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.metrics.hidserv; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -/** Computed fraction of hidden-service activity that a single relay is - * assumed to observe in the network. These fractions are computed from - * status entries and bandwidth weights in a network status consensus. */ -public class ComputedNetworkFractions implements Document { - - /** Relay fingerprint consisting of 40 upper-case hex characters. */ - private String fingerprint; - - public String getFingerprint() { - return this.fingerprint; - } - - /** Valid-after timestamp of the consensus in milliseconds. */ - private long validAfterMillis; - - public long getValidAfterMillis() { - return this.validAfterMillis; - } - - /** Fraction of cells on rendezvous circuits that this relay is assumed - * to observe in the network. */ - private double fractionRendRelayedCells; - - public void setFractionRendRelayedCells( - double fractionRendRelayedCells) { - this.fractionRendRelayedCells = fractionRendRelayedCells; - } - - public double getFractionRendRelayedCells() { - return this.fractionRendRelayedCells; - } - - /** Fraction of descriptors that this relay is assumed to observe in the - * network. This is calculated as the fraction of descriptors - * identifiers that this relay was responsible for, divided by 3, - * because each descriptor that is published to this directory is also - * published to two other directories. 
*/ - private double fractionDirOnionsSeen; - - public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) { - this.fractionDirOnionsSeen = fractionDirOnionsSeen; - } - - public double getFractionDirOnionsSeen() { - return this.fractionDirOnionsSeen; - } - - /** Instantiates a new fractions object using fingerprint and consensus - * valid-after time which together uniquely identify the object. */ - public ComputedNetworkFractions(String fingerprint, - long validAfterMillis) { - this.fingerprint = fingerprint; - this.validAfterMillis = validAfterMillis; - } - - /** Returns whether this object contains the same fingerprint and - * consensus valid-after time as the passed object. */ - @Override - public boolean equals(Object otherObject) { - if (!(otherObject instanceof ComputedNetworkFractions)) { - return false; - } - ComputedNetworkFractions other = - (ComputedNetworkFractions) otherObject; - return this.fingerprint.equals(other.fingerprint) - && this.validAfterMillis == other.validAfterMillis; - } - - /** Returns a (hopefully unique) hash code based on this object's - * fingerprint and consensus valid-after time. */ - @Override - public int hashCode() { - return this.fingerprint.hashCode() - + (int) this.validAfterMillis; - } - - private static Map<Long, String> previouslyFormattedDates = - Collections.synchronizedMap(new HashMap<Long, String>()); - - /** Returns a string representation of this object, consisting of two - * strings: the first string contains fingerprint and valid-after date, - * the second string contains the concatenation of all other - * attributes. 
*/ - @Override - public String[] format() { - long validAfterDateMillis = (this.validAfterMillis - / DateTimeHelper.ONE_DAY) * DateTimeHelper.ONE_DAY; - String validAfterDate; - if (previouslyFormattedDates.containsKey(validAfterDateMillis)) { - validAfterDate = previouslyFormattedDates.get(validAfterDateMillis); - } else { - validAfterDate = DateTimeHelper.format(validAfterDateMillis, - DateTimeHelper.ISO_DATE_FORMAT); - previouslyFormattedDates.put(validAfterDateMillis, validAfterDate); - } - long validAfterHourMillis = this.validAfterMillis - % DateTimeHelper.ONE_DAY; - String validAfterHour = String.format("%02d", - validAfterHourMillis / DateTimeHelper.ONE_HOUR); - String first = String.format("%s,%s", this.fingerprint, - validAfterDate); - String second = validAfterHour - + (this.fractionRendRelayedCells == 0.0 ? "," - : String.format(",%f", this.fractionRendRelayedCells)) - + (this.fractionDirOnionsSeen == 0.0 ? "," - : String.format(",%f", this.fractionDirOnionsSeen)); - return new String[] { first, second }; - } - - /** Instantiates an empty fractions object that will be initialized more - * by the parse method. */ - ComputedNetworkFractions() { - } - - private static Map<String, Long> previouslyParsedDates = - Collections.synchronizedMap(new HashMap<String, Long>()); - - /** Initializes this fractions object using the two provided strings - * that have been produced by the format method earlier and returns - * whether this operation was successful. */ - @Override - public boolean parse(String[] formattedStrings) { - if (formattedStrings.length != 2) { - System.err.printf("Invalid number of formatted strings. " - + "Skipping.%n", formattedStrings.length); - return false; - } - String[] firstParts = formattedStrings[0].split(",", 2); - if (firstParts.length != 2) { - System.err.printf("Invalid number of comma-separated values. 
" - + "Skipping.%n"); - return false; - } - String fingerprint = firstParts[0]; - String[] secondParts = formattedStrings[1].split(",", 3); - if (secondParts.length != 3) { - System.err.printf("Invalid number of comma-separated values. " - + "Skipping.%n"); - return false; - } - String validAfterDate = firstParts[1]; - String validAfterHour = secondParts[0]; - long validAfterDateMillis; - if (previouslyParsedDates.containsKey(validAfterDate)) { - validAfterDateMillis = previouslyParsedDates.get(validAfterDate); - } else { - validAfterDateMillis = DateTimeHelper.parse(validAfterDate, - DateTimeHelper.ISO_DATE_FORMAT); - previouslyParsedDates.put(validAfterDate, validAfterDateMillis); - } - long validAfterTimeMillis = Long.parseLong(validAfterHour) - * DateTimeHelper.ONE_HOUR; - if (validAfterDateMillis == DateTimeHelper.NO_TIME_AVAILABLE - || validAfterTimeMillis < 0L - || validAfterTimeMillis >= DateTimeHelper.ONE_DAY) { - System.err.printf("Invalid date/hour format. Skipping.%n"); - return false; - } - long validAfterMillis = validAfterDateMillis + validAfterTimeMillis; - try { - this.fingerprint = fingerprint; - this.validAfterMillis = validAfterMillis; - this.fractionRendRelayedCells = secondParts[1].equals("") - ? 0.0 : Double.parseDouble(secondParts[1]); - this.fractionDirOnionsSeen = secondParts[2].equals("") - ? 0.0 : Double.parseDouble(secondParts[2]); - return true; - } catch (NumberFormatException e) { - System.err.printf("Invalid number format. 
/** Utility class to format and parse dates and timestamps.  All
 * formatting and parsing is done in UTC and with lenient parsing turned
 * off. */
public final class DateTimeHelper {

  /** This class is not supposed to be instantiated, which is why its
   * constructor has private visibility. */
  private DateTimeHelper() {
  }

  /* Some useful time constants, all in milliseconds. */
  public static final long ONE_SECOND = 1000L;

  public static final long ONE_MINUTE = 60L * ONE_SECOND;

  public static final long ONE_HOUR = 60L * ONE_MINUTE;

  public static final long ONE_DAY = 24L * ONE_HOUR;

  /* Some useful date/time formats. */
  public static final String ISO_DATETIME_FORMAT = "yyyy-MM-dd HH:mm:ss";

  public static final String ISO_DATE_HOUR_FORMAT = "yyyy-MM-dd HH";

  public static final String ISO_DATE_FORMAT = "yyyy-MM-dd";

  public static final String ISO_HOUR_FORMAT = "HH";

  /** Map of DateFormat instances for parsing and formatting dates and
   * timestamps, protected using ThreadLocal to ensure that each thread
   * uses its own instances.  Only initialValue needs to be overridden;
   * get, set, and remove are inherited unchanged. */
  private static final ThreadLocal<Map<String, DateFormat>> dateFormats =
      new ThreadLocal<Map<String, DateFormat>>() {
        @Override
        protected Map<String, DateFormat> initialValue() {
          return new HashMap<>();
        }
      };

  /** Returns an instance of DateFormat for the given format, and if no
   * such instance exists, creates one and puts it in the map. */
  private static DateFormat getDateFormat(String format) {
    Map<String, DateFormat> threadDateFormats = dateFormats.get();
    DateFormat dateFormat = threadDateFormats.get(format);
    if (dateFormat == null) {
      dateFormat = new SimpleDateFormat(format);
      dateFormat.setLenient(false);
      dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
      threadDateFormats.put(format, dateFormat);
    }
    return dateFormat;
  }

  /** Formats the given time in milliseconds using the given format.
   *
   * @param millis time in milliseconds since the epoch
   * @param format date/time pattern, for example one of the ISO_*
   *     constants of this class
   * @return the formatted time in UTC */
  public static String format(long millis, String format) {
    return getDateFormat(format).format(millis);
  }

  /** Formats the given time in milliseconds using ISO date/time
   * format. */
  public static String format(long millis) {
    return format(millis, ISO_DATETIME_FORMAT);
  }

  /** Default result of the parse methods if the provided time could not
   * be parsed. */
  public static final long NO_TIME_AVAILABLE = -1L;

  /** Parses the given string using the given format.
   *
   * @param string text to parse, may be null
   * @param format date/time pattern, for example one of the ISO_*
   *     constants of this class
   * @return the parsed time in milliseconds since the epoch, or
   *     {@link #NO_TIME_AVAILABLE} if the string is null or cannot be
   *     parsed */
  public static long parse(String string, String format) {
    if (null == string) {
      return NO_TIME_AVAILABLE;
    }
    try {
      return getDateFormat(format).parse(string).getTime();
    } catch (ParseException e) {
      return NO_TIME_AVAILABLE;
    }
  }

  /** Parses the given string using ISO date/time format. */
  public static long parse(String string) {
    return parse(string, ISO_DATETIME_FORMAT);
  }
}
- * - * <p>These are the same two strings that the format method - * provides.</p> */ - public boolean parse(String[] formattedStrings); -} - diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java b/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java deleted file mode 100644 index 2670cf4..0000000 --- a/modules/hidserv/src/org/torproject/metrics/hidserv/DocumentStore.java +++ /dev/null @@ -1,176 +0,0 @@ -/* Copyright 2016--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.metrics.hidserv; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.LineNumberReader; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; - -/** Utility class to store serialized objects implementing the Document - * interface to a file and later to retrieve them. */ -public class DocumentStore<T extends Document> { - - /** Document class, needed to create new instances when retrieving - * documents. */ - private Class<T> clazz; - - /** Initializes a new store object for the given type of documents. */ - DocumentStore(Class<T> clazz) { - this.clazz = clazz; - } - - /** Stores the provided documents in the given file and returns whether - * the storage operation was successful. - * - * <p>If the file already existed and if it contains documents, merge - * the new documents with the existing ones.</p> */ - public boolean store(File documentFile, Set<T> documentsToStore) { - - /* Retrieve existing documents. */ - Set<T> retrievedDocuments = this.retrieve(documentFile); - if (retrievedDocuments == null) { - System.err.printf("Unable to read and update %s. 
Not storing " - + "documents.%n", documentFile.getAbsoluteFile()); - return false; - } - - /* Merge new documents with existing ones. */ - retrievedDocuments.addAll(documentsToStore); - - /* Serialize documents. */ - SortedMap<String, SortedSet<String>> formattedDocuments = new TreeMap<>(); - for (T retrieveDocument : retrievedDocuments) { - String[] formattedDocument = retrieveDocument.format(); - if (!formattedDocuments.containsKey(formattedDocument[0])) { - formattedDocuments.put(formattedDocument[0], - new TreeSet<String>()); - } - formattedDocuments.get(formattedDocument[0]).add( - formattedDocument[1]); - } - - /* Check if a temporary file exists from the previous execution. */ - File documentTempFile = new File(documentFile.getAbsoluteFile() - + ".tmp"); - if (documentTempFile.exists()) { - System.err.printf("Temporary document file %s still exists, " - + "indicating that a previous execution did not terminate " - + "cleanly. Not storing documents.%n", - documentTempFile.getAbsoluteFile()); - return false; - } - - /* Write to a new temporary file, then move it into place, possibly - * overwriting an existing file. */ - try { - documentTempFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - documentTempFile)); - for (Map.Entry<String, SortedSet<String>> e - : formattedDocuments.entrySet()) { - bw.write(e.getKey() + "\n"); - for (String s : e.getValue()) { - bw.write(" " + s + "\n"); - } - } - bw.close(); - documentFile.delete(); - documentTempFile.renameTo(documentFile); - } catch (IOException e) { - System.err.printf("Unable to write %s. Not storing documents.%n", - documentFile.getAbsolutePath()); - return false; - } - - /* Return success. */ - return true; - } - - /** Retrieves all previously stored documents from the given file. 
*/ - public Set<T> retrieve(File documentFile) { - return this.retrieve(documentFile, ""); - } - - /** Retrieves previously stored documents from the given file that start - * with the given prefix. */ - public Set<T> retrieve(File documentFile, String prefix) { - - /* Check if the document file exists, and if not, return an empty set. - * This is not an error case. */ - Set<T> result = new HashSet<>(); - if (!documentFile.exists()) { - return result; - } - - /* Parse the document file line by line and de-serialize contained - * documents. */ - try { - LineNumberReader lnr = new LineNumberReader(new BufferedReader( - new FileReader(documentFile))); - String line; - String formattedString0 = null; - while ((line = lnr.readLine()) != null) { - if (!line.startsWith(" ")) { - formattedString0 = line; - } else if (formattedString0 == null) { - System.err.printf("First line in %s must not start with a " - + "space. Not retrieving any previously stored " - + "documents.%n", documentFile.getAbsolutePath()); - lnr.close(); - return null; - } else if (prefix.length() > formattedString0.length() - && !(formattedString0 + line.substring(1)) - .startsWith(prefix)) { - /* Skip combined line not starting with prefix. */ - continue; - } else if (prefix.length() > 0 - && !formattedString0.startsWith(prefix)) { - /* Skip line not starting with prefix. */ - continue; - } else { - T document = this.clazz.newInstance(); - if (!document.parse(new String[] { formattedString0, - line.substring(1) })) { - System.err.printf("Unable to read line %d from %s. Not " - + "retrieving any previously stored documents.%n", - lnr.getLineNumber(), documentFile.getAbsolutePath()); - lnr.close(); - return null; - } - result.add(document); - } - } - lnr.close(); - } catch (IOException e) { - System.err.printf("Unable to read %s. 
Not retrieving any " - + "previously stored documents.%n", - documentFile.getAbsolutePath()); - e.printStackTrace(); - return null; - } catch (InstantiationException e) { - System.err.printf("Unable to read %s. Cannot instantiate document " - + "object.%n", documentFile.getAbsolutePath()); - e.printStackTrace(); - return null; - } catch (IllegalAccessException e) { - System.err.printf("Unable to read %s. Cannot instantiate document " - + "object.%n", documentFile.getAbsolutePath()); - e.printStackTrace(); - return null; - } - return result; - } -} - diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java deleted file mode 100644 index 53bef71..0000000 --- a/modules/hidserv/src/org/torproject/metrics/hidserv/ExtrapolatedHidServStats.java +++ /dev/null @@ -1,170 +0,0 @@ -/* Copyright 2016--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.metrics.hidserv; - -/** Extrapolated network totals of hidden-service statistics reported by a - * single relay. Extrapolated values are based on reported statistics and - * computed network fractions in the statistics interval. */ -public class ExtrapolatedHidServStats implements Document { - - /** Date of statistics interval end in milliseconds. */ - private long statsDateMillis; - - public long getStatsDateMillis() { - return this.statsDateMillis; - } - - /** Relay fingerprint consisting of 40 upper-case hex characters. */ - private String fingerprint; - - public String getFingerprint() { - return this.fingerprint; - } - - /** Extrapolated number of cells on rendezvous circuits in the - * network. 
*/ - private double extrapolatedRendRelayedCells; - - public void setExtrapolatedRendRelayedCells( - double extrapolatedRendRelayedCells) { - this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells; - } - - public double getExtrapolatedRendRelayedCells() { - return this.extrapolatedRendRelayedCells; - } - - /** Computed fraction of observed cells on rendezvous circuits in the - * network, used to weight this relay's extrapolated network total in - * the aggregation step. */ - private double fractionRendRelayedCells; - - public void setFractionRendRelayedCells( - double fractionRendRelayedCells) { - this.fractionRendRelayedCells = fractionRendRelayedCells; - } - - public double getFractionRendRelayedCells() { - return this.fractionRendRelayedCells; - } - - /** Extrapolated number of .onions in the network. */ - private double extrapolatedDirOnionsSeen; - - public void setExtrapolatedDirOnionsSeen( - double extrapolatedDirOnionsSeen) { - this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen; - } - - public double getExtrapolatedDirOnionsSeen() { - return this.extrapolatedDirOnionsSeen; - } - - /** Computed fraction of observed .onions in the network, used to weight - * this relay's extrapolated network total in the aggregation step. */ - private double fractionDirOnionsSeen; - - public void setFractionDirOnionsSeen(double fractionDirOnionsSeen) { - this.fractionDirOnionsSeen = fractionDirOnionsSeen; - } - - public double getFractionDirOnionsSeen() { - return this.fractionDirOnionsSeen; - } - - /** Instantiates a new stats object using fingerprint and statistics - * interval end date which together uniquely identify the object. */ - public ExtrapolatedHidServStats(long statsDateMillis, - String fingerprint) { - this.statsDateMillis = statsDateMillis; - this.fingerprint = fingerprint; - } - - /** Returns whether this object contains the same fingerprint and - * statistics interval end date as the passed object. 
*/ - @Override - public boolean equals(Object otherObject) { - if (!(otherObject instanceof ExtrapolatedHidServStats)) { - return false; - } - ExtrapolatedHidServStats other = - (ExtrapolatedHidServStats) otherObject; - return this.fingerprint.equals(other.fingerprint) - && this.statsDateMillis == other.statsDateMillis; - } - - /** Returns a (hopefully unique) hash code based on this object's - * fingerprint and statistics interval end date. */ - @Override - public int hashCode() { - return this.fingerprint.hashCode() + (int) this.statsDateMillis; - } - - /** Returns a string representation of this object, consisting of the - * statistics interval end date and the concatenation of all other - * attributes. */ - @Override - public String[] format() { - String first = DateTimeHelper.format(this.statsDateMillis, - DateTimeHelper.ISO_DATE_FORMAT); - String second = this.fingerprint - + (this.fractionRendRelayedCells == 0.0 ? ",," - : String.format(",%.0f,%f", this.extrapolatedRendRelayedCells, - this.fractionRendRelayedCells)) - + (this.fractionDirOnionsSeen == 0.0 ? ",," - : String.format(",%.0f,%f", this.extrapolatedDirOnionsSeen, - this.fractionDirOnionsSeen)); - return new String[] { first, second }; - } - - /** Instantiates an empty stats object that will be initialized more by - * the parse method. */ - ExtrapolatedHidServStats() { - } - - /** Initializes this stats object using the two provided strings that - * have been produced by the format method earlier and returns whether - * this operation was successful. */ - @Override - public boolean parse(String[] formattedStrings) { - if (formattedStrings.length != 2) { - System.err.printf("Invalid number of formatted strings. 
" - + "Skipping.%n", formattedStrings.length); - return false; - } - long statsDateMillis = DateTimeHelper.parse(formattedStrings[0], - DateTimeHelper.ISO_DATE_FORMAT); - String[] secondParts = formattedStrings[1].split(",", 5); - if (secondParts.length != 5) { - System.err.printf("Invalid number of comma-separated values. " - + "Skipping.%n"); - return false; - } - String fingerprint = secondParts[0]; - double extrapolatedRendRelayedCells = 0.0; - double fractionRendRelayedCells = 0.0; - double extrapolatedDirOnionsSeen = 0.0; - double fractionDirOnionsSeen = 0.0; - try { - extrapolatedRendRelayedCells = secondParts[1].equals("") ? 0.0 - : Double.parseDouble(secondParts[1]); - fractionRendRelayedCells = secondParts[2].equals("") ? 0.0 - : Double.parseDouble(secondParts[2]); - extrapolatedDirOnionsSeen = secondParts[3].equals("") ? 0.0 - : Double.parseDouble(secondParts[3]); - fractionDirOnionsSeen = secondParts[4].equals("") ? 0.0 - : Double.parseDouble(secondParts[4]); - } catch (NumberFormatException e) { - return false; - } - this.statsDateMillis = statsDateMillis; - this.fingerprint = fingerprint; - this.extrapolatedRendRelayedCells = extrapolatedRendRelayedCells; - this.fractionRendRelayedCells = fractionRendRelayedCells; - this.extrapolatedDirOnionsSeen = extrapolatedDirOnionsSeen; - this.fractionDirOnionsSeen = fractionDirOnionsSeen; - return true; - } -} - diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java deleted file mode 100644 index 262720a..0000000 --- a/modules/hidserv/src/org/torproject/metrics/hidserv/Extrapolator.java +++ /dev/null @@ -1,253 +0,0 @@ -/* Copyright 2016--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.metrics.hidserv; - -import java.io.File; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.SortedSet; -import 
java.util.TreeMap; -import java.util.TreeSet; - -/** Extrapolate hidden-service statistics reported by single relays by - * dividing them by the computed fraction of hidden-service activity - * observed by the relay. */ -public class Extrapolator { - - /** Document file containing previously parsed reported hidden-service - * statistics. */ - private File reportedHidServStatsFile; - - /** Document store for storing and retrieving reported hidden-service - * statistics. */ - private DocumentStore<ReportedHidServStats> reportedHidServStatsStore; - - /** Directory containing document files with previously computed network - * fractions. */ - private File computedNetworkFractionsDirectory; - - /** Document store for storing and retrieving computed network - * fractions. */ - private DocumentStore<ComputedNetworkFractions> - computedNetworkFractionsStore; - - /** Document file containing extrapolated hidden-service statistics. */ - private File extrapolatedHidServStatsFile; - - /** Document store for storing and retrieving extrapolated hidden-service - * statistics. */ - private DocumentStore<ExtrapolatedHidServStats> - extrapolatedHidServStatsStore; - - /** Initializes a new extrapolator object using the given directory and - * document stores. */ - public Extrapolator(File statusDirectory, - DocumentStore<ReportedHidServStats> reportedHidServStatsStore, - DocumentStore<ComputedNetworkFractions> - computedNetworkFractionsStore, - DocumentStore<ExtrapolatedHidServStats> - extrapolatedHidServStatsStore) { - - /* Create File instances for the files and directories in the provided - * status directory. */ - this.reportedHidServStatsFile = new File(statusDirectory, - "reported-hidserv-stats"); - this.computedNetworkFractionsDirectory = - new File(statusDirectory, "computed-network-fractions"); - this.extrapolatedHidServStatsFile = new File(statusDirectory, - "extrapolated-hidserv-stats"); - - /* Store references to the provided document stores. 
*/ - this.reportedHidServStatsStore = reportedHidServStatsStore; - this.computedNetworkFractionsStore = computedNetworkFractionsStore; - this.extrapolatedHidServStatsStore = extrapolatedHidServStatsStore; - } - - /** Iterates over all reported stats and extrapolate network totals for - * those that have not been extrapolated before. */ - public boolean extrapolateHidServStats() { - - /* Retrieve previously extrapolated stats to avoid extrapolating them - * again. */ - Set<ExtrapolatedHidServStats> extrapolatedStats = - this.extrapolatedHidServStatsStore.retrieve( - this.extrapolatedHidServStatsFile); - - /* Retrieve all reported stats, even including those that have already - * been extrapolated. */ - Set<ReportedHidServStats> reportedStats = - this.reportedHidServStatsStore.retrieve( - this.reportedHidServStatsFile); - - /* Make sure that all documents could be retrieved correctly. */ - if (extrapolatedStats == null || reportedStats == null) { - System.err.printf("Could not read previously parsed or " - + "extrapolated hidserv-stats. Skipping."); - return false; - } - - /* Re-arrange reported stats by fingerprint. */ - SortedMap<String, Set<ReportedHidServStats>> parsedStatsByFingerprint = - new TreeMap<>(); - for (ReportedHidServStats stat : reportedStats) { - String fingerprint = stat.getFingerprint(); - if (!parsedStatsByFingerprint.containsKey(fingerprint)) { - parsedStatsByFingerprint.put(fingerprint, - new HashSet<ReportedHidServStats>()); - } - parsedStatsByFingerprint.get(fingerprint).add(stat); - } - - /* Go through reported stats by fingerprint. */ - for (Map.Entry<String, Set<ReportedHidServStats>> e - : parsedStatsByFingerprint.entrySet()) { - String fingerprint = e.getKey(); - - /* Iterate over all stats reported by this relay and make a list of - * those that still need to be extrapolated. Also make a list of - * all dates for which we need to retrieve computed network - * fractions. 
*/ - Set<ReportedHidServStats> newReportedStats = new HashSet<>(); - SortedSet<String> retrieveFractionDates = new TreeSet<>(); - for (ReportedHidServStats stats : e.getValue()) { - - /* Check whether extrapolated stats already contain an object with - * the same statistics interval end date and fingerprint. */ - long statsDateMillis = (stats.getStatsEndMillis() - / DateTimeHelper.ONE_DAY) * DateTimeHelper.ONE_DAY; - if (extrapolatedStats.contains( - new ExtrapolatedHidServStats(statsDateMillis, fingerprint))) { - continue; - } - - /* Add the reported stats to the list of stats we still need to - * extrapolate. */ - newReportedStats.add(stats); - - /* Add all dates between statistics interval start and end to a - * list. */ - long statsEndMillis = stats.getStatsEndMillis(); - long statsStartMillis = statsEndMillis - - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND; - for (long millis = statsStartMillis; millis <= statsEndMillis; - millis += DateTimeHelper.ONE_DAY) { - String date = DateTimeHelper.format(millis, - DateTimeHelper.ISO_DATE_FORMAT); - retrieveFractionDates.add(date); - } - } - - /* Retrieve all computed network fractions that might be needed to - * extrapolate new statistics. Keep a list of all known consensus - * valid-after times, and keep a map of fractions also by consensus - * valid-after time. (It's not sufficient to only keep the latter, - * because we need to count known consensuses even if the relay was - * not contained in a consensus or had a network fraction of exactly - * zero.) 
*/ - SortedSet<Long> knownConsensuses = new TreeSet<>(); - SortedMap<Long, ComputedNetworkFractions> computedNetworkFractions = - new TreeMap<>(); - for (String date : retrieveFractionDates) { - File documentFile = new File( - this.computedNetworkFractionsDirectory, date); - Set<ComputedNetworkFractions> fractions - = this.computedNetworkFractionsStore.retrieve(documentFile, - fingerprint); - for (ComputedNetworkFractions fraction : fractions) { - knownConsensuses.add(fraction.getValidAfterMillis()); - if (fraction.getFingerprint().equals(fingerprint)) { - computedNetworkFractions.put(fraction.getValidAfterMillis(), - fraction); - } - } - } - - /* Go through newly reported stats, match them with computed network - * fractions, and extrapolate network totals. */ - for (ReportedHidServStats stats : newReportedStats) { - long statsEndMillis = stats.getStatsEndMillis(); - long statsDateMillis = (statsEndMillis / DateTimeHelper.ONE_DAY) - * DateTimeHelper.ONE_DAY; - long statsStartMillis = statsEndMillis - - stats.getStatsIntervalSeconds() * DateTimeHelper.ONE_SECOND; - - /* Sum up computed network fractions and count known consensus in - * the relevant interval, so that we can later compute means of - * network fractions. */ - double sumFractionRendRelayedCells = 0.0; - double sumFractionDirOnionsSeen = 0.0; - int consensuses = 0; - for (long validAfterMillis : knownConsensuses) { - if (statsStartMillis <= validAfterMillis - && validAfterMillis < statsEndMillis) { - if (computedNetworkFractions.containsKey(validAfterMillis)) { - ComputedNetworkFractions frac = - computedNetworkFractions.get(validAfterMillis); - sumFractionRendRelayedCells += - frac.getFractionRendRelayedCells(); - sumFractionDirOnionsSeen += - frac.getFractionDirOnionsSeen(); - } - consensuses++; - } - } - - /* If we don't know a single consensus with valid-after time in - * the statistics interval, skip this stat. */ - if (consensuses == 0) { - continue; - } - - /* Compute means of network fractions. 
*/ - double fractionRendRelayedCells = - sumFractionRendRelayedCells / consensuses; - double fractionDirOnionsSeen = - sumFractionDirOnionsSeen / consensuses; - - /* If at least one fraction is positive, extrapolate network - * totals. */ - if (fractionRendRelayedCells > 0.0 - || fractionDirOnionsSeen > 0.0) { - ExtrapolatedHidServStats extrapolated = - new ExtrapolatedHidServStats( - statsDateMillis, fingerprint); - if (fractionRendRelayedCells > 0.0) { - extrapolated.setFractionRendRelayedCells( - fractionRendRelayedCells); - /* Extrapolating cells on rendezvous circuits is as easy as - * dividing the reported number by the computed network - * fraction. */ - double extrapolatedRendRelayedCells = - stats.getRendRelayedCells() / fractionRendRelayedCells; - extrapolated.setExtrapolatedRendRelayedCells( - extrapolatedRendRelayedCells); - } - if (fractionDirOnionsSeen > 0.0) { - extrapolated.setFractionDirOnionsSeen( - fractionDirOnionsSeen); - /* Extrapolating reported unique .onion addresses to the - * total number in the network is more difficult. In short, - * each descriptor is stored to 12 (likely) different - * directories, so we'll have to divide the reported number by - * 12 and then by the computed network fraction of this - * directory. */ - double extrapolatedDirOnionsSeen = - stats.getDirOnionsSeen() / (12.0 * fractionDirOnionsSeen); - extrapolated.setExtrapolatedDirOnionsSeen( - extrapolatedDirOnionsSeen); - } - extrapolatedStats.add(extrapolated); - } - } - } - - /* Store all extrapolated network totals to disk with help of the - * document store. 
*/ - return this.extrapolatedHidServStatsStore.store( - this.extrapolatedHidServStatsFile, extrapolatedStats); - } -} - diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java deleted file mode 100644 index ad0b415..0000000 --- a/modules/hidserv/src/org/torproject/metrics/hidserv/Main.java +++ /dev/null @@ -1,88 +0,0 @@ -/* Copyright 2016--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.metrics.hidserv; - -import java.io.File; -import java.util.HashSet; -import java.util.Set; - -/** Main class for updating extrapolated network totals of hidden-service - * statistics. The main method of this class can be executed as often as - * new statistics are needed, though callers must ensure that executions - * do not overlap. */ -public class Main { - - /** Parses new descriptors, extrapolate contained statistics using - * computed network fractions, aggregate results, and writes results to - * disk. */ - public static void main(String[] args) { - - /* Initialize directories and file paths. */ - Set<File> inDirectories = new HashSet<>(); - inDirectories.add( - new File("../../shared/in/recent/relay-descriptors/consensuses")); - inDirectories.add( - new File("../../shared/in/recent/relay-descriptors/extra-infos")); - File statusDirectory = new File("status"); - - /* Initialize parser and read parse history to avoid parsing - * descriptor files that haven't changed since the last execution. 
*/ - System.out.println("Initializing parser and reading parse " - + "history..."); - DocumentStore<ReportedHidServStats> reportedHidServStatsStore = - new DocumentStore<>(ReportedHidServStats.class); - DocumentStore<ComputedNetworkFractions> - computedNetworkFractionsStore = new DocumentStore<>( - ComputedNetworkFractions.class); - Parser parser = new Parser(inDirectories, statusDirectory, - reportedHidServStatsStore, computedNetworkFractionsStore); - parser.readParseHistory(); - - /* Parse new descriptors and store their contents using the document - * stores. */ - System.out.println("Parsing descriptors..."); - if (!parser.parseDescriptors()) { - System.err.println("Could not store parsed descriptors. " - + "Terminating."); - return; - } - - /* Write the parse history to avoid parsing descriptor files again - * next time. It's okay to do this now and not at the end of the - * execution, because even if something breaks apart below, it's safe - * not to parse descriptor files again. */ - System.out.println("Writing parse history..."); - parser.writeParseHistory(); - - /* Extrapolate reported statistics using computed network fractions - * and write the result to disk using a document store. The result is - * a single file with extrapolated network totals based on reports by - * single relays. */ - System.out.println("Extrapolating statistics..."); - DocumentStore<ExtrapolatedHidServStats> extrapolatedHidServStatsStore - = new DocumentStore<>(ExtrapolatedHidServStats.class); - Extrapolator extrapolator = new Extrapolator(statusDirectory, - reportedHidServStatsStore, computedNetworkFractionsStore, - extrapolatedHidServStatsStore); - if (!extrapolator.extrapolateHidServStats()) { - System.err.println("Could not extrapolate statistics. " - + "Terminating."); - return; - } - - /* Go through all extrapolated network totals and aggregate them. - * This includes calculating daily weighted interquartile means, among - * other statistics. 
Write the result to a .csv file that can be - * processed by other tools. */ - System.out.println("Aggregating statistics..."); - File hidservStatsExtrapolatedCsvFile = new File("stats/hidserv.csv"); - Aggregator aggregator = new Aggregator(statusDirectory, - extrapolatedHidServStatsStore, hidservStatsExtrapolatedCsvFile); - aggregator.aggregateHidServStats(); - - /* End this execution. */ - System.out.println("Terminating."); - } -} - diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java deleted file mode 100644 index eccb0c0..0000000 --- a/modules/hidserv/src/org/torproject/metrics/hidserv/Parser.java +++ /dev/null @@ -1,440 +0,0 @@ -/* Copyright 2016--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.metrics.hidserv; - -import org.torproject.descriptor.Descriptor; -import org.torproject.descriptor.DescriptorFile; -import org.torproject.descriptor.DescriptorReader; -import org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.ExtraInfoDescriptor; -import org.torproject.descriptor.NetworkStatusEntry; -import org.torproject.descriptor.RelayNetworkStatusConsensus; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.math.BigInteger; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; - -/** Parse hidden-service statistics from extra-info descriptors, compute - * network fractions from consensuses, and write parsed contents to - * document files for later use. 
*/ -public class Parser { - - /** File containing tuples of last-modified times and file names of - * descriptor files parsed in the previous execution. */ - private File parseHistoryFile; - - /** Descriptor reader to provide parsed extra-info descriptors and - * consensuses. */ - private DescriptorReader descriptorReader; - - /** Document file containing previously parsed reported hidden-service - * statistics. */ - private File reportedHidServStatsFile; - - /** Document store for storing and retrieving reported hidden-service - * statistics. */ - private DocumentStore<ReportedHidServStats> reportedHidServStatsStore; - - /** Directory containing document files with previously computed network - * fractions. */ - private File computedNetworkFractionsDirectory; - - /** Document store for storing and retrieving computed network - * fractions. */ - private DocumentStore<ComputedNetworkFractions> - computedNetworkFractionsStore; - - /** Initializes a new parser object using the given directories and - * document stores. */ - public Parser(Set<File> inDirectories, File statusDirectory, - DocumentStore<ReportedHidServStats> reportedHidServStatsStore, - DocumentStore<ComputedNetworkFractions> - computedNetworkFractionsStore) { - - /* Create a new descriptor reader for reading descriptors in the given - * in directory. Configure the reader to avoid having more than five - * parsed descriptors in the queue, rather than the default one - * hundred. Five is a compromise between very large consensuses and - * rather small extra-info descriptors. */ - this.descriptorReader = - DescriptorSourceFactory.createDescriptorReader(); - for (File inDirectory : inDirectories) { - this.descriptorReader.addDirectory(inDirectory); - } - this.descriptorReader.setMaxDescriptorFilesInQueue(5); - - /* Create File instances for the files and directories in the provided - * status directory. 
*/ - this.parseHistoryFile = new File(statusDirectory, "parse-history"); - this.reportedHidServStatsFile = new File(statusDirectory, - "reported-hidserv-stats"); - this.computedNetworkFractionsDirectory = - new File(statusDirectory, "computed-network-fractions"); - - /* Store references to the provided document stores. */ - this.reportedHidServStatsStore = reportedHidServStatsStore; - this.computedNetworkFractionsStore = computedNetworkFractionsStore; - } - - /** Reads the parse history file to avoid parsing descriptor files that - * have not changed since the previous execution. */ - public void readParseHistory() { - if (this.parseHistoryFile.exists() - && this.parseHistoryFile.isFile()) { - SortedMap<String, Long> excludedFiles = new TreeMap<>(); - try { - BufferedReader br = new BufferedReader(new FileReader( - this.parseHistoryFile)); - String line; - while ((line = br.readLine()) != null) { - try { - /* Each line is supposed to contain the last-modified time and - * absolute path of a descriptor file. */ - String[] parts = line.split(" ", 2); - excludedFiles.put(parts[1], Long.parseLong(parts[0])); - } catch (NumberFormatException e) { - System.err.printf("Illegal line '%s' in parse history. " - + "Skipping line.%n", line); - } - } - br.close(); - } catch (IOException e) { - System.err.printf("Could not read history file '%s'. Not " - + "excluding descriptors in this execution.", - this.parseHistoryFile.getAbsolutePath()); - } - - /* Tell the descriptor reader to exclude the files contained in the - * parse history file. */ - this.descriptorReader.setExcludedFiles(excludedFiles); - } - } - - /** Writes parsed or skipped descriptor files with last-modified times - * and absolute paths to the parse history file to avoid parsing these - * files again, unless they change until the next execution. 
*/ - public void writeParseHistory() { - - /* Obtain the list of descriptor files that were either parsed now or - * that were skipped in this execution from the descriptor reader. */ - SortedMap<String, Long> excludedAndParsedFiles = new TreeMap<>(); - excludedAndParsedFiles.putAll( - this.descriptorReader.getExcludedFiles()); - excludedAndParsedFiles.putAll(this.descriptorReader.getParsedFiles()); - try { - this.parseHistoryFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.parseHistoryFile)); - for (Map.Entry<String, Long> e - : excludedAndParsedFiles.entrySet()) { - /* Each line starts with the last-modified time of the descriptor - * file, followed by its absolute path. */ - String absolutePath = e.getKey(); - long lastModifiedMillis = e.getValue(); - bw.write(String.valueOf(lastModifiedMillis) + " " + absolutePath - + "\n"); - } - bw.close(); - } catch (IOException e) { - System.err.printf("Could not write history file '%s'. Not " - + "excluding descriptors in next execution.", - this.parseHistoryFile.getAbsolutePath()); - } - } - - /** Set of all reported hidden-service statistics. - * - * <p>To date, these objects are small, and keeping them all in memory - * is easy. But if this ever changes, e.g., when more and more - * statistics are added, this may not scale.</p> */ - private Set<ReportedHidServStats> reportedHidServStats = new HashSet<>(); - - /** Instructs the descriptor reader to parse descriptor files, and - * handles the resulting parsed descriptors if they are either - * extra-info descriptors or consensuses. 
*/ - public boolean parseDescriptors() { - Iterator<DescriptorFile> descriptorFiles = - this.descriptorReader.readDescriptors(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (descriptor instanceof ExtraInfoDescriptor) { - this.parseExtraInfoDescriptor((ExtraInfoDescriptor) descriptor); - } else if (descriptor instanceof RelayNetworkStatusConsensus) { - if (!this.parseRelayNetworkStatusConsensus( - (RelayNetworkStatusConsensus) descriptor)) { - return false; - } - } - } - } - - /* Store reported hidden-service statistics to their document file. - * It's more efficient to only do this once after processing all - * descriptors. In contrast, sets of computed network fractions are - * stored immediately after processing the consensus they are based - * on. */ - return this.reportedHidServStatsStore.store( - this.reportedHidServStatsFile, this.reportedHidServStats); - } - - private static final String BIN_SIZE = "bin_size"; - - /** Parses the given extra-info descriptor by extracting its fingerprint - * and contained hidserv-* lines. - * - * <p>If a valid set of hidserv-stats can be extracted, create a new - * stats object that will later be stored to a document file.</p> */ - private void parseExtraInfoDescriptor( - ExtraInfoDescriptor extraInfoDescriptor) { - - /* Extract the fingerprint from the parsed descriptor. */ - String fingerprint = extraInfoDescriptor.getFingerprint(); - - /* If the descriptor did not contain any of the expected hidserv-* - * lines, don't do anything. This applies to the majority of - * descriptors, at least as long as only a minority of relays reports - * these statistics. 
*/ - if (extraInfoDescriptor.getHidservStatsEndMillis() < 0L - && extraInfoDescriptor.getHidservRendRelayedCells() == null - && extraInfoDescriptor.getHidservDirOnionsSeen() == null) { - return; - - /* If the descriptor contained all expected hidserv-* lines, create a - * new stats object and put it in the local map, so that it will later - * be written to a document file. */ - } else if (extraInfoDescriptor.getHidservStatsEndMillis() >= 0L - && extraInfoDescriptor.getHidservStatsIntervalLength() >= 0L - && extraInfoDescriptor.getHidservRendRelayedCells() != null - && extraInfoDescriptor.getHidservRendRelayedCellsParameters() != null - && extraInfoDescriptor.getHidservRendRelayedCellsParameters() - .containsKey(BIN_SIZE) - && extraInfoDescriptor.getHidservDirOnionsSeen() != null - && extraInfoDescriptor.getHidservDirOnionsSeenParameters() != null - && extraInfoDescriptor.getHidservDirOnionsSeenParameters() - .containsKey(BIN_SIZE)) { - ReportedHidServStats reportedStats = new ReportedHidServStats( - fingerprint, extraInfoDescriptor.getHidservStatsEndMillis()); - reportedStats.setStatsIntervalSeconds(extraInfoDescriptor - .getHidservStatsIntervalLength()); - reportedStats.setRendRelayedCells(this.removeNoise(extraInfoDescriptor - .getHidservRendRelayedCells().longValue(), extraInfoDescriptor - .getHidservRendRelayedCellsParameters().get(BIN_SIZE).longValue())); - reportedStats.setDirOnionsSeen(this.removeNoise(extraInfoDescriptor - .getHidservDirOnionsSeen().longValue(), extraInfoDescriptor - .getHidservDirOnionsSeenParameters().get(BIN_SIZE).longValue())); - this.reportedHidServStats.add(reportedStats); - - /* If the descriptor contained some but not all hidserv-* lines, print - * out a warning. This case does not warrant any further action, - * because relays can in theory write anything in their extra-info - * descriptors. But maybe we'll want to know. */ - } else { - System.err.println("Relay " + fingerprint + " published " - + "incomplete hidserv-stats. 
Ignoring."); - } - } - - /** Removes noise from a reported stats value by rounding to the nearest - * right side of a bin and subtracting half of the bin size. */ - private long removeNoise(long reportedNumber, long binSize) { - long roundedToNearestRightSideOfTheBin = - ((reportedNumber + binSize / 2) / binSize) * binSize; - long subtractedHalfOfBinSize = - roundedToNearestRightSideOfTheBin - binSize / 2; - return subtractedHalfOfBinSize; - } - - /** Parses the given consensus. */ - public boolean parseRelayNetworkStatusConsensus( - RelayNetworkStatusConsensus consensus) { - - /* Make sure that the consensus contains Wxx weights. */ - SortedMap<String, Integer> bandwidthWeights = - consensus.getBandwidthWeights(); - if (bandwidthWeights == null) { - System.err.printf("Consensus with valid-after time %s doesn't " - + "contain any Wxx weights. Skipping.%n", - DateTimeHelper.format(consensus.getValidAfterMillis())); - return false; - } - - /* More precisely, make sure that it contains Wmx weights, and then - * parse them. */ - SortedSet<String> expectedWeightKeys = - new TreeSet<String>(Arrays.asList("Wmg,Wmm,Wme,Wmd".split(","))); - expectedWeightKeys.removeAll(bandwidthWeights.keySet()); - if (!expectedWeightKeys.isEmpty()) { - System.err.printf("Consensus with valid-after time %s doesn't " - + "contain expected Wmx weights. Skipping.%n", - DateTimeHelper.format(consensus.getValidAfterMillis())); - return false; - } - double wmg = ((double) bandwidthWeights.get("Wmg")) / 10000.0; - double wmm = ((double) bandwidthWeights.get("Wmm")) / 10000.0; - double wme = ((double) bandwidthWeights.get("Wme")) / 10000.0; - double wmd = ((double) bandwidthWeights.get("Wmd")) / 10000.0; - - /* Keep a sorted set with the fingerprints of all hidden-service - * directories, in reverse order, so that we can later determine the - * fingerprint distance between a directory and the directory - * preceding it by three positions in the descriptor ring. 
*/ - SortedSet<String> hsDirs = new TreeSet<>(Collections.reverseOrder()); - - /* Prepare for computing the weights of all relays with the Fast flag - * for being selected in the middle position. */ - double totalWeightsRendezvousPoint = 0.0; - SortedMap<String, Double> weightsRendezvousPoint = new TreeMap<>(); - - /* Go through all status entries contained in the consensus. */ - for (Map.Entry<String, NetworkStatusEntry> e - : consensus.getStatusEntries().entrySet()) { - String fingerprint = e.getKey(); - NetworkStatusEntry statusEntry = e.getValue(); - SortedSet<String> flags = statusEntry.getFlags(); - - /* Add the relay to the set of hidden-service directories if it has - * the HSDir flag. */ - if (flags.contains("HSDir")) { - hsDirs.add(statusEntry.getFingerprint()); - } - - /* Compute the probability for being selected as rendezvous point. - * If the relay has the Fast flag, multiply its consensus weight - * with the correct Wmx weight, depending on whether the relay has - * the Guard and/or Exit flag. */ - double weightRendezvousPoint = 0.0; - if (flags.contains("Fast")) { - weightRendezvousPoint = (double) statusEntry.getBandwidth(); - if (flags.contains("Guard") && flags.contains("Exit")) { - weightRendezvousPoint *= wmd; - } else if (flags.contains("Guard")) { - weightRendezvousPoint *= wmg; - } else if (flags.contains("Exit")) { - weightRendezvousPoint *= wme; - } else { - weightRendezvousPoint *= wmm; - } - } - weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint); - totalWeightsRendezvousPoint += weightRendezvousPoint; - } - - /* Store all computed network fractions based on this consensus in a - * set, which will then be written to disk in a single store - * operation. */ - Set<ComputedNetworkFractions> computedNetworkFractions = new HashSet<>(); - - /* Remove all previously added directory fingerprints and re-add them - * twice, once with a leading "0" and once with a leading "1". 
The - * purpose is to simplify the logic for moving from one fingerprint to - * the previous one, even if that would mean traversing the ring - * start. For example, the fingerprint preceding "1""00..0000" with - * the first "1" being added here could be "0""FF..FFFF". */ - SortedSet<String> hsDirsCopy = new TreeSet<>(hsDirs); - hsDirs.clear(); - for (String fingerprint : hsDirsCopy) { - hsDirs.add("0" + fingerprint); - hsDirs.add("1" + fingerprint); - } - - /* Define the total ring size to compute fractions below. This is - * 16^40 or 2^160. */ - final double ringSize = new BigInteger( - "10000000000000000000000000000000000000000", - 16).doubleValue(); - - /* Go through all status entries again, this time computing network - * fractions. */ - for (Map.Entry<String, NetworkStatusEntry> e - : consensus.getStatusEntries().entrySet()) { - String fingerprint = e.getKey(); - NetworkStatusEntry statusEntry = e.getValue(); - double fractionRendRelayedCells = 0.0; - double fractionDirOnionsSeen = 0.0; - if (statusEntry != null) { - - /* Check if the relay is a hidden-service directory by looking up - * its fingerprint, preceded by "1", in the sorted set that we - * populated above. */ - String fingerprintPrecededByOne = "1" + fingerprint; - if (hsDirs.contains(fingerprintPrecededByOne)) { - - /* Move three positions in the sorted set, which is in reverse - * order, to learn the fingerprint of the directory preceding - * this directory by three positions. */ - String startResponsible = fingerprint; - int positionsToGo = 3; - for (String hsDirFingerprint - : hsDirs.tailSet(fingerprintPrecededByOne)) { - startResponsible = hsDirFingerprint; - if (positionsToGo-- <= 0) { - break; - } - } - - /* Compute the fraction of descriptor space that this relay is - * responsible for as difference between the two fingerprints - * divided by the ring size. 
*/ - fractionDirOnionsSeen = - new BigInteger(fingerprintPrecededByOne, 16).subtract( - new BigInteger(startResponsible, 16)).doubleValue() - / ringSize; - - /* Divide this fraction by three to obtain the fraction of - * descriptors that this directory has seen. This step is - * necessary, because each descriptor that is published to this - * directory is also published to two other directories. */ - fractionDirOnionsSeen /= 3.0; - } - - /* Compute the fraction of cells on rendezvous circuits that this - * relay has seen by dividing its previously calculated weight by - * the sum of all such weights. */ - fractionRendRelayedCells = weightsRendezvousPoint.get(fingerprint) - / totalWeightsRendezvousPoint; - } - - /* If at least one of the computed fractions is non-zero, create a - * new fractions object. */ - if (fractionRendRelayedCells > 0.0 || fractionDirOnionsSeen > 0.0) { - ComputedNetworkFractions fractions = new ComputedNetworkFractions( - fingerprint, consensus.getValidAfterMillis()); - fractions.setFractionRendRelayedCells(fractionRendRelayedCells); - fractions.setFractionDirOnionsSeen(fractionDirOnionsSeen); - computedNetworkFractions.add(fractions); - } - } - - /* Store all newly computed network fractions to a documents file. - * The same file also contains computed network fractions from other - * consensuses that were valid on the same day. This is in contrast - * to the other documents which are all stored in a single file, which - * would not scale for computed network fractions. 
*/ - String date = DateTimeHelper.format(consensus.getValidAfterMillis(), - DateTimeHelper.ISO_DATE_FORMAT); - File documentFile = new File(this.computedNetworkFractionsDirectory, - date); - if (!this.computedNetworkFractionsStore.store(documentFile, - computedNetworkFractions)) { - return false; - } - return true; - } -} - diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java b/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java deleted file mode 100644 index 6d305d0..0000000 --- a/modules/hidserv/src/org/torproject/metrics/hidserv/ReportedHidServStats.java +++ /dev/null @@ -1,141 +0,0 @@ -/* Copyright 2016--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.metrics.hidserv; - -/* Hidden-service statistics reported by a single relay covering a single - * statistics interval of usually 24 hours. These statistics are reported - * by the relay in the "hidserv-" lines of its extra-info descriptor. */ -public class ReportedHidServStats implements Document { - - /* Relay fingerprint consisting of 40 upper-case hex characters. */ - private String fingerprint; - - public String getFingerprint() { - return this.fingerprint; - } - - /* Hidden-service statistics end timestamp in milliseconds. */ - private long statsEndMillis; - - public long getStatsEndMillis() { - return this.statsEndMillis; - } - - /* Statistics interval length in seconds. */ - private long statsIntervalSeconds; - - public void setStatsIntervalSeconds(long statsIntervalSeconds) { - this.statsIntervalSeconds = statsIntervalSeconds; - } - - public long getStatsIntervalSeconds() { - return this.statsIntervalSeconds; - } - - /* Number of relayed cells on rendezvous circuits as reported by the - * relay and adjusted by rounding to the nearest right side of a bin and - * subtracting half of the bin size. 
*/ - private long rendRelayedCells; - - public void setRendRelayedCells(long rendRelayedCells) { - this.rendRelayedCells = rendRelayedCells; - } - - public long getRendRelayedCells() { - return this.rendRelayedCells; - } - - /* Number of distinct .onion addresses as reported by the relay and - * adjusted by rounding to the nearest right side of a bin and - * subtracting half of the bin size. */ - private long dirOnionsSeen; - - public void setDirOnionsSeen(long dirOnionsSeen) { - this.dirOnionsSeen = dirOnionsSeen; - } - - public long getDirOnionsSeen() { - return this.dirOnionsSeen; - } - - /* Instantiate a new stats object using fingerprint and stats interval - * end which together uniquely identify the object. */ - public ReportedHidServStats(String fingerprint, long statsEndMillis) { - this.fingerprint = fingerprint; - this.statsEndMillis = statsEndMillis; - } - - /* Return whether this object contains the same fingerprint and stats - * interval end as the passed object. */ - @Override - public boolean equals(Object otherObject) { - if (!(otherObject instanceof ReportedHidServStats)) { - return false; - } - ReportedHidServStats other = (ReportedHidServStats) otherObject; - return this.fingerprint.equals(other.fingerprint) - && this.statsEndMillis == other.statsEndMillis; - } - - /* Return a (hopefully unique) hash code based on this object's - * fingerprint and stats interval end. */ - @Override - public int hashCode() { - return this.fingerprint.hashCode() + (int) this.statsEndMillis; - } - - /* Return a string representation of this object, consisting of - * fingerprint and the concatenation of all other attributes. 
*/ - @Override - public String[] format() { - String first = this.fingerprint; - String second = String.format("%s,%d,%d,%d", - DateTimeHelper.format(this.statsEndMillis), - this.statsIntervalSeconds, this.rendRelayedCells, - this.dirOnionsSeen); - return new String[] { first, second }; - } - - /* Instantiate an empty stats object that will be initialized more by - * the parse method. */ - ReportedHidServStats() { - } - - /* Initialize this stats object using the two provided strings that have - * been produced by the format method earlier. Return whether this - * operation was successful. */ - @Override - public boolean parse(String[] formattedStrings) { - if (formattedStrings.length != 2) { - System.err.printf("Invalid number of formatted strings. " - + "Skipping.%n", formattedStrings.length); - return false; - } - String[] secondParts = formattedStrings[1].split(",", 4); - if (secondParts.length != 4) { - return false; - } - long statsEndMillis = DateTimeHelper.parse(secondParts[0]); - if (statsEndMillis == DateTimeHelper.NO_TIME_AVAILABLE) { - return false; - } - long statsIntervalSeconds = -1L; - long rendRelayedCells = -1L; - long dirOnionsSeen = -1L; - try { - statsIntervalSeconds = Long.parseLong(secondParts[1]); - rendRelayedCells = Long.parseLong(secondParts[2]); - dirOnionsSeen = Long.parseLong(secondParts[3]); - } catch (NumberFormatException e) { - return false; - } - this.fingerprint = formattedStrings[0]; - this.statsEndMillis = statsEndMillis; - this.statsIntervalSeconds = statsIntervalSeconds; - this.rendRelayedCells = rendRelayedCells; - this.dirOnionsSeen = dirOnionsSeen; - return true; - } -} - diff --git a/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java b/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java deleted file mode 100644 index 207b4aa..0000000 --- a/modules/hidserv/src/org/torproject/metrics/hidserv/Simulate.java +++ /dev/null @@ -1,365 +0,0 @@ -/* Copyright 2016--2017 The Tor Project - * See LICENSE for 
licensing information */ - -package org.torproject.metrics.hidserv; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; - -/* NOTE: This class is not required for running the Main class! (It - * contains its own main method.) */ -public class Simulate { - private static File simCellsCsvFile = - new File("out/csv/sim-cells.csv"); - - private static File simOnionsCsvFile = - new File("out/csv/sim-onions.csv"); - - /** Runs two simulations to evaluate this data-processing module. */ - public static void main(String[] args) throws Exception { - System.out.print("Simulating extrapolation of rendezvous cells"); - simulateManyCells(); - System.out.print("\nSimulating extrapolation of .onions"); - simulateManyOnions(); - System.out.println("\nTerminating."); - } - - private static Random rnd = new Random(); - - private static void simulateManyCells() throws Exception { - simCellsCsvFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - simCellsCsvFile)); - bw.write("run,frac,wmean,wmedian,wiqm\n"); - final int numberOfExtrapolations = 1000; - for (int i = 0; i < numberOfExtrapolations; i++) { - bw.write(simulateCells(i)); - System.out.print("."); - } - bw.close(); - } - - private static void simulateManyOnions() throws Exception { - simOnionsCsvFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - simOnionsCsvFile)); - bw.write("run,frac,wmean,wmedian,wiqm\n"); - final int numberOfExtrapolations = 1000; - for (int i = 0; i < numberOfExtrapolations; i++) { - bw.write(simulateOnions(i)); - System.out.print("."); - } - bw.close(); - } - - private static String simulateCells(int run) { - - /* Generate 
consensus weights following an exponential distribution - * with lambda = 1 for 3000 potential rendezvous points. */ - final int numberRendPoints = 3000; - double[] consensusWeights = new double[numberRendPoints]; - double totalConsensusWeight = 0.0; - for (int i = 0; i < numberRendPoints; i++) { - double consensusWeight = -Math.log(1.0 - rnd.nextDouble()); - consensusWeights[i] = consensusWeight; - totalConsensusWeight += consensusWeight; - } - - /* Compute probabilities for being selected as rendezvous point. */ - double[] probRendPoint = new double[numberRendPoints]; - for (int i = 0; i < numberRendPoints; i++) { - probRendPoint[i] = consensusWeights[i] / totalConsensusWeight; - } - - /* Generate 10,000,000,000 cells (474 Mbit/s) in chunks following an - * exponential distribution with lambda = 0.0001, so on average - * 10,000 cells per chunk, and randomly assign them to a rendezvous - * point to report them later. */ - long cellsLeft = 10000000000L; - final double cellsLambda = 0.0001; - long[] observedCells = new long[numberRendPoints]; - while (cellsLeft > 0) { - long cells = Math.min(cellsLeft, - (long) (-Math.log(1.0 - rnd.nextDouble()) / cellsLambda)); - double selectRendPoint = rnd.nextDouble(); - for (int i = 0; i < probRendPoint.length; i++) { - selectRendPoint -= probRendPoint[i]; - if (selectRendPoint <= 0.0) { - observedCells[i] += cells; - break; - } - } - cellsLeft -= cells; - } - - /* Obfuscate reports using binning and Laplace noise, and then attempt - * to remove noise again. */ - final long binSize = 1024L; - final double b = 2048.0 / 0.3; - long[] reportedCells = new long[numberRendPoints]; - long[] removedNoiseCells = new long[numberRendPoints]; - for (int i = 0; i < numberRendPoints; i++) { - long observed = observedCells[i]; - long afterBinning = ((observed + binSize - 1L) / binSize) * binSize; - double randomDouble = rnd.nextDouble(); - double laplaceNoise = -b * (randomDouble > 0.5 ? 
1.0 : -1.0) - * Math.log(1.0 - 2.0 * Math.abs(randomDouble - 0.5)); - long reported = afterBinning + (long) laplaceNoise; - reportedCells[i] = reported; - long roundedToNearestRightSideOfTheBin = - ((reported + binSize / 2) / binSize) * binSize; - long subtractedHalfOfBinSize = - roundedToNearestRightSideOfTheBin - binSize / 2; - removedNoiseCells[i] = subtractedHalfOfBinSize; - } - - /* Perform extrapolations from random fractions of reports by - * probability to be selected as rendezvous point. */ - StringBuilder sb = new StringBuilder(); - double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, - 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 }; - for (double fraction : fractions) { - SortedSet<Integer> nonReportingRelays = new TreeSet<>(); - for (int j = 0; j < numberRendPoints; j++) { - nonReportingRelays.add(j); - } - List<Integer> shuffledRelays = new ArrayList<>(nonReportingRelays); - Collections.shuffle(shuffledRelays); - SortedSet<Integer> reportingRelays = new TreeSet<>(); - for (int j = 0; j < (int) ((double) numberRendPoints * fraction); - j++) { - reportingRelays.add(shuffledRelays.get(j)); - nonReportingRelays.remove(shuffledRelays.get(j)); - } - List<double[]> singleRelayExtrapolations; - double totalReportingProbability; - do { - singleRelayExtrapolations = new ArrayList<>(); - totalReportingProbability = 0.0; - for (int reportingRelay : reportingRelays) { - double probability = probRendPoint[reportingRelay]; - if (probability > 0.0) { - singleRelayExtrapolations.add( - new double[] { - removedNoiseCells[reportingRelay] / probability, - removedNoiseCells[reportingRelay], - probability }); - } - totalReportingProbability += probability; - } - if (totalReportingProbability < fraction - 0.001) { - int addRelay = new ArrayList<>(nonReportingRelays).get( - rnd.nextInt(nonReportingRelays.size())); - nonReportingRelays.remove(addRelay); - reportingRelays.add(addRelay); - } else if (totalReportingProbability > fraction + 0.001) { - int 
removeRelay = new ArrayList<>(reportingRelays).get( - rnd.nextInt(reportingRelays.size())); - reportingRelays.remove(removeRelay); - nonReportingRelays.add(removeRelay); - } - } while (totalReportingProbability < fraction - 0.001 - || totalReportingProbability > fraction + 0.001); - Collections.sort(singleRelayExtrapolations, - new Comparator<double[]>() { - public int compare(double[] o1, double[] o2) { - return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0; - } - } - ); - double totalProbability = 0.0; - double totalValues = 0.0; - double totalInterquartileProbability = 0.0; - double totalInterquartileValues = 0.0; - Double weightedMedian = null; - for (double[] extrapolation : singleRelayExtrapolations) { - totalValues += extrapolation[1]; - totalProbability += extrapolation[2]; - if (weightedMedian == null - && totalProbability > totalReportingProbability * 0.5) { - weightedMedian = extrapolation[0]; - } - if (totalProbability > totalReportingProbability * 0.25 - && totalProbability < totalReportingProbability * 0.75) { - totalInterquartileValues += extrapolation[1]; - totalInterquartileProbability += extrapolation[2]; - } - } - sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction, - totalValues / totalProbability, weightedMedian, - totalInterquartileValues / totalInterquartileProbability)); - } - return sb.toString(); - } - - private static String simulateOnions(final int run) { - - /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */ - final int numberHsDirs = 3000; - SortedSet<Double> hsDirFingerprints = new TreeSet<>(); - for (int i = 0; i < numberHsDirs; i++) { - hsDirFingerprints.add(rnd.nextDouble()); - } - - /* Compute fractions of observed descriptor space. 
*/ - SortedSet<Double> ring = - new TreeSet<>(Collections.reverseOrder()); - for (double fingerprint : hsDirFingerprints) { - ring.add(fingerprint); - ring.add(fingerprint - 1.0); - } - SortedMap<Double, Double> hsDirFractions = new TreeMap<>(); - for (double fingerprint : hsDirFingerprints) { - double start = fingerprint; - int positionsToGo = 3; - for (double prev : ring.tailSet(fingerprint)) { - start = prev; - if (positionsToGo-- <= 0) { - break; - } - } - hsDirFractions.put(fingerprint, fingerprint - start); - } - - /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */ - final int numberOnions = 40000; - final int replicas = 4; - final int storeOnDirs = 3; - SortedMap<Double, SortedSet<Integer>> storedDescs = new TreeMap<>(); - for (double fingerprint : hsDirFingerprints) { - storedDescs.put(fingerprint, new TreeSet<Integer>()); - } - for (int i = 0; i < numberOnions; i++) { - for (int j = 0; j < replicas; j++) { - int leftToStore = storeOnDirs; - for (double fingerprint - : hsDirFingerprints.tailSet(rnd.nextDouble())) { - storedDescs.get(fingerprint).add(i); - if (--leftToStore <= 0) { - break; - } - } - if (leftToStore > 0) { - for (double fingerprint : hsDirFingerprints) { - storedDescs.get(fingerprint).add(i); - if (--leftToStore <= 0) { - break; - } - } - } - } - } - - /* Obfuscate reports using binning and Laplace noise, and then attempt - * to remove noise again. */ - final long binSize = 8L; - final double b = 8.0 / 0.3; - SortedMap<Double, Long> reportedOnions = new TreeMap<>(); - SortedMap<Double, Long> removedNoiseOnions = new TreeMap<>(); - for (Map.Entry<Double, SortedSet<Integer>> e - : storedDescs.entrySet()) { - double fingerprint = e.getKey(); - long observed = (long) e.getValue().size(); - long afterBinning = ((observed + binSize - 1L) / binSize) * binSize; - double randomDouble = rnd.nextDouble(); - double laplaceNoise = -b * (randomDouble > 0.5 ? 
1.0 : -1.0) - * Math.log(1.0 - 2.0 * Math.abs(randomDouble - 0.5)); - long reported = afterBinning + (long) laplaceNoise; - reportedOnions.put(fingerprint, reported); - long roundedToNearestRightSideOfTheBin = - ((reported + binSize / 2) / binSize) * binSize; - long subtractedHalfOfBinSize = - roundedToNearestRightSideOfTheBin - binSize / 2; - removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize); - } - - /* Perform extrapolations from random fractions of reports by - * probability to be selected as rendezvous point. */ - StringBuilder sb = new StringBuilder(); - double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, - 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 }; - for (double fraction : fractions) { - SortedSet<Double> nonReportingRelays = - new TreeSet<>(hsDirFractions.keySet()); - List<Double> shuffledRelays = new ArrayList<>( - nonReportingRelays); - Collections.shuffle(shuffledRelays); - SortedSet<Double> reportingRelays = new TreeSet<>(); - for (int j = 0; j < (int) ((double) hsDirFractions.size() - * fraction); j++) { - reportingRelays.add(shuffledRelays.get(j)); - nonReportingRelays.remove(shuffledRelays.get(j)); - } - List<double[]> singleRelayExtrapolations; - double totalReportingProbability; - do { - singleRelayExtrapolations = new ArrayList<>(); - totalReportingProbability = 0.0; - for (double reportingRelay : reportingRelays) { - double probability = hsDirFractions.get(reportingRelay) / 3.0; - if (probability > 0.0) { - singleRelayExtrapolations.add( - new double[] { removedNoiseOnions.get(reportingRelay) - / probability, removedNoiseOnions.get(reportingRelay), - probability }); - } - totalReportingProbability += probability; - } - if (totalReportingProbability < fraction - 0.001) { - double addRelay = - new ArrayList<>(nonReportingRelays).get( - rnd.nextInt(nonReportingRelays.size())); - nonReportingRelays.remove(addRelay); - reportingRelays.add(addRelay); - } else if (totalReportingProbability > fraction + 0.001) { - double 
removeRelay = - new ArrayList<>(reportingRelays).get( - rnd.nextInt(reportingRelays.size())); - reportingRelays.remove(removeRelay); - nonReportingRelays.add(removeRelay); - } - } while (totalReportingProbability < fraction - 0.001 - || totalReportingProbability > fraction + 0.001); - Collections.sort(singleRelayExtrapolations, - new Comparator<double[]>() { - public int compare(double[] first, double[] second) { - return first[0] < second[0] ? -1 : first[0] > second[0] ? 1 : 0; - } - } - ); - double totalProbability = 0.0; - double totalValues = 0.0; - double totalInterquartileProbability = 0.0; - double totalInterquartileValues = 0.0; - Double weightedMedian = null; - for (double[] extrapolation : singleRelayExtrapolations) { - totalValues += extrapolation[1]; - totalProbability += extrapolation[2]; - if (weightedMedian == null - && totalProbability > totalReportingProbability * 0.5) { - weightedMedian = extrapolation[0]; - } - if (totalProbability > totalReportingProbability * 0.25 - && totalProbability < totalReportingProbability * 0.75) { - totalInterquartileValues += extrapolation[1]; - totalInterquartileProbability += extrapolation[2]; - } - } - sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction, - totalValues / totalProbability, weightedMedian, - totalInterquartileValues / totalInterquartileProbability)); - } - return sb.toString(); - } -} diff --git a/modules/legacy/build.xml b/modules/legacy/build.xml index 252a712..f4ef8e7 100644 --- a/modules/legacy/build.xml +++ b/modules/legacy/build.xml @@ -8,7 +8,6 @@ <pathelement path="${classes}"/> <path refid="base.classpath" /> <fileset dir="${libs}"> - <include name="commons-codec-1.9.jar"/> <include name="postgresql-jdbc3-9.2.jar"/> </fileset> </path> diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/Configuration.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/Configuration.java new file mode 100644 index 0000000..e0d753f --- /dev/null +++ 
b/modules/legacy/src/main/java/org/torproject/ernie/cron/Configuration.java @@ -0,0 +1,206 @@ +/* Copyright 2011--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.ernie.cron; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.net.MalformedURLException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Initialize configuration with hard-coded defaults, overwrite with + * configuration in config file, if exists, and answer Main.java about our + * configuration. + */ +public class Configuration { + + private boolean importDirectoryArchives = false; + + private List<String> directoryArchivesDirectories = new ArrayList<>(); + + private boolean keepDirectoryArchiveImportHistory = false; + + private boolean importSanitizedBridges = false; + + private String sanitizedBridgesDirectory = "in/bridge-descriptors/"; + + private boolean keepSanitizedBridgesImportHistory = false; + + private boolean writeRelayDescriptorDatabase = false; + + private String relayDescriptorDatabaseJdbc = + "jdbc:postgresql://localhost/tordir?user=metrics&password=password"; + + private boolean writeRelayDescriptorsRawFiles = false; + + private String relayDescriptorRawFilesDirectory = "pg-import/"; + + private boolean writeBridgeStats = false; + + private boolean importWriteTorperfStats = false; + + private String torperfDirectory = "in/torperf/"; + + private String exoneraTorDatabaseJdbc = "jdbc:postgresql:" + + "//localhost/exonerator?user=metrics&password=password"; + + private String exoneraTorImportDirectory = "exonerator-import/"; + + /** Initializes this configuration class. */ + public Configuration() { + + /* Initialize logger. */ + Logger logger = Logger.getLogger(Configuration.class.getName()); + + /* Read config file, if present. 
*/ + File configFile = new File("config"); + if (!configFile.exists()) { + logger.warning("Could not find config file."); + return; + } + String line = null; + try { + BufferedReader br = new BufferedReader(new FileReader(configFile)); + while ((line = br.readLine()) != null) { + if (line.startsWith("#") || line.length() < 1) { + continue; + } else if (line.startsWith("ImportDirectoryArchives")) { + this.importDirectoryArchives = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DirectoryArchivesDirectory")) { + this.directoryArchivesDirectories.add(line.split(" ")[1]); + } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) { + this.keepDirectoryArchiveImportHistory = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("ImportSanitizedBridges")) { + this.importSanitizedBridges = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("SanitizedBridgesDirectory")) { + this.sanitizedBridgesDirectory = line.split(" ")[1]; + } else if (line.startsWith("KeepSanitizedBridgesImportHistory")) { + this.keepSanitizedBridgesImportHistory = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("WriteRelayDescriptorDatabase")) { + this.writeRelayDescriptorDatabase = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("RelayDescriptorDatabaseJDBC")) { + this.relayDescriptorDatabaseJdbc = line.split(" ")[1]; + } else if (line.startsWith("WriteRelayDescriptorsRawFiles")) { + this.writeRelayDescriptorsRawFiles = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("RelayDescriptorRawFilesDirectory")) { + this.relayDescriptorRawFilesDirectory = line.split(" ")[1]; + } else if (line.startsWith("WriteBridgeStats")) { + this.writeBridgeStats = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("ImportWriteTorperfStats")) { + this.importWriteTorperfStats = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if 
(line.startsWith("TorperfDirectory")) { + this.torperfDirectory = line.split(" ")[1]; + } else if (line.startsWith("ExoneraTorDatabaseJdbc")) { + this.exoneraTorDatabaseJdbc = line.split(" ")[1]; + } else if (line.startsWith("ExoneraTorImportDirectory")) { + this.exoneraTorImportDirectory = line.split(" ")[1]; + } else { + logger.severe("Configuration file contains unrecognized " + + "configuration key in line '" + line + "'! Exiting!"); + System.exit(1); + } + } + br.close(); + } catch (ArrayIndexOutOfBoundsException e) { + logger.severe("Configuration file contains configuration key " + + "without value in line '" + line + "'. Exiting!"); + System.exit(1); + } catch (MalformedURLException e) { + logger.severe("Configuration file contains illegal URL or IP:port " + + "pair in line '" + line + "'. Exiting!"); + System.exit(1); + } catch (NumberFormatException e) { + logger.severe("Configuration file contains illegal value in line '" + + line + "' with legal values being 0 or 1. Exiting!"); + System.exit(1); + } catch (IOException e) { + logger.log(Level.SEVERE, "Unknown problem while reading config " + + "file! Exiting!", e); + System.exit(1); + } + } + + public boolean getImportDirectoryArchives() { + return this.importDirectoryArchives; + } + + /** Returns directories containing archived descriptors. 
*/ + public List<String> getDirectoryArchivesDirectories() { + if (this.directoryArchivesDirectories.isEmpty()) { + String prefix = "../../shared/in/recent/relay-descriptors/"; + return Arrays.asList( + (prefix + "consensuses/," + prefix + "server-descriptors/," + + prefix + "extra-infos/").split(",")); + } else { + return this.directoryArchivesDirectories; + } + } + + public boolean getKeepDirectoryArchiveImportHistory() { + return this.keepDirectoryArchiveImportHistory; + } + + public boolean getWriteRelayDescriptorDatabase() { + return this.writeRelayDescriptorDatabase; + } + + public boolean getImportSanitizedBridges() { + return this.importSanitizedBridges; + } + + public String getSanitizedBridgesDirectory() { + return this.sanitizedBridgesDirectory; + } + + public boolean getKeepSanitizedBridgesImportHistory() { + return this.keepSanitizedBridgesImportHistory; + } + + public String getRelayDescriptorDatabaseJdbc() { + return this.relayDescriptorDatabaseJdbc; + } + + public boolean getWriteRelayDescriptorsRawFiles() { + return this.writeRelayDescriptorsRawFiles; + } + + public String getRelayDescriptorRawFilesDirectory() { + return this.relayDescriptorRawFilesDirectory; + } + + public boolean getWriteBridgeStats() { + return this.writeBridgeStats; + } + + public boolean getImportWriteTorperfStats() { + return this.importWriteTorperfStats; + } + + public String getTorperfDirectory() { + return this.torperfDirectory; + } + + public String getExoneraTorDatabaseJdbc() { + return this.exoneraTorDatabaseJdbc; + } + + public String getExoneraTorImportDirectory() { + return this.exoneraTorImportDirectory; + } +} + diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/LockFile.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/LockFile.java new file mode 100644 index 0000000..48eb83d --- /dev/null +++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/LockFile.java @@ -0,0 +1,58 @@ +/* Copyright 2011--2017 The Tor Project + * See LICENSE 
for licensing information */ + +package org.torproject.ernie.cron; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.logging.Logger; + +public class LockFile { + + private File lockFile; + private Logger logger; + + public LockFile() { + this.lockFile = new File("lock"); + this.logger = Logger.getLogger(LockFile.class.getName()); + } + + /** Acquires the lock by checking whether a lock file already exists, + * and if not, by creating one with the current system time as + * content. */ + public boolean acquireLock() { + this.logger.fine("Trying to acquire lock..."); + try { + if (this.lockFile.exists()) { + BufferedReader br = new BufferedReader(new FileReader("lock")); + long runStarted = Long.parseLong(br.readLine()); + br.close(); + if (System.currentTimeMillis() - runStarted + < 23L * 60L * 60L * 1000L) { + return false; + } + } + BufferedWriter bw = new BufferedWriter(new FileWriter("lock")); + bw.append("" + System.currentTimeMillis() + "\n"); + bw.close(); + this.logger.fine("Acquired lock."); + return true; + } catch (IOException e) { + this.logger.warning("Caught exception while trying to acquire " + + "lock!"); + return false; + } + } + + /** Releases the lock by deleting the lock file, if present. 
*/ + public void releaseLock() { + this.logger.fine("Releasing lock..."); + this.lockFile.delete(); + this.logger.fine("Released lock."); + } +} + diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/LoggingConfiguration.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/LoggingConfiguration.java new file mode 100644 index 0000000..f6658c5 --- /dev/null +++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/LoggingConfiguration.java @@ -0,0 +1,100 @@ +/* Copyright 2011--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.ernie.cron; + +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.TimeZone; +import java.util.logging.ConsoleHandler; +import java.util.logging.FileHandler; +import java.util.logging.Formatter; +import java.util.logging.Handler; +import java.util.logging.Level; +import java.util.logging.LogRecord; +import java.util.logging.Logger; + +/** + * Initialize logging configuration. + * + * <p>Log levels used by ERNIE:</p> + * + * <p> + * <ul> + * <li>SEVERE: An event made it impossible to continue program execution. + * WARNING: A potential problem occurred that requires the operator to + * look after the otherwise unattended setup</li> + * <li>INFO: Messages on INFO level are meant to help the operator in + * making sure that operation works as expected.</li> + * <li>FINE: Debug messages that are used to identify problems and which + * are turned on by default.</li> + * <li>FINER: More detailed debug messages to investigate problems in more + * detail. Not turned on by default. Increase log file limit when + * using FINER.</li> + * <li>FINEST: Most detailed debug messages. Not used.</li> + * </ul> + * </p> + */ +public class LoggingConfiguration { + + /** Initializes the logging configuration. */ + public LoggingConfiguration() { + + /* Remove default console handler. 
*/
+    Logger rootLogger = Logger.getLogger("");
+    for (Handler defaultHandler : rootLogger.getHandlers()) {
+      rootLogger.removeHandler(defaultHandler);
+    }
+
+    /* Switch off everything logged below the "sun" logger. */
+    Logger.getLogger("sun").setLevel(Level.OFF);
+
+    /* Lower the root logger's threshold to FINER. */
+    rootLogger.setLevel(Level.FINER);
+
+    /* Console output: UTC timestamp and message only, restricted to
+     * WARNING or higher. */
+    final SimpleDateFormat dateTimeFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    Handler consoleHandler = new ConsoleHandler();
+    consoleHandler.setFormatter(new Formatter() {
+      @Override
+      public String format(LogRecord record) {
+        String timestamp = dateTimeFormat.format(
+            new Date(record.getMillis()));
+        return timestamp + " " + record.getMessage() + "\n";
+      }
+    });
+    consoleHandler.setLevel(Level.WARNING);
+    rootLogger.addHandler(consoleHandler);
+
+    /* Logger of this class, used below to report problems with setting
+     * up the log file. */
+    Logger logger = Logger.getLogger(
+        LoggingConfiguration.class.getName());
+
+    /* File output: timestamp, level, source class and method, message,
+     * and the attached throwable if there is one; FINE or higher. */
+    Formatter fileFormatter = new Formatter() {
+      @Override
+      public String format(LogRecord record) {
+        StringBuilder line = new StringBuilder();
+        line.append(dateTimeFormat.format(new Date(record.getMillis())));
+        line.append(' ').append(record.getLevel());
+        line.append(' ').append(record.getSourceClassName());
+        line.append(' ').append(record.getSourceMethodName());
+        line.append(' ').append(record.getMessage());
+        if (record.getThrown() != null) {
+          line.append(' ').append(record.getThrown());
+        }
+        return line.append('\n').toString();
+      }
+    };
+    try {
+      /* Up to 5 files of 5,000,000 bytes each named "log", appending. */
+      FileHandler fileHandler = new FileHandler("log", 5000000, 5, true);
+      fileHandler.setFormatter(fileFormatter);
+      fileHandler.setLevel(Level.FINE);
+      rootLogger.addHandler(fileHandler);
+    } catch (SecurityException e) {
+      logger.log(Level.WARNING, "No permission to create log file. "
+          + "Logging to file is disabled.", e);
+    } catch (IOException e) {
+      logger.log(Level.WARNING, "Could not write to log file. 
Logging to " + "file is disabled.", e);
+    }
+  }
+}
+
diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/Main.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/Main.java
new file mode 100644
index 0000000..0eab86f
--- /dev/null
+++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/Main.java
@@ -0,0 +1,90 @@
+/* Copyright 2011--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.ernie.cron;
+
+import org.torproject.ernie.cron.network.ConsensusStatsFileHandler;
+import org.torproject.ernie.cron.performance.TorperfProcessor;
+
+import java.io.File;
+import java.util.logging.Logger;
+
+/**
+ * Coordinate downloading and parsing of descriptors and extraction of
+ * statistically relevant data for later processing with R.
+ */
+public class Main {
+
+  /** Executes this data-processing module; exits with 1 if locked. */
+  public static void main(String[] args) {
+
+    /* Initialize logging configuration. */
+    new LoggingConfiguration();
+
+    Logger logger = Logger.getLogger(Main.class.getName());
+    logger.info("Starting ERNIE.");
+
+    // Initialize configuration
+    Configuration config = new Configuration();
+
+    // Use lock file to avoid overlapping runs
+    LockFile lf = new LockFile();
+    if (!lf.acquireLock()) {
+      logger.severe("Warning: ERNIE is already running or has not exited "
+          + "cleanly! Exiting!");
+      System.exit(1);
+    }
+
+    // Define stats directory for temporary files
+    File statsDirectory = new File("stats");
+
+    // Import relay descriptors; rddi is null if no output is configured
+    if (config.getImportDirectoryArchives()) {
+      RelayDescriptorDatabaseImporter rddi =
+          config.getWriteRelayDescriptorDatabase()
+          || config.getWriteRelayDescriptorsRawFiles()
+          ? new RelayDescriptorDatabaseImporter(
+          config.getWriteRelayDescriptorDatabase()
+          ? config.getRelayDescriptorDatabaseJdbc() : null,
+          config.getWriteRelayDescriptorsRawFiles()
+          ? config.getRelayDescriptorRawFilesDirectory() : null,
+          config.getDirectoryArchivesDirectories(),
+          statsDirectory,
+          config.getKeepDirectoryArchiveImportHistory()) : null;
+      if (rddi != null) {
+        rddi.importRelayDescriptors();
+        rddi.closeConnection();
+      }
+    }
+
+    // Prepare consensus stats file handler (used for stats on running
+    // bridges only)
+    ConsensusStatsFileHandler csfh = config.getWriteBridgeStats()
+        ? new ConsensusStatsFileHandler(
+        config.getRelayDescriptorDatabaseJdbc(),
+        new File(config.getSanitizedBridgesDirectory()),
+        statsDirectory, config.getKeepSanitizedBridgesImportHistory())
+        : null;
+
+    // Import sanitized bridges and write updated stats files to disk
+    if (csfh != null) {
+      if (config.getImportSanitizedBridges()) {
+        csfh.importSanitizedBridges();
+      }
+      csfh.writeFiles();
+      csfh = null;
+    }
+
+    // Import and process torperf stats
+    if (config.getImportWriteTorperfStats()) {
+      new TorperfProcessor(new File(config.getTorperfDirectory()),
+          statsDirectory);
+    }
+
+    // Remove lock file
+    lf.releaseLock();
+
+    logger.info("Terminating ERNIE.");
+  }
+}
+
diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java
new file mode 100644
index 0000000..97a330e
--- /dev/null
+++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java
@@ -0,0 +1,995 @@
+/* Copyright 2011--2017 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.ernie.cron;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import 
org.torproject.descriptor.ServerDescriptor; + +import org.postgresql.util.PGbytea; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.sql.CallableStatement; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Parse directory data. + */ + +/* TODO Split up this class and move its parts to cron.network, + * cron.users, and status.relaysearch packages. Requires extensive + * changes to the database schema though. */ +public final class RelayDescriptorDatabaseImporter { + + /** + * How many records to commit with each database transaction. + */ + private final long autoCommitCount = 500; + + /* Counters to keep track of the number of records committed before + * each transaction. */ + + private int rdsCount = 0; + + private int resCount = 0; + + private int rhsCount = 0; + + private int rrsCount = 0; + + private int rcsCount = 0; + + private int rvsCount = 0; + + private int rqsCount = 0; + + /** + * Relay descriptor database connection. + */ + private Connection conn; + + /** + * Prepared statement to check whether any network status consensus + * entries matching a given valid-after time have been imported into the + * database before. + */ + private PreparedStatement psSs; + + /** + * Prepared statement to check whether a given server descriptor has + * been imported into the database before. 
+ */ + private PreparedStatement psDs; + + /** + * Prepared statement to check whether a given network status consensus + * has been imported into the database before. + */ + private PreparedStatement psCs; + + /** + * Set of dates that have been inserted into the database for being + * included in the next refresh run. + */ + private Set<Long> scheduledUpdates; + + /** + * Prepared statement to insert a date into the database that shall be + * included in the next refresh run. + */ + private PreparedStatement psU; + + /** + * Prepared statement to insert a network status consensus entry into + * the database. + */ + private PreparedStatement psR; + + /** + * Prepared statement to insert a server descriptor into the database. + */ + private PreparedStatement psD; + + /** + * Callable statement to insert the bandwidth history of an extra-info + * descriptor into the database. + */ + private CallableStatement csH; + + /** + * Prepared statement to insert a network status consensus into the + * database. + */ + private PreparedStatement psC; + + /** + * Logger for this class. + */ + private Logger logger; + + /** + * Directory for writing raw import files. + */ + private String rawFilesDirectory; + + /** + * Raw import file containing status entries. + */ + private BufferedWriter statusentryOut; + + /** + * Raw import file containing server descriptors. + */ + private BufferedWriter descriptorOut; + + /** + * Raw import file containing bandwidth histories. + */ + private BufferedWriter bwhistOut; + + /** + * Raw import file containing consensuses. + */ + private BufferedWriter consensusOut; + + /** + * Date format to parse timestamps. + */ + private SimpleDateFormat dateTimeFormat; + + /** + * The last valid-after time for which we checked whether they have been + * any network status entries in the database. 
+ */ + private long lastCheckedStatusEntries; + + /** + * Set of fingerprints that we imported for the valid-after time in + * <code>lastCheckedStatusEntries</code>. + */ + private Set<String> insertedStatusEntries = new HashSet<>(); + + private boolean importIntoDatabase; + + private boolean writeRawImportFiles; + + private List<String> archivesDirectories; + + private File statsDirectory; + + private boolean keepImportHistory; + + /** + * Initialize database importer by connecting to the database and + * preparing statements. + */ + public RelayDescriptorDatabaseImporter(String connectionUrl, + String rawFilesDirectory, List<String> archivesDirectories, + File statsDirectory, boolean keepImportHistory) { + + if (archivesDirectories == null || statsDirectory == null) { + throw new IllegalArgumentException(); + } + this.archivesDirectories = archivesDirectories; + this.statsDirectory = statsDirectory; + this.keepImportHistory = keepImportHistory; + + /* Initialize logger. */ + this.logger = Logger.getLogger( + RelayDescriptorDatabaseImporter.class.getName()); + + if (connectionUrl != null) { + try { + /* Connect to database. */ + this.conn = DriverManager.getConnection(connectionUrl); + + /* Turn autocommit off */ + this.conn.setAutoCommit(false); + + /* Prepare statements. 
*/ + this.psSs = conn.prepareStatement("SELECT fingerprint " + + "FROM statusentry WHERE validafter = ?"); + this.psDs = conn.prepareStatement("SELECT COUNT(*) " + + "FROM descriptor WHERE descriptor = ?"); + this.psCs = conn.prepareStatement("SELECT COUNT(*) " + + "FROM consensus WHERE validafter = ?"); + this.psR = conn.prepareStatement("INSERT INTO statusentry " + + "(validafter, nickname, fingerprint, descriptor, " + + "published, address, orport, dirport, isauthority, " + + "isbadexit, isbaddirectory, isexit, isfast, isguard, " + + "ishsdir, isnamed, isstable, isrunning, isunnamed, " + + "isvalid, isv2dir, isv3dir, version, bandwidth, ports, " + + "rawdesc) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, " + + "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); + this.psD = conn.prepareStatement("INSERT INTO descriptor " + + "(descriptor, nickname, address, orport, dirport, " + + "fingerprint, bandwidthavg, bandwidthburst, " + + "bandwidthobserved, platform, published, uptime, " + + "extrainfo) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, " + + "?)"); + this.csH = conn.prepareCall("{call insert_bwhist(?, ?, ?, ?, ?, " + + "?)}"); + this.psC = conn.prepareStatement("INSERT INTO consensus " + + "(validafter) VALUES (?)"); + this.psU = conn.prepareStatement("INSERT INTO scheduled_updates " + + "(date) VALUES (?)"); + this.scheduledUpdates = new HashSet<>(); + this.importIntoDatabase = true; + } catch (SQLException e) { + this.logger.log(Level.WARNING, "Could not connect to database or " + + "prepare statements.", e); + } + } + + /* Remember where we want to write raw import files. */ + if (rawFilesDirectory != null) { + this.rawFilesDirectory = rawFilesDirectory; + this.writeRawImportFiles = true; + } + + /* Initialize date format, so that we can format timestamps. 
*/ + this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + private void addDateToScheduledUpdates(long timestamp) + throws SQLException { + if (!this.importIntoDatabase) { + return; + } + long dateMillis = 0L; + try { + dateMillis = this.dateTimeFormat.parse( + this.dateTimeFormat.format(timestamp).substring(0, 10) + + " 00:00:00").getTime(); + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Internal parsing error.", e); + return; + } + if (!this.scheduledUpdates.contains(dateMillis)) { + this.psU.setDate(1, new java.sql.Date(dateMillis)); + this.psU.execute(); + this.scheduledUpdates.add(dateMillis); + } + } + + /** + * Insert network status consensus entry into database. + */ + public void addStatusEntryContents(long validAfter, String nickname, + String fingerprint, String descriptor, long published, + String address, long orPort, long dirPort, + SortedSet<String> flags, String version, long bandwidth, + String ports, byte[] rawDescriptor) { + if (this.importIntoDatabase) { + try { + this.addDateToScheduledUpdates(validAfter); + Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + Timestamp validAfterTimestamp = new Timestamp(validAfter); + if (lastCheckedStatusEntries != validAfter) { + insertedStatusEntries.clear(); + this.psSs.setTimestamp(1, validAfterTimestamp, cal); + ResultSet rs = psSs.executeQuery(); + while (rs.next()) { + String insertedFingerprint = rs.getString(1); + insertedStatusEntries.add(insertedFingerprint); + } + rs.close(); + lastCheckedStatusEntries = validAfter; + } + if (!insertedStatusEntries.contains(fingerprint)) { + this.psR.clearParameters(); + this.psR.setTimestamp(1, validAfterTimestamp, cal); + this.psR.setString(2, nickname); + this.psR.setString(3, fingerprint); + this.psR.setString(4, descriptor); + this.psR.setTimestamp(5, new Timestamp(published), cal); + this.psR.setString(6, address); + this.psR.setLong(7, 
orPort); + this.psR.setLong(8, dirPort); + this.psR.setBoolean(9, flags.contains("Authority")); + this.psR.setBoolean(10, flags.contains("BadExit")); + this.psR.setBoolean(11, flags.contains("BadDirectory")); + this.psR.setBoolean(12, flags.contains("Exit")); + this.psR.setBoolean(13, flags.contains("Fast")); + this.psR.setBoolean(14, flags.contains("Guard")); + this.psR.setBoolean(15, flags.contains("HSDir")); + this.psR.setBoolean(16, flags.contains("Named")); + this.psR.setBoolean(17, flags.contains("Stable")); + this.psR.setBoolean(18, flags.contains("Running")); + this.psR.setBoolean(19, flags.contains("Unnamed")); + this.psR.setBoolean(20, flags.contains("Valid")); + this.psR.setBoolean(21, flags.contains("V2Dir")); + this.psR.setBoolean(22, flags.contains("V3Dir")); + this.psR.setString(23, version); + this.psR.setLong(24, bandwidth); + this.psR.setString(25, ports); + this.psR.setBytes(26, rawDescriptor); + this.psR.executeUpdate(); + rrsCount++; + if (rrsCount % autoCommitCount == 0) { + this.conn.commit(); + } + insertedStatusEntries.add(fingerprint); + } + } catch (SQLException e) { + this.logger.log(Level.WARNING, "Could not add network status " + + "consensus entry. 
We won't make any further SQL requests " + + "in this execution.", e); + this.importIntoDatabase = false; + } + } + if (this.writeRawImportFiles) { + try { + if (this.statusentryOut == null) { + new File(rawFilesDirectory).mkdirs(); + this.statusentryOut = new BufferedWriter(new FileWriter( + rawFilesDirectory + "/statusentry.sql")); + this.statusentryOut.write(" COPY statusentry (validafter, " + + "nickname, fingerprint, descriptor, published, address, " + + "orport, dirport, isauthority, isbadExit, " + + "isbaddirectory, isexit, isfast, isguard, ishsdir, " + + "isnamed, isstable, isrunning, isunnamed, isvalid, " + + "isv2dir, isv3dir, version, bandwidth, ports, rawdesc) " + + "FROM stdin;\n"); + } + this.statusentryOut.write( + this.dateTimeFormat.format(validAfter) + "\t" + nickname + + "\t" + fingerprint.toLowerCase() + "\t" + + descriptor.toLowerCase() + "\t" + + this.dateTimeFormat.format(published) + "\t" + address + + "\t" + orPort + "\t" + dirPort + "\t" + + (flags.contains("Authority") ? "t" : "f") + "\t" + + (flags.contains("BadExit") ? "t" : "f") + "\t" + + (flags.contains("BadDirectory") ? "t" : "f") + "\t" + + (flags.contains("Exit") ? "t" : "f") + "\t" + + (flags.contains("Fast") ? "t" : "f") + "\t" + + (flags.contains("Guard") ? "t" : "f") + "\t" + + (flags.contains("HSDir") ? "t" : "f") + "\t" + + (flags.contains("Named") ? "t" : "f") + "\t" + + (flags.contains("Stable") ? "t" : "f") + "\t" + + (flags.contains("Running") ? "t" : "f") + "\t" + + (flags.contains("Unnamed") ? "t" : "f") + "\t" + + (flags.contains("Valid") ? "t" : "f") + "\t" + + (flags.contains("V2Dir") ? "t" : "f") + "\t" + + (flags.contains("V3Dir") ? "t" : "f") + "\t" + + (version != null ? version : "\N") + "\t" + + (bandwidth >= 0 ? bandwidth : "\N") + "\t" + + (ports != null ? 
ports : "\N") + "\t"); + this.statusentryOut.write(PGbytea.toPGString(rawDescriptor) + .replaceAll("\\", "\\\\") + "\n"); + } catch (SQLException e) { + this.logger.log(Level.WARNING, "Could not write network status " + + "consensus entry to raw database import file. We won't " + + "make any further attempts to write raw import files in " + + "this execution.", e); + this.writeRawImportFiles = false; + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not write network status " + + "consensus entry to raw database import file. We won't " + + "make any further attempts to write raw import files in " + + "this execution.", e); + this.writeRawImportFiles = false; + } + } + } + + /** + * Insert server descriptor into database. + */ + public void addServerDescriptorContents(String descriptor, + String nickname, String address, int orPort, int dirPort, + String relayIdentifier, long bandwidthAvg, long bandwidthBurst, + long bandwidthObserved, String platform, long published, + long uptime, String extraInfoDigest) { + if (this.importIntoDatabase) { + try { + this.addDateToScheduledUpdates(published); + this.addDateToScheduledUpdates( + published + 24L * 60L * 60L * 1000L); + Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + this.psDs.setString(1, descriptor); + ResultSet rs = psDs.executeQuery(); + rs.next(); + if (rs.getInt(1) == 0) { + this.psD.clearParameters(); + this.psD.setString(1, descriptor); + this.psD.setString(2, nickname); + this.psD.setString(3, address); + this.psD.setInt(4, orPort); + this.psD.setInt(5, dirPort); + this.psD.setString(6, relayIdentifier); + this.psD.setLong(7, bandwidthAvg); + this.psD.setLong(8, bandwidthBurst); + this.psD.setLong(9, bandwidthObserved); + /* Remove all non-ASCII characters from the platform string, or + * we'll make Postgres unhappy. Sun's JDK and OpenJDK behave + * differently when creating a new String with a given encoding. + * That's what the regexp below is for. 
*/ + this.psD.setString(10, new String(platform.getBytes(), + "US-ASCII").replaceAll("[^\p{ASCII}]","")); + this.psD.setTimestamp(11, new Timestamp(published), cal); + this.psD.setLong(12, uptime); + this.psD.setString(13, extraInfoDigest); + this.psD.executeUpdate(); + rdsCount++; + if (rdsCount % autoCommitCount == 0) { + this.conn.commit(); + } + } + } catch (UnsupportedEncodingException e) { + // US-ASCII is supported for sure + } catch (SQLException e) { + this.logger.log(Level.WARNING, "Could not add server " + + "descriptor. We won't make any further SQL requests in " + + "this execution.", e); + this.importIntoDatabase = false; + } + } + if (this.writeRawImportFiles) { + try { + if (this.descriptorOut == null) { + new File(rawFilesDirectory).mkdirs(); + this.descriptorOut = new BufferedWriter(new FileWriter( + rawFilesDirectory + "/descriptor.sql")); + this.descriptorOut.write(" COPY descriptor (descriptor, " + + "nickname, address, orport, dirport, fingerprint, " + + "bandwidthavg, bandwidthburst, bandwidthobserved, " + + "platform, published, uptime, extrainfo) FROM stdin;\n"); + } + this.descriptorOut.write(descriptor.toLowerCase() + "\t" + + nickname + "\t" + address + "\t" + orPort + "\t" + dirPort + + "\t" + relayIdentifier + "\t" + bandwidthAvg + "\t" + + bandwidthBurst + "\t" + bandwidthObserved + "\t" + + (platform != null && platform.length() > 0 + ? new String(platform.getBytes(), "US-ASCII") : "\N") + + "\t" + this.dateTimeFormat.format(published) + "\t" + + (uptime >= 0 ? uptime : "\N") + "\t" + + (extraInfoDigest != null ? extraInfoDigest : "\N") + + "\n"); + } catch (UnsupportedEncodingException e) { + // US-ASCII is supported for sure + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not write server " + + "descriptor to raw database import file. 
We won't make " + + "any further attempts to write raw import files in this " + + "execution.", e); + this.writeRawImportFiles = false; + } + } + } + + /** + * Insert extra-info descriptor into database. + */ + public void addExtraInfoDescriptorContents(String extraInfoDigest, + String nickname, String fingerprint, long published, + List<String> bandwidthHistoryLines) { + if (!bandwidthHistoryLines.isEmpty()) { + this.addBandwidthHistory(fingerprint.toLowerCase(), published, + bandwidthHistoryLines); + } + } + + private static class BigIntArray implements java.sql.Array { + + private final String stringValue; + + public BigIntArray(long[] array, int offset) { + if (array == null) { + this.stringValue = "[-1:-1]={0}"; + } else { + StringBuilder sb = new StringBuilder("[" + offset + ":" + + (offset + array.length - 1) + "]={"); + for (int i = 0; i < array.length; i++) { + sb.append((i > 0 ? "," : "") + array[i]); + } + sb.append('}'); + this.stringValue = sb.toString(); + } + } + + public String toString() { + return stringValue; + } + + public String getBaseTypeName() { + return "int8"; + } + + /* The other methods are never called; no need to implement them. 
*/ + public void free() { + throw new UnsupportedOperationException(); + } + + public Object getArray() { + throw new UnsupportedOperationException(); + } + + public Object getArray(long index, int count) { + throw new UnsupportedOperationException(); + } + + public Object getArray(long index, int count, + Map<String, Class<?>> map) { + throw new UnsupportedOperationException(); + } + + public Object getArray(Map<String, Class<?>> map) { + throw new UnsupportedOperationException(); + } + + public int getBaseType() { + throw new UnsupportedOperationException(); + } + + public ResultSet getResultSet() { + throw new UnsupportedOperationException(); + } + + public ResultSet getResultSet(long index, int count) { + throw new UnsupportedOperationException(); + } + + public ResultSet getResultSet(long index, int count, + Map<String, Class<?>> map) { + throw new UnsupportedOperationException(); + } + + public ResultSet getResultSet(Map<String, Class<?>> map) { + throw new UnsupportedOperationException(); + } + } + + /** Inserts a bandwidth history into database. */ + public void addBandwidthHistory(String fingerprint, long published, + List<String> bandwidthHistoryStrings) { + + /* Split history lines by date and rewrite them so that the date + * comes first. */ + SortedSet<String> historyLinesByDate = new TreeSet<>(); + for (String bandwidthHistoryString : bandwidthHistoryStrings) { + String[] parts = bandwidthHistoryString.split(" "); + if (parts.length != 6) { + this.logger.finer("Bandwidth history line does not have expected " + + "number of elements. Ignoring this line."); + continue; + } + long intervalLength = 0L; + try { + intervalLength = Long.parseLong(parts[3].substring(1)); + } catch (NumberFormatException e) { + this.logger.fine("Bandwidth history line does not have valid " + + "interval length '" + parts[3] + " " + parts[4] + "'. 
" + + "Ignoring this line."); + continue; + } + String[] values = parts[5].split(","); + if (intervalLength % 900L != 0L) { + this.logger.fine("Bandwidth history line does not contain " + + "multiples of 15-minute intervals. Ignoring this line."); + continue; + } else if (intervalLength != 900L) { + /* This is a really dirty hack to support bandwidth history + * intervals that are longer than 15 minutes by linearly + * distributing reported bytes to 15 minute intervals. The + * alternative would have been to modify the database schema. */ + try { + long factor = intervalLength / 900L; + String[] newValues = new String[values.length * (int) factor]; + for (int i = 0; i < newValues.length; i++) { + newValues[i] = String.valueOf( + Long.parseLong(values[i / (int) factor]) / factor); + } + values = newValues; + intervalLength = 900L; + } catch (NumberFormatException e) { + this.logger.fine("Number format exception while parsing " + + "bandwidth history line. Ignoring this line."); + continue; + } + } + String type = parts[0]; + String intervalEndTime = parts[1] + " " + parts[2]; + long intervalEnd; + long dateStart; + try { + intervalEnd = dateTimeFormat.parse(intervalEndTime).getTime(); + dateStart = dateTimeFormat.parse(parts[1] + " 00:00:00") + .getTime(); + } catch (ParseException e) { + this.logger.fine("Parse exception while parsing timestamp in " + + "bandwidth history line. Ignoring this line."); + continue; + } + if (Math.abs(published - intervalEnd) + > 7L * 24L * 60L * 60L * 1000L) { + this.logger.fine("Extra-info descriptor publication time " + + dateTimeFormat.format(published) + " and last interval " + + "time " + intervalEndTime + " in " + type + " line differ " + + "by more than 7 days! 
Not adding this line!"); + continue; + } + long currentIntervalEnd = intervalEnd; + StringBuilder sb = new StringBuilder(); + SortedSet<String> newHistoryLines = new TreeSet<>(); + try { + for (int i = values.length - 1; i >= -1; i--) { + if (i == -1 || currentIntervalEnd < dateStart) { + sb.insert(0, intervalEndTime + " " + type + " (" + + intervalLength + " s) "); + sb.setLength(sb.length() - 1); + String historyLine = sb.toString(); + newHistoryLines.add(historyLine); + sb = new StringBuilder(); + dateStart -= 24L * 60L * 60L * 1000L; + intervalEndTime = dateTimeFormat.format(currentIntervalEnd); + } + if (i == -1) { + break; + } + Long.parseLong(values[i]); + sb.insert(0, values[i] + ","); + currentIntervalEnd -= intervalLength * 1000L; + } + } catch (NumberFormatException e) { + this.logger.fine("Number format exception while parsing " + + "bandwidth history line. Ignoring this line."); + continue; + } + historyLinesByDate.addAll(newHistoryLines); + } + + /* Add split history lines to database. 
*/ + String lastDate = null; + historyLinesByDate.add("EOL"); + long[] readArray = null; + long[] writtenArray = null; + long[] dirreadArray = null; + long[] dirwrittenArray = null; + int readOffset = 0; + int writtenOffset = 0; + int dirreadOffset = 0; + int dirwrittenOffset = 0; + for (String historyLine : historyLinesByDate) { + String[] parts = historyLine.split(" "); + String currentDate = parts[0]; + if (lastDate != null && (historyLine.equals("EOL") + || !currentDate.equals(lastDate))) { + BigIntArray readIntArray = new BigIntArray(readArray, + readOffset); + BigIntArray writtenIntArray = new BigIntArray(writtenArray, + writtenOffset); + BigIntArray dirreadIntArray = new BigIntArray(dirreadArray, + dirreadOffset); + BigIntArray dirwrittenIntArray = new BigIntArray(dirwrittenArray, + dirwrittenOffset); + if (this.importIntoDatabase) { + try { + long dateMillis = dateTimeFormat.parse(lastDate + + " 00:00:00").getTime(); + this.addDateToScheduledUpdates(dateMillis); + this.csH.setString(1, fingerprint); + this.csH.setDate(2, new java.sql.Date(dateMillis)); + this.csH.setArray(3, readIntArray); + this.csH.setArray(4, writtenIntArray); + this.csH.setArray(5, dirreadIntArray); + this.csH.setArray(6, dirwrittenIntArray); + this.csH.addBatch(); + rhsCount++; + if (rhsCount % autoCommitCount == 0) { + this.csH.executeBatch(); + } + } catch (SQLException e) { + this.logger.log(Level.WARNING, "Could not insert bandwidth " + + "history line into database. We won't make any " + + "further SQL requests in this execution.", e); + this.importIntoDatabase = false; + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Could not insert bandwidth " + + "history line into database. 
We won't make any " + + "further SQL requests in this execution.", e); + this.importIntoDatabase = false; + } + } + if (this.writeRawImportFiles) { + try { + if (this.bwhistOut == null) { + new File(rawFilesDirectory).mkdirs(); + this.bwhistOut = new BufferedWriter(new FileWriter( + rawFilesDirectory + "/bwhist.sql")); + } + this.bwhistOut.write("SELECT insert_bwhist('" + fingerprint + + "','" + lastDate + "','" + readIntArray.toString() + + "','" + writtenIntArray.toString() + "','" + + dirreadIntArray.toString() + "','" + + dirwrittenIntArray.toString() + "');\n"); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not write bandwidth " + + "history to raw database import file. We won't make " + + "any further attempts to write raw import files in " + + "this execution.", e); + this.writeRawImportFiles = false; + } + } + readArray = writtenArray = dirreadArray = dirwrittenArray = null; + } + if (historyLine.equals("EOL")) { + break; + } + long lastIntervalTime; + try { + lastIntervalTime = dateTimeFormat.parse(parts[0] + " " + + parts[1]).getTime() - dateTimeFormat.parse(parts[0] + + " 00:00:00").getTime(); + } catch (ParseException e) { + continue; + } + String[] stringValues = parts[5].split(","); + long[] longValues = new long[stringValues.length]; + for (int i = 0; i < longValues.length; i++) { + longValues[i] = Long.parseLong(stringValues[i]); + } + + int offset = (int) (lastIntervalTime / (15L * 60L * 1000L)) + - longValues.length + 1; + String type = parts[2]; + if (type.equals("read-history")) { + readArray = longValues; + readOffset = offset; + } else if (type.equals("write-history")) { + writtenArray = longValues; + writtenOffset = offset; + } else if (type.equals("dirreq-read-history")) { + dirreadArray = longValues; + dirreadOffset = offset; + } else if (type.equals("dirreq-write-history")) { + dirwrittenArray = longValues; + dirwrittenOffset = offset; + } + lastDate = currentDate; + } + } + + /** + * Insert network status consensus 
into database. + */ + public void addConsensus(long validAfter) { + if (this.importIntoDatabase) { + try { + this.addDateToScheduledUpdates(validAfter); + Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + Timestamp validAfterTimestamp = new Timestamp(validAfter); + this.psCs.setTimestamp(1, validAfterTimestamp, cal); + ResultSet rs = psCs.executeQuery(); + rs.next(); + if (rs.getInt(1) == 0) { + this.psC.clearParameters(); + this.psC.setTimestamp(1, validAfterTimestamp, cal); + this.psC.executeUpdate(); + rcsCount++; + if (rcsCount % autoCommitCount == 0) { + this.conn.commit(); + } + } + } catch (SQLException e) { + this.logger.log(Level.WARNING, "Could not add network status " + + "consensus. We won't make any further SQL requests in " + + "this execution.", e); + this.importIntoDatabase = false; + } + } + if (this.writeRawImportFiles) { + try { + if (this.consensusOut == null) { + new File(rawFilesDirectory).mkdirs(); + this.consensusOut = new BufferedWriter(new FileWriter( + rawFilesDirectory + "/consensus.sql")); + this.consensusOut.write(" COPY consensus (validafter) " + + "FROM stdin;\n"); + } + String validAfterString = this.dateTimeFormat.format(validAfter); + this.consensusOut.write(validAfterString + "\n"); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not write network status " + + "consensus to raw database import file. We won't make " + + "any further attempts to write raw import files in this " + + "execution.", e); + this.writeRawImportFiles = false; + } + } + } + + /** Imports relay descriptors into the database. 
*/ + public void importRelayDescriptors() { + logger.fine("Importing files in directories " + archivesDirectories + + "/..."); + if (!this.archivesDirectories.isEmpty()) { + DescriptorReader reader = + DescriptorSourceFactory.createDescriptorReader(); + reader.setMaxDescriptorFilesInQueue(10); + for (String archivesPath : this.archivesDirectories) { + File archivesDirectory = new File(archivesPath); + if (archivesDirectory.exists()) { + reader.addDirectory(archivesDirectory); + } + } + if (keepImportHistory) { + reader.setExcludeFiles(new File(statsDirectory, + "database-importer-relay-descriptor-history")); + } + Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + if (descriptorFile.getDescriptors() != null) { + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (descriptor instanceof RelayNetworkStatusConsensus) { + this.addRelayNetworkStatusConsensus( + (RelayNetworkStatusConsensus) descriptor); + } else if (descriptor instanceof ServerDescriptor) { + this.addServerDescriptor((ServerDescriptor) descriptor); + } else if (descriptor instanceof ExtraInfoDescriptor) { + this.addExtraInfoDescriptor( + (ExtraInfoDescriptor) descriptor); + } + } + } + } + } + + logger.info("Finished importing relay descriptors."); + } + + private void addRelayNetworkStatusConsensus( + RelayNetworkStatusConsensus consensus) { + for (NetworkStatusEntry statusEntry + : consensus.getStatusEntries().values()) { + this.addStatusEntryContents(consensus.getValidAfterMillis(), + statusEntry.getNickname(), + statusEntry.getFingerprint().toLowerCase(), + statusEntry.getDescriptor().toLowerCase(), + statusEntry.getPublishedMillis(), statusEntry.getAddress(), + statusEntry.getOrPort(), statusEntry.getDirPort(), + statusEntry.getFlags(), statusEntry.getVersion(), + statusEntry.getBandwidth(), statusEntry.getPortList(), + statusEntry.getStatusEntryBytes()); + } + 
this.addConsensus(consensus.getValidAfterMillis()); + } + + private void addServerDescriptor(ServerDescriptor descriptor) { + this.addServerDescriptorContents( + descriptor.getServerDescriptorDigest(), descriptor.getNickname(), + descriptor.getAddress(), descriptor.getOrPort(), + descriptor.getDirPort(), descriptor.getFingerprint(), + descriptor.getBandwidthRate(), descriptor.getBandwidthBurst(), + descriptor.getBandwidthObserved(), descriptor.getPlatform(), + descriptor.getPublishedMillis(), descriptor.getUptime(), + descriptor.getExtraInfoDigest()); + } + + private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) { + List<String> bandwidthHistoryLines = new ArrayList<>(); + if (descriptor.getWriteHistory() != null) { + bandwidthHistoryLines.add(descriptor.getWriteHistory().getLine()); + } + if (descriptor.getReadHistory() != null) { + bandwidthHistoryLines.add(descriptor.getReadHistory().getLine()); + } + if (descriptor.getDirreqWriteHistory() != null) { + bandwidthHistoryLines.add( + descriptor.getDirreqWriteHistory().getLine()); + } + if (descriptor.getDirreqReadHistory() != null) { + bandwidthHistoryLines.add( + descriptor.getDirreqReadHistory().getLine()); + } + this.addExtraInfoDescriptorContents(descriptor.getExtraInfoDigest(), + descriptor.getNickname(), + descriptor.getFingerprint().toLowerCase(), + descriptor.getPublishedMillis(), bandwidthHistoryLines); + } + + /** + * Close the relay descriptor database connection. + */ + public void closeConnection() { + + /* Log stats about imported descriptors. 
*/ + this.logger.info(String.format("Finished importing relay " + + "descriptors: %d consensuses, %d network status entries, %d " + + "votes, %d server descriptors, %d extra-info descriptors, %d " + + "bandwidth history elements, and %d dirreq stats elements", + rcsCount, rrsCount, rvsCount, rdsCount, resCount, rhsCount, + rqsCount)); + + /* Insert scheduled updates a second time, just in case the refresh + * run has started since inserting them the first time in which case + * it will miss the data inserted afterwards. We cannot, however, + * insert them only now, because if a Java execution fails at a random + * point, we might have added data, but not the corresponding dates to + * update statistics. */ + if (this.importIntoDatabase) { + try { + for (long dateMillis : this.scheduledUpdates) { + this.psU.setDate(1, new java.sql.Date(dateMillis)); + this.psU.execute(); + } + } catch (SQLException e) { + this.logger.log(Level.WARNING, "Could not add scheduled dates " + + "for the next refresh run.", e); + } + } + + /* Commit any stragglers before closing. */ + if (this.conn != null) { + try { + this.csH.executeBatch(); + + this.conn.commit(); + } catch (SQLException e) { + this.logger.log(Level.WARNING, "Could not commit final records " + + "to database", e); + } + try { + this.conn.close(); + } catch (SQLException e) { + this.logger.log(Level.WARNING, "Could not close database " + + "connection.", e); + } + } + + /* Close raw import files. 
*/ + try { + if (this.statusentryOut != null) { + this.statusentryOut.write("\.\n"); + this.statusentryOut.close(); + } + if (this.descriptorOut != null) { + this.descriptorOut.write("\.\n"); + this.descriptorOut.close(); + } + if (this.bwhistOut != null) { + this.bwhistOut.write("\.\n"); + this.bwhistOut.close(); + } + if (this.consensusOut != null) { + this.consensusOut.write("\.\n"); + this.consensusOut.close(); + } + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not close one or more raw " + + "database import files.", e); + } + } +} + diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/network/ConsensusStatsFileHandler.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/network/ConsensusStatsFileHandler.java new file mode 100644 index 0000000..aa9469e --- /dev/null +++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/network/ConsensusStatsFileHandler.java @@ -0,0 +1,412 @@ +/* Copyright 2011--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.ernie.cron.network; + +import org.torproject.descriptor.BridgeNetworkStatus; +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.NetworkStatusEntry; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.SortedMap; +import java.util.TimeZone; +import java.util.TreeMap; +import 
java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Generates statistics on the average number of relays and bridges per + * day. Accepts parse results from <code>RelayDescriptorParser</code> and + * <code>BridgeDescriptorParser</code> and stores them in intermediate + * result files <code>stats/consensus-stats-raw</code> and + * <code>stats/bridge-consensus-stats-raw</code>. Writes final results to + * <code>stats/consensus-stats</code> for all days for which at least half + * of the expected consensuses or statuses are known. + */ +public class ConsensusStatsFileHandler { + + /** + * Intermediate results file holding the number of running bridges per + * bridge status. + */ + private File bridgeConsensusStatsRawFile; + + /** + * Number of running bridges in a given bridge status. Map keys are the bridge + * status time formatted as "yyyy-MM-dd HH:mm:ss", a comma, and the bridge + * authority nickname, map values are lines as read from + * <code>stats/bridge-consensus-stats-raw</code>. + */ + private SortedMap<String, String> bridgesRaw; + + /** + * Average number of running bridges per day. Map keys are dates + * formatted as "yyyy-MM-dd", map values are the remaining columns as written + * to <code>stats/consensus-stats</code>. + */ + private SortedMap<String, String> bridgesPerDay; + + /** + * Logger for this class. + */ + private Logger logger; + + private int bridgeResultsAdded = 0; + + /* Database connection string. */ + private String connectionUrl = null; + + private SimpleDateFormat dateTimeFormat; + + private File bridgesDir; + + private File statsDirectory; + + private boolean keepImportHistory; + + /** + * Initializes this class, including reading in intermediate results + * files <code>stats/consensus-stats-raw</code> and + * <code>stats/bridge-consensus-stats-raw</code> and final results file + * <code>stats/consensus-stats</code>. 
+ */ + public ConsensusStatsFileHandler(String connectionUrl, + File bridgesDir, File statsDirectory, + boolean keepImportHistory) { + + if (bridgesDir == null || statsDirectory == null) { + throw new IllegalArgumentException(); + } + this.bridgesDir = bridgesDir; + this.statsDirectory = statsDirectory; + this.keepImportHistory = keepImportHistory; + + /* Initialize local data structures to hold intermediate and final + * results. */ + this.bridgesPerDay = new TreeMap<>(); + this.bridgesRaw = new TreeMap<>(); + + /* Initialize file names for intermediate and final results files. */ + this.bridgeConsensusStatsRawFile = new File( + "stats/bridge-consensus-stats-raw"); + + /* Initialize database connection string. */ + this.connectionUrl = connectionUrl; + + this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + + /* Initialize logger. */ + this.logger = Logger.getLogger( + ConsensusStatsFileHandler.class.getName()); + + /* Read in number of running bridges per bridge status. */ + if (this.bridgeConsensusStatsRawFile.exists()) { + try { + this.logger.fine("Reading file " + + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "..."); + BufferedReader br = new BufferedReader(new FileReader( + this.bridgeConsensusStatsRawFile)); + String line = null; + while ((line = br.readLine()) != null) { + if (line.startsWith("date")) { + /* Skip headers. */ + continue; + } + String[] parts = line.split(","); + if (parts.length < 2 || parts.length > 4) { + this.logger.warning("Corrupt line '" + line + "' in file " + + this.bridgeConsensusStatsRawFile.getAbsolutePath() + + "! Aborting to read this file!"); + break; + } + /* Assume that all lines without authority nickname are based on + * Tonga's network status, not Bifroest's. */ + String key = parts[0] + "," + (parts.length < 4 ? 
"Tonga" : parts[1]); + String value = null; + if (parts.length == 2) { + value = key + "," + parts[1] + ",0"; + } else if (parts.length == 3) { + value = key + "," + parts[1] + "," + parts[2]; + } else if (parts.length == 4) { + value = key + "," + parts[2] + "," + parts[3]; + } /* No more cases as we already checked the range above. */ + this.bridgesRaw.put(key, value); + } + br.close(); + this.logger.fine("Finished reading file " + + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "."); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed to read file " + + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "!", + e); + } + } + } + + /** + * Adds the intermediate results of the number of running bridges in a + * given bridge status to the existing observations. + */ + public void addBridgeConsensusResults(long publishedMillis, + String authorityNickname, int running, int runningEc2Bridges) { + String publishedAuthority = dateTimeFormat.format(publishedMillis) + "," + + authorityNickname; + String line = publishedAuthority + "," + running + "," + runningEc2Bridges; + if (!this.bridgesRaw.containsKey(publishedAuthority)) { + this.logger.finer("Adding new bridge numbers: " + line); + this.bridgesRaw.put(publishedAuthority, line); + this.bridgeResultsAdded++; + } else if (!line.equals(this.bridgesRaw.get(publishedAuthority))) { + this.logger.warning("The numbers of running bridges we were just " + + "given (" + line + ") are different from what we learned " + + "before (" + this.bridgesRaw.get(publishedAuthority) + ")! " + + "Overwriting!"); + this.bridgesRaw.put(publishedAuthority, line); + } + } + + /** Imports sanitized bridge descriptors. 
*/ + public void importSanitizedBridges() { + if (bridgesDir.exists()) { + logger.fine("Importing files in directory " + bridgesDir + "/..."); + DescriptorReader reader = + DescriptorSourceFactory.createDescriptorReader(); + reader.addDirectory(bridgesDir); + if (keepImportHistory) { + reader.setExcludeFiles(new File(statsDirectory, + "consensus-stats-bridge-descriptor-history")); + } + Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + if (descriptorFile.getDescriptors() != null) { + String authority = null; + if (descriptorFile.getFileName().contains( + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D")) { + authority = "Tonga"; + } else if (descriptorFile.getFileName().contains( + "1D8F3A91C37C5D1C4C19B1AD1D0CFBE8BF72D8E1")) { + authority = "Bifroest"; + } + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (descriptor instanceof BridgeNetworkStatus) { + if (authority == null) { + this.logger.warning("Did not recognize the bridge authority " + + "that generated " + descriptorFile.getFileName() + + ". Skipping."); + continue; + } + this.addBridgeNetworkStatus( + (BridgeNetworkStatus) descriptor, authority); + } + } + } + } + logger.info("Finished importing bridge descriptors."); + } + } + + private void addBridgeNetworkStatus(BridgeNetworkStatus status, + String authority) { + int runningBridges = 0; + int runningEc2Bridges = 0; + for (NetworkStatusEntry statusEntry + : status.getStatusEntries().values()) { + if (statusEntry.getFlags().contains("Running")) { + runningBridges++; + if (statusEntry.getNickname().startsWith("ec2bridge")) { + runningEc2Bridges++; + } + } + } + this.addBridgeConsensusResults(status.getPublishedMillis(), authority, + runningBridges, runningEc2Bridges); + } + + /** + * Aggregates the raw observations on relay and bridge numbers and + * writes both raw and aggregate observations to disk. 
+ */ + public void writeFiles() { + + /* Go through raw observations and put everything into nested maps by day + * and bridge authority. */ + Map<String, Map<String, int[]>> bridgesPerDayAndAuthority = new HashMap<>(); + for (String bridgesRawLine : this.bridgesRaw.values()) { + String date = bridgesRawLine.substring(0, 10); + if (!bridgesPerDayAndAuthority.containsKey(date)) { + bridgesPerDayAndAuthority.put(date, new TreeMap<String, int[]>()); + } + String[] parts = bridgesRawLine.split(","); + String authority = parts[1]; + if (!bridgesPerDayAndAuthority.get(date).containsKey(authority)) { + bridgesPerDayAndAuthority.get(date).put(authority, new int[3]); + } + int[] bridges = bridgesPerDayAndAuthority.get(date).get(authority); + bridges[0] += Integer.parseInt(parts[2]); + bridges[1] += Integer.parseInt(parts[3]); + bridges[2]++; + } + + /* Sum up average numbers of running bridges per day reported by all bridge + * authorities and add these averages to final results. */ + for (Map.Entry<String, Map<String, int[]>> perDay + : bridgesPerDayAndAuthority.entrySet()) { + String date = perDay.getKey(); + int brunning = 0; + int brunningEc2 = 0; + for (int[] perAuthority : perDay.getValue().values()) { + int statuses = perAuthority[2]; + if (statuses < 12) { + /* Only write results if we have seen at least a dozen statuses. */ + continue; + } + brunning += perAuthority[0] / statuses; + brunningEc2 += perAuthority[1] / statuses; + } + String line = "," + brunning + "," + brunningEc2; + /* Are our results new? */ + if (!this.bridgesPerDay.containsKey(date)) { + this.logger.finer("Adding new average bridge numbers: " + date + line); + this.bridgesPerDay.put(date, line); + } else if (!line.equals(this.bridgesPerDay.get(date))) { + this.logger.finer("Replacing existing average bridge numbers (" + + this.bridgesPerDay.get(date) + " with new numbers: " + line); + this.bridgesPerDay.put(date, line); + } + } + + /* Write raw numbers of running bridges to disk. 
*/ + try { + this.logger.fine("Writing file " + + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "..."); + this.bridgeConsensusStatsRawFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter( + new FileWriter(this.bridgeConsensusStatsRawFile)); + bw.append("datetime,authority,brunning,brunningec2"); + bw.newLine(); + for (String line : this.bridgesRaw.values()) { + bw.append(line); + bw.newLine(); + } + bw.close(); + this.logger.fine("Finished writing file " + + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "."); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed to write file " + + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "!", + e); + } + + /* Add average number of bridges per day to the database. */ + if (connectionUrl != null) { + try { + Map<String, String> insertRows = new HashMap<>(); + Map<String, String> updateRows = new HashMap<>(); + insertRows.putAll(this.bridgesPerDay); + Connection conn = DriverManager.getConnection(connectionUrl); + conn.setAutoCommit(false); + Statement statement = conn.createStatement(); + ResultSet rs = statement.executeQuery( + "SELECT date, avg_running, avg_running_ec2 " + + "FROM bridge_network_size"); + while (rs.next()) { + String date = rs.getDate(1).toString(); + if (insertRows.containsKey(date)) { + String insertRow = insertRows.remove(date); + String[] parts = insertRow.substring(1).split(","); + long newAvgRunning = Long.parseLong(parts[0]); + long newAvgRunningEc2 = Long.parseLong(parts[1]); + long oldAvgRunning = rs.getLong(2); + long oldAvgRunningEc2 = rs.getLong(3); + if (newAvgRunning != oldAvgRunning + || newAvgRunningEc2 != oldAvgRunningEc2) { + updateRows.put(date, insertRow); + } + } + } + rs.close(); + PreparedStatement psU = conn.prepareStatement( + "UPDATE bridge_network_size SET avg_running = ?, " + + "avg_running_ec2 = ? 
WHERE date = ?"); + for (Map.Entry<String, String> e : updateRows.entrySet()) { + java.sql.Date date = java.sql.Date.valueOf(e.getKey()); + String[] parts = e.getValue().substring(1).split(","); + long avgRunning = Long.parseLong(parts[0]); + long avgRunningEc2 = Long.parseLong(parts[1]); + psU.clearParameters(); + psU.setLong(1, avgRunning); + psU.setLong(2, avgRunningEc2); + psU.setDate(3, date); + psU.executeUpdate(); + } + PreparedStatement psI = conn.prepareStatement( + "INSERT INTO bridge_network_size (avg_running, " + + "avg_running_ec2, date) VALUES (?, ?, ?)"); + for (Map.Entry<String, String> e : insertRows.entrySet()) { + java.sql.Date date = java.sql.Date.valueOf(e.getKey()); + String[] parts = e.getValue().substring(1).split(","); + long avgRunning = Long.parseLong(parts[0]); + long avgRunningEc2 = Long.parseLong(parts[1]); + psI.clearParameters(); + psI.setLong(1, avgRunning); + psI.setLong(2, avgRunningEc2); + psI.setDate(3, date); + psI.executeUpdate(); + } + conn.commit(); + conn.close(); + } catch (SQLException e) { + logger.log(Level.WARNING, "Failed to add average bridge numbers " + + "to database.", e); + } + } + + /* Write stats. 
*/ + StringBuilder dumpStats = new StringBuilder("Finished writing " + + "statistics on bridge network statuses to disk.\nAdded " + + this.bridgeResultsAdded + " bridge network status(es) in this " + + "execution."); + long now = System.currentTimeMillis(); + SimpleDateFormat dateTimeFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + if (this.bridgesRaw.isEmpty()) { + dumpStats.append("\nNo bridge status known yet."); + } else { + dumpStats.append("\nLast known bridge status was published " + + this.bridgesRaw.lastKey() + "."); + try { + if (now - 6L * 60L * 60L * 1000L > dateTimeFormat.parse( + this.bridgesRaw.lastKey()).getTime()) { + logger.warning("Last known bridge status is more than 6 hours " + + "old: " + this.bridgesRaw.lastKey()); + } + } catch (ParseException e) { + logger.warning("Can't parse the timestamp? Reason: " + e); + } + } + logger.info(dumpStats.toString()); + } +} + diff --git a/modules/legacy/src/main/java/org/torproject/ernie/cron/performance/TorperfProcessor.java b/modules/legacy/src/main/java/org/torproject/ernie/cron/performance/TorperfProcessor.java new file mode 100644 index 0000000..2883299 --- /dev/null +++ b/modules/legacy/src/main/java/org/torproject/ernie/cron/performance/TorperfProcessor.java @@ -0,0 +1,292 @@ +/* Copyright 2011--2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.ernie.cron.performance; + +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.TorperfResult; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collections; 
+import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class TorperfProcessor { + + /** Processes Torperf data from the given directory and writes + * aggregates statistics to the given stats directory. */ + public TorperfProcessor(File torperfDirectory, File statsDirectory) { + + if (torperfDirectory == null || statsDirectory == null) { + throw new IllegalArgumentException(); + } + + Logger logger = Logger.getLogger(TorperfProcessor.class.getName()); + File rawFile = new File(statsDirectory, "torperf-raw"); + File statsFile = new File(statsDirectory, "torperf.csv"); + SortedMap<String, String> rawObs = new TreeMap<>(); + SortedMap<String, String> stats = new TreeMap<>(); + int addedRawObs = 0; + SimpleDateFormat formatter = + new SimpleDateFormat("yyyy-MM-dd,HH:mm:ss"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + try { + if (rawFile.exists()) { + logger.fine("Reading file " + rawFile.getAbsolutePath() + "..."); + BufferedReader br = new BufferedReader(new FileReader(rawFile)); + String line = br.readLine(); // ignore header + while ((line = br.readLine()) != null) { + if (line.split(",").length != 4) { + logger.warning("Corrupt line in " + rawFile.getAbsolutePath() + + "!"); + break; + } + String key = line.substring(0, line.lastIndexOf(",")); + rawObs.put(key, line); + } + br.close(); + logger.fine("Finished reading file " + rawFile.getAbsolutePath() + + "."); + } + if (statsFile.exists()) { + logger.fine("Reading file " + statsFile.getAbsolutePath() + + "..."); + BufferedReader br = new BufferedReader(new FileReader(statsFile)); + String line = br.readLine(); // ignore header + while ((line = br.readLine()) != null) { + String[] parts = line.split(","); + String key = String.format("%s,%s,%s", parts[0], parts[1], + parts[2]); + stats.put(key, line); + } + br.close(); + 
logger.fine("Finished reading file " + statsFile.getAbsolutePath() + + "."); + } + if (torperfDirectory.exists()) { + logger.fine("Importing files in " + torperfDirectory + "/..."); + DescriptorReader descriptorReader = + DescriptorSourceFactory.createDescriptorReader(); + descriptorReader.addDirectory(torperfDirectory); + descriptorReader.setExcludeFiles(new File(statsDirectory, + "torperf-history")); + Iterator<DescriptorFile> descriptorFiles = + descriptorReader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + if (descriptorFile.getException() != null) { + logger.log(Level.FINE, "Error parsing file.", + descriptorFile.getException()); + continue; + } + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (!(descriptor instanceof TorperfResult)) { + continue; + } + TorperfResult result = (TorperfResult) descriptor; + String source = result.getSource(); + long fileSize = result.getFileSize(); + if (fileSize == 51200) { + source += "-50kb"; + } else if (fileSize == 1048576) { + source += "-1mb"; + } else if (fileSize == 5242880) { + source += "-5mb"; + } else { + logger.fine("Unexpected file size '" + fileSize + + "'. 
Skipping."); + continue; + } + String dateTime = formatter.format(result.getStartMillis()); + long completeMillis = result.getDataCompleteMillis() + - result.getStartMillis(); + String key = source + "," + dateTime; + String value = key; + if ((result.didTimeout() == null + && result.getDataCompleteMillis() < 1) + || (result.didTimeout() != null && result.didTimeout())) { + value += ",-2"; // -2 for timeout + } else if (result.getReadBytes() < fileSize) { + value += ",-1"; // -1 for failure + } else { + value += "," + completeMillis; + } + if (!rawObs.containsKey(key)) { + rawObs.put(key, value); + addedRawObs++; + } + } + } + logger.fine("Finished importing files in " + torperfDirectory + + "/."); + } + if (rawObs.size() > 0) { + logger.fine("Writing file " + rawFile.getAbsolutePath() + "..."); + rawFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter(rawFile)); + bw.append("source,date,start,completemillis\n"); + String tempSourceDate = null; + Iterator<Map.Entry<String, String>> it = + rawObs.entrySet().iterator(); + List<Long> dlTimes = new ArrayList<>(); + boolean haveWrittenFinalLine = false; + SortedMap<String, List<Long>> dlTimesAllSources = new TreeMap<>(); + SortedMap<String, long[]> statusesAllSources = new TreeMap<>(); + long failures = 0; + long timeouts = 0; + long requests = 0; + while (it.hasNext() || !haveWrittenFinalLine) { + Map.Entry<String, String> next = + it.hasNext() ? 
it.next() : null; + if (tempSourceDate != null + && (next == null || !(next.getValue().split(",")[0] + "," + + next.getValue().split(",")[1]).equals(tempSourceDate))) { + if (dlTimes.size() > 4) { + Collections.sort(dlTimes); + long q1 = dlTimes.get(dlTimes.size() / 4 - 1); + long md = dlTimes.get(dlTimes.size() / 2 - 1); + long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1); + String[] tempParts = tempSourceDate.split("[-,]", 3); + String tempDate = tempParts[2]; + int tempSize = Integer.parseInt( + tempParts[1].substring(0, tempParts[1].length() - 2)) + * 1024 * (tempParts[1].endsWith("mb") ? 1024 : 1); + String tempSource = tempParts[0]; + String tempDateSizeSource = String.format("%s,%d,%s", + tempDate, tempSize, tempSource); + stats.put(tempDateSizeSource, + String.format("%s,%s,%s,%s,%s,%s,%s", + tempDateSizeSource, q1, md, q3, timeouts, failures, + requests)); + String allDateSizeSource = String.format("%s,%d,", + tempDate, tempSize); + if (dlTimesAllSources.containsKey(allDateSizeSource)) { + dlTimesAllSources.get(allDateSizeSource).addAll(dlTimes); + } else { + dlTimesAllSources.put(allDateSizeSource, dlTimes); + } + if (statusesAllSources.containsKey(allDateSizeSource)) { + long[] status = statusesAllSources.get(allDateSizeSource); + status[0] += timeouts; + status[1] += failures; + status[2] += requests; + } else { + long[] status = new long[3]; + status[0] = timeouts; + status[1] = failures; + status[2] = requests; + statusesAllSources.put(allDateSizeSource, status); + } + } + dlTimes = new ArrayList<>(); + failures = timeouts = requests = 0; + if (next == null) { + haveWrittenFinalLine = true; + } + } + if (next != null) { + bw.append(next.getValue() + "\n"); + String[] parts = next.getValue().split(","); + tempSourceDate = parts[0] + "," + parts[1]; + long completeMillis = Long.parseLong(parts[3]); + if (completeMillis == -2L) { + timeouts++; + } else if (completeMillis == -1L) { + failures++; + } else { + dlTimes.add(Long.parseLong(parts[3])); + } + 
requests++; + } + } + bw.close(); + for (Map.Entry<String, List<Long>> e + : dlTimesAllSources.entrySet()) { + String allDateSizeSource = e.getKey(); + dlTimes = e.getValue(); + Collections.sort(dlTimes); + long[] status = statusesAllSources.get(allDateSizeSource); + timeouts = status[0]; + failures = status[1]; + requests = status[2]; + long q1 = dlTimes.get(dlTimes.size() / 4 - 1); + long md = dlTimes.get(dlTimes.size() / 2 - 1); + long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1); + stats.put(allDateSizeSource, + String.format("%s,%s,%s,%s,%s,%s,%s", + allDateSizeSource, q1, md, q3, timeouts, failures, + requests)); + } + logger.fine("Finished writing file " + rawFile.getAbsolutePath() + + "."); + } + if (stats.size() > 0) { + logger.fine("Writing file " + statsFile.getAbsolutePath() + + "..."); + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); + dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String yesterday = dateFormat.format(System.currentTimeMillis() + - 86400000L); + statsFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter(statsFile)); + bw.append("date,size,source,q1,md,q3,timeouts,failures," + + "requests\n"); + for (String s : stats.values()) { + if (s.compareTo(yesterday) < 0) { + bw.append(s + "\n"); + } + } + bw.close(); + logger.fine("Finished writing file " + statsFile.getAbsolutePath() + + "."); + } + } catch (IOException e) { + logger.log(Level.WARNING, "Failed writing " + + rawFile.getAbsolutePath() + " or " + + statsFile.getAbsolutePath() + "!", e); + } + + /* Write stats. 
*/ + StringBuilder dumpStats = new StringBuilder("Finished writing " + + "statistics on torperf results.\nAdded " + addedRawObs + + " new observations in this execution.\n" + + "Last known obserations by source and file size are:"); + String lastSource = null; + String lastLine = null; + for (String s : rawObs.keySet()) { + String[] parts = s.split(","); + if (lastSource == null) { + lastSource = parts[0]; + } else if (!parts[0].equals(lastSource)) { + String lastKnownObservation = lastLine.split(",")[1] + " " + + lastLine.split(",")[2]; + dumpStats.append("\n" + lastSource + " " + lastKnownObservation); + lastSource = parts[0]; + } + lastLine = s; + } + if (lastSource != null) { + String lastKnownObservation = lastLine.split(",")[1] + " " + + lastLine.split(",")[2]; + dumpStats.append("\n" + lastSource + " " + lastKnownObservation); + } + logger.info(dumpStats.toString()); + } +} + diff --git a/modules/legacy/src/org/torproject/ernie/cron/Configuration.java b/modules/legacy/src/org/torproject/ernie/cron/Configuration.java deleted file mode 100644 index e0d753f..0000000 --- a/modules/legacy/src/org/torproject/ernie/cron/Configuration.java +++ /dev/null @@ -1,206 +0,0 @@ -/* Copyright 2011--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.ernie.cron; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.net.MalformedURLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Initialize configuration with hard-coded defaults, overwrite with - * configuration in config file, if exists, and answer Main.java about our - * configuration. 
- */ -public class Configuration { - - private boolean importDirectoryArchives = false; - - private List<String> directoryArchivesDirectories = new ArrayList<>(); - - private boolean keepDirectoryArchiveImportHistory = false; - - private boolean importSanitizedBridges = false; - - private String sanitizedBridgesDirectory = "in/bridge-descriptors/"; - - private boolean keepSanitizedBridgesImportHistory = false; - - private boolean writeRelayDescriptorDatabase = false; - - private String relayDescriptorDatabaseJdbc = - "jdbc:postgresql://localhost/tordir?user=metrics&password=password"; - - private boolean writeRelayDescriptorsRawFiles = false; - - private String relayDescriptorRawFilesDirectory = "pg-import/"; - - private boolean writeBridgeStats = false; - - private boolean importWriteTorperfStats = false; - - private String torperfDirectory = "in/torperf/"; - - private String exoneraTorDatabaseJdbc = "jdbc:postgresql:" - + "//localhost/exonerator?user=metrics&password=password"; - - private String exoneraTorImportDirectory = "exonerator-import/"; - - /** Initializes this configuration class. */ - public Configuration() { - - /* Initialize logger. */ - Logger logger = Logger.getLogger(Configuration.class.getName()); - - /* Read config file, if present. 
*/ - File configFile = new File("config"); - if (!configFile.exists()) { - logger.warning("Could not find config file."); - return; - } - String line = null; - try { - BufferedReader br = new BufferedReader(new FileReader(configFile)); - while ((line = br.readLine()) != null) { - if (line.startsWith("#") || line.length() < 1) { - continue; - } else if (line.startsWith("ImportDirectoryArchives")) { - this.importDirectoryArchives = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DirectoryArchivesDirectory")) { - this.directoryArchivesDirectories.add(line.split(" ")[1]); - } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) { - this.keepDirectoryArchiveImportHistory = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("ImportSanitizedBridges")) { - this.importSanitizedBridges = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("SanitizedBridgesDirectory")) { - this.sanitizedBridgesDirectory = line.split(" ")[1]; - } else if (line.startsWith("KeepSanitizedBridgesImportHistory")) { - this.keepSanitizedBridgesImportHistory = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("WriteRelayDescriptorDatabase")) { - this.writeRelayDescriptorDatabase = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("RelayDescriptorDatabaseJDBC")) { - this.relayDescriptorDatabaseJdbc = line.split(" ")[1]; - } else if (line.startsWith("WriteRelayDescriptorsRawFiles")) { - this.writeRelayDescriptorsRawFiles = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("RelayDescriptorRawFilesDirectory")) { - this.relayDescriptorRawFilesDirectory = line.split(" ")[1]; - } else if (line.startsWith("WriteBridgeStats")) { - this.writeBridgeStats = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("ImportWriteTorperfStats")) { - this.importWriteTorperfStats = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if 
(line.startsWith("TorperfDirectory")) { - this.torperfDirectory = line.split(" ")[1]; - } else if (line.startsWith("ExoneraTorDatabaseJdbc")) { - this.exoneraTorDatabaseJdbc = line.split(" ")[1]; - } else if (line.startsWith("ExoneraTorImportDirectory")) { - this.exoneraTorImportDirectory = line.split(" ")[1]; - } else { - logger.severe("Configuration file contains unrecognized " - + "configuration key in line '" + line + "'! Exiting!"); - System.exit(1); - } - } - br.close(); - } catch (ArrayIndexOutOfBoundsException e) { - logger.severe("Configuration file contains configuration key " - + "without value in line '" + line + "'. Exiting!"); - System.exit(1); - } catch (MalformedURLException e) { - logger.severe("Configuration file contains illegal URL or IP:port " - + "pair in line '" + line + "'. Exiting!"); - System.exit(1); - } catch (NumberFormatException e) { - logger.severe("Configuration file contains illegal value in line '" - + line + "' with legal values being 0 or 1. Exiting!"); - System.exit(1); - } catch (IOException e) { - logger.log(Level.SEVERE, "Unknown problem while reading config " - + "file! Exiting!", e); - System.exit(1); - } - } - - public boolean getImportDirectoryArchives() { - return this.importDirectoryArchives; - } - - /** Returns directories containing archived descriptors. 
*/ - public List<String> getDirectoryArchivesDirectories() { - if (this.directoryArchivesDirectories.isEmpty()) { - String prefix = "../../shared/in/recent/relay-descriptors/"; - return Arrays.asList( - (prefix + "consensuses/," + prefix + "server-descriptors/," - + prefix + "extra-infos/").split(",")); - } else { - return this.directoryArchivesDirectories; - } - } - - public boolean getKeepDirectoryArchiveImportHistory() { - return this.keepDirectoryArchiveImportHistory; - } - - public boolean getWriteRelayDescriptorDatabase() { - return this.writeRelayDescriptorDatabase; - } - - public boolean getImportSanitizedBridges() { - return this.importSanitizedBridges; - } - - public String getSanitizedBridgesDirectory() { - return this.sanitizedBridgesDirectory; - } - - public boolean getKeepSanitizedBridgesImportHistory() { - return this.keepSanitizedBridgesImportHistory; - } - - public String getRelayDescriptorDatabaseJdbc() { - return this.relayDescriptorDatabaseJdbc; - } - - public boolean getWriteRelayDescriptorsRawFiles() { - return this.writeRelayDescriptorsRawFiles; - } - - public String getRelayDescriptorRawFilesDirectory() { - return this.relayDescriptorRawFilesDirectory; - } - - public boolean getWriteBridgeStats() { - return this.writeBridgeStats; - } - - public boolean getImportWriteTorperfStats() { - return this.importWriteTorperfStats; - } - - public String getTorperfDirectory() { - return this.torperfDirectory; - } - - public String getExoneraTorDatabaseJdbc() { - return this.exoneraTorDatabaseJdbc; - } - - public String getExoneraTorImportDirectory() { - return this.exoneraTorImportDirectory; - } -} - diff --git a/modules/legacy/src/org/torproject/ernie/cron/LockFile.java b/modules/legacy/src/org/torproject/ernie/cron/LockFile.java deleted file mode 100644 index 48eb83d..0000000 --- a/modules/legacy/src/org/torproject/ernie/cron/LockFile.java +++ /dev/null @@ -1,58 +0,0 @@ -/* Copyright 2011--2017 The Tor Project - * See LICENSE for licensing information 
*/ - -package org.torproject.ernie.cron; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.logging.Logger; - -public class LockFile { - - private File lockFile; - private Logger logger; - - public LockFile() { - this.lockFile = new File("lock"); - this.logger = Logger.getLogger(LockFile.class.getName()); - } - - /** Acquires the lock by checking whether a lock file already exists, - * and if not, by creating one with the current system time as - * content. */ - public boolean acquireLock() { - this.logger.fine("Trying to acquire lock..."); - try { - if (this.lockFile.exists()) { - BufferedReader br = new BufferedReader(new FileReader("lock")); - long runStarted = Long.parseLong(br.readLine()); - br.close(); - if (System.currentTimeMillis() - runStarted - < 23L * 60L * 60L * 1000L) { - return false; - } - } - BufferedWriter bw = new BufferedWriter(new FileWriter("lock")); - bw.append("" + System.currentTimeMillis() + "\n"); - bw.close(); - this.logger.fine("Acquired lock."); - return true; - } catch (IOException e) { - this.logger.warning("Caught exception while trying to acquire " - + "lock!"); - return false; - } - } - - /** Releases the lock by deleting the lock file, if present. 
*/ - public void releaseLock() { - this.logger.fine("Releasing lock..."); - this.lockFile.delete(); - this.logger.fine("Released lock."); - } -} - diff --git a/modules/legacy/src/org/torproject/ernie/cron/LoggingConfiguration.java b/modules/legacy/src/org/torproject/ernie/cron/LoggingConfiguration.java deleted file mode 100644 index f6658c5..0000000 --- a/modules/legacy/src/org/torproject/ernie/cron/LoggingConfiguration.java +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright 2011--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.ernie.cron; - -import java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.TimeZone; -import java.util.logging.ConsoleHandler; -import java.util.logging.FileHandler; -import java.util.logging.Formatter; -import java.util.logging.Handler; -import java.util.logging.Level; -import java.util.logging.LogRecord; -import java.util.logging.Logger; - -/** - * Initialize logging configuration. - * - * <p>Log levels used by ERNIE:</p> - * - * <p> - * <ul> - * <li>SEVERE: An event made it impossible to continue program execution. - * WARNING: A potential problem occurred that requires the operator to - * look after the otherwise unattended setup</li> - * <li>INFO: Messages on INFO level are meant to help the operator in - * making sure that operation works as expected.</li> - * <li>FINE: Debug messages that are used to identify problems and which - * are turned on by default.</li> - * <li>FINER: More detailed debug messages to investigate problems in more - * detail. Not turned on by default. Increase log file limit when - * using FINER.</li> - * <li>FINEST: Most detailed debug messages. Not used.</li> - * </ul> - * </p> - */ -public class LoggingConfiguration { - - /** Initializes the logging configuration. */ - public LoggingConfiguration() { - - /* Remove default console handler. 
*/ - for (Handler h : Logger.getLogger("").getHandlers()) { - Logger.getLogger("").removeHandler(h); - } - - /* Disable logging of internal Sun classes. */ - Logger.getLogger("sun").setLevel(Level.OFF); - - /* Set minimum log level we care about from INFO to FINER. */ - Logger.getLogger("").setLevel(Level.FINER); - - /* Create log handler that writes messages on WARNING or higher to the - * console. */ - final SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - Formatter cf = new Formatter() { - public String format(LogRecord record) { - return dateTimeFormat.format(new Date(record.getMillis())) + " " - + record.getMessage() + "\n"; - } - }; - Handler ch = new ConsoleHandler(); - ch.setFormatter(cf); - ch.setLevel(Level.WARNING); - Logger.getLogger("").addHandler(ch); - - /* Initialize own logger for this class. */ - Logger logger = Logger.getLogger( - LoggingConfiguration.class.getName()); - - /* Create log handler that writes all messages on FINE or higher to a - * local file. */ - Formatter ff = new Formatter() { - public String format(LogRecord record) { - return dateTimeFormat.format(new Date(record.getMillis())) + " " - + record.getLevel() + " " + record.getSourceClassName() + " " - + record.getSourceMethodName() + " " + record.getMessage() - + (record.getThrown() != null ? " " + record.getThrown() : "") - + "\n"; - } - }; - try { - FileHandler fh = new FileHandler("log", 5000000, 5, true); - fh.setFormatter(ff); - fh.setLevel(Level.FINE); - Logger.getLogger("").addHandler(fh); - } catch (SecurityException e) { - logger.log(Level.WARNING, "No permission to create log file. " - + "Logging to file is disabled.", e); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not write to log file. 
Logging to " - + "file is disabled.", e); - } - } -} - diff --git a/modules/legacy/src/org/torproject/ernie/cron/Main.java b/modules/legacy/src/org/torproject/ernie/cron/Main.java deleted file mode 100644 index 0eab86f..0000000 --- a/modules/legacy/src/org/torproject/ernie/cron/Main.java +++ /dev/null @@ -1,90 +0,0 @@ -/* Copyright 2011--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.ernie.cron; - -import org.torproject.ernie.cron.network.ConsensusStatsFileHandler; -import org.torproject.ernie.cron.performance.TorperfProcessor; - -import java.io.File; -import java.util.logging.Logger; - -/** - * Coordinate downloading and parsing of descriptors and extraction of - * statistically relevant data for later processing with R. - */ -public class Main { - - /** Executes this data-processing module. */ - public static void main(String[] args) { - - /* Initialize logging configuration. */ - new LoggingConfiguration(); - - Logger logger = Logger.getLogger(Main.class.getName()); - logger.info("Starting ERNIE."); - - // Initialize configuration - Configuration config = new Configuration(); - - // Use lock file to avoid overlapping runs - LockFile lf = new LockFile(); - if (!lf.acquireLock()) { - logger.severe("Warning: ERNIE is already running or has not exited " - + "cleanly! Exiting!"); - System.exit(1); - } - - // Define stats directory for temporary files - File statsDirectory = new File("stats"); - - // Import relay descriptors - if (config.getImportDirectoryArchives()) { - RelayDescriptorDatabaseImporter rddi = - config.getWriteRelayDescriptorDatabase() - || config.getWriteRelayDescriptorsRawFiles() - ? new RelayDescriptorDatabaseImporter( - config.getWriteRelayDescriptorDatabase() - ? config.getRelayDescriptorDatabaseJdbc() : null, - config.getWriteRelayDescriptorsRawFiles() - ? 
config.getRelayDescriptorRawFilesDirectory() : null, - config.getDirectoryArchivesDirectories(), - statsDirectory, - config.getKeepDirectoryArchiveImportHistory()) : null; - if (rddi != null) { - rddi.importRelayDescriptors(); - } - rddi.closeConnection(); - } - - // Prepare consensus stats file handler (used for stats on running - // bridges only) - ConsensusStatsFileHandler csfh = config.getWriteBridgeStats() - ? new ConsensusStatsFileHandler( - config.getRelayDescriptorDatabaseJdbc(), - new File(config.getSanitizedBridgesDirectory()), - statsDirectory, config.getKeepSanitizedBridgesImportHistory()) - : null; - - // Import sanitized bridges and write updated stats files to disk - if (csfh != null) { - if (config.getImportSanitizedBridges()) { - csfh.importSanitizedBridges(); - } - csfh.writeFiles(); - csfh = null; - } - - // Import and process torperf stats - if (config.getImportWriteTorperfStats()) { - new TorperfProcessor(new File(config.getTorperfDirectory()), - statsDirectory); - } - - // Remove lock file - lf.releaseLock(); - - logger.info("Terminating ERNIE."); - } -} - diff --git a/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java b/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java deleted file mode 100644 index 97a330e..0000000 --- a/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java +++ /dev/null @@ -1,995 +0,0 @@ -/* Copyright 2011--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.ernie.cron; - -import org.torproject.descriptor.Descriptor; -import org.torproject.descriptor.DescriptorFile; -import org.torproject.descriptor.DescriptorReader; -import org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.ExtraInfoDescriptor; -import org.torproject.descriptor.NetworkStatusEntry; -import org.torproject.descriptor.RelayNetworkStatusConsensus; -import org.torproject.descriptor.ServerDescriptor; 
- -import org.postgresql.util.PGbytea; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.sql.CallableStatement; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Timestamp; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedSet; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Parse directory data. - */ - -/* TODO Split up this class and move its parts to cron.network, - * cron.users, and status.relaysearch packages. Requires extensive - * changes to the database schema though. */ -public final class RelayDescriptorDatabaseImporter { - - /** - * How many records to commit with each database transaction. - */ - private final long autoCommitCount = 500; - - /* Counters to keep track of the number of records committed before - * each transaction. */ - - private int rdsCount = 0; - - private int resCount = 0; - - private int rhsCount = 0; - - private int rrsCount = 0; - - private int rcsCount = 0; - - private int rvsCount = 0; - - private int rqsCount = 0; - - /** - * Relay descriptor database connection. - */ - private Connection conn; - - /** - * Prepared statement to check whether any network status consensus - * entries matching a given valid-after time have been imported into the - * database before. - */ - private PreparedStatement psSs; - - /** - * Prepared statement to check whether a given server descriptor has - * been imported into the database before. 
- */ - private PreparedStatement psDs; - - /** - * Prepared statement to check whether a given network status consensus - * has been imported into the database before. - */ - private PreparedStatement psCs; - - /** - * Set of dates that have been inserted into the database for being - * included in the next refresh run. - */ - private Set<Long> scheduledUpdates; - - /** - * Prepared statement to insert a date into the database that shall be - * included in the next refresh run. - */ - private PreparedStatement psU; - - /** - * Prepared statement to insert a network status consensus entry into - * the database. - */ - private PreparedStatement psR; - - /** - * Prepared statement to insert a server descriptor into the database. - */ - private PreparedStatement psD; - - /** - * Callable statement to insert the bandwidth history of an extra-info - * descriptor into the database. - */ - private CallableStatement csH; - - /** - * Prepared statement to insert a network status consensus into the - * database. - */ - private PreparedStatement psC; - - /** - * Logger for this class. - */ - private Logger logger; - - /** - * Directory for writing raw import files. - */ - private String rawFilesDirectory; - - /** - * Raw import file containing status entries. - */ - private BufferedWriter statusentryOut; - - /** - * Raw import file containing server descriptors. - */ - private BufferedWriter descriptorOut; - - /** - * Raw import file containing bandwidth histories. - */ - private BufferedWriter bwhistOut; - - /** - * Raw import file containing consensuses. - */ - private BufferedWriter consensusOut; - - /** - * Date format to parse timestamps. - */ - private SimpleDateFormat dateTimeFormat; - - /** - * The last valid-after time for which we checked whether they have been - * any network status entries in the database. 
- */ - private long lastCheckedStatusEntries; - - /** - * Set of fingerprints that we imported for the valid-after time in - * <code>lastCheckedStatusEntries</code>. - */ - private Set<String> insertedStatusEntries = new HashSet<>(); - - private boolean importIntoDatabase; - - private boolean writeRawImportFiles; - - private List<String> archivesDirectories; - - private File statsDirectory; - - private boolean keepImportHistory; - - /** - * Initialize database importer by connecting to the database and - * preparing statements. - */ - public RelayDescriptorDatabaseImporter(String connectionUrl, - String rawFilesDirectory, List<String> archivesDirectories, - File statsDirectory, boolean keepImportHistory) { - - if (archivesDirectories == null || statsDirectory == null) { - throw new IllegalArgumentException(); - } - this.archivesDirectories = archivesDirectories; - this.statsDirectory = statsDirectory; - this.keepImportHistory = keepImportHistory; - - /* Initialize logger. */ - this.logger = Logger.getLogger( - RelayDescriptorDatabaseImporter.class.getName()); - - if (connectionUrl != null) { - try { - /* Connect to database. */ - this.conn = DriverManager.getConnection(connectionUrl); - - /* Turn autocommit off */ - this.conn.setAutoCommit(false); - - /* Prepare statements. 
*/ - this.psSs = conn.prepareStatement("SELECT fingerprint " - + "FROM statusentry WHERE validafter = ?"); - this.psDs = conn.prepareStatement("SELECT COUNT(*) " - + "FROM descriptor WHERE descriptor = ?"); - this.psCs = conn.prepareStatement("SELECT COUNT(*) " - + "FROM consensus WHERE validafter = ?"); - this.psR = conn.prepareStatement("INSERT INTO statusentry " - + "(validafter, nickname, fingerprint, descriptor, " - + "published, address, orport, dirport, isauthority, " - + "isbadexit, isbaddirectory, isexit, isfast, isguard, " - + "ishsdir, isnamed, isstable, isrunning, isunnamed, " - + "isvalid, isv2dir, isv3dir, version, bandwidth, ports, " - + "rawdesc) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, " - + "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); - this.psD = conn.prepareStatement("INSERT INTO descriptor " - + "(descriptor, nickname, address, orport, dirport, " - + "fingerprint, bandwidthavg, bandwidthburst, " - + "bandwidthobserved, platform, published, uptime, " - + "extrainfo) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, " - + "?)"); - this.csH = conn.prepareCall("{call insert_bwhist(?, ?, ?, ?, ?, " - + "?)}"); - this.psC = conn.prepareStatement("INSERT INTO consensus " - + "(validafter) VALUES (?)"); - this.psU = conn.prepareStatement("INSERT INTO scheduled_updates " - + "(date) VALUES (?)"); - this.scheduledUpdates = new HashSet<>(); - this.importIntoDatabase = true; - } catch (SQLException e) { - this.logger.log(Level.WARNING, "Could not connect to database or " - + "prepare statements.", e); - } - } - - /* Remember where we want to write raw import files. */ - if (rawFilesDirectory != null) { - this.rawFilesDirectory = rawFilesDirectory; - this.writeRawImportFiles = true; - } - - /* Initialize date format, so that we can format timestamps. 
*/ - this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - } - - private void addDateToScheduledUpdates(long timestamp) - throws SQLException { - if (!this.importIntoDatabase) { - return; - } - long dateMillis = 0L; - try { - dateMillis = this.dateTimeFormat.parse( - this.dateTimeFormat.format(timestamp).substring(0, 10) - + " 00:00:00").getTime(); - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Internal parsing error.", e); - return; - } - if (!this.scheduledUpdates.contains(dateMillis)) { - this.psU.setDate(1, new java.sql.Date(dateMillis)); - this.psU.execute(); - this.scheduledUpdates.add(dateMillis); - } - } - - /** - * Insert network status consensus entry into database. - */ - public void addStatusEntryContents(long validAfter, String nickname, - String fingerprint, String descriptor, long published, - String address, long orPort, long dirPort, - SortedSet<String> flags, String version, long bandwidth, - String ports, byte[] rawDescriptor) { - if (this.importIntoDatabase) { - try { - this.addDateToScheduledUpdates(validAfter); - Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC")); - Timestamp validAfterTimestamp = new Timestamp(validAfter); - if (lastCheckedStatusEntries != validAfter) { - insertedStatusEntries.clear(); - this.psSs.setTimestamp(1, validAfterTimestamp, cal); - ResultSet rs = psSs.executeQuery(); - while (rs.next()) { - String insertedFingerprint = rs.getString(1); - insertedStatusEntries.add(insertedFingerprint); - } - rs.close(); - lastCheckedStatusEntries = validAfter; - } - if (!insertedStatusEntries.contains(fingerprint)) { - this.psR.clearParameters(); - this.psR.setTimestamp(1, validAfterTimestamp, cal); - this.psR.setString(2, nickname); - this.psR.setString(3, fingerprint); - this.psR.setString(4, descriptor); - this.psR.setTimestamp(5, new Timestamp(published), cal); - this.psR.setString(6, address); - this.psR.setLong(7, 
orPort); - this.psR.setLong(8, dirPort); - this.psR.setBoolean(9, flags.contains("Authority")); - this.psR.setBoolean(10, flags.contains("BadExit")); - this.psR.setBoolean(11, flags.contains("BadDirectory")); - this.psR.setBoolean(12, flags.contains("Exit")); - this.psR.setBoolean(13, flags.contains("Fast")); - this.psR.setBoolean(14, flags.contains("Guard")); - this.psR.setBoolean(15, flags.contains("HSDir")); - this.psR.setBoolean(16, flags.contains("Named")); - this.psR.setBoolean(17, flags.contains("Stable")); - this.psR.setBoolean(18, flags.contains("Running")); - this.psR.setBoolean(19, flags.contains("Unnamed")); - this.psR.setBoolean(20, flags.contains("Valid")); - this.psR.setBoolean(21, flags.contains("V2Dir")); - this.psR.setBoolean(22, flags.contains("V3Dir")); - this.psR.setString(23, version); - this.psR.setLong(24, bandwidth); - this.psR.setString(25, ports); - this.psR.setBytes(26, rawDescriptor); - this.psR.executeUpdate(); - rrsCount++; - if (rrsCount % autoCommitCount == 0) { - this.conn.commit(); - } - insertedStatusEntries.add(fingerprint); - } - } catch (SQLException e) { - this.logger.log(Level.WARNING, "Could not add network status " - + "consensus entry. 
We won't make any further SQL requests " - + "in this execution.", e); - this.importIntoDatabase = false; - } - } - if (this.writeRawImportFiles) { - try { - if (this.statusentryOut == null) { - new File(rawFilesDirectory).mkdirs(); - this.statusentryOut = new BufferedWriter(new FileWriter( - rawFilesDirectory + "/statusentry.sql")); - this.statusentryOut.write(" COPY statusentry (validafter, " - + "nickname, fingerprint, descriptor, published, address, " - + "orport, dirport, isauthority, isbadExit, " - + "isbaddirectory, isexit, isfast, isguard, ishsdir, " - + "isnamed, isstable, isrunning, isunnamed, isvalid, " - + "isv2dir, isv3dir, version, bandwidth, ports, rawdesc) " - + "FROM stdin;\n"); - } - this.statusentryOut.write( - this.dateTimeFormat.format(validAfter) + "\t" + nickname - + "\t" + fingerprint.toLowerCase() + "\t" - + descriptor.toLowerCase() + "\t" - + this.dateTimeFormat.format(published) + "\t" + address - + "\t" + orPort + "\t" + dirPort + "\t" - + (flags.contains("Authority") ? "t" : "f") + "\t" - + (flags.contains("BadExit") ? "t" : "f") + "\t" - + (flags.contains("BadDirectory") ? "t" : "f") + "\t" - + (flags.contains("Exit") ? "t" : "f") + "\t" - + (flags.contains("Fast") ? "t" : "f") + "\t" - + (flags.contains("Guard") ? "t" : "f") + "\t" - + (flags.contains("HSDir") ? "t" : "f") + "\t" - + (flags.contains("Named") ? "t" : "f") + "\t" - + (flags.contains("Stable") ? "t" : "f") + "\t" - + (flags.contains("Running") ? "t" : "f") + "\t" - + (flags.contains("Unnamed") ? "t" : "f") + "\t" - + (flags.contains("Valid") ? "t" : "f") + "\t" - + (flags.contains("V2Dir") ? "t" : "f") + "\t" - + (flags.contains("V3Dir") ? "t" : "f") + "\t" - + (version != null ? version : "\N") + "\t" - + (bandwidth >= 0 ? bandwidth : "\N") + "\t" - + (ports != null ? 
ports : "\N") + "\t"); - this.statusentryOut.write(PGbytea.toPGString(rawDescriptor) - .replaceAll("\\", "\\\\") + "\n"); - } catch (SQLException e) { - this.logger.log(Level.WARNING, "Could not write network status " - + "consensus entry to raw database import file. We won't " - + "make any further attempts to write raw import files in " - + "this execution.", e); - this.writeRawImportFiles = false; - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not write network status " - + "consensus entry to raw database import file. We won't " - + "make any further attempts to write raw import files in " - + "this execution.", e); - this.writeRawImportFiles = false; - } - } - } - - /** - * Insert server descriptor into database. - */ - public void addServerDescriptorContents(String descriptor, - String nickname, String address, int orPort, int dirPort, - String relayIdentifier, long bandwidthAvg, long bandwidthBurst, - long bandwidthObserved, String platform, long published, - long uptime, String extraInfoDigest) { - if (this.importIntoDatabase) { - try { - this.addDateToScheduledUpdates(published); - this.addDateToScheduledUpdates( - published + 24L * 60L * 60L * 1000L); - Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC")); - this.psDs.setString(1, descriptor); - ResultSet rs = psDs.executeQuery(); - rs.next(); - if (rs.getInt(1) == 0) { - this.psD.clearParameters(); - this.psD.setString(1, descriptor); - this.psD.setString(2, nickname); - this.psD.setString(3, address); - this.psD.setInt(4, orPort); - this.psD.setInt(5, dirPort); - this.psD.setString(6, relayIdentifier); - this.psD.setLong(7, bandwidthAvg); - this.psD.setLong(8, bandwidthBurst); - this.psD.setLong(9, bandwidthObserved); - /* Remove all non-ASCII characters from the platform string, or - * we'll make Postgres unhappy. Sun's JDK and OpenJDK behave - * differently when creating a new String with a given encoding. - * That's what the regexp below is for. 
*/ - this.psD.setString(10, new String(platform.getBytes(), - "US-ASCII").replaceAll("[^\p{ASCII}]","")); - this.psD.setTimestamp(11, new Timestamp(published), cal); - this.psD.setLong(12, uptime); - this.psD.setString(13, extraInfoDigest); - this.psD.executeUpdate(); - rdsCount++; - if (rdsCount % autoCommitCount == 0) { - this.conn.commit(); - } - } - } catch (UnsupportedEncodingException e) { - // US-ASCII is supported for sure - } catch (SQLException e) { - this.logger.log(Level.WARNING, "Could not add server " - + "descriptor. We won't make any further SQL requests in " - + "this execution.", e); - this.importIntoDatabase = false; - } - } - if (this.writeRawImportFiles) { - try { - if (this.descriptorOut == null) { - new File(rawFilesDirectory).mkdirs(); - this.descriptorOut = new BufferedWriter(new FileWriter( - rawFilesDirectory + "/descriptor.sql")); - this.descriptorOut.write(" COPY descriptor (descriptor, " - + "nickname, address, orport, dirport, fingerprint, " - + "bandwidthavg, bandwidthburst, bandwidthobserved, " - + "platform, published, uptime, extrainfo) FROM stdin;\n"); - } - this.descriptorOut.write(descriptor.toLowerCase() + "\t" - + nickname + "\t" + address + "\t" + orPort + "\t" + dirPort - + "\t" + relayIdentifier + "\t" + bandwidthAvg + "\t" - + bandwidthBurst + "\t" + bandwidthObserved + "\t" - + (platform != null && platform.length() > 0 - ? new String(platform.getBytes(), "US-ASCII") : "\N") - + "\t" + this.dateTimeFormat.format(published) + "\t" - + (uptime >= 0 ? uptime : "\N") + "\t" - + (extraInfoDigest != null ? extraInfoDigest : "\N") - + "\n"); - } catch (UnsupportedEncodingException e) { - // US-ASCII is supported for sure - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not write server " - + "descriptor to raw database import file. 
We won't make " - + "any further attempts to write raw import files in this " - + "execution.", e); - this.writeRawImportFiles = false; - } - } - } - - /** - * Insert extra-info descriptor into database. - */ - public void addExtraInfoDescriptorContents(String extraInfoDigest, - String nickname, String fingerprint, long published, - List<String> bandwidthHistoryLines) { - if (!bandwidthHistoryLines.isEmpty()) { - this.addBandwidthHistory(fingerprint.toLowerCase(), published, - bandwidthHistoryLines); - } - } - - private static class BigIntArray implements java.sql.Array { - - private final String stringValue; - - public BigIntArray(long[] array, int offset) { - if (array == null) { - this.stringValue = "[-1:-1]={0}"; - } else { - StringBuilder sb = new StringBuilder("[" + offset + ":" - + (offset + array.length - 1) + "]={"); - for (int i = 0; i < array.length; i++) { - sb.append((i > 0 ? "," : "") + array[i]); - } - sb.append('}'); - this.stringValue = sb.toString(); - } - } - - public String toString() { - return stringValue; - } - - public String getBaseTypeName() { - return "int8"; - } - - /* The other methods are never called; no need to implement them. 
*/ - public void free() { - throw new UnsupportedOperationException(); - } - - public Object getArray() { - throw new UnsupportedOperationException(); - } - - public Object getArray(long index, int count) { - throw new UnsupportedOperationException(); - } - - public Object getArray(long index, int count, - Map<String, Class<?>> map) { - throw new UnsupportedOperationException(); - } - - public Object getArray(Map<String, Class<?>> map) { - throw new UnsupportedOperationException(); - } - - public int getBaseType() { - throw new UnsupportedOperationException(); - } - - public ResultSet getResultSet() { - throw new UnsupportedOperationException(); - } - - public ResultSet getResultSet(long index, int count) { - throw new UnsupportedOperationException(); - } - - public ResultSet getResultSet(long index, int count, - Map<String, Class<?>> map) { - throw new UnsupportedOperationException(); - } - - public ResultSet getResultSet(Map<String, Class<?>> map) { - throw new UnsupportedOperationException(); - } - } - - /** Inserts a bandwidth history into database. */ - public void addBandwidthHistory(String fingerprint, long published, - List<String> bandwidthHistoryStrings) { - - /* Split history lines by date and rewrite them so that the date - * comes first. */ - SortedSet<String> historyLinesByDate = new TreeSet<>(); - for (String bandwidthHistoryString : bandwidthHistoryStrings) { - String[] parts = bandwidthHistoryString.split(" "); - if (parts.length != 6) { - this.logger.finer("Bandwidth history line does not have expected " - + "number of elements. Ignoring this line."); - continue; - } - long intervalLength = 0L; - try { - intervalLength = Long.parseLong(parts[3].substring(1)); - } catch (NumberFormatException e) { - this.logger.fine("Bandwidth history line does not have valid " - + "interval length '" + parts[3] + " " + parts[4] + "'. 
" - + "Ignoring this line."); - continue; - } - String[] values = parts[5].split(","); - if (intervalLength % 900L != 0L) { - this.logger.fine("Bandwidth history line does not contain " - + "multiples of 15-minute intervals. Ignoring this line."); - continue; - } else if (intervalLength != 900L) { - /* This is a really dirty hack to support bandwidth history - * intervals that are longer than 15 minutes by linearly - * distributing reported bytes to 15 minute intervals. The - * alternative would have been to modify the database schema. */ - try { - long factor = intervalLength / 900L; - String[] newValues = new String[values.length * (int) factor]; - for (int i = 0; i < newValues.length; i++) { - newValues[i] = String.valueOf( - Long.parseLong(values[i / (int) factor]) / factor); - } - values = newValues; - intervalLength = 900L; - } catch (NumberFormatException e) { - this.logger.fine("Number format exception while parsing " - + "bandwidth history line. Ignoring this line."); - continue; - } - } - String type = parts[0]; - String intervalEndTime = parts[1] + " " + parts[2]; - long intervalEnd; - long dateStart; - try { - intervalEnd = dateTimeFormat.parse(intervalEndTime).getTime(); - dateStart = dateTimeFormat.parse(parts[1] + " 00:00:00") - .getTime(); - } catch (ParseException e) { - this.logger.fine("Parse exception while parsing timestamp in " - + "bandwidth history line. Ignoring this line."); - continue; - } - if (Math.abs(published - intervalEnd) - > 7L * 24L * 60L * 60L * 1000L) { - this.logger.fine("Extra-info descriptor publication time " - + dateTimeFormat.format(published) + " and last interval " - + "time " + intervalEndTime + " in " + type + " line differ " - + "by more than 7 days! 
Not adding this line!"); - continue; - } - long currentIntervalEnd = intervalEnd; - StringBuilder sb = new StringBuilder(); - SortedSet<String> newHistoryLines = new TreeSet<>(); - try { - for (int i = values.length - 1; i >= -1; i--) { - if (i == -1 || currentIntervalEnd < dateStart) { - sb.insert(0, intervalEndTime + " " + type + " (" - + intervalLength + " s) "); - sb.setLength(sb.length() - 1); - String historyLine = sb.toString(); - newHistoryLines.add(historyLine); - sb = new StringBuilder(); - dateStart -= 24L * 60L * 60L * 1000L; - intervalEndTime = dateTimeFormat.format(currentIntervalEnd); - } - if (i == -1) { - break; - } - Long.parseLong(values[i]); - sb.insert(0, values[i] + ","); - currentIntervalEnd -= intervalLength * 1000L; - } - } catch (NumberFormatException e) { - this.logger.fine("Number format exception while parsing " - + "bandwidth history line. Ignoring this line."); - continue; - } - historyLinesByDate.addAll(newHistoryLines); - } - - /* Add split history lines to database. 
*/ - String lastDate = null; - historyLinesByDate.add("EOL"); - long[] readArray = null; - long[] writtenArray = null; - long[] dirreadArray = null; - long[] dirwrittenArray = null; - int readOffset = 0; - int writtenOffset = 0; - int dirreadOffset = 0; - int dirwrittenOffset = 0; - for (String historyLine : historyLinesByDate) { - String[] parts = historyLine.split(" "); - String currentDate = parts[0]; - if (lastDate != null && (historyLine.equals("EOL") - || !currentDate.equals(lastDate))) { - BigIntArray readIntArray = new BigIntArray(readArray, - readOffset); - BigIntArray writtenIntArray = new BigIntArray(writtenArray, - writtenOffset); - BigIntArray dirreadIntArray = new BigIntArray(dirreadArray, - dirreadOffset); - BigIntArray dirwrittenIntArray = new BigIntArray(dirwrittenArray, - dirwrittenOffset); - if (this.importIntoDatabase) { - try { - long dateMillis = dateTimeFormat.parse(lastDate - + " 00:00:00").getTime(); - this.addDateToScheduledUpdates(dateMillis); - this.csH.setString(1, fingerprint); - this.csH.setDate(2, new java.sql.Date(dateMillis)); - this.csH.setArray(3, readIntArray); - this.csH.setArray(4, writtenIntArray); - this.csH.setArray(5, dirreadIntArray); - this.csH.setArray(6, dirwrittenIntArray); - this.csH.addBatch(); - rhsCount++; - if (rhsCount % autoCommitCount == 0) { - this.csH.executeBatch(); - } - } catch (SQLException e) { - this.logger.log(Level.WARNING, "Could not insert bandwidth " - + "history line into database. We won't make any " - + "further SQL requests in this execution.", e); - this.importIntoDatabase = false; - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Could not insert bandwidth " - + "history line into database. 
We won't make any " - + "further SQL requests in this execution.", e); - this.importIntoDatabase = false; - } - } - if (this.writeRawImportFiles) { - try { - if (this.bwhistOut == null) { - new File(rawFilesDirectory).mkdirs(); - this.bwhistOut = new BufferedWriter(new FileWriter( - rawFilesDirectory + "/bwhist.sql")); - } - this.bwhistOut.write("SELECT insert_bwhist('" + fingerprint - + "','" + lastDate + "','" + readIntArray.toString() - + "','" + writtenIntArray.toString() + "','" - + dirreadIntArray.toString() + "','" - + dirwrittenIntArray.toString() + "');\n"); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not write bandwidth " - + "history to raw database import file. We won't make " - + "any further attempts to write raw import files in " - + "this execution.", e); - this.writeRawImportFiles = false; - } - } - readArray = writtenArray = dirreadArray = dirwrittenArray = null; - } - if (historyLine.equals("EOL")) { - break; - } - long lastIntervalTime; - try { - lastIntervalTime = dateTimeFormat.parse(parts[0] + " " - + parts[1]).getTime() - dateTimeFormat.parse(parts[0] - + " 00:00:00").getTime(); - } catch (ParseException e) { - continue; - } - String[] stringValues = parts[5].split(","); - long[] longValues = new long[stringValues.length]; - for (int i = 0; i < longValues.length; i++) { - longValues[i] = Long.parseLong(stringValues[i]); - } - - int offset = (int) (lastIntervalTime / (15L * 60L * 1000L)) - - longValues.length + 1; - String type = parts[2]; - if (type.equals("read-history")) { - readArray = longValues; - readOffset = offset; - } else if (type.equals("write-history")) { - writtenArray = longValues; - writtenOffset = offset; - } else if (type.equals("dirreq-read-history")) { - dirreadArray = longValues; - dirreadOffset = offset; - } else if (type.equals("dirreq-write-history")) { - dirwrittenArray = longValues; - dirwrittenOffset = offset; - } - lastDate = currentDate; - } - } - - /** - * Insert network status consensus 
into database. - */ - public void addConsensus(long validAfter) { - if (this.importIntoDatabase) { - try { - this.addDateToScheduledUpdates(validAfter); - Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC")); - Timestamp validAfterTimestamp = new Timestamp(validAfter); - this.psCs.setTimestamp(1, validAfterTimestamp, cal); - ResultSet rs = psCs.executeQuery(); - rs.next(); - if (rs.getInt(1) == 0) { - this.psC.clearParameters(); - this.psC.setTimestamp(1, validAfterTimestamp, cal); - this.psC.executeUpdate(); - rcsCount++; - if (rcsCount % autoCommitCount == 0) { - this.conn.commit(); - } - } - } catch (SQLException e) { - this.logger.log(Level.WARNING, "Could not add network status " - + "consensus. We won't make any further SQL requests in " - + "this execution.", e); - this.importIntoDatabase = false; - } - } - if (this.writeRawImportFiles) { - try { - if (this.consensusOut == null) { - new File(rawFilesDirectory).mkdirs(); - this.consensusOut = new BufferedWriter(new FileWriter( - rawFilesDirectory + "/consensus.sql")); - this.consensusOut.write(" COPY consensus (validafter) " - + "FROM stdin;\n"); - } - String validAfterString = this.dateTimeFormat.format(validAfter); - this.consensusOut.write(validAfterString + "\n"); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not write network status " - + "consensus to raw database import file. We won't make " - + "any further attempts to write raw import files in this " - + "execution.", e); - this.writeRawImportFiles = false; - } - } - } - - /** Imports relay descriptors into the database. 
*/ - public void importRelayDescriptors() { - logger.fine("Importing files in directories " + archivesDirectories - + "/..."); - if (!this.archivesDirectories.isEmpty()) { - DescriptorReader reader = - DescriptorSourceFactory.createDescriptorReader(); - reader.setMaxDescriptorFilesInQueue(10); - for (String archivesPath : this.archivesDirectories) { - File archivesDirectory = new File(archivesPath); - if (archivesDirectory.exists()) { - reader.addDirectory(archivesDirectory); - } - } - if (keepImportHistory) { - reader.setExcludeFiles(new File(statsDirectory, - "database-importer-relay-descriptor-history")); - } - Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - if (descriptorFile.getDescriptors() != null) { - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (descriptor instanceof RelayNetworkStatusConsensus) { - this.addRelayNetworkStatusConsensus( - (RelayNetworkStatusConsensus) descriptor); - } else if (descriptor instanceof ServerDescriptor) { - this.addServerDescriptor((ServerDescriptor) descriptor); - } else if (descriptor instanceof ExtraInfoDescriptor) { - this.addExtraInfoDescriptor( - (ExtraInfoDescriptor) descriptor); - } - } - } - } - } - - logger.info("Finished importing relay descriptors."); - } - - private void addRelayNetworkStatusConsensus( - RelayNetworkStatusConsensus consensus) { - for (NetworkStatusEntry statusEntry - : consensus.getStatusEntries().values()) { - this.addStatusEntryContents(consensus.getValidAfterMillis(), - statusEntry.getNickname(), - statusEntry.getFingerprint().toLowerCase(), - statusEntry.getDescriptor().toLowerCase(), - statusEntry.getPublishedMillis(), statusEntry.getAddress(), - statusEntry.getOrPort(), statusEntry.getDirPort(), - statusEntry.getFlags(), statusEntry.getVersion(), - statusEntry.getBandwidth(), statusEntry.getPortList(), - statusEntry.getStatusEntryBytes()); - } - 
this.addConsensus(consensus.getValidAfterMillis()); - } - - private void addServerDescriptor(ServerDescriptor descriptor) { - this.addServerDescriptorContents( - descriptor.getServerDescriptorDigest(), descriptor.getNickname(), - descriptor.getAddress(), descriptor.getOrPort(), - descriptor.getDirPort(), descriptor.getFingerprint(), - descriptor.getBandwidthRate(), descriptor.getBandwidthBurst(), - descriptor.getBandwidthObserved(), descriptor.getPlatform(), - descriptor.getPublishedMillis(), descriptor.getUptime(), - descriptor.getExtraInfoDigest()); - } - - private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) { - List<String> bandwidthHistoryLines = new ArrayList<>(); - if (descriptor.getWriteHistory() != null) { - bandwidthHistoryLines.add(descriptor.getWriteHistory().getLine()); - } - if (descriptor.getReadHistory() != null) { - bandwidthHistoryLines.add(descriptor.getReadHistory().getLine()); - } - if (descriptor.getDirreqWriteHistory() != null) { - bandwidthHistoryLines.add( - descriptor.getDirreqWriteHistory().getLine()); - } - if (descriptor.getDirreqReadHistory() != null) { - bandwidthHistoryLines.add( - descriptor.getDirreqReadHistory().getLine()); - } - this.addExtraInfoDescriptorContents(descriptor.getExtraInfoDigest(), - descriptor.getNickname(), - descriptor.getFingerprint().toLowerCase(), - descriptor.getPublishedMillis(), bandwidthHistoryLines); - } - - /** - * Close the relay descriptor database connection. - */ - public void closeConnection() { - - /* Log stats about imported descriptors. 
*/ - this.logger.info(String.format("Finished importing relay " - + "descriptors: %d consensuses, %d network status entries, %d " - + "votes, %d server descriptors, %d extra-info descriptors, %d " - + "bandwidth history elements, and %d dirreq stats elements", - rcsCount, rrsCount, rvsCount, rdsCount, resCount, rhsCount, - rqsCount)); - - /* Insert scheduled updates a second time, just in case the refresh - * run has started since inserting them the first time in which case - * it will miss the data inserted afterwards. We cannot, however, - * insert them only now, because if a Java execution fails at a random - * point, we might have added data, but not the corresponding dates to - * update statistics. */ - if (this.importIntoDatabase) { - try { - for (long dateMillis : this.scheduledUpdates) { - this.psU.setDate(1, new java.sql.Date(dateMillis)); - this.psU.execute(); - } - } catch (SQLException e) { - this.logger.log(Level.WARNING, "Could not add scheduled dates " - + "for the next refresh run.", e); - } - } - - /* Commit any stragglers before closing. */ - if (this.conn != null) { - try { - this.csH.executeBatch(); - - this.conn.commit(); - } catch (SQLException e) { - this.logger.log(Level.WARNING, "Could not commit final records " - + "to database", e); - } - try { - this.conn.close(); - } catch (SQLException e) { - this.logger.log(Level.WARNING, "Could not close database " - + "connection.", e); - } - } - - /* Close raw import files. 
*/ - try { - if (this.statusentryOut != null) { - this.statusentryOut.write("\.\n"); - this.statusentryOut.close(); - } - if (this.descriptorOut != null) { - this.descriptorOut.write("\.\n"); - this.descriptorOut.close(); - } - if (this.bwhistOut != null) { - this.bwhistOut.write("\.\n"); - this.bwhistOut.close(); - } - if (this.consensusOut != null) { - this.consensusOut.write("\.\n"); - this.consensusOut.close(); - } - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not close one or more raw " - + "database import files.", e); - } - } -} - diff --git a/modules/legacy/src/org/torproject/ernie/cron/network/ConsensusStatsFileHandler.java b/modules/legacy/src/org/torproject/ernie/cron/network/ConsensusStatsFileHandler.java deleted file mode 100644 index aa9469e..0000000 --- a/modules/legacy/src/org/torproject/ernie/cron/network/ConsensusStatsFileHandler.java +++ /dev/null @@ -1,412 +0,0 @@ -/* Copyright 2011--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.ernie.cron.network; - -import org.torproject.descriptor.BridgeNetworkStatus; -import org.torproject.descriptor.Descriptor; -import org.torproject.descriptor.DescriptorFile; -import org.torproject.descriptor.DescriptorReader; -import org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.NetworkStatusEntry; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.SortedMap; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.logging.Level; -import 
java.util.logging.Logger; - -/** - * Generates statistics on the average number of relays and bridges per - * day. Accepts parse results from <code>RelayDescriptorParser</code> and - * <code>BridgeDescriptorParser</code> and stores them in intermediate - * result files <code>stats/consensus-stats-raw</code> and - * <code>stats/bridge-consensus-stats-raw</code>. Writes final results to - * <code>stats/consensus-stats</code> for all days for which at least half - * of the expected consensuses or statuses are known. - */ -public class ConsensusStatsFileHandler { - - /** - * Intermediate results file holding the number of running bridges per - * bridge status. - */ - private File bridgeConsensusStatsRawFile; - - /** - * Number of running bridges in a given bridge status. Map keys are the bridge - * status time formatted as "yyyy-MM-dd HH:mm:ss", a comma, and the bridge - * authority nickname, map values are lines as read from - * <code>stats/bridge-consensus-stats-raw</code>. - */ - private SortedMap<String, String> bridgesRaw; - - /** - * Average number of running bridges per day. Map keys are dates - * formatted as "yyyy-MM-dd", map values are the remaining columns as written - * to <code>stats/consensus-stats</code>. - */ - private SortedMap<String, String> bridgesPerDay; - - /** - * Logger for this class. - */ - private Logger logger; - - private int bridgeResultsAdded = 0; - - /* Database connection string. */ - private String connectionUrl = null; - - private SimpleDateFormat dateTimeFormat; - - private File bridgesDir; - - private File statsDirectory; - - private boolean keepImportHistory; - - /** - * Initializes this class, including reading in intermediate results - * files <code>stats/consensus-stats-raw</code> and - * <code>stats/bridge-consensus-stats-raw</code> and final results file - * <code>stats/consensus-stats</code>. 
- */ - public ConsensusStatsFileHandler(String connectionUrl, - File bridgesDir, File statsDirectory, - boolean keepImportHistory) { - - if (bridgesDir == null || statsDirectory == null) { - throw new IllegalArgumentException(); - } - this.bridgesDir = bridgesDir; - this.statsDirectory = statsDirectory; - this.keepImportHistory = keepImportHistory; - - /* Initialize local data structures to hold intermediate and final - * results. */ - this.bridgesPerDay = new TreeMap<>(); - this.bridgesRaw = new TreeMap<>(); - - /* Initialize file names for intermediate and final results files. */ - this.bridgeConsensusStatsRawFile = new File( - "stats/bridge-consensus-stats-raw"); - - /* Initialize database connection string. */ - this.connectionUrl = connectionUrl; - - this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - - /* Initialize logger. */ - this.logger = Logger.getLogger( - ConsensusStatsFileHandler.class.getName()); - - /* Read in number of running bridges per bridge status. */ - if (this.bridgeConsensusStatsRawFile.exists()) { - try { - this.logger.fine("Reading file " - + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "..."); - BufferedReader br = new BufferedReader(new FileReader( - this.bridgeConsensusStatsRawFile)); - String line = null; - while ((line = br.readLine()) != null) { - if (line.startsWith("date")) { - /* Skip headers. */ - continue; - } - String[] parts = line.split(","); - if (parts.length < 2 || parts.length > 4) { - this.logger.warning("Corrupt line '" + line + "' in file " - + this.bridgeConsensusStatsRawFile.getAbsolutePath() - + "! Aborting to read this file!"); - break; - } - /* Assume that all lines without authority nickname are based on - * Tonga's network status, not Bifroest's. */ - String key = parts[0] + "," + (parts.length < 4 ? 
"Tonga" : parts[1]); - String value = null; - if (parts.length == 2) { - value = key + "," + parts[1] + ",0"; - } else if (parts.length == 3) { - value = key + "," + parts[1] + "," + parts[2]; - } else if (parts.length == 4) { - value = key + "," + parts[2] + "," + parts[3]; - } /* No more cases as we already checked the range above. */ - this.bridgesRaw.put(key, value); - } - br.close(); - this.logger.fine("Finished reading file " - + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to read file " - + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "!", - e); - } - } - } - - /** - * Adds the intermediate results of the number of running bridges in a - * given bridge status to the existing observations. - */ - public void addBridgeConsensusResults(long publishedMillis, - String authorityNickname, int running, int runningEc2Bridges) { - String publishedAuthority = dateTimeFormat.format(publishedMillis) + "," - + authorityNickname; - String line = publishedAuthority + "," + running + "," + runningEc2Bridges; - if (!this.bridgesRaw.containsKey(publishedAuthority)) { - this.logger.finer("Adding new bridge numbers: " + line); - this.bridgesRaw.put(publishedAuthority, line); - this.bridgeResultsAdded++; - } else if (!line.equals(this.bridgesRaw.get(publishedAuthority))) { - this.logger.warning("The numbers of running bridges we were just " - + "given (" + line + ") are different from what we learned " - + "before (" + this.bridgesRaw.get(publishedAuthority) + ")! " - + "Overwriting!"); - this.bridgesRaw.put(publishedAuthority, line); - } - } - - /** Imports sanitized bridge descriptors. 
*/ - public void importSanitizedBridges() { - if (bridgesDir.exists()) { - logger.fine("Importing files in directory " + bridgesDir + "/..."); - DescriptorReader reader = - DescriptorSourceFactory.createDescriptorReader(); - reader.addDirectory(bridgesDir); - if (keepImportHistory) { - reader.setExcludeFiles(new File(statsDirectory, - "consensus-stats-bridge-descriptor-history")); - } - Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - if (descriptorFile.getDescriptors() != null) { - String authority = null; - if (descriptorFile.getFileName().contains( - "4A0CCD2DDC7995083D73F5D667100C8A5831F16D")) { - authority = "Tonga"; - } else if (descriptorFile.getFileName().contains( - "1D8F3A91C37C5D1C4C19B1AD1D0CFBE8BF72D8E1")) { - authority = "Bifroest"; - } - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (descriptor instanceof BridgeNetworkStatus) { - if (authority == null) { - this.logger.warning("Did not recognize the bridge authority " - + "that generated " + descriptorFile.getFileName() - + ". Skipping."); - continue; - } - this.addBridgeNetworkStatus( - (BridgeNetworkStatus) descriptor, authority); - } - } - } - } - logger.info("Finished importing bridge descriptors."); - } - } - - private void addBridgeNetworkStatus(BridgeNetworkStatus status, - String authority) { - int runningBridges = 0; - int runningEc2Bridges = 0; - for (NetworkStatusEntry statusEntry - : status.getStatusEntries().values()) { - if (statusEntry.getFlags().contains("Running")) { - runningBridges++; - if (statusEntry.getNickname().startsWith("ec2bridge")) { - runningEc2Bridges++; - } - } - } - this.addBridgeConsensusResults(status.getPublishedMillis(), authority, - runningBridges, runningEc2Bridges); - } - - /** - * Aggregates the raw observations on relay and bridge numbers and - * writes both raw and aggregate observations to disk. 
- */ - public void writeFiles() { - - /* Go through raw observations and put everything into nested maps by day - * and bridge authority. */ - Map<String, Map<String, int[]>> bridgesPerDayAndAuthority = new HashMap<>(); - for (String bridgesRawLine : this.bridgesRaw.values()) { - String date = bridgesRawLine.substring(0, 10); - if (!bridgesPerDayAndAuthority.containsKey(date)) { - bridgesPerDayAndAuthority.put(date, new TreeMap<String, int[]>()); - } - String[] parts = bridgesRawLine.split(","); - String authority = parts[1]; - if (!bridgesPerDayAndAuthority.get(date).containsKey(authority)) { - bridgesPerDayAndAuthority.get(date).put(authority, new int[3]); - } - int[] bridges = bridgesPerDayAndAuthority.get(date).get(authority); - bridges[0] += Integer.parseInt(parts[2]); - bridges[1] += Integer.parseInt(parts[3]); - bridges[2]++; - } - - /* Sum up average numbers of running bridges per day reported by all bridge - * authorities and add these averages to final results. */ - for (Map.Entry<String, Map<String, int[]>> perDay - : bridgesPerDayAndAuthority.entrySet()) { - String date = perDay.getKey(); - int brunning = 0; - int brunningEc2 = 0; - for (int[] perAuthority : perDay.getValue().values()) { - int statuses = perAuthority[2]; - if (statuses < 12) { - /* Only write results if we have seen at least a dozen statuses. */ - continue; - } - brunning += perAuthority[0] / statuses; - brunningEc2 += perAuthority[1] / statuses; - } - String line = "," + brunning + "," + brunningEc2; - /* Are our results new? */ - if (!this.bridgesPerDay.containsKey(date)) { - this.logger.finer("Adding new average bridge numbers: " + date + line); - this.bridgesPerDay.put(date, line); - } else if (!line.equals(this.bridgesPerDay.get(date))) { - this.logger.finer("Replacing existing average bridge numbers (" - + this.bridgesPerDay.get(date) + " with new numbers: " + line); - this.bridgesPerDay.put(date, line); - } - } - - /* Write raw numbers of running bridges to disk. 
*/ - try { - this.logger.fine("Writing file " - + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "..."); - this.bridgeConsensusStatsRawFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter( - new FileWriter(this.bridgeConsensusStatsRawFile)); - bw.append("datetime,authority,brunning,brunningec2"); - bw.newLine(); - for (String line : this.bridgesRaw.values()) { - bw.append(line); - bw.newLine(); - } - bw.close(); - this.logger.fine("Finished writing file " - + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to write file " - + this.bridgeConsensusStatsRawFile.getAbsolutePath() + "!", - e); - } - - /* Add average number of bridges per day to the database. */ - if (connectionUrl != null) { - try { - Map<String, String> insertRows = new HashMap<>(); - Map<String, String> updateRows = new HashMap<>(); - insertRows.putAll(this.bridgesPerDay); - Connection conn = DriverManager.getConnection(connectionUrl); - conn.setAutoCommit(false); - Statement statement = conn.createStatement(); - ResultSet rs = statement.executeQuery( - "SELECT date, avg_running, avg_running_ec2 " - + "FROM bridge_network_size"); - while (rs.next()) { - String date = rs.getDate(1).toString(); - if (insertRows.containsKey(date)) { - String insertRow = insertRows.remove(date); - String[] parts = insertRow.substring(1).split(","); - long newAvgRunning = Long.parseLong(parts[0]); - long newAvgRunningEc2 = Long.parseLong(parts[1]); - long oldAvgRunning = rs.getLong(2); - long oldAvgRunningEc2 = rs.getLong(3); - if (newAvgRunning != oldAvgRunning - || newAvgRunningEc2 != oldAvgRunningEc2) { - updateRows.put(date, insertRow); - } - } - } - rs.close(); - PreparedStatement psU = conn.prepareStatement( - "UPDATE bridge_network_size SET avg_running = ?, " - + "avg_running_ec2 = ? 
WHERE date = ?"); - for (Map.Entry<String, String> e : updateRows.entrySet()) { - java.sql.Date date = java.sql.Date.valueOf(e.getKey()); - String[] parts = e.getValue().substring(1).split(","); - long avgRunning = Long.parseLong(parts[0]); - long avgRunningEc2 = Long.parseLong(parts[1]); - psU.clearParameters(); - psU.setLong(1, avgRunning); - psU.setLong(2, avgRunningEc2); - psU.setDate(3, date); - psU.executeUpdate(); - } - PreparedStatement psI = conn.prepareStatement( - "INSERT INTO bridge_network_size (avg_running, " - + "avg_running_ec2, date) VALUES (?, ?, ?)"); - for (Map.Entry<String, String> e : insertRows.entrySet()) { - java.sql.Date date = java.sql.Date.valueOf(e.getKey()); - String[] parts = e.getValue().substring(1).split(","); - long avgRunning = Long.parseLong(parts[0]); - long avgRunningEc2 = Long.parseLong(parts[1]); - psI.clearParameters(); - psI.setLong(1, avgRunning); - psI.setLong(2, avgRunningEc2); - psI.setDate(3, date); - psI.executeUpdate(); - } - conn.commit(); - conn.close(); - } catch (SQLException e) { - logger.log(Level.WARNING, "Failed to add average bridge numbers " - + "to database.", e); - } - } - - /* Write stats. 
*/ - StringBuilder dumpStats = new StringBuilder("Finished writing " - + "statistics on bridge network statuses to disk.\nAdded " - + this.bridgeResultsAdded + " bridge network status(es) in this " - + "execution."); - long now = System.currentTimeMillis(); - SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - if (this.bridgesRaw.isEmpty()) { - dumpStats.append("\nNo bridge status known yet."); - } else { - dumpStats.append("\nLast known bridge status was published " - + this.bridgesRaw.lastKey() + "."); - try { - if (now - 6L * 60L * 60L * 1000L > dateTimeFormat.parse( - this.bridgesRaw.lastKey()).getTime()) { - logger.warning("Last known bridge status is more than 6 hours " - + "old: " + this.bridgesRaw.lastKey()); - } - } catch (ParseException e) { - logger.warning("Can't parse the timestamp? Reason: " + e); - } - } - logger.info(dumpStats.toString()); - } -} - diff --git a/modules/legacy/src/org/torproject/ernie/cron/performance/TorperfProcessor.java b/modules/legacy/src/org/torproject/ernie/cron/performance/TorperfProcessor.java deleted file mode 100644 index 2883299..0000000 --- a/modules/legacy/src/org/torproject/ernie/cron/performance/TorperfProcessor.java +++ /dev/null @@ -1,292 +0,0 @@ -/* Copyright 2011--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.ernie.cron.performance; - -import org.torproject.descriptor.Descriptor; -import org.torproject.descriptor.DescriptorFile; -import org.torproject.descriptor.DescriptorReader; -import org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.TorperfResult; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; 
-import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.logging.Level; -import java.util.logging.Logger; - -public class TorperfProcessor { - - /** Processes Torperf data from the given directory and writes - * aggregates statistics to the given stats directory. */ - public TorperfProcessor(File torperfDirectory, File statsDirectory) { - - if (torperfDirectory == null || statsDirectory == null) { - throw new IllegalArgumentException(); - } - - Logger logger = Logger.getLogger(TorperfProcessor.class.getName()); - File rawFile = new File(statsDirectory, "torperf-raw"); - File statsFile = new File(statsDirectory, "torperf.csv"); - SortedMap<String, String> rawObs = new TreeMap<>(); - SortedMap<String, String> stats = new TreeMap<>(); - int addedRawObs = 0; - SimpleDateFormat formatter = - new SimpleDateFormat("yyyy-MM-dd,HH:mm:ss"); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); - try { - if (rawFile.exists()) { - logger.fine("Reading file " + rawFile.getAbsolutePath() + "..."); - BufferedReader br = new BufferedReader(new FileReader(rawFile)); - String line = br.readLine(); // ignore header - while ((line = br.readLine()) != null) { - if (line.split(",").length != 4) { - logger.warning("Corrupt line in " + rawFile.getAbsolutePath() - + "!"); - break; - } - String key = line.substring(0, line.lastIndexOf(",")); - rawObs.put(key, line); - } - br.close(); - logger.fine("Finished reading file " + rawFile.getAbsolutePath() - + "."); - } - if (statsFile.exists()) { - logger.fine("Reading file " + statsFile.getAbsolutePath() - + "..."); - BufferedReader br = new BufferedReader(new FileReader(statsFile)); - String line = br.readLine(); // ignore header - while ((line = br.readLine()) != null) { - String[] parts = line.split(","); - String key = String.format("%s,%s,%s", parts[0], parts[1], - parts[2]); - stats.put(key, line); - } - br.close(); - logger.fine("Finished reading 
file " + statsFile.getAbsolutePath() - + "."); - } - if (torperfDirectory.exists()) { - logger.fine("Importing files in " + torperfDirectory + "/..."); - DescriptorReader descriptorReader = - DescriptorSourceFactory.createDescriptorReader(); - descriptorReader.addDirectory(torperfDirectory); - descriptorReader.setExcludeFiles(new File(statsDirectory, - "torperf-history")); - Iterator<DescriptorFile> descriptorFiles = - descriptorReader.readDescriptors(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - if (descriptorFile.getException() != null) { - logger.log(Level.FINE, "Error parsing file.", - descriptorFile.getException()); - continue; - } - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (!(descriptor instanceof TorperfResult)) { - continue; - } - TorperfResult result = (TorperfResult) descriptor; - String source = result.getSource(); - long fileSize = result.getFileSize(); - if (fileSize == 51200) { - source += "-50kb"; - } else if (fileSize == 1048576) { - source += "-1mb"; - } else if (fileSize == 5242880) { - source += "-5mb"; - } else { - logger.fine("Unexpected file size '" + fileSize - + "'. 
Skipping."); - continue; - } - String dateTime = formatter.format(result.getStartMillis()); - long completeMillis = result.getDataCompleteMillis() - - result.getStartMillis(); - String key = source + "," + dateTime; - String value = key; - if ((result.didTimeout() == null - && result.getDataCompleteMillis() < 1) - || (result.didTimeout() != null && result.didTimeout())) { - value += ",-2"; // -2 for timeout - } else if (result.getReadBytes() < fileSize) { - value += ",-1"; // -1 for failure - } else { - value += "," + completeMillis; - } - if (!rawObs.containsKey(key)) { - rawObs.put(key, value); - addedRawObs++; - } - } - } - logger.fine("Finished importing files in " + torperfDirectory - + "/."); - } - if (rawObs.size() > 0) { - logger.fine("Writing file " + rawFile.getAbsolutePath() + "..."); - rawFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter(rawFile)); - bw.append("source,date,start,completemillis\n"); - String tempSourceDate = null; - Iterator<Map.Entry<String, String>> it = - rawObs.entrySet().iterator(); - List<Long> dlTimes = new ArrayList<>(); - boolean haveWrittenFinalLine = false; - SortedMap<String, List<Long>> dlTimesAllSources = new TreeMap<>(); - SortedMap<String, long[]> statusesAllSources = new TreeMap<>(); - long failures = 0; - long timeouts = 0; - long requests = 0; - while (it.hasNext() || !haveWrittenFinalLine) { - Map.Entry<String, String> next = - it.hasNext() ? 
it.next() : null; - if (tempSourceDate != null - && (next == null || !(next.getValue().split(",")[0] + "," - + next.getValue().split(",")[1]).equals(tempSourceDate))) { - if (dlTimes.size() > 4) { - Collections.sort(dlTimes); - long q1 = dlTimes.get(dlTimes.size() / 4 - 1); - long md = dlTimes.get(dlTimes.size() / 2 - 1); - long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1); - String[] tempParts = tempSourceDate.split("[-,]", 3); - String tempDate = tempParts[2]; - int tempSize = Integer.parseInt( - tempParts[1].substring(0, tempParts[1].length() - 2)) - * 1024 * (tempParts[1].endsWith("mb") ? 1024 : 1); - String tempSource = tempParts[0]; - String tempDateSizeSource = String.format("%s,%d,%s", - tempDate, tempSize, tempSource); - stats.put(tempDateSizeSource, - String.format("%s,%s,%s,%s,%s,%s,%s", - tempDateSizeSource, q1, md, q3, timeouts, failures, - requests)); - String allDateSizeSource = String.format("%s,%d,", - tempDate, tempSize); - if (dlTimesAllSources.containsKey(allDateSizeSource)) { - dlTimesAllSources.get(allDateSizeSource).addAll(dlTimes); - } else { - dlTimesAllSources.put(allDateSizeSource, dlTimes); - } - if (statusesAllSources.containsKey(allDateSizeSource)) { - long[] status = statusesAllSources.get(allDateSizeSource); - status[0] += timeouts; - status[1] += failures; - status[2] += requests; - } else { - long[] status = new long[3]; - status[0] = timeouts; - status[1] = failures; - status[2] = requests; - statusesAllSources.put(allDateSizeSource, status); - } - } - dlTimes = new ArrayList<>(); - failures = timeouts = requests = 0; - if (next == null) { - haveWrittenFinalLine = true; - } - } - if (next != null) { - bw.append(next.getValue() + "\n"); - String[] parts = next.getValue().split(","); - tempSourceDate = parts[0] + "," + parts[1]; - long completeMillis = Long.parseLong(parts[3]); - if (completeMillis == -2L) { - timeouts++; - } else if (completeMillis == -1L) { - failures++; - } else { - dlTimes.add(Long.parseLong(parts[3])); - } - 
requests++; - } - } - bw.close(); - for (Map.Entry<String, List<Long>> e - : dlTimesAllSources.entrySet()) { - String allDateSizeSource = e.getKey(); - dlTimes = e.getValue(); - Collections.sort(dlTimes); - long[] status = statusesAllSources.get(allDateSizeSource); - timeouts = status[0]; - failures = status[1]; - requests = status[2]; - long q1 = dlTimes.get(dlTimes.size() / 4 - 1); - long md = dlTimes.get(dlTimes.size() / 2 - 1); - long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1); - stats.put(allDateSizeSource, - String.format("%s,%s,%s,%s,%s,%s,%s", - allDateSizeSource, q1, md, q3, timeouts, failures, - requests)); - } - logger.fine("Finished writing file " + rawFile.getAbsolutePath() - + "."); - } - if (stats.size() > 0) { - logger.fine("Writing file " + statsFile.getAbsolutePath() - + "..."); - SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); - dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - String yesterday = dateFormat.format(System.currentTimeMillis() - - 86400000L); - statsFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter(statsFile)); - bw.append("date,size,source,q1,md,q3,timeouts,failures," - + "requests\n"); - for (String s : stats.values()) { - if (s.compareTo(yesterday) < 0) { - bw.append(s + "\n"); - } - } - bw.close(); - logger.fine("Finished writing file " + statsFile.getAbsolutePath() - + "."); - } - } catch (IOException e) { - logger.log(Level.WARNING, "Failed writing " - + rawFile.getAbsolutePath() + " or " - + statsFile.getAbsolutePath() + "!", e); - } - - /* Write stats. 
*/ - StringBuilder dumpStats = new StringBuilder("Finished writing " - + "statistics on torperf results.\nAdded " + addedRawObs - + " new observations in this execution.\n" - + "Last known obserations by source and file size are:"); - String lastSource = null; - String lastLine = null; - for (String s : rawObs.keySet()) { - String[] parts = s.split(","); - if (lastSource == null) { - lastSource = parts[0]; - } else if (!parts[0].equals(lastSource)) { - String lastKnownObservation = lastLine.split(",")[1] + " " - + lastLine.split(",")[2]; - dumpStats.append("\n" + lastSource + " " + lastKnownObservation); - lastSource = parts[0]; - } - lastLine = s; - } - if (lastSource != null) { - String lastKnownObservation = lastLine.split(",")[1] + " " - + lastLine.split(",")[2]; - dumpStats.append("\n" + lastSource + " " + lastKnownObservation); - } - logger.info(dumpStats.toString()); - } -} - diff --git a/modules/webstats/build.xml b/modules/webstats/build.xml index bcfe251..3c3291f 100644 --- a/modules/webstats/build.xml +++ b/modules/webstats/build.xml @@ -1,8 +1,5 @@ <project default="run" name="webstats" basedir=".">
- <property name="sources" value="src/main/java"/> - <property name="testsources" value="src/test/java"/> - <include file="../../shared/build-base.xml" as="basetask"/> <target name="clean" depends="basetask.clean"/> <target name="compile" depends="basetask.compile"/> diff --git a/shared/build-base.xml b/shared/build-base.xml index 759e1d0..e6c09de 100644 --- a/shared/build-base.xml +++ b/shared/build-base.xml @@ -1,17 +1,18 @@ <project basedir=".">
- <property name="sources" value="src"/> + <property name="sources" value="src/main/java"/> <property name="testsources" value="src/test/java"/> <property name="libs" value="../../shared/lib"/> <property name="generated" value="generated"/> <property name="classes" value="${generated}/classes/"/> <property name="testclasses" value="${generated}/test-classes/"/> <property name="source-and-target-java-version" value="1.7" /> - <property name="descriptorversion" value="1.5.0" /> + <property name="descriptorversion" value="1.6.0" />
<path id="base.classpath"> <pathelement path="${classes}"/> <fileset dir="${libs}"> + <include name="commons-codec-1.9.jar"/> <include name="commons-compress-1.9.jar"/> <include name="commons-lang-2.6.jar"/> <include name="descriptor-${descriptorversion}.jar"/>