commit 6ca961262f87f90fd075e69f2eae4483b32a6e1c Author: Karsten Loesing karsten.loesing@gmx.net Date: Thu Dec 13 13:23:40 2012 +0100
Split up SanitizedBridgesReader in two parts. --- .../ernie/cron/BridgeStatsFileHandler.java | 98 +++++++++++++++- .../ernie/cron/ConsensusStatsFileHandler.java | 65 ++++++++++- src/org/torproject/ernie/cron/Main.java | 36 +++--- .../ernie/cron/SanitizedBridgesReader.java | 130 -------------------- 4 files changed, 182 insertions(+), 147 deletions(-)
diff --git a/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java b/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java index 70a6a6f..4534d2f 100644 --- a/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java +++ b/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java @@ -18,6 +18,7 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.SortedMap; @@ -28,6 +29,13 @@ import java.util.TreeSet; import java.util.logging.Level; import java.util.logging.Logger;
+import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.ExtraInfoDescriptor; +import org.torproject.descriptor.ServerDescriptor; + /** * Determines estimates of bridge users per country and day from the * extra-info descriptors that bridges publish. In a first step, the @@ -105,12 +113,26 @@ public class BridgeStatsFileHandler {
private SimpleDateFormat dateTimeFormat;
+ private File bridgesDir; + + private File statsDirectory; + + private boolean keepImportHistory; + /** * Initializes this class, including reading in intermediate results * files <code>stats/bridge-stats-raw</code> and * <code>stats/hashed-relay-identities</code>. */ - public BridgeStatsFileHandler(String connectionURL) { + public BridgeStatsFileHandler(String connectionURL, + File bridgesDir, File statsDirectory, boolean keepImportHistory) { + + if (bridgesDir == null || statsDirectory == null) { + throw new IllegalArgumentException(); + } + this.bridgesDir = bridgesDir; + this.statsDirectory = statsDirectory; + this.keepImportHistory = keepImportHistory;
/* Initialize set of known countries. */ this.countries = new TreeSet<String>(); @@ -328,6 +350,80 @@ public class BridgeStatsFileHandler { } }
+ public void importSanitizedBridges() { + if (bridgesDir.exists()) { + logger.fine("Importing files in directory " + bridgesDir + "/..."); + DescriptorReader reader = + DescriptorSourceFactory.createDescriptorReader(); + reader.addDirectory(bridgesDir); + if (keepImportHistory) { + reader.setExcludeFiles(new File(statsDirectory, + "bridge-descriptor-history")); + } + Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + if (descriptorFile.getDescriptors() != null) { + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (descriptor instanceof ServerDescriptor) { + this.addServerDescriptor((ServerDescriptor) descriptor); + } else if (descriptor instanceof ExtraInfoDescriptor) { + this.addExtraInfoDescriptor( + (ExtraInfoDescriptor) descriptor); + } + } + } + } + logger.info("Finished importing bridge descriptors."); + } + } + + private void addServerDescriptor(ServerDescriptor descriptor) { + if (descriptor.getPlatform() != null && + descriptor.getPlatform().startsWith("Tor 0.2.2")) { + this.addZeroTwoTwoDescriptor(descriptor.getFingerprint(), + descriptor.getPublishedMillis()); + } + } + + private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) { + if (!this.isKnownRelay(descriptor.getFingerprint())) { + if (descriptor.getGeoipStartTimeMillis() >= 0 && + descriptor.getGeoipClientOrigins() != null) { + long seconds = (descriptor.getPublishedMillis() + - descriptor.getGeoipStartTimeMillis()) / 1000L; + double allUsers = 0.0D; + Map<String, String> obs = new HashMap<String, String>(); + for (Map.Entry<String, Integer> e : + descriptor.getGeoipClientOrigins().entrySet()) { + String country = e.getKey(); + double users = ((double) e.getValue() - 4) * 86400.0D + / ((double) seconds); + allUsers += users; + obs.put(country, String.format("%.2f", users)); + } + obs.put("zy", String.format("%.2f", allUsers)); + this.addObs(descriptor.getFingerprint(), + descriptor.getPublishedMillis(), obs); + } + if (descriptor.getBridgeStatsEndMillis() >= 0 && + descriptor.getBridgeIps() != null) { + double allUsers = 0.0D; + Map<String, String> obs = new HashMap<String, String>(); + for (Map.Entry<String, Integer> e : + descriptor.getBridgeIps().entrySet()) { + String country = e.getKey(); + double users = (double) e.getValue() - 4; + allUsers += users; + obs.put(country, String.format("%.2f", users)); + } + obs.put("zy", String.format("%.2f", allUsers)); + this.addObs(descriptor.getFingerprint(), + descriptor.getBridgeStatsEndMillis(), obs); + } + } + } + /** * Writes the list of hashed relay identities and bridge user numbers as * observed by single bridges to disk, aggregates per-day statistics for diff --git a/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java b/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java index 1babb94..1fcbf19 100644 --- a/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java +++ b/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java @@ -25,6 +25,13 @@ import java.util.TreeMap; import java.util.logging.Level; import java.util.logging.Logger;
+import org.torproject.descriptor.BridgeNetworkStatus; +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.NetworkStatusEntry; + /** * Generates statistics on the average number of relays and bridges per * day. Accepts parse results from <code>RelayDescriptorParser</code> and @@ -68,13 +75,28 @@ public class ConsensusStatsFileHandler {
private SimpleDateFormat dateTimeFormat;
+ private File bridgesDir; + + private File statsDirectory; + + private boolean keepImportHistory; + /** * Initializes this class, including reading in intermediate results * files <code>stats/consensus-stats-raw</code> and * <code>stats/bridge-consensus-stats-raw</code> and final results file * <code>stats/consensus-stats</code>. */ - public ConsensusStatsFileHandler(String connectionURL) { + public ConsensusStatsFileHandler(String connectionURL, + File bridgesDir, File statsDirectory, + boolean keepImportHistory) { + + if (bridgesDir == null || statsDirectory == null) { + throw new IllegalArgumentException(); + } + this.bridgesDir = bridgesDir; + this.statsDirectory = statsDirectory; + this.keepImportHistory = keepImportHistory;
/* Initialize local data structures to hold intermediate and final * results. */ @@ -153,6 +175,47 @@ public class ConsensusStatsFileHandler { } }
+ public void importSanitizedBridges() { + if (bridgesDir.exists()) { + logger.fine("Importing files in directory " + bridgesDir + "/..."); + DescriptorReader reader = + DescriptorSourceFactory.createDescriptorReader(); + reader.addDirectory(bridgesDir); + if (keepImportHistory) { + reader.setExcludeFiles(new File(statsDirectory, + "bridge-descriptor-history")); + } + Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + if (descriptorFile.getDescriptors() != null) { + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (descriptor instanceof BridgeNetworkStatus) { + this.addBridgeNetworkStatus( + (BridgeNetworkStatus) descriptor); + } + } + } + } + logger.info("Finished importing bridge descriptors."); + } + } + + private void addBridgeNetworkStatus(BridgeNetworkStatus status) { + int runningBridges = 0, runningEc2Bridges = 0; + for (NetworkStatusEntry statusEntry : + status.getStatusEntries().values()) { + if (statusEntry.getFlags().contains("Running")) { + runningBridges++; + if (statusEntry.getNickname().startsWith("ec2bridge")) { + runningEc2Bridges++; + } + } + } + this.addBridgeConsensusResults(status.getPublishedMillis(), + runningBridges, runningEc2Bridges); + } + /** * Aggregates the raw observations on relay and bridge numbers and * writes both raw and aggregate observations to disk. diff --git a/src/org/torproject/ernie/cron/Main.java b/src/org/torproject/ernie/cron/Main.java index 6c98602..457433f 100644 --- a/src/org/torproject/ernie/cron/Main.java +++ b/src/org/torproject/ernie/cron/Main.java @@ -35,7 +35,10 @@ public class Main { // Prepare bridge stats file handler BridgeStatsFileHandler bsfh = config.getWriteBridgeStats() ? new BridgeStatsFileHandler( - config.getRelayDescriptorDatabaseJDBC()) : null; + config.getRelayDescriptorDatabaseJDBC(), + new File(config.getSanitizedBridgesDirectory()), + statsDirectory, config.getKeepSanitizedBridgesImportHistory()) : + null;
// Import relay descriptors if (config.getImportDirectoryArchives()) { @@ -54,26 +57,29 @@ public class Main { rddi.closeConnection(); }
+ // Import sanitized bridges and write updated stats files to disk + if (bsfh != null) { + if (config.getImportSanitizedBridges()) { + bsfh.importSanitizedBridges(); + } + bsfh.writeFiles(); + bsfh = null; + } + // Prepare consensus stats file handler (used for stats on running // bridges only) ConsensusStatsFileHandler csfh = config.getWriteBridgeStats() ? new ConsensusStatsFileHandler( - config.getRelayDescriptorDatabaseJDBC()) : null; - - // Import bridge descriptors - if (config.getWriteBridgeStats() && - config.getImportSanitizedBridges()) { - new SanitizedBridgesReader(csfh, bsfh, - new File(config.getSanitizedBridgesDirectory()), - statsDirectory, config.getKeepSanitizedBridgesImportHistory()); - } + config.getRelayDescriptorDatabaseJDBC(), + new File(config.getSanitizedBridgesDirectory()), + statsDirectory, config.getKeepSanitizedBridgesImportHistory()) : + null;
- // Write updated stats files to disk - if (bsfh != null) { - bsfh.writeFiles(); - bsfh = null; - } + // Import sanitized bridges and write updated stats files to disk if (csfh != null) { + if (config.getImportSanitizedBridges()) { + csfh.importSanitizedBridges(); + } csfh.writeFiles(); csfh = null; } diff --git a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java deleted file mode 100644 index da22e56..0000000 --- a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java +++ /dev/null @@ -1,130 +0,0 @@ -/* Copyright 2011, 2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.cron; - -import java.io.File; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.logging.Logger; - -import org.torproject.descriptor.BridgeNetworkStatus; -import org.torproject.descriptor.Descriptor; -import org.torproject.descriptor.DescriptorFile; -import org.torproject.descriptor.DescriptorReader; -import org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.ExtraInfoDescriptor; -import org.torproject.descriptor.NetworkStatusEntry; -import org.torproject.descriptor.ServerDescriptor; - -public class SanitizedBridgesReader { - private ConsensusStatsFileHandler csfh; - private BridgeStatsFileHandler bsfh; - private Logger logger; - public SanitizedBridgesReader(ConsensusStatsFileHandler csfh, - BridgeStatsFileHandler bsfh, File bridgesDir, File statsDirectory, - boolean keepImportHistory) { - - if (csfh == null || bsfh == null || bridgesDir == null || - statsDirectory == null) { - throw new IllegalArgumentException(); - } - - this.csfh = csfh; - this.bsfh = bsfh; - this.logger = - Logger.getLogger(SanitizedBridgesReader.class.getName()); - - if (bridgesDir.exists()) { - logger.fine("Importing files in directory " + bridgesDir + "/..."); - DescriptorReader reader = - DescriptorSourceFactory.createDescriptorReader(); - reader.addDirectory(bridgesDir); - if (keepImportHistory) { - reader.setExcludeFiles(new File(statsDirectory, - "bridge-descriptor-history")); - } - Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - if (descriptorFile.getDescriptors() != null) { - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (descriptor instanceof BridgeNetworkStatus) { - this.addBridgeNetworkStatus( - (BridgeNetworkStatus) descriptor); - } else if (descriptor instanceof ServerDescriptor) { - this.addServerDescriptor((ServerDescriptor) descriptor); - } else if (descriptor instanceof ExtraInfoDescriptor) { - this.addExtraInfoDescriptor( - (ExtraInfoDescriptor) descriptor); - } - } - } - } - logger.info("Finished importing bridge descriptors."); - } - } - - private void addBridgeNetworkStatus(BridgeNetworkStatus status) { - int runningBridges = 0, runningEc2Bridges = 0; - for (NetworkStatusEntry statusEntry : - status.getStatusEntries().values()) { - if (statusEntry.getFlags().contains("Running")) { - runningBridges++; - if (statusEntry.getNickname().startsWith("ec2bridge")) { - runningEc2Bridges++; - } - } - } - this.csfh.addBridgeConsensusResults(status.getPublishedMillis(), - runningBridges, runningEc2Bridges); - } - - private void addServerDescriptor(ServerDescriptor descriptor) { - if (descriptor.getPlatform() != null && - descriptor.getPlatform().startsWith("Tor 0.2.2")) { - this.bsfh.addZeroTwoTwoDescriptor(descriptor.getFingerprint(), - descriptor.getPublishedMillis()); - } - } - - private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) { - if (!this.bsfh.isKnownRelay(descriptor.getFingerprint())) { - if (descriptor.getGeoipStartTimeMillis() >= 0 && - descriptor.getGeoipClientOrigins() != null) { - long seconds = (descriptor.getPublishedMillis() - - descriptor.getGeoipStartTimeMillis()) / 1000L; - double allUsers = 0.0D; - Map<String, String> obs = new HashMap<String, String>(); - for (Map.Entry<String, Integer> e : - descriptor.getGeoipClientOrigins().entrySet()) { - String country = e.getKey(); - double users = ((double) e.getValue() - 4) * 86400.0D - / ((double) seconds); - allUsers += users; - obs.put(country, String.format("%.2f", users)); - } - obs.put("zy", String.format("%.2f", allUsers)); - this.bsfh.addObs(descriptor.getFingerprint(), - descriptor.getPublishedMillis(), obs); - } - if (descriptor.getBridgeStatsEndMillis() >= 0 && - descriptor.getBridgeIps() != null) { - double allUsers = 0.0D; - Map<String, String> obs = new HashMap<String, String>(); - for (Map.Entry<String, Integer> e : - descriptor.getBridgeIps().entrySet()) { - String country = e.getKey(); - double users = (double) e.getValue() - 4; - allUsers += users; - obs.put(country, String.format("%.2f", users)); - } - obs.put("zy", String.format("%.2f", allUsers)); - this.bsfh.addObs(descriptor.getFingerprint(), - descriptor.getBridgeStatsEndMillis(), obs); - } - - } - } -} -