commit 1c0661e99eeed206f6c0c0beee72ae82afdda131 Author: Karsten Loesing karsten.loesing@gmx.net Date: Wed May 18 09:09:11 2016 +0200
Move all Java sources to src/main/java/. --- build.xml | 4 +- src/main/java/org/torproject/collector/Main.java | 83 ++ .../bridgedescs/BridgeDescriptorParser.java | 51 + .../bridgedescs/BridgeSnapshotReader.java | 228 ++++ .../bridgedescs/SanitizedBridgesWriter.java | 1330 ++++++++++++++++++++ .../collector/exitlists/ExitListDownloader.java | 212 ++++ .../collector/index/CreateIndexJson.java | 168 +++ .../torproject/collector/main/Configuration.java | 318 +++++ .../org/torproject/collector/main/LockFile.java | 56 + .../collector/relaydescs/ArchiveReader.java | 286 +++++ .../collector/relaydescs/ArchiveWriter.java | 845 +++++++++++++ .../relaydescs/CachedRelayDescriptorReader.java | 255 ++++ .../collector/relaydescs/ReferenceChecker.java | 319 +++++ .../relaydescs/RelayDescriptorDownloader.java | 1134 +++++++++++++++++ .../relaydescs/RelayDescriptorParser.java | 337 +++++ .../collector/torperf/TorperfDownloader.java | 643 ++++++++++ src/org/torproject/collector/Main.java | 83 -- .../bridgedescs/BridgeDescriptorParser.java | 51 - .../bridgedescs/BridgeSnapshotReader.java | 228 ---- .../bridgedescs/SanitizedBridgesWriter.java | 1330 -------------------- .../collector/exitlists/ExitListDownloader.java | 212 ---- .../collector/index/CreateIndexJson.java | 168 --- .../torproject/collector/main/Configuration.java | 318 ----- src/org/torproject/collector/main/LockFile.java | 56 - .../collector/relaydescs/ArchiveReader.java | 286 ----- .../collector/relaydescs/ArchiveWriter.java | 845 ------------- .../relaydescs/CachedRelayDescriptorReader.java | 255 ---- .../collector/relaydescs/ReferenceChecker.java | 319 ----- .../relaydescs/RelayDescriptorDownloader.java | 1134 ----------------- .../relaydescs/RelayDescriptorParser.java | 337 ----- .../collector/torperf/TorperfDownloader.java | 643 ---------- 31 files changed, 6267 insertions(+), 6267 deletions(-)
diff --git a/build.xml b/build.xml index f07c67b..bbd83af 100644 --- a/build.xml +++ b/build.xml @@ -3,11 +3,11 @@ <property name="name" value="CollecTor"/> <property name="descriptorversion" value="1.1.0-dev" /> <property name="source-and-target-java-version" value="1.7" /> - <property name="sources" value="src/"/> + <property name="sources" value="src/main/java"/> <property name="resources" value="resources/"/> <property name="generated" value="generated/"/> <property name="testresult" value="${generated}/test-results"/> - <property name="testsources" value="test"/> + <property name="testsources" value="src/test/java"/> <property name="coverageresult" value="${generated}/coverage-report/"/> <property name="instrument" value="${generated}/instrument/"/> <property name="classes" value="${generated}/classes/"/> diff --git a/src/main/java/org/torproject/collector/Main.java b/src/main/java/org/torproject/collector/Main.java new file mode 100644 index 0000000..9c64696 --- /dev/null +++ b/src/main/java/org/torproject/collector/Main.java @@ -0,0 +1,83 @@ +/* Copyright 2016 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector; + +import org.torproject.collector.bridgedescs.SanitizedBridgesWriter; +import org.torproject.collector.exitlists.ExitListDownloader; +import org.torproject.collector.index.CreateIndexJson; +import org.torproject.collector.relaydescs.ArchiveWriter; +import org.torproject.collector.torperf.TorperfDownloader; + +import java.lang.reflect.InvocationTargetException; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Logger; + +/** + * Main class for starting a CollecTor instance. + * <br> + * Run without arguments in order to read the usage information, i.e. + * <br> + * <code>java -jar collector.jar</code> + */ +public class Main { + + private static Logger log = Logger.getLogger(Main.class.getName()); + + /** All possible main classes. 
+ * If a new CollecTorMain class is available, just add it to this map. + */ + private static final Map<String, Class> collecTorMains = new HashMap<>(); + + static { // add a new main class here + collecTorMains.put("bridgedescs", SanitizedBridgesWriter.class); + collecTorMains.put("exitlists", ExitListDownloader.class); + collecTorMains.put("updateindex", CreateIndexJson.class); + collecTorMains.put("relaydescs", ArchiveWriter.class); + collecTorMains.put("torperf", TorperfDownloader.class); + } + + private static final String modules = collecTorMains.keySet().toString() + .replace("[", "").replace("]", "").replaceAll(", ", "|"); + + /** + * One argument is necessary. + * See class description {@link Main}. + */ + public static void main(String[] args) { + if (null == args || args.length != 1) { + printUsageAndExit("CollecTor needs exactly one argument."); + } else { + invokeGivenMainAndExit(args[0]); + } + } + + private static void printUsageAndExit(String msg) { + final String usage = "Usage:\njava -jar collector.jar " + + "<" + modules + ">"; + System.out.println(msg + "\n" + usage); + System.exit(0); + } + + private static void invokeGivenMainAndExit(String mainId) { + Class clazz = collecTorMains.get(mainId); + if (null == clazz) { + printUsageAndExit("Unknown argument: " + mainId); + } + invokeMainOnClassAndExit(clazz); + } + + private static void invokeMainOnClassAndExit(Class clazz) { + try { + clazz.getMethod("main", new Class[] { String[].class }) + .invoke(null, (Object) new String[]{}); + System.exit(0); + } catch (NoSuchMethodException | IllegalAccessException + | InvocationTargetException e) { + log.severe("Cannot invoke 'main' method on " + + clazz.getName() + ". 
" + e); + } + } +} + diff --git a/src/main/java/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java b/src/main/java/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java new file mode 100644 index 0000000..f683ea0 --- /dev/null +++ b/src/main/java/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java @@ -0,0 +1,51 @@ +/* Copyright 2010--2016 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.bridgedescs; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.StringReader; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class BridgeDescriptorParser { + + private SanitizedBridgesWriter sbw; + + private Logger logger; + + public BridgeDescriptorParser(SanitizedBridgesWriter sbw) { + this.sbw = sbw; + this.logger = + Logger.getLogger(BridgeDescriptorParser.class.getName()); + } + + public void parse(byte[] allData, String dateTime) { + try { + BufferedReader br = new BufferedReader(new StringReader( + new String(allData, "US-ASCII"))); + String line = br.readLine(); + if (line == null) { + return; + } else if (line.startsWith("router ")) { + if (this.sbw != null) { + this.sbw.sanitizeAndStoreServerDescriptor(allData); + } + } else if (line.startsWith("extra-info ")) { + if (this.sbw != null) { + this.sbw.sanitizeAndStoreExtraInfoDescriptor(allData); + } + } else { + if (this.sbw != null) { + this.sbw.sanitizeAndStoreNetworkStatus(allData, dateTime); + } + } + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not parse bridge descriptor.", + e); + return; + } + } +} + diff --git a/src/main/java/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java b/src/main/java/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java new file mode 100644 index 0000000..2d41d18 --- /dev/null +++ b/src/main/java/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java @@ -0,0 +1,228 @@ +/* Copyright 2010--2016 The Tor 
Project + * See LICENSE for licensing information */ + +package org.torproject.collector.bridgedescs; + +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.StringReader; +import java.util.HashSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.Stack; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Reads the half-hourly snapshots of bridge descriptors from Tonga. + */ +public class BridgeSnapshotReader { + public BridgeSnapshotReader(BridgeDescriptorParser bdp, + File bridgeDirectoriesDir, File statsDirectory) { + + if (bdp == null || bridgeDirectoriesDir == null + || statsDirectory == null) { + throw new IllegalArgumentException(); + } + + Logger logger = + Logger.getLogger(BridgeSnapshotReader.class.getName()); + SortedSet<String> parsed = new TreeSet<String>(); + File bdDir = bridgeDirectoriesDir; + File pbdFile = new File(statsDirectory, "parsed-bridge-directories"); + boolean modified = false; + if (bdDir.exists()) { + if (pbdFile.exists()) { + logger.fine("Reading file " + pbdFile.getAbsolutePath() + "..."); + try { + BufferedReader br = new BufferedReader(new FileReader(pbdFile)); + String line = null; + while ((line = br.readLine()) != null) { + parsed.add(line); + } + br.close(); + logger.fine("Finished reading file " + + pbdFile.getAbsolutePath() + "."); + } catch (IOException e) { + logger.log(Level.WARNING, "Failed reading file " + + pbdFile.getAbsolutePath() + "!", e); + return; + } + } 
+ logger.fine("Importing files in directory " + bridgeDirectoriesDir + + "/..."); + Set<String> descriptorImportHistory = new HashSet<String>(); + int parsedFiles = 0; + int skippedFiles = 0; + int parsedStatuses = 0; + int parsedServerDescriptors = 0; + int skippedServerDescriptors = 0; + int parsedExtraInfoDescriptors = 0; + int skippedExtraInfoDescriptors = 0; + Stack<File> filesInInputDir = new Stack<File>(); + filesInInputDir.add(bdDir); + while (!filesInInputDir.isEmpty()) { + File pop = filesInInputDir.pop(); + if (pop.isDirectory()) { + for (File f : pop.listFiles()) { + filesInInputDir.add(f); + } + } else if (!parsed.contains(pop.getName())) { + try { + FileInputStream in = new FileInputStream(pop); + if (in.available() > 0) { + TarArchiveInputStream tais = null; + if (pop.getName().endsWith(".tar.gz")) { + GzipCompressorInputStream gcis = + new GzipCompressorInputStream(in); + tais = new TarArchiveInputStream(gcis); + } else if (pop.getName().endsWith(".tar")) { + tais = new TarArchiveInputStream(in); + } else { + continue; + } + BufferedInputStream bis = new BufferedInputStream(tais); + String fn = pop.getName(); + String dateTime = fn.substring(11, 21) + " " + + fn.substring(22, 24) + ":" + fn.substring(24, 26) + + ":" + fn.substring(26, 28); + while ((tais.getNextTarEntry()) != null) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int len; + byte[] data = new byte[1024]; + while ((len = bis.read(data, 0, 1024)) >= 0) { + baos.write(data, 0, len); + } + byte[] allData = baos.toByteArray(); + if (allData.length == 0) { + continue; + } + String fileDigest = Hex.encodeHexString(DigestUtils.sha( + allData)); + String ascii = new String(allData, "US-ASCII"); + BufferedReader br3 = new BufferedReader(new StringReader( + ascii)); + String firstLine = null; + while ((firstLine = br3.readLine()) != null) { + if (firstLine.startsWith("@")) { + continue; + } else { + break; + } + } + if (firstLine.startsWith("published ") + || 
firstLine.startsWith("flag-thresholds ") + || firstLine.startsWith("r ")) { + bdp.parse(allData, dateTime); + parsedStatuses++; + } else if (descriptorImportHistory.contains(fileDigest)) { + /* Skip server descriptors or extra-info descriptors if + * we parsed them before. */ + skippedFiles++; + continue; + } else { + int start = -1; + int sig = -1; + int end = -1; + String startToken = firstLine.startsWith("router ") + ? "router " : "extra-info "; + String sigToken = "\nrouter-signature\n"; + String endToken = "\n-----END SIGNATURE-----\n"; + while (end < ascii.length()) { + start = ascii.indexOf(startToken, end); + if (start < 0) { + break; + } + sig = ascii.indexOf(sigToken, start); + if (sig < 0) { + break; + } + sig += sigToken.length(); + end = ascii.indexOf(endToken, sig); + if (end < 0) { + break; + } + end += endToken.length(); + byte[] descBytes = new byte[end - start]; + System.arraycopy(allData, start, descBytes, 0, + end - start); + String descriptorDigest = Hex.encodeHexString( + DigestUtils.sha(descBytes)); + if (!descriptorImportHistory.contains( + descriptorDigest)) { + bdp.parse(descBytes, dateTime); + descriptorImportHistory.add(descriptorDigest); + if (firstLine.startsWith("router ")) { + parsedServerDescriptors++; + } else { + parsedExtraInfoDescriptors++; + } + } else { + if (firstLine.startsWith("router ")) { + skippedServerDescriptors++; + } else { + skippedExtraInfoDescriptors++; + } + } + } + } + descriptorImportHistory.add(fileDigest); + parsedFiles++; + } + bis.close(); + } + in.close(); + + /* Let's give some memory back, or we'll run out of it. */ + System.gc(); + + parsed.add(pop.getName()); + modified = true; + } catch (IOException e) { + logger.log(Level.WARNING, "Could not parse bridge snapshot " + + pop.getName() + "!", e); + continue; + } + } + } + logger.fine("Finished importing files in directory " + + bridgeDirectoriesDir + "/. 
In total, we parsed " + + parsedFiles + " files (skipped " + skippedFiles + + ") containing " + parsedStatuses + " statuses, " + + parsedServerDescriptors + " server descriptors (skipped " + + skippedServerDescriptors + "), and " + + parsedExtraInfoDescriptors + " extra-info descriptors " + + "(skipped " + skippedExtraInfoDescriptors + ")."); + if (!parsed.isEmpty() && modified) { + logger.fine("Writing file " + pbdFile.getAbsolutePath() + "..."); + try { + pbdFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile)); + for (String f : parsed) { + bw.append(f + "\n"); + } + bw.close(); + logger.fine("Finished writing file " + pbdFile.getAbsolutePath() + + "."); + } catch (IOException e) { + logger.log(Level.WARNING, "Failed writing file " + + pbdFile.getAbsolutePath() + "!", e); + } + } + } + } +} + diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java new file mode 100644 index 0000000..3214715 --- /dev/null +++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java @@ -0,0 +1,1330 @@ +/* Copyright 2010--2016 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.bridgedescs; + +import org.torproject.collector.main.Configuration; +import org.torproject.collector.main.LockFile; + +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.StringReader; +import java.io.UnsupportedEncodingException; +import java.security.GeneralSecurityException; +import java.security.SecureRandom; +import java.text.ParseException; +import 
java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.Stack; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Sanitizes bridge descriptors, i.e., removes all possibly sensitive + * information from them, and writes them to a local directory structure. + * During the sanitizing process, all information about the bridge + * identity or IP address are removed or replaced. The goal is to keep the + * sanitized bridge descriptors useful for statistical analysis while not + * making it easier for an adversary to enumerate bridges. + * + * There are three types of bridge descriptors: bridge network statuses + * (lists of all bridges at a given time), server descriptors (published + * by the bridge to advertise their capabilities), and extra-info + * descriptors (published by the bridge, mainly for statistical analysis). + */ +public class SanitizedBridgesWriter extends Thread { + + public static void main(String[] args) { + + Logger logger = Logger.getLogger( + SanitizedBridgesWriter.class.getName()); + logger.info("Starting bridge-descriptors module of CollecTor."); + + // Initialize configuration + Configuration config = new Configuration(); + + // Use lock file to avoid overlapping runs + LockFile lf = new LockFile("bridge-descriptors"); + if (!lf.acquireLock()) { + logger.severe("Warning: CollecTor is already running or has not exited " + + "cleanly! Exiting!"); + System.exit(1); + } + + // Sanitize bridge descriptors + new SanitizedBridgesWriter(config).run(); + + // Remove lock file + lf.releaseLock(); + + logger.info("Terminating bridge-descriptors module of CollecTor."); + } + + private Configuration config; + + /** + * Initializes this class. 
+ */ + public SanitizedBridgesWriter(Configuration config) { + this.config = config; + } + + /** + * Logger for this class. + */ + private Logger logger; + + private String rsyncCatString; + + private File bridgeDirectoriesDirectory; + + /** + * Output directory for writing sanitized bridge descriptors. + */ + private File sanitizedBridgesDirectory; + + private boolean replaceIPAddressesWithHashes; + + private boolean persistenceProblemWithSecrets; + + private SortedMap<String, byte[]> secretsForHashingIPAddresses; + + private String bridgeSanitizingCutOffTimestamp; + + private boolean haveWarnedAboutInterval; + + private File bridgeIpSecretsFile; + + private SecureRandom secureRandom; + + public void run() { + + File bridgeDirectoriesDirectory = + new File(config.getBridgeSnapshotsDirectory()); + File sanitizedBridgesDirectory = + new File(config.getSanitizedBridgesWriteDirectory()); + boolean replaceIPAddressesWithHashes = + config.getReplaceIPAddressesWithHashes(); + long limitBridgeSanitizingInterval = + config.getLimitBridgeDescriptorMappings(); + File statsDirectory = new File("stats"); + + if (bridgeDirectoriesDirectory == null + || sanitizedBridgesDirectory == null || statsDirectory == null) { + throw new IllegalArgumentException(); + } + + /* Memorize argument values. */ + this.bridgeDirectoriesDirectory = bridgeDirectoriesDirectory; + this.sanitizedBridgesDirectory = sanitizedBridgesDirectory; + this.replaceIPAddressesWithHashes = replaceIPAddressesWithHashes; + + /* Initialize logger. */ + this.logger = Logger.getLogger( + SanitizedBridgesWriter.class.getName()); + + SimpleDateFormat rsyncCatFormat = new SimpleDateFormat( + "yyyy-MM-dd-HH-mm-ss"); + rsyncCatFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + this.rsyncCatString = rsyncCatFormat.format( + System.currentTimeMillis()); + + /* Initialize secure random number generator if we need it. 
*/ + if (this.replaceIPAddressesWithHashes) { + try { + this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN"); + } catch (GeneralSecurityException e) { + this.logger.log(Level.WARNING, "Could not initialize secure " + + "random number generator! Not calculating any IP address " + + "hashes in this execution!", e); + this.persistenceProblemWithSecrets = true; + } + } + + /* Read hex-encoded secrets for replacing IP addresses with hashes + * from disk. */ + this.secretsForHashingIPAddresses = new TreeMap<String, byte[]>(); + this.bridgeIpSecretsFile = new File(statsDirectory, + "bridge-ip-secrets"); + if (this.bridgeIpSecretsFile.exists()) { + try { + BufferedReader br = new BufferedReader(new FileReader( + this.bridgeIpSecretsFile)); + String line; + while ((line = br.readLine()) != null) { + String[] parts = line.split(","); + if ((line.length() != ("yyyy-MM,".length() + 31 * 2) + && line.length() != ("yyyy-MM,".length() + 50 * 2)) + || parts.length != 2) { + this.logger.warning("Invalid line in bridge-ip-secrets file " + + "starting with '" + line.substring(0, 7) + "'! " + + "Not calculating any IP address hashes in this " + + "execution!"); + this.persistenceProblemWithSecrets = true; + break; + } + String month = parts[0]; + byte[] secret = Hex.decodeHex(parts[1].toCharArray()); + this.secretsForHashingIPAddresses.put(month, secret); + } + br.close(); + if (!this.persistenceProblemWithSecrets) { + this.logger.fine("Read " + + this.secretsForHashingIPAddresses.size() + " secrets for " + + "hashing bridge IP addresses."); + } + } catch (DecoderException e) { + this.logger.log(Level.WARNING, "Failed to decode hex string in " + + this.bridgeIpSecretsFile + "! Not calculating any IP " + + "address hashes in this execution!", e); + this.persistenceProblemWithSecrets = true; + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed to read " + + this.bridgeIpSecretsFile + "! 
Not calculating any IP " + + "address hashes in this execution!", e); + this.persistenceProblemWithSecrets = true; + } + } + + /* If we're configured to keep secrets only for a limited time, define + * the cut-off day and time. */ + if (limitBridgeSanitizingInterval >= 0L) { + SimpleDateFormat formatter = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + this.bridgeSanitizingCutOffTimestamp = formatter.format( + System.currentTimeMillis() - 24L * 60L * 60L * 1000L + * limitBridgeSanitizingInterval); + } else { + this.bridgeSanitizingCutOffTimestamp = "1999-12-31 23:59:59"; + } + + // Prepare bridge descriptor parser + BridgeDescriptorParser bdp = new BridgeDescriptorParser(this); + + // Import bridge descriptors + new BridgeSnapshotReader(bdp, this.bridgeDirectoriesDirectory, + statsDirectory); + + // Finish writing sanitized bridge descriptors to disk + this.finishWriting(); + + this.checkStaleDescriptors(); + + this.cleanUpRsyncDirectory(); + } + + private String scrubOrAddress(String orAddress, byte[] fingerprintBytes, + String published) throws IOException { + if (!orAddress.contains(":")) { + /* Malformed or-address or a line. */ + return null; + } + String addressPart = orAddress.substring(0, + orAddress.lastIndexOf(":")); + String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1); + String scrubbedAddressPart = null; + if (addressPart.startsWith("[")) { + scrubbedAddressPart = this.scrubIpv6Address(addressPart, + fingerprintBytes, published); + } else { + scrubbedAddressPart = this.scrubIpv4Address(addressPart, + fingerprintBytes, published); + } + return (scrubbedAddressPart == null ? 
null : + scrubbedAddressPart + ":" + portPart); + } + + private String scrubIpv4Address(String address, byte[] fingerprintBytes, + String published) throws IOException { + if (this.replaceIPAddressesWithHashes) { + if (this.persistenceProblemWithSecrets) { + /* There's a persistence problem, so we shouldn't scrub more IP + * addresses in this execution. */ + return null; + } + byte[] hashInput = new byte[4 + 20 + 31]; + String[] ipParts = address.split("\."); + for (int i = 0; i < 4; i++) { + hashInput[i] = (byte) Integer.parseInt(ipParts[i]); + } + System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20); + String month = published.substring(0, "yyyy-MM".length()); + byte[] secret = this.getSecretForMonth(month); + System.arraycopy(secret, 0, hashInput, 24, 31); + byte[] hashOutput = DigestUtils.sha256(hashInput); + String hashedAddress = "10." + + (((int) hashOutput[0] + 256) % 256) + "." + + (((int) hashOutput[1] + 256) % 256) + "." + + (((int) hashOutput[2] + 256) % 256); + return hashedAddress; + } else { + return "127.0.0.1"; + } + } + + private String scrubIpv6Address(String address, byte[] fingerprintBytes, + String published) throws IOException { + StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::"); + if (this.replaceIPAddressesWithHashes) { + if (this.persistenceProblemWithSecrets) { + /* There's a persistence problem, so we shouldn't scrub more IP + * addresses in this execution. */ + return null; + } + byte[] hashInput = new byte[16 + 20 + 19]; + String[] doubleColonSeparatedParts = address.substring(1, + address.length() - 1).split("::", -1); + if (doubleColonSeparatedParts.length > 2) { + /* Invalid IPv6 address. */ + return null; + } + List<String> hexParts = new ArrayList<String>(); + for (String doubleColonSeparatedPart : doubleColonSeparatedParts) { + StringBuilder hexPart = new StringBuilder(); + String[] parts = doubleColonSeparatedPart.split(":", -1); + if (parts.length < 1 || parts.length > 8) { + /* Invalid IPv6 address. 
*/ + return null; + } + for (int i = 0; i < parts.length; i++) { + String part = parts[i]; + if (part.contains(".")) { + String[] ipParts = part.split("\."); + byte[] ipv4Bytes = new byte[4]; + if (ipParts.length != 4) { + /* Invalid IPv4 part in IPv6 address. */ + return null; + } + for (int m = 0; m < 4; m++) { + ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]); + } + hexPart.append(Hex.encodeHexString(ipv4Bytes)); + } else if (part.length() > 4) { + /* Invalid IPv6 address. */ + return null; + } else { + for (int k = part.length(); k < 4; k++) { + hexPart.append("0"); + } + hexPart.append(part); + } + } + hexParts.add(hexPart.toString()); + } + StringBuilder hex = new StringBuilder(); + hex.append(hexParts.get(0)); + if (hexParts.size() == 2) { + for (int i = 32 - hexParts.get(0).length() + - hexParts.get(1).length(); i > 0; i--) { + hex.append("0"); + } + hex.append(hexParts.get(1)); + } + byte[] ipBytes = null; + try { + ipBytes = Hex.decodeHex(hex.toString().toCharArray()); + } catch (DecoderException e) { + /* TODO Invalid IPv6 address. */ + return null; + } + if (ipBytes.length != 16) { + /* TODO Invalid IPv6 address. 
*/ + return null; + } + System.arraycopy(ipBytes, 0, hashInput, 0, 16); + System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20); + String month = published.substring(0, "yyyy-MM".length()); + byte[] secret = this.getSecretForMonth(month); + System.arraycopy(secret, 31, hashInput, 36, 19); + String hashOutput = DigestUtils.sha256Hex(hashInput); + sb.append(hashOutput.substring(hashOutput.length() - 6, + hashOutput.length() - 4)); + sb.append(":"); + sb.append(hashOutput.substring(hashOutput.length() - 4)); + } + sb.append("]"); + return sb.toString(); + } + + private byte[] getSecretForMonth(String month) throws IOException { + if (!this.secretsForHashingIPAddresses.containsKey(month) + || this.secretsForHashingIPAddresses.get(month).length == 31) { + byte[] secret = new byte[50]; + this.secureRandom.nextBytes(secret); + if (this.secretsForHashingIPAddresses.containsKey(month)) { + System.arraycopy(this.secretsForHashingIPAddresses.get(month), 0, + secret, 0, 31); + } + if (month.compareTo( + this.bridgeSanitizingCutOffTimestamp) < 0) { + this.logger.warning("Generated a secret that we won't make " + + "persistent, because it's outside our bridge descriptor " + + "sanitizing interval."); + } else { + /* Append secret to file on disk immediately before using it, or + * we might end with inconsistently sanitized bridges. */ + try { + if (!this.bridgeIpSecretsFile.exists()) { + this.bridgeIpSecretsFile.getParentFile().mkdirs(); + } + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.bridgeIpSecretsFile, + this.bridgeIpSecretsFile.exists())); + bw.write(month + "," + Hex.encodeHexString(secret) + "\n"); + bw.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not store new secret " + + "to disk! 
Not calculating any IP address hashes in " + + "this execution!", e); + this.persistenceProblemWithSecrets = true; + throw new IOException(e); + } + } + this.secretsForHashingIPAddresses.put(month, secret); + } + return this.secretsForHashingIPAddresses.get(month); + } + + private String maxNetworkStatusPublishedTime = "1970-01-01 00:00:00"; + + /** + * Sanitizes a network status and writes it to disk. + */ + public void sanitizeAndStoreNetworkStatus(byte[] data, + String publicationTime) { + + if (this.persistenceProblemWithSecrets) { + /* There's a persistence problem, so we shouldn't scrub more IP + * addresses in this execution. */ + return; + } + + if (publicationTime.compareTo(maxNetworkStatusPublishedTime) > 0) { + maxNetworkStatusPublishedTime = publicationTime; + } + + if (this.bridgeSanitizingCutOffTimestamp + .compareTo(publicationTime) > 0) { + this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING + : Level.FINE, "Sanitizing and storing network status with " + + "publication time outside our descriptor sanitizing " + + "interval."); + this.haveWarnedAboutInterval = true; + } + + /* Parse the given network status line by line. */ + StringBuilder header = new StringBuilder(); + SortedMap<String, String> scrubbedLines = + new TreeMap<String, String>(); + try { + StringBuilder scrubbed = new StringBuilder(); + BufferedReader br = new BufferedReader(new StringReader(new String( + data, "US-ASCII"))); + String line = null; + String mostRecentDescPublished = null; + byte[] fingerprintBytes = null; + String descPublicationTime = null; + String hashedBridgeIdentityHex = null; + while ((line = br.readLine()) != null) { + + /* Use publication time from "published" line instead of the + * file's last-modified time. Don't copy over the line, because + * we're going to write a "published" line below. */ + if (line.startsWith("published ")) { + publicationTime = line.substring("published ".length()); + + /* Additional header lines don't have to be cleaned up. 
*/ + } else if (line.startsWith("flag-thresholds ")) { + header.append(line + "\n"); + + /* r lines contain sensitive information that needs to be removed + * or replaced. */ + } else if (line.startsWith("r ")) { + + /* Clear buffer from previously scrubbed lines. */ + if (scrubbed.length() > 0) { + String scrubbedLine = scrubbed.toString(); + scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine); + scrubbed = new StringBuilder(); + } + + /* Parse the relevant parts of this r line. */ + String[] parts = line.split(" "); + String nickname = parts[1]; + fingerprintBytes = Base64.decodeBase64(parts[2] + "=="); + String descriptorIdentifier = parts[3]; + descPublicationTime = parts[4] + " " + parts[5]; + String address = parts[6]; + String orPort = parts[7]; + String dirPort = parts[8]; + + /* Determine most recent descriptor publication time. */ + if (descPublicationTime.compareTo(publicationTime) <= 0 + && (mostRecentDescPublished == null + || descPublicationTime.compareTo( + mostRecentDescPublished) > 0)) { + mostRecentDescPublished = descPublicationTime; + } + + /* Write scrubbed r line to buffer. */ + byte[] hashedBridgeIdentity = DigestUtils.sha(fingerprintBytes); + String hashedBridgeIdentityBase64 = Base64.encodeBase64String( + hashedBridgeIdentity).substring(0, 27); + hashedBridgeIdentityHex = Hex.encodeHexString( + hashedBridgeIdentity); + String hashedDescriptorIdentifier = Base64.encodeBase64String( + DigestUtils.sha(Base64.decodeBase64(descriptorIdentifier + + "=="))).substring(0, 27); + String scrubbedAddress = scrubIpv4Address(address, + fingerprintBytes, + descPublicationTime); + scrubbed.append("r " + nickname + " " + + hashedBridgeIdentityBase64 + " " + + hashedDescriptorIdentifier + " " + descPublicationTime + + " " + scrubbedAddress + " " + orPort + " " + dirPort + + "\n"); + + /* Sanitize any addresses in a lines using the fingerprint and + * descriptor publication time from the previous r line. 
*/ + } else if (line.startsWith("a ")) { + String scrubbedOrAddress = scrubOrAddress( + line.substring("a ".length()), fingerprintBytes, + descPublicationTime); + if (scrubbedOrAddress != null) { + scrubbed.append("a " + scrubbedOrAddress + "\n"); + } else { + this.logger.warning("Invalid address in line '" + line + + "' in bridge network status. Skipping line!"); + } + + /* Nothing special about s, w, and p lines; just copy them. */ + } else if (line.startsWith("s ") || line.equals("s") + || line.startsWith("w ") || line.equals("w") + || line.startsWith("p ") || line.equals("p")) { + scrubbed.append(line + "\n"); + + /* There should be nothing else but r, w, p, and s lines in the + * network status. If there is, we should probably learn before + * writing anything to the sanitized descriptors. */ + } else { + this.logger.fine("Unknown line '" + line + "' in bridge " + + "network status. Not writing to disk!"); + return; + } + } + br.close(); + if (scrubbed.length() > 0) { + String scrubbedLine = scrubbed.toString(); + scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine); + scrubbed = new StringBuilder(); + } + + /* Check if we can tell from the descriptor publication times + * whether this status is possibly stale. */ + SimpleDateFormat formatter = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + if (formatter.parse(publicationTime).getTime() + - formatter.parse(mostRecentDescPublished).getTime() + > 60L * 60L * 1000L) { + this.logger.warning("The most recent descriptor in the bridge " + + "network status published at " + publicationTime + " was " + + "published at " + mostRecentDescPublished + " which is " + + "more than 1 hour before the status. This is a sign for " + + "the status being stale. 
Please check!"); + } + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Could not parse timestamp in " + + "bridge network status.", e); + return; + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not parse bridge network " + + "status.", e); + return; + } + + /* Write the sanitized network status to disk. */ + try { + String syear = publicationTime.substring(0, 4); + String smonth = publicationTime.substring(5, 7); + String sday = publicationTime.substring(8, 10); + String stime = publicationTime.substring(11, 13) + + publicationTime.substring(14, 16) + + publicationTime.substring(17, 19); + File tarballFile = new File( + this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear + + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth + + sday + "-" + stime + "-" + + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D"); + File rsyncFile = new File("recent/bridge-descriptors/statuses/" + + tarballFile.getName()); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + for (File outputFile : outputFiles) { + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + outputFile)); + bw.write("@type bridge-network-status 1.0\n"); + bw.write("published " + publicationTime + "\n"); + bw.write(header.toString()); + for (String scrubbed : scrubbedLines.values()) { + bw.write(scrubbed); + } + bw.close(); + } + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not write sanitized bridge " + + "network status to disk.", e); + return; + } + } + + private String maxServerDescriptorPublishedTime = "1970-01-01 00:00:00"; + + /** + * Sanitizes a bridge server descriptor and writes it to disk. + */ + public void sanitizeAndStoreServerDescriptor(byte[] data) { + + if (this.persistenceProblemWithSecrets) { + /* There's a persistence problem, so we shouldn't scrub more IP + * addresses in this execution. */ + return; + } + + /* Parse descriptor to generate a sanitized version. 
*/ + String scrubbedDesc = null; + String published = null; + String masterKeyEd25519FromIdentityEd25519 = null; + try { + BufferedReader br = new BufferedReader(new StringReader( + new String(data, "US-ASCII"))); + StringBuilder scrubbed = new StringBuilder(); + String line = null; + String hashedBridgeIdentity = null; + String address = null; + String routerLine = null; + String scrubbedAddress = null; + String masterKeyEd25519 = null; + List<String> orAddresses = null; + List<String> scrubbedOrAddresses = null; + boolean skipCrypto = false; + while ((line = br.readLine()) != null) { + + /* Skip all crypto parts that might be used to derive the bridge's + * identity fingerprint. */ + if (skipCrypto && !line.startsWith("-----END ")) { + continue; + + /* Store the router line for later processing, because we may need + * the bridge identity fingerprint for replacing the IP address in + * the scrubbed version. */ + } else if (line.startsWith("router ")) { + address = line.split(" ")[2]; + routerLine = line; + + /* Store or-address parts in a list and sanitize them when we have + * read the fingerprint. */ + } else if (line.startsWith("or-address ")) { + if (orAddresses == null) { + orAddresses = new ArrayList<String>(); + } + orAddresses.add(line.substring("or-address ".length())); + + /* Parse the publication time to see if we're still inside the + * sanitizing interval. */ + } else if (line.startsWith("published ")) { + published = line.substring("published ".length()); + if (published.compareTo(maxServerDescriptorPublishedTime) > 0) { + maxServerDescriptorPublishedTime = published; + } + if (this.bridgeSanitizingCutOffTimestamp + .compareTo(published) > 0) { + this.logger.log(!this.haveWarnedAboutInterval + ? 
Level.WARNING : Level.FINE, "Sanitizing and storing " + + "server descriptor with publication time outside our " + + "descriptor sanitizing interval."); + this.haveWarnedAboutInterval = true; + } + scrubbed.append(line + "\n"); + + /* Parse the fingerprint to determine the hashed bridge + * identity. */ + } else if (line.startsWith("opt fingerprint ") + || line.startsWith("fingerprint ")) { + String fingerprint = line.substring(line.startsWith("opt ") + ? "opt fingerprint".length() : "fingerprint".length()) + .replaceAll(" ", "").toLowerCase(); + byte[] fingerprintBytes = Hex.decodeHex( + fingerprint.toCharArray()); + hashedBridgeIdentity = DigestUtils.shaHex(fingerprintBytes) + .toLowerCase(); + try { + scrubbedAddress = scrubIpv4Address(address, fingerprintBytes, + published); + if (orAddresses != null) { + scrubbedOrAddresses = new ArrayList<String>(); + for (String orAddress : orAddresses) { + String scrubbedOrAddress = scrubOrAddress(orAddress, + fingerprintBytes, published); + if (scrubbedOrAddress != null) { + scrubbedOrAddresses.add(scrubbedOrAddress); + } else { + this.logger.warning("Invalid address in line " + + "'or-address " + orAddress + "' in bridge server " + + "descriptor. Skipping line!"); + } + } + } + } catch (IOException e) { + /* There's a persistence problem, so we shouldn't scrub more + * IP addresses in this execution. */ + this.persistenceProblemWithSecrets = true; + return; + } + scrubbed.append((line.startsWith("opt ") ? "opt " : "") + + "fingerprint"); + for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++) { + scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i, + 4 * (i + 1)).toUpperCase()); + } + scrubbed.append("\n"); + + /* Replace the contact line (if present) with a generic one. */ + } else if (line.startsWith("contact ")) { + scrubbed.append("contact somebody\n"); + + /* When we reach the signature, we're done. Write the sanitized + * descriptor to disk below. 
*/ + } else if (line.startsWith("router-signature")) { + String[] routerLineParts = routerLine.split(" "); + scrubbedDesc = "router " + routerLineParts[1] + " " + + scrubbedAddress + " " + routerLineParts[3] + " " + + routerLineParts[4] + " " + routerLineParts[5] + "\n"; + if (scrubbedOrAddresses != null) { + for (String scrubbedOrAddress : scrubbedOrAddresses) { + scrubbedDesc = scrubbedDesc += "or-address " + + scrubbedOrAddress + "\n"; + } + } + scrubbedDesc += scrubbed.toString(); + break; + + /* Replace extra-info digest with the hashed digest of the + * non-scrubbed descriptor. */ + } else if (line.startsWith("opt extra-info-digest ") + || line.startsWith("extra-info-digest ")) { + String[] parts = line.split(" "); + if (line.startsWith("opt ")) { + scrubbed.append("opt "); + parts = line.substring(4).split(" "); + } + scrubbed.append("extra-info-digest " + DigestUtils.shaHex( + Hex.decodeHex(parts[1].toCharArray())).toUpperCase()); + if (parts.length > 2) { + scrubbed.append(" " + Base64.encodeBase64String( + DigestUtils.sha256(Base64.decodeBase64(parts[2]))) + .replaceAll("=", "")); + } + scrubbed.append("\n"); + + /* Possibly sanitize reject lines if they contain the bridge's own + * IP address. */ + } else if (line.startsWith("reject ")) { + if (address != null && line.startsWith("reject " + address)) { + scrubbed.append("reject " + scrubbedAddress + + line.substring("reject ".length() + address.length()) + + "\n"); + } else { + scrubbed.append(line + "\n"); + } + + /* Extract master-key-ed25519 from identity-ed25519. 
*/ + } else if (line.equals("identity-ed25519")) { + StringBuilder sb = new StringBuilder(); + while ((line = br.readLine()) != null + && !line.equals("-----END ED25519 CERT-----")) { + if (line.equals("-----BEGIN ED25519 CERT-----")) { + continue; + } + sb.append(line); + } + masterKeyEd25519FromIdentityEd25519 = + this.parseMasterKeyEd25519FromIdentityEd25519( + sb.toString()); + String sha256MasterKeyEd25519 = Base64.encodeBase64String( + DigestUtils.sha256(Base64.decodeBase64( + masterKeyEd25519FromIdentityEd25519 + "="))) + .replaceAll("=", ""); + scrubbed.append("master-key-ed25519 " + sha256MasterKeyEd25519 + + "\n"); + if (masterKeyEd25519 != null && !masterKeyEd25519.equals( + masterKeyEd25519FromIdentityEd25519)) { + this.logger.warning("Mismatch between identity-ed25519 and " + + "master-key-ed25519. Skipping."); + return; + } + + /* Verify that identity-ed25519 and master-key-ed25519 match. */ + } else if (line.startsWith("master-key-ed25519 ")) { + masterKeyEd25519 = line.substring(line.indexOf(" ") + 1); + if (masterKeyEd25519FromIdentityEd25519 != null + && !masterKeyEd25519FromIdentityEd25519.equals( + masterKeyEd25519)) { + this.logger.warning("Mismatch between identity-ed25519 and " + + "master-key-ed25519. Skipping."); + return; + } + + /* Write the following lines unmodified to the sanitized + * descriptor. 
*/ + } else if (line.startsWith("accept ") + || line.startsWith("platform ") + || line.startsWith("opt protocols ") + || line.startsWith("protocols ") + || line.startsWith("uptime ") + || line.startsWith("bandwidth ") + || line.startsWith("opt hibernating ") + || line.startsWith("hibernating ") + || line.startsWith("ntor-onion-key ") + || line.equals("opt hidden-service-dir") + || line.equals("hidden-service-dir") + || line.equals("opt caches-extra-info") + || line.equals("caches-extra-info") + || line.equals("opt allow-single-hop-exits") + || line.equals("allow-single-hop-exits") + || line.startsWith("ipv6-policy ") + || line.equals("tunnelled-dir-server")) { + scrubbed.append(line + "\n"); + + /* Replace node fingerprints in the family line with their hashes + * and leave nicknames unchanged. */ + } else if (line.startsWith("family ")) { + StringBuilder familyLine = new StringBuilder("family"); + for (String s : line.substring(7).split(" ")) { + if (s.startsWith("$")) { + familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex( + s.substring(1).toCharArray())).toUpperCase()); + } else { + familyLine.append(" " + s); + } + } + scrubbed.append(familyLine.toString() + "\n"); + + /* Skip the purpose line that the bridge authority adds to its + * cached-descriptors file. */ + } else if (line.startsWith("@purpose ")) { + continue; + + /* Skip all crypto parts that might leak the bridge's identity + * fingerprint. */ + } else if (line.startsWith("-----BEGIN ") + || line.equals("onion-key") || line.equals("signing-key") + || line.equals("onion-key-crosscert") + || line.startsWith("ntor-onion-key-crosscert ")) { + skipCrypto = true; + + /* Stop skipping lines when the crypto parts are over. */ + } else if (line.startsWith("-----END ")) { + skipCrypto = false; + + /* Skip the ed25519 signature; we'll include a SHA256 digest of + * the SHA256 descriptor digest in router-digest-sha256. 
*/ + } else if (line.startsWith("router-sig-ed25519 ")) { + continue; + + /* If we encounter an unrecognized line, stop parsing and print + * out a warning. We might have overlooked sensitive information + * that we need to remove or replace for the sanitized descriptor + * version. */ + } else { + this.logger.warning("Unrecognized line '" + line + + "'. Skipping."); + return; + } + } + br.close(); + } catch (Exception e) { + this.logger.log(Level.WARNING, "Could not parse server " + + "descriptor.", e); + return; + } + + /* Determine filename of sanitized server descriptor. */ + String descriptorDigest = null; + try { + String ascii = new String(data, "US-ASCII"); + String startToken = "router "; + String sigToken = "\nrouter-signature\n"; + int start = ascii.indexOf(startToken); + int sig = ascii.indexOf(sigToken) + sigToken.length(); + if (start >= 0 && sig >= 0 && sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(data, start, forDigest, 0, sig - start); + descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest)); + } + } catch (UnsupportedEncodingException e) { + /* Handle below. */ + } + if (descriptorDigest == null) { + this.logger.log(Level.WARNING, "Could not calculate server " + + "descriptor digest."); + return; + } + String descriptorDigestSha256Base64 = null; + if (masterKeyEd25519FromIdentityEd25519 != null) { + try { + String ascii = new String(data, "US-ASCII"); + String startToken = "router "; + String sigToken = "\n-----END SIGNATURE-----\n"; + int start = ascii.indexOf(startToken); + int sig = ascii.indexOf(sigToken) + sigToken.length(); + if (start >= 0 && sig >= 0 && sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(data, start, forDigest, 0, sig - start); + descriptorDigestSha256Base64 = Base64.encodeBase64String( + DigestUtils.sha256(DigestUtils.sha256(forDigest))) + .replaceAll("=", ""); + } + } catch (UnsupportedEncodingException e) { + /* Handle below. 
*/ + } + if (descriptorDigestSha256Base64 == null) { + this.logger.log(Level.WARNING, "Could not calculate server " + + "descriptor SHA256 digest."); + return; + } + } + String dyear = published.substring(0, 4); + String dmonth = published.substring(5, 7); + File tarballFile = new File( + this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + + dyear + "/" + dmonth + "/server-descriptors/" + + "/" + descriptorDigest.charAt(0) + "/" + + descriptorDigest.charAt(1) + "/" + + descriptorDigest); + File rsyncCatFile = new File("recent/bridge-descriptors/" + + "server-descriptors/" + this.rsyncCatString + + "-server-descriptors.tmp"); + File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; + boolean[] append = new boolean[] { false, true }; + try { + for (int i = 0; i < outputFiles.length; i++) { + File outputFile = outputFiles[i]; + boolean appendToFile = append[i]; + if (outputFile.exists() && !appendToFile) { + /* We already stored this descriptor to disk before, so let's + * not store it yet another time. 
*/ + break; + } + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + outputFile, appendToFile)); + bw.write("@type bridge-server-descriptor 1.1\n"); + bw.write(scrubbedDesc); + if (descriptorDigestSha256Base64 != null) { + bw.write("router-digest-sha256 " + descriptorDigestSha256Base64 + + "\n"); + } + bw.write("router-digest " + descriptorDigest.toUpperCase() + + "\n"); + bw.close(); + } + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not write sanitized server " + + "descriptor to disk.", e); + return; + } + } + + private String parseMasterKeyEd25519FromIdentityEd25519( + String identityEd25519Base64) { + byte[] identityEd25519 = Base64.decodeBase64(identityEd25519Base64); + if (identityEd25519.length < 40) { + this.logger.warning("Invalid length of identity-ed25519 (in " + + "bytes): " + identityEd25519.length); + } else if (identityEd25519[0] != 0x01) { + this.logger.warning("Unknown version in identity-ed25519: " + + identityEd25519[0]); + } else if (identityEd25519[1] != 0x04) { + this.logger.warning("Unknown cert type in identity-ed25519: " + + identityEd25519[1]); + } else if (identityEd25519[6] != 0x01) { + this.logger.warning("Unknown certified key type in " + + "identity-ed25519: " + identityEd25519[1]); + } else if (identityEd25519[39] == 0x00) { + this.logger.warning("No extensions in identity-ed25519 (which " + + "would contain the encoded master-key-ed25519): " + + identityEd25519[39]); + } else { + int extensionStart = 40; + for (int i = 0; i < (int) identityEd25519[39]; i++) { + if (identityEd25519.length < extensionStart + 4) { + this.logger.warning("Invalid extension with id " + i + + " in identity-ed25519."); + break; + } + int extensionLength = identityEd25519[extensionStart]; + extensionLength <<= 8; + extensionLength += identityEd25519[extensionStart + 1]; + int extensionType = identityEd25519[extensionStart + 2]; + if (extensionLength == 32 && extensionType == 4) { + if 
(identityEd25519.length < extensionStart + 4 + 32) { + this.logger.warning("Invalid extension with id " + i + + " in identity-ed25519."); + break; + } + byte[] masterKeyEd25519 = new byte[32]; + System.arraycopy(identityEd25519, extensionStart + 4, + masterKeyEd25519, 0, masterKeyEd25519.length); + String masterKeyEd25519Base64 = Base64.encodeBase64String( + masterKeyEd25519); + String masterKeyEd25519Base64NoTrailingEqualSigns = + masterKeyEd25519Base64.replaceAll("=", ""); + return masterKeyEd25519Base64NoTrailingEqualSigns; + } + extensionStart += 4 + extensionLength; + } + } + this.logger.warning("Unable to locate master-key-ed25519 in " + + "identity-ed25519."); + return null; + } + + private String maxExtraInfoDescriptorPublishedTime = + "1970-01-01 00:00:00"; + + /** + * Sanitizes an extra-info descriptor and writes it to disk. + */ + public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) { + + /* Parse descriptor to generate a sanitized version. */ + String scrubbedDesc = null; + String published = null; + String masterKeyEd25519FromIdentityEd25519 = null; + try { + BufferedReader br = new BufferedReader(new StringReader(new String( + data, "US-ASCII"))); + String line = null; + StringBuilder scrubbed = null; + String hashedBridgeIdentity = null; + String masterKeyEd25519 = null; + while ((line = br.readLine()) != null) { + + /* Parse bridge identity from extra-info line and replace it with + * its hash in the sanitized descriptor. */ + String[] parts = line.split(" "); + if (line.startsWith("extra-info ")) { + hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex( + parts[2].toCharArray())).toLowerCase(); + scrubbed = new StringBuilder("extra-info " + parts[1] + " " + + hashedBridgeIdentity.toUpperCase() + "\n"); + + /* Parse the publication time to determine the file name. 
*/ + } else if (line.startsWith("published ")) { + scrubbed.append(line + "\n"); + published = line.substring("published ".length()); + if (published.compareTo(maxExtraInfoDescriptorPublishedTime) + > 0) { + maxExtraInfoDescriptorPublishedTime = published; + } + + /* Remove everything from transport lines except the transport + * name. */ + } else if (line.startsWith("transport ")) { + if (parts.length < 3) { + this.logger.fine("Illegal line in extra-info descriptor: '" + + line + "'. Skipping descriptor."); + return; + } + scrubbed.append("transport " + parts[1] + "\n"); + + /* Skip transport-info lines entirely. */ + } else if (line.startsWith("transport-info ")) { + + /* Extract master-key-ed25519 from identity-ed25519. */ + } else if (line.equals("identity-ed25519")) { + StringBuilder sb = new StringBuilder(); + while ((line = br.readLine()) != null + && !line.equals("-----END ED25519 CERT-----")) { + if (line.equals("-----BEGIN ED25519 CERT-----")) { + continue; + } + sb.append(line); + } + masterKeyEd25519FromIdentityEd25519 = + this.parseMasterKeyEd25519FromIdentityEd25519( + sb.toString()); + String sha256MasterKeyEd25519 = Base64.encodeBase64String( + DigestUtils.sha256(Base64.decodeBase64( + masterKeyEd25519FromIdentityEd25519 + "="))) + .replaceAll("=", ""); + scrubbed.append("master-key-ed25519 " + sha256MasterKeyEd25519 + + "\n"); + if (masterKeyEd25519 != null && !masterKeyEd25519.equals( + masterKeyEd25519FromIdentityEd25519)) { + this.logger.warning("Mismatch between identity-ed25519 and " + + "master-key-ed25519. Skipping."); + return; + } + + /* Verify that identity-ed25519 and master-key-ed25519 match. */ + } else if (line.startsWith("master-key-ed25519 ")) { + masterKeyEd25519 = line.substring(line.indexOf(" ") + 1); + if (masterKeyEd25519FromIdentityEd25519 != null + && !masterKeyEd25519FromIdentityEd25519.equals( + masterKeyEd25519)) { + this.logger.warning("Mismatch between identity-ed25519 and " + + "master-key-ed25519. 
Skipping."); + return; + } + + /* Write the following lines unmodified to the sanitized + * descriptor. */ + } else if (line.startsWith("write-history ") + || line.startsWith("read-history ") + || line.startsWith("geoip-start-time ") + || line.startsWith("geoip-client-origins ") + || line.startsWith("geoip-db-digest ") + || line.startsWith("geoip6-db-digest ") + || line.startsWith("conn-bi-direct ") + || line.startsWith("bridge-") + || line.startsWith("dirreq-") + || line.startsWith("cell-") + || line.startsWith("entry-") + || line.startsWith("exit-")) { + scrubbed.append(line + "\n"); + + /* When we reach the signature, we're done. Write the sanitized + * descriptor to disk below. */ + } else if (line.startsWith("router-signature")) { + scrubbedDesc = scrubbed.toString(); + break; + + /* Skip the ed25519 signature; we'll include a SHA256 digest of + * the SHA256 descriptor digest in router-digest-sha256. */ + } else if (line.startsWith("router-sig-ed25519 ")) { + continue; + + /* If we encounter an unrecognized line, stop parsing and print + * out a warning. We might have overlooked sensitive information + * that we need to remove or replace for the sanitized descriptor + * version. */ + } else { + this.logger.warning("Unrecognized line '" + line + + "'. Skipping."); + return; + } + } + br.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not parse extra-info " + + "descriptor.", e); + return; + } catch (DecoderException e) { + this.logger.log(Level.WARNING, "Could not parse extra-info " + + "descriptor.", e); + return; + } + + /* Determine filename of sanitized extra-info descriptor. 
*/ + String descriptorDigest = null; + try { + String ascii = new String(data, "US-ASCII"); + String startToken = "extra-info "; + String sigToken = "\nrouter-signature\n"; + int start = ascii.indexOf(startToken); + int sig = ascii.indexOf(sigToken) + sigToken.length(); + if (start >= 0 && sig >= 0 && sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(data, start, forDigest, 0, sig - start); + descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest)); + } + } catch (UnsupportedEncodingException e) { + /* Handle below. */ + } + if (descriptorDigest == null) { + this.logger.log(Level.WARNING, "Could not calculate extra-info " + + "descriptor digest."); + return; + } + String descriptorDigestSha256Base64 = null; + if (masterKeyEd25519FromIdentityEd25519 != null) { + try { + String ascii = new String(data, "US-ASCII"); + String startToken = "extra-info "; + String sigToken = "\n-----END SIGNATURE-----\n"; + int start = ascii.indexOf(startToken); + int sig = ascii.indexOf(sigToken) + sigToken.length(); + if (start >= 0 && sig >= 0 && sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(data, start, forDigest, 0, sig - start); + descriptorDigestSha256Base64 = Base64.encodeBase64String( + DigestUtils.sha256(DigestUtils.sha256(forDigest))) + .replaceAll("=", ""); + } + } catch (UnsupportedEncodingException e) { + /* Handle below. 
*/ + } + if (descriptorDigestSha256Base64 == null) { + this.logger.log(Level.WARNING, "Could not calculate extra-info " + + "descriptor SHA256 digest."); + return; + } + } + String dyear = published.substring(0, 4); + String dmonth = published.substring(5, 7); + File tarballFile = new File( + this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + + dyear + "/" + dmonth + "/extra-infos/" + + descriptorDigest.charAt(0) + "/" + + descriptorDigest.charAt(1) + "/" + + descriptorDigest); + File rsyncCatFile = new File("recent/bridge-descriptors/" + + "extra-infos/" + this.rsyncCatString + "-extra-infos.tmp"); + File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; + boolean[] append = new boolean[] { false, true }; + try { + for (int i = 0; i < outputFiles.length; i++) { + File outputFile = outputFiles[i]; + boolean appendToFile = append[i]; + if (outputFile.exists() && !appendToFile) { + /* We already stored this descriptor to disk before, so let's + * not store it yet another time. */ + break; + } + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + outputFile, appendToFile)); + bw.write("@type bridge-extra-info 1.3\n"); + bw.write(scrubbedDesc); + if (descriptorDigestSha256Base64 != null) { + bw.write("router-digest-sha256 " + descriptorDigestSha256Base64 + + "\n"); + } + bw.write("router-digest " + descriptorDigest.toUpperCase() + + "\n"); + bw.close(); + } + } catch (Exception e) { + this.logger.log(Level.WARNING, "Could not write sanitized " + + "extra-info descriptor to disk.", e); + } + } + + /** + * Rewrite all network statuses that might contain references to server + * descriptors we added or updated in this execution. This applies to + * all statuses that have been published up to 24 hours after any added + * or updated server descriptor. + */ + public void finishWriting() { + + /* Delete secrets that we don't need anymore. 
*/ + if (!this.secretsForHashingIPAddresses.isEmpty() + && this.secretsForHashingIPAddresses.firstKey().compareTo( + this.bridgeSanitizingCutOffTimestamp) < 0) { + try { + int kept = 0; + int deleted = 0; + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.bridgeIpSecretsFile)); + for (Map.Entry<String, byte[]> e : + this.secretsForHashingIPAddresses.entrySet()) { + if (e.getKey().compareTo( + this.bridgeSanitizingCutOffTimestamp) < 0) { + deleted++; + } else { + bw.write(e.getKey() + "," + Hex.encodeHexString(e.getValue()) + + "\n"); + kept++; + } + } + bw.close(); + this.logger.info("Deleted " + deleted + " secrets that we don't " + + "need anymore and kept " + kept + "."); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not store reduced set of " + + "secrets to disk! This is a bad sign, better check what's " + + "going on!", e); + } + } + } + + private void checkStaleDescriptors() { + SimpleDateFormat dateTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + long tooOldMillis = System.currentTimeMillis() - 330L * 60L * 1000L; + try { + long maxNetworkStatusPublishedMillis = + dateTimeFormat.parse(maxNetworkStatusPublishedTime).getTime(); + if (maxNetworkStatusPublishedMillis > 0L + && maxNetworkStatusPublishedMillis < tooOldMillis) { + this.logger.warning("The last known bridge network status was " + + "published " + maxNetworkStatusPublishedTime + ", which is " + + "more than 5:30 hours in the past."); + } + long maxServerDescriptorPublishedMillis = + dateTimeFormat.parse(maxServerDescriptorPublishedTime) + .getTime(); + if (maxServerDescriptorPublishedMillis > 0L + && maxServerDescriptorPublishedMillis < tooOldMillis) { + this.logger.warning("The last known bridge server descriptor was " + + "published " + maxServerDescriptorPublishedTime + ", which " + + "is more than 5:30 hours in the past."); + } + long maxExtraInfoDescriptorPublishedMillis = + 
dateTimeFormat.parse(maxExtraInfoDescriptorPublishedTime) + .getTime(); + if (maxExtraInfoDescriptorPublishedMillis > 0L + && maxExtraInfoDescriptorPublishedMillis < tooOldMillis) { + this.logger.warning("The last known bridge extra-info descriptor " + + "was published " + maxExtraInfoDescriptorPublishedTime + + ", which is more than 5:30 hours in the past."); + } + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Unable to parse timestamp for " + + "stale check.", e); + } + } + + /* Delete all files from the rsync directory that have not been modified + * in the last three days, and remove the .tmp extension from newly + * written files. */ + public void cleanUpRsyncDirectory() { + long cutOffMillis = System.currentTimeMillis() + - 3L * 24L * 60L * 60L * 1000L; + Stack<File> allFiles = new Stack<File>(); + allFiles.add(new File("recent/bridge-descriptors")); + while (!allFiles.isEmpty()) { + File file = allFiles.pop(); + if (file.isDirectory()) { + allFiles.addAll(Arrays.asList(file.listFiles())); + } else if (file.lastModified() < cutOffMillis) { + file.delete(); + } else if (file.getName().endsWith(".tmp")) { + file.renameTo(new File(file.getParentFile(), + file.getName().substring(0, + file.getName().lastIndexOf(".tmp")))); + } + } + } +} + diff --git a/src/main/java/org/torproject/collector/exitlists/ExitListDownloader.java b/src/main/java/org/torproject/collector/exitlists/ExitListDownloader.java new file mode 100644 index 0000000..54fd50f --- /dev/null +++ b/src/main/java/org/torproject/collector/exitlists/ExitListDownloader.java @@ -0,0 +1,212 @@ +/* Copyright 2010--2016 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.exitlists; + +import org.torproject.collector.main.Configuration; +import org.torproject.collector.main.LockFile; +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorParseException; +import org.torproject.descriptor.DescriptorParser; +import 
org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.ExitList; + +import java.io.BufferedInputStream; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Date; +import java.util.List; +import java.util.SortedSet; +import java.util.Stack; +import java.util.TimeZone; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class ExitListDownloader extends Thread { + + public static void main(String[] args) { + + Logger logger = Logger.getLogger(ExitListDownloader.class.getName()); + logger.info("Starting exit-lists module of CollecTor."); + + // Initialize configuration + Configuration config = new Configuration(); + + // Use lock file to avoid overlapping runs + LockFile lf = new LockFile("exit-lists"); + if (!lf.acquireLock()) { + logger.severe("Warning: CollecTor is already running or has not exited " + + "cleanly! 
Exiting!"); + System.exit(1); + } + + // Download exit list and store it to disk + new ExitListDownloader(config).run(); + + // Remove lock file + lf.releaseLock(); + + logger.info("Terminating exit-lists module of CollecTor."); + } + + public ExitListDownloader(Configuration config) { + } + + public void run() { + + Logger logger = Logger.getLogger(ExitListDownloader.class.getName()); + + SimpleDateFormat dateTimeFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + + Date downloadedDate = new Date(); + String downloadedExitList = null; + try { + logger.fine("Downloading exit list..."); + StringBuilder sb = new StringBuilder(); + sb.append("@type tordnsel 1.0\n"); + sb.append("Downloaded " + dateTimeFormat.format(downloadedDate) + + "\n"); + String exitAddressesUrl = + "http://exitlist.torproject.org/exit-addresses"; + URL u = new URL(exitAddressesUrl); + HttpURLConnection huc = (HttpURLConnection) u.openConnection(); + huc.setRequestMethod("GET"); + huc.connect(); + int response = huc.getResponseCode(); + if (response != 200) { + logger.warning("Could not download exit list. 
Response code " + + response); + return; + } + BufferedInputStream in = new BufferedInputStream( + huc.getInputStream()); + int len; + byte[] data = new byte[1024]; + while ((len = in.read(data, 0, 1024)) >= 0) { + sb.append(new String(data, 0, len)); + } + in.close(); + downloadedExitList = sb.toString(); + logger.fine("Finished downloading exit list."); + } catch (IOException e) { + logger.log(Level.WARNING, "Failed downloading exit list", e); + return; + } + if (downloadedExitList == null) { + logger.warning("Failed downloading exit list"); + return; + } + + SimpleDateFormat tarballFormat = + new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); + tarballFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File("out/exit-lists/" + tarballFormat.format( + downloadedDate)); + + long maxScanMillis = 0L; + try { + DescriptorParser descriptorParser = + DescriptorSourceFactory.createDescriptorParser(); + List<Descriptor> parsedDescriptors = + descriptorParser.parseDescriptors(downloadedExitList.getBytes(), + tarballFile.getName()); + if (parsedDescriptors.size() != 1 + || !(parsedDescriptors.get(0) instanceof ExitList)) { + logger.warning("Could not parse downloaded exit list"); + return; + } + ExitList parsedExitList = (ExitList) parsedDescriptors.get(0); + for (ExitList.Entry entry : parsedExitList.getEntries()) { + for (long scanMillis : entry.getExitAddresses().values()) { + maxScanMillis = Math.max(maxScanMillis, scanMillis); + } + } + } catch (DescriptorParseException e) { + logger.log(Level.WARNING, "Could not parse downloaded exit list", + e); + } + if (maxScanMillis > 0L + && maxScanMillis + 330L * 60L * 1000L < System.currentTimeMillis()) { + logger.warning("The last reported scan in the downloaded exit list " + + "took place at " + dateTimeFormat.format(maxScanMillis) + + ", which is more than 5:30 hours in the past."); + } + + /* Write to disk. 
*/ + File rsyncFile = new File("recent/exit-lists/" + + tarballFile.getName()); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + for (File outputFile : outputFiles) { + try { + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + outputFile)); + bw.write(downloadedExitList); + bw.close(); + } catch (IOException e) { + logger.log(Level.WARNING, "Could not write downloaded exit list " + + "to " + outputFile.getAbsolutePath(), e); + } + } + + /* Write stats. */ + StringBuilder dumpStats = new StringBuilder("Finished downloading " + + "exit list.\nLast three exit lists are:"); + Stack<File> filesInInputDir = new Stack<File>(); + filesInInputDir.add(new File("out/exit-lists")); + SortedSet<File> lastThreeExitLists = new TreeSet<File>(); + while (!filesInInputDir.isEmpty()) { + File pop = filesInInputDir.pop(); + if (pop.isDirectory()) { + SortedSet<File> lastThreeElements = new TreeSet<File>(); + for (File f : pop.listFiles()) { + lastThreeElements.add(f); + } + while (lastThreeElements.size() > 3) { + lastThreeElements.remove(lastThreeElements.first()); + } + for (File f : lastThreeElements) { + filesInInputDir.add(f); + } + } else { + lastThreeExitLists.add(pop); + while (lastThreeExitLists.size() > 3) { + lastThreeExitLists.remove(lastThreeExitLists.first()); + } + } + } + for (File f : lastThreeExitLists) { + dumpStats.append("\n" + f.getName()); + } + logger.info(dumpStats.toString()); + + this.cleanUpRsyncDirectory(); + } + + /* Delete all files from the rsync directory that have not been modified + * in the last three days. 
*/ + public void cleanUpRsyncDirectory() { + long cutOffMillis = System.currentTimeMillis() + - 3L * 24L * 60L * 60L * 1000L; + Stack<File> allFiles = new Stack<File>(); + allFiles.add(new File("recent/exit-lists")); + while (!allFiles.isEmpty()) { + File file = allFiles.pop(); + if (file.isDirectory()) { + allFiles.addAll(Arrays.asList(file.listFiles())); + } else if (file.lastModified() < cutOffMillis) { + file.delete(); + } + } + } +} + diff --git a/src/main/java/org/torproject/collector/index/CreateIndexJson.java b/src/main/java/org/torproject/collector/index/CreateIndexJson.java new file mode 100644 index 0000000..ac5adf5 --- /dev/null +++ b/src/main/java/org/torproject/collector/index/CreateIndexJson.java @@ -0,0 +1,168 @@ +/* Copyright 2015--2016 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.index; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Locale; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeSet; +import java.util.zip.GZIPOutputStream; + +/* Create a fresh index.json containing all directories and files in the + * archive/ and recent/ directories. + * + * Note that if this ever takes longer than a few seconds, we'll have to + * cache index parts of directories or files that haven't changed. + * Example: if we parse include cryptographic hashes or @type information, + * we'll likely have to do that. 
*/ +public class CreateIndexJson { + + static final File indexJsonFile = new File("index.json"); + + static final String basePath = "https://collector.torproject.org"; + + static final File[] indexedDirectories = new File[] { + new File("archive"), new File("recent") }; + + static final String dateTimePattern = "yyyy-MM-dd HH:mm"; + + static final Locale dateTimeLocale = Locale.US; + + static final TimeZone dateTimezone = TimeZone.getTimeZone("UTC"); + + public static void main(String[] args) throws IOException { + writeIndex(indexDirectories()); + } + + static class DirectoryNode implements Comparable<DirectoryNode> { + String path; + SortedSet<FileNode> files; + SortedSet<DirectoryNode> directories; + + DirectoryNode(String path, SortedSet<FileNode> files, + SortedSet<DirectoryNode> directories) { + this.path = path; + this.files = files; + this.directories = directories; + } + + public int compareTo(DirectoryNode o) { + return this.path.compareTo(o.path); + } + } + + static class IndexNode { + String index_created; + String path; + SortedSet<FileNode> files; + SortedSet<DirectoryNode> directories; + + IndexNode(String index_created, String path, + SortedSet<FileNode> files, + SortedSet<DirectoryNode> directories) { + this.index_created = index_created; + this.path = path; + this.files = files; + this.directories = directories; + } + } + + static class FileNode implements Comparable<FileNode> { + String path; + long size; + String last_modified; + + FileNode(String path, long size, String last_modified) { + this.path = path; + this.size = size; + this.last_modified = last_modified; + } + + public int compareTo(FileNode o) { + return this.path.compareTo(o.path); + } + } + + static DateFormat dateTimeFormat; + + static { + dateTimeFormat = new SimpleDateFormat(dateTimePattern, + dateTimeLocale); + dateTimeFormat.setLenient(false); + dateTimeFormat.setTimeZone(dateTimezone); + } + + static IndexNode indexDirectories() { + SortedSet<DirectoryNode> directoryNodes = + 
new TreeSet<DirectoryNode>(); + for (File directory : indexedDirectories) { + if (directory.exists() && directory.isDirectory()) { + directoryNodes.add(indexDirectory(directory)); + } + } + return new IndexNode(dateTimeFormat.format( + System.currentTimeMillis()), basePath, null, directoryNodes); + } + + static DirectoryNode indexDirectory(File directory) { + SortedSet<FileNode> fileNodes = new TreeSet<FileNode>(); + SortedSet<DirectoryNode> directoryNodes = + new TreeSet<DirectoryNode>(); + for (File fileOrDirectory : directory.listFiles()) { + if (fileOrDirectory.getName().startsWith(".")) { + continue; + } + if (fileOrDirectory.isFile()) { + fileNodes.add(indexFile(fileOrDirectory)); + } else { + directoryNodes.add(indexDirectory(fileOrDirectory)); + } + } + DirectoryNode directoryNode = new DirectoryNode( + directory.getName(), fileNodes.isEmpty() ? null : fileNodes, + directoryNodes.isEmpty() ? null : directoryNodes); + return directoryNode; + } + + static FileNode indexFile(File file) { + FileNode fileNode = new FileNode(file.getName(), file.length(), + dateTimeFormat.format(file.lastModified())); + return fileNode; + } + + static void writeIndex(IndexNode indexNode) throws IOException { + Gson gson = new GsonBuilder().create(); + String indexNodeString = gson.toJson(indexNode); + Writer[] writers = new Writer[] { + new FileWriter(indexJsonFile), + new OutputStreamWriter(new GZIPOutputStream( + new FileOutputStream(indexJsonFile + ".gz"))), + new OutputStreamWriter(new XZCompressorOutputStream( + new FileOutputStream(indexJsonFile + ".xz"))), + new OutputStreamWriter(new BZip2CompressorOutputStream( + new FileOutputStream(indexJsonFile + ".bz2"))) + }; + for (Writer writer : writers) { + BufferedWriter bufferedWriter = new BufferedWriter(writer); + bufferedWriter.write(indexNodeString); + bufferedWriter.close(); + } + } +} + diff --git a/src/main/java/org/torproject/collector/main/Configuration.java 
b/src/main/java/org/torproject/collector/main/Configuration.java new file mode 100644 index 0000000..aee1d02 --- /dev/null +++ b/src/main/java/org/torproject/collector/main/Configuration.java @@ -0,0 +1,318 @@ +/* Copyright 2010--2016 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.main; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Initialize configuration with hard-coded defaults, overwrite with + * configuration in config file, if exists, and answer Main.java about our + * configuration. + */ +public class Configuration { + private String directoryArchivesOutputDirectory = + "out/relay-descriptors/"; + private boolean importCachedRelayDescriptors = false; + private List<String> cachedRelayDescriptorsDirectory = + new ArrayList<String>(Arrays.asList( + "in/relay-descriptors/cacheddesc/".split(","))); + private boolean importDirectoryArchives = false; + private String directoryArchivesDirectory = + "in/relay-descriptors/archives/"; + private boolean keepDirectoryArchiveImportHistory = false; + private boolean replaceIPAddressesWithHashes = false; + private long limitBridgeDescriptorMappings = -1L; + private String sanitizedBridgesWriteDirectory = + "out/bridge-descriptors/"; + private String bridgeSnapshotsDirectory = "in/bridge-descriptors/"; + private boolean downloadRelayDescriptors = false; + private List<String> downloadFromDirectoryAuthorities = Arrays.asList(( + "86.59.21.38,76.73.17.194:9030,171.25.193.9:443," + + "193.23.244.244,208.83.223.34:443,128.31.0.34:9131," + + "194.109.206.212,212.112.245.170,154.35.32.5").split(",")); + private List<String> 
downloadVotesByFingerprint = Arrays.asList(( + "14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4," + + "27B6B5996C426270A5C95488AA5BCEB6BCC86956," + + "49015F787433103580E3B66A1707A00E60F2D15B," + + "585769C78764D58426B8B52B6651A5A71137189A," + + "80550987E1D626E3EBA5E5E75A458DE0626D088C," + + "D586D18309DED4CD6D57C18FDB97EFA96D330566," + + "E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58," + + "ED03BB616EB2F60BEC80151114BB25CEF515B226," + + "EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97").split(",")); + private boolean downloadCurrentConsensus = true; + private boolean downloadCurrentMicrodescConsensus = true; + private boolean downloadCurrentVotes = true; + private boolean downloadMissingServerDescriptors = true; + private boolean downloadMissingExtraInfoDescriptors = true; + private boolean downloadMissingMicrodescriptors = true; + private boolean downloadAllServerDescriptors = false; + private boolean downloadAllExtraInfoDescriptors = false; + private boolean compressRelayDescriptorDownloads; + private String torperfOutputDirectory = "out/torperf/"; + private SortedMap<String, String> torperfSources = null; + private List<String> torperfFiles = null; + + public Configuration() { + + /* Initialize logger. */ + Logger logger = Logger.getLogger(Configuration.class.getName()); + + /* Read config file, if present. */ + File configFile = new File("config"); + if (!configFile.exists()) { + logger.warning("Could not find config file. In the default " + + "configuration, we are not configured to read data from any " + + "data source or write data to any data sink. You need to " + + "create a config file (" + configFile.getAbsolutePath() + + ") and provide at least one data source and one data sink. 
" + + "Refer to the manual for more information."); + return; + } + String line = null; + boolean containsCachedRelayDescriptorsDirectory = false; + try { + BufferedReader br = new BufferedReader(new FileReader(configFile)); + while ((line = br.readLine()) != null) { + if (line.startsWith("#") || line.length() < 1) { + continue; + } else if (line.startsWith("DirectoryArchivesOutputDirectory")) { + this.directoryArchivesOutputDirectory = line.split(" ")[1]; + } else if (line.startsWith("ImportCachedRelayDescriptors")) { + this.importCachedRelayDescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("CachedRelayDescriptorsDirectory")) { + if (!containsCachedRelayDescriptorsDirectory) { + this.cachedRelayDescriptorsDirectory.clear(); + containsCachedRelayDescriptorsDirectory = true; + } + this.cachedRelayDescriptorsDirectory.add(line.split(" ")[1]); + } else if (line.startsWith("ImportDirectoryArchives")) { + this.importDirectoryArchives = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DirectoryArchivesDirectory")) { + this.directoryArchivesDirectory = line.split(" ")[1]; + } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) { + this.keepDirectoryArchiveImportHistory = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("ReplaceIPAddressesWithHashes")) { + this.replaceIPAddressesWithHashes = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("LimitBridgeDescriptorMappings")) { + this.limitBridgeDescriptorMappings = Long.parseLong( + line.split(" ")[1]); + } else if (line.startsWith("SanitizedBridgesWriteDirectory")) { + this.sanitizedBridgesWriteDirectory = line.split(" ")[1]; + } else if (line.startsWith("BridgeSnapshotsDirectory")) { + this.bridgeSnapshotsDirectory = line.split(" ")[1]; + } else if (line.startsWith("DownloadRelayDescriptors")) { + this.downloadRelayDescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if 
(line.startsWith("DownloadFromDirectoryAuthorities")) { + this.downloadFromDirectoryAuthorities = new ArrayList<String>(); + for (String dir : line.split(" ")[1].split(",")) { + // test if IP:port pair has correct format + if (dir.length() < 1) { + logger.severe("Configuration file contains directory " + + "authority IP:port of length 0 in line '" + line + + "'! Exiting!"); + System.exit(1); + } + new URL("http://" + dir + "/"); + this.downloadFromDirectoryAuthorities.add(dir); + } + } else if (line.startsWith("DownloadVotesByFingerprint")) { + this.downloadVotesByFingerprint = new ArrayList<String>(); + for (String fingerprint : line.split(" ")[1].split(",")) { + this.downloadVotesByFingerprint.add(fingerprint); + } + } else if (line.startsWith("DownloadCurrentConsensus")) { + this.downloadCurrentConsensus = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadCurrentMicrodescConsensus")) { + this.downloadCurrentMicrodescConsensus = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadCurrentVotes")) { + this.downloadCurrentVotes = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadMissingServerDescriptors")) { + this.downloadMissingServerDescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith( + "DownloadMissingExtraInfoDescriptors")) { + this.downloadMissingExtraInfoDescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadMissingMicrodescriptors")) { + this.downloadMissingMicrodescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadAllServerDescriptors")) { + this.downloadAllServerDescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadAllExtraInfoDescriptors")) { + this.downloadAllExtraInfoDescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("CompressRelayDescriptorDownloads")) 
{ + this.compressRelayDescriptorDownloads = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("TorperfOutputDirectory")) { + this.torperfOutputDirectory = line.split(" ")[1]; + } else if (line.startsWith("TorperfSource")) { + if (this.torperfSources == null) { + this.torperfSources = new TreeMap<String, String>(); + } + String[] parts = line.split(" "); + String sourceName = parts[1]; + String baseUrl = parts[2]; + this.torperfSources.put(sourceName, baseUrl); + } else if (line.startsWith("TorperfFiles")) { + if (this.torperfFiles == null) { + this.torperfFiles = new ArrayList<String>(); + } + String[] parts = line.split(" "); + if (parts.length != 5) { + logger.severe("Configuration file contains TorperfFiles " + + "option with wrong number of values in line '" + line + + "'! Exiting!"); + System.exit(1); + } + this.torperfFiles.add(line); + } else { + logger.severe("Configuration file contains unrecognized " + + "configuration key in line '" + line + "'! Exiting!"); + System.exit(1); + } + } + br.close(); + } catch (ArrayIndexOutOfBoundsException e) { + logger.severe("Configuration file contains configuration key " + + "without value in line '" + line + "'. Exiting!"); + System.exit(1); + } catch (MalformedURLException e) { + logger.severe("Configuration file contains illegal URL or IP:port " + + "pair in line '" + line + "'. Exiting!"); + System.exit(1); + } catch (NumberFormatException e) { + logger.severe("Configuration file contains illegal value in line '" + + line + "' with legal values being 0 or 1. Exiting!"); + System.exit(1); + } catch (IOException e) { + logger.log(Level.SEVERE, "Unknown problem while reading config " + + "file! 
Exiting!", e); + System.exit(1); + } + } + + public String getDirectoryArchivesOutputDirectory() { + return this.directoryArchivesOutputDirectory; + } + + public boolean getImportCachedRelayDescriptors() { + return this.importCachedRelayDescriptors; + } + + public List<String> getCachedRelayDescriptorDirectory() { + return this.cachedRelayDescriptorsDirectory; + } + + public boolean getImportDirectoryArchives() { + return this.importDirectoryArchives; + } + + public String getDirectoryArchivesDirectory() { + return this.directoryArchivesDirectory; + } + + public boolean getKeepDirectoryArchiveImportHistory() { + return this.keepDirectoryArchiveImportHistory; + } + + public boolean getReplaceIPAddressesWithHashes() { + return this.replaceIPAddressesWithHashes; + } + + public long getLimitBridgeDescriptorMappings() { + return this.limitBridgeDescriptorMappings; + } + + public String getSanitizedBridgesWriteDirectory() { + return this.sanitizedBridgesWriteDirectory; + } + + public String getBridgeSnapshotsDirectory() { + return this.bridgeSnapshotsDirectory; + } + + public boolean getDownloadRelayDescriptors() { + return this.downloadRelayDescriptors; + } + + public List<String> getDownloadFromDirectoryAuthorities() { + return this.downloadFromDirectoryAuthorities; + } + + public List<String> getDownloadVotesByFingerprint() { + return this.downloadVotesByFingerprint; + } + + public boolean getDownloadCurrentConsensus() { + return this.downloadCurrentConsensus; + } + + public boolean getDownloadCurrentMicrodescConsensus() { + return this.downloadCurrentMicrodescConsensus; + } + + public boolean getDownloadCurrentVotes() { + return this.downloadCurrentVotes; + } + + public boolean getDownloadMissingServerDescriptors() { + return this.downloadMissingServerDescriptors; + } + + public boolean getDownloadMissingExtraInfoDescriptors() { + return this.downloadMissingExtraInfoDescriptors; + } + + public boolean getDownloadMissingMicrodescriptors() { + return 
/**
 * Simple file-based lock that keeps two runs of the same module from
 * overlapping.  The lock file lock/&lt;moduleName&gt; contains the start
 * time of the run holding the lock in milliseconds.
 *
 * Note that checking for and writing the lock file is not atomic.
 */
public class LockFile {

  /* Locks older than this are treated as leftovers of a run that did
   * not exit cleanly and are overwritten. */
  private static final long MAX_LOCK_AGE_MILLIS = 55L * 60L * 1000L;

  private File lockFile;
  private Logger logger;

  public LockFile(String moduleName) {
    this.lockFile = new File("lock/" + moduleName);
    this.logger = Logger.getLogger(LockFile.class.getName());
  }

  /**
   * Tries to acquire the lock.
   *
   * @return true if the lock file was written; false if another run
   *     started less than 55 minutes ago still holds the lock, or if the
   *     lock file could not be read or written.
   */
  public boolean acquireLock() {
    this.logger.fine("Trying to acquire lock...");
    try {
      if (this.lockFile.exists()) {
        long runStarted = this.readRunStarted();
        if (System.currentTimeMillis() - runStarted
            < MAX_LOCK_AGE_MILLIS) {
          return false;
        }
      }
      File lockDirectory = this.lockFile.getParentFile();
      if (lockDirectory != null) {
        lockDirectory.mkdirs();
      }
      BufferedWriter bw = new BufferedWriter(new FileWriter(
          this.lockFile));
      try {
        bw.append("" + System.currentTimeMillis() + "\n");
      } finally {
        /* Release the file handle even if writing failed. */
        bw.close();
      }
      this.logger.fine("Acquired lock.");
      return true;
    } catch (IOException e) {
      this.logger.log(java.util.logging.Level.WARNING, "Caught exception "
          + "while trying to acquire lock!", e);
      return false;
    }
  }

  /* Returns the start time stored in the existing lock file.  If the
   * file is empty or does not contain a number (e.g., because it was
   * truncated), fall back to its last-modified time, so that a fresh
   * lock is still respected and an old one still expires instead of
   * failing with a NumberFormatException. */
  private long readRunStarted() throws IOException {
    String firstLine;
    BufferedReader br = new BufferedReader(new FileReader(
        this.lockFile));
    try {
      firstLine = br.readLine();
    } finally {
      br.close();
    }
    if (firstLine != null) {
      try {
        return Long.parseLong(firstLine.trim());
      } catch (NumberFormatException e) {
        /* Handled below together with the empty-file case. */
      }
    }
    this.logger.warning("Lock file " + this.lockFile.getAbsolutePath()
        + " does not contain a valid start time. Using its "
        + "last-modified time instead.");
    return this.lockFile.lastModified();
  }

  /** Releases the lock by deleting the lock file. */
  public void releaseLock() {
    this.logger.fine("Releasing lock...");
    if (!this.lockFile.delete() && this.lockFile.exists()) {
      this.logger.warning("Could not delete lock file "
          + this.lockFile.getAbsolutePath() + ".");
    }
    this.logger.fine("Released lock.");
  }
}
+ */ +public class ArchiveReader { + + private Map<String, Set<String>> microdescriptorValidAfterTimes = + new HashMap<String, Set<String>>(); + + public ArchiveReader(RelayDescriptorParser rdp, File archivesDirectory, + File statsDirectory, boolean keepImportHistory) { + + if (rdp == null || archivesDirectory == null + || statsDirectory == null) { + throw new IllegalArgumentException(); + } + + rdp.setArchiveReader(this); + int parsedFiles = 0; + int ignoredFiles = 0; + Logger logger = Logger.getLogger(ArchiveReader.class.getName()); + SortedSet<String> archivesImportHistory = new TreeSet<String>(); + File archivesImportHistoryFile = new File(statsDirectory, + "archives-import-history"); + if (keepImportHistory && archivesImportHistoryFile.exists()) { + try { + BufferedReader br = new BufferedReader(new FileReader( + archivesImportHistoryFile)); + String line = null; + while ((line = br.readLine()) != null) { + archivesImportHistory.add(line); + } + br.close(); + } catch (IOException e) { + logger.log(Level.WARNING, "Could not read in archives import " + + "history file. Skipping."); + } + } + if (archivesDirectory.exists()) { + logger.fine("Importing files in directory " + archivesDirectory + + "/..."); + Stack<File> filesInInputDir = new Stack<File>(); + filesInInputDir.add(archivesDirectory); + List<File> problems = new ArrayList<File>(); + Set<File> filesToRetry = new HashSet<File>(); + while (!filesInInputDir.isEmpty()) { + File pop = filesInInputDir.pop(); + if (pop.isDirectory()) { + for (File f : pop.listFiles()) { + filesInInputDir.add(f); + } + } else { + if (rdp != null) { + try { + BufferedInputStream bis = null; + if (keepImportHistory + && archivesImportHistory.contains(pop.getName())) { + ignoredFiles++; + continue; + } else if (pop.getName().endsWith(".tar.bz2")) { + logger.warning("Cannot parse compressed tarball " + + pop.getAbsolutePath() + ". 
Skipping."); + continue; + } else if (pop.getName().endsWith(".bz2")) { + FileInputStream fis = new FileInputStream(pop); + BZip2CompressorInputStream bcis = + new BZip2CompressorInputStream(fis); + bis = new BufferedInputStream(bcis); + } else { + FileInputStream fis = new FileInputStream(pop); + bis = new BufferedInputStream(fis); + } + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int len; + byte[] data = new byte[1024]; + while ((len = bis.read(data, 0, 1024)) >= 0) { + baos.write(data, 0, len); + } + bis.close(); + byte[] allData = baos.toByteArray(); + boolean stored = rdp.parse(allData); + if (!stored) { + filesToRetry.add(pop); + continue; + } + if (keepImportHistory) { + archivesImportHistory.add(pop.getName()); + } + parsedFiles++; + } catch (IOException e) { + problems.add(pop); + if (problems.size() > 3) { + break; + } + } + } + } + } + for (File pop : filesToRetry) { + /* TODO We need to parse microdescriptors ourselves, rather than + * RelayDescriptorParser, because only we know the valid-after + * time(s) of microdesc consensus(es) containing this + * microdescriptor. However, this breaks functional abstraction + * pretty badly. 
*/ + if (rdp != null) { + try { + BufferedInputStream bis = null; + if (pop.getName().endsWith(".bz2")) { + FileInputStream fis = new FileInputStream(pop); + BZip2CompressorInputStream bcis = + new BZip2CompressorInputStream(fis); + bis = new BufferedInputStream(bcis); + } else { + FileInputStream fis = new FileInputStream(pop); + bis = new BufferedInputStream(fis); + } + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int len; + byte[] data = new byte[1024]; + while ((len = bis.read(data, 0, 1024)) >= 0) { + baos.write(data, 0, len); + } + bis.close(); + byte[] allData = baos.toByteArray(); + BufferedReader br = new BufferedReader(new StringReader( + new String(allData, "US-ASCII"))); + String line; + do { + line = br.readLine(); + } while (line != null && line.startsWith("@")); + br.close(); + if (line == null) { + logger.fine("We were given an empty descriptor for " + + "parsing. Ignoring."); + continue; + } + if (!line.equals("onion-key")) { + logger.fine("Skipping non-recognized descriptor."); + continue; + } + SimpleDateFormat parseFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + parseFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String ascii = null; + try { + ascii = new String(allData, "US-ASCII"); + } catch (UnsupportedEncodingException e) { + /* No way that US-ASCII is not supported. 
*/ + } + int start = -1; + int end = -1; + String startToken = "onion-key\n"; + while (end < ascii.length()) { + start = ascii.indexOf(startToken, end); + if (start < 0) { + break; + } + end = ascii.indexOf(startToken, start + 1); + if (end < 0) { + end = ascii.length(); + if (end <= start) { + break; + } + } + byte[] descBytes = new byte[end - start]; + System.arraycopy(allData, start, descBytes, 0, end - start); + String digest256Base64 = Base64.encodeBase64String( + DigestUtils.sha256(descBytes)).replaceAll("=", ""); + String digest256Hex = DigestUtils.sha256Hex(descBytes); + if (!this.microdescriptorValidAfterTimes.containsKey( + digest256Hex)) { + logger.fine("Could not store microdescriptor '" + + digest256Hex + "', which was not contained in a " + + "microdesc consensus."); + continue; + } + for (String validAfterTime : + this.microdescriptorValidAfterTimes.get(digest256Hex)) { + try { + long validAfter = + parseFormat.parse(validAfterTime).getTime(); + rdp.storeMicrodescriptor(descBytes, digest256Hex, + digest256Base64, validAfter); + } catch (ParseException e) { + logger.log(Level.WARNING, "Could not parse " + + "valid-after time '" + validAfterTime + "'. Not " + + "storing microdescriptor.", e); + } + } + } + if (keepImportHistory) { + archivesImportHistory.add(pop.getName()); + } + parsedFiles++; + } catch (IOException e) { + problems.add(pop); + if (problems.size() > 3) { + break; + } + } + } + } + if (problems.isEmpty()) { + logger.fine("Finished importing files in directory " + + archivesDirectory + "/."); + } else { + StringBuilder sb = new StringBuilder("Failed importing files in " + + "directory " + archivesDirectory + "/:"); + int printed = 0; + for (File f : problems) { + sb.append("\n " + f.getAbsolutePath()); + if (++printed >= 3) { + sb.append("\n ... 
more"); + break; + } + } + } + } + if (keepImportHistory) { + try { + archivesImportHistoryFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + archivesImportHistoryFile)); + for (String line : archivesImportHistory) { + bw.write(line + "\n"); + } + bw.close(); + } catch (IOException e) { + logger.log(Level.WARNING, "Could not write archives import " + + "history file."); + } + } + logger.info("Finished importing relay descriptors from local " + + "directory:\nParsed " + parsedFiles + ", ignored " + + ignoredFiles + " files."); + } + + public void haveParsedMicrodescConsensus(String validAfterTime, + SortedSet<String> microdescriptorDigests) { + for (String microdescriptor : microdescriptorDigests) { + if (!this.microdescriptorValidAfterTimes.containsKey( + microdescriptor)) { + this.microdescriptorValidAfterTimes.put(microdescriptor, + new HashSet<String>()); + } + this.microdescriptorValidAfterTimes.get(microdescriptor).add( + validAfterTime); + } + } +} + diff --git a/src/main/java/org/torproject/collector/relaydescs/ArchiveWriter.java b/src/main/java/org/torproject/collector/relaydescs/ArchiveWriter.java new file mode 100644 index 0000000..cf603d1 --- /dev/null +++ b/src/main/java/org/torproject/collector/relaydescs/ArchiveWriter.java @@ -0,0 +1,845 @@ +/* Copyright 2010--2016 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.relaydescs; + +import org.torproject.collector.main.Configuration; +import org.torproject.collector.main.LockFile; +import org.torproject.descriptor.DescriptorParseException; +import org.torproject.descriptor.DescriptorParser; +import org.torproject.descriptor.DescriptorSourceFactory; + +import java.io.BufferedOutputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import 
java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.Stack; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class ArchiveWriter extends Thread { + + private Configuration config; + + private long now = System.currentTimeMillis(); + private Logger logger; + private File outputDirectory; + private String rsyncCatString; + private DescriptorParser descriptorParser; + private int storedConsensusesCounter = 0; + private int storedMicrodescConsensusesCounter = 0; + private int storedVotesCounter = 0; + private int storedCertsCounter = 0; + private int storedServerDescriptorsCounter = 0; + private int storedExtraInfoDescriptorsCounter = 0; + private int storedMicrodescriptorsCounter = 0; + + private SortedMap<Long, SortedSet<String>> storedConsensuses = + new TreeMap<Long, SortedSet<String>>(); + private SortedMap<Long, SortedSet<String>> storedMicrodescConsensuses = + new TreeMap<Long, SortedSet<String>>(); + private SortedMap<Long, Integer> expectedVotes = + new TreeMap<Long, Integer>(); + private SortedMap<Long, SortedMap<String, SortedSet<String>>> + storedVotes = + new TreeMap<Long, SortedMap<String, SortedSet<String>>>(); + private SortedMap<Long, Map<String, String>> storedServerDescriptors = + new TreeMap<Long, Map<String, String>>(); + private SortedMap<Long, Set<String>> storedExtraInfoDescriptors = + new TreeMap<Long, Set<String>>(); + private SortedMap<Long, Set<String>> storedMicrodescriptors = + new TreeMap<Long, Set<String>>(); + + private File storedServerDescriptorsFile = new File( + "stats/stored-server-descriptors"); + private File storedExtraInfoDescriptorsFile = new File( + "stats/stored-extra-info-descriptors"); + 
private File storedMicrodescriptorsFile = new File( + "stats/stored-microdescriptors"); + + private static final byte[] CONSENSUS_ANNOTATION = + "@type network-status-consensus-3 1.0\n".getBytes(); + + private static final byte[] MICRODESCCONSENSUS_ANNOTATION = + "@type network-status-microdesc-consensus-3 1.0\n".getBytes(); + + private static final byte[] VOTE_ANNOTATION = + "@type network-status-vote-3 1.0\n".getBytes(); + + private static final byte[] CERTIFICATE_ANNOTATION = + "@type dir-key-certificate-3 1.0\n".getBytes(); + + private static final byte[] SERVER_DESCRIPTOR_ANNOTATION = + "@type server-descriptor 1.0\n".getBytes(); + + private static final byte[] EXTRA_INFO_ANNOTATION = + "@type extra-info 1.0\n".getBytes(); + + private static final byte[] MICRODESCRIPTOR_ANNOTATION = + "@type microdescriptor 1.0\n".getBytes(); + + private StringBuilder intermediateStats = new StringBuilder(); + + public static void main(String[] args) { + + Logger logger = Logger.getLogger(ArchiveWriter.class.getName()); + logger.info("Starting relay-descriptors module of CollecTor."); + + // Initialize configuration + Configuration config = new Configuration(); + + // Use lock file to avoid overlapping runs + LockFile lf = new LockFile("relay-descriptors"); + if (!lf.acquireLock()) { + logger.severe("Warning: CollecTor is already running or has not exited " + + "cleanly! 
Exiting!"); + System.exit(1); + } + + // Import/download relay descriptors from the various sources + new ArchiveWriter(config).run(); + + new ReferenceChecker(new File("recent/relay-descriptors"), + new File("stats/references"), + new File("stats/references-history")).check(); + + // Remove lock file + lf.releaseLock(); + + logger.info("Terminating relay-descriptors module of CollecTor."); + } + + public ArchiveWriter(Configuration config) { + this.config = config; + } + + public void run() { + + File outputDirectory = + new File(config.getDirectoryArchivesOutputDirectory()); + File statsDirectory = new File("stats"); + + this.logger = Logger.getLogger(ArchiveWriter.class.getName()); + this.outputDirectory = outputDirectory; + SimpleDateFormat rsyncCatFormat = new SimpleDateFormat( + "yyyy-MM-dd-HH-mm-ss"); + rsyncCatFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + this.rsyncCatString = rsyncCatFormat.format( + System.currentTimeMillis()); + this.descriptorParser = + DescriptorSourceFactory.createDescriptorParser(); + + this.loadDescriptorDigests(); + + // Prepare relay descriptor parser + RelayDescriptorParser rdp = new RelayDescriptorParser(this); + + RelayDescriptorDownloader rdd = null; + if (config.getDownloadRelayDescriptors()) { + List<String> dirSources = + config.getDownloadFromDirectoryAuthorities(); + rdd = new RelayDescriptorDownloader(rdp, dirSources, + config.getDownloadVotesByFingerprint(), + config.getDownloadCurrentConsensus(), + config.getDownloadCurrentMicrodescConsensus(), + config.getDownloadCurrentVotes(), + config.getDownloadMissingServerDescriptors(), + config.getDownloadMissingExtraInfoDescriptors(), + config.getDownloadMissingMicrodescriptors(), + config.getDownloadAllServerDescriptors(), + config.getDownloadAllExtraInfoDescriptors(), + config.getCompressRelayDescriptorDownloads()); + rdp.setRelayDescriptorDownloader(rdd); + } + if (config.getImportCachedRelayDescriptors()) { + new CachedRelayDescriptorReader(rdp, + 
config.getCachedRelayDescriptorDirectory(), statsDirectory); + this.intermediateStats("importing relay descriptors from local " + + "Tor data directories"); + } + if (config.getImportDirectoryArchives()) { + new ArchiveReader(rdp, + new File(config.getDirectoryArchivesDirectory()), + statsDirectory, + config.getKeepDirectoryArchiveImportHistory()); + this.intermediateStats("importing relay descriptors from local " + + "directory"); + } + if (rdd != null) { + rdd.downloadDescriptors(); + rdd.writeFile(); + rdd = null; + this.intermediateStats("downloading relay descriptors from the " + + "directory authorities"); + } + + this.checkMissingDescriptors(); + + this.checkStaledescriptors(); + + this.cleanUpRsyncDirectory(); + + this.saveDescriptorDigests(); + } + + private void loadDescriptorDigests() { + SimpleDateFormat dateTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + try { + if (this.storedServerDescriptorsFile.exists()) { + BufferedReader br = new BufferedReader(new FileReader( + this.storedServerDescriptorsFile)); + String line; + while ((line = br.readLine()) != null) { + String[] parts = line.split(","); + if (parts.length != 3) { + this.logger.warning("Could not load server descriptor " + + "digests because of illegal line '" + line + "'. We " + + "might not be able to correctly check descriptors for " + + "completeness."); + break; + } + long published = dateTimeFormat.parse(parts[0]).getTime(); + if (published < this.now - 48L * 60L * 60L * 1000L) { + continue; + } + if (!this.storedServerDescriptors.containsKey(published)) { + this.storedServerDescriptors.put(published, + new HashMap<String, String>()); + } + String serverDescriptorDigest = parts[1]; + String extraInfoDescriptorDigest = parts[2].equals("NA") ? 
null + : parts[2]; + this.storedServerDescriptors.get(published).put( + serverDescriptorDigest, extraInfoDescriptorDigest); + } + br.close(); + } + if (this.storedExtraInfoDescriptorsFile.exists()) { + BufferedReader br = new BufferedReader(new FileReader( + this.storedExtraInfoDescriptorsFile)); + String line; + while ((line = br.readLine()) != null) { + String[] parts = line.split(","); + if (parts.length != 2) { + this.logger.warning("Could not load extra-info descriptor " + + "digests because of illegal line '" + line + "'. We " + + "might not be able to correctly check descriptors for " + + "completeness."); + break; + } + long published = dateTimeFormat.parse(parts[0]).getTime(); + if (published < this.now - 48L * 60L * 60L * 1000L) { + continue; + } + if (!this.storedExtraInfoDescriptors.containsKey(published)) { + this.storedExtraInfoDescriptors.put(published, + new HashSet<String>()); + } + String extraInfoDescriptorDigest = parts[1]; + this.storedExtraInfoDescriptors.get(published).add( + extraInfoDescriptorDigest); + } + br.close(); + } + if (this.storedMicrodescriptorsFile.exists()) { + BufferedReader br = new BufferedReader(new FileReader( + this.storedMicrodescriptorsFile)); + String line; + while ((line = br.readLine()) != null) { + String[] parts = line.split(","); + if (parts.length != 2) { + this.logger.warning("Could not load microdescriptor digests " + + "because of illegal line '" + line + "'. 
We might not " + + "be able to correctly check descriptors for " + + "completeness."); + break; + } + long validAfter = dateTimeFormat.parse(parts[0]).getTime(); + if (validAfter < this.now - 40L * 24L * 60L * 60L * 1000L) { + continue; + } + if (!this.storedMicrodescriptors.containsKey(validAfter)) { + this.storedMicrodescriptors.put(validAfter, + new HashSet<String>()); + } + String microdescriptorDigest = parts[1]; + this.storedMicrodescriptors.get(validAfter).add( + microdescriptorDigest); + } + br.close(); + } + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Could not load descriptor " + + "digests. We might not be able to correctly check " + + "descriptors for completeness.", e); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not load descriptor " + + "digests. We might not be able to correctly check " + + "descriptors for completeness.", e); + } + } + + public void intermediateStats(String event) { + intermediateStats.append("While " + event + ", we stored " + + this.storedConsensusesCounter + " consensus(es), " + + this.storedMicrodescConsensusesCounter + " microdesc " + + "consensus(es), " + this.storedVotesCounter + " vote(s), " + + this.storedCertsCounter + " certificate(s), " + + this.storedServerDescriptorsCounter + " server descriptor(s), " + + this.storedExtraInfoDescriptorsCounter + " extra-info " + + "descriptor(s), and " + this.storedMicrodescriptorsCounter + + " microdescriptor(s) to disk.\n"); + this.storedConsensusesCounter = 0; + this.storedMicrodescConsensusesCounter = 0; + this.storedVotesCounter = 0; + this.storedCertsCounter = 0; + this.storedServerDescriptorsCounter = 0; + this.storedExtraInfoDescriptorsCounter = 0; + this.storedMicrodescriptorsCounter = 0; + } + + private void checkMissingDescriptors() { + StringBuilder sb = new StringBuilder("Finished writing relay " + + "descriptors to disk.\n"); + sb.append(intermediateStats.toString()); + sb.append("Statistics on the completeness of written relay 
" + + "descriptors:"); + SimpleDateFormat dateTimeFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + Map<String, String> knownServerDescriptors = + new HashMap<String, String>(); + for (Map<String, String> descriptors : + this.storedServerDescriptors.values()) { + knownServerDescriptors.putAll(descriptors); + } + Set<String> knownExtraInfoDescriptors = new HashSet<String>(); + for (Set<String> descriptors : + this.storedExtraInfoDescriptors.values()) { + knownExtraInfoDescriptors.addAll(descriptors); + } + Set<String> knownMicrodescriptors = new HashSet<String>(); + for (Set<String> descriptors : this.storedMicrodescriptors.values()) { + knownMicrodescriptors.addAll(descriptors); + } + boolean missingDescriptors = false; + boolean missingVotes = false; + boolean missingMicrodescConsensus = false; + for (Map.Entry<Long, SortedSet<String>> c : + this.storedConsensuses.entrySet()) { + long validAfterMillis = c.getKey(); + String validAfterTime = dateTimeFormat.format(validAfterMillis); + int allVotes = this.expectedVotes.containsKey(validAfterMillis) + ? 
this.expectedVotes.get(validAfterMillis) : 0; + int foundVotes = 0; + if (this.storedVotes.containsKey(validAfterMillis)) { + foundVotes = this.storedVotes.get(validAfterMillis).size(); + for (Map.Entry<String, SortedSet<String>> v : + this.storedVotes.get(validAfterMillis).entrySet()) { + int voteFoundServerDescs = 0; + int voteAllServerDescs = 0; + int voteFoundExtraInfos = 0; + int voteAllExtraInfos = 0; + for (String serverDescriptorDigest : v.getValue()) { + voteAllServerDescs++; + if (knownServerDescriptors.containsKey( + serverDescriptorDigest)) { + voteFoundServerDescs++; + if (knownServerDescriptors.get(serverDescriptorDigest) + != null) { + String extraInfoDescriptorDigest = + knownServerDescriptors.get(serverDescriptorDigest); + voteAllExtraInfos++; + if (knownExtraInfoDescriptors.contains( + extraInfoDescriptorDigest)) { + voteFoundExtraInfos++; + } + } + } + } + sb.append("\nV, " + validAfterTime); + if (voteAllServerDescs > 0) { + sb.append(String.format(", %d/%d S (%.1f%%)", + voteFoundServerDescs, voteAllServerDescs, + 100.0D * (double) voteFoundServerDescs + / (double) voteAllServerDescs)); + } else { + sb.append(", 0/0 S"); + } + if (voteAllExtraInfos > 0) { + sb.append(String.format(", %d/%d E (%.1f%%)", + voteFoundExtraInfos, voteAllExtraInfos, + 100.0D * (double) voteFoundExtraInfos + / (double) voteAllExtraInfos)); + } else { + sb.append(", 0/0 E"); + } + String fingerprint = v.getKey(); + /* Ignore turtles when warning about missing descriptors. 
*/ + if (!fingerprint.equalsIgnoreCase( + "27B6B5996C426270A5C95488AA5BCEB6BCC86956") + && (voteFoundServerDescs * 1000 < voteAllServerDescs * 995 + || voteFoundExtraInfos * 1000 < voteAllExtraInfos * 995)) { + missingDescriptors = true; + } + } + } + int foundServerDescs = 0; + int allServerDescs = 0; + int foundExtraInfos = 0; + int allExtraInfos = 0; + int foundMicrodescriptors = 0; + int allMicrodescriptors = 0; + for (String serverDescriptorDigest : c.getValue()) { + allServerDescs++; + if (knownServerDescriptors.containsKey( + serverDescriptorDigest)) { + foundServerDescs++; + if (knownServerDescriptors.get( + serverDescriptorDigest) != null) { + allExtraInfos++; + String extraInfoDescriptorDigest = + knownServerDescriptors.get(serverDescriptorDigest); + if (knownExtraInfoDescriptors.contains( + extraInfoDescriptorDigest)) { + foundExtraInfos++; + } + } + } + } + sb.append("\nC, " + validAfterTime); + if (allVotes > 0) { + sb.append(String.format(", %d/%d V (%.1f%%)", foundVotes, allVotes, + 100.0D * (double) foundVotes / (double) allVotes)); + } else { + sb.append(", 0/0 V"); + } + if (allServerDescs > 0) { + sb.append(String.format(", %d/%d S (%.1f%%)", foundServerDescs, + allServerDescs, 100.0D * (double) foundServerDescs + / (double) allServerDescs)); + } else { + sb.append(", 0/0 S"); + } + if (allExtraInfos > 0) { + sb.append(String.format(", %d/%d E (%.1f%%)", foundExtraInfos, + allExtraInfos, 100.0D * (double) foundExtraInfos + / (double) allExtraInfos)); + } else { + sb.append(", 0/0 E"); + } + if (this.storedMicrodescConsensuses.containsKey(validAfterMillis)) { + for (String microdescriptorDigest : + this.storedMicrodescConsensuses.get(validAfterMillis)) { + allMicrodescriptors++; + if (knownMicrodescriptors.contains(microdescriptorDigest)) { + foundMicrodescriptors++; + } + } + sb.append("\nM, " + validAfterTime); + if (allMicrodescriptors > 0) { + sb.append(String.format(", %d/%d M (%.1f%%)", + foundMicrodescriptors, allMicrodescriptors, + 100.0D 
* (double) foundMicrodescriptors + / (double) allMicrodescriptors)); + } else { + sb.append(", 0/0 M"); + } + } else { + missingMicrodescConsensus = true; + } + if (foundServerDescs * 1000 < allServerDescs * 995 + || foundExtraInfos * 1000 < allExtraInfos * 995 + || foundMicrodescriptors * 1000 < allMicrodescriptors * 995) { + missingDescriptors = true; + } + if (foundVotes < allVotes) { + missingVotes = true; + } + } + this.logger.info(sb.toString()); + if (missingDescriptors) { + this.logger.fine("We are missing at least 0.5% of server or " + + "extra-info descriptors referenced from a consensus or " + + "vote or at least 0.5% of microdescriptors referenced from a " + + "microdesc consensus."); + } + if (missingVotes) { + /* TODO Shouldn't warn if we're not trying to archive votes at + * all. */ + this.logger.fine("We are missing at least one vote that was " + + "referenced from a consensus."); + } + if (missingMicrodescConsensus) { + /* TODO Shouldn't warn if we're not trying to archive microdesc + * consensuses at all. 
*/ + this.logger.fine("We are missing at least one microdesc " + + "consensus that was published together with a known " + + "consensus."); + } + } + + private void checkStaledescriptors() { + SimpleDateFormat dateTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + long tooOldMillis = this.now - 330L * 60L * 1000L; + if (!this.storedConsensuses.isEmpty() + && this.storedConsensuses.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay network status " + + "consensus was valid after " + + dateTimeFormat.format(this.storedConsensuses.lastKey()) + + ", which is more than 5:30 hours in the past."); + } + if (!this.storedMicrodescConsensuses.isEmpty() + && this.storedMicrodescConsensuses.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay network status " + + "microdesc consensus was valid after " + + dateTimeFormat.format( + this.storedMicrodescConsensuses.lastKey()) + + ", which is more than 5:30 hours in the past."); + } + if (!this.storedVotes.isEmpty() + && this.storedVotes.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay network status vote " + + "was valid after " + dateTimeFormat.format( + this.storedVotes.lastKey()) + ", which is more than 5:30 hours " + + "in the past."); + } + if (!this.storedServerDescriptors.isEmpty() + && this.storedServerDescriptors.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay server descriptor was " + + "published at " + + dateTimeFormat.format(this.storedServerDescriptors.lastKey()) + + ", which is more than 5:30 hours in the past."); + } + if (!this.storedExtraInfoDescriptors.isEmpty() + && this.storedExtraInfoDescriptors.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay extra-info descriptor " + + "was published at " + dateTimeFormat.format( + this.storedExtraInfoDescriptors.lastKey()) + + ", which is more than 5:30 hours in the past."); + } + if 
(!this.storedMicrodescriptors.isEmpty() + && this.storedMicrodescriptors.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay microdescriptor was " + + "contained in a microdesc consensus that was valid after " + + dateTimeFormat.format(this.storedMicrodescriptors.lastKey()) + + ", which is more than 5:30 hours in the past."); + } + } + + /* Delete all files from the rsync directory that have not been modified + * in the last three days (except for microdescriptors which are kept + * for up to thirty days), and remove the .tmp extension from newly + * written files. */ + public void cleanUpRsyncDirectory() { + long cutOffMillis = System.currentTimeMillis() + - 3L * 24L * 60L * 60L * 1000L; + long cutOffMicroMillis = cutOffMillis - 27L * 24L * 60L * 60L * 1000L; + Stack<File> allFiles = new Stack<File>(); + allFiles.add(new File("recent/relay-descriptors")); + while (!allFiles.isEmpty()) { + File file = allFiles.pop(); + if (file.isDirectory()) { + allFiles.addAll(Arrays.asList(file.listFiles())); + } else if (file.getName().endsWith("-micro")) { + if (file.lastModified() < cutOffMicroMillis) { + file.delete(); + } + } else if (file.lastModified() < cutOffMillis) { + file.delete(); + } else if (file.getName().endsWith(".tmp")) { + file.renameTo(new File(file.getParentFile(), + file.getName().substring(0, + file.getName().lastIndexOf(".tmp")))); + } + } + } + + private void saveDescriptorDigests() { + SimpleDateFormat dateTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + try { + this.storedServerDescriptorsFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.storedServerDescriptorsFile)); + for (Map.Entry<Long, Map<String, String>> e : + this.storedServerDescriptors.entrySet()) { + String published = dateTimeFormat.format(e.getKey()); + for (Map.Entry<String, String> f : e.getValue().entrySet()) { + String serverDescriptorDigest = 
f.getKey(); + String extraInfoDescriptorDigest = f.getValue() == null ? "NA" + : f.getValue(); + bw.write(String.format("%s,%s,%s%n", published, + serverDescriptorDigest, extraInfoDescriptorDigest)); + } + } + bw.close(); + this.storedExtraInfoDescriptorsFile.getParentFile().mkdirs(); + bw = new BufferedWriter(new FileWriter( + this.storedExtraInfoDescriptorsFile)); + for (Map.Entry<Long, Set<String>> e : + this.storedExtraInfoDescriptors.entrySet()) { + String published = dateTimeFormat.format(e.getKey()); + for (String extraInfoDescriptorDigest : e.getValue()) { + bw.write(String.format("%s,%s%n", published, + extraInfoDescriptorDigest)); + } + } + bw.close(); + this.storedMicrodescriptorsFile.getParentFile().mkdirs(); + bw = new BufferedWriter(new FileWriter( + this.storedMicrodescriptorsFile)); + for (Map.Entry<Long, Set<String>> e : + this.storedMicrodescriptors.entrySet()) { + String validAfter = dateTimeFormat.format(e.getKey()); + for (String microdescriptorDigest : e.getValue()) { + bw.write(String.format("%s,%s%n", validAfter, + microdescriptorDigest)); + } + } + bw.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not save descriptor " + + "digests. 
We might not be able to correctly check " + + "descriptors for completeness in the next run.", e); + } + } + + public void storeConsensus(byte[] data, long validAfter, + SortedSet<String> dirSources, + SortedSet<String> serverDescriptorDigests) { + SimpleDateFormat printFormat = new SimpleDateFormat( + "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + "/consensus/" + + printFormat.format(new Date(validAfter)) + "-consensus"); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncFile = new File("recent/relay-descriptors/consensuses/" + + tarballFile.getName()); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + if (this.store(CONSENSUS_ANNOTATION, data, outputFiles, null)) { + this.storedConsensusesCounter++; + } + if (!tarballFileExistedBefore + && this.now - validAfter < 3L * 60L * 60L * 1000L) { + this.storedConsensuses.put(validAfter, serverDescriptorDigests); + this.expectedVotes.put(validAfter, dirSources.size()); + } + } + + public void storeMicrodescConsensus(byte[] data, long validAfter, + SortedSet<String> microdescriptorDigests) { + SimpleDateFormat yearMonthDirectoryFormat = new SimpleDateFormat( + "yyyy/MM"); + yearMonthDirectoryFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + SimpleDateFormat dayDirectoryFileFormat = new SimpleDateFormat( + "dd/yyyy-MM-dd-HH-mm-ss"); + dayDirectoryFileFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + + "/microdesc/" + yearMonthDirectoryFormat.format(validAfter) + + "/consensus-microdesc/" + + dayDirectoryFileFormat.format(validAfter) + + "-consensus-microdesc"); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncFile = new File("recent/relay-descriptors/microdescs/" + + "consensus-microdesc/" + tarballFile.getName()); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + if 
(this.store(MICRODESCCONSENSUS_ANNOTATION, data, outputFiles, + null)) { + this.storedMicrodescConsensusesCounter++; + } + if (!tarballFileExistedBefore + && this.now - validAfter < 3L * 60L * 60L * 1000L) { + this.storedMicrodescConsensuses.put(validAfter, + microdescriptorDigests); + } + } + + public void storeVote(byte[] data, long validAfter, + String fingerprint, String digest, + SortedSet<String> serverDescriptorDigests) { + SimpleDateFormat printFormat = new SimpleDateFormat( + "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + "/vote/" + + printFormat.format(new Date(validAfter)) + "-vote-" + + fingerprint + "-" + digest); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncFile = new File("recent/relay-descriptors/votes/" + + tarballFile.getName()); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + if (this.store(VOTE_ANNOTATION, data, outputFiles, null)) { + this.storedVotesCounter++; + } + if (!tarballFileExistedBefore + && this.now - validAfter < 3L * 60L * 60L * 1000L) { + if (!this.storedVotes.containsKey(validAfter)) { + this.storedVotes.put(validAfter, + new TreeMap<String, SortedSet<String>>()); + } + this.storedVotes.get(validAfter).put(fingerprint, + serverDescriptorDigests); + } + } + + public void storeCertificate(byte[] data, String fingerprint, + long published) { + SimpleDateFormat printFormat = new SimpleDateFormat( + "yyyy-MM-dd-HH-mm-ss"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + "/certs/" + + fingerprint + "-" + printFormat.format(new Date(published))); + File[] outputFiles = new File[] { tarballFile }; + if (this.store(CERTIFICATE_ANNOTATION, data, outputFiles, null)) { + this.storedCertsCounter++; + } + } + + public void storeServerDescriptor(byte[] data, String digest, + long published, String extraInfoDigest) { + SimpleDateFormat printFormat = 
new SimpleDateFormat("yyyy/MM/"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + + "/server-descriptor/" + printFormat.format(new Date(published)) + + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/" + + digest); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncCatFile = new File("recent/relay-descriptors/" + + "server-descriptors/" + this.rsyncCatString + + "-server-descriptors.tmp"); + File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; + boolean[] append = new boolean[] { false, true }; + if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, outputFiles, + append)) { + this.storedServerDescriptorsCounter++; + } + if (!tarballFileExistedBefore + && this.now - published < 48L * 60L * 60L * 1000L) { + if (!this.storedServerDescriptors.containsKey(published)) { + this.storedServerDescriptors.put(published, + new HashMap<String, String>()); + } + this.storedServerDescriptors.get(published).put(digest, + extraInfoDigest); + } + } + + public void storeExtraInfoDescriptor(byte[] data, + String extraInfoDigest, long published) { + SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/"); + descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + "/extra-info/" + + descriptorFormat.format(new Date(published)) + + extraInfoDigest.substring(0, 1) + "/" + + extraInfoDigest.substring(1, 2) + "/" + + extraInfoDigest); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncCatFile = new File("recent/relay-descriptors/" + + "extra-infos/" + this.rsyncCatString + "-extra-infos.tmp"); + File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; + boolean[] append = new boolean[] { false, true }; + if (this.store(EXTRA_INFO_ANNOTATION, data, outputFiles, append)) { + this.storedExtraInfoDescriptorsCounter++; + } + if (!tarballFileExistedBefore + && this.now - published < 48L * 60L * 60L * 1000L) 
{ + if (!this.storedExtraInfoDescriptors.containsKey(published)) { + this.storedExtraInfoDescriptors.put(published, + new HashSet<String>()); + } + this.storedExtraInfoDescriptors.get(published).add(extraInfoDigest); + } + } + + public void storeMicrodescriptor(byte[] data, + String microdescriptorDigest, long validAfter) { + /* TODO We could check here whether we already stored the + * microdescriptor in the same valid-after month. This can happen, + * e.g., when two relays share the same microdescriptor. In that case + * this method gets called twice and the second call overwrites the + * file written in the first call. However, this method must be + * called twice to store the same microdescriptor in two different + * valid-after months. */ + SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/"); + descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + "/microdesc/" + + descriptorFormat.format(validAfter) + "micro/" + + microdescriptorDigest.substring(0, 1) + "/" + + microdescriptorDigest.substring(1, 2) + "/" + + microdescriptorDigest); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncCatFile = new File("recent/relay-descriptors/" + + "microdescs/micro/" + this.rsyncCatString + + "-micro.tmp"); + File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; + boolean[] append = new boolean[] { false, true }; + if (this.store(MICRODESCRIPTOR_ANNOTATION, data, outputFiles, + append)) { + this.storedMicrodescriptorsCounter++; + } + if (!tarballFileExistedBefore + && this.now - validAfter < 40L * 24L * 60L * 60L * 1000L) { + if (!this.storedMicrodescriptors.containsKey(validAfter)) { + this.storedMicrodescriptors.put(validAfter, + new HashSet<String>()); + } + this.storedMicrodescriptors.get(validAfter).add( + microdescriptorDigest); + } + } + + private boolean store(byte[] typeAnnotation, byte[] data, + File[] outputFiles, boolean[] append) { + try { + 
this.logger.finer("Storing " + outputFiles[0]); + if (this.descriptorParser.parseDescriptors(data, + outputFiles[0].getName()).size() != 1) { + this.logger.info("Relay descriptor file " + outputFiles[0] + + " doesn't contain exactly one descriptor. Not storing."); + return false; + } + for (int i = 0; i < outputFiles.length; i++) { + File outputFile = outputFiles[i]; + boolean appendToFile = append == null ? false : append[i]; + outputFile.getParentFile().mkdirs(); + BufferedOutputStream bos = new BufferedOutputStream( + new FileOutputStream(outputFile, appendToFile)); + if (data.length > 0 && data[0] != '@') { + bos.write(typeAnnotation, 0, typeAnnotation.length); + } + bos.write(data, 0, data.length); + bos.close(); + } + return true; + } catch (DescriptorParseException e) { + this.logger.log(Level.WARNING, "Could not parse relay descriptor " + + outputFiles[0] + " before storing it to disk. Skipping.", e); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not store relay descriptor " + + outputFiles[0], e); + } + return false; + } +} diff --git a/src/main/java/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java b/src/main/java/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java new file mode 100644 index 0000000..b9001dd --- /dev/null +++ b/src/main/java/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java @@ -0,0 +1,255 @@ +/* Copyright 2010--2016 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.relaydescs; + +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.StringReader; +import java.text.ParseException; 
+import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.SortedSet; +import java.util.Stack; +import java.util.TimeZone; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Parses all descriptors in local directory cacheddesc/ and sorts them + * into directory structure in directory-archive/. + */ +public class CachedRelayDescriptorReader { + public CachedRelayDescriptorReader(RelayDescriptorParser rdp, + List<String> inputDirectories, File statsDirectory) { + + if (rdp == null || inputDirectories == null + || inputDirectories.isEmpty() || statsDirectory == null) { + throw new IllegalArgumentException(); + } + + StringBuilder dumpStats = new StringBuilder("Finished importing " + + "relay descriptors from local Tor data directories:"); + Logger logger = Logger.getLogger( + CachedRelayDescriptorReader.class.getName()); + + /* Read import history containing SHA-1 digests of previously parsed + * statuses and descriptors, so that we can skip them in this run. */ + Set<String> lastImportHistory = new HashSet<String>(); + Set<String> currentImportHistory = new HashSet<String>(); + File importHistoryFile = new File(statsDirectory, + "cacheddesc-import-history"); + if (importHistoryFile.exists()) { + try { + BufferedReader br = new BufferedReader(new FileReader( + importHistoryFile)); + String line; + while ((line = br.readLine()) != null) { + lastImportHistory.add(line); + } + br.close(); + } catch (IOException e) { + logger.log(Level.WARNING, "Could not read import history from " + + importHistoryFile.getAbsolutePath() + ".", e); + } + } + + /* Read cached descriptors directories. */ + for (String inputDirectory : inputDirectories) { + File cachedDescDir = new File(inputDirectory); + if (!cachedDescDir.exists()) { + logger.warning("Directory " + cachedDescDir.getAbsolutePath() + + " does not exist. 
Skipping."); + continue; + } + logger.fine("Reading " + cachedDescDir.getAbsolutePath() + + " directory."); + SortedSet<File> cachedDescFiles = new TreeSet<File>(); + Stack<File> files = new Stack<File>(); + files.add(cachedDescDir); + while (!files.isEmpty()) { + File file = files.pop(); + if (file.isDirectory()) { + files.addAll(Arrays.asList(file.listFiles())); + } else { + cachedDescFiles.add(file); + } + } + for (File f : cachedDescFiles) { + try { + // descriptors may contain non-ASCII chars; read as bytes to + // determine digests + BufferedInputStream bis = + new BufferedInputStream(new FileInputStream(f)); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int len; + byte[] data = new byte[1024]; + while ((len = bis.read(data, 0, 1024)) >= 0) { + baos.write(data, 0, len); + } + bis.close(); + byte[] allData = baos.toByteArray(); + if (f.getName().equals("cached-consensus")) { + /* Check if directory information is stale. */ + BufferedReader br = new BufferedReader(new StringReader( + new String(allData, "US-ASCII"))); + String line = null; + while ((line = br.readLine()) != null) { + if (line.startsWith("valid-after ")) { + dumpStats.append("\n" + f.getName() + ": " + line.substring( + "valid-after ".length())); + SimpleDateFormat dateTimeFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + if (dateTimeFormat.parse(line.substring("valid-after " + .length())).getTime() < System.currentTimeMillis() + - 6L * 60L * 60L * 1000L) { + logger.warning("Cached descriptor files in " + + cachedDescDir.getAbsolutePath() + " are stale. " + + "The valid-after line in cached-consensus is '" + + line + "'."); + dumpStats.append(" (stale!)"); + } + break; + } + } + br.close(); + + /* Parse the cached consensus if we haven't parsed it before + * (but regardless of whether it's stale or not). 
*/ + if (rdp != null) { + String digest = Hex.encodeHexString(DigestUtils.sha( + allData)); + if (!lastImportHistory.contains(digest) + && !currentImportHistory.contains(digest)) { + rdp.parse(allData); + } else { + dumpStats.append(" (skipped)"); + } + currentImportHistory.add(digest); + } + } else if (f.getName().equals("v3-status-votes")) { + int parsedNum = 0; + int skippedNum = 0; + String ascii = new String(allData, "US-ASCII"); + String startToken = "network-status-version "; + int end = ascii.length(); + int start = ascii.indexOf(startToken); + while (start >= 0 && start < end) { + int next = ascii.indexOf(startToken, start + 1); + if (next < 0) { + next = end; + } + if (start < next) { + byte[] rawNetworkStatusBytes = new byte[next - start]; + System.arraycopy(allData, start, rawNetworkStatusBytes, 0, + next - start); + if (rdp != null) { + String digest = Hex.encodeHexString(DigestUtils.sha( + rawNetworkStatusBytes)); + if (!lastImportHistory.contains(digest) + && !currentImportHistory.contains(digest)) { + rdp.parse(rawNetworkStatusBytes); + parsedNum++; + } else { + skippedNum++; + } + currentImportHistory.add(digest); + } + } + start = next; + } + dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum + + ", skipped " + skippedNum + " votes"); + } else if (f.getName().startsWith("cached-descriptors") + || f.getName().startsWith("cached-extrainfo")) { + String ascii = new String(allData, "US-ASCII"); + int start = -1; + int sig = -1; + int end = -1; + String startToken = + f.getName().startsWith("cached-descriptors") + ? 
"router " : "extra-info "; + String sigToken = "\nrouter-signature\n"; + String endToken = "\n-----END SIGNATURE-----\n"; + int parsedNum = 0; + int skippedNum = 0; + while (end < ascii.length()) { + start = ascii.indexOf(startToken, end); + if (start < 0) { + break; + } + sig = ascii.indexOf(sigToken, start); + if (sig < 0) { + break; + } + sig += sigToken.length(); + end = ascii.indexOf(endToken, sig); + if (end < 0) { + break; + } + end += endToken.length(); + byte[] descBytes = new byte[end - start]; + System.arraycopy(allData, start, descBytes, 0, end - start); + if (rdp != null) { + String digest = Hex.encodeHexString(DigestUtils.sha( + descBytes)); + if (!lastImportHistory.contains(digest) + && !currentImportHistory.contains(digest)) { + rdp.parse(descBytes); + parsedNum++; + } else { + skippedNum++; + } + currentImportHistory.add(digest); + } + } + dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum + + ", skipped " + skippedNum + " " + + (f.getName().startsWith("cached-descriptors") + ? "server" : "extra-info") + " descriptors"); + } + } catch (IOException e) { + logger.log(Level.WARNING, "Failed reading " + + cachedDescDir.getAbsolutePath() + " directory.", e); + } catch (ParseException e) { + logger.log(Level.WARNING, "Failed reading " + + cachedDescDir.getAbsolutePath() + " directory.", e); + } + } + logger.fine("Finished reading " + + cachedDescDir.getAbsolutePath() + " directory."); + } + + /* Write import history containing SHA-1 digests to disk. 
*/ + try { + importHistoryFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + importHistoryFile)); + for (String digest : currentImportHistory) { + bw.write(digest + "\n"); + } + bw.close(); + } catch (IOException e) { + logger.log(Level.WARNING, "Could not write import history to " + + importHistoryFile.getAbsolutePath() + ".", e); + } + + logger.info(dumpStats.toString()); + } +} + diff --git a/src/main/java/org/torproject/collector/relaydescs/ReferenceChecker.java b/src/main/java/org/torproject/collector/relaydescs/ReferenceChecker.java new file mode 100644 index 0000000..9f0f183 --- /dev/null +++ b/src/main/java/org/torproject/collector/relaydescs/ReferenceChecker.java @@ -0,0 +1,319 @@ +/* Copyright 2016 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.relaydescs; + +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.DirSourceEntry; +import org.torproject.descriptor.ExtraInfoDescriptor; +import org.torproject.descriptor.Microdescriptor; +import org.torproject.descriptor.NetworkStatusEntry; +import org.torproject.descriptor.RelayNetworkStatusConsensus; +import org.torproject.descriptor.RelayNetworkStatusVote; +import org.torproject.descriptor.ServerDescriptor; + +import com.google.gson.Gson; + +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Locale; +import java.util.Set; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class ReferenceChecker { + + private Logger 
log = Logger.getLogger(ReferenceChecker.class.getName()); + + private File descriptorsDir; + + private File referencesFile; + + private File historyFile; + + private long currentTimeMillis; + + private SortedSet<Reference> references = new TreeSet<Reference>(); + + private static DateFormat dateTimeFormat; + + static { + dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", + Locale.US); + dateTimeFormat.setLenient(false); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + private static final long ONE_HOUR = 60L * 60L * 1000L; + + private static final long THREE_HOURS = 3L * ONE_HOUR; + + private static final long SIX_HOURS = 6L * ONE_HOUR; + + private static final long ONE_DAY = 24L * ONE_HOUR; + + private static final long THIRTY_DAYS = 30L * ONE_DAY; + + public ReferenceChecker(File descriptorsDir, File referencesFile, + File historyFile) { + this.descriptorsDir = descriptorsDir; + this.referencesFile = referencesFile; + this.historyFile = historyFile; + } + + public void check() { + this.getCurrentTimeMillis(); + this.readReferencesFile(); + this.readNewDescriptors(); + this.dropStaleReferences(); + this.checkReferences(); + this.writeReferencesFile(); + } + + private void getCurrentTimeMillis() { + this.currentTimeMillis = System.currentTimeMillis(); + } + + private static class Reference implements Comparable<Reference> { + + private String referencing; + + private String referenced; + + private double weight; + + private long expiresAfterMillis; + + public Reference(String referencing, String referenced, double weight, + long expiresAfterMillis) { + this.referencing = referencing; + this.referenced = referenced; + this.weight = weight; + this.expiresAfterMillis = expiresAfterMillis; + } + + @Override + public boolean equals(Object otherObject) { + if (!(otherObject instanceof Reference)) { + return false; + } + Reference other = (Reference) otherObject; + return this.referencing.equals(other.referencing) + && 
this.referenced.equals(other.referenced); + } + + @Override + public int hashCode() { + return this.referencing.hashCode() + this.referenced.hashCode(); + } + + @Override + public int compareTo(Reference other) { + int result = this.referencing.compareTo(other.referencing); + if (result == 0) { + result = this.referenced.compareTo(other.referenced); + } + return result; + } + } + + private void readReferencesFile() { + if (!this.referencesFile.exists()) { + return; + } + Gson gson = new Gson(); + try { + FileReader fr = new FileReader(this.referencesFile); + this.references.addAll(Arrays.asList(gson.fromJson(fr, + Reference[].class))); + fr.close(); + } catch (IOException e) { + this.log.log(Level.WARNING, "Cannot read existing references file " + + "from previous run.", e); + } + } + + private void readNewDescriptors() { + DescriptorReader descriptorReader = + DescriptorSourceFactory.createDescriptorReader(); + descriptorReader.addDirectory(this.descriptorsDir); + descriptorReader.setExcludeFiles(this.historyFile); + Iterator<DescriptorFile> descriptorFiles = + descriptorReader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (descriptor instanceof RelayNetworkStatusConsensus) { + RelayNetworkStatusConsensus consensus = + (RelayNetworkStatusConsensus) descriptor; + String consensusFlavor = consensus.getConsensusFlavor(); + if (consensusFlavor == null) { + this.readRelayNetworkStatusConsensusUnflavored(consensus); + } else if (consensusFlavor.equals("microdesc")) { + this.readRelayNetworkStatusConsensusMicrodesc(consensus); + } else { + /* Ignore unknown consensus flavors. 
*/ + } + } else if (descriptor instanceof RelayNetworkStatusVote) { + this.readRelayNetworkStatusVote( + (RelayNetworkStatusVote) descriptor); + } else if (descriptor instanceof ServerDescriptor) { + this.readServerDescriptor((ServerDescriptor) descriptor); + } else if (descriptor instanceof ExtraInfoDescriptor) { + this.readExtraInfoDescriptor((ExtraInfoDescriptor) descriptor); + } else if (descriptor instanceof Microdescriptor) { + readMicrodescriptor((Microdescriptor) descriptor); + } else { + /* Ignore unknown descriptors. */ + } + } + } + } + + private void readRelayNetworkStatusConsensusUnflavored( + RelayNetworkStatusConsensus consensus) { + String validAfter = dateTimeFormat.format( + consensus.getValidAfterMillis()); + String referencing = String.format("C-%s", validAfter); + this.addReference(referencing, String.format("M-%s", validAfter), 1.0, + consensus.getValidAfterMillis() + THREE_HOURS); + for (DirSourceEntry dirSourceEntry : + consensus.getDirSourceEntries().values()) { + if (!dirSourceEntry.isLegacy()) { + this.addReference(referencing, String.format("V-%s-%s", + validAfter, dirSourceEntry.getIdentity()), 1.0, + consensus.getValidAfterMillis() + THREE_HOURS); + } + } + double entryWeight = 200.0 + / ((double) consensus.getStatusEntries().size()); + for (NetworkStatusEntry entry : + consensus.getStatusEntries().values()) { + this.addReference(referencing, + String.format("S-%s", entry.getDescriptor()), entryWeight, + entry.getPublishedMillis() + THREE_HOURS); + } + } + + private void readRelayNetworkStatusConsensusMicrodesc( + RelayNetworkStatusConsensus consensus) { + String validAfter = dateTimeFormat.format( + consensus.getValidAfterMillis()); + String referencing = String.format("M-%s", validAfter); + this.addReference(referencing, String.format("C-%s", validAfter), 1.0, + consensus.getValidAfterMillis() + THREE_HOURS); + double entryWeight = 200.0 + / ((double) consensus.getStatusEntries().size()); + for (NetworkStatusEntry entry : + 
consensus.getStatusEntries().values()) { + for (String digest : entry.getMicrodescriptorDigests()) { + this.addReference(referencing, String.format("D-%s", digest), + entryWeight, entry.getPublishedMillis() + THREE_HOURS); + } + } + } + + private void readRelayNetworkStatusVote(RelayNetworkStatusVote vote) { + String validAfter = dateTimeFormat.format(vote.getValidAfterMillis()); + String referencing = String.format("V-%s-%s", validAfter, + vote.getIdentity()); + double entryWeight = 200.0 + / ((double) vote.getStatusEntries().size()); + for (NetworkStatusEntry entry : vote.getStatusEntries().values()) { + this.addReference(referencing, + String.format("S-%s", entry.getDescriptor()), entryWeight, + entry.getPublishedMillis() + SIX_HOURS); + } + } + + private void readServerDescriptor(ServerDescriptor serverDescriptor) { + String referenced = serverDescriptor.getExtraInfoDigest() == null ? "" + : String.format("E-%s", serverDescriptor.getExtraInfoDigest()); + this.addReference(String.format("S-%s", + serverDescriptor.getServerDescriptorDigest()), referenced, 0.01, + serverDescriptor.getPublishedMillis() + SIX_HOURS); + } + + private void readExtraInfoDescriptor( + ExtraInfoDescriptor extraInfoDescriptor) { + this.addReference(String.format("E-%s", + extraInfoDescriptor.getExtraInfoDigest()), "", 0.005, + extraInfoDescriptor.getPublishedMillis() + SIX_HOURS); + } + + private void readMicrodescriptor(Microdescriptor microdesc) { + this.addReference( + String.format("D-%s", microdesc.getMicrodescriptorDigest()), "", + 0.0, this.currentTimeMillis + THIRTY_DAYS); + } + + private void addReference(String referencing, String referenced, + double weight, long expiresAfterMillis) { + this.references.add(new Reference(referencing.toUpperCase(), + referenced.toUpperCase(), weight, expiresAfterMillis)); + } + + private void dropStaleReferences() { + SortedSet<Reference> recentReferences = new TreeSet<Reference>(); + for (Reference reference : this.references) { + if 
(this.currentTimeMillis <= reference.expiresAfterMillis) { + recentReferences.add(reference); + } + } + this.references = recentReferences; + } + + private void checkReferences() { + Set<String> knownDescriptors = new HashSet<String>(); + for (Reference reference : this.references) { + knownDescriptors.add(reference.referencing); + } + double totalMissingDescriptorsWeight = 0.0; + Set<String> missingDescriptors = new TreeSet<String>(); + StringBuilder sb = new StringBuilder("Missing referenced " + + "descriptors:"); + for (Reference reference : this.references) { + if (reference.referenced.length() > 0 + && !knownDescriptors.contains(reference.referenced)) { + if (!missingDescriptors.contains(reference.referenced)) { + totalMissingDescriptorsWeight += reference.weight; + } + missingDescriptors.add(reference.referenced); + sb.append(String.format("%n%s -> %s (%.4f -> %.4f)", + reference.referencing, reference.referenced, reference.weight, + totalMissingDescriptorsWeight)); + } + } + this.log.log(Level.INFO, sb.toString()); + if (totalMissingDescriptorsWeight > 0.999) { + this.log.log(Level.WARNING, "Missing too many referenced " + + "descriptors (" + totalMissingDescriptorsWeight + ")."); + } + } + + private void writeReferencesFile() { + Gson gson = new Gson(); + try { + FileWriter fw = new FileWriter(this.referencesFile); + gson.toJson(this.references, fw); + fw.close(); + } catch (IOException e) { + this.log.log(Level.WARNING, "Cannot write references file for next " + + "run.", e); + } + } +} + diff --git a/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java b/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java new file mode 100644 index 0000000..458332a --- /dev/null +++ b/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java @@ -0,0 +1,1134 @@ +/* Copyright 2010--2016 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.relaydescs; + 
+import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.digest.DigestUtils; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.zip.InflaterInputStream; + +/** + * Downloads relay descriptors from the directory authorities via HTTP. + * Keeps a list of missing descriptors that gets updated by parse results + * from <code>RelayDescriptorParser</code> and downloads all missing + * descriptors that have been published in the last 24 hours. Also + * downloads all server and extra-info descriptors known to a directory + * authority at most once a day. + */ +public class RelayDescriptorDownloader { + + /** + * Text file containing the descriptors that we are missing and that we + * want to download. Lines are formatted as: + * + * - "consensus,<validafter>,<parsed>", + * - "consensus-microdesc,<validafter>,<parsed>", + * - "vote,<validafter>,<fingerprint>,<parsed>", + * - "server,<published>,<relayid>,<descid>,<parsed>", + * - "extra,<published>,<relayid>,<descid>,<parsed>", or + * - "micro,<validafter>,<relayid>,<descid>,<parsed>". 
+ */ + private File missingDescriptorsFile; + + /** + * Relay descriptors that we are missing and that we want to download + * either in this execution or write to disk and try next time. Map keys + * contain comma-separated values as in the missing descriptors files + * without the "parsed" column. Map values contain the "parsed" column. + */ + private SortedMap<String, String> missingDescriptors; + + /** + * Map from base64 microdescriptor digests to keys in missingDescriptors + * ("micro,<validafter>,<relayid>,<descid>"). We need this map, because + * we can't learn <validafter> or <relayid> from parsing + * microdescriptors, but we need to know <validafter> to store + * microdescriptors to disk and both <validafter> and <relayid> to + * remove microdescriptors from the missing list. There are potentially + * many matching keys in missingDescriptors for the same microdescriptor + * digest. Also, in rare cases relays share the same microdescriptor + * (which is only possible if they share the same onion key), and then + * we don't have to download their microdescriptor more than once. + */ + private Map<String, Set<String>> microdescriptorKeys; + + /** + * Set of microdescriptor digests that are currently missing. Used for + * logging statistics instead of "micro,<validafter>,..." keys which may + * contain the same microdescriptor digest multiple times. + */ + private Set<String> missingMicrodescriptors; + + /** + * Text file containing the IP addresses (and Dir ports if not 80) of + * directory authorities and when we last downloaded all server and + * extra-info descriptors from them, so that we can avoid downloading + * them too often. + */ + private File lastDownloadedAllDescriptorsFile; + + /** + * Map of directory authorities and when we last downloaded all server + * and extra-info descriptors from them. Map keys are IP addresses (and + * Dir ports if not 80), map values are timestamps. 
+ */ + private Map<String, String> lastDownloadedAllDescriptors; + + /** + * <code>RelayDescriptorParser</code> that we will hand over the + * downloaded descriptors for parsing. + */ + private RelayDescriptorParser rdp; + + /** + * Directory authorities that we will try to download missing + * descriptors from. + */ + private List<String> authorities; + + /** + * Fingerprints of directory authorities that we will use to download + * votes without requiring a successfully downloaded consensus. + */ + private List<String> authorityFingerprints; + + /** + * Should we try to download the current consensus if we don't have it? + */ + private boolean downloadCurrentConsensus; + + /** + * Should we try to download the current microdesc consensus if we don't + * have it? + */ + private boolean downloadCurrentMicrodescConsensus; + + /** + * Should we try to download current votes if we don't have them? + */ + private boolean downloadCurrentVotes; + + /** + * Should we try to download missing server descriptors that have been + * published within the past 24 hours? + */ + private boolean downloadMissingServerDescriptors; + + /** + * Should we try to download missing extra-info descriptors that have + * been published within the past 24 hours? + */ + private boolean downloadMissingExtraInfos; + + /** + * Should we try to download missing microdescriptors that have been + * published within the past 24 hours? + */ + private boolean downloadMissingMicrodescriptors; + + /** + * Should we try to download all server descriptors from the authorities + * once every 24 hours? + */ + private boolean downloadAllServerDescriptors; + + /** + * Should we try to download all extra-info descriptors from the + * authorities once every 24 hours? + */ + private boolean downloadAllExtraInfos; + + /** + * Should we download zlib-compressed versions of descriptors by adding + * ".z" to URLs? 
+ */ + private boolean downloadCompressed; + + /** + * valid-after time that we expect the current consensus, + * microdescriptor consensus, and votes to have, formatted + * "yyyy-MM-dd HH:mm:ss". We only expect to find documents with this + * valid-after time on the directory authorities. This time is + * initialized as the beginning of the current hour. + */ + private String currentValidAfter; + + /** + * Cut-off time for missing server and extra-info descriptors, formatted + * "yyyy-MM-dd HH:mm:ss". This time is initialized as the current system + * time minus 24 hours. + */ + private String descriptorCutOff; + + /** + * Cut-off time for downloading all server and extra-info descriptors + * from the directory authorities, formatted "yyyy-MM-dd HH:mm:ss". This + * time is initialized as the current system time minus 23:30 hours. + */ + private String downloadAllDescriptorsCutOff; + + /** + * Directory authorities that we plan to download all server and + * extra-info descriptors from in this execution. + */ + private Set<String> downloadAllDescriptorsFromAuthorities; + + /** + * Current timestamp that is written to the missing list for descriptors + * that we parsed in this execution and for authorities that we + * downloaded all server and extra-info descriptors from. + */ + private String currentTimestamp; + + /** + * Logger for this class. + */ + private Logger logger; + + /** + * Number of descriptors requested by directory authority to be included + * in logs. + */ + private Map<String, Integer> requestsByAuthority; + + /** + * Counters for descriptors that we had on the missing list at the + * beginning of the execution, that we added to the missing list, + * that we requested, and that we successfully downloaded in this + * execution. 
+ */ + private int oldMissingConsensuses = 0; + + private int oldMissingMicrodescConsensuses = 0; + + private int oldMissingVotes = 0; + + private int oldMissingServerDescriptors = 0; + + private int oldMissingExtraInfoDescriptors = 0; + + private int oldMissingMicrodescriptors = 0; + + private int newMissingConsensuses = 0; + + private int newMissingMicrodescConsensuses = 0; + + private int newMissingVotes = 0; + + private int newMissingServerDescriptors = 0; + + private int newMissingExtraInfoDescriptors = 0; + + private int newMissingMicrodescriptors = 0; + + private int requestedConsensuses = 0; + + private int requestedMicrodescConsensuses = 0; + + private int requestedVotes = 0; + + private int requestedMissingServerDescriptors = 0; + + private int requestedAllServerDescriptors = 0; + + private int requestedMissingExtraInfoDescriptors = 0; + + private int requestedAllExtraInfoDescriptors = 0; + + private int requestedMissingMicrodescriptors = 0; + + private int downloadedConsensuses = 0; + + private int downloadedMicrodescConsensuses = 0; + + private int downloadedVotes = 0; + + private int downloadedMissingServerDescriptors = 0; + + private int downloadedAllServerDescriptors = 0; + + private int downloadedMissingExtraInfoDescriptors = 0; + + private int downloadedAllExtraInfoDescriptors = 0; + + private int downloadedMissingMicrodescriptors = 0; + + /** + * Initializes this class, including reading in missing descriptors from + * <code>stats/missing-relay-descriptors</code> and the times when we + * last downloaded all server and extra-info descriptors from + * <code>stats/last-downloaded-all-descriptors</code>. 
+ */ + public RelayDescriptorDownloader(RelayDescriptorParser rdp, + List<String> authorities, List<String> authorityFingerprints, + boolean downloadCurrentConsensus, + boolean downloadCurrentMicrodescConsensus, + boolean downloadCurrentVotes, + boolean downloadMissingServerDescriptors, + boolean downloadMissingExtraInfos, + boolean downloadMissingMicrodescriptors, + boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos, + boolean downloadCompressed) { + + /* Memorize argument values. */ + this.rdp = rdp; + this.authorities = new ArrayList<String>(authorities); + this.authorityFingerprints = new ArrayList<String>( + authorityFingerprints); + this.downloadCurrentConsensus = downloadCurrentConsensus; + this.downloadCurrentMicrodescConsensus = + downloadCurrentMicrodescConsensus; + this.downloadCurrentVotes = downloadCurrentVotes; + this.downloadMissingServerDescriptors = + downloadMissingServerDescriptors; + this.downloadMissingExtraInfos = downloadMissingExtraInfos; + this.downloadMissingMicrodescriptors = + downloadMissingMicrodescriptors; + this.downloadAllServerDescriptors = downloadAllServerDescriptors; + this.downloadAllExtraInfos = downloadAllExtraInfos; + this.downloadCompressed = downloadCompressed; + + /* Shuffle list of authorities for better load balancing over time. */ + Collections.shuffle(this.authorities); + + /* Initialize logger. */ + this.logger = Logger.getLogger( + RelayDescriptorDownloader.class.getName()); + + /* Prepare cut-off times and timestamp for the missing descriptors + * list and the list of authorities to download all server and + * extra-info descriptors from. 
*/ + SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + format.setTimeZone(TimeZone.getTimeZone("UTC")); + long now = System.currentTimeMillis(); + this.currentValidAfter = format.format((now / (60L * 60L * 1000L)) + * (60L * 60L * 1000L)); + this.descriptorCutOff = format.format(now - 24L * 60L * 60L * 1000L); + this.currentTimestamp = format.format(now); + this.downloadAllDescriptorsCutOff = format.format(now + - 23L * 60L * 60L * 1000L - 30L * 60L * 1000L); + + /* Read list of missing descriptors from disk and memorize those that + * we are interested in and that are likely to be found on the + * directory authorities. */ + this.missingDescriptors = new TreeMap<String, String>(); + this.microdescriptorKeys = new HashMap<String, Set<String>>(); + this.missingMicrodescriptors = new HashSet<String>(); + this.missingDescriptorsFile = new File( + "stats/missing-relay-descriptors"); + if (this.missingDescriptorsFile.exists()) { + try { + this.logger.fine("Reading file " + + this.missingDescriptorsFile.getAbsolutePath() + "..."); + BufferedReader br = new BufferedReader(new FileReader( + this.missingDescriptorsFile)); + String line; + while ((line = br.readLine()) != null) { + if (line.split(",").length > 2) { + String published = line.split(",")[1]; + if (((line.startsWith("consensus,") + || line.startsWith("consensus-microdesc,") + || line.startsWith("vote,")) + && this.currentValidAfter.equals(published)) + || ((line.startsWith("server,") + || line.startsWith("extra,") + || line.startsWith("micro,")) + && this.descriptorCutOff.compareTo(published) < 0)) { + if (!line.endsWith("NA")) { + /* Not missing. 
*/ + } else if (line.startsWith("consensus,")) { + oldMissingConsensuses++; + } else if (line.startsWith("consensus-microdesc,")) { + oldMissingMicrodescConsensuses++; + } else if (line.startsWith("vote,")) { + oldMissingVotes++; + } else if (line.startsWith("server,")) { + oldMissingServerDescriptors++; + } else if (line.startsWith("extra,")) { + oldMissingExtraInfoDescriptors++; + } + int separateAt = line.lastIndexOf(","); + this.missingDescriptors.put(line.substring(0, + separateAt), line.substring(separateAt + 1)); + if (line.startsWith("micro,")) { + String microdescriptorDigest = line.split(",")[3]; + String microdescriptorKey = line.substring(0, + line.lastIndexOf(",")); + if (!this.microdescriptorKeys.containsKey( + microdescriptorDigest)) { + this.microdescriptorKeys.put( + microdescriptorDigest, new HashSet<String>()); + } + this.microdescriptorKeys.get(microdescriptorDigest).add( + microdescriptorKey); + if (line.endsWith("NA") && !this.missingMicrodescriptors + .contains(microdescriptorDigest)) { + this.missingMicrodescriptors.add(microdescriptorDigest); + oldMissingMicrodescriptors++; + } + } + } + } else { + this.logger.fine("Invalid line '" + line + "' in " + + this.missingDescriptorsFile.getAbsolutePath() + + ". Ignoring."); + } + } + br.close(); + this.logger.fine("Finished reading file " + + this.missingDescriptorsFile.getAbsolutePath() + "."); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed to read file " + + this.missingDescriptorsFile.getAbsolutePath() + + "! This means that we might forget to dowload relay " + + "descriptors we are missing.", e); + } + } + + /* Read list of directory authorities and when we last downloaded all + * server and extra-info descriptors from them. 
*/ + this.lastDownloadedAllDescriptors = new HashMap<String, String>(); + this.lastDownloadedAllDescriptorsFile = new File( + "stats/last-downloaded-all-descriptors"); + if (this.lastDownloadedAllDescriptorsFile.exists()) { + try { + this.logger.fine("Reading file " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + + "..."); + BufferedReader br = new BufferedReader(new FileReader( + this.lastDownloadedAllDescriptorsFile)); + String line; + while ((line = br.readLine()) != null) { + if (line.split(",").length != 2) { + this.logger.fine("Invalid line '" + line + "' in " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + + ". Ignoring."); + } else { + String[] parts = line.split(","); + String authority = parts[0]; + String lastDownloaded = parts[1]; + this.lastDownloadedAllDescriptors.put(authority, + lastDownloaded); + } + } + br.close(); + this.logger.fine("Finished reading file " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + + "."); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed to read file " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + + "! This means that we might download all server and " + + "extra-info descriptors more often than we should.", e); + } + } + + /* Make a list of at most two directory authorities that we want to + * download all server and extra-info descriptors from. */ + this.downloadAllDescriptorsFromAuthorities = new HashSet<String>(); + for (String authority : this.authorities) { + if (!this.lastDownloadedAllDescriptors.containsKey(authority) + || this.lastDownloadedAllDescriptors.get(authority).compareTo( + this.downloadAllDescriptorsCutOff) < 0) { + this.downloadAllDescriptorsFromAuthorities.add(authority); + } + if (this.downloadAllDescriptorsFromAuthorities.size() >= 2) { + break; + } + } + + /* Prepare statistics on this execution. 
*/ + this.requestsByAuthority = new HashMap<String, Integer>(); + for (String authority : this.authorities) { + this.requestsByAuthority.put(authority, 0); + } + } + + /** + * We have parsed a consensus. Take this consensus off the missing list + * and add the votes created by the given <code>authorities</code> and + * the <code>serverDescriptors</code> which are in the format + * "<published>,<relayid>,<descid>" to that list. + */ + public void haveParsedConsensus(String validAfter, + Set<String> authorities, Set<String> serverDescriptors) { + + /* Mark consensus as parsed. */ + if (this.currentValidAfter.equals(validAfter)) { + String consensusKey = "consensus," + validAfter; + this.missingDescriptors.put(consensusKey, this.currentTimestamp); + + /* Add votes to missing list. */ + for (String authority : authorities) { + String voteKey = "vote," + validAfter + "," + authority; + if (!this.missingDescriptors.containsKey(voteKey)) { + this.missingDescriptors.put(voteKey, "NA"); + this.newMissingVotes++; + } + } + } + + /* Add server descriptors to missing list. */ + for (String serverDescriptor : serverDescriptors) { + String published = serverDescriptor.split(",")[0]; + if (this.descriptorCutOff.compareTo(published) < 0) { + String serverDescriptorKey = "server," + serverDescriptor; + if (!this.missingDescriptors.containsKey( + serverDescriptorKey)) { + this.missingDescriptors.put(serverDescriptorKey, "NA"); + this.newMissingServerDescriptors++; + } + } + } + } + + /** + * We have parsed a microdesc consensus. Take this microdesc consensus + * off the missing list and add the <code>microdescriptors</code> which + * are in the format "<validafter>,<relayid>,<descid>" to that + * list. + */ + public void haveParsedMicrodescConsensus(String validAfter, + Set<String> microdescriptors) { + + /* Mark microdesc consensus as parsed. 
*/ + if (this.currentValidAfter.equals(validAfter)) { + String microdescConsensusKey = "consensus-microdesc," + validAfter; + this.missingDescriptors.put(microdescConsensusKey, + this.currentTimestamp); + } + + /* Add microdescriptors to missing list. Exclude those that we already + * downloaded this month. (We download each microdescriptor at least + * once per month to keep the storage logic sane; otherwise we'd have + * to copy microdescriptors from the earlier month to the current + * month, and that gets messy.) */ + if (this.descriptorCutOff.compareTo(validAfter) < 0) { + /* Leading "YYYY-MM" part of the valid-after time, used below to + * recognize keys from the same month. */ + String validAfterYearMonth = validAfter.substring(0, + "YYYY-MM".length()); + for (String microdescriptor : microdescriptors) { + String microdescriptorKey = "micro," + microdescriptor; + String parsed = "NA"; + /* Third field of "<validafter>,<relayid>,<descid>". */ + String microdescriptorDigest = microdescriptor.split(",")[2]; + if (this.microdescriptorKeys.containsKey(microdescriptorDigest)) { + /* Look for a key with the same digest and month that already + * carries a value other than "NA", and reuse that value. */ + for (String otherMicrodescriptorKey : + this.microdescriptorKeys.get(microdescriptorDigest)) { + String otherValidAfter = + otherMicrodescriptorKey.split(",")[1]; + if (!otherValidAfter.startsWith(validAfterYearMonth)) { + continue; + } + String otherParsed = this.missingDescriptors.get( + otherMicrodescriptorKey); + if (otherParsed != null && !otherParsed.equals("NA")) { + parsed = otherParsed; + break; + } + } + } else { + this.microdescriptorKeys.put( + microdescriptorDigest, new HashSet<String>()); + } + this.microdescriptorKeys.get(microdescriptorDigest).add( + microdescriptorKey); + this.missingDescriptors.put(microdescriptorKey, parsed); + /* Count each missing digest only once, even if several keys + * refer to it. */ + if (parsed.equals("NA") + && !this.missingMicrodescriptors.contains(microdescriptorDigest)) { + this.missingMicrodescriptors.add(microdescriptorDigest); + this.newMissingMicrodescriptors++; + } + } + } + } + + /** + * We have parsed a vote. Take this vote off the missing list and add + * the <code>serverDescriptors</code> which are in the format + * "<published>,<relayid>,<descid>" to that list. 
+ */ + public void haveParsedVote(String validAfter, String fingerprint, + Set<String> serverDescriptors) { + + /* Mark vote as parsed. */ + /* Only votes whose valid-after time equals currentValidAfter are + * recorded. */ + if (this.currentValidAfter.equals(validAfter)) { + String voteKey = "vote," + validAfter + "," + fingerprint; + this.missingDescriptors.put(voteKey, this.currentTimestamp); + } + + /* Add server descriptors to missing list. */ + for (String serverDescriptor : serverDescriptors) { + String published = serverDescriptor.split(",")[0]; + /* Plain string comparison against the cut-off. */ + if (this.descriptorCutOff.compareTo(published) < 0) { + String serverDescriptorKey = "server," + serverDescriptor; + if (!this.missingDescriptors.containsKey( + serverDescriptorKey)) { + this.missingDescriptors.put(serverDescriptorKey, "NA"); + this.newMissingServerDescriptors++; + } + } + } + } + + /** + * We have parsed a server descriptor. Take this server descriptor off + * the missing list and put the extra-info descriptor digest on that + * list. + * + * @param published published time, second field of the resulting keys + * @param relayIdentity relay identity, third field of the resulting keys + * @param serverDescriptorDigest digest, last field of the "server,..." key + * @param extraInfoDigest digest for the "extra,..." key; may be + * <code>null</code>, in which case no extra-info entry is added + */ + public void haveParsedServerDescriptor(String published, + String relayIdentity, String serverDescriptorDigest, + String extraInfoDigest) { + + /* Mark server descriptor as parsed. */ + if (this.descriptorCutOff.compareTo(published) < 0) { + String serverDescriptorKey = "server," + published + "," + + relayIdentity + "," + serverDescriptorDigest; + this.missingDescriptors.put(serverDescriptorKey, + this.currentTimestamp); + + /* Add extra-info descriptor to missing list. */ + if (extraInfoDigest != null) { + String extraInfoKey = "extra," + published + "," + + relayIdentity + "," + extraInfoDigest; + if (!this.missingDescriptors.containsKey(extraInfoKey)) { + this.missingDescriptors.put(extraInfoKey, "NA"); + this.newMissingExtraInfoDescriptors++; + } + } + } + } + + /** + * We have parsed an extra-info descriptor. Take it off the missing + * list. 
+ */ + public void haveParsedExtraInfoDescriptor(String published, + String relayIdentity, String extraInfoDigest) { + /* Plain string comparison: descriptors whose published time does + * not sort after the cut-off are not recorded. */ + if (this.descriptorCutOff.compareTo(published) < 0) { + String extraInfoKey = "extra," + published + "," + + relayIdentity + "," + extraInfoDigest; + this.missingDescriptors.put(extraInfoKey, this.currentTimestamp); + } + } + + /** + * We have parsed a microdescriptor. Take it off the missing list. + * + * @param descriptorDigest digest under which the microdescriptor's keys + * are registered in <code>microdescriptorKeys</code>; unknown digests + * are ignored + */ + public void haveParsedMicrodescriptor(String descriptorDigest) { + if (this.microdescriptorKeys.containsKey(descriptorDigest)) { + for (String microdescriptorKey : + this.microdescriptorKeys.get(descriptorDigest)) { + /* Keys have the form "micro,<validafter>,<relayid>,<descid>", so + * the valid-after time is field 1. Field 0 is the constant + * "micro", which is not a timestamp and cannot be meaningfully + * compared to the cut-off. */ + String validAfter = microdescriptorKey.split(",")[1]; + if (this.descriptorCutOff.compareTo(validAfter) < 0) { + this.missingDescriptors.put(microdescriptorKey, + this.currentTimestamp); + } + } + this.missingMicrodescriptors.remove(descriptorDigest); + } + } + + /** + * Downloads missing descriptors that we think might still be available + * on the directory authorities as well as all server and extra-info + * descriptors once per day. + */ + public void downloadDescriptors() { + + /* Put the current consensus and votes on the missing list, unless we + * already have them. 
*/ + String consensusKey = "consensus," + this.currentValidAfter; + if (!this.missingDescriptors.containsKey(consensusKey)) { + this.missingDescriptors.put(consensusKey, "NA"); + this.newMissingConsensuses++; + } + String microdescConsensusKey = "consensus-microdesc," + + this.currentValidAfter; + if (!this.missingDescriptors.containsKey(microdescConsensusKey)) { + this.missingDescriptors.put(microdescConsensusKey, "NA"); + this.newMissingMicrodescConsensuses++; + } + /* Register a vote key for each entry in authorityFingerprints. */ + for (String authority : authorityFingerprints) { + String voteKey = "vote," + this.currentValidAfter + "," + authority; + if (!this.missingDescriptors.containsKey(voteKey)) { + this.missingDescriptors.put(voteKey, "NA"); + this.newMissingVotes++; + } + } + + /* Download descriptors from authorities which are in random order, so + * that we distribute the load somewhat fairly over time. */ + for (String authority : authorities) { + + /* Make all requests to an authority in a single try block. If + * something goes wrong with this authority, we give up on all + * downloads and continue with the next authority. */ + /* TODO Some authorities provide very little bandwidth and could + * slow down the entire download process. Ponder adding a timeout of + * 3 or 5 minutes per authority to avoid getting in the way of the + * next execution. */ + try { + + /* Start with downloading the current consensus, unless we already + * have it. */ + if (downloadCurrentConsensus) { + if (this.missingDescriptors.containsKey(consensusKey) + && this.missingDescriptors.get(consensusKey).equals("NA")) { + this.requestedConsensuses++; + this.downloadedConsensuses += + this.downloadResourceFromAuthority(authority, + "/tor/status-vote/current/consensus"); + } + } + + /* Then try to download the microdesc consensus. 
*/ + if (downloadCurrentMicrodescConsensus) { + if (this.missingDescriptors.containsKey(microdescConsensusKey) + && this.missingDescriptors.get(microdescConsensusKey) + .equals("NA")) { + this.requestedMicrodescConsensuses++; + this.downloadedMicrodescConsensuses += + this.downloadResourceFromAuthority(authority, + "/tor/status-vote/current/consensus-microdesc"); + } + } + + /* Next, try to download current votes that we're missing. */ + if (downloadCurrentVotes) { + String voteKeyPrefix = "vote," + this.currentValidAfter; + SortedSet<String> fingerprints = new TreeSet<String>(); + for (Map.Entry<String, String> e : + this.missingDescriptors.entrySet()) { + if (e.getValue().equals("NA") + && e.getKey().startsWith(voteKeyPrefix)) { + /* Third field of the "vote,..." key. */ + String fingerprint = e.getKey().split(",")[2]; + fingerprints.add(fingerprint); + } + } + for (String fingerprint : fingerprints) { + this.requestedVotes++; + this.downloadedVotes += + this.downloadResourceFromAuthority(authority, + "/tor/status-vote/current/" + fingerprint); + } + } + + /* Download either all server and extra-info descriptors or only + * those that we're missing. Start with server descriptors, then + * request extra-info descriptors. Finally, request missing + * microdescriptors. */ + for (String type : new String[] { "server", "extra", "micro" }) { + + /* Download all server or extra-info descriptors from this + * authority if we haven't done so for 24 hours and if we're + * configured to do so. 
*/ + if (this.downloadAllDescriptorsFromAuthorities.contains( + authority) && ((type.equals("server") + && this.downloadAllServerDescriptors) + || (type.equals("extra") && this.downloadAllExtraInfos))) { + int downloadedAllDescriptors = + this.downloadResourceFromAuthority(authority, "/tor/" + + type + "/all"); + if (type.equals("server")) { + this.requestedAllServerDescriptors++; + this.downloadedAllServerDescriptors += + downloadedAllDescriptors; + } else if (type.equals("extra")) { + this.requestedAllExtraInfoDescriptors++; + this.downloadedAllExtraInfoDescriptors += + downloadedAllDescriptors; + } + + /* Download missing server descriptors, extra-info descriptors, + * and microdescriptors if we're configured to do so. */ + } else if ((type.equals("server") + && this.downloadMissingServerDescriptors) + || (type.equals("extra") && this.downloadMissingExtraInfos) + || (type.equals("micro") + && this.downloadMissingMicrodescriptors)) { + + /* Go through the list of missing descriptors of this type + * and combine the descriptor identifiers to a URL of up to + * 96 server or extra-info descriptors or 92 microdescriptors + * that we can download at once. */ + SortedSet<String> descriptorIdentifiers = + new TreeSet<String>(); + for (Map.Entry<String, String> e : + this.missingDescriptors.entrySet()) { + if (e.getValue().equals("NA") + && e.getKey().startsWith(type + ",") + && this.descriptorCutOff.compareTo( + e.getKey().split(",")[1]) < 0) { + /* Field 1 of the key was compared to the cut-off above; + * field 3 is what goes into the request URL. */ + String descriptorIdentifier = e.getKey().split(",")[3]; + descriptorIdentifiers.add(descriptorIdentifier); + } + } + StringBuilder combinedResource = null; + int descriptorsInCombinedResource = 0; + int requestedDescriptors = 0; + int downloadedDescriptors = 0; + int maxDescriptorsInCombinedResource = + type.equals("micro") ? 92 : 96; + String separator = type.equals("micro") ? 
"-" : "+"; + for (String descriptorIdentifier : descriptorIdentifiers) { + /* Batch is full: request it and start a new one. */ + if (descriptorsInCombinedResource + >= maxDescriptorsInCombinedResource) { + requestedDescriptors += descriptorsInCombinedResource; + downloadedDescriptors += + this.downloadResourceFromAuthority(authority, + combinedResource.toString()); + combinedResource = null; + descriptorsInCombinedResource = 0; + } + if (descriptorsInCombinedResource == 0) { + combinedResource = new StringBuilder("/tor/" + type + + "/d/" + descriptorIdentifier); + } else { + combinedResource.append(separator + descriptorIdentifier); + } + descriptorsInCombinedResource++; + } + /* Request whatever is left in the last, partially filled batch. */ + if (descriptorsInCombinedResource > 0) { + requestedDescriptors += descriptorsInCombinedResource; + downloadedDescriptors += + this.downloadResourceFromAuthority(authority, + combinedResource.toString()); + } + if (type.equals("server")) { + this.requestedMissingServerDescriptors += + requestedDescriptors; + this.downloadedMissingServerDescriptors += + downloadedDescriptors; + } else if (type.equals("extra")) { + this.requestedMissingExtraInfoDescriptors += + requestedDescriptors; + this.downloadedMissingExtraInfoDescriptors += + downloadedDescriptors; + } else if (type.equals("micro")) { + this.requestedMissingMicrodescriptors += + requestedDescriptors; + this.downloadedMissingMicrodescriptors += + downloadedDescriptors; + } + } + } + + /* If a download failed, stop requesting descriptors from this + * authority and move on to the next. */ + } catch (IOException e) { + logger.log(Level.FINE, "Failed downloading from " + authority + + "!", e); + } + } + } + + /** + * Attempts to download one or more descriptors identified by a resource + * string from a directory authority and passes the returned + * descriptor(s) to the <code>RelayDescriptorParser</code> upon success. + * Returns the number of descriptors contained in the reply. Throws an + * <code>IOException</code> if something goes wrong while downloading. 
+ */ + private int downloadResourceFromAuthority(String authority, + String resource) throws IOException { + byte[] allData = null; + this.requestsByAuthority.put(authority, + this.requestsByAuthority.get(authority) + 1); + /* TODO Disable compressed downloads for extra-info descriptors, + * because zlib decompression doesn't work correctly. Figure out why + * this is and fix it. */ + String fullUrl = "http://" + authority + resource + + (this.downloadCompressed && !resource.startsWith("/tor/extra/") + ? ".z" : ""); + URL u = new URL(fullUrl); + HttpURLConnection huc = (HttpURLConnection) u.openConnection(); + huc.setRequestMethod("GET"); + huc.connect(); + int response = huc.getResponseCode(); + if (response == 200) { + BufferedInputStream in = this.downloadCompressed + && !resource.startsWith("/tor/extra/") + ? new BufferedInputStream(new InflaterInputStream( + huc.getInputStream())) + : new BufferedInputStream(huc.getInputStream()); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int len; + byte[] data = new byte[1024]; + /* Close the stream in a finally block so that it is released even + * if reading or inflating the reply fails half-way through. */ + try { + while ((len = in.read(data, 0, 1024)) >= 0) { + baos.write(data, 0, len); + } + } finally { + in.close(); + } + allData = baos.toByteArray(); + } + logger.fine("Downloaded " + fullUrl + " -> " + response + " (" + + (allData == null ? 0 : allData.length) + " bytes)"); + int receivedDescriptors = 0; + if (allData != null) { + if (resource.startsWith("/tor/status-vote/current/")) { + this.rdp.parse(allData); + receivedDescriptors = 1; + } else if (resource.startsWith("/tor/server/") + || resource.startsWith("/tor/extra/")) { + if (resource.equals("/tor/server/all") + || resource.equals("/tor/extra/all")) { + this.lastDownloadedAllDescriptors.put(authority, + this.currentTimestamp); + } + String ascii = null; + try { + ascii = new String(allData, "US-ASCII"); + } catch (UnsupportedEncodingException e) { + /* No way that US-ASCII is not supported. 
*/ + } + int start = -1; + int sig = -1; + int end = -1; + String startToken = resource.startsWith("/tor/server/") + ? "router " : "extra-info "; + String sigToken = "\nrouter-signature\n"; + String endToken = "\n-----END SIGNATURE-----\n"; + /* Cut the reply into single descriptors, each running from its + * start token to the end of its signature block, and hand them to + * the parser one by one. Stop at the first incomplete one. */ + while (end < ascii.length()) { + start = ascii.indexOf(startToken, end); + if (start < 0) { + break; + } + sig = ascii.indexOf(sigToken, start); + if (sig < 0) { + break; + } + sig += sigToken.length(); + end = ascii.indexOf(endToken, sig); + if (end < 0) { + break; + } + end += endToken.length(); + byte[] descBytes = new byte[end - start]; + System.arraycopy(allData, start, descBytes, 0, end - start); + this.rdp.parse(descBytes); + receivedDescriptors++; + } + } else if (resource.startsWith("/tor/micro/")) { + /* TODO We need to parse microdescriptors ourselves, rather than + * RelayDescriptorParser, because only we know the valid-after + * time(s) of microdesc consensus(es) containing this + * microdescriptor. However, this breaks functional abstraction + * pretty badly. */ + SimpleDateFormat parseFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + parseFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String ascii = null; + try { + ascii = new String(allData, "US-ASCII"); + } catch (UnsupportedEncodingException e) { + /* No way that US-ASCII is not supported. 
*/ + } + int start = -1; + int end = -1; + String startToken = "onion-key\n"; + /* Each microdescriptor runs from one "onion-key" line up to the + * next one, or to the end of the reply for the last one. */ + while (end < ascii.length()) { + start = ascii.indexOf(startToken, end); + if (start < 0) { + break; + } + end = ascii.indexOf(startToken, start + 1); + if (end < 0) { + end = ascii.length(); + if (end <= start) { + break; + } + } + byte[] descBytes = new byte[end - start]; + System.arraycopy(allData, start, descBytes, 0, end - start); + String digest256Base64 = Base64.encodeBase64String( + DigestUtils.sha256(descBytes)).replaceAll("=", ""); + /* Skip microdescriptors that no known key refers to. */ + if (!this.microdescriptorKeys.containsKey(digest256Base64)) { + continue; + } + String digest256Hex = DigestUtils.sha256Hex(descBytes); + /* Store the microdescriptor once per key, using the valid-after + * time in field 1 of the key. */ + for (String microdescriptorKey : + this.microdescriptorKeys.get(digest256Base64)) { + String validAfterTime = microdescriptorKey.split(",")[1]; + try { + long validAfter = + parseFormat.parse(validAfterTime).getTime(); + this.rdp.storeMicrodescriptor(descBytes, digest256Hex, + digest256Base64, validAfter); + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Could not parse " + + "valid-after time '" + validAfterTime + "' in " + + "microdescriptor key. Not storing microdescriptor.", + e); + } + } + receivedDescriptors++; + } + } + } + return receivedDescriptors; + } + + /** + * Writes status files to disk and logs statistics about downloading + * relay descriptors in this execution. + */ + public void writeFile() { + + /* Write missing descriptors file to disk. 
*/ + int missingConsensuses = 0; + int missingMicrodescConsensuses = 0; + int missingVotes = 0; + int missingServerDescriptors = 0; + int missingExtraInfoDescriptors = 0; + try { + this.logger.fine("Writing file " + + this.missingDescriptorsFile.getAbsolutePath() + "..."); + this.missingDescriptorsFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.missingDescriptorsFile)); + /* Close the writer in a finally block so that the file handle is + * released even if writing fails. */ + try { + for (Map.Entry<String, String> e : + this.missingDescriptors.entrySet()) { + String key = e.getKey(); + String value = e.getValue(); + /* Every entry is written; only entries with value "NA" are + * counted as still missing. */ + if (!value.equals("NA")) { + /* Not missing. */ + } else if (key.startsWith("consensus,")) { + missingConsensuses++; + } else if (key.startsWith("consensus-microdesc,")) { + missingMicrodescConsensuses++; + } else if (key.startsWith("vote,")) { + missingVotes++; + } else if (key.startsWith("server,")) { + missingServerDescriptors++; + } else if (key.startsWith("extra,")) { + missingExtraInfoDescriptors++; + } else if (key.startsWith("micro,")) { + /* We're counting missing microdescriptors below. */ + } + bw.write(key + "," + value + "\n"); + } + } finally { + bw.close(); + } + this.logger.fine("Finished writing file " + + this.missingDescriptorsFile.getAbsolutePath() + "."); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed writing " + + this.missingDescriptorsFile.getAbsolutePath() + "!", e); + } + int missingMicrodescriptors = this.missingMicrodescriptors.size(); + + /* Write text file containing the directory authorities and when we + * last downloaded all server and extra-info descriptors from them to + * disk. 
*/ + try { + this.logger.fine("Writing file " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + + "..."); + this.lastDownloadedAllDescriptorsFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.lastDownloadedAllDescriptorsFile)); + /* Same as above: always release the file handle. */ + try { + for (Map.Entry<String, String> e : + this.lastDownloadedAllDescriptors.entrySet()) { + String authority = e.getKey(); + String lastDownloaded = e.getValue(); + bw.write(authority + "," + lastDownloaded + "\n"); + } + } finally { + bw.close(); + } + this.logger.fine("Finished writing file " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + + "."); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed writing " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + "!", + e); + } + + /* Log statistics about this execution. */ + this.logger.info("Finished downloading relay descriptors from the " + + "directory authorities."); + this.logger.info("At the beginning of this execution, we were " + + "missing " + oldMissingConsensuses + " consensus(es), " + + oldMissingMicrodescConsensuses + " microdesc consensus(es), " + + oldMissingVotes + " vote(s), " + oldMissingServerDescriptors + + " server descriptor(s), " + oldMissingExtraInfoDescriptors + + " extra-info descriptor(s), and " + oldMissingMicrodescriptors + + " microdescriptor(s)."); + this.logger.info("During this execution, we added " + + this.newMissingConsensuses + " consensus(es), " + + this.newMissingMicrodescConsensuses + + " microdesc consensus(es), " + this.newMissingVotes + + " vote(s), " + this.newMissingServerDescriptors + + " server descriptor(s), " + this.newMissingExtraInfoDescriptors + + " extra-info descriptor(s), and " + + this.newMissingMicrodescriptors + " microdescriptor(s) to the " + + "missing list, some of which we also " + + "requested and removed from the list again."); + this.logger.info("We requested " + this.requestedConsensuses + + " consensus(es), " + this.requestedMicrodescConsensuses + + " 
microdesc consensus(es), " + this.requestedVotes + + " vote(s), " + this.requestedMissingServerDescriptors + + " missing server descriptor(s), " + + this.requestedAllServerDescriptors + + " times all server descriptors, " + + this.requestedMissingExtraInfoDescriptors + " missing " + + "extra-info descriptor(s), " + + this.requestedAllExtraInfoDescriptors + " times all extra-info " + + "descriptors, and " + this.requestedMissingMicrodescriptors + + " missing microdescriptor(s) from the directory authorities."); + StringBuilder sb = new StringBuilder(); + for (String authority : this.authorities) { + sb.append(" " + authority + "=" + + this.requestsByAuthority.get(authority)); + } + this.logger.info("We sent these numbers of requests to the directory " + + "authorities:" + sb.toString()); + this.logger.info("We successfully downloaded " + + this.downloadedConsensuses + " consensus(es), " + + this.downloadedMicrodescConsensuses + + " microdesc consensus(es), " + this.downloadedVotes + + " vote(s), " + this.downloadedMissingServerDescriptors + + " missing server descriptor(s), " + + this.downloadedAllServerDescriptors + + " server descriptor(s) when downloading all descriptors, " + + this.downloadedMissingExtraInfoDescriptors + " missing " + + "extra-info descriptor(s), " + + this.downloadedAllExtraInfoDescriptors + " extra-info " + + "descriptor(s) when downloading all descriptors, and " + + this.downloadedMissingMicrodescriptors + + " missing microdescriptor(s)."); + this.logger.info("At the end of this execution, we are missing " + + missingConsensuses + " consensus(es), " + + missingMicrodescConsensuses + " microdesc consensus(es), " + + missingVotes + " vote(s), " + missingServerDescriptors + + " server descriptor(s), " + missingExtraInfoDescriptors + + " extra-info descriptor(s), and " + missingMicrodescriptors + + " microdescriptor(s), some of which we may try in the next " + + "execution."); + } +} + diff --git 
a/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorParser.java b/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorParser.java new file mode 100644 index 0000000..3f9b912 --- /dev/null +++ b/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorParser.java @@ -0,0 +1,337 @@ +/* Copyright 2010--2016 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.relaydescs; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.StringReader; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Parses relay descriptors including network status consensuses and + * votes, server and extra-info descriptors, and passes the results to the + * stats handlers, to the archive writer, or to the relay descriptor + * downloader. + */ +public class RelayDescriptorParser { + + /** + * File writer that writes descriptor contents to files in a + * directory-archive directory structure. + */ + private ArchiveWriter aw; + + private ArchiveReader ar; + + /** + * Missing descriptor downloader that uses the parse results to learn + * which descriptors we are missing and want to download. + */ + private RelayDescriptorDownloader rdd; + + /** + * Logger for this class. + */ + private Logger logger; + + private SimpleDateFormat dateTimeFormat; + + /** + * Initializes this class. + */ + public RelayDescriptorParser(ArchiveWriter aw) { + this.aw = aw; + + /* Initialize logger. 
*/ + this.logger = Logger.getLogger(RelayDescriptorParser.class.getName()); + + this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + /** + * Sets the downloader that is notified about parsed descriptors. + */ + public void setRelayDescriptorDownloader( + RelayDescriptorDownloader rdd) { + this.rdd = rdd; + } + + /** + * Sets the archive reader that is notified about parsed microdesc + * consensuses. + */ + public void setArchiveReader(ArchiveReader ar) { + this.ar = ar; + } + + /** + * Parses a single descriptor, notifies the downloader and archive + * reader (if set) about what was found, and hands the descriptor to + * the archive writer (if set) for storing. + * + * @param data raw descriptor bytes, optionally preceded by lines + * starting with "@" + * @return <code>true</code> if a store method of the archive writer was + * called for this descriptor, <code>false</code> otherwise + */ + public boolean parse(byte[] data) { + boolean stored = false; + try { + /* Convert descriptor to ASCII for parsing. This means we'll lose + * the non-ASCII chars, but we don't care about them for parsing + * anyway. */ + BufferedReader br = new BufferedReader(new StringReader(new String( + data, "US-ASCII"))); + String line; + do { + line = br.readLine(); + } while (line != null && line.startsWith("@")); + if (line == null) { + this.logger.fine("We were given an empty descriptor for " + + "parsing. Ignoring."); + return false; + } + SimpleDateFormat parseFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + parseFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + if (line.startsWith("network-status-version 3")) { + String statusType = "consensus"; + if (line.equals("network-status-version 3 microdesc")) { + statusType = "consensus-microdesc"; + } + String validAfterTime = null; + String fingerprint = null; + String dirSource = null; + long validAfter = -1L; + long dirKeyPublished = -1L; + SortedSet<String> dirSources = new TreeSet<String>(); + SortedSet<String> serverDescriptors = new TreeSet<String>(); + SortedSet<String> serverDescriptorDigests = new TreeSet<String>(); + SortedSet<String> microdescriptorKeys = new TreeSet<String>(); + SortedSet<String> microdescriptorDigests = new TreeSet<String>(); + StringBuilder certificateStringBuilder = null; + String certificateString = null; + String lastRelayIdentity = null; + while ((line = br.readLine()) != null) { + if (certificateStringBuilder != null) { + if (line.startsWith("r ")) { + certificateString = 
certificateStringBuilder.toString(); + certificateStringBuilder = null; + } else { + certificateStringBuilder.append(line + "\n"); + } + } + if (line.equals("vote-status vote")) { + statusType = "vote"; + } else if (line.startsWith("valid-after ")) { + validAfterTime = line.substring("valid-after ".length()); + validAfter = parseFormat.parse(validAfterTime).getTime(); + } else if (line.startsWith("dir-source ")) { + dirSource = line.split(" ")[2]; + } else if (line.startsWith("vote-digest ")) { + dirSources.add(dirSource); + } else if (line.startsWith("dir-key-certificate-version ")) { + certificateStringBuilder = new StringBuilder(); + certificateStringBuilder.append(line + "\n"); + } else if (line.startsWith("fingerprint ")) { + fingerprint = line.split(" ")[1]; + } else if (line.startsWith("dir-key-published ")) { + String dirKeyPublishedTime = line.substring( + "dir-key-published ".length()); + dirKeyPublished = parseFormat.parse(dirKeyPublishedTime) + .getTime(); + } else if (line.startsWith("r ")) { + String[] parts = line.split(" "); + if (parts.length == 8) { + lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64( + parts[2] + "=")).toLowerCase(); + } else if (parts.length == 9) { + lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64( + parts[2] + "=")).toLowerCase(); + String serverDesc = Hex.encodeHexString(Base64.decodeBase64( + parts[3] + "=")).toLowerCase(); + String publishedTime = parts[4] + " " + parts[5]; + serverDescriptors.add(publishedTime + "," + + lastRelayIdentity + "," + serverDesc); + serverDescriptorDigests.add(serverDesc); + } else { + this.logger.log(Level.WARNING, "Could not parse r line '" + + line + "' in descriptor. 
Skipping."); + break; + } + } else if (line.startsWith("m ")) { + String[] parts = line.split(" "); + if (parts.length == 2 && parts[1].length() == 43) { + String digest256Base64 = parts[1]; + microdescriptorKeys.add(validAfterTime + "," + + lastRelayIdentity + "," + digest256Base64); + String digest256Hex = Hex.encodeHexString( + Base64.decodeBase64(digest256Base64 + "=")) + .toLowerCase(); + microdescriptorDigests.add(digest256Hex); + } else if (parts.length != 3 + || !parts[2].startsWith("sha256=") + || parts[2].length() != 50) { + this.logger.log(Level.WARNING, "Could not parse m line '" + + line + "' in descriptor. Skipping."); + break; + } + } + } + if (statusType.equals("consensus")) { + if (this.rdd != null) { + this.rdd.haveParsedConsensus(validAfterTime, dirSources, + serverDescriptors); + } + if (this.aw != null) { + this.aw.storeConsensus(data, validAfter, dirSources, + serverDescriptorDigests); + stored = true; + } + } else if (statusType.equals("consensus-microdesc")) { + if (this.rdd != null) { + this.rdd.haveParsedMicrodescConsensus(validAfterTime, + microdescriptorKeys); + } + if (this.ar != null) { + this.ar.haveParsedMicrodescConsensus(validAfterTime, + microdescriptorDigests); + } + if (this.aw != null) { + this.aw.storeMicrodescConsensus(data, validAfter, + microdescriptorDigests); + stored = true; + } + } else { + if (this.aw != null || this.rdd != null) { + String ascii = new String(data, "US-ASCII"); + String startToken = "network-status-version "; + String sigToken = "directory-signature "; + int start = ascii.indexOf(startToken); + int sig = ascii.indexOf(sigToken); + if (start >= 0 && sig >= 0 && sig > start) { + sig += sigToken.length(); + byte[] forDigest = new byte[sig - start]; + System.arraycopy(data, start, forDigest, 0, sig - start); + String digest = DigestUtils.shaHex(forDigest).toUpperCase(); + if (this.aw != null) { + this.aw.storeVote(data, validAfter, dirSource, digest, + serverDescriptorDigests); + stored = true; + } + if 
(this.rdd != null) { + this.rdd.haveParsedVote(validAfterTime, fingerprint, + serverDescriptors); + } + } + if (certificateString != null) { + if (this.aw != null) { + this.aw.storeCertificate(certificateString.getBytes(), + dirSource, dirKeyPublished); + stored = true; + } + } + } + } + } else if (line.startsWith("router ")) { + String publishedTime = null; + String extraInfoDigest = null; + String relayIdentifier = null; + long published = -1L; + while ((line = br.readLine()) != null) { + if (line.startsWith("published ")) { + publishedTime = line.substring("published ".length()); + published = parseFormat.parse(publishedTime).getTime(); + } else if (line.startsWith("opt fingerprint") + || line.startsWith("fingerprint")) { + relayIdentifier = line.substring(line.startsWith("opt ") + ? "opt fingerprint".length() : "fingerprint".length()) + .replaceAll(" ", "").toLowerCase(); + } else if (line.startsWith("opt extra-info-digest ") + || line.startsWith("extra-info-digest ")) { + extraInfoDigest = line.startsWith("opt ") + ? 
line.split(" ")[2].toLowerCase() + : line.split(" ")[1].toLowerCase(); + } + } + String ascii = new String(data, "US-ASCII"); + String startToken = "router "; + String sigToken = "\nrouter-signature\n"; + int start = ascii.indexOf(startToken); + /* Keep the raw indexOf result so that a missing signature token + * can be told apart from a found one; after adding the token + * length the value is never negative. */ + int sigStart = ascii.indexOf(sigToken); + int sig = sigStart + sigToken.length(); + String digest = null; + /* All three conditions must hold. Otherwise sig - start below may + * be negative or cover bytes that are not the signed part. */ + if (start >= 0 && sigStart >= 0 && sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(data, start, forDigest, 0, sig - start); + digest = DigestUtils.shaHex(forDigest); + } + if (this.aw != null && digest != null) { + this.aw.storeServerDescriptor(data, digest, published, + extraInfoDigest); + stored = true; + } + if (this.rdd != null && digest != null) { + this.rdd.haveParsedServerDescriptor(publishedTime, + relayIdentifier, digest, extraInfoDigest); + } + } else if (line.startsWith("extra-info ")) { + String publishedTime = null; + String relayIdentifier = line.split(" ")[2]; + long published = -1L; + while ((line = br.readLine()) != null) { + if (line.startsWith("published ")) { + publishedTime = line.substring("published ".length()); + published = parseFormat.parse(publishedTime).getTime(); + } + } + String ascii = new String(data, "US-ASCII"); + String startToken = "extra-info "; + String sigToken = "\nrouter-signature\n"; + String digest = null; + int start = ascii.indexOf(startToken); + if (start > 0) { + /* Do not confuse "extra-info " in "@type extra-info 1.0" with + * "extra-info 0000...". TODO This is a hack that should be + * solved by using metrics-lib some day. 
*/ + start = ascii.indexOf("\n" + startToken); + if (start > 0) { + start++; + } + } + /* As for server descriptors: test the raw indexOf result, because + * the sum with the token length is never negative. */ + int sigStart = ascii.indexOf(sigToken); + int sig = sigStart + sigToken.length(); + if (start >= 0 && sigStart >= 0 && sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(data, start, forDigest, 0, sig - start); + digest = DigestUtils.shaHex(forDigest); + } + if (this.aw != null && digest != null) { + this.aw.storeExtraInfoDescriptor(data, digest, published); + stored = true; + } + if (this.rdd != null && digest != null) { + this.rdd.haveParsedExtraInfoDescriptor(publishedTime, + relayIdentifier.toLowerCase(), digest); + } + } else if (line.equals("onion-key")) { + /* Cannot store microdescriptors without knowing valid-after + * time(s) of microdesc consensuses containing them, because we + * don't know which month directories to put them in. Have to use + * storeMicrodescriptor below. */ + } + br.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not parse descriptor. " + + "Skipping.", e); + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Could not parse descriptor. 
" + + "Skipping.", e); + } + return stored; + } + + /** + * Passes a microdescriptor to the archive writer for storing and tells + * the downloader that it has been parsed; either step is skipped if + * the respective object is not set. + */ + public void storeMicrodescriptor(byte[] data, String digest256Hex, + String digest256Base64, long validAfter) { + if (this.aw != null) { + this.aw.storeMicrodescriptor(data, digest256Hex, validAfter); + } + if (this.rdd != null) { + this.rdd.haveParsedMicrodescriptor(digest256Base64); + } + } +} + diff --git a/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java b/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java new file mode 100644 index 0000000..7bcfbf3 --- /dev/null +++ b/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java @@ -0,0 +1,643 @@ +/* Copyright 2012-2016 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.torperf; + +import org.torproject.collector.main.Configuration; +import org.torproject.collector.main.LockFile; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.Stack; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.logging.Level; +import java.util.logging.Logger; + +/* Download possibly truncated Torperf .data and .extradata files from + * configured sources, append them to the files we already have, and merge + * the two files into the .tpf format. 
*/ +public class TorperfDownloader extends Thread { + + public static void main(String[] args) { + + Logger logger = Logger.getLogger(TorperfDownloader.class.getName()); + logger.info("Starting torperf module of CollecTor."); + + // Initialize configuration + Configuration config = new Configuration(); + + // Use lock file to avoid overlapping runs + LockFile lf = new LockFile("torperf"); + if (!lf.acquireLock()) { + logger.severe("Warning: CollecTor is already running or has not exited " + + "cleanly! Exiting!"); + System.exit(1); + } + + // Process Torperf files + new TorperfDownloader(config).run(); + + // Remove lock file + lf.releaseLock(); + + logger.info("Terminating torperf module of CollecTor."); + } + + private Configuration config; + + public TorperfDownloader(Configuration config) { + this.config = config; + } + + private File torperfOutputDirectory = null; + private SortedMap<String, String> torperfSources = null; + private List<String> torperfFilesLines = null; + private Logger logger = null; + private SimpleDateFormat dateFormat; + + public void run() { + + File torperfOutputDirectory = + new File(config.getTorperfOutputDirectory()); + SortedMap<String, String> torperfSources = config.getTorperfSources(); + List<String> torperfFilesLines = config.getTorperfFiles(); + + this.torperfOutputDirectory = torperfOutputDirectory; + this.torperfSources = torperfSources; + this.torperfFilesLines = torperfFilesLines; + if (!this.torperfOutputDirectory.exists()) { + this.torperfOutputDirectory.mkdirs(); + } + this.logger = Logger.getLogger(TorperfDownloader.class.getName()); + this.dateFormat = new SimpleDateFormat("yyyy-MM-dd"); + this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + this.readLastMergedTimestamps(); + for (String torperfFilesLine : this.torperfFilesLines) { + this.downloadAndMergeFiles(torperfFilesLine); + } + this.writeLastMergedTimestamps(); + + this.cleanUpRsyncDirectory(); + } + + private File torperfLastMergedFile = + new 
File("stats/torperf-last-merged"); + + SortedMap<String, String> lastMergedTimestamps = + new TreeMap<String, String>(); + + private void readLastMergedTimestamps() { + if (!this.torperfLastMergedFile.exists()) { + return; + } + try { + BufferedReader br = new BufferedReader(new FileReader( + this.torperfLastMergedFile)); + String line; + while ((line = br.readLine()) != null) { + String[] parts = line.split(" "); + String fileName = null; + String timestamp = null; + if (parts.length == 2) { + try { + Double.parseDouble(parts[1]); + fileName = parts[0]; + timestamp = parts[1]; + } catch (NumberFormatException e) { + /* Handle below. */ + } + } + if (fileName == null || timestamp == null) { + this.logger.log(Level.WARNING, "Invalid line '" + line + "' in " + + this.torperfLastMergedFile.getAbsolutePath() + ". " + + "Ignoring past history of merging .data and .extradata " + + "files."); + this.lastMergedTimestamps.clear(); + break; + } + this.lastMergedTimestamps.put(fileName, timestamp); + } + br.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Error while reading '" + + this.torperfLastMergedFile.getAbsolutePath() + ". Ignoring " + + "past history of merging .data and .extradata files."); + this.lastMergedTimestamps.clear(); + } + } + + private void writeLastMergedTimestamps() { + try { + this.torperfLastMergedFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.torperfLastMergedFile)); + for (Map.Entry<String, String> e : + this.lastMergedTimestamps.entrySet()) { + String fileName = e.getKey(); + String timestamp = e.getValue(); + bw.write(fileName + " " + timestamp + "\n"); + } + bw.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Error while writing '" + + this.torperfLastMergedFile.getAbsolutePath() + ". 
This may " + + "result in ignoring history of merging .data and .extradata " + + "files in the next execution.", e); + } + } + + private void downloadAndMergeFiles(String torperfFilesLine) { + String[] parts = torperfFilesLine.split(" "); + String sourceName = parts[1]; + int fileSize = -1; + try { + fileSize = Integer.parseInt(parts[2]); + } catch (NumberFormatException e) { + this.logger.log(Level.WARNING, "Could not parse file size in " + + "TorperfFiles configuration line '" + torperfFilesLine + + "'."); + return; + } + + /* Download and append the .data file. */ + String dataFileName = parts[3]; + String sourceBaseUrl = torperfSources.get(sourceName); + String dataUrl = sourceBaseUrl + dataFileName; + String dataOutputFileName = sourceName + "-" + dataFileName; + File dataOutputFile = new File(torperfOutputDirectory, + dataOutputFileName); + boolean downloadedDataFile = this.downloadAndAppendFile(dataUrl, + dataOutputFile, true); + + /* Download and append the .extradata file. */ + String extradataFileName = parts[4]; + String extradataUrl = sourceBaseUrl + extradataFileName; + String extradataOutputFileName = sourceName + "-" + extradataFileName; + File extradataOutputFile = new File(torperfOutputDirectory, + extradataOutputFileName); + boolean downloadedExtradataFile = this.downloadAndAppendFile( + extradataUrl, extradataOutputFile, false); + + /* Merge both files into .tpf format. 
*/ + if (!downloadedDataFile && !downloadedExtradataFile) { + return; + } + String skipUntil = null; + if (this.lastMergedTimestamps.containsKey(dataOutputFileName)) { + skipUntil = this.lastMergedTimestamps.get(dataOutputFileName); + } + try { + skipUntil = this.mergeFiles(dataOutputFile, extradataOutputFile, + sourceName, fileSize, skipUntil); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed merging " + dataOutputFile + + " and " + extradataOutputFile + ".", e); + } + if (skipUntil != null) { + this.lastMergedTimestamps.put(dataOutputFileName, skipUntil); + } + } + + private boolean downloadAndAppendFile(String url, File outputFile, + boolean isDataFile) { + + /* Read an existing output file to determine which line will be the + * first to append to it. */ + String lastTimestampLine = null; + int linesAfterLastTimestampLine = 0; + if (outputFile.exists()) { + try { + BufferedReader br = new BufferedReader(new FileReader( + outputFile)); + String line; + while ((line = br.readLine()) != null) { + if (isDataFile || line.contains(" LAUNCH")) { + lastTimestampLine = line; + linesAfterLastTimestampLine = 0; + } else { + linesAfterLastTimestampLine++; + } + } + br.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed reading '" + + outputFile.getAbsolutePath() + "' to determine the first " + + "line to append to it.", e); + return false; + } + } + try { + this.logger.fine("Downloading " + (isDataFile ? 
".data" : + ".extradata") + " file from '" + url + "' and merging it into " + + "'" + outputFile.getAbsolutePath() + "'."); + URL u = new URL(url); + HttpURLConnection huc = (HttpURLConnection) u.openConnection(); + huc.setRequestMethod("GET"); + huc.connect(); + BufferedReader br = new BufferedReader(new InputStreamReader( + huc.getInputStream())); + String line; + BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile, + true)); + boolean copyLines = lastTimestampLine == null; + while ((line = br.readLine()) != null) { + if (copyLines && linesAfterLastTimestampLine == 0) { + if (isDataFile || line.contains(" LAUNCH")) { + lastTimestampLine = line; + } + bw.write(line + "\n"); + } else if (copyLines && linesAfterLastTimestampLine > 0) { + linesAfterLastTimestampLine--; + } else if (line.equals(lastTimestampLine)) { + copyLines = true; + } + } + bw.close(); + br.close(); + if (!copyLines) { + this.logger.warning("The last timestamp line in '" + + outputFile.getAbsolutePath() + "' is not contained in the " + + "new file downloaded from '" + url + "'. Cannot append " + + "new lines without possibly leaving a gap. Skipping."); + return false; + } + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed downloading and/or merging '" + + url + "'.", e); + return false; + } + if (lastTimestampLine == null) { + this.logger.warning("'" + outputFile.getAbsolutePath() + + "' doesn't contain any timestamp lines. 
Unable to check " + + "whether that file is stale or not."); + } else { + long lastTimestampMillis = -1L; + if (isDataFile) { + lastTimestampMillis = Long.parseLong(lastTimestampLine.substring( + 0, lastTimestampLine.indexOf(" "))) * 1000L; + } else { + lastTimestampMillis = Long.parseLong(lastTimestampLine.substring( + lastTimestampLine.indexOf(" LAUNCH=") + " LAUNCH=".length(), + lastTimestampLine.indexOf(".", + lastTimestampLine.indexOf(" LAUNCH=")))) * 1000L; + } + if (lastTimestampMillis < System.currentTimeMillis() + - 330L * 60L * 1000L) { + this.logger.warning("The last timestamp in '" + + outputFile.getAbsolutePath() + "' is more than 5:30 hours " + + "old: " + lastTimestampMillis); + } + } + return true; + } + + private String mergeFiles(File dataFile, File extradataFile, + String source, int fileSize, String skipUntil) throws IOException { + SortedMap<String, String> config = new TreeMap<String, String>(); + config.put("SOURCE", source); + config.put("FILESIZE", String.valueOf(fileSize)); + if (!dataFile.exists() || !extradataFile.exists()) { + this.logger.warning("File " + dataFile.getAbsolutePath() + " or " + + extradataFile.getAbsolutePath() + " is missing."); + return null; + } + this.logger.fine("Merging " + dataFile.getAbsolutePath() + " and " + + extradataFile.getAbsolutePath() + " into .tpf format."); + BufferedReader brD = new BufferedReader(new FileReader(dataFile)); + BufferedReader brE = new BufferedReader(new FileReader(extradataFile)); + String lineD = brD.readLine(); + String lineE = brE.readLine(); + int d = 1; + int e = 1; + String maxDataComplete = null; + String maxUsedAt = null; + while (lineD != null) { + + /* Parse .data line. Every valid .data line will go into the .tpf + * format, either with additional information from the .extradata + * file or without it. 
*/ + if (lineD.isEmpty()) { + this.logger.finer("Skipping empty line " + dataFile.getName() + + ":" + d++ + "."); + lineD = brD.readLine(); + continue; + } + SortedMap<String, String> data = this.parseDataLine(lineD); + if (data == null) { + this.logger.finer("Skipping illegal line " + dataFile.getName() + + ":" + d++ + " '" + lineD + "'."); + lineD = brD.readLine(); + continue; + } + String dataComplete = data.get("DATACOMPLETE"); + double dataCompleteSeconds = Double.parseDouble(dataComplete); + if (skipUntil != null && dataComplete.compareTo(skipUntil) < 0) { + this.logger.finer("Skipping " + dataFile.getName() + ":" + + d++ + " which we already processed before."); + lineD = brD.readLine(); + continue; + } + maxDataComplete = dataComplete; + + /* Parse .extradata line if available and try to find the one that + * matches the .data line. */ + SortedMap<String, String> extradata = null; + while (lineE != null) { + if (lineE.isEmpty()) { + this.logger.finer("Skipping " + extradataFile.getName() + ":" + + e++ + " which is empty."); + lineE = brE.readLine(); + continue; + } + if (lineE.startsWith("BUILDTIMEOUT_SET ")) { + this.logger.finer("Skipping " + extradataFile.getName() + ":" + + e++ + " which is a BUILDTIMEOUT_SET line."); + lineE = brE.readLine(); + continue; + } else if (lineE.startsWith("ok ") + || lineE.startsWith("error ")) { + this.logger.finer("Skipping " + extradataFile.getName() + ":" + + e++ + " which is in the old format."); + lineE = brE.readLine(); + continue; + } + extradata = this.parseExtradataLine(lineE); + if (extradata == null) { + this.logger.finer("Skipping Illegal line " + + extradataFile.getName() + ":" + e++ + " '" + lineE + + "'."); + lineE = brE.readLine(); + continue; + } + if (!extradata.containsKey("USED_AT")) { + this.logger.finer("Skipping " + extradataFile.getName() + ":" + + e++ + " which doesn't contain a USED_AT element."); + lineE = brE.readLine(); + continue; + } + String usedAt = extradata.get("USED_AT"); + double 
usedAtSeconds = Double.parseDouble(usedAt); + if (skipUntil != null && usedAt.compareTo(skipUntil) < 0) { + this.logger.finer("Skipping " + extradataFile.getName() + ":" + + e++ + " which we already processed before."); + lineE = brE.readLine(); + continue; + } + maxUsedAt = usedAt; + if (Math.abs(usedAtSeconds - dataCompleteSeconds) <= 1.0) { + this.logger.fine("Merging " + extradataFile.getName() + ":" + + e++ + " into the current .data line."); + lineE = brE.readLine(); + break; + } else if (usedAtSeconds > dataCompleteSeconds) { + this.logger.finer("Comparing " + extradataFile.getName() + + " to the next .data line."); + extradata = null; + break; + } else { + this.logger.finer("Skipping " + extradataFile.getName() + ":" + + e++ + " which is too old to be merged with " + + dataFile.getName() + ":" + d + "."); + lineE = brE.readLine(); + continue; + } + } + + /* Write output line to .tpf file. */ + SortedMap<String, String> keysAndValues = + new TreeMap<String, String>(); + if (extradata != null) { + keysAndValues.putAll(extradata); + } + keysAndValues.putAll(data); + keysAndValues.putAll(config); + this.logger.fine("Writing " + dataFile.getName() + ":" + d++ + "."); + lineD = brD.readLine(); + try { + this.writeTpfLine(source, fileSize, keysAndValues); + } catch (IOException ex) { + this.logger.log(Level.WARNING, "Error writing output line. 
" + + "Aborting to merge " + dataFile.getName() + " and " + + extradataFile.getName() + ".", e); + break; + } + } + brD.close(); + brE.close(); + this.writeCachedTpfLines(); + if (maxDataComplete == null) { + return maxUsedAt; + } else if (maxUsedAt == null) { + return maxDataComplete; + } else if (maxDataComplete.compareTo(maxUsedAt) > 0) { + return maxUsedAt; + } else { + return maxDataComplete; + } + } + + private SortedMap<Integer, String> dataTimestamps; + + private SortedMap<String, String> parseDataLine(String line) { + String[] parts = line.trim().split(" "); + if (line.length() == 0 || parts.length < 20) { + return null; + } + if (this.dataTimestamps == null) { + this.dataTimestamps = new TreeMap<Integer, String>(); + this.dataTimestamps.put(0, "START"); + this.dataTimestamps.put(2, "SOCKET"); + this.dataTimestamps.put(4, "CONNECT"); + this.dataTimestamps.put(6, "NEGOTIATE"); + this.dataTimestamps.put(8, "REQUEST"); + this.dataTimestamps.put(10, "RESPONSE"); + this.dataTimestamps.put(12, "DATAREQUEST"); + this.dataTimestamps.put(14, "DATARESPONSE"); + this.dataTimestamps.put(16, "DATACOMPLETE"); + this.dataTimestamps.put(21, "DATAPERC10"); + this.dataTimestamps.put(23, "DATAPERC20"); + this.dataTimestamps.put(25, "DATAPERC30"); + this.dataTimestamps.put(27, "DATAPERC40"); + this.dataTimestamps.put(29, "DATAPERC50"); + this.dataTimestamps.put(31, "DATAPERC60"); + this.dataTimestamps.put(33, "DATAPERC70"); + this.dataTimestamps.put(35, "DATAPERC80"); + this.dataTimestamps.put(37, "DATAPERC90"); + } + SortedMap<String, String> data = new TreeMap<String, String>(); + try { + for (Map.Entry<Integer, String> e : this.dataTimestamps.entrySet()) { + int i = e.getKey(); + if (parts.length > i + 1) { + String key = e.getValue(); + String value = String.format("%s.%02d", parts[i], + Integer.parseInt(parts[i + 1]) / 10000); + data.put(key, value); + } + } + } catch (NumberFormatException e) { + return null; + } + data.put("WRITEBYTES", parts[18]); + 
data.put("READBYTES", parts[19]); + if (parts.length >= 21) { + data.put("DIDTIMEOUT", parts[20]); + } + return data; + } + + private SortedMap<String, String> parseExtradataLine(String line) { + String[] parts = line.split(" "); + SortedMap<String, String> extradata = new TreeMap<String, String>(); + String previousKey = null; + for (String part : parts) { + String[] keyAndValue = part.split("=", -1); + if (keyAndValue.length == 2) { + String key = keyAndValue[0]; + previousKey = key; + String value = keyAndValue[1]; + if (value.contains(".") && value.lastIndexOf(".") + == value.length() - 2) { + /* Make sure that all floats have two trailing digits. */ + value += "0"; + } + extradata.put(key, value); + } else if (keyAndValue.length == 1 && previousKey != null) { + String value = keyAndValue[0]; + if (previousKey.equals("STREAM_FAIL_REASONS") + && (value.equals("MISC") || value.equals("EXITPOLICY") + || value.equals("RESOURCELIMIT") + || value.equals("RESOLVEFAILED"))) { + extradata.put(previousKey, extradata.get(previousKey) + ":" + + value); + } else { + return null; + } + } else { + return null; + } + } + return extradata; + } + + private String cachedSource; + + private int cachedFileSize; + + private String cachedStartDate; + + private SortedMap<String, String> cachedTpfLines; + + private void writeTpfLine(String source, int fileSize, + SortedMap<String, String> keysAndValues) throws IOException { + StringBuilder sb = new StringBuilder(); + int written = 0; + for (Map.Entry<String, String> keyAndValue : + keysAndValues.entrySet()) { + String key = keyAndValue.getKey(); + String value = keyAndValue.getValue(); + sb.append((written++ > 0 ? 
" " : "") + key + "=" + value); + } + String line = sb.toString(); + String startString = keysAndValues.get("START"); + long startMillis = Long.parseLong(startString.substring(0, + startString.indexOf("."))) * 1000L; + String startDate = dateFormat.format(startMillis); + if (this.cachedTpfLines == null || !source.equals(this.cachedSource) + || fileSize != this.cachedFileSize + || !startDate.equals(this.cachedStartDate)) { + this.writeCachedTpfLines(); + this.readTpfLinesToCache(source, fileSize, startDate); + } + if (!this.cachedTpfLines.containsKey(startString) + || line.length() > this.cachedTpfLines.get(startString).length()) { + this.cachedTpfLines.put(startString, line); + } + } + + private void readTpfLinesToCache(String source, int fileSize, + String startDate) throws IOException { + this.cachedTpfLines = new TreeMap<String, String>(); + this.cachedSource = source; + this.cachedFileSize = fileSize; + this.cachedStartDate = startDate; + File tpfFile = new File(torperfOutputDirectory, + startDate.replaceAll("-", "/") + "/" + + source + "-" + String.valueOf(fileSize) + "-" + startDate + + ".tpf"); + if (!tpfFile.exists()) { + return; + } + BufferedReader br = new BufferedReader(new FileReader(tpfFile)); + String line; + while ((line = br.readLine()) != null) { + if (line.startsWith("@type ")) { + continue; + } + if (line.contains("START=")) { + String startString = line.substring(line.indexOf("START=") + + "START=".length()).split(" ")[0]; + this.cachedTpfLines.put(startString, line); + } + } + br.close(); + } + + private void writeCachedTpfLines() throws IOException { + if (this.cachedSource == null || this.cachedFileSize == 0 + || this.cachedStartDate == null || this.cachedTpfLines == null) { + return; + } + File tarballFile = new File(torperfOutputDirectory, + this.cachedStartDate.replaceAll("-", "/") + + "/" + this.cachedSource + "-" + + String.valueOf(this.cachedFileSize) + "-" + + this.cachedStartDate + ".tpf"); + File rsyncFile = new 
File("recent/torperf/" + tarballFile.getName()); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + for (File outputFile : outputFiles) { + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile)); + for (String line : this.cachedTpfLines.values()) { + bw.write("@type torperf 1.0\n"); + bw.write(line + "\n"); + } + bw.close(); + } + this.cachedSource = null; + this.cachedFileSize = 0; + this.cachedStartDate = null; + this.cachedTpfLines = null; + } + + /* Delete all files from the rsync directory that have not been modified + * in the last three days. */ + public void cleanUpRsyncDirectory() { + long cutOffMillis = System.currentTimeMillis() + - 3L * 24L * 60L * 60L * 1000L; + Stack<File> allFiles = new Stack<File>(); + allFiles.add(new File("recent/torperf")); + while (!allFiles.isEmpty()) { + File file = allFiles.pop(); + if (file.isDirectory()) { + allFiles.addAll(Arrays.asList(file.listFiles())); + } else if (file.lastModified() < cutOffMillis) { + file.delete(); + } + } + } +} + diff --git a/src/org/torproject/collector/Main.java b/src/org/torproject/collector/Main.java deleted file mode 100644 index 9c64696..0000000 --- a/src/org/torproject/collector/Main.java +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright 2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector; - -import org.torproject.collector.bridgedescs.SanitizedBridgesWriter; -import org.torproject.collector.exitlists.ExitListDownloader; -import org.torproject.collector.index.CreateIndexJson; -import org.torproject.collector.relaydescs.ArchiveWriter; -import org.torproject.collector.torperf.TorperfDownloader; - -import java.lang.reflect.InvocationTargetException; -import java.util.HashMap; -import java.util.Map; -import java.util.logging.Logger; - -/** - * Main class for starting a CollecTor instance. - * <br> - * Run without arguments in order to read the usage information, i.e. 
- * <br> - * <code>java -jar collector.jar</code> - */ -public class Main { - - private static Logger log = Logger.getLogger(Main.class.getName()); - - /** All possible main classes. - * If a new CollecTorMain class is available, just add it to this map. - */ - private static final Map<String, Class> collecTorMains = new HashMap<>(); - - static { // add a new main class here - collecTorMains.put("bridgedescs", SanitizedBridgesWriter.class); - collecTorMains.put("exitlists", ExitListDownloader.class); - collecTorMains.put("updateindex", CreateIndexJson.class); - collecTorMains.put("relaydescs", ArchiveWriter.class); - collecTorMains.put("torperf", TorperfDownloader.class); - } - - private static final String modules = collecTorMains.keySet().toString() - .replace("[", "").replace("]", "").replaceAll(", ", "|"); - - /** - * One argument is necessary. - * See class description {@link Main}. - */ - public static void main(String[] args) { - if (null == args || args.length != 1) { - printUsageAndExit("CollecTor needs exactly one argument."); - } else { - invokeGivenMainAndExit(args[0]); - } - } - - private static void printUsageAndExit(String msg) { - final String usage = "Usage:\njava -jar collector.jar " - + "<" + modules + ">"; - System.out.println(msg + "\n" + usage); - System.exit(0); - } - - private static void invokeGivenMainAndExit(String mainId) { - Class clazz = collecTorMains.get(mainId); - if (null == clazz) { - printUsageAndExit("Unknown argument: " + mainId); - } - invokeMainOnClassAndExit(clazz); - } - - private static void invokeMainOnClassAndExit(Class clazz) { - try { - clazz.getMethod("main", new Class[] { String[].class }) - .invoke(null, (Object) new String[]{}); - System.exit(0); - } catch (NoSuchMethodException | IllegalAccessException - | InvocationTargetException e) { - log.severe("Cannot invoke 'main' method on " - + clazz.getName() + ". 
" + e); - } - } -} - diff --git a/src/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java b/src/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java deleted file mode 100644 index f683ea0..0000000 --- a/src/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright 2010--2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.bridgedescs; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.StringReader; -import java.util.logging.Level; -import java.util.logging.Logger; - -public class BridgeDescriptorParser { - - private SanitizedBridgesWriter sbw; - - private Logger logger; - - public BridgeDescriptorParser(SanitizedBridgesWriter sbw) { - this.sbw = sbw; - this.logger = - Logger.getLogger(BridgeDescriptorParser.class.getName()); - } - - public void parse(byte[] allData, String dateTime) { - try { - BufferedReader br = new BufferedReader(new StringReader( - new String(allData, "US-ASCII"))); - String line = br.readLine(); - if (line == null) { - return; - } else if (line.startsWith("router ")) { - if (this.sbw != null) { - this.sbw.sanitizeAndStoreServerDescriptor(allData); - } - } else if (line.startsWith("extra-info ")) { - if (this.sbw != null) { - this.sbw.sanitizeAndStoreExtraInfoDescriptor(allData); - } - } else { - if (this.sbw != null) { - this.sbw.sanitizeAndStoreNetworkStatus(allData, dateTime); - } - } - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not parse bridge descriptor.", - e); - return; - } - } -} - diff --git a/src/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java b/src/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java deleted file mode 100644 index 2d41d18..0000000 --- a/src/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java +++ /dev/null @@ -1,228 +0,0 @@ -/* Copyright 2010--2016 The Tor Project - * See LICENSE for licensing information */ 
- -package org.torproject.collector.bridgedescs; - -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.codec.digest.DigestUtils; -import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; -import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.StringReader; -import java.util.HashSet; -import java.util.Set; -import java.util.SortedSet; -import java.util.Stack; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Reads the half-hourly snapshots of bridge descriptors from Tonga. - */ -public class BridgeSnapshotReader { - public BridgeSnapshotReader(BridgeDescriptorParser bdp, - File bridgeDirectoriesDir, File statsDirectory) { - - if (bdp == null || bridgeDirectoriesDir == null - || statsDirectory == null) { - throw new IllegalArgumentException(); - } - - Logger logger = - Logger.getLogger(BridgeSnapshotReader.class.getName()); - SortedSet<String> parsed = new TreeSet<String>(); - File bdDir = bridgeDirectoriesDir; - File pbdFile = new File(statsDirectory, "parsed-bridge-directories"); - boolean modified = false; - if (bdDir.exists()) { - if (pbdFile.exists()) { - logger.fine("Reading file " + pbdFile.getAbsolutePath() + "..."); - try { - BufferedReader br = new BufferedReader(new FileReader(pbdFile)); - String line = null; - while ((line = br.readLine()) != null) { - parsed.add(line); - } - br.close(); - logger.fine("Finished reading file " - + pbdFile.getAbsolutePath() + "."); - } catch (IOException e) { - logger.log(Level.WARNING, "Failed reading file " - + pbdFile.getAbsolutePath() + "!", e); - return; - } - } - logger.fine("Importing files in directory " + 
bridgeDirectoriesDir - + "/..."); - Set<String> descriptorImportHistory = new HashSet<String>(); - int parsedFiles = 0; - int skippedFiles = 0; - int parsedStatuses = 0; - int parsedServerDescriptors = 0; - int skippedServerDescriptors = 0; - int parsedExtraInfoDescriptors = 0; - int skippedExtraInfoDescriptors = 0; - Stack<File> filesInInputDir = new Stack<File>(); - filesInInputDir.add(bdDir); - while (!filesInInputDir.isEmpty()) { - File pop = filesInInputDir.pop(); - if (pop.isDirectory()) { - for (File f : pop.listFiles()) { - filesInInputDir.add(f); - } - } else if (!parsed.contains(pop.getName())) { - try { - FileInputStream in = new FileInputStream(pop); - if (in.available() > 0) { - TarArchiveInputStream tais = null; - if (pop.getName().endsWith(".tar.gz")) { - GzipCompressorInputStream gcis = - new GzipCompressorInputStream(in); - tais = new TarArchiveInputStream(gcis); - } else if (pop.getName().endsWith(".tar")) { - tais = new TarArchiveInputStream(in); - } else { - continue; - } - BufferedInputStream bis = new BufferedInputStream(tais); - String fn = pop.getName(); - String dateTime = fn.substring(11, 21) + " " - + fn.substring(22, 24) + ":" + fn.substring(24, 26) - + ":" + fn.substring(26, 28); - while ((tais.getNextTarEntry()) != null) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int len; - byte[] data = new byte[1024]; - while ((len = bis.read(data, 0, 1024)) >= 0) { - baos.write(data, 0, len); - } - byte[] allData = baos.toByteArray(); - if (allData.length == 0) { - continue; - } - String fileDigest = Hex.encodeHexString(DigestUtils.sha( - allData)); - String ascii = new String(allData, "US-ASCII"); - BufferedReader br3 = new BufferedReader(new StringReader( - ascii)); - String firstLine = null; - while ((firstLine = br3.readLine()) != null) { - if (firstLine.startsWith("@")) { - continue; - } else { - break; - } - } - if (firstLine.startsWith("published ") - || firstLine.startsWith("flag-thresholds ") - || 
firstLine.startsWith("r ")) { - bdp.parse(allData, dateTime); - parsedStatuses++; - } else if (descriptorImportHistory.contains(fileDigest)) { - /* Skip server descriptors or extra-info descriptors if - * we parsed them before. */ - skippedFiles++; - continue; - } else { - int start = -1; - int sig = -1; - int end = -1; - String startToken = firstLine.startsWith("router ") - ? "router " : "extra-info "; - String sigToken = "\nrouter-signature\n"; - String endToken = "\n-----END SIGNATURE-----\n"; - while (end < ascii.length()) { - start = ascii.indexOf(startToken, end); - if (start < 0) { - break; - } - sig = ascii.indexOf(sigToken, start); - if (sig < 0) { - break; - } - sig += sigToken.length(); - end = ascii.indexOf(endToken, sig); - if (end < 0) { - break; - } - end += endToken.length(); - byte[] descBytes = new byte[end - start]; - System.arraycopy(allData, start, descBytes, 0, - end - start); - String descriptorDigest = Hex.encodeHexString( - DigestUtils.sha(descBytes)); - if (!descriptorImportHistory.contains( - descriptorDigest)) { - bdp.parse(descBytes, dateTime); - descriptorImportHistory.add(descriptorDigest); - if (firstLine.startsWith("router ")) { - parsedServerDescriptors++; - } else { - parsedExtraInfoDescriptors++; - } - } else { - if (firstLine.startsWith("router ")) { - skippedServerDescriptors++; - } else { - skippedExtraInfoDescriptors++; - } - } - } - } - descriptorImportHistory.add(fileDigest); - parsedFiles++; - } - bis.close(); - } - in.close(); - - /* Let's give some memory back, or we'll run out of it. */ - System.gc(); - - parsed.add(pop.getName()); - modified = true; - } catch (IOException e) { - logger.log(Level.WARNING, "Could not parse bridge snapshot " - + pop.getName() + "!", e); - continue; - } - } - } - logger.fine("Finished importing files in directory " - + bridgeDirectoriesDir + "/. 
In total, we parsed " - + parsedFiles + " files (skipped " + skippedFiles - + ") containing " + parsedStatuses + " statuses, " - + parsedServerDescriptors + " server descriptors (skipped " - + skippedServerDescriptors + "), and " - + parsedExtraInfoDescriptors + " extra-info descriptors " - + "(skipped " + skippedExtraInfoDescriptors + ")."); - if (!parsed.isEmpty() && modified) { - logger.fine("Writing file " + pbdFile.getAbsolutePath() + "..."); - try { - pbdFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile)); - for (String f : parsed) { - bw.append(f + "\n"); - } - bw.close(); - logger.fine("Finished writing file " + pbdFile.getAbsolutePath() - + "."); - } catch (IOException e) { - logger.log(Level.WARNING, "Failed writing file " - + pbdFile.getAbsolutePath() + "!", e); - } - } - } - } -} - diff --git a/src/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java deleted file mode 100644 index 3214715..0000000 --- a/src/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java +++ /dev/null @@ -1,1330 +0,0 @@ -/* Copyright 2010--2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.bridgedescs; - -import org.torproject.collector.main.Configuration; -import org.torproject.collector.main.LockFile; - -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.codec.digest.DigestUtils; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.StringReader; -import java.io.UnsupportedEncodingException; -import java.security.GeneralSecurityException; -import java.security.SecureRandom; -import java.text.ParseException; -import java.text.SimpleDateFormat; 
-import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.Stack; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Sanitizes bridge descriptors, i.e., removes all possibly sensitive - * information from them, and writes them to a local directory structure. - * During the sanitizing process, all information about the bridge - * identity or IP address are removed or replaced. The goal is to keep the - * sanitized bridge descriptors useful for statistical analysis while not - * making it easier for an adversary to enumerate bridges. - * - * There are three types of bridge descriptors: bridge network statuses - * (lists of all bridges at a given time), server descriptors (published - * by the bridge to advertise their capabilities), and extra-info - * descriptors (published by the bridge, mainly for statistical analysis). - */ -public class SanitizedBridgesWriter extends Thread { - - public static void main(String[] args) { - - Logger logger = Logger.getLogger( - SanitizedBridgesWriter.class.getName()); - logger.info("Starting bridge-descriptors module of CollecTor."); - - // Initialize configuration - Configuration config = new Configuration(); - - // Use lock file to avoid overlapping runs - LockFile lf = new LockFile("bridge-descriptors"); - if (!lf.acquireLock()) { - logger.severe("Warning: CollecTor is already running or has not exited " - + "cleanly! Exiting!"); - System.exit(1); - } - - // Sanitize bridge descriptors - new SanitizedBridgesWriter(config).run(); - - // Remove lock file - lf.releaseLock(); - - logger.info("Terminating bridge-descriptors module of CollecTor."); - } - - private Configuration config; - - /** - * Initializes this class. - */ - public SanitizedBridgesWriter(Configuration config) { - this.config = config; - } - - /** - * Logger for this class. 
- */ - private Logger logger; - - private String rsyncCatString; - - private File bridgeDirectoriesDirectory; - - /** - * Output directory for writing sanitized bridge descriptors. - */ - private File sanitizedBridgesDirectory; - - private boolean replaceIPAddressesWithHashes; - - private boolean persistenceProblemWithSecrets; - - private SortedMap<String, byte[]> secretsForHashingIPAddresses; - - private String bridgeSanitizingCutOffTimestamp; - - private boolean haveWarnedAboutInterval; - - private File bridgeIpSecretsFile; - - private SecureRandom secureRandom; - - public void run() { - - File bridgeDirectoriesDirectory = - new File(config.getBridgeSnapshotsDirectory()); - File sanitizedBridgesDirectory = - new File(config.getSanitizedBridgesWriteDirectory()); - boolean replaceIPAddressesWithHashes = - config.getReplaceIPAddressesWithHashes(); - long limitBridgeSanitizingInterval = - config.getLimitBridgeDescriptorMappings(); - File statsDirectory = new File("stats"); - - if (bridgeDirectoriesDirectory == null - || sanitizedBridgesDirectory == null || statsDirectory == null) { - throw new IllegalArgumentException(); - } - - /* Memorize argument values. */ - this.bridgeDirectoriesDirectory = bridgeDirectoriesDirectory; - this.sanitizedBridgesDirectory = sanitizedBridgesDirectory; - this.replaceIPAddressesWithHashes = replaceIPAddressesWithHashes; - - /* Initialize logger. */ - this.logger = Logger.getLogger( - SanitizedBridgesWriter.class.getName()); - - SimpleDateFormat rsyncCatFormat = new SimpleDateFormat( - "yyyy-MM-dd-HH-mm-ss"); - rsyncCatFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - this.rsyncCatString = rsyncCatFormat.format( - System.currentTimeMillis()); - - /* Initialize secure random number generator if we need it. 
*/ - if (this.replaceIPAddressesWithHashes) { - try { - this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN"); - } catch (GeneralSecurityException e) { - this.logger.log(Level.WARNING, "Could not initialize secure " - + "random number generator! Not calculating any IP address " - + "hashes in this execution!", e); - this.persistenceProblemWithSecrets = true; - } - } - - /* Read hex-encoded secrets for replacing IP addresses with hashes - * from disk. */ - this.secretsForHashingIPAddresses = new TreeMap<String, byte[]>(); - this.bridgeIpSecretsFile = new File(statsDirectory, - "bridge-ip-secrets"); - if (this.bridgeIpSecretsFile.exists()) { - try { - BufferedReader br = new BufferedReader(new FileReader( - this.bridgeIpSecretsFile)); - String line; - while ((line = br.readLine()) != null) { - String[] parts = line.split(","); - if ((line.length() != ("yyyy-MM,".length() + 31 * 2) - && line.length() != ("yyyy-MM,".length() + 50 * 2)) - || parts.length != 2) { - this.logger.warning("Invalid line in bridge-ip-secrets file " - + "starting with '" + line.substring(0, 7) + "'! " - + "Not calculating any IP address hashes in this " - + "execution!"); - this.persistenceProblemWithSecrets = true; - break; - } - String month = parts[0]; - byte[] secret = Hex.decodeHex(parts[1].toCharArray()); - this.secretsForHashingIPAddresses.put(month, secret); - } - br.close(); - if (!this.persistenceProblemWithSecrets) { - this.logger.fine("Read " - + this.secretsForHashingIPAddresses.size() + " secrets for " - + "hashing bridge IP addresses."); - } - } catch (DecoderException e) { - this.logger.log(Level.WARNING, "Failed to decode hex string in " - + this.bridgeIpSecretsFile + "! Not calculating any IP " - + "address hashes in this execution!", e); - this.persistenceProblemWithSecrets = true; - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to read " - + this.bridgeIpSecretsFile + "! 
Not calculating any IP " - + "address hashes in this execution!", e); - this.persistenceProblemWithSecrets = true; - } - } - - /* If we're configured to keep secrets only for a limited time, define - * the cut-off day and time. */ - if (limitBridgeSanitizingInterval >= 0L) { - SimpleDateFormat formatter = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); - this.bridgeSanitizingCutOffTimestamp = formatter.format( - System.currentTimeMillis() - 24L * 60L * 60L * 1000L - * limitBridgeSanitizingInterval); - } else { - this.bridgeSanitizingCutOffTimestamp = "1999-12-31 23:59:59"; - } - - // Prepare bridge descriptor parser - BridgeDescriptorParser bdp = new BridgeDescriptorParser(this); - - // Import bridge descriptors - new BridgeSnapshotReader(bdp, this.bridgeDirectoriesDirectory, - statsDirectory); - - // Finish writing sanitized bridge descriptors to disk - this.finishWriting(); - - this.checkStaleDescriptors(); - - this.cleanUpRsyncDirectory(); - } - - private String scrubOrAddress(String orAddress, byte[] fingerprintBytes, - String published) throws IOException { - if (!orAddress.contains(":")) { - /* Malformed or-address or a line. */ - return null; - } - String addressPart = orAddress.substring(0, - orAddress.lastIndexOf(":")); - String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1); - String scrubbedAddressPart = null; - if (addressPart.startsWith("[")) { - scrubbedAddressPart = this.scrubIpv6Address(addressPart, - fingerprintBytes, published); - } else { - scrubbedAddressPart = this.scrubIpv4Address(addressPart, - fingerprintBytes, published); - } - return (scrubbedAddressPart == null ? 
null : - scrubbedAddressPart + ":" + portPart); - } - - private String scrubIpv4Address(String address, byte[] fingerprintBytes, - String published) throws IOException { - if (this.replaceIPAddressesWithHashes) { - if (this.persistenceProblemWithSecrets) { - /* There's a persistence problem, so we shouldn't scrub more IP - * addresses in this execution. */ - return null; - } - byte[] hashInput = new byte[4 + 20 + 31]; - String[] ipParts = address.split("\."); - for (int i = 0; i < 4; i++) { - hashInput[i] = (byte) Integer.parseInt(ipParts[i]); - } - System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20); - String month = published.substring(0, "yyyy-MM".length()); - byte[] secret = this.getSecretForMonth(month); - System.arraycopy(secret, 0, hashInput, 24, 31); - byte[] hashOutput = DigestUtils.sha256(hashInput); - String hashedAddress = "10." - + (((int) hashOutput[0] + 256) % 256) + "." - + (((int) hashOutput[1] + 256) % 256) + "." - + (((int) hashOutput[2] + 256) % 256); - return hashedAddress; - } else { - return "127.0.0.1"; - } - } - - private String scrubIpv6Address(String address, byte[] fingerprintBytes, - String published) throws IOException { - StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::"); - if (this.replaceIPAddressesWithHashes) { - if (this.persistenceProblemWithSecrets) { - /* There's a persistence problem, so we shouldn't scrub more IP - * addresses in this execution. */ - return null; - } - byte[] hashInput = new byte[16 + 20 + 19]; - String[] doubleColonSeparatedParts = address.substring(1, - address.length() - 1).split("::", -1); - if (doubleColonSeparatedParts.length > 2) { - /* Invalid IPv6 address. */ - return null; - } - List<String> hexParts = new ArrayList<String>(); - for (String doubleColonSeparatedPart : doubleColonSeparatedParts) { - StringBuilder hexPart = new StringBuilder(); - String[] parts = doubleColonSeparatedPart.split(":", -1); - if (parts.length < 1 || parts.length > 8) { - /* Invalid IPv6 address. 
*/ - return null; - } - for (int i = 0; i < parts.length; i++) { - String part = parts[i]; - if (part.contains(".")) { - String[] ipParts = part.split("\."); - byte[] ipv4Bytes = new byte[4]; - if (ipParts.length != 4) { - /* Invalid IPv4 part in IPv6 address. */ - return null; - } - for (int m = 0; m < 4; m++) { - ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]); - } - hexPart.append(Hex.encodeHexString(ipv4Bytes)); - } else if (part.length() > 4) { - /* Invalid IPv6 address. */ - return null; - } else { - for (int k = part.length(); k < 4; k++) { - hexPart.append("0"); - } - hexPart.append(part); - } - } - hexParts.add(hexPart.toString()); - } - StringBuilder hex = new StringBuilder(); - hex.append(hexParts.get(0)); - if (hexParts.size() == 2) { - for (int i = 32 - hexParts.get(0).length() - - hexParts.get(1).length(); i > 0; i--) { - hex.append("0"); - } - hex.append(hexParts.get(1)); - } - byte[] ipBytes = null; - try { - ipBytes = Hex.decodeHex(hex.toString().toCharArray()); - } catch (DecoderException e) { - /* TODO Invalid IPv6 address. */ - return null; - } - if (ipBytes.length != 16) { - /* TODO Invalid IPv6 address. 
*/ - return null; - } - System.arraycopy(ipBytes, 0, hashInput, 0, 16); - System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20); - String month = published.substring(0, "yyyy-MM".length()); - byte[] secret = this.getSecretForMonth(month); - System.arraycopy(secret, 31, hashInput, 36, 19); - String hashOutput = DigestUtils.sha256Hex(hashInput); - sb.append(hashOutput.substring(hashOutput.length() - 6, - hashOutput.length() - 4)); - sb.append(":"); - sb.append(hashOutput.substring(hashOutput.length() - 4)); - } - sb.append("]"); - return sb.toString(); - } - - private byte[] getSecretForMonth(String month) throws IOException { - if (!this.secretsForHashingIPAddresses.containsKey(month) - || this.secretsForHashingIPAddresses.get(month).length == 31) { - byte[] secret = new byte[50]; - this.secureRandom.nextBytes(secret); - if (this.secretsForHashingIPAddresses.containsKey(month)) { - System.arraycopy(this.secretsForHashingIPAddresses.get(month), 0, - secret, 0, 31); - } - if (month.compareTo( - this.bridgeSanitizingCutOffTimestamp) < 0) { - this.logger.warning("Generated a secret that we won't make " - + "persistent, because it's outside our bridge descriptor " - + "sanitizing interval."); - } else { - /* Append secret to file on disk immediately before using it, or - * we might end with inconsistently sanitized bridges. */ - try { - if (!this.bridgeIpSecretsFile.exists()) { - this.bridgeIpSecretsFile.getParentFile().mkdirs(); - } - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.bridgeIpSecretsFile, - this.bridgeIpSecretsFile.exists())); - bw.write(month + "," + Hex.encodeHexString(secret) + "\n"); - bw.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not store new secret " - + "to disk! 
Not calculating any IP address hashes in " - + "this execution!", e); - this.persistenceProblemWithSecrets = true; - throw new IOException(e); - } - } - this.secretsForHashingIPAddresses.put(month, secret); - } - return this.secretsForHashingIPAddresses.get(month); - } - - private String maxNetworkStatusPublishedTime = "1970-01-01 00:00:00"; - - /** - * Sanitizes a network status and writes it to disk. - */ - public void sanitizeAndStoreNetworkStatus(byte[] data, - String publicationTime) { - - if (this.persistenceProblemWithSecrets) { - /* There's a persistence problem, so we shouldn't scrub more IP - * addresses in this execution. */ - return; - } - - if (publicationTime.compareTo(maxNetworkStatusPublishedTime) > 0) { - maxNetworkStatusPublishedTime = publicationTime; - } - - if (this.bridgeSanitizingCutOffTimestamp - .compareTo(publicationTime) > 0) { - this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING - : Level.FINE, "Sanitizing and storing network status with " - + "publication time outside our descriptor sanitizing " - + "interval."); - this.haveWarnedAboutInterval = true; - } - - /* Parse the given network status line by line. */ - StringBuilder header = new StringBuilder(); - SortedMap<String, String> scrubbedLines = - new TreeMap<String, String>(); - try { - StringBuilder scrubbed = new StringBuilder(); - BufferedReader br = new BufferedReader(new StringReader(new String( - data, "US-ASCII"))); - String line = null; - String mostRecentDescPublished = null; - byte[] fingerprintBytes = null; - String descPublicationTime = null; - String hashedBridgeIdentityHex = null; - while ((line = br.readLine()) != null) { - - /* Use publication time from "published" line instead of the - * file's last-modified time. Don't copy over the line, because - * we're going to write a "published" line below. */ - if (line.startsWith("published ")) { - publicationTime = line.substring("published ".length()); - - /* Additional header lines don't have to be cleaned up. 
*/ - } else if (line.startsWith("flag-thresholds ")) { - header.append(line + "\n"); - - /* r lines contain sensitive information that needs to be removed - * or replaced. */ - } else if (line.startsWith("r ")) { - - /* Clear buffer from previously scrubbed lines. */ - if (scrubbed.length() > 0) { - String scrubbedLine = scrubbed.toString(); - scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine); - scrubbed = new StringBuilder(); - } - - /* Parse the relevant parts of this r line. */ - String[] parts = line.split(" "); - String nickname = parts[1]; - fingerprintBytes = Base64.decodeBase64(parts[2] + "=="); - String descriptorIdentifier = parts[3]; - descPublicationTime = parts[4] + " " + parts[5]; - String address = parts[6]; - String orPort = parts[7]; - String dirPort = parts[8]; - - /* Determine most recent descriptor publication time. */ - if (descPublicationTime.compareTo(publicationTime) <= 0 - && (mostRecentDescPublished == null - || descPublicationTime.compareTo( - mostRecentDescPublished) > 0)) { - mostRecentDescPublished = descPublicationTime; - } - - /* Write scrubbed r line to buffer. */ - byte[] hashedBridgeIdentity = DigestUtils.sha(fingerprintBytes); - String hashedBridgeIdentityBase64 = Base64.encodeBase64String( - hashedBridgeIdentity).substring(0, 27); - hashedBridgeIdentityHex = Hex.encodeHexString( - hashedBridgeIdentity); - String hashedDescriptorIdentifier = Base64.encodeBase64String( - DigestUtils.sha(Base64.decodeBase64(descriptorIdentifier - + "=="))).substring(0, 27); - String scrubbedAddress = scrubIpv4Address(address, - fingerprintBytes, - descPublicationTime); - scrubbed.append("r " + nickname + " " - + hashedBridgeIdentityBase64 + " " - + hashedDescriptorIdentifier + " " + descPublicationTime - + " " + scrubbedAddress + " " + orPort + " " + dirPort - + "\n"); - - /* Sanitize any addresses in a lines using the fingerprint and - * descriptor publication time from the previous r line. 
*/ - } else if (line.startsWith("a ")) { - String scrubbedOrAddress = scrubOrAddress( - line.substring("a ".length()), fingerprintBytes, - descPublicationTime); - if (scrubbedOrAddress != null) { - scrubbed.append("a " + scrubbedOrAddress + "\n"); - } else { - this.logger.warning("Invalid address in line '" + line - + "' in bridge network status. Skipping line!"); - } - - /* Nothing special about s, w, and p lines; just copy them. */ - } else if (line.startsWith("s ") || line.equals("s") - || line.startsWith("w ") || line.equals("w") - || line.startsWith("p ") || line.equals("p")) { - scrubbed.append(line + "\n"); - - /* There should be nothing else but r, w, p, and s lines in the - * network status. If there is, we should probably learn before - * writing anything to the sanitized descriptors. */ - } else { - this.logger.fine("Unknown line '" + line + "' in bridge " - + "network status. Not writing to disk!"); - return; - } - } - br.close(); - if (scrubbed.length() > 0) { - String scrubbedLine = scrubbed.toString(); - scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine); - scrubbed = new StringBuilder(); - } - - /* Check if we can tell from the descriptor publication times - * whether this status is possibly stale. */ - SimpleDateFormat formatter = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); - if (formatter.parse(publicationTime).getTime() - - formatter.parse(mostRecentDescPublished).getTime() - > 60L * 60L * 1000L) { - this.logger.warning("The most recent descriptor in the bridge " - + "network status published at " + publicationTime + " was " - + "published at " + mostRecentDescPublished + " which is " - + "more than 1 hour before the status. This is a sign for " - + "the status being stale. 
Please check!"); - } - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Could not parse timestamp in " - + "bridge network status.", e); - return; - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not parse bridge network " - + "status.", e); - return; - } - - /* Write the sanitized network status to disk. */ - try { - String syear = publicationTime.substring(0, 4); - String smonth = publicationTime.substring(5, 7); - String sday = publicationTime.substring(8, 10); - String stime = publicationTime.substring(11, 13) - + publicationTime.substring(14, 16) - + publicationTime.substring(17, 19); - File tarballFile = new File( - this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear - + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth - + sday + "-" + stime + "-" - + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D"); - File rsyncFile = new File("recent/bridge-descriptors/statuses/" - + tarballFile.getName()); - File[] outputFiles = new File[] { tarballFile, rsyncFile }; - for (File outputFile : outputFiles) { - outputFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - outputFile)); - bw.write("@type bridge-network-status 1.0\n"); - bw.write("published " + publicationTime + "\n"); - bw.write(header.toString()); - for (String scrubbed : scrubbedLines.values()) { - bw.write(scrubbed); - } - bw.close(); - } - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not write sanitized bridge " - + "network status to disk.", e); - return; - } - } - - private String maxServerDescriptorPublishedTime = "1970-01-01 00:00:00"; - - /** - * Sanitizes a bridge server descriptor and writes it to disk. - */ - public void sanitizeAndStoreServerDescriptor(byte[] data) { - - if (this.persistenceProblemWithSecrets) { - /* There's a persistence problem, so we shouldn't scrub more IP - * addresses in this execution. */ - return; - } - - /* Parse descriptor to generate a sanitized version. 
*/ - String scrubbedDesc = null; - String published = null; - String masterKeyEd25519FromIdentityEd25519 = null; - try { - BufferedReader br = new BufferedReader(new StringReader( - new String(data, "US-ASCII"))); - StringBuilder scrubbed = new StringBuilder(); - String line = null; - String hashedBridgeIdentity = null; - String address = null; - String routerLine = null; - String scrubbedAddress = null; - String masterKeyEd25519 = null; - List<String> orAddresses = null; - List<String> scrubbedOrAddresses = null; - boolean skipCrypto = false; - while ((line = br.readLine()) != null) { - - /* Skip all crypto parts that might be used to derive the bridge's - * identity fingerprint. */ - if (skipCrypto && !line.startsWith("-----END ")) { - continue; - - /* Store the router line for later processing, because we may need - * the bridge identity fingerprint for replacing the IP address in - * the scrubbed version. */ - } else if (line.startsWith("router ")) { - address = line.split(" ")[2]; - routerLine = line; - - /* Store or-address parts in a list and sanitize them when we have - * read the fingerprint. */ - } else if (line.startsWith("or-address ")) { - if (orAddresses == null) { - orAddresses = new ArrayList<String>(); - } - orAddresses.add(line.substring("or-address ".length())); - - /* Parse the publication time to see if we're still inside the - * sanitizing interval. */ - } else if (line.startsWith("published ")) { - published = line.substring("published ".length()); - if (published.compareTo(maxServerDescriptorPublishedTime) > 0) { - maxServerDescriptorPublishedTime = published; - } - if (this.bridgeSanitizingCutOffTimestamp - .compareTo(published) > 0) { - this.logger.log(!this.haveWarnedAboutInterval - ? 
Level.WARNING : Level.FINE, "Sanitizing and storing " - + "server descriptor with publication time outside our " - + "descriptor sanitizing interval."); - this.haveWarnedAboutInterval = true; - } - scrubbed.append(line + "\n"); - - /* Parse the fingerprint to determine the hashed bridge - * identity. */ - } else if (line.startsWith("opt fingerprint ") - || line.startsWith("fingerprint ")) { - String fingerprint = line.substring(line.startsWith("opt ") - ? "opt fingerprint".length() : "fingerprint".length()) - .replaceAll(" ", "").toLowerCase(); - byte[] fingerprintBytes = Hex.decodeHex( - fingerprint.toCharArray()); - hashedBridgeIdentity = DigestUtils.shaHex(fingerprintBytes) - .toLowerCase(); - try { - scrubbedAddress = scrubIpv4Address(address, fingerprintBytes, - published); - if (orAddresses != null) { - scrubbedOrAddresses = new ArrayList<String>(); - for (String orAddress : orAddresses) { - String scrubbedOrAddress = scrubOrAddress(orAddress, - fingerprintBytes, published); - if (scrubbedOrAddress != null) { - scrubbedOrAddresses.add(scrubbedOrAddress); - } else { - this.logger.warning("Invalid address in line " - + "'or-address " + orAddress + "' in bridge server " - + "descriptor. Skipping line!"); - } - } - } - } catch (IOException e) { - /* There's a persistence problem, so we shouldn't scrub more - * IP addresses in this execution. */ - this.persistenceProblemWithSecrets = true; - return; - } - scrubbed.append((line.startsWith("opt ") ? "opt " : "") - + "fingerprint"); - for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++) { - scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i, - 4 * (i + 1)).toUpperCase()); - } - scrubbed.append("\n"); - - /* Replace the contact line (if present) with a generic one. */ - } else if (line.startsWith("contact ")) { - scrubbed.append("contact somebody\n"); - - /* When we reach the signature, we're done. Write the sanitized - * descriptor to disk below. 
*/ - } else if (line.startsWith("router-signature")) { - String[] routerLineParts = routerLine.split(" "); - scrubbedDesc = "router " + routerLineParts[1] + " " - + scrubbedAddress + " " + routerLineParts[3] + " " - + routerLineParts[4] + " " + routerLineParts[5] + "\n"; - if (scrubbedOrAddresses != null) { - for (String scrubbedOrAddress : scrubbedOrAddresses) { - scrubbedDesc = scrubbedDesc += "or-address " - + scrubbedOrAddress + "\n"; - } - } - scrubbedDesc += scrubbed.toString(); - break; - - /* Replace extra-info digest with the hashed digest of the - * non-scrubbed descriptor. */ - } else if (line.startsWith("opt extra-info-digest ") - || line.startsWith("extra-info-digest ")) { - String[] parts = line.split(" "); - if (line.startsWith("opt ")) { - scrubbed.append("opt "); - parts = line.substring(4).split(" "); - } - scrubbed.append("extra-info-digest " + DigestUtils.shaHex( - Hex.decodeHex(parts[1].toCharArray())).toUpperCase()); - if (parts.length > 2) { - scrubbed.append(" " + Base64.encodeBase64String( - DigestUtils.sha256(Base64.decodeBase64(parts[2]))) - .replaceAll("=", "")); - } - scrubbed.append("\n"); - - /* Possibly sanitize reject lines if they contain the bridge's own - * IP address. */ - } else if (line.startsWith("reject ")) { - if (address != null && line.startsWith("reject " + address)) { - scrubbed.append("reject " + scrubbedAddress - + line.substring("reject ".length() + address.length()) - + "\n"); - } else { - scrubbed.append(line + "\n"); - } - - /* Extract master-key-ed25519 from identity-ed25519. 
*/ - } else if (line.equals("identity-ed25519")) { - StringBuilder sb = new StringBuilder(); - while ((line = br.readLine()) != null - && !line.equals("-----END ED25519 CERT-----")) { - if (line.equals("-----BEGIN ED25519 CERT-----")) { - continue; - } - sb.append(line); - } - masterKeyEd25519FromIdentityEd25519 = - this.parseMasterKeyEd25519FromIdentityEd25519( - sb.toString()); - String sha256MasterKeyEd25519 = Base64.encodeBase64String( - DigestUtils.sha256(Base64.decodeBase64( - masterKeyEd25519FromIdentityEd25519 + "="))) - .replaceAll("=", ""); - scrubbed.append("master-key-ed25519 " + sha256MasterKeyEd25519 - + "\n"); - if (masterKeyEd25519 != null && !masterKeyEd25519.equals( - masterKeyEd25519FromIdentityEd25519)) { - this.logger.warning("Mismatch between identity-ed25519 and " - + "master-key-ed25519. Skipping."); - return; - } - - /* Verify that identity-ed25519 and master-key-ed25519 match. */ - } else if (line.startsWith("master-key-ed25519 ")) { - masterKeyEd25519 = line.substring(line.indexOf(" ") + 1); - if (masterKeyEd25519FromIdentityEd25519 != null - && !masterKeyEd25519FromIdentityEd25519.equals( - masterKeyEd25519)) { - this.logger.warning("Mismatch between identity-ed25519 and " - + "master-key-ed25519. Skipping."); - return; - } - - /* Write the following lines unmodified to the sanitized - * descriptor. 
*/ - } else if (line.startsWith("accept ") - || line.startsWith("platform ") - || line.startsWith("opt protocols ") - || line.startsWith("protocols ") - || line.startsWith("uptime ") - || line.startsWith("bandwidth ") - || line.startsWith("opt hibernating ") - || line.startsWith("hibernating ") - || line.startsWith("ntor-onion-key ") - || line.equals("opt hidden-service-dir") - || line.equals("hidden-service-dir") - || line.equals("opt caches-extra-info") - || line.equals("caches-extra-info") - || line.equals("opt allow-single-hop-exits") - || line.equals("allow-single-hop-exits") - || line.startsWith("ipv6-policy ") - || line.equals("tunnelled-dir-server")) { - scrubbed.append(line + "\n"); - - /* Replace node fingerprints in the family line with their hashes - * and leave nicknames unchanged. */ - } else if (line.startsWith("family ")) { - StringBuilder familyLine = new StringBuilder("family"); - for (String s : line.substring(7).split(" ")) { - if (s.startsWith("$")) { - familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex( - s.substring(1).toCharArray())).toUpperCase()); - } else { - familyLine.append(" " + s); - } - } - scrubbed.append(familyLine.toString() + "\n"); - - /* Skip the purpose line that the bridge authority adds to its - * cached-descriptors file. */ - } else if (line.startsWith("@purpose ")) { - continue; - - /* Skip all crypto parts that might leak the bridge's identity - * fingerprint. */ - } else if (line.startsWith("-----BEGIN ") - || line.equals("onion-key") || line.equals("signing-key") - || line.equals("onion-key-crosscert") - || line.startsWith("ntor-onion-key-crosscert ")) { - skipCrypto = true; - - /* Stop skipping lines when the crypto parts are over. */ - } else if (line.startsWith("-----END ")) { - skipCrypto = false; - - /* Skip the ed25519 signature; we'll include a SHA256 digest of - * the SHA256 descriptor digest in router-digest-sha256. 
*/ - } else if (line.startsWith("router-sig-ed25519 ")) { - continue; - - /* If we encounter an unrecognized line, stop parsing and print - * out a warning. We might have overlooked sensitive information - * that we need to remove or replace for the sanitized descriptor - * version. */ - } else { - this.logger.warning("Unrecognized line '" + line - + "'. Skipping."); - return; - } - } - br.close(); - } catch (Exception e) { - this.logger.log(Level.WARNING, "Could not parse server " - + "descriptor.", e); - return; - } - - /* Determine filename of sanitized server descriptor. */ - String descriptorDigest = null; - try { - String ascii = new String(data, "US-ASCII"); - String startToken = "router "; - String sigToken = "\nrouter-signature\n"; - int start = ascii.indexOf(startToken); - int sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 && sig >= 0 && sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest)); - } - } catch (UnsupportedEncodingException e) { - /* Handle below. */ - } - if (descriptorDigest == null) { - this.logger.log(Level.WARNING, "Could not calculate server " - + "descriptor digest."); - return; - } - String descriptorDigestSha256Base64 = null; - if (masterKeyEd25519FromIdentityEd25519 != null) { - try { - String ascii = new String(data, "US-ASCII"); - String startToken = "router "; - String sigToken = "\n-----END SIGNATURE-----\n"; - int start = ascii.indexOf(startToken); - int sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 && sig >= 0 && sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - descriptorDigestSha256Base64 = Base64.encodeBase64String( - DigestUtils.sha256(DigestUtils.sha256(forDigest))) - .replaceAll("=", ""); - } - } catch (UnsupportedEncodingException e) { - /* Handle below. 
*/ - } - if (descriptorDigestSha256Base64 == null) { - this.logger.log(Level.WARNING, "Could not calculate server " - + "descriptor SHA256 digest."); - return; - } - } - String dyear = published.substring(0, 4); - String dmonth = published.substring(5, 7); - File tarballFile = new File( - this.sanitizedBridgesDirectory.getAbsolutePath() + "/" - + dyear + "/" + dmonth + "/server-descriptors/" - + "/" + descriptorDigest.charAt(0) + "/" - + descriptorDigest.charAt(1) + "/" - + descriptorDigest); - File rsyncCatFile = new File("recent/bridge-descriptors/" - + "server-descriptors/" + this.rsyncCatString - + "-server-descriptors.tmp"); - File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; - boolean[] append = new boolean[] { false, true }; - try { - for (int i = 0; i < outputFiles.length; i++) { - File outputFile = outputFiles[i]; - boolean appendToFile = append[i]; - if (outputFile.exists() && !appendToFile) { - /* We already stored this descriptor to disk before, so let's - * not store it yet another time. 
*/ - break; - } - outputFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - outputFile, appendToFile)); - bw.write("@type bridge-server-descriptor 1.1\n"); - bw.write(scrubbedDesc); - if (descriptorDigestSha256Base64 != null) { - bw.write("router-digest-sha256 " + descriptorDigestSha256Base64 - + "\n"); - } - bw.write("router-digest " + descriptorDigest.toUpperCase() - + "\n"); - bw.close(); - } - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not write sanitized server " - + "descriptor to disk.", e); - return; - } - } - - private String parseMasterKeyEd25519FromIdentityEd25519( - String identityEd25519Base64) { - byte[] identityEd25519 = Base64.decodeBase64(identityEd25519Base64); - if (identityEd25519.length < 40) { - this.logger.warning("Invalid length of identity-ed25519 (in " - + "bytes): " + identityEd25519.length); - } else if (identityEd25519[0] != 0x01) { - this.logger.warning("Unknown version in identity-ed25519: " - + identityEd25519[0]); - } else if (identityEd25519[1] != 0x04) { - this.logger.warning("Unknown cert type in identity-ed25519: " - + identityEd25519[1]); - } else if (identityEd25519[6] != 0x01) { - this.logger.warning("Unknown certified key type in " - + "identity-ed25519: " + identityEd25519[1]); - } else if (identityEd25519[39] == 0x00) { - this.logger.warning("No extensions in identity-ed25519 (which " - + "would contain the encoded master-key-ed25519): " - + identityEd25519[39]); - } else { - int extensionStart = 40; - for (int i = 0; i < (int) identityEd25519[39]; i++) { - if (identityEd25519.length < extensionStart + 4) { - this.logger.warning("Invalid extension with id " + i - + " in identity-ed25519."); - break; - } - int extensionLength = identityEd25519[extensionStart]; - extensionLength <<= 8; - extensionLength += identityEd25519[extensionStart + 1]; - int extensionType = identityEd25519[extensionStart + 2]; - if (extensionLength == 32 && extensionType == 4) { - if 
(identityEd25519.length < extensionStart + 4 + 32) { - this.logger.warning("Invalid extension with id " + i - + " in identity-ed25519."); - break; - } - byte[] masterKeyEd25519 = new byte[32]; - System.arraycopy(identityEd25519, extensionStart + 4, - masterKeyEd25519, 0, masterKeyEd25519.length); - String masterKeyEd25519Base64 = Base64.encodeBase64String( - masterKeyEd25519); - String masterKeyEd25519Base64NoTrailingEqualSigns = - masterKeyEd25519Base64.replaceAll("=", ""); - return masterKeyEd25519Base64NoTrailingEqualSigns; - } - extensionStart += 4 + extensionLength; - } - } - this.logger.warning("Unable to locate master-key-ed25519 in " - + "identity-ed25519."); - return null; - } - - private String maxExtraInfoDescriptorPublishedTime = - "1970-01-01 00:00:00"; - - /** - * Sanitizes an extra-info descriptor and writes it to disk. - */ - public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) { - - /* Parse descriptor to generate a sanitized version. */ - String scrubbedDesc = null; - String published = null; - String masterKeyEd25519FromIdentityEd25519 = null; - try { - BufferedReader br = new BufferedReader(new StringReader(new String( - data, "US-ASCII"))); - String line = null; - StringBuilder scrubbed = null; - String hashedBridgeIdentity = null; - String masterKeyEd25519 = null; - while ((line = br.readLine()) != null) { - - /* Parse bridge identity from extra-info line and replace it with - * its hash in the sanitized descriptor. */ - String[] parts = line.split(" "); - if (line.startsWith("extra-info ")) { - hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex( - parts[2].toCharArray())).toLowerCase(); - scrubbed = new StringBuilder("extra-info " + parts[1] + " " - + hashedBridgeIdentity.toUpperCase() + "\n"); - - /* Parse the publication time to determine the file name. 
*/ - } else if (line.startsWith("published ")) { - scrubbed.append(line + "\n"); - published = line.substring("published ".length()); - if (published.compareTo(maxExtraInfoDescriptorPublishedTime) - > 0) { - maxExtraInfoDescriptorPublishedTime = published; - } - - /* Remove everything from transport lines except the transport - * name. */ - } else if (line.startsWith("transport ")) { - if (parts.length < 3) { - this.logger.fine("Illegal line in extra-info descriptor: '" - + line + "'. Skipping descriptor."); - return; - } - scrubbed.append("transport " + parts[1] + "\n"); - - /* Skip transport-info lines entirely. */ - } else if (line.startsWith("transport-info ")) { - - /* Extract master-key-ed25519 from identity-ed25519. */ - } else if (line.equals("identity-ed25519")) { - StringBuilder sb = new StringBuilder(); - while ((line = br.readLine()) != null - && !line.equals("-----END ED25519 CERT-----")) { - if (line.equals("-----BEGIN ED25519 CERT-----")) { - continue; - } - sb.append(line); - } - masterKeyEd25519FromIdentityEd25519 = - this.parseMasterKeyEd25519FromIdentityEd25519( - sb.toString()); - String sha256MasterKeyEd25519 = Base64.encodeBase64String( - DigestUtils.sha256(Base64.decodeBase64( - masterKeyEd25519FromIdentityEd25519 + "="))) - .replaceAll("=", ""); - scrubbed.append("master-key-ed25519 " + sha256MasterKeyEd25519 - + "\n"); - if (masterKeyEd25519 != null && !masterKeyEd25519.equals( - masterKeyEd25519FromIdentityEd25519)) { - this.logger.warning("Mismatch between identity-ed25519 and " - + "master-key-ed25519. Skipping."); - return; - } - - /* Verify that identity-ed25519 and master-key-ed25519 match. */ - } else if (line.startsWith("master-key-ed25519 ")) { - masterKeyEd25519 = line.substring(line.indexOf(" ") + 1); - if (masterKeyEd25519FromIdentityEd25519 != null - && !masterKeyEd25519FromIdentityEd25519.equals( - masterKeyEd25519)) { - this.logger.warning("Mismatch between identity-ed25519 and " - + "master-key-ed25519. 
Skipping."); - return; - } - - /* Write the following lines unmodified to the sanitized - * descriptor. */ - } else if (line.startsWith("write-history ") - || line.startsWith("read-history ") - || line.startsWith("geoip-start-time ") - || line.startsWith("geoip-client-origins ") - || line.startsWith("geoip-db-digest ") - || line.startsWith("geoip6-db-digest ") - || line.startsWith("conn-bi-direct ") - || line.startsWith("bridge-") - || line.startsWith("dirreq-") - || line.startsWith("cell-") - || line.startsWith("entry-") - || line.startsWith("exit-")) { - scrubbed.append(line + "\n"); - - /* When we reach the signature, we're done. Write the sanitized - * descriptor to disk below. */ - } else if (line.startsWith("router-signature")) { - scrubbedDesc = scrubbed.toString(); - break; - - /* Skip the ed25519 signature; we'll include a SHA256 digest of - * the SHA256 descriptor digest in router-digest-sha256. */ - } else if (line.startsWith("router-sig-ed25519 ")) { - continue; - - /* If we encounter an unrecognized line, stop parsing and print - * out a warning. We might have overlooked sensitive information - * that we need to remove or replace for the sanitized descriptor - * version. */ - } else { - this.logger.warning("Unrecognized line '" + line - + "'. Skipping."); - return; - } - } - br.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not parse extra-info " - + "descriptor.", e); - return; - } catch (DecoderException e) { - this.logger.log(Level.WARNING, "Could not parse extra-info " - + "descriptor.", e); - return; - } - - /* Determine filename of sanitized extra-info descriptor. 
*/ - String descriptorDigest = null; - try { - String ascii = new String(data, "US-ASCII"); - String startToken = "extra-info "; - String sigToken = "\nrouter-signature\n"; - int start = ascii.indexOf(startToken); - int sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 && sig >= 0 && sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest)); - } - } catch (UnsupportedEncodingException e) { - /* Handle below. */ - } - if (descriptorDigest == null) { - this.logger.log(Level.WARNING, "Could not calculate extra-info " - + "descriptor digest."); - return; - } - String descriptorDigestSha256Base64 = null; - if (masterKeyEd25519FromIdentityEd25519 != null) { - try { - String ascii = new String(data, "US-ASCII"); - String startToken = "extra-info "; - String sigToken = "\n-----END SIGNATURE-----\n"; - int start = ascii.indexOf(startToken); - int sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 && sig >= 0 && sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - descriptorDigestSha256Base64 = Base64.encodeBase64String( - DigestUtils.sha256(DigestUtils.sha256(forDigest))) - .replaceAll("=", ""); - } - } catch (UnsupportedEncodingException e) { - /* Handle below. 
*/ - } - if (descriptorDigestSha256Base64 == null) { - this.logger.log(Level.WARNING, "Could not calculate extra-info " - + "descriptor SHA256 digest."); - return; - } - } - String dyear = published.substring(0, 4); - String dmonth = published.substring(5, 7); - File tarballFile = new File( - this.sanitizedBridgesDirectory.getAbsolutePath() + "/" - + dyear + "/" + dmonth + "/extra-infos/" - + descriptorDigest.charAt(0) + "/" - + descriptorDigest.charAt(1) + "/" - + descriptorDigest); - File rsyncCatFile = new File("recent/bridge-descriptors/" - + "extra-infos/" + this.rsyncCatString + "-extra-infos.tmp"); - File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; - boolean[] append = new boolean[] { false, true }; - try { - for (int i = 0; i < outputFiles.length; i++) { - File outputFile = outputFiles[i]; - boolean appendToFile = append[i]; - if (outputFile.exists() && !appendToFile) { - /* We already stored this descriptor to disk before, so let's - * not store it yet another time. */ - break; - } - outputFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - outputFile, appendToFile)); - bw.write("@type bridge-extra-info 1.3\n"); - bw.write(scrubbedDesc); - if (descriptorDigestSha256Base64 != null) { - bw.write("router-digest-sha256 " + descriptorDigestSha256Base64 - + "\n"); - } - bw.write("router-digest " + descriptorDigest.toUpperCase() - + "\n"); - bw.close(); - } - } catch (Exception e) { - this.logger.log(Level.WARNING, "Could not write sanitized " - + "extra-info descriptor to disk.", e); - } - } - - /** - * Rewrite all network statuses that might contain references to server - * descriptors we added or updated in this execution. This applies to - * all statuses that have been published up to 24 hours after any added - * or updated server descriptor. - */ - public void finishWriting() { - - /* Delete secrets that we don't need anymore. 
*/ - if (!this.secretsForHashingIPAddresses.isEmpty() - && this.secretsForHashingIPAddresses.firstKey().compareTo( - this.bridgeSanitizingCutOffTimestamp) < 0) { - try { - int kept = 0; - int deleted = 0; - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.bridgeIpSecretsFile)); - for (Map.Entry<String, byte[]> e : - this.secretsForHashingIPAddresses.entrySet()) { - if (e.getKey().compareTo( - this.bridgeSanitizingCutOffTimestamp) < 0) { - deleted++; - } else { - bw.write(e.getKey() + "," + Hex.encodeHexString(e.getValue()) - + "\n"); - kept++; - } - } - bw.close(); - this.logger.info("Deleted " + deleted + " secrets that we don't " - + "need anymore and kept " + kept + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not store reduced set of " - + "secrets to disk! This is a bad sign, better check what's " - + "going on!", e); - } - } - } - - private void checkStaleDescriptors() { - SimpleDateFormat dateTimeFormat = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - long tooOldMillis = System.currentTimeMillis() - 330L * 60L * 1000L; - try { - long maxNetworkStatusPublishedMillis = - dateTimeFormat.parse(maxNetworkStatusPublishedTime).getTime(); - if (maxNetworkStatusPublishedMillis > 0L - && maxNetworkStatusPublishedMillis < tooOldMillis) { - this.logger.warning("The last known bridge network status was " - + "published " + maxNetworkStatusPublishedTime + ", which is " - + "more than 5:30 hours in the past."); - } - long maxServerDescriptorPublishedMillis = - dateTimeFormat.parse(maxServerDescriptorPublishedTime) - .getTime(); - if (maxServerDescriptorPublishedMillis > 0L - && maxServerDescriptorPublishedMillis < tooOldMillis) { - this.logger.warning("The last known bridge server descriptor was " - + "published " + maxServerDescriptorPublishedTime + ", which " - + "is more than 5:30 hours in the past."); - } - long maxExtraInfoDescriptorPublishedMillis = - 
dateTimeFormat.parse(maxExtraInfoDescriptorPublishedTime) - .getTime(); - if (maxExtraInfoDescriptorPublishedMillis > 0L - && maxExtraInfoDescriptorPublishedMillis < tooOldMillis) { - this.logger.warning("The last known bridge extra-info descriptor " - + "was published " + maxExtraInfoDescriptorPublishedTime - + ", which is more than 5:30 hours in the past."); - } - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Unable to parse timestamp for " - + "stale check.", e); - } - } - - /* Delete all files from the rsync directory that have not been modified - * in the last three days, and remove the .tmp extension from newly - * written files. */ - public void cleanUpRsyncDirectory() { - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - Stack<File> allFiles = new Stack<File>(); - allFiles.add(new File("recent/bridge-descriptors")); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - allFiles.addAll(Arrays.asList(file.listFiles())); - } else if (file.lastModified() < cutOffMillis) { - file.delete(); - } else if (file.getName().endsWith(".tmp")) { - file.renameTo(new File(file.getParentFile(), - file.getName().substring(0, - file.getName().lastIndexOf(".tmp")))); - } - } - } -} - diff --git a/src/org/torproject/collector/exitlists/ExitListDownloader.java b/src/org/torproject/collector/exitlists/ExitListDownloader.java deleted file mode 100644 index 54fd50f..0000000 --- a/src/org/torproject/collector/exitlists/ExitListDownloader.java +++ /dev/null @@ -1,212 +0,0 @@ -/* Copyright 2010--2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.exitlists; - -import org.torproject.collector.main.Configuration; -import org.torproject.collector.main.LockFile; -import org.torproject.descriptor.Descriptor; -import org.torproject.descriptor.DescriptorParseException; -import org.torproject.descriptor.DescriptorParser; -import 
org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.ExitList; - -import java.io.BufferedInputStream; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.net.HttpURLConnection; -import java.net.URL; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Date; -import java.util.List; -import java.util.SortedSet; -import java.util.Stack; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -public class ExitListDownloader extends Thread { - - public static void main(String[] args) { - - Logger logger = Logger.getLogger(ExitListDownloader.class.getName()); - logger.info("Starting exit-lists module of CollecTor."); - - // Initialize configuration - Configuration config = new Configuration(); - - // Use lock file to avoid overlapping runs - LockFile lf = new LockFile("exit-lists"); - if (!lf.acquireLock()) { - logger.severe("Warning: CollecTor is already running or has not exited " - + "cleanly! 
Exiting!"); - System.exit(1); - } - - // Download exit list and store it to disk - new ExitListDownloader(config).run(); - - // Remove lock file - lf.releaseLock(); - - logger.info("Terminating exit-lists module of CollecTor."); - } - - public ExitListDownloader(Configuration config) { - } - - public void run() { - - Logger logger = Logger.getLogger(ExitListDownloader.class.getName()); - - SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - - Date downloadedDate = new Date(); - String downloadedExitList = null; - try { - logger.fine("Downloading exit list..."); - StringBuilder sb = new StringBuilder(); - sb.append("@type tordnsel 1.0\n"); - sb.append("Downloaded " + dateTimeFormat.format(downloadedDate) - + "\n"); - String exitAddressesUrl = - "http://exitlist.torproject.org/exit-addresses"; - URL u = new URL(exitAddressesUrl); - HttpURLConnection huc = (HttpURLConnection) u.openConnection(); - huc.setRequestMethod("GET"); - huc.connect(); - int response = huc.getResponseCode(); - if (response != 200) { - logger.warning("Could not download exit list. 
Response code " - + response); - return; - } - BufferedInputStream in = new BufferedInputStream( - huc.getInputStream()); - int len; - byte[] data = new byte[1024]; - while ((len = in.read(data, 0, 1024)) >= 0) { - sb.append(new String(data, 0, len)); - } - in.close(); - downloadedExitList = sb.toString(); - logger.fine("Finished downloading exit list."); - } catch (IOException e) { - logger.log(Level.WARNING, "Failed downloading exit list", e); - return; - } - if (downloadedExitList == null) { - logger.warning("Failed downloading exit list"); - return; - } - - SimpleDateFormat tarballFormat = - new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); - tarballFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File("out/exit-lists/" + tarballFormat.format( - downloadedDate)); - - long maxScanMillis = 0L; - try { - DescriptorParser descriptorParser = - DescriptorSourceFactory.createDescriptorParser(); - List<Descriptor> parsedDescriptors = - descriptorParser.parseDescriptors(downloadedExitList.getBytes(), - tarballFile.getName()); - if (parsedDescriptors.size() != 1 - || !(parsedDescriptors.get(0) instanceof ExitList)) { - logger.warning("Could not parse downloaded exit list"); - return; - } - ExitList parsedExitList = (ExitList) parsedDescriptors.get(0); - for (ExitList.Entry entry : parsedExitList.getEntries()) { - for (long scanMillis : entry.getExitAddresses().values()) { - maxScanMillis = Math.max(maxScanMillis, scanMillis); - } - } - } catch (DescriptorParseException e) { - logger.log(Level.WARNING, "Could not parse downloaded exit list", - e); - } - if (maxScanMillis > 0L - && maxScanMillis + 330L * 60L * 1000L < System.currentTimeMillis()) { - logger.warning("The last reported scan in the downloaded exit list " - + "took place at " + dateTimeFormat.format(maxScanMillis) - + ", which is more than 5:30 hours in the past."); - } - - /* Write to disk. 
*/ - File rsyncFile = new File("recent/exit-lists/" - + tarballFile.getName()); - File[] outputFiles = new File[] { tarballFile, rsyncFile }; - for (File outputFile : outputFiles) { - try { - outputFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - outputFile)); - bw.write(downloadedExitList); - bw.close(); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not write downloaded exit list " - + "to " + outputFile.getAbsolutePath(), e); - } - } - - /* Write stats. */ - StringBuilder dumpStats = new StringBuilder("Finished downloading " - + "exit list.\nLast three exit lists are:"); - Stack<File> filesInInputDir = new Stack<File>(); - filesInInputDir.add(new File("out/exit-lists")); - SortedSet<File> lastThreeExitLists = new TreeSet<File>(); - while (!filesInInputDir.isEmpty()) { - File pop = filesInInputDir.pop(); - if (pop.isDirectory()) { - SortedSet<File> lastThreeElements = new TreeSet<File>(); - for (File f : pop.listFiles()) { - lastThreeElements.add(f); - } - while (lastThreeElements.size() > 3) { - lastThreeElements.remove(lastThreeElements.first()); - } - for (File f : lastThreeElements) { - filesInInputDir.add(f); - } - } else { - lastThreeExitLists.add(pop); - while (lastThreeExitLists.size() > 3) { - lastThreeExitLists.remove(lastThreeExitLists.first()); - } - } - } - for (File f : lastThreeExitLists) { - dumpStats.append("\n" + f.getName()); - } - logger.info(dumpStats.toString()); - - this.cleanUpRsyncDirectory(); - } - - /* Delete all files from the rsync directory that have not been modified - * in the last three days. 
*/ - public void cleanUpRsyncDirectory() { - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - Stack<File> allFiles = new Stack<File>(); - allFiles.add(new File("recent/exit-lists")); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - allFiles.addAll(Arrays.asList(file.listFiles())); - } else if (file.lastModified() < cutOffMillis) { - file.delete(); - } - } - } -} - diff --git a/src/org/torproject/collector/index/CreateIndexJson.java b/src/org/torproject/collector/index/CreateIndexJson.java deleted file mode 100644 index ac5adf5..0000000 --- a/src/org/torproject/collector/index/CreateIndexJson.java +++ /dev/null @@ -1,168 +0,0 @@ -/* Copyright 2015--2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.index; - -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; - -import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; -import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.text.DateFormat; -import java.text.SimpleDateFormat; -import java.util.Locale; -import java.util.SortedSet; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.zip.GZIPOutputStream; - -/* Create a fresh index.json containing all directories and files in the - * archive/ and recent/ directories. - * - * Note that if this ever takes longer than a few seconds, we'll have to - * cache index parts of directories or files that haven't changed. - * Example: if we parse include cryptographic hashes or @type information, - * we'll likely have to do that. 
*/ -public class CreateIndexJson { - - static final File indexJsonFile = new File("index.json"); - - static final String basePath = "https://collector.torproject.org"; - - static final File[] indexedDirectories = new File[] { - new File("archive"), new File("recent") }; - - static final String dateTimePattern = "yyyy-MM-dd HH:mm"; - - static final Locale dateTimeLocale = Locale.US; - - static final TimeZone dateTimezone = TimeZone.getTimeZone("UTC"); - - public static void main(String[] args) throws IOException { - writeIndex(indexDirectories()); - } - - static class DirectoryNode implements Comparable<DirectoryNode> { - String path; - SortedSet<FileNode> files; - SortedSet<DirectoryNode> directories; - - DirectoryNode(String path, SortedSet<FileNode> files, - SortedSet<DirectoryNode> directories) { - this.path = path; - this.files = files; - this.directories = directories; - } - - public int compareTo(DirectoryNode o) { - return this.path.compareTo(o.path); - } - } - - static class IndexNode { - String index_created; - String path; - SortedSet<FileNode> files; - SortedSet<DirectoryNode> directories; - - IndexNode(String index_created, String path, - SortedSet<FileNode> files, - SortedSet<DirectoryNode> directories) { - this.index_created = index_created; - this.path = path; - this.files = files; - this.directories = directories; - } - } - - static class FileNode implements Comparable<FileNode> { - String path; - long size; - String last_modified; - - FileNode(String path, long size, String last_modified) { - this.path = path; - this.size = size; - this.last_modified = last_modified; - } - - public int compareTo(FileNode o) { - return this.path.compareTo(o.path); - } - } - - static DateFormat dateTimeFormat; - - static { - dateTimeFormat = new SimpleDateFormat(dateTimePattern, - dateTimeLocale); - dateTimeFormat.setLenient(false); - dateTimeFormat.setTimeZone(dateTimezone); - } - - static IndexNode indexDirectories() { - SortedSet<DirectoryNode> directoryNodes = - 
new TreeSet<DirectoryNode>(); - for (File directory : indexedDirectories) { - if (directory.exists() && directory.isDirectory()) { - directoryNodes.add(indexDirectory(directory)); - } - } - return new IndexNode(dateTimeFormat.format( - System.currentTimeMillis()), basePath, null, directoryNodes); - } - - static DirectoryNode indexDirectory(File directory) { - SortedSet<FileNode> fileNodes = new TreeSet<FileNode>(); - SortedSet<DirectoryNode> directoryNodes = - new TreeSet<DirectoryNode>(); - for (File fileOrDirectory : directory.listFiles()) { - if (fileOrDirectory.getName().startsWith(".")) { - continue; - } - if (fileOrDirectory.isFile()) { - fileNodes.add(indexFile(fileOrDirectory)); - } else { - directoryNodes.add(indexDirectory(fileOrDirectory)); - } - } - DirectoryNode directoryNode = new DirectoryNode( - directory.getName(), fileNodes.isEmpty() ? null : fileNodes, - directoryNodes.isEmpty() ? null : directoryNodes); - return directoryNode; - } - - static FileNode indexFile(File file) { - FileNode fileNode = new FileNode(file.getName(), file.length(), - dateTimeFormat.format(file.lastModified())); - return fileNode; - } - - static void writeIndex(IndexNode indexNode) throws IOException { - Gson gson = new GsonBuilder().create(); - String indexNodeString = gson.toJson(indexNode); - Writer[] writers = new Writer[] { - new FileWriter(indexJsonFile), - new OutputStreamWriter(new GZIPOutputStream( - new FileOutputStream(indexJsonFile + ".gz"))), - new OutputStreamWriter(new XZCompressorOutputStream( - new FileOutputStream(indexJsonFile + ".xz"))), - new OutputStreamWriter(new BZip2CompressorOutputStream( - new FileOutputStream(indexJsonFile + ".bz2"))) - }; - for (Writer writer : writers) { - BufferedWriter bufferedWriter = new BufferedWriter(writer); - bufferedWriter.write(indexNodeString); - bufferedWriter.close(); - } - } -} - diff --git a/src/org/torproject/collector/main/Configuration.java b/src/org/torproject/collector/main/Configuration.java deleted file 
mode 100644 index aee1d02..0000000 --- a/src/org/torproject/collector/main/Configuration.java +++ /dev/null @@ -1,318 +0,0 @@ -/* Copyright 2010--2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.main; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Initialize configuration with hard-coded defaults, overwrite with - * configuration in config file, if exists, and answer Main.java about our - * configuration. - */ -public class Configuration { - private String directoryArchivesOutputDirectory = - "out/relay-descriptors/"; - private boolean importCachedRelayDescriptors = false; - private List<String> cachedRelayDescriptorsDirectory = - new ArrayList<String>(Arrays.asList( - "in/relay-descriptors/cacheddesc/".split(","))); - private boolean importDirectoryArchives = false; - private String directoryArchivesDirectory = - "in/relay-descriptors/archives/"; - private boolean keepDirectoryArchiveImportHistory = false; - private boolean replaceIPAddressesWithHashes = false; - private long limitBridgeDescriptorMappings = -1L; - private String sanitizedBridgesWriteDirectory = - "out/bridge-descriptors/"; - private String bridgeSnapshotsDirectory = "in/bridge-descriptors/"; - private boolean downloadRelayDescriptors = false; - private List<String> downloadFromDirectoryAuthorities = Arrays.asList(( - "86.59.21.38,76.73.17.194:9030,171.25.193.9:443," - + "193.23.244.244,208.83.223.34:443,128.31.0.34:9131," - + "194.109.206.212,212.112.245.170,154.35.32.5").split(",")); - private List<String> downloadVotesByFingerprint = Arrays.asList(( - "14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4," - + 
"27B6B5996C426270A5C95488AA5BCEB6BCC86956," - + "49015F787433103580E3B66A1707A00E60F2D15B," - + "585769C78764D58426B8B52B6651A5A71137189A," - + "80550987E1D626E3EBA5E5E75A458DE0626D088C," - + "D586D18309DED4CD6D57C18FDB97EFA96D330566," - + "E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58," - + "ED03BB616EB2F60BEC80151114BB25CEF515B226," - + "EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97").split(",")); - private boolean downloadCurrentConsensus = true; - private boolean downloadCurrentMicrodescConsensus = true; - private boolean downloadCurrentVotes = true; - private boolean downloadMissingServerDescriptors = true; - private boolean downloadMissingExtraInfoDescriptors = true; - private boolean downloadMissingMicrodescriptors = true; - private boolean downloadAllServerDescriptors = false; - private boolean downloadAllExtraInfoDescriptors = false; - private boolean compressRelayDescriptorDownloads; - private String torperfOutputDirectory = "out/torperf/"; - private SortedMap<String, String> torperfSources = null; - private List<String> torperfFiles = null; - - public Configuration() { - - /* Initialize logger. */ - Logger logger = Logger.getLogger(Configuration.class.getName()); - - /* Read config file, if present. */ - File configFile = new File("config"); - if (!configFile.exists()) { - logger.warning("Could not find config file. In the default " - + "configuration, we are not configured to read data from any " - + "data source or write data to any data sink. You need to " - + "create a config file (" + configFile.getAbsolutePath() - + ") and provide at least one data source and one data sink. 
" - + "Refer to the manual for more information."); - return; - } - String line = null; - boolean containsCachedRelayDescriptorsDirectory = false; - try { - BufferedReader br = new BufferedReader(new FileReader(configFile)); - while ((line = br.readLine()) != null) { - if (line.startsWith("#") || line.length() < 1) { - continue; - } else if (line.startsWith("DirectoryArchivesOutputDirectory")) { - this.directoryArchivesOutputDirectory = line.split(" ")[1]; - } else if (line.startsWith("ImportCachedRelayDescriptors")) { - this.importCachedRelayDescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("CachedRelayDescriptorsDirectory")) { - if (!containsCachedRelayDescriptorsDirectory) { - this.cachedRelayDescriptorsDirectory.clear(); - containsCachedRelayDescriptorsDirectory = true; - } - this.cachedRelayDescriptorsDirectory.add(line.split(" ")[1]); - } else if (line.startsWith("ImportDirectoryArchives")) { - this.importDirectoryArchives = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DirectoryArchivesDirectory")) { - this.directoryArchivesDirectory = line.split(" ")[1]; - } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) { - this.keepDirectoryArchiveImportHistory = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("ReplaceIPAddressesWithHashes")) { - this.replaceIPAddressesWithHashes = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("LimitBridgeDescriptorMappings")) { - this.limitBridgeDescriptorMappings = Long.parseLong( - line.split(" ")[1]); - } else if (line.startsWith("SanitizedBridgesWriteDirectory")) { - this.sanitizedBridgesWriteDirectory = line.split(" ")[1]; - } else if (line.startsWith("BridgeSnapshotsDirectory")) { - this.bridgeSnapshotsDirectory = line.split(" ")[1]; - } else if (line.startsWith("DownloadRelayDescriptors")) { - this.downloadRelayDescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if 
(line.startsWith("DownloadFromDirectoryAuthorities")) { - this.downloadFromDirectoryAuthorities = new ArrayList<String>(); - for (String dir : line.split(" ")[1].split(",")) { - // test if IP:port pair has correct format - if (dir.length() < 1) { - logger.severe("Configuration file contains directory " - + "authority IP:port of length 0 in line '" + line - + "'! Exiting!"); - System.exit(1); - } - new URL("http://" + dir + "/"); - this.downloadFromDirectoryAuthorities.add(dir); - } - } else if (line.startsWith("DownloadVotesByFingerprint")) { - this.downloadVotesByFingerprint = new ArrayList<String>(); - for (String fingerprint : line.split(" ")[1].split(",")) { - this.downloadVotesByFingerprint.add(fingerprint); - } - } else if (line.startsWith("DownloadCurrentConsensus")) { - this.downloadCurrentConsensus = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DownloadCurrentMicrodescConsensus")) { - this.downloadCurrentMicrodescConsensus = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DownloadCurrentVotes")) { - this.downloadCurrentVotes = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DownloadMissingServerDescriptors")) { - this.downloadMissingServerDescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith( - "DownloadMissingExtraInfoDescriptors")) { - this.downloadMissingExtraInfoDescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DownloadMissingMicrodescriptors")) { - this.downloadMissingMicrodescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DownloadAllServerDescriptors")) { - this.downloadAllServerDescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DownloadAllExtraInfoDescriptors")) { - this.downloadAllExtraInfoDescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("CompressRelayDescriptorDownloads")) 
{ - this.compressRelayDescriptorDownloads = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("TorperfOutputDirectory")) { - this.torperfOutputDirectory = line.split(" ")[1]; - } else if (line.startsWith("TorperfSource")) { - if (this.torperfSources == null) { - this.torperfSources = new TreeMap<String, String>(); - } - String[] parts = line.split(" "); - String sourceName = parts[1]; - String baseUrl = parts[2]; - this.torperfSources.put(sourceName, baseUrl); - } else if (line.startsWith("TorperfFiles")) { - if (this.torperfFiles == null) { - this.torperfFiles = new ArrayList<String>(); - } - String[] parts = line.split(" "); - if (parts.length != 5) { - logger.severe("Configuration file contains TorperfFiles " - + "option with wrong number of values in line '" + line - + "'! Exiting!"); - System.exit(1); - } - this.torperfFiles.add(line); - } else { - logger.severe("Configuration file contains unrecognized " - + "configuration key in line '" + line + "'! Exiting!"); - System.exit(1); - } - } - br.close(); - } catch (ArrayIndexOutOfBoundsException e) { - logger.severe("Configuration file contains configuration key " - + "without value in line '" + line + "'. Exiting!"); - System.exit(1); - } catch (MalformedURLException e) { - logger.severe("Configuration file contains illegal URL or IP:port " - + "pair in line '" + line + "'. Exiting!"); - System.exit(1); - } catch (NumberFormatException e) { - logger.severe("Configuration file contains illegal value in line '" - + line + "' with legal values being 0 or 1. Exiting!"); - System.exit(1); - } catch (IOException e) { - logger.log(Level.SEVERE, "Unknown problem while reading config " - + "file! 
Exiting!", e); - System.exit(1); - } - } - - public String getDirectoryArchivesOutputDirectory() { - return this.directoryArchivesOutputDirectory; - } - - public boolean getImportCachedRelayDescriptors() { - return this.importCachedRelayDescriptors; - } - - public List<String> getCachedRelayDescriptorDirectory() { - return this.cachedRelayDescriptorsDirectory; - } - - public boolean getImportDirectoryArchives() { - return this.importDirectoryArchives; - } - - public String getDirectoryArchivesDirectory() { - return this.directoryArchivesDirectory; - } - - public boolean getKeepDirectoryArchiveImportHistory() { - return this.keepDirectoryArchiveImportHistory; - } - - public boolean getReplaceIPAddressesWithHashes() { - return this.replaceIPAddressesWithHashes; - } - - public long getLimitBridgeDescriptorMappings() { - return this.limitBridgeDescriptorMappings; - } - - public String getSanitizedBridgesWriteDirectory() { - return this.sanitizedBridgesWriteDirectory; - } - - public String getBridgeSnapshotsDirectory() { - return this.bridgeSnapshotsDirectory; - } - - public boolean getDownloadRelayDescriptors() { - return this.downloadRelayDescriptors; - } - - public List<String> getDownloadFromDirectoryAuthorities() { - return this.downloadFromDirectoryAuthorities; - } - - public List<String> getDownloadVotesByFingerprint() { - return this.downloadVotesByFingerprint; - } - - public boolean getDownloadCurrentConsensus() { - return this.downloadCurrentConsensus; - } - - public boolean getDownloadCurrentMicrodescConsensus() { - return this.downloadCurrentMicrodescConsensus; - } - - public boolean getDownloadCurrentVotes() { - return this.downloadCurrentVotes; - } - - public boolean getDownloadMissingServerDescriptors() { - return this.downloadMissingServerDescriptors; - } - - public boolean getDownloadMissingExtraInfoDescriptors() { - return this.downloadMissingExtraInfoDescriptors; - } - - public boolean getDownloadMissingMicrodescriptors() { - return 
this.downloadMissingMicrodescriptors; - } - - public boolean getDownloadAllServerDescriptors() { - return this.downloadAllServerDescriptors; - } - - public boolean getDownloadAllExtraInfoDescriptors() { - return this.downloadAllExtraInfoDescriptors; - } - - public boolean getCompressRelayDescriptorDownloads() { - return this.compressRelayDescriptorDownloads; - } - - public String getTorperfOutputDirectory() { - return this.torperfOutputDirectory; - } - - public SortedMap<String, String> getTorperfSources() { - return this.torperfSources; - } - - public List<String> getTorperfFiles() { - return this.torperfFiles; - } -} - diff --git a/src/org/torproject/collector/main/LockFile.java b/src/org/torproject/collector/main/LockFile.java deleted file mode 100644 index b07d4b1..0000000 --- a/src/org/torproject/collector/main/LockFile.java +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright 2010--2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.main; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.logging.Logger; - -public class LockFile { - - private File lockFile; - private Logger logger; - - public LockFile(String moduleName) { - this.lockFile = new File("lock/" + moduleName); - this.logger = Logger.getLogger(LockFile.class.getName()); - } - - public boolean acquireLock() { - this.logger.fine("Trying to acquire lock..."); - try { - if (this.lockFile.exists()) { - BufferedReader br = new BufferedReader(new FileReader( - this.lockFile)); - long runStarted = Long.parseLong(br.readLine()); - br.close(); - if (System.currentTimeMillis() - runStarted < 55L * 60L * 1000L) { - return false; - } - } - this.lockFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.lockFile)); - bw.append("" + System.currentTimeMillis() + "\n"); - bw.close(); - 
this.logger.fine("Acquired lock."); - return true; - } catch (IOException e) { - this.logger.warning("Caught exception while trying to acquire " - + "lock!"); - return false; - } - } - - public void releaseLock() { - this.logger.fine("Releasing lock..."); - this.lockFile.delete(); - this.logger.fine("Released lock."); - } -} - diff --git a/src/org/torproject/collector/relaydescs/ArchiveReader.java b/src/org/torproject/collector/relaydescs/ArchiveReader.java deleted file mode 100644 index 72f8231..0000000 --- a/src/org/torproject/collector/relaydescs/ArchiveReader.java +++ /dev/null @@ -1,286 +0,0 @@ -/* Copyright 2010--2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.relaydescs; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.codec.digest.DigestUtils; -import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.StringReader; -import java.io.UnsupportedEncodingException; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedSet; -import java.util.Stack; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Read in all files in a given directory and pass buffered readers of - * them to the relay descriptor parser. 
- */ -public class ArchiveReader { - - private Map<String, Set<String>> microdescriptorValidAfterTimes = - new HashMap<String, Set<String>>(); - - public ArchiveReader(RelayDescriptorParser rdp, File archivesDirectory, - File statsDirectory, boolean keepImportHistory) { - - if (rdp == null || archivesDirectory == null - || statsDirectory == null) { - throw new IllegalArgumentException(); - } - - rdp.setArchiveReader(this); - int parsedFiles = 0; - int ignoredFiles = 0; - Logger logger = Logger.getLogger(ArchiveReader.class.getName()); - SortedSet<String> archivesImportHistory = new TreeSet<String>(); - File archivesImportHistoryFile = new File(statsDirectory, - "archives-import-history"); - if (keepImportHistory && archivesImportHistoryFile.exists()) { - try { - BufferedReader br = new BufferedReader(new FileReader( - archivesImportHistoryFile)); - String line = null; - while ((line = br.readLine()) != null) { - archivesImportHistory.add(line); - } - br.close(); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not read in archives import " - + "history file. Skipping."); - } - } - if (archivesDirectory.exists()) { - logger.fine("Importing files in directory " + archivesDirectory - + "/..."); - Stack<File> filesInInputDir = new Stack<File>(); - filesInInputDir.add(archivesDirectory); - List<File> problems = new ArrayList<File>(); - Set<File> filesToRetry = new HashSet<File>(); - while (!filesInInputDir.isEmpty()) { - File pop = filesInInputDir.pop(); - if (pop.isDirectory()) { - for (File f : pop.listFiles()) { - filesInInputDir.add(f); - } - } else { - if (rdp != null) { - try { - BufferedInputStream bis = null; - if (keepImportHistory - && archivesImportHistory.contains(pop.getName())) { - ignoredFiles++; - continue; - } else if (pop.getName().endsWith(".tar.bz2")) { - logger.warning("Cannot parse compressed tarball " - + pop.getAbsolutePath() + ". 
Skipping."); - continue; - } else if (pop.getName().endsWith(".bz2")) { - FileInputStream fis = new FileInputStream(pop); - BZip2CompressorInputStream bcis = - new BZip2CompressorInputStream(fis); - bis = new BufferedInputStream(bcis); - } else { - FileInputStream fis = new FileInputStream(pop); - bis = new BufferedInputStream(fis); - } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int len; - byte[] data = new byte[1024]; - while ((len = bis.read(data, 0, 1024)) >= 0) { - baos.write(data, 0, len); - } - bis.close(); - byte[] allData = baos.toByteArray(); - boolean stored = rdp.parse(allData); - if (!stored) { - filesToRetry.add(pop); - continue; - } - if (keepImportHistory) { - archivesImportHistory.add(pop.getName()); - } - parsedFiles++; - } catch (IOException e) { - problems.add(pop); - if (problems.size() > 3) { - break; - } - } - } - } - } - for (File pop : filesToRetry) { - /* TODO We need to parse microdescriptors ourselves, rather than - * RelayDescriptorParser, because only we know the valid-after - * time(s) of microdesc consensus(es) containing this - * microdescriptor. However, this breaks functional abstraction - * pretty badly. 
*/ - if (rdp != null) { - try { - BufferedInputStream bis = null; - if (pop.getName().endsWith(".bz2")) { - FileInputStream fis = new FileInputStream(pop); - BZip2CompressorInputStream bcis = - new BZip2CompressorInputStream(fis); - bis = new BufferedInputStream(bcis); - } else { - FileInputStream fis = new FileInputStream(pop); - bis = new BufferedInputStream(fis); - } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int len; - byte[] data = new byte[1024]; - while ((len = bis.read(data, 0, 1024)) >= 0) { - baos.write(data, 0, len); - } - bis.close(); - byte[] allData = baos.toByteArray(); - BufferedReader br = new BufferedReader(new StringReader( - new String(allData, "US-ASCII"))); - String line; - do { - line = br.readLine(); - } while (line != null && line.startsWith("@")); - br.close(); - if (line == null) { - logger.fine("We were given an empty descriptor for " - + "parsing. Ignoring."); - continue; - } - if (!line.equals("onion-key")) { - logger.fine("Skipping non-recognized descriptor."); - continue; - } - SimpleDateFormat parseFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - parseFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - String ascii = null; - try { - ascii = new String(allData, "US-ASCII"); - } catch (UnsupportedEncodingException e) { - /* No way that US-ASCII is not supported. 
*/ - } - int start = -1; - int end = -1; - String startToken = "onion-key\n"; - while (end < ascii.length()) { - start = ascii.indexOf(startToken, end); - if (start < 0) { - break; - } - end = ascii.indexOf(startToken, start + 1); - if (end < 0) { - end = ascii.length(); - if (end <= start) { - break; - } - } - byte[] descBytes = new byte[end - start]; - System.arraycopy(allData, start, descBytes, 0, end - start); - String digest256Base64 = Base64.encodeBase64String( - DigestUtils.sha256(descBytes)).replaceAll("=", ""); - String digest256Hex = DigestUtils.sha256Hex(descBytes); - if (!this.microdescriptorValidAfterTimes.containsKey( - digest256Hex)) { - logger.fine("Could not store microdescriptor '" - + digest256Hex + "', which was not contained in a " - + "microdesc consensus."); - continue; - } - for (String validAfterTime : - this.microdescriptorValidAfterTimes.get(digest256Hex)) { - try { - long validAfter = - parseFormat.parse(validAfterTime).getTime(); - rdp.storeMicrodescriptor(descBytes, digest256Hex, - digest256Base64, validAfter); - } catch (ParseException e) { - logger.log(Level.WARNING, "Could not parse " - + "valid-after time '" + validAfterTime + "'. Not " - + "storing microdescriptor.", e); - } - } - } - if (keepImportHistory) { - archivesImportHistory.add(pop.getName()); - } - parsedFiles++; - } catch (IOException e) { - problems.add(pop); - if (problems.size() > 3) { - break; - } - } - } - } - if (problems.isEmpty()) { - logger.fine("Finished importing files in directory " - + archivesDirectory + "/."); - } else { - StringBuilder sb = new StringBuilder("Failed importing files in " - + "directory " + archivesDirectory + "/:"); - int printed = 0; - for (File f : problems) { - sb.append("\n " + f.getAbsolutePath()); - if (++printed >= 3) { - sb.append("\n ... 
more"); - break; - } - } - } - } - if (keepImportHistory) { - try { - archivesImportHistoryFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - archivesImportHistoryFile)); - for (String line : archivesImportHistory) { - bw.write(line + "\n"); - } - bw.close(); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not write archives import " - + "history file."); - } - } - logger.info("Finished importing relay descriptors from local " - + "directory:\nParsed " + parsedFiles + ", ignored " - + ignoredFiles + " files."); - } - - public void haveParsedMicrodescConsensus(String validAfterTime, - SortedSet<String> microdescriptorDigests) { - for (String microdescriptor : microdescriptorDigests) { - if (!this.microdescriptorValidAfterTimes.containsKey( - microdescriptor)) { - this.microdescriptorValidAfterTimes.put(microdescriptor, - new HashSet<String>()); - } - this.microdescriptorValidAfterTimes.get(microdescriptor).add( - validAfterTime); - } - } -} - diff --git a/src/org/torproject/collector/relaydescs/ArchiveWriter.java b/src/org/torproject/collector/relaydescs/ArchiveWriter.java deleted file mode 100644 index cf603d1..0000000 --- a/src/org/torproject/collector/relaydescs/ArchiveWriter.java +++ /dev/null @@ -1,845 +0,0 @@ -/* Copyright 2010--2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.relaydescs; - -import org.torproject.collector.main.Configuration; -import org.torproject.collector.main.LockFile; -import org.torproject.descriptor.DescriptorParseException; -import org.torproject.descriptor.DescriptorParser; -import org.torproject.descriptor.DescriptorSourceFactory; - -import java.io.BufferedOutputStream; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.text.ParseException; -import 
java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.Stack; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.logging.Level; -import java.util.logging.Logger; - -public class ArchiveWriter extends Thread { - - private Configuration config; - - private long now = System.currentTimeMillis(); - private Logger logger; - private File outputDirectory; - private String rsyncCatString; - private DescriptorParser descriptorParser; - private int storedConsensusesCounter = 0; - private int storedMicrodescConsensusesCounter = 0; - private int storedVotesCounter = 0; - private int storedCertsCounter = 0; - private int storedServerDescriptorsCounter = 0; - private int storedExtraInfoDescriptorsCounter = 0; - private int storedMicrodescriptorsCounter = 0; - - private SortedMap<Long, SortedSet<String>> storedConsensuses = - new TreeMap<Long, SortedSet<String>>(); - private SortedMap<Long, SortedSet<String>> storedMicrodescConsensuses = - new TreeMap<Long, SortedSet<String>>(); - private SortedMap<Long, Integer> expectedVotes = - new TreeMap<Long, Integer>(); - private SortedMap<Long, SortedMap<String, SortedSet<String>>> - storedVotes = - new TreeMap<Long, SortedMap<String, SortedSet<String>>>(); - private SortedMap<Long, Map<String, String>> storedServerDescriptors = - new TreeMap<Long, Map<String, String>>(); - private SortedMap<Long, Set<String>> storedExtraInfoDescriptors = - new TreeMap<Long, Set<String>>(); - private SortedMap<Long, Set<String>> storedMicrodescriptors = - new TreeMap<Long, Set<String>>(); - - private File storedServerDescriptorsFile = new File( - "stats/stored-server-descriptors"); - private File storedExtraInfoDescriptorsFile = new File( - "stats/stored-extra-info-descriptors"); - private File 
storedMicrodescriptorsFile = new File( - "stats/stored-microdescriptors"); - - private static final byte[] CONSENSUS_ANNOTATION = - "@type network-status-consensus-3 1.0\n".getBytes(); - - private static final byte[] MICRODESCCONSENSUS_ANNOTATION = - "@type network-status-microdesc-consensus-3 1.0\n".getBytes(); - - private static final byte[] VOTE_ANNOTATION = - "@type network-status-vote-3 1.0\n".getBytes(); - - private static final byte[] CERTIFICATE_ANNOTATION = - "@type dir-key-certificate-3 1.0\n".getBytes(); - - private static final byte[] SERVER_DESCRIPTOR_ANNOTATION = - "@type server-descriptor 1.0\n".getBytes(); - - private static final byte[] EXTRA_INFO_ANNOTATION = - "@type extra-info 1.0\n".getBytes(); - - private static final byte[] MICRODESCRIPTOR_ANNOTATION = - "@type microdescriptor 1.0\n".getBytes(); - - private StringBuilder intermediateStats = new StringBuilder(); - - public static void main(String[] args) { - - Logger logger = Logger.getLogger(ArchiveWriter.class.getName()); - logger.info("Starting relay-descriptors module of CollecTor."); - - // Initialize configuration - Configuration config = new Configuration(); - - // Use lock file to avoid overlapping runs - LockFile lf = new LockFile("relay-descriptors"); - if (!lf.acquireLock()) { - logger.severe("Warning: CollecTor is already running or has not exited " - + "cleanly! 
Exiting!"); - System.exit(1); - } - - // Import/download relay descriptors from the various sources - new ArchiveWriter(config).run(); - - new ReferenceChecker(new File("recent/relay-descriptors"), - new File("stats/references"), - new File("stats/references-history")).check(); - - // Remove lock file - lf.releaseLock(); - - logger.info("Terminating relay-descriptors module of CollecTor."); - } - - public ArchiveWriter(Configuration config) { - this.config = config; - } - - public void run() { - - File outputDirectory = - new File(config.getDirectoryArchivesOutputDirectory()); - File statsDirectory = new File("stats"); - - this.logger = Logger.getLogger(ArchiveWriter.class.getName()); - this.outputDirectory = outputDirectory; - SimpleDateFormat rsyncCatFormat = new SimpleDateFormat( - "yyyy-MM-dd-HH-mm-ss"); - rsyncCatFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - this.rsyncCatString = rsyncCatFormat.format( - System.currentTimeMillis()); - this.descriptorParser = - DescriptorSourceFactory.createDescriptorParser(); - - this.loadDescriptorDigests(); - - // Prepare relay descriptor parser - RelayDescriptorParser rdp = new RelayDescriptorParser(this); - - RelayDescriptorDownloader rdd = null; - if (config.getDownloadRelayDescriptors()) { - List<String> dirSources = - config.getDownloadFromDirectoryAuthorities(); - rdd = new RelayDescriptorDownloader(rdp, dirSources, - config.getDownloadVotesByFingerprint(), - config.getDownloadCurrentConsensus(), - config.getDownloadCurrentMicrodescConsensus(), - config.getDownloadCurrentVotes(), - config.getDownloadMissingServerDescriptors(), - config.getDownloadMissingExtraInfoDescriptors(), - config.getDownloadMissingMicrodescriptors(), - config.getDownloadAllServerDescriptors(), - config.getDownloadAllExtraInfoDescriptors(), - config.getCompressRelayDescriptorDownloads()); - rdp.setRelayDescriptorDownloader(rdd); - } - if (config.getImportCachedRelayDescriptors()) { - new CachedRelayDescriptorReader(rdp, - 
config.getCachedRelayDescriptorDirectory(), statsDirectory); - this.intermediateStats("importing relay descriptors from local " - + "Tor data directories"); - } - if (config.getImportDirectoryArchives()) { - new ArchiveReader(rdp, - new File(config.getDirectoryArchivesDirectory()), - statsDirectory, - config.getKeepDirectoryArchiveImportHistory()); - this.intermediateStats("importing relay descriptors from local " - + "directory"); - } - if (rdd != null) { - rdd.downloadDescriptors(); - rdd.writeFile(); - rdd = null; - this.intermediateStats("downloading relay descriptors from the " - + "directory authorities"); - } - - this.checkMissingDescriptors(); - - this.checkStaledescriptors(); - - this.cleanUpRsyncDirectory(); - - this.saveDescriptorDigests(); - } - - private void loadDescriptorDigests() { - SimpleDateFormat dateTimeFormat = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - try { - if (this.storedServerDescriptorsFile.exists()) { - BufferedReader br = new BufferedReader(new FileReader( - this.storedServerDescriptorsFile)); - String line; - while ((line = br.readLine()) != null) { - String[] parts = line.split(","); - if (parts.length != 3) { - this.logger.warning("Could not load server descriptor " - + "digests because of illegal line '" + line + "'. We " - + "might not be able to correctly check descriptors for " - + "completeness."); - break; - } - long published = dateTimeFormat.parse(parts[0]).getTime(); - if (published < this.now - 48L * 60L * 60L * 1000L) { - continue; - } - if (!this.storedServerDescriptors.containsKey(published)) { - this.storedServerDescriptors.put(published, - new HashMap<String, String>()); - } - String serverDescriptorDigest = parts[1]; - String extraInfoDescriptorDigest = parts[2].equals("NA") ? 
null - : parts[2]; - this.storedServerDescriptors.get(published).put( - serverDescriptorDigest, extraInfoDescriptorDigest); - } - br.close(); - } - if (this.storedExtraInfoDescriptorsFile.exists()) { - BufferedReader br = new BufferedReader(new FileReader( - this.storedExtraInfoDescriptorsFile)); - String line; - while ((line = br.readLine()) != null) { - String[] parts = line.split(","); - if (parts.length != 2) { - this.logger.warning("Could not load extra-info descriptor " - + "digests because of illegal line '" + line + "'. We " - + "might not be able to correctly check descriptors for " - + "completeness."); - break; - } - long published = dateTimeFormat.parse(parts[0]).getTime(); - if (published < this.now - 48L * 60L * 60L * 1000L) { - continue; - } - if (!this.storedExtraInfoDescriptors.containsKey(published)) { - this.storedExtraInfoDescriptors.put(published, - new HashSet<String>()); - } - String extraInfoDescriptorDigest = parts[1]; - this.storedExtraInfoDescriptors.get(published).add( - extraInfoDescriptorDigest); - } - br.close(); - } - if (this.storedMicrodescriptorsFile.exists()) { - BufferedReader br = new BufferedReader(new FileReader( - this.storedMicrodescriptorsFile)); - String line; - while ((line = br.readLine()) != null) { - String[] parts = line.split(","); - if (parts.length != 2) { - this.logger.warning("Could not load microdescriptor digests " - + "because of illegal line '" + line + "'. 
We might not " - + "be able to correctly check descriptors for " - + "completeness."); - break; - } - long validAfter = dateTimeFormat.parse(parts[0]).getTime(); - if (validAfter < this.now - 40L * 24L * 60L * 60L * 1000L) { - continue; - } - if (!this.storedMicrodescriptors.containsKey(validAfter)) { - this.storedMicrodescriptors.put(validAfter, - new HashSet<String>()); - } - String microdescriptorDigest = parts[1]; - this.storedMicrodescriptors.get(validAfter).add( - microdescriptorDigest); - } - br.close(); - } - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Could not load descriptor " - + "digests. We might not be able to correctly check " - + "descriptors for completeness.", e); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not load descriptor " - + "digests. We might not be able to correctly check " - + "descriptors for completeness.", e); - } - } - - public void intermediateStats(String event) { - intermediateStats.append("While " + event + ", we stored " - + this.storedConsensusesCounter + " consensus(es), " - + this.storedMicrodescConsensusesCounter + " microdesc " - + "consensus(es), " + this.storedVotesCounter + " vote(s), " - + this.storedCertsCounter + " certificate(s), " - + this.storedServerDescriptorsCounter + " server descriptor(s), " - + this.storedExtraInfoDescriptorsCounter + " extra-info " - + "descriptor(s), and " + this.storedMicrodescriptorsCounter - + " microdescriptor(s) to disk.\n"); - this.storedConsensusesCounter = 0; - this.storedMicrodescConsensusesCounter = 0; - this.storedVotesCounter = 0; - this.storedCertsCounter = 0; - this.storedServerDescriptorsCounter = 0; - this.storedExtraInfoDescriptorsCounter = 0; - this.storedMicrodescriptorsCounter = 0; - } - - private void checkMissingDescriptors() { - StringBuilder sb = new StringBuilder("Finished writing relay " - + "descriptors to disk.\n"); - sb.append(intermediateStats.toString()); - sb.append("Statistics on the completeness of written relay 
" - + "descriptors:"); - SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - Map<String, String> knownServerDescriptors = - new HashMap<String, String>(); - for (Map<String, String> descriptors : - this.storedServerDescriptors.values()) { - knownServerDescriptors.putAll(descriptors); - } - Set<String> knownExtraInfoDescriptors = new HashSet<String>(); - for (Set<String> descriptors : - this.storedExtraInfoDescriptors.values()) { - knownExtraInfoDescriptors.addAll(descriptors); - } - Set<String> knownMicrodescriptors = new HashSet<String>(); - for (Set<String> descriptors : this.storedMicrodescriptors.values()) { - knownMicrodescriptors.addAll(descriptors); - } - boolean missingDescriptors = false; - boolean missingVotes = false; - boolean missingMicrodescConsensus = false; - for (Map.Entry<Long, SortedSet<String>> c : - this.storedConsensuses.entrySet()) { - long validAfterMillis = c.getKey(); - String validAfterTime = dateTimeFormat.format(validAfterMillis); - int allVotes = this.expectedVotes.containsKey(validAfterMillis) - ? 
this.expectedVotes.get(validAfterMillis) : 0; - int foundVotes = 0; - if (this.storedVotes.containsKey(validAfterMillis)) { - foundVotes = this.storedVotes.get(validAfterMillis).size(); - for (Map.Entry<String, SortedSet<String>> v : - this.storedVotes.get(validAfterMillis).entrySet()) { - int voteFoundServerDescs = 0; - int voteAllServerDescs = 0; - int voteFoundExtraInfos = 0; - int voteAllExtraInfos = 0; - for (String serverDescriptorDigest : v.getValue()) { - voteAllServerDescs++; - if (knownServerDescriptors.containsKey( - serverDescriptorDigest)) { - voteFoundServerDescs++; - if (knownServerDescriptors.get(serverDescriptorDigest) - != null) { - String extraInfoDescriptorDigest = - knownServerDescriptors.get(serverDescriptorDigest); - voteAllExtraInfos++; - if (knownExtraInfoDescriptors.contains( - extraInfoDescriptorDigest)) { - voteFoundExtraInfos++; - } - } - } - } - sb.append("\nV, " + validAfterTime); - if (voteAllServerDescs > 0) { - sb.append(String.format(", %d/%d S (%.1f%%)", - voteFoundServerDescs, voteAllServerDescs, - 100.0D * (double) voteFoundServerDescs - / (double) voteAllServerDescs)); - } else { - sb.append(", 0/0 S"); - } - if (voteAllExtraInfos > 0) { - sb.append(String.format(", %d/%d E (%.1f%%)", - voteFoundExtraInfos, voteAllExtraInfos, - 100.0D * (double) voteFoundExtraInfos - / (double) voteAllExtraInfos)); - } else { - sb.append(", 0/0 E"); - } - String fingerprint = v.getKey(); - /* Ignore turtles when warning about missing descriptors. 
*/ - if (!fingerprint.equalsIgnoreCase( - "27B6B5996C426270A5C95488AA5BCEB6BCC86956") - && (voteFoundServerDescs * 1000 < voteAllServerDescs * 995 - || voteFoundExtraInfos * 1000 < voteAllExtraInfos * 995)) { - missingDescriptors = true; - } - } - } - int foundServerDescs = 0; - int allServerDescs = 0; - int foundExtraInfos = 0; - int allExtraInfos = 0; - int foundMicrodescriptors = 0; - int allMicrodescriptors = 0; - for (String serverDescriptorDigest : c.getValue()) { - allServerDescs++; - if (knownServerDescriptors.containsKey( - serverDescriptorDigest)) { - foundServerDescs++; - if (knownServerDescriptors.get( - serverDescriptorDigest) != null) { - allExtraInfos++; - String extraInfoDescriptorDigest = - knownServerDescriptors.get(serverDescriptorDigest); - if (knownExtraInfoDescriptors.contains( - extraInfoDescriptorDigest)) { - foundExtraInfos++; - } - } - } - } - sb.append("\nC, " + validAfterTime); - if (allVotes > 0) { - sb.append(String.format(", %d/%d V (%.1f%%)", foundVotes, allVotes, - 100.0D * (double) foundVotes / (double) allVotes)); - } else { - sb.append(", 0/0 V"); - } - if (allServerDescs > 0) { - sb.append(String.format(", %d/%d S (%.1f%%)", foundServerDescs, - allServerDescs, 100.0D * (double) foundServerDescs - / (double) allServerDescs)); - } else { - sb.append(", 0/0 S"); - } - if (allExtraInfos > 0) { - sb.append(String.format(", %d/%d E (%.1f%%)", foundExtraInfos, - allExtraInfos, 100.0D * (double) foundExtraInfos - / (double) allExtraInfos)); - } else { - sb.append(", 0/0 E"); - } - if (this.storedMicrodescConsensuses.containsKey(validAfterMillis)) { - for (String microdescriptorDigest : - this.storedMicrodescConsensuses.get(validAfterMillis)) { - allMicrodescriptors++; - if (knownMicrodescriptors.contains(microdescriptorDigest)) { - foundMicrodescriptors++; - } - } - sb.append("\nM, " + validAfterTime); - if (allMicrodescriptors > 0) { - sb.append(String.format(", %d/%d M (%.1f%%)", - foundMicrodescriptors, allMicrodescriptors, - 100.0D 
* (double) foundMicrodescriptors - / (double) allMicrodescriptors)); - } else { - sb.append(", 0/0 M"); - } - } else { - missingMicrodescConsensus = true; - } - if (foundServerDescs * 1000 < allServerDescs * 995 - || foundExtraInfos * 1000 < allExtraInfos * 995 - || foundMicrodescriptors * 1000 < allMicrodescriptors * 995) { - missingDescriptors = true; - } - if (foundVotes < allVotes) { - missingVotes = true; - } - } - this.logger.info(sb.toString()); - if (missingDescriptors) { - this.logger.fine("We are missing at least 0.5% of server or " - + "extra-info descriptors referenced from a consensus or " - + "vote or at least 0.5% of microdescriptors referenced from a " - + "microdesc consensus."); - } - if (missingVotes) { - /* TODO Shouldn't warn if we're not trying to archive votes at - * all. */ - this.logger.fine("We are missing at least one vote that was " - + "referenced from a consensus."); - } - if (missingMicrodescConsensus) { - /* TODO Shouldn't warn if we're not trying to archive microdesc - * consensuses at all. 
*/ - this.logger.fine("We are missing at least one microdesc " - + "consensus that was published together with a known " - + "consensus."); - } - } - - private void checkStaledescriptors() { - SimpleDateFormat dateTimeFormat = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - long tooOldMillis = this.now - 330L * 60L * 1000L; - if (!this.storedConsensuses.isEmpty() - && this.storedConsensuses.lastKey() < tooOldMillis) { - this.logger.warning("The last known relay network status " - + "consensus was valid after " - + dateTimeFormat.format(this.storedConsensuses.lastKey()) - + ", which is more than 5:30 hours in the past."); - } - if (!this.storedMicrodescConsensuses.isEmpty() - && this.storedMicrodescConsensuses.lastKey() < tooOldMillis) { - this.logger.warning("The last known relay network status " - + "microdesc consensus was valid after " - + dateTimeFormat.format( - this.storedMicrodescConsensuses.lastKey()) - + ", which is more than 5:30 hours in the past."); - } - if (!this.storedVotes.isEmpty() - && this.storedVotes.lastKey() < tooOldMillis) { - this.logger.warning("The last known relay network status vote " - + "was valid after " + dateTimeFormat.format( - this.storedVotes.lastKey()) + ", which is more than 5:30 hours " - + "in the past."); - } - if (!this.storedServerDescriptors.isEmpty() - && this.storedServerDescriptors.lastKey() < tooOldMillis) { - this.logger.warning("The last known relay server descriptor was " - + "published at " - + dateTimeFormat.format(this.storedServerDescriptors.lastKey()) - + ", which is more than 5:30 hours in the past."); - } - if (!this.storedExtraInfoDescriptors.isEmpty() - && this.storedExtraInfoDescriptors.lastKey() < tooOldMillis) { - this.logger.warning("The last known relay extra-info descriptor " - + "was published at " + dateTimeFormat.format( - this.storedExtraInfoDescriptors.lastKey()) - + ", which is more than 5:30 hours in the past."); - } - if 
(!this.storedMicrodescriptors.isEmpty() - && this.storedMicrodescriptors.lastKey() < tooOldMillis) { - this.logger.warning("The last known relay microdescriptor was " - + "contained in a microdesc consensus that was valid after " - + dateTimeFormat.format(this.storedMicrodescriptors.lastKey()) - + ", which is more than 5:30 hours in the past."); - } - } - - /* Delete all files from the rsync directory that have not been modified - * in the last three days (except for microdescriptors which are kept - * for up to thirty days), and remove the .tmp extension from newly - * written files. */ - public void cleanUpRsyncDirectory() { - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - long cutOffMicroMillis = cutOffMillis - 27L * 24L * 60L * 60L * 1000L; - Stack<File> allFiles = new Stack<File>(); - allFiles.add(new File("recent/relay-descriptors")); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - allFiles.addAll(Arrays.asList(file.listFiles())); - } else if (file.getName().endsWith("-micro")) { - if (file.lastModified() < cutOffMicroMillis) { - file.delete(); - } - } else if (file.lastModified() < cutOffMillis) { - file.delete(); - } else if (file.getName().endsWith(".tmp")) { - file.renameTo(new File(file.getParentFile(), - file.getName().substring(0, - file.getName().lastIndexOf(".tmp")))); - } - } - } - - private void saveDescriptorDigests() { - SimpleDateFormat dateTimeFormat = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - try { - this.storedServerDescriptorsFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.storedServerDescriptorsFile)); - for (Map.Entry<Long, Map<String, String>> e : - this.storedServerDescriptors.entrySet()) { - String published = dateTimeFormat.format(e.getKey()); - for (Map.Entry<String, String> f : e.getValue().entrySet()) { - String serverDescriptorDigest = 
f.getKey(); - String extraInfoDescriptorDigest = f.getValue() == null ? "NA" - : f.getValue(); - bw.write(String.format("%s,%s,%s%n", published, - serverDescriptorDigest, extraInfoDescriptorDigest)); - } - } - bw.close(); - this.storedExtraInfoDescriptorsFile.getParentFile().mkdirs(); - bw = new BufferedWriter(new FileWriter( - this.storedExtraInfoDescriptorsFile)); - for (Map.Entry<Long, Set<String>> e : - this.storedExtraInfoDescriptors.entrySet()) { - String published = dateTimeFormat.format(e.getKey()); - for (String extraInfoDescriptorDigest : e.getValue()) { - bw.write(String.format("%s,%s%n", published, - extraInfoDescriptorDigest)); - } - } - bw.close(); - this.storedMicrodescriptorsFile.getParentFile().mkdirs(); - bw = new BufferedWriter(new FileWriter( - this.storedMicrodescriptorsFile)); - for (Map.Entry<Long, Set<String>> e : - this.storedMicrodescriptors.entrySet()) { - String validAfter = dateTimeFormat.format(e.getKey()); - for (String microdescriptorDigest : e.getValue()) { - bw.write(String.format("%s,%s%n", validAfter, - microdescriptorDigest)); - } - } - bw.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not save descriptor " - + "digests. 
We might not be able to correctly check " - + "descriptors for completeness in the next run.", e); - } - } - - public void storeConsensus(byte[] data, long validAfter, - SortedSet<String> dirSources, - SortedSet<String> serverDescriptorDigests) { - SimpleDateFormat printFormat = new SimpleDateFormat( - "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory + "/consensus/" - + printFormat.format(new Date(validAfter)) + "-consensus"); - boolean tarballFileExistedBefore = tarballFile.exists(); - File rsyncFile = new File("recent/relay-descriptors/consensuses/" - + tarballFile.getName()); - File[] outputFiles = new File[] { tarballFile, rsyncFile }; - if (this.store(CONSENSUS_ANNOTATION, data, outputFiles, null)) { - this.storedConsensusesCounter++; - } - if (!tarballFileExistedBefore - && this.now - validAfter < 3L * 60L * 60L * 1000L) { - this.storedConsensuses.put(validAfter, serverDescriptorDigests); - this.expectedVotes.put(validAfter, dirSources.size()); - } - } - - public void storeMicrodescConsensus(byte[] data, long validAfter, - SortedSet<String> microdescriptorDigests) { - SimpleDateFormat yearMonthDirectoryFormat = new SimpleDateFormat( - "yyyy/MM"); - yearMonthDirectoryFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - SimpleDateFormat dayDirectoryFileFormat = new SimpleDateFormat( - "dd/yyyy-MM-dd-HH-mm-ss"); - dayDirectoryFileFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory - + "/microdesc/" + yearMonthDirectoryFormat.format(validAfter) - + "/consensus-microdesc/" - + dayDirectoryFileFormat.format(validAfter) - + "-consensus-microdesc"); - boolean tarballFileExistedBefore = tarballFile.exists(); - File rsyncFile = new File("recent/relay-descriptors/microdescs/" - + "consensus-microdesc/" + tarballFile.getName()); - File[] outputFiles = new File[] { tarballFile, rsyncFile }; - if 
(this.store(MICRODESCCONSENSUS_ANNOTATION, data, outputFiles, - null)) { - this.storedMicrodescConsensusesCounter++; - } - if (!tarballFileExistedBefore - && this.now - validAfter < 3L * 60L * 60L * 1000L) { - this.storedMicrodescConsensuses.put(validAfter, - microdescriptorDigests); - } - } - - public void storeVote(byte[] data, long validAfter, - String fingerprint, String digest, - SortedSet<String> serverDescriptorDigests) { - SimpleDateFormat printFormat = new SimpleDateFormat( - "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory + "/vote/" - + printFormat.format(new Date(validAfter)) + "-vote-" - + fingerprint + "-" + digest); - boolean tarballFileExistedBefore = tarballFile.exists(); - File rsyncFile = new File("recent/relay-descriptors/votes/" - + tarballFile.getName()); - File[] outputFiles = new File[] { tarballFile, rsyncFile }; - if (this.store(VOTE_ANNOTATION, data, outputFiles, null)) { - this.storedVotesCounter++; - } - if (!tarballFileExistedBefore - && this.now - validAfter < 3L * 60L * 60L * 1000L) { - if (!this.storedVotes.containsKey(validAfter)) { - this.storedVotes.put(validAfter, - new TreeMap<String, SortedSet<String>>()); - } - this.storedVotes.get(validAfter).put(fingerprint, - serverDescriptorDigests); - } - } - - public void storeCertificate(byte[] data, String fingerprint, - long published) { - SimpleDateFormat printFormat = new SimpleDateFormat( - "yyyy-MM-dd-HH-mm-ss"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory + "/certs/" - + fingerprint + "-" + printFormat.format(new Date(published))); - File[] outputFiles = new File[] { tarballFile }; - if (this.store(CERTIFICATE_ANNOTATION, data, outputFiles, null)) { - this.storedCertsCounter++; - } - } - - public void storeServerDescriptor(byte[] data, String digest, - long published, String extraInfoDigest) { - SimpleDateFormat printFormat = 
new SimpleDateFormat("yyyy/MM/"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory - + "/server-descriptor/" + printFormat.format(new Date(published)) - + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/" - + digest); - boolean tarballFileExistedBefore = tarballFile.exists(); - File rsyncCatFile = new File("recent/relay-descriptors/" - + "server-descriptors/" + this.rsyncCatString - + "-server-descriptors.tmp"); - File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; - boolean[] append = new boolean[] { false, true }; - if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, outputFiles, - append)) { - this.storedServerDescriptorsCounter++; - } - if (!tarballFileExistedBefore - && this.now - published < 48L * 60L * 60L * 1000L) { - if (!this.storedServerDescriptors.containsKey(published)) { - this.storedServerDescriptors.put(published, - new HashMap<String, String>()); - } - this.storedServerDescriptors.get(published).put(digest, - extraInfoDigest); - } - } - - public void storeExtraInfoDescriptor(byte[] data, - String extraInfoDigest, long published) { - SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/"); - descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory + "/extra-info/" - + descriptorFormat.format(new Date(published)) - + extraInfoDigest.substring(0, 1) + "/" - + extraInfoDigest.substring(1, 2) + "/" - + extraInfoDigest); - boolean tarballFileExistedBefore = tarballFile.exists(); - File rsyncCatFile = new File("recent/relay-descriptors/" - + "extra-infos/" + this.rsyncCatString + "-extra-infos.tmp"); - File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; - boolean[] append = new boolean[] { false, true }; - if (this.store(EXTRA_INFO_ANNOTATION, data, outputFiles, append)) { - this.storedExtraInfoDescriptorsCounter++; - } - if (!tarballFileExistedBefore - && this.now - published < 48L * 60L * 60L * 1000L) 
{ - if (!this.storedExtraInfoDescriptors.containsKey(published)) { - this.storedExtraInfoDescriptors.put(published, - new HashSet<String>()); - } - this.storedExtraInfoDescriptors.get(published).add(extraInfoDigest); - } - } - - public void storeMicrodescriptor(byte[] data, - String microdescriptorDigest, long validAfter) { - /* TODO We could check here whether we already stored the - * microdescriptor in the same valid-after month. This can happen, - * e.g., when two relays share the same microdescriptor. In that case - * this method gets called twice and the second call overwrites the - * file written in the first call. However, this method must be - * called twice to store the same microdescriptor in two different - * valid-after months. */ - SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/"); - descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory + "/microdesc/" - + descriptorFormat.format(validAfter) + "micro/" - + microdescriptorDigest.substring(0, 1) + "/" - + microdescriptorDigest.substring(1, 2) + "/" - + microdescriptorDigest); - boolean tarballFileExistedBefore = tarballFile.exists(); - File rsyncCatFile = new File("recent/relay-descriptors/" - + "microdescs/micro/" + this.rsyncCatString - + "-micro.tmp"); - File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; - boolean[] append = new boolean[] { false, true }; - if (this.store(MICRODESCRIPTOR_ANNOTATION, data, outputFiles, - append)) { - this.storedMicrodescriptorsCounter++; - } - if (!tarballFileExistedBefore - && this.now - validAfter < 40L * 24L * 60L * 60L * 1000L) { - if (!this.storedMicrodescriptors.containsKey(validAfter)) { - this.storedMicrodescriptors.put(validAfter, - new HashSet<String>()); - } - this.storedMicrodescriptors.get(validAfter).add( - microdescriptorDigest); - } - } - - private boolean store(byte[] typeAnnotation, byte[] data, - File[] outputFiles, boolean[] append) { - try { - 
this.logger.finer("Storing " + outputFiles[0]); - if (this.descriptorParser.parseDescriptors(data, - outputFiles[0].getName()).size() != 1) { - this.logger.info("Relay descriptor file " + outputFiles[0] - + " doesn't contain exactly one descriptor. Not storing."); - return false; - } - for (int i = 0; i < outputFiles.length; i++) { - File outputFile = outputFiles[i]; - boolean appendToFile = append == null ? false : append[i]; - outputFile.getParentFile().mkdirs(); - BufferedOutputStream bos = new BufferedOutputStream( - new FileOutputStream(outputFile, appendToFile)); - if (data.length > 0 && data[0] != '@') { - bos.write(typeAnnotation, 0, typeAnnotation.length); - } - bos.write(data, 0, data.length); - bos.close(); - } - return true; - } catch (DescriptorParseException e) { - this.logger.log(Level.WARNING, "Could not parse relay descriptor " - + outputFiles[0] + " before storing it to disk. Skipping.", e); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not store relay descriptor " - + outputFiles[0], e); - } - return false; - } -} diff --git a/src/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java b/src/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java deleted file mode 100644 index b9001dd..0000000 --- a/src/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java +++ /dev/null @@ -1,255 +0,0 @@ -/* Copyright 2010--2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.relaydescs; - -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.codec.digest.DigestUtils; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.StringReader; -import java.text.ParseException; -import 
java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.SortedSet; -import java.util.Stack; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Parses all descriptors in local directory cacheddesc/ and sorts them - * into directory structure in directory-archive/. - */ -public class CachedRelayDescriptorReader { - public CachedRelayDescriptorReader(RelayDescriptorParser rdp, - List<String> inputDirectories, File statsDirectory) { - - if (rdp == null || inputDirectories == null - || inputDirectories.isEmpty() || statsDirectory == null) { - throw new IllegalArgumentException(); - } - - StringBuilder dumpStats = new StringBuilder("Finished importing " - + "relay descriptors from local Tor data directories:"); - Logger logger = Logger.getLogger( - CachedRelayDescriptorReader.class.getName()); - - /* Read import history containing SHA-1 digests of previously parsed - * statuses and descriptors, so that we can skip them in this run. */ - Set<String> lastImportHistory = new HashSet<String>(); - Set<String> currentImportHistory = new HashSet<String>(); - File importHistoryFile = new File(statsDirectory, - "cacheddesc-import-history"); - if (importHistoryFile.exists()) { - try { - BufferedReader br = new BufferedReader(new FileReader( - importHistoryFile)); - String line; - while ((line = br.readLine()) != null) { - lastImportHistory.add(line); - } - br.close(); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not read import history from " - + importHistoryFile.getAbsolutePath() + ".", e); - } - } - - /* Read cached descriptors directories. */ - for (String inputDirectory : inputDirectories) { - File cachedDescDir = new File(inputDirectory); - if (!cachedDescDir.exists()) { - logger.warning("Directory " + cachedDescDir.getAbsolutePath() - + " does not exist. 
Skipping."); - continue; - } - logger.fine("Reading " + cachedDescDir.getAbsolutePath() - + " directory."); - SortedSet<File> cachedDescFiles = new TreeSet<File>(); - Stack<File> files = new Stack<File>(); - files.add(cachedDescDir); - while (!files.isEmpty()) { - File file = files.pop(); - if (file.isDirectory()) { - files.addAll(Arrays.asList(file.listFiles())); - } else { - cachedDescFiles.add(file); - } - } - for (File f : cachedDescFiles) { - try { - // descriptors may contain non-ASCII chars; read as bytes to - // determine digests - BufferedInputStream bis = - new BufferedInputStream(new FileInputStream(f)); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int len; - byte[] data = new byte[1024]; - while ((len = bis.read(data, 0, 1024)) >= 0) { - baos.write(data, 0, len); - } - bis.close(); - byte[] allData = baos.toByteArray(); - if (f.getName().equals("cached-consensus")) { - /* Check if directory information is stale. */ - BufferedReader br = new BufferedReader(new StringReader( - new String(allData, "US-ASCII"))); - String line = null; - while ((line = br.readLine()) != null) { - if (line.startsWith("valid-after ")) { - dumpStats.append("\n" + f.getName() + ": " + line.substring( - "valid-after ".length())); - SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - if (dateTimeFormat.parse(line.substring("valid-after " - .length())).getTime() < System.currentTimeMillis() - - 6L * 60L * 60L * 1000L) { - logger.warning("Cached descriptor files in " - + cachedDescDir.getAbsolutePath() + " are stale. " - + "The valid-after line in cached-consensus is '" - + line + "'."); - dumpStats.append(" (stale!)"); - } - break; - } - } - br.close(); - - /* Parse the cached consensus if we haven't parsed it before - * (but regardless of whether it's stale or not). 
*/ - if (rdp != null) { - String digest = Hex.encodeHexString(DigestUtils.sha( - allData)); - if (!lastImportHistory.contains(digest) - && !currentImportHistory.contains(digest)) { - rdp.parse(allData); - } else { - dumpStats.append(" (skipped)"); - } - currentImportHistory.add(digest); - } - } else if (f.getName().equals("v3-status-votes")) { - int parsedNum = 0; - int skippedNum = 0; - String ascii = new String(allData, "US-ASCII"); - String startToken = "network-status-version "; - int end = ascii.length(); - int start = ascii.indexOf(startToken); - while (start >= 0 && start < end) { - int next = ascii.indexOf(startToken, start + 1); - if (next < 0) { - next = end; - } - if (start < next) { - byte[] rawNetworkStatusBytes = new byte[next - start]; - System.arraycopy(allData, start, rawNetworkStatusBytes, 0, - next - start); - if (rdp != null) { - String digest = Hex.encodeHexString(DigestUtils.sha( - rawNetworkStatusBytes)); - if (!lastImportHistory.contains(digest) - && !currentImportHistory.contains(digest)) { - rdp.parse(rawNetworkStatusBytes); - parsedNum++; - } else { - skippedNum++; - } - currentImportHistory.add(digest); - } - } - start = next; - } - dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum - + ", skipped " + skippedNum + " votes"); - } else if (f.getName().startsWith("cached-descriptors") - || f.getName().startsWith("cached-extrainfo")) { - String ascii = new String(allData, "US-ASCII"); - int start = -1; - int sig = -1; - int end = -1; - String startToken = - f.getName().startsWith("cached-descriptors") - ? 
"router " : "extra-info "; - String sigToken = "\nrouter-signature\n"; - String endToken = "\n-----END SIGNATURE-----\n"; - int parsedNum = 0; - int skippedNum = 0; - while (end < ascii.length()) { - start = ascii.indexOf(startToken, end); - if (start < 0) { - break; - } - sig = ascii.indexOf(sigToken, start); - if (sig < 0) { - break; - } - sig += sigToken.length(); - end = ascii.indexOf(endToken, sig); - if (end < 0) { - break; - } - end += endToken.length(); - byte[] descBytes = new byte[end - start]; - System.arraycopy(allData, start, descBytes, 0, end - start); - if (rdp != null) { - String digest = Hex.encodeHexString(DigestUtils.sha( - descBytes)); - if (!lastImportHistory.contains(digest) - && !currentImportHistory.contains(digest)) { - rdp.parse(descBytes); - parsedNum++; - } else { - skippedNum++; - } - currentImportHistory.add(digest); - } - } - dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum - + ", skipped " + skippedNum + " " - + (f.getName().startsWith("cached-descriptors") - ? "server" : "extra-info") + " descriptors"); - } - } catch (IOException e) { - logger.log(Level.WARNING, "Failed reading " - + cachedDescDir.getAbsolutePath() + " directory.", e); - } catch (ParseException e) { - logger.log(Level.WARNING, "Failed reading " - + cachedDescDir.getAbsolutePath() + " directory.", e); - } - } - logger.fine("Finished reading " - + cachedDescDir.getAbsolutePath() + " directory."); - } - - /* Write import history containing SHA-1 digests to disk. 
*/ - try { - importHistoryFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - importHistoryFile)); - for (String digest : currentImportHistory) { - bw.write(digest + "\n"); - } - bw.close(); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not write import history to " - + importHistoryFile.getAbsolutePath() + ".", e); - } - - logger.info(dumpStats.toString()); - } -} - diff --git a/src/org/torproject/collector/relaydescs/ReferenceChecker.java b/src/org/torproject/collector/relaydescs/ReferenceChecker.java deleted file mode 100644 index 9f0f183..0000000 --- a/src/org/torproject/collector/relaydescs/ReferenceChecker.java +++ /dev/null @@ -1,319 +0,0 @@ -/* Copyright 2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.relaydescs; - -import org.torproject.descriptor.Descriptor; -import org.torproject.descriptor.DescriptorFile; -import org.torproject.descriptor.DescriptorReader; -import org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.DirSourceEntry; -import org.torproject.descriptor.ExtraInfoDescriptor; -import org.torproject.descriptor.Microdescriptor; -import org.torproject.descriptor.NetworkStatusEntry; -import org.torproject.descriptor.RelayNetworkStatusConsensus; -import org.torproject.descriptor.RelayNetworkStatusVote; -import org.torproject.descriptor.ServerDescriptor; - -import com.google.gson.Gson; - -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.text.DateFormat; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Locale; -import java.util.Set; -import java.util.SortedSet; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -public class ReferenceChecker { - - private Logger log = 
Logger.getLogger(ReferenceChecker.class.getName()); - - private File descriptorsDir; - - private File referencesFile; - - private File historyFile; - - private long currentTimeMillis; - - private SortedSet<Reference> references = new TreeSet<Reference>(); - - private static DateFormat dateTimeFormat; - - static { - dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", - Locale.US); - dateTimeFormat.setLenient(false); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - } - - private static final long ONE_HOUR = 60L * 60L * 1000L; - - private static final long THREE_HOURS = 3L * ONE_HOUR; - - private static final long SIX_HOURS = 6L * ONE_HOUR; - - private static final long ONE_DAY = 24L * ONE_HOUR; - - private static final long THIRTY_DAYS = 30L * ONE_DAY; - - public ReferenceChecker(File descriptorsDir, File referencesFile, - File historyFile) { - this.descriptorsDir = descriptorsDir; - this.referencesFile = referencesFile; - this.historyFile = historyFile; - } - - public void check() { - this.getCurrentTimeMillis(); - this.readReferencesFile(); - this.readNewDescriptors(); - this.dropStaleReferences(); - this.checkReferences(); - this.writeReferencesFile(); - } - - private void getCurrentTimeMillis() { - this.currentTimeMillis = System.currentTimeMillis(); - } - - private static class Reference implements Comparable<Reference> { - - private String referencing; - - private String referenced; - - private double weight; - - private long expiresAfterMillis; - - public Reference(String referencing, String referenced, double weight, - long expiresAfterMillis) { - this.referencing = referencing; - this.referenced = referenced; - this.weight = weight; - this.expiresAfterMillis = expiresAfterMillis; - } - - @Override - public boolean equals(Object otherObject) { - if (!(otherObject instanceof Reference)) { - return false; - } - Reference other = (Reference) otherObject; - return this.referencing.equals(other.referencing) - && 
this.referenced.equals(other.referenced); - } - - @Override - public int hashCode() { - return this.referencing.hashCode() + this.referenced.hashCode(); - } - - @Override - public int compareTo(Reference other) { - int result = this.referencing.compareTo(other.referencing); - if (result == 0) { - result = this.referenced.compareTo(other.referenced); - } - return result; - } - } - - private void readReferencesFile() { - if (!this.referencesFile.exists()) { - return; - } - Gson gson = new Gson(); - try { - FileReader fr = new FileReader(this.referencesFile); - this.references.addAll(Arrays.asList(gson.fromJson(fr, - Reference[].class))); - fr.close(); - } catch (IOException e) { - this.log.log(Level.WARNING, "Cannot read existing references file " - + "from previous run.", e); - } - } - - private void readNewDescriptors() { - DescriptorReader descriptorReader = - DescriptorSourceFactory.createDescriptorReader(); - descriptorReader.addDirectory(this.descriptorsDir); - descriptorReader.setExcludeFiles(this.historyFile); - Iterator<DescriptorFile> descriptorFiles = - descriptorReader.readDescriptors(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (descriptor instanceof RelayNetworkStatusConsensus) { - RelayNetworkStatusConsensus consensus = - (RelayNetworkStatusConsensus) descriptor; - String consensusFlavor = consensus.getConsensusFlavor(); - if (consensusFlavor == null) { - this.readRelayNetworkStatusConsensusUnflavored(consensus); - } else if (consensusFlavor.equals("microdesc")) { - this.readRelayNetworkStatusConsensusMicrodesc(consensus); - } else { - /* Ignore unknown consensus flavors. 
*/ - } - } else if (descriptor instanceof RelayNetworkStatusVote) { - this.readRelayNetworkStatusVote( - (RelayNetworkStatusVote) descriptor); - } else if (descriptor instanceof ServerDescriptor) { - this.readServerDescriptor((ServerDescriptor) descriptor); - } else if (descriptor instanceof ExtraInfoDescriptor) { - this.readExtraInfoDescriptor((ExtraInfoDescriptor) descriptor); - } else if (descriptor instanceof Microdescriptor) { - readMicrodescriptor((Microdescriptor) descriptor); - } else { - /* Ignore unknown descriptors. */ - } - } - } - } - - private void readRelayNetworkStatusConsensusUnflavored( - RelayNetworkStatusConsensus consensus) { - String validAfter = dateTimeFormat.format( - consensus.getValidAfterMillis()); - String referencing = String.format("C-%s", validAfter); - this.addReference(referencing, String.format("M-%s", validAfter), 1.0, - consensus.getValidAfterMillis() + THREE_HOURS); - for (DirSourceEntry dirSourceEntry : - consensus.getDirSourceEntries().values()) { - if (!dirSourceEntry.isLegacy()) { - this.addReference(referencing, String.format("V-%s-%s", - validAfter, dirSourceEntry.getIdentity()), 1.0, - consensus.getValidAfterMillis() + THREE_HOURS); - } - } - double entryWeight = 200.0 - / ((double) consensus.getStatusEntries().size()); - for (NetworkStatusEntry entry : - consensus.getStatusEntries().values()) { - this.addReference(referencing, - String.format("S-%s", entry.getDescriptor()), entryWeight, - entry.getPublishedMillis() + THREE_HOURS); - } - } - - private void readRelayNetworkStatusConsensusMicrodesc( - RelayNetworkStatusConsensus consensus) { - String validAfter = dateTimeFormat.format( - consensus.getValidAfterMillis()); - String referencing = String.format("M-%s", validAfter); - this.addReference(referencing, String.format("C-%s", validAfter), 1.0, - consensus.getValidAfterMillis() + THREE_HOURS); - double entryWeight = 200.0 - / ((double) consensus.getStatusEntries().size()); - for (NetworkStatusEntry entry : - 
consensus.getStatusEntries().values()) { - for (String digest : entry.getMicrodescriptorDigests()) { - this.addReference(referencing, String.format("D-%s", digest), - entryWeight, entry.getPublishedMillis() + THREE_HOURS); - } - } - } - - private void readRelayNetworkStatusVote(RelayNetworkStatusVote vote) { - String validAfter = dateTimeFormat.format(vote.getValidAfterMillis()); - String referencing = String.format("V-%s-%s", validAfter, - vote.getIdentity()); - double entryWeight = 200.0 - / ((double) vote.getStatusEntries().size()); - for (NetworkStatusEntry entry : vote.getStatusEntries().values()) { - this.addReference(referencing, - String.format("S-%s", entry.getDescriptor()), entryWeight, - entry.getPublishedMillis() + SIX_HOURS); - } - } - - private void readServerDescriptor(ServerDescriptor serverDescriptor) { - String referenced = serverDescriptor.getExtraInfoDigest() == null ? "" - : String.format("E-%s", serverDescriptor.getExtraInfoDigest()); - this.addReference(String.format("S-%s", - serverDescriptor.getServerDescriptorDigest()), referenced, 0.01, - serverDescriptor.getPublishedMillis() + SIX_HOURS); - } - - private void readExtraInfoDescriptor( - ExtraInfoDescriptor extraInfoDescriptor) { - this.addReference(String.format("E-%s", - extraInfoDescriptor.getExtraInfoDigest()), "", 0.005, - extraInfoDescriptor.getPublishedMillis() + SIX_HOURS); - } - - private void readMicrodescriptor(Microdescriptor microdesc) { - this.addReference( - String.format("D-%s", microdesc.getMicrodescriptorDigest()), "", - 0.0, this.currentTimeMillis + THIRTY_DAYS); - } - - private void addReference(String referencing, String referenced, - double weight, long expiresAfterMillis) { - this.references.add(new Reference(referencing.toUpperCase(), - referenced.toUpperCase(), weight, expiresAfterMillis)); - } - - private void dropStaleReferences() { - SortedSet<Reference> recentReferences = new TreeSet<Reference>(); - for (Reference reference : this.references) { - if 
(this.currentTimeMillis <= reference.expiresAfterMillis) { - recentReferences.add(reference); - } - } - this.references = recentReferences; - } - - private void checkReferences() { - Set<String> knownDescriptors = new HashSet<String>(); - for (Reference reference : this.references) { - knownDescriptors.add(reference.referencing); - } - double totalMissingDescriptorsWeight = 0.0; - Set<String> missingDescriptors = new TreeSet<String>(); - StringBuilder sb = new StringBuilder("Missing referenced " - + "descriptors:"); - for (Reference reference : this.references) { - if (reference.referenced.length() > 0 - && !knownDescriptors.contains(reference.referenced)) { - if (!missingDescriptors.contains(reference.referenced)) { - totalMissingDescriptorsWeight += reference.weight; - } - missingDescriptors.add(reference.referenced); - sb.append(String.format("%n%s -> %s (%.4f -> %.4f)", - reference.referencing, reference.referenced, reference.weight, - totalMissingDescriptorsWeight)); - } - } - this.log.log(Level.INFO, sb.toString()); - if (totalMissingDescriptorsWeight > 0.999) { - this.log.log(Level.WARNING, "Missing too many referenced " - + "descriptors (" + totalMissingDescriptorsWeight + ")."); - } - } - - private void writeReferencesFile() { - Gson gson = new Gson(); - try { - FileWriter fw = new FileWriter(this.referencesFile); - gson.toJson(this.references, fw); - fw.close(); - } catch (IOException e) { - this.log.log(Level.WARNING, "Cannot write references file for next " - + "run.", e); - } - } -} - diff --git a/src/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java b/src/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java deleted file mode 100644 index 458332a..0000000 --- a/src/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java +++ /dev/null @@ -1,1134 +0,0 @@ -/* Copyright 2010--2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.relaydescs; - -import 
org.apache.commons.codec.binary.Base64; -import org.apache.commons.codec.digest.DigestUtils; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.net.HttpURLConnection; -import java.net.URL; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; -import java.util.zip.InflaterInputStream; - -/** - * Downloads relay descriptors from the directory authorities via HTTP. - * Keeps a list of missing descriptors that gets updated by parse results - * from <code>RelayDescriptorParser</code> and downloads all missing - * descriptors that have been published in the last 24 hours. Also - * downloads all server and extra-info descriptors known to a directory - * authority at most once a day. - */ -public class RelayDescriptorDownloader { - - /** - * Text file containing the descriptors that we are missing and that we - * want to download. Lines are formatted as: - * - * - "consensus,<validafter>,<parsed>", - * - "consensus-microdesc,<validafter>,<parsed>", - * - "vote,<validafter>,<fingerprint>,<parsed>", - * - "server,<published>,<relayid>,<descid>,<parsed>", - * - "extra,<published>,<relayid>,<descid>,<parsed>", or - * - "micro,<validafter>,<relayid>,<descid>,<parsed>". 
- */ - private File missingDescriptorsFile; - - /** - * Relay descriptors that we are missing and that we want to download - * either in this execution or write to disk and try next time. Map keys - * contain comma-separated values as in the missing descriptors files - * without the "parsed" column. Map values contain the "parsed" column. - */ - private SortedMap<String, String> missingDescriptors; - - /** - * Map from base64 microdescriptor digests to keys in missingDescriptors - * ("micro,<validafter>,<relayid>,<descid>"). We need this map, because - * we can't learn <validafter> or <relayid> from parsing - * microdescriptors, but we need to know <validafter> to store - * microdescriptors to disk and both <validafter> and <relayid> to - * remove microdescriptors from the missing list. There are potentially - * many matching keys in missingDescriptors for the same microdescriptor - * digest. Also, in rare cases relays share the same microdescriptor - * (which is only possible if they share the same onion key), and then - * we don't have to download their microdescriptor more than once. - */ - private Map<String, Set<String>> microdescriptorKeys; - - /** - * Set of microdescriptor digests that are currently missing. Used for - * logging statistics instead of "micro,<validafter>,..." keys which may - * contain the same microdescriptor digest multiple times. - */ - private Set<String> missingMicrodescriptors; - - /** - * Text file containing the IP addresses (and Dir ports if not 80) of - * directory authorities and when we last downloaded all server and - * extra-info descriptors from them, so that we can avoid downloading - * them too often. - */ - private File lastDownloadedAllDescriptorsFile; - - /** - * Map of directory authorities and when we last downloaded all server - * and extra-info descriptors from them. Map keys are IP addresses (and - * Dir ports if not 80), map values are timestamps. 
- */ - private Map<String, String> lastDownloadedAllDescriptors; - - /** - * <code>RelayDescriptorParser</code> that we will hand over the - * downloaded descriptors for parsing. - */ - private RelayDescriptorParser rdp; - - /** - * Directory authorities that we will try to download missing - * descriptors from. - */ - private List<String> authorities; - - /** - * Fingerprints of directory authorities that we will use to download - * votes without requiring a successfully downloaded consensus. - */ - private List<String> authorityFingerprints; - - /** - * Should we try to download the current consensus if we don't have it? - */ - private boolean downloadCurrentConsensus; - - /** - * Should we try to download the current microdesc consensus if we don't - * have it? - */ - private boolean downloadCurrentMicrodescConsensus; - - /** - * Should we try to download current votes if we don't have them? - */ - private boolean downloadCurrentVotes; - - /** - * Should we try to download missing server descriptors that have been - * published within the past 24 hours? - */ - private boolean downloadMissingServerDescriptors; - - /** - * Should we try to download missing extra-info descriptors that have - * been published within the past 24 hours? - */ - private boolean downloadMissingExtraInfos; - - /** - * Should we try to download missing microdescriptors that have been - * published within the past 24 hours? - */ - private boolean downloadMissingMicrodescriptors; - - /** - * Should we try to download all server descriptors from the authorities - * once every 24 hours? - */ - private boolean downloadAllServerDescriptors; - - /** - * Should we try to download all extra-info descriptors from the - * authorities once every 24 hours? - */ - private boolean downloadAllExtraInfos; - - /** - * Should we download zlib-compressed versions of descriptors by adding - * ".z" to URLs? 
- */ - private boolean downloadCompressed; - - /** - * valid-after time that we expect the current consensus, - * microdescriptor consensus, and votes to have, formatted - * "yyyy-MM-dd HH:mm:ss". We only expect to find documents with this - * valid-after time on the directory authorities. This time is - * initialized as the beginning of the current hour. - */ - private String currentValidAfter; - - /** - * Cut-off time for missing server and extra-info descriptors, formatted - * "yyyy-MM-dd HH:mm:ss". This time is initialized as the current system - * time minus 24 hours. - */ - private String descriptorCutOff; - - /** - * Cut-off time for downloading all server and extra-info descriptors - * from the directory authorities, formatted "yyyy-MM-dd HH:mm:ss". This - * time is initialized as the current system time minus 23:30 hours. - */ - private String downloadAllDescriptorsCutOff; - - /** - * Directory authorities that we plan to download all server and - * extra-info descriptors from in this execution. - */ - private Set<String> downloadAllDescriptorsFromAuthorities; - - /** - * Current timestamp that is written to the missing list for descriptors - * that we parsed in this execution and for authorities that we - * downloaded all server and extra-info descriptors from. - */ - private String currentTimestamp; - - /** - * Logger for this class. - */ - private Logger logger; - - /** - * Number of descriptors requested by directory authority to be included - * in logs. - */ - private Map<String, Integer> requestsByAuthority; - - /** - * Counters for descriptors that we had on the missing list at the - * beginning of the execution, that we added to the missing list, - * that we requested, and that we successfully downloaded in this - * execution. 
- */ - private int oldMissingConsensuses = 0; - - private int oldMissingMicrodescConsensuses = 0; - - private int oldMissingVotes = 0; - - private int oldMissingServerDescriptors = 0; - - private int oldMissingExtraInfoDescriptors = 0; - - private int oldMissingMicrodescriptors = 0; - - private int newMissingConsensuses = 0; - - private int newMissingMicrodescConsensuses = 0; - - private int newMissingVotes = 0; - - private int newMissingServerDescriptors = 0; - - private int newMissingExtraInfoDescriptors = 0; - - private int newMissingMicrodescriptors = 0; - - private int requestedConsensuses = 0; - - private int requestedMicrodescConsensuses = 0; - - private int requestedVotes = 0; - - private int requestedMissingServerDescriptors = 0; - - private int requestedAllServerDescriptors = 0; - - private int requestedMissingExtraInfoDescriptors = 0; - - private int requestedAllExtraInfoDescriptors = 0; - - private int requestedMissingMicrodescriptors = 0; - - private int downloadedConsensuses = 0; - - private int downloadedMicrodescConsensuses = 0; - - private int downloadedVotes = 0; - - private int downloadedMissingServerDescriptors = 0; - - private int downloadedAllServerDescriptors = 0; - - private int downloadedMissingExtraInfoDescriptors = 0; - - private int downloadedAllExtraInfoDescriptors = 0; - - private int downloadedMissingMicrodescriptors = 0; - - /** - * Initializes this class, including reading in missing descriptors from - * <code>stats/missing-relay-descriptors</code> and the times when we - * last downloaded all server and extra-info descriptors from - * <code>stats/last-downloaded-all-descriptors</code>. 
- */ - public RelayDescriptorDownloader(RelayDescriptorParser rdp, - List<String> authorities, List<String> authorityFingerprints, - boolean downloadCurrentConsensus, - boolean downloadCurrentMicrodescConsensus, - boolean downloadCurrentVotes, - boolean downloadMissingServerDescriptors, - boolean downloadMissingExtraInfos, - boolean downloadMissingMicrodescriptors, - boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos, - boolean downloadCompressed) { - - /* Memorize argument values. */ - this.rdp = rdp; - this.authorities = new ArrayList<String>(authorities); - this.authorityFingerprints = new ArrayList<String>( - authorityFingerprints); - this.downloadCurrentConsensus = downloadCurrentConsensus; - this.downloadCurrentMicrodescConsensus = - downloadCurrentMicrodescConsensus; - this.downloadCurrentVotes = downloadCurrentVotes; - this.downloadMissingServerDescriptors = - downloadMissingServerDescriptors; - this.downloadMissingExtraInfos = downloadMissingExtraInfos; - this.downloadMissingMicrodescriptors = - downloadMissingMicrodescriptors; - this.downloadAllServerDescriptors = downloadAllServerDescriptors; - this.downloadAllExtraInfos = downloadAllExtraInfos; - this.downloadCompressed = downloadCompressed; - - /* Shuffle list of authorities for better load balancing over time. */ - Collections.shuffle(this.authorities); - - /* Initialize logger. */ - this.logger = Logger.getLogger( - RelayDescriptorDownloader.class.getName()); - - /* Prepare cut-off times and timestamp for the missing descriptors - * list and the list of authorities to download all server and - * extra-info descriptors from. 
*/ - SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - format.setTimeZone(TimeZone.getTimeZone("UTC")); - long now = System.currentTimeMillis(); - this.currentValidAfter = format.format((now / (60L * 60L * 1000L)) - * (60L * 60L * 1000L)); - this.descriptorCutOff = format.format(now - 24L * 60L * 60L * 1000L); - this.currentTimestamp = format.format(now); - this.downloadAllDescriptorsCutOff = format.format(now - - 23L * 60L * 60L * 1000L - 30L * 60L * 1000L); - - /* Read list of missing descriptors from disk and memorize those that - * we are interested in and that are likely to be found on the - * directory authorities. */ - this.missingDescriptors = new TreeMap<String, String>(); - this.microdescriptorKeys = new HashMap<String, Set<String>>(); - this.missingMicrodescriptors = new HashSet<String>(); - this.missingDescriptorsFile = new File( - "stats/missing-relay-descriptors"); - if (this.missingDescriptorsFile.exists()) { - try { - this.logger.fine("Reading file " - + this.missingDescriptorsFile.getAbsolutePath() + "..."); - BufferedReader br = new BufferedReader(new FileReader( - this.missingDescriptorsFile)); - String line; - while ((line = br.readLine()) != null) { - if (line.split(",").length > 2) { - String published = line.split(",")[1]; - if (((line.startsWith("consensus,") - || line.startsWith("consensus-microdesc,") - || line.startsWith("vote,")) - && this.currentValidAfter.equals(published)) - || ((line.startsWith("server,") - || line.startsWith("extra,") - || line.startsWith("micro,")) - && this.descriptorCutOff.compareTo(published) < 0)) { - if (!line.endsWith("NA")) { - /* Not missing. 
*/ - } else if (line.startsWith("consensus,")) { - oldMissingConsensuses++; - } else if (line.startsWith("consensus-microdesc,")) { - oldMissingMicrodescConsensuses++; - } else if (line.startsWith("vote,")) { - oldMissingVotes++; - } else if (line.startsWith("server,")) { - oldMissingServerDescriptors++; - } else if (line.startsWith("extra,")) { - oldMissingExtraInfoDescriptors++; - } - int separateAt = line.lastIndexOf(","); - this.missingDescriptors.put(line.substring(0, - separateAt), line.substring(separateAt + 1)); - if (line.startsWith("micro,")) { - String microdescriptorDigest = line.split(",")[3]; - String microdescriptorKey = line.substring(0, - line.lastIndexOf(",")); - if (!this.microdescriptorKeys.containsKey( - microdescriptorDigest)) { - this.microdescriptorKeys.put( - microdescriptorDigest, new HashSet<String>()); - } - this.microdescriptorKeys.get(microdescriptorDigest).add( - microdescriptorKey); - if (line.endsWith("NA") && !this.missingMicrodescriptors - .contains(microdescriptorDigest)) { - this.missingMicrodescriptors.add(microdescriptorDigest); - oldMissingMicrodescriptors++; - } - } - } - } else { - this.logger.fine("Invalid line '" + line + "' in " - + this.missingDescriptorsFile.getAbsolutePath() - + ". Ignoring."); - } - } - br.close(); - this.logger.fine("Finished reading file " - + this.missingDescriptorsFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to read file " - + this.missingDescriptorsFile.getAbsolutePath() - + "! This means that we might forget to dowload relay " - + "descriptors we are missing.", e); - } - } - - /* Read list of directory authorities and when we last downloaded all - * server and extra-info descriptors from them. 
*/ - this.lastDownloadedAllDescriptors = new HashMap<String, String>(); - this.lastDownloadedAllDescriptorsFile = new File( - "stats/last-downloaded-all-descriptors"); - if (this.lastDownloadedAllDescriptorsFile.exists()) { - try { - this.logger.fine("Reading file " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() - + "..."); - BufferedReader br = new BufferedReader(new FileReader( - this.lastDownloadedAllDescriptorsFile)); - String line; - while ((line = br.readLine()) != null) { - if (line.split(",").length != 2) { - this.logger.fine("Invalid line '" + line + "' in " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() - + ". Ignoring."); - } else { - String[] parts = line.split(","); - String authority = parts[0]; - String lastDownloaded = parts[1]; - this.lastDownloadedAllDescriptors.put(authority, - lastDownloaded); - } - } - br.close(); - this.logger.fine("Finished reading file " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() - + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to read file " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() - + "! This means that we might download all server and " - + "extra-info descriptors more often than we should.", e); - } - } - - /* Make a list of at most two directory authorities that we want to - * download all server and extra-info descriptors from. */ - this.downloadAllDescriptorsFromAuthorities = new HashSet<String>(); - for (String authority : this.authorities) { - if (!this.lastDownloadedAllDescriptors.containsKey(authority) - || this.lastDownloadedAllDescriptors.get(authority).compareTo( - this.downloadAllDescriptorsCutOff) < 0) { - this.downloadAllDescriptorsFromAuthorities.add(authority); - } - if (this.downloadAllDescriptorsFromAuthorities.size() >= 2) { - break; - } - } - - /* Prepare statistics on this execution. 
*/ - this.requestsByAuthority = new HashMap<String, Integer>(); - for (String authority : this.authorities) { - this.requestsByAuthority.put(authority, 0); - } - } - - /** - * We have parsed a consensus. Take this consensus off the missing list - * and add the votes created by the given <code>authorities</code> and - * the <code>serverDescriptors</code> which are in the format - * "<published>,<relayid>,<descid>" to that list. - */ - public void haveParsedConsensus(String validAfter, - Set<String> authorities, Set<String> serverDescriptors) { - - /* Mark consensus as parsed. */ - if (this.currentValidAfter.equals(validAfter)) { - String consensusKey = "consensus," + validAfter; - this.missingDescriptors.put(consensusKey, this.currentTimestamp); - - /* Add votes to missing list. */ - for (String authority : authorities) { - String voteKey = "vote," + validAfter + "," + authority; - if (!this.missingDescriptors.containsKey(voteKey)) { - this.missingDescriptors.put(voteKey, "NA"); - this.newMissingVotes++; - } - } - } - - /* Add server descriptors to missing list. */ - for (String serverDescriptor : serverDescriptors) { - String published = serverDescriptor.split(",")[0]; - if (this.descriptorCutOff.compareTo(published) < 0) { - String serverDescriptorKey = "server," + serverDescriptor; - if (!this.missingDescriptors.containsKey( - serverDescriptorKey)) { - this.missingDescriptors.put(serverDescriptorKey, "NA"); - this.newMissingServerDescriptors++; - } - } - } - } - - /** - * We have parsed a microdesc consensus. Take this microdesc consensus - * off the missing list and add the <code>microdescriptors</code> which - * are in the format "<validafter>,<relayid>,<descid>" to that - * list. - */ - public void haveParsedMicrodescConsensus(String validAfter, - Set<String> microdescriptors) { - - /* Mark microdesc consensus as parsed. 
*/ - if (this.currentValidAfter.equals(validAfter)) { - String microdescConsensusKey = "consensus-microdesc," + validAfter; - this.missingDescriptors.put(microdescConsensusKey, - this.currentTimestamp); - } - - /* Add microdescriptors to missing list. Exclude those that we already - * downloaded this month. (We download each microdescriptor at least - * once per month to keep the storage logic sane; otherwise we'd have - * to copy microdescriptors from the earlier month to the current - * month, and that gets messy.) */ - if (this.descriptorCutOff.compareTo(validAfter) < 0) { - String validAfterYearMonth = validAfter.substring(0, - "YYYY-MM".length()); - for (String microdescriptor : microdescriptors) { - String microdescriptorKey = "micro," + microdescriptor; - String parsed = "NA"; - String microdescriptorDigest = microdescriptor.split(",")[2]; - if (this.microdescriptorKeys.containsKey(microdescriptorDigest)) { - for (String otherMicrodescriptorKey : - this.microdescriptorKeys.get(microdescriptorDigest)) { - String otherValidAfter = - otherMicrodescriptorKey.split(",")[1]; - if (!otherValidAfter.startsWith(validAfterYearMonth)) { - continue; - } - String otherParsed = this.missingDescriptors.get( - otherMicrodescriptorKey); - if (otherParsed != null && !otherParsed.equals("NA")) { - parsed = otherParsed; - break; - } - } - } else { - this.microdescriptorKeys.put( - microdescriptorDigest, new HashSet<String>()); - } - this.microdescriptorKeys.get(microdescriptorDigest).add( - microdescriptorKey); - this.missingDescriptors.put(microdescriptorKey, parsed); - if (parsed.equals("NA") - && !this.missingMicrodescriptors.contains(microdescriptorDigest)) { - this.missingMicrodescriptors.add(microdescriptorDigest); - this.newMissingMicrodescriptors++; - } - } - } - } - - /** - * We have parsed a vote. Take this vote off the missing list and add - * the <code>serverDescriptors</code> which are in the format - * "<published>,<relayid>,<descid>" to that list. 
- */ - public void haveParsedVote(String validAfter, String fingerprint, - Set<String> serverDescriptors) { - - /* Mark vote as parsed. */ - if (this.currentValidAfter.equals(validAfter)) { - String voteKey = "vote," + validAfter + "," + fingerprint; - this.missingDescriptors.put(voteKey, this.currentTimestamp); - } - - /* Add server descriptors to missing list. */ - for (String serverDescriptor : serverDescriptors) { - String published = serverDescriptor.split(",")[0]; - if (this.descriptorCutOff.compareTo(published) < 0) { - String serverDescriptorKey = "server," + serverDescriptor; - if (!this.missingDescriptors.containsKey( - serverDescriptorKey)) { - this.missingDescriptors.put(serverDescriptorKey, "NA"); - this.newMissingServerDescriptors++; - } - } - } - } - - /** - * We have parsed a server descriptor. Take this server descriptor off - * the missing list and put the extra-info descriptor digest on that - * list. - */ - public void haveParsedServerDescriptor(String published, - String relayIdentity, String serverDescriptorDigest, - String extraInfoDigest) { - - /* Mark server descriptor as parsed. */ - if (this.descriptorCutOff.compareTo(published) < 0) { - String serverDescriptorKey = "server," + published + "," - + relayIdentity + "," + serverDescriptorDigest; - this.missingDescriptors.put(serverDescriptorKey, - this.currentTimestamp); - - /* Add extra-info descriptor to missing list. */ - if (extraInfoDigest != null) { - String extraInfoKey = "extra," + published + "," - + relayIdentity + "," + extraInfoDigest; - if (!this.missingDescriptors.containsKey(extraInfoKey)) { - this.missingDescriptors.put(extraInfoKey, "NA"); - this.newMissingExtraInfoDescriptors++; - } - } - } - } - - /** - * We have parsed an extra-info descriptor. Take it off the missing - * list. 
- */ - public void haveParsedExtraInfoDescriptor(String published, - String relayIdentity, String extraInfoDigest) { - if (this.descriptorCutOff.compareTo(published) < 0) { - String extraInfoKey = "extra," + published + "," - + relayIdentity + "," + extraInfoDigest; - this.missingDescriptors.put(extraInfoKey, this.currentTimestamp); - } - } - - /** - * We have parsed a microdescriptor. Take it off the missing list. - */ - public void haveParsedMicrodescriptor(String descriptorDigest) { - if (this.microdescriptorKeys.containsKey(descriptorDigest)) { - for (String microdescriptorKey : - this.microdescriptorKeys.get(descriptorDigest)) { - String validAfter = microdescriptorKey.split(",")[0]; - if (this.descriptorCutOff.compareTo(validAfter) < 0) { - this.missingDescriptors.put(microdescriptorKey, - this.currentTimestamp); - } - } - this.missingMicrodescriptors.remove(descriptorDigest); - } - } - - /** - * Downloads missing descriptors that we think might still be available - * on the directory authorities as well as all server and extra-info - * descriptors once per day. - */ - public void downloadDescriptors() { - - /* Put the current consensus and votes on the missing list, unless we - * already have them. 
*/ - String consensusKey = "consensus," + this.currentValidAfter; - if (!this.missingDescriptors.containsKey(consensusKey)) { - this.missingDescriptors.put(consensusKey, "NA"); - this.newMissingConsensuses++; - } - String microdescConsensusKey = "consensus-microdesc," - + this.currentValidAfter; - if (!this.missingDescriptors.containsKey(microdescConsensusKey)) { - this.missingDescriptors.put(microdescConsensusKey, "NA"); - this.newMissingMicrodescConsensuses++; - } - for (String authority : authorityFingerprints) { - String voteKey = "vote," + this.currentValidAfter + "," + authority; - if (!this.missingDescriptors.containsKey(voteKey)) { - this.missingDescriptors.put(voteKey, "NA"); - this.newMissingVotes++; - } - } - - /* Download descriptors from authorities which are in random order, so - * that we distribute the load somewhat fairly over time. */ - for (String authority : authorities) { - - /* Make all requests to an authority in a single try block. If - * something goes wrong with this authority, we give up on all - * downloads and continue with the next authority. */ - /* TODO Some authorities provide very little bandwidth and could - * slow down the entire download process. Ponder adding a timeout of - * 3 or 5 minutes per authority to avoid getting in the way of the - * next execution. */ - try { - - /* Start with downloading the current consensus, unless we already - * have it. */ - if (downloadCurrentConsensus) { - if (this.missingDescriptors.containsKey(consensusKey) - && this.missingDescriptors.get(consensusKey).equals("NA")) { - this.requestedConsensuses++; - this.downloadedConsensuses += - this.downloadResourceFromAuthority(authority, - "/tor/status-vote/current/consensus"); - } - } - - /* Then try to download the microdesc consensus. 
*/ - if (downloadCurrentMicrodescConsensus) { - if (this.missingDescriptors.containsKey(microdescConsensusKey) - && this.missingDescriptors.get(microdescConsensusKey) - .equals("NA")) { - this.requestedMicrodescConsensuses++; - this.downloadedMicrodescConsensuses += - this.downloadResourceFromAuthority(authority, - "/tor/status-vote/current/consensus-microdesc"); - } - } - - /* Next, try to download current votes that we're missing. */ - if (downloadCurrentVotes) { - String voteKeyPrefix = "vote," + this.currentValidAfter; - SortedSet<String> fingerprints = new TreeSet<String>(); - for (Map.Entry<String, String> e : - this.missingDescriptors.entrySet()) { - if (e.getValue().equals("NA") - && e.getKey().startsWith(voteKeyPrefix)) { - String fingerprint = e.getKey().split(",")[2]; - fingerprints.add(fingerprint); - } - } - for (String fingerprint : fingerprints) { - this.requestedVotes++; - this.downloadedVotes += - this.downloadResourceFromAuthority(authority, - "/tor/status-vote/current/" + fingerprint); - } - } - - /* Download either all server and extra-info descriptors or only - * those that we're missing. Start with server descriptors, then - * request extra-info descriptors. Finally, request missing - * microdescriptors. */ - for (String type : new String[] { "server", "extra", "micro" }) { - - /* Download all server or extra-info descriptors from this - * authority if we haven't done so for 24 hours and if we're - * configured to do so. 
*/ - if (this.downloadAllDescriptorsFromAuthorities.contains( - authority) && ((type.equals("server") - && this.downloadAllServerDescriptors) - || (type.equals("extra") && this.downloadAllExtraInfos))) { - int downloadedAllDescriptors = - this.downloadResourceFromAuthority(authority, "/tor/" - + type + "/all"); - if (type.equals("server")) { - this.requestedAllServerDescriptors++; - this.downloadedAllServerDescriptors += - downloadedAllDescriptors; - } else if (type.equals("extra")) { - this.requestedAllExtraInfoDescriptors++; - this.downloadedAllExtraInfoDescriptors += - downloadedAllDescriptors; - } - - /* Download missing server descriptors, extra-info descriptors, - * and microdescriptors if we're configured to do so. */ - } else if ((type.equals("server") - && this.downloadMissingServerDescriptors) - || (type.equals("extra") && this.downloadMissingExtraInfos) - || (type.equals("micro") - && this.downloadMissingMicrodescriptors)) { - - /* Go through the list of missing descriptors of this type - * and combine the descriptor identifiers to a URL of up to - * 96 server or extra-info descriptors or 92 microdescriptors - * that we can download at once. */ - SortedSet<String> descriptorIdentifiers = - new TreeSet<String>(); - for (Map.Entry<String, String> e : - this.missingDescriptors.entrySet()) { - if (e.getValue().equals("NA") - && e.getKey().startsWith(type + ",") - && this.descriptorCutOff.compareTo( - e.getKey().split(",")[1]) < 0) { - String descriptorIdentifier = e.getKey().split(",")[3]; - descriptorIdentifiers.add(descriptorIdentifier); - } - } - StringBuilder combinedResource = null; - int descriptorsInCombinedResource = 0; - int requestedDescriptors = 0; - int downloadedDescriptors = 0; - int maxDescriptorsInCombinedResource = - type.equals("micro") ? 92 : 96; - String separator = type.equals("micro") ? 
"-" : "+"; - for (String descriptorIdentifier : descriptorIdentifiers) { - if (descriptorsInCombinedResource - >= maxDescriptorsInCombinedResource) { - requestedDescriptors += descriptorsInCombinedResource; - downloadedDescriptors += - this.downloadResourceFromAuthority(authority, - combinedResource.toString()); - combinedResource = null; - descriptorsInCombinedResource = 0; - } - if (descriptorsInCombinedResource == 0) { - combinedResource = new StringBuilder("/tor/" + type - + "/d/" + descriptorIdentifier); - } else { - combinedResource.append(separator + descriptorIdentifier); - } - descriptorsInCombinedResource++; - } - if (descriptorsInCombinedResource > 0) { - requestedDescriptors += descriptorsInCombinedResource; - downloadedDescriptors += - this.downloadResourceFromAuthority(authority, - combinedResource.toString()); - } - if (type.equals("server")) { - this.requestedMissingServerDescriptors += - requestedDescriptors; - this.downloadedMissingServerDescriptors += - downloadedDescriptors; - } else if (type.equals("extra")) { - this.requestedMissingExtraInfoDescriptors += - requestedDescriptors; - this.downloadedMissingExtraInfoDescriptors += - downloadedDescriptors; - } else if (type.equals("micro")) { - this.requestedMissingMicrodescriptors += - requestedDescriptors; - this.downloadedMissingMicrodescriptors += - downloadedDescriptors; - } - } - } - - /* If a download failed, stop requesting descriptors from this - * authority and move on to the next. */ - } catch (IOException e) { - logger.log(Level.FINE, "Failed downloading from " + authority - + "!", e); - } - } - } - - /** - * Attempts to download one or more descriptors identified by a resource - * string from a directory authority and passes the returned - * descriptor(s) to the <code>RelayDescriptorParser</code> upon success. - * Returns the number of descriptors contained in the reply. Throws an - * <code>IOException</code> if something goes wrong while downloading. 
- */ - private int downloadResourceFromAuthority(String authority, - String resource) throws IOException { - byte[] allData = null; - this.requestsByAuthority.put(authority, - this.requestsByAuthority.get(authority) + 1); - /* TODO Disable compressed downloads for extra-info descriptors, - * because zlib decompression doesn't work correctly. Figure out why - * this is and fix it. */ - String fullUrl = "http://" + authority + resource - + (this.downloadCompressed && !resource.startsWith("/tor/extra/") - ? ".z" : ""); - URL u = new URL(fullUrl); - HttpURLConnection huc = (HttpURLConnection) u.openConnection(); - huc.setRequestMethod("GET"); - huc.connect(); - int response = huc.getResponseCode(); - if (response == 200) { - BufferedInputStream in = this.downloadCompressed - && !resource.startsWith("/tor/extra/") - ? new BufferedInputStream(new InflaterInputStream( - huc.getInputStream())) - : new BufferedInputStream(huc.getInputStream()); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int len; - byte[] data = new byte[1024]; - while ((len = in.read(data, 0, 1024)) >= 0) { - baos.write(data, 0, len); - } - in.close(); - allData = baos.toByteArray(); - } - logger.fine("Downloaded " + fullUrl + " -> " + response + " (" - + (allData == null ? 0 : allData.length) + " bytes)"); - int receivedDescriptors = 0; - if (allData != null) { - if (resource.startsWith("/tor/status-vote/current/")) { - this.rdp.parse(allData); - receivedDescriptors = 1; - } else if (resource.startsWith("/tor/server/") - || resource.startsWith("/tor/extra/")) { - if (resource.equals("/tor/server/all") - || resource.equals("/tor/extra/all")) { - this.lastDownloadedAllDescriptors.put(authority, - this.currentTimestamp); - } - String ascii = null; - try { - ascii = new String(allData, "US-ASCII"); - } catch (UnsupportedEncodingException e) { - /* No way that US-ASCII is not supported. 
*/ - } - int start = -1; - int sig = -1; - int end = -1; - String startToken = resource.startsWith("/tor/server/") - ? "router " : "extra-info "; - String sigToken = "\nrouter-signature\n"; - String endToken = "\n-----END SIGNATURE-----\n"; - while (end < ascii.length()) { - start = ascii.indexOf(startToken, end); - if (start < 0) { - break; - } - sig = ascii.indexOf(sigToken, start); - if (sig < 0) { - break; - } - sig += sigToken.length(); - end = ascii.indexOf(endToken, sig); - if (end < 0) { - break; - } - end += endToken.length(); - byte[] descBytes = new byte[end - start]; - System.arraycopy(allData, start, descBytes, 0, end - start); - this.rdp.parse(descBytes); - receivedDescriptors++; - } - } else if (resource.startsWith("/tor/micro/")) { - /* TODO We need to parse microdescriptors ourselves, rather than - * RelayDescriptorParser, because only we know the valid-after - * time(s) of microdesc consensus(es) containing this - * microdescriptor. However, this breaks functional abstraction - * pretty badly. */ - SimpleDateFormat parseFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - parseFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - String ascii = null; - try { - ascii = new String(allData, "US-ASCII"); - } catch (UnsupportedEncodingException e) { - /* No way that US-ASCII is not supported. 
*/ - } - int start = -1; - int end = -1; - String startToken = "onion-key\n"; - while (end < ascii.length()) { - start = ascii.indexOf(startToken, end); - if (start < 0) { - break; - } - end = ascii.indexOf(startToken, start + 1); - if (end < 0) { - end = ascii.length(); - if (end <= start) { - break; - } - } - byte[] descBytes = new byte[end - start]; - System.arraycopy(allData, start, descBytes, 0, end - start); - String digest256Base64 = Base64.encodeBase64String( - DigestUtils.sha256(descBytes)).replaceAll("=", ""); - if (!this.microdescriptorKeys.containsKey(digest256Base64)) { - continue; - } - String digest256Hex = DigestUtils.sha256Hex(descBytes); - for (String microdescriptorKey : - this.microdescriptorKeys.get(digest256Base64)) { - String validAfterTime = microdescriptorKey.split(",")[1]; - try { - long validAfter = - parseFormat.parse(validAfterTime).getTime(); - this.rdp.storeMicrodescriptor(descBytes, digest256Hex, - digest256Base64, validAfter); - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Could not parse " - + "valid-after time '" + validAfterTime + "' in " - + "microdescriptor key. Not storing microdescriptor.", - e); - } - } - receivedDescriptors++; - } - } - } - return receivedDescriptors; - } - - /** - * Writes status files to disk and logs statistics about downloading - * relay descriptors in this execution. - */ - public void writeFile() { - - /* Write missing descriptors file to disk. 
*/ - int missingConsensuses = 0; - int missingMicrodescConsensuses = 0; - int missingVotes = 0; - int missingServerDescriptors = 0; - int missingExtraInfoDescriptors = 0; - try { - this.logger.fine("Writing file " - + this.missingDescriptorsFile.getAbsolutePath() + "..."); - this.missingDescriptorsFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.missingDescriptorsFile)); - for (Map.Entry<String, String> e : - this.missingDescriptors.entrySet()) { - String key = e.getKey(); - String value = e.getValue(); - if (!value.equals("NA")) { - /* Not missing. */ - } else if (key.startsWith("consensus,")) { - missingConsensuses++; - } else if (key.startsWith("consensus-microdesc,")) { - missingMicrodescConsensuses++; - } else if (key.startsWith("vote,")) { - missingVotes++; - } else if (key.startsWith("server,")) { - missingServerDescriptors++; - } else if (key.startsWith("extra,")) { - missingExtraInfoDescriptors++; - } else if (key.startsWith("micro,")) { - /* We're counting missing microdescriptors below. */ - } - bw.write(key + "," + value + "\n"); - } - bw.close(); - this.logger.fine("Finished writing file " - + this.missingDescriptorsFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed writing " - + this.missingDescriptorsFile.getAbsolutePath() + "!", e); - } - int missingMicrodescriptors = this.missingMicrodescriptors.size(); - - /* Write text file containing the directory authorities and when we - * last downloaded all server and extra-info descriptors from them to - * disk. 
*/ - try { - this.logger.fine("Writing file " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() - + "..."); - this.lastDownloadedAllDescriptorsFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.lastDownloadedAllDescriptorsFile)); - for (Map.Entry<String, String> e : - this.lastDownloadedAllDescriptors.entrySet()) { - String authority = e.getKey(); - String lastDownloaded = e.getValue(); - bw.write(authority + "," + lastDownloaded + "\n"); - } - bw.close(); - this.logger.fine("Finished writing file " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() - + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed writing " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + "!", - e); - } - - /* Log statistics about this execution. */ - this.logger.info("Finished downloading relay descriptors from the " - + "directory authorities."); - this.logger.info("At the beginning of this execution, we were " - + "missing " + oldMissingConsensuses + " consensus(es), " - + oldMissingMicrodescConsensuses + " microdesc consensus(es), " - + oldMissingVotes + " vote(s), " + oldMissingServerDescriptors - + " server descriptor(s), " + oldMissingExtraInfoDescriptors - + " extra-info descriptor(s), and " + oldMissingMicrodescriptors - + " microdescriptor(s)."); - this.logger.info("During this execution, we added " - + this.newMissingConsensuses + " consensus(es), " - + this.newMissingMicrodescConsensuses - + " microdesc consensus(es), " + this.newMissingVotes - + " vote(s), " + this.newMissingServerDescriptors - + " server descriptor(s), " + this.newMissingExtraInfoDescriptors - + " extra-info descriptor(s), and " - + this.newMissingMicrodescriptors + " microdescriptor(s) to the " - + "missing list, some of which we also " - + "requested and removed from the list again."); - this.logger.info("We requested " + this.requestedConsensuses - + " consensus(es), " + this.requestedMicrodescConsensuses - + " 
microdesc consensus(es), " + this.requestedVotes - + " vote(s), " + this.requestedMissingServerDescriptors - + " missing server descriptor(s), " - + this.requestedAllServerDescriptors - + " times all server descriptors, " - + this.requestedMissingExtraInfoDescriptors + " missing " - + "extra-info descriptor(s), " - + this.requestedAllExtraInfoDescriptors + " times all extra-info " - + "descriptors, and " + this.requestedMissingMicrodescriptors - + " missing microdescriptor(s) from the directory authorities."); - StringBuilder sb = new StringBuilder(); - for (String authority : this.authorities) { - sb.append(" " + authority + "=" - + this.requestsByAuthority.get(authority)); - } - this.logger.info("We sent these numbers of requests to the directory " - + "authorities:" + sb.toString()); - this.logger.info("We successfully downloaded " - + this.downloadedConsensuses + " consensus(es), " - + this.downloadedMicrodescConsensuses - + " microdesc consensus(es), " + this.downloadedVotes - + " vote(s), " + this.downloadedMissingServerDescriptors - + " missing server descriptor(s), " - + this.downloadedAllServerDescriptors - + " server descriptor(s) when downloading all descriptors, " - + this.downloadedMissingExtraInfoDescriptors + " missing " - + "extra-info descriptor(s), " - + this.downloadedAllExtraInfoDescriptors + " extra-info " - + "descriptor(s) when downloading all descriptors, and " - + this.downloadedMissingMicrodescriptors - + " missing microdescriptor(s)."); - this.logger.info("At the end of this execution, we are missing " - + missingConsensuses + " consensus(es), " - + missingMicrodescConsensuses + " microdesc consensus(es), " - + missingVotes + " vote(s), " + missingServerDescriptors - + " server descriptor(s), " + missingExtraInfoDescriptors - + " extra-info descriptor(s), and " + missingMicrodescriptors - + " microdescriptor(s), some of which we may try in the next " - + "execution."); - } -} - diff --git 
a/src/org/torproject/collector/relaydescs/RelayDescriptorParser.java b/src/org/torproject/collector/relaydescs/RelayDescriptorParser.java deleted file mode 100644 index 3f9b912..0000000 --- a/src/org/torproject/collector/relaydescs/RelayDescriptorParser.java +++ /dev/null @@ -1,337 +0,0 @@ -/* Copyright 2010--2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.relaydescs; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.codec.digest.DigestUtils; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.StringReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.SortedSet; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Parses relay descriptors including network status consensuses and - * votes, server and extra-info descriptors, and passes the results to the - * stats handlers, to the archive writer, or to the relay descriptor - * downloader. - */ -public class RelayDescriptorParser { - - /** - * File writer that writes descriptor contents to files in a - * directory-archive directory structure. - */ - private ArchiveWriter aw; - - private ArchiveReader ar; - - /** - * Missing descriptor downloader that uses the parse results to learn - * which descriptors we are missing and want to download. - */ - private RelayDescriptorDownloader rdd; - - /** - * Logger for this class. - */ - private Logger logger; - - private SimpleDateFormat dateTimeFormat; - - /** - * Initializes this class. - */ - public RelayDescriptorParser(ArchiveWriter aw) { - this.aw = aw; - - /* Initialize logger. 
*/ - this.logger = Logger.getLogger(RelayDescriptorParser.class.getName()); - - this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - } - - public void setRelayDescriptorDownloader( - RelayDescriptorDownloader rdd) { - this.rdd = rdd; - } - - public void setArchiveReader(ArchiveReader ar) { - this.ar = ar; - } - - public boolean parse(byte[] data) { - boolean stored = false; - try { - /* Convert descriptor to ASCII for parsing. This means we'll lose - * the non-ASCII chars, but we don't care about them for parsing - * anyway. */ - BufferedReader br = new BufferedReader(new StringReader(new String( - data, "US-ASCII"))); - String line; - do { - line = br.readLine(); - } while (line != null && line.startsWith("@")); - if (line == null) { - this.logger.fine("We were given an empty descriptor for " - + "parsing. Ignoring."); - return false; - } - SimpleDateFormat parseFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - parseFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - if (line.startsWith("network-status-version 3")) { - String statusType = "consensus"; - if (line.equals("network-status-version 3 microdesc")) { - statusType = "consensus-microdesc"; - } - String validAfterTime = null; - String fingerprint = null; - String dirSource = null; - long validAfter = -1L; - long dirKeyPublished = -1L; - SortedSet<String> dirSources = new TreeSet<String>(); - SortedSet<String> serverDescriptors = new TreeSet<String>(); - SortedSet<String> serverDescriptorDigests = new TreeSet<String>(); - SortedSet<String> microdescriptorKeys = new TreeSet<String>(); - SortedSet<String> microdescriptorDigests = new TreeSet<String>(); - StringBuilder certificateStringBuilder = null; - String certificateString = null; - String lastRelayIdentity = null; - while ((line = br.readLine()) != null) { - if (certificateStringBuilder != null) { - if (line.startsWith("r ")) { - certificateString = 
certificateStringBuilder.toString(); - certificateStringBuilder = null; - } else { - certificateStringBuilder.append(line + "\n"); - } - } - if (line.equals("vote-status vote")) { - statusType = "vote"; - } else if (line.startsWith("valid-after ")) { - validAfterTime = line.substring("valid-after ".length()); - validAfter = parseFormat.parse(validAfterTime).getTime(); - } else if (line.startsWith("dir-source ")) { - dirSource = line.split(" ")[2]; - } else if (line.startsWith("vote-digest ")) { - dirSources.add(dirSource); - } else if (line.startsWith("dir-key-certificate-version ")) { - certificateStringBuilder = new StringBuilder(); - certificateStringBuilder.append(line + "\n"); - } else if (line.startsWith("fingerprint ")) { - fingerprint = line.split(" ")[1]; - } else if (line.startsWith("dir-key-published ")) { - String dirKeyPublishedTime = line.substring( - "dir-key-published ".length()); - dirKeyPublished = parseFormat.parse(dirKeyPublishedTime) - .getTime(); - } else if (line.startsWith("r ")) { - String[] parts = line.split(" "); - if (parts.length == 8) { - lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64( - parts[2] + "=")).toLowerCase(); - } else if (parts.length == 9) { - lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64( - parts[2] + "=")).toLowerCase(); - String serverDesc = Hex.encodeHexString(Base64.decodeBase64( - parts[3] + "=")).toLowerCase(); - String publishedTime = parts[4] + " " + parts[5]; - serverDescriptors.add(publishedTime + "," - + lastRelayIdentity + "," + serverDesc); - serverDescriptorDigests.add(serverDesc); - } else { - this.logger.log(Level.WARNING, "Could not parse r line '" - + line + "' in descriptor. 
Skipping."); - break; - } - } else if (line.startsWith("m ")) { - String[] parts = line.split(" "); - if (parts.length == 2 && parts[1].length() == 43) { - String digest256Base64 = parts[1]; - microdescriptorKeys.add(validAfterTime + "," - + lastRelayIdentity + "," + digest256Base64); - String digest256Hex = Hex.encodeHexString( - Base64.decodeBase64(digest256Base64 + "=")) - .toLowerCase(); - microdescriptorDigests.add(digest256Hex); - } else if (parts.length != 3 - || !parts[2].startsWith("sha256=") - || parts[2].length() != 50) { - this.logger.log(Level.WARNING, "Could not parse m line '" - + line + "' in descriptor. Skipping."); - break; - } - } - } - if (statusType.equals("consensus")) { - if (this.rdd != null) { - this.rdd.haveParsedConsensus(validAfterTime, dirSources, - serverDescriptors); - } - if (this.aw != null) { - this.aw.storeConsensus(data, validAfter, dirSources, - serverDescriptorDigests); - stored = true; - } - } else if (statusType.equals("consensus-microdesc")) { - if (this.rdd != null) { - this.rdd.haveParsedMicrodescConsensus(validAfterTime, - microdescriptorKeys); - } - if (this.ar != null) { - this.ar.haveParsedMicrodescConsensus(validAfterTime, - microdescriptorDigests); - } - if (this.aw != null) { - this.aw.storeMicrodescConsensus(data, validAfter, - microdescriptorDigests); - stored = true; - } - } else { - if (this.aw != null || this.rdd != null) { - String ascii = new String(data, "US-ASCII"); - String startToken = "network-status-version "; - String sigToken = "directory-signature "; - int start = ascii.indexOf(startToken); - int sig = ascii.indexOf(sigToken); - if (start >= 0 && sig >= 0 && sig > start) { - sig += sigToken.length(); - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - String digest = DigestUtils.shaHex(forDigest).toUpperCase(); - if (this.aw != null) { - this.aw.storeVote(data, validAfter, dirSource, digest, - serverDescriptorDigests); - stored = true; - } - if 
(this.rdd != null) { - this.rdd.haveParsedVote(validAfterTime, fingerprint, - serverDescriptors); - } - } - if (certificateString != null) { - if (this.aw != null) { - this.aw.storeCertificate(certificateString.getBytes(), - dirSource, dirKeyPublished); - stored = true; - } - } - } - } - } else if (line.startsWith("router ")) { - String publishedTime = null; - String extraInfoDigest = null; - String relayIdentifier = null; - long published = -1L; - while ((line = br.readLine()) != null) { - if (line.startsWith("published ")) { - publishedTime = line.substring("published ".length()); - published = parseFormat.parse(publishedTime).getTime(); - } else if (line.startsWith("opt fingerprint") - || line.startsWith("fingerprint")) { - relayIdentifier = line.substring(line.startsWith("opt ") - ? "opt fingerprint".length() : "fingerprint".length()) - .replaceAll(" ", "").toLowerCase(); - } else if (line.startsWith("opt extra-info-digest ") - || line.startsWith("extra-info-digest ")) { - extraInfoDigest = line.startsWith("opt ") - ? 
line.split(" ")[2].toLowerCase() - : line.split(" ")[1].toLowerCase(); - } - } - String ascii = new String(data, "US-ASCII"); - String startToken = "router "; - String sigToken = "\nrouter-signature\n"; - int start = ascii.indexOf(startToken); - int sig = ascii.indexOf(sigToken) + sigToken.length(); - String digest = null; - if (start >= 0 || sig >= 0 || sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - digest = DigestUtils.shaHex(forDigest); - } - if (this.aw != null && digest != null) { - this.aw.storeServerDescriptor(data, digest, published, - extraInfoDigest); - stored = true; - } - if (this.rdd != null && digest != null) { - this.rdd.haveParsedServerDescriptor(publishedTime, - relayIdentifier, digest, extraInfoDigest); - } - } else if (line.startsWith("extra-info ")) { - String publishedTime = null; - String relayIdentifier = line.split(" ")[2]; - long published = -1L; - while ((line = br.readLine()) != null) { - if (line.startsWith("published ")) { - publishedTime = line.substring("published ".length()); - published = parseFormat.parse(publishedTime).getTime(); - } - } - String ascii = new String(data, "US-ASCII"); - String startToken = "extra-info "; - String sigToken = "\nrouter-signature\n"; - String digest = null; - int start = ascii.indexOf(startToken); - if (start > 0) { - /* Do not confuse "extra-info " in "@type extra-info 1.0" with - * "extra-info 0000...". TODO This is a hack that should be - * solved by using metrics-lib some day. 
*/ - start = ascii.indexOf("\n" + startToken); - if (start > 0) { - start++; - } - } - int sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 && sig >= 0 && sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - digest = DigestUtils.shaHex(forDigest); - } - if (this.aw != null && digest != null) { - this.aw.storeExtraInfoDescriptor(data, digest, published); - stored = true; - } - if (this.rdd != null && digest != null) { - this.rdd.haveParsedExtraInfoDescriptor(publishedTime, - relayIdentifier.toLowerCase(), digest); - } - } else if (line.equals("onion-key")) { - /* Cannot store microdescriptors without knowing valid-after - * time(s) of microdesc consensuses containing them, because we - * don't know which month directories to put them in. Have to use - * storeMicrodescriptor below. */ - } - br.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not parse descriptor. " - + "Skipping.", e); - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Could not parse descriptor. 
" - + "Skipping.", e); - } - return stored; - } - - public void storeMicrodescriptor(byte[] data, String digest256Hex, - String digest256Base64, long validAfter) { - if (this.aw != null) { - this.aw.storeMicrodescriptor(data, digest256Hex, validAfter); - } - if (this.rdd != null) { - this.rdd.haveParsedMicrodescriptor(digest256Base64); - } - } -} - diff --git a/src/org/torproject/collector/torperf/TorperfDownloader.java b/src/org/torproject/collector/torperf/TorperfDownloader.java deleted file mode 100644 index 7bcfbf3..0000000 --- a/src/org/torproject/collector/torperf/TorperfDownloader.java +++ /dev/null @@ -1,643 +0,0 @@ -/* Copyright 2012-2016 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.torperf; - -import org.torproject.collector.main.Configuration; -import org.torproject.collector.main.LockFile; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.HttpURLConnection; -import java.net.URL; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.Stack; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.logging.Level; -import java.util.logging.Logger; - -/* Download possibly truncated Torperf .data and .extradata files from - * configured sources, append them to the files we already have, and merge - * the two files into the .tpf format. 
*/ -public class TorperfDownloader extends Thread { - - public static void main(String[] args) { - - Logger logger = Logger.getLogger(TorperfDownloader.class.getName()); - logger.info("Starting torperf module of CollecTor."); - - // Initialize configuration - Configuration config = new Configuration(); - - // Use lock file to avoid overlapping runs - LockFile lf = new LockFile("torperf"); - if (!lf.acquireLock()) { - logger.severe("Warning: CollecTor is already running or has not exited " - + "cleanly! Exiting!"); - System.exit(1); - } - - // Process Torperf files - new TorperfDownloader(config).run(); - - // Remove lock file - lf.releaseLock(); - - logger.info("Terminating torperf module of CollecTor."); - } - - private Configuration config; - - public TorperfDownloader(Configuration config) { - this.config = config; - } - - private File torperfOutputDirectory = null; - private SortedMap<String, String> torperfSources = null; - private List<String> torperfFilesLines = null; - private Logger logger = null; - private SimpleDateFormat dateFormat; - - public void run() { - - File torperfOutputDirectory = - new File(config.getTorperfOutputDirectory()); - SortedMap<String, String> torperfSources = config.getTorperfSources(); - List<String> torperfFilesLines = config.getTorperfFiles(); - - this.torperfOutputDirectory = torperfOutputDirectory; - this.torperfSources = torperfSources; - this.torperfFilesLines = torperfFilesLines; - if (!this.torperfOutputDirectory.exists()) { - this.torperfOutputDirectory.mkdirs(); - } - this.logger = Logger.getLogger(TorperfDownloader.class.getName()); - this.dateFormat = new SimpleDateFormat("yyyy-MM-dd"); - this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - this.readLastMergedTimestamps(); - for (String torperfFilesLine : this.torperfFilesLines) { - this.downloadAndMergeFiles(torperfFilesLine); - } - this.writeLastMergedTimestamps(); - - this.cleanUpRsyncDirectory(); - } - - private File torperfLastMergedFile = - new 
File("stats/torperf-last-merged"); - - SortedMap<String, String> lastMergedTimestamps = - new TreeMap<String, String>(); - - private void readLastMergedTimestamps() { - if (!this.torperfLastMergedFile.exists()) { - return; - } - try { - BufferedReader br = new BufferedReader(new FileReader( - this.torperfLastMergedFile)); - String line; - while ((line = br.readLine()) != null) { - String[] parts = line.split(" "); - String fileName = null; - String timestamp = null; - if (parts.length == 2) { - try { - Double.parseDouble(parts[1]); - fileName = parts[0]; - timestamp = parts[1]; - } catch (NumberFormatException e) { - /* Handle below. */ - } - } - if (fileName == null || timestamp == null) { - this.logger.log(Level.WARNING, "Invalid line '" + line + "' in " - + this.torperfLastMergedFile.getAbsolutePath() + ". " - + "Ignoring past history of merging .data and .extradata " - + "files."); - this.lastMergedTimestamps.clear(); - break; - } - this.lastMergedTimestamps.put(fileName, timestamp); - } - br.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Error while reading '" - + this.torperfLastMergedFile.getAbsolutePath() + ". Ignoring " - + "past history of merging .data and .extradata files."); - this.lastMergedTimestamps.clear(); - } - } - - private void writeLastMergedTimestamps() { - try { - this.torperfLastMergedFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.torperfLastMergedFile)); - for (Map.Entry<String, String> e : - this.lastMergedTimestamps.entrySet()) { - String fileName = e.getKey(); - String timestamp = e.getValue(); - bw.write(fileName + " " + timestamp + "\n"); - } - bw.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Error while writing '" - + this.torperfLastMergedFile.getAbsolutePath() + ". 
This may " - + "result in ignoring history of merging .data and .extradata " - + "files in the next execution.", e); - } - } - - private void downloadAndMergeFiles(String torperfFilesLine) { - String[] parts = torperfFilesLine.split(" "); - String sourceName = parts[1]; - int fileSize = -1; - try { - fileSize = Integer.parseInt(parts[2]); - } catch (NumberFormatException e) { - this.logger.log(Level.WARNING, "Could not parse file size in " - + "TorperfFiles configuration line '" + torperfFilesLine - + "'."); - return; - } - - /* Download and append the .data file. */ - String dataFileName = parts[3]; - String sourceBaseUrl = torperfSources.get(sourceName); - String dataUrl = sourceBaseUrl + dataFileName; - String dataOutputFileName = sourceName + "-" + dataFileName; - File dataOutputFile = new File(torperfOutputDirectory, - dataOutputFileName); - boolean downloadedDataFile = this.downloadAndAppendFile(dataUrl, - dataOutputFile, true); - - /* Download and append the .extradata file. */ - String extradataFileName = parts[4]; - String extradataUrl = sourceBaseUrl + extradataFileName; - String extradataOutputFileName = sourceName + "-" + extradataFileName; - File extradataOutputFile = new File(torperfOutputDirectory, - extradataOutputFileName); - boolean downloadedExtradataFile = this.downloadAndAppendFile( - extradataUrl, extradataOutputFile, false); - - /* Merge both files into .tpf format. 
*/ - if (!downloadedDataFile && !downloadedExtradataFile) { - return; - } - String skipUntil = null; - if (this.lastMergedTimestamps.containsKey(dataOutputFileName)) { - skipUntil = this.lastMergedTimestamps.get(dataOutputFileName); - } - try { - skipUntil = this.mergeFiles(dataOutputFile, extradataOutputFile, - sourceName, fileSize, skipUntil); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed merging " + dataOutputFile - + " and " + extradataOutputFile + ".", e); - } - if (skipUntil != null) { - this.lastMergedTimestamps.put(dataOutputFileName, skipUntil); - } - } - - private boolean downloadAndAppendFile(String url, File outputFile, - boolean isDataFile) { - - /* Read an existing output file to determine which line will be the - * first to append to it. */ - String lastTimestampLine = null; - int linesAfterLastTimestampLine = 0; - if (outputFile.exists()) { - try { - BufferedReader br = new BufferedReader(new FileReader( - outputFile)); - String line; - while ((line = br.readLine()) != null) { - if (isDataFile || line.contains(" LAUNCH")) { - lastTimestampLine = line; - linesAfterLastTimestampLine = 0; - } else { - linesAfterLastTimestampLine++; - } - } - br.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed reading '" - + outputFile.getAbsolutePath() + "' to determine the first " - + "line to append to it.", e); - return false; - } - } - try { - this.logger.fine("Downloading " + (isDataFile ? 
".data" : - ".extradata") + " file from '" + url + "' and merging it into " - + "'" + outputFile.getAbsolutePath() + "'."); - URL u = new URL(url); - HttpURLConnection huc = (HttpURLConnection) u.openConnection(); - huc.setRequestMethod("GET"); - huc.connect(); - BufferedReader br = new BufferedReader(new InputStreamReader( - huc.getInputStream())); - String line; - BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile, - true)); - boolean copyLines = lastTimestampLine == null; - while ((line = br.readLine()) != null) { - if (copyLines && linesAfterLastTimestampLine == 0) { - if (isDataFile || line.contains(" LAUNCH")) { - lastTimestampLine = line; - } - bw.write(line + "\n"); - } else if (copyLines && linesAfterLastTimestampLine > 0) { - linesAfterLastTimestampLine--; - } else if (line.equals(lastTimestampLine)) { - copyLines = true; - } - } - bw.close(); - br.close(); - if (!copyLines) { - this.logger.warning("The last timestamp line in '" - + outputFile.getAbsolutePath() + "' is not contained in the " - + "new file downloaded from '" + url + "'. Cannot append " - + "new lines without possibly leaving a gap. Skipping."); - return false; - } - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed downloading and/or merging '" - + url + "'.", e); - return false; - } - if (lastTimestampLine == null) { - this.logger.warning("'" + outputFile.getAbsolutePath() - + "' doesn't contain any timestamp lines. 
Unable to check " - + "whether that file is stale or not."); - } else { - long lastTimestampMillis = -1L; - if (isDataFile) { - lastTimestampMillis = Long.parseLong(lastTimestampLine.substring( - 0, lastTimestampLine.indexOf(" "))) * 1000L; - } else { - lastTimestampMillis = Long.parseLong(lastTimestampLine.substring( - lastTimestampLine.indexOf(" LAUNCH=") + " LAUNCH=".length(), - lastTimestampLine.indexOf(".", - lastTimestampLine.indexOf(" LAUNCH=")))) * 1000L; - } - if (lastTimestampMillis < System.currentTimeMillis() - - 330L * 60L * 1000L) { - this.logger.warning("The last timestamp in '" - + outputFile.getAbsolutePath() + "' is more than 5:30 hours " - + "old: " + lastTimestampMillis); - } - } - return true; - } - - private String mergeFiles(File dataFile, File extradataFile, - String source, int fileSize, String skipUntil) throws IOException { - SortedMap<String, String> config = new TreeMap<String, String>(); - config.put("SOURCE", source); - config.put("FILESIZE", String.valueOf(fileSize)); - if (!dataFile.exists() || !extradataFile.exists()) { - this.logger.warning("File " + dataFile.getAbsolutePath() + " or " - + extradataFile.getAbsolutePath() + " is missing."); - return null; - } - this.logger.fine("Merging " + dataFile.getAbsolutePath() + " and " - + extradataFile.getAbsolutePath() + " into .tpf format."); - BufferedReader brD = new BufferedReader(new FileReader(dataFile)); - BufferedReader brE = new BufferedReader(new FileReader(extradataFile)); - String lineD = brD.readLine(); - String lineE = brE.readLine(); - int d = 1; - int e = 1; - String maxDataComplete = null; - String maxUsedAt = null; - while (lineD != null) { - - /* Parse .data line. Every valid .data line will go into the .tpf - * format, either with additional information from the .extradata - * file or without it. 
*/ - if (lineD.isEmpty()) { - this.logger.finer("Skipping empty line " + dataFile.getName() - + ":" + d++ + "."); - lineD = brD.readLine(); - continue; - } - SortedMap<String, String> data = this.parseDataLine(lineD); - if (data == null) { - this.logger.finer("Skipping illegal line " + dataFile.getName() - + ":" + d++ + " '" + lineD + "'."); - lineD = brD.readLine(); - continue; - } - String dataComplete = data.get("DATACOMPLETE"); - double dataCompleteSeconds = Double.parseDouble(dataComplete); - if (skipUntil != null && dataComplete.compareTo(skipUntil) < 0) { - this.logger.finer("Skipping " + dataFile.getName() + ":" - + d++ + " which we already processed before."); - lineD = brD.readLine(); - continue; - } - maxDataComplete = dataComplete; - - /* Parse .extradata line if available and try to find the one that - * matches the .data line. */ - SortedMap<String, String> extradata = null; - while (lineE != null) { - if (lineE.isEmpty()) { - this.logger.finer("Skipping " + extradataFile.getName() + ":" - + e++ + " which is empty."); - lineE = brE.readLine(); - continue; - } - if (lineE.startsWith("BUILDTIMEOUT_SET ")) { - this.logger.finer("Skipping " + extradataFile.getName() + ":" - + e++ + " which is a BUILDTIMEOUT_SET line."); - lineE = brE.readLine(); - continue; - } else if (lineE.startsWith("ok ") - || lineE.startsWith("error ")) { - this.logger.finer("Skipping " + extradataFile.getName() + ":" - + e++ + " which is in the old format."); - lineE = brE.readLine(); - continue; - } - extradata = this.parseExtradataLine(lineE); - if (extradata == null) { - this.logger.finer("Skipping Illegal line " - + extradataFile.getName() + ":" + e++ + " '" + lineE - + "'."); - lineE = brE.readLine(); - continue; - } - if (!extradata.containsKey("USED_AT")) { - this.logger.finer("Skipping " + extradataFile.getName() + ":" - + e++ + " which doesn't contain a USED_AT element."); - lineE = brE.readLine(); - continue; - } - String usedAt = extradata.get("USED_AT"); - double 
usedAtSeconds = Double.parseDouble(usedAt); - if (skipUntil != null && usedAt.compareTo(skipUntil) < 0) { - this.logger.finer("Skipping " + extradataFile.getName() + ":" - + e++ + " which we already processed before."); - lineE = brE.readLine(); - continue; - } - maxUsedAt = usedAt; - if (Math.abs(usedAtSeconds - dataCompleteSeconds) <= 1.0) { - this.logger.fine("Merging " + extradataFile.getName() + ":" - + e++ + " into the current .data line."); - lineE = brE.readLine(); - break; - } else if (usedAtSeconds > dataCompleteSeconds) { - this.logger.finer("Comparing " + extradataFile.getName() - + " to the next .data line."); - extradata = null; - break; - } else { - this.logger.finer("Skipping " + extradataFile.getName() + ":" - + e++ + " which is too old to be merged with " - + dataFile.getName() + ":" + d + "."); - lineE = brE.readLine(); - continue; - } - } - - /* Write output line to .tpf file. */ - SortedMap<String, String> keysAndValues = - new TreeMap<String, String>(); - if (extradata != null) { - keysAndValues.putAll(extradata); - } - keysAndValues.putAll(data); - keysAndValues.putAll(config); - this.logger.fine("Writing " + dataFile.getName() + ":" + d++ + "."); - lineD = brD.readLine(); - try { - this.writeTpfLine(source, fileSize, keysAndValues); - } catch (IOException ex) { - this.logger.log(Level.WARNING, "Error writing output line. 
" - + "Aborting to merge " + dataFile.getName() + " and " - + extradataFile.getName() + ".", e); - break; - } - } - brD.close(); - brE.close(); - this.writeCachedTpfLines(); - if (maxDataComplete == null) { - return maxUsedAt; - } else if (maxUsedAt == null) { - return maxDataComplete; - } else if (maxDataComplete.compareTo(maxUsedAt) > 0) { - return maxUsedAt; - } else { - return maxDataComplete; - } - } - - private SortedMap<Integer, String> dataTimestamps; - - private SortedMap<String, String> parseDataLine(String line) { - String[] parts = line.trim().split(" "); - if (line.length() == 0 || parts.length < 20) { - return null; - } - if (this.dataTimestamps == null) { - this.dataTimestamps = new TreeMap<Integer, String>(); - this.dataTimestamps.put(0, "START"); - this.dataTimestamps.put(2, "SOCKET"); - this.dataTimestamps.put(4, "CONNECT"); - this.dataTimestamps.put(6, "NEGOTIATE"); - this.dataTimestamps.put(8, "REQUEST"); - this.dataTimestamps.put(10, "RESPONSE"); - this.dataTimestamps.put(12, "DATAREQUEST"); - this.dataTimestamps.put(14, "DATARESPONSE"); - this.dataTimestamps.put(16, "DATACOMPLETE"); - this.dataTimestamps.put(21, "DATAPERC10"); - this.dataTimestamps.put(23, "DATAPERC20"); - this.dataTimestamps.put(25, "DATAPERC30"); - this.dataTimestamps.put(27, "DATAPERC40"); - this.dataTimestamps.put(29, "DATAPERC50"); - this.dataTimestamps.put(31, "DATAPERC60"); - this.dataTimestamps.put(33, "DATAPERC70"); - this.dataTimestamps.put(35, "DATAPERC80"); - this.dataTimestamps.put(37, "DATAPERC90"); - } - SortedMap<String, String> data = new TreeMap<String, String>(); - try { - for (Map.Entry<Integer, String> e : this.dataTimestamps.entrySet()) { - int i = e.getKey(); - if (parts.length > i + 1) { - String key = e.getValue(); - String value = String.format("%s.%02d", parts[i], - Integer.parseInt(parts[i + 1]) / 10000); - data.put(key, value); - } - } - } catch (NumberFormatException e) { - return null; - } - data.put("WRITEBYTES", parts[18]); - 
data.put("READBYTES", parts[19]); - if (parts.length >= 21) { - data.put("DIDTIMEOUT", parts[20]); - } - return data; - } - - private SortedMap<String, String> parseExtradataLine(String line) { - String[] parts = line.split(" "); - SortedMap<String, String> extradata = new TreeMap<String, String>(); - String previousKey = null; - for (String part : parts) { - String[] keyAndValue = part.split("=", -1); - if (keyAndValue.length == 2) { - String key = keyAndValue[0]; - previousKey = key; - String value = keyAndValue[1]; - if (value.contains(".") && value.lastIndexOf(".") - == value.length() - 2) { - /* Make sure that all floats have two trailing digits. */ - value += "0"; - } - extradata.put(key, value); - } else if (keyAndValue.length == 1 && previousKey != null) { - String value = keyAndValue[0]; - if (previousKey.equals("STREAM_FAIL_REASONS") - && (value.equals("MISC") || value.equals("EXITPOLICY") - || value.equals("RESOURCELIMIT") - || value.equals("RESOLVEFAILED"))) { - extradata.put(previousKey, extradata.get(previousKey) + ":" - + value); - } else { - return null; - } - } else { - return null; - } - } - return extradata; - } - - private String cachedSource; - - private int cachedFileSize; - - private String cachedStartDate; - - private SortedMap<String, String> cachedTpfLines; - - private void writeTpfLine(String source, int fileSize, - SortedMap<String, String> keysAndValues) throws IOException { - StringBuilder sb = new StringBuilder(); - int written = 0; - for (Map.Entry<String, String> keyAndValue : - keysAndValues.entrySet()) { - String key = keyAndValue.getKey(); - String value = keyAndValue.getValue(); - sb.append((written++ > 0 ? 
" " : "") + key + "=" + value); - } - String line = sb.toString(); - String startString = keysAndValues.get("START"); - long startMillis = Long.parseLong(startString.substring(0, - startString.indexOf("."))) * 1000L; - String startDate = dateFormat.format(startMillis); - if (this.cachedTpfLines == null || !source.equals(this.cachedSource) - || fileSize != this.cachedFileSize - || !startDate.equals(this.cachedStartDate)) { - this.writeCachedTpfLines(); - this.readTpfLinesToCache(source, fileSize, startDate); - } - if (!this.cachedTpfLines.containsKey(startString) - || line.length() > this.cachedTpfLines.get(startString).length()) { - this.cachedTpfLines.put(startString, line); - } - } - - private void readTpfLinesToCache(String source, int fileSize, - String startDate) throws IOException { - this.cachedTpfLines = new TreeMap<String, String>(); - this.cachedSource = source; - this.cachedFileSize = fileSize; - this.cachedStartDate = startDate; - File tpfFile = new File(torperfOutputDirectory, - startDate.replaceAll("-", "/") + "/" - + source + "-" + String.valueOf(fileSize) + "-" + startDate - + ".tpf"); - if (!tpfFile.exists()) { - return; - } - BufferedReader br = new BufferedReader(new FileReader(tpfFile)); - String line; - while ((line = br.readLine()) != null) { - if (line.startsWith("@type ")) { - continue; - } - if (line.contains("START=")) { - String startString = line.substring(line.indexOf("START=") - + "START=".length()).split(" ")[0]; - this.cachedTpfLines.put(startString, line); - } - } - br.close(); - } - - private void writeCachedTpfLines() throws IOException { - if (this.cachedSource == null || this.cachedFileSize == 0 - || this.cachedStartDate == null || this.cachedTpfLines == null) { - return; - } - File tarballFile = new File(torperfOutputDirectory, - this.cachedStartDate.replaceAll("-", "/") - + "/" + this.cachedSource + "-" - + String.valueOf(this.cachedFileSize) + "-" - + this.cachedStartDate + ".tpf"); - File rsyncFile = new 
File("recent/torperf/" + tarballFile.getName()); - File[] outputFiles = new File[] { tarballFile, rsyncFile }; - for (File outputFile : outputFiles) { - outputFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile)); - for (String line : this.cachedTpfLines.values()) { - bw.write("@type torperf 1.0\n"); - bw.write(line + "\n"); - } - bw.close(); - } - this.cachedSource = null; - this.cachedFileSize = 0; - this.cachedStartDate = null; - this.cachedTpfLines = null; - } - - /* Delete all files from the rsync directory that have not been modified - * in the last three days. */ - public void cleanUpRsyncDirectory() { - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - Stack<File> allFiles = new Stack<File>(); - allFiles.add(new File("recent/torperf")); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - allFiles.addAll(Arrays.asList(file.listFiles())); - } else if (file.lastModified() < cutOffMillis) { - file.delete(); - } - } - } -} -