commit 8746badd1bfd8cb05983159be2336f1cf72cbd44 Author: Karsten Loesing karsten.loesing@gmx.net Date: Fri Oct 26 12:44:42 2012 -0400
Group classes by kind of processed metrics data.
Also remove unused unit tests. --- build.xml | 24 +- src/org/torproject/ernie/db/ArchiveReader.java | 146 ---- src/org/torproject/ernie/db/ArchiveWriter.java | 339 -------- .../ernie/db/BridgeDescriptorParser.java | 46 - .../ernie/db/BridgePoolAssignmentsProcessor.java | 174 ---- .../torproject/ernie/db/BridgeSnapshotReader.java | 220 ----- .../ernie/db/CachedRelayDescriptorReader.java | 235 ----- src/org/torproject/ernie/db/Configuration.java | 359 -------- .../torproject/ernie/db/ExitListDownloader.java | 100 --- src/org/torproject/ernie/db/LockFile.java | 52 -- .../torproject/ernie/db/LoggingConfiguration.java | 93 -- src/org/torproject/ernie/db/Main.java | 160 ---- .../ernie/db/RelayDescriptorDownloader.java | 821 ------------------ .../torproject/ernie/db/RelayDescriptorParser.java | 265 ------ src/org/torproject/ernie/db/RsyncDataProvider.java | 217 ----- .../ernie/db/SanitizedBridgesWriter.java | 911 -------------------- src/org/torproject/ernie/db/TorperfDownloader.java | 573 ------------ .../db/bridgedescs/BridgeDescriptorParser.java | 46 + .../ernie/db/bridgedescs/BridgeSnapshotReader.java | 220 +++++ .../db/bridgedescs/SanitizedBridgesWriter.java | 911 ++++++++++++++++++++ .../BridgePoolAssignmentsProcessor.java | 174 ++++ .../ernie/db/exitlists/ExitListDownloader.java | 100 +++ .../torproject/ernie/db/main/Configuration.java | 359 ++++++++ src/org/torproject/ernie/db/main/LockFile.java | 52 ++ .../ernie/db/main/LoggingConfiguration.java | 93 ++ src/org/torproject/ernie/db/main/Main.java | 172 ++++ .../ernie/db/main/RsyncDataProvider.java | 217 +++++ .../ernie/db/relaydescs/ArchiveReader.java | 146 ++++ .../ernie/db/relaydescs/ArchiveWriter.java | 339 ++++++++ .../db/relaydescs/CachedRelayDescriptorReader.java | 235 +++++ .../db/relaydescs/RelayDescriptorDownloader.java | 821 ++++++++++++++++++ .../ernie/db/relaydescs/RelayDescriptorParser.java | 265 ++++++ .../ernie/db/torperf/TorperfDownloader.java | 573 ++++++++++++ 
.../org/torproject/ernie/db/ArchiveReaderTest.java | 32 - .../org/torproject/ernie/db/ArchiveWriterTest.java | 19 - .../ernie/db/BridgeSnapshotReaderTest.java | 32 - .../ernie/db/CachedRelayDescriptorReaderTest.java | 31 - .../ernie/db/SanitizedBridgesWriterTest.java | 38 - 38 files changed, 4724 insertions(+), 4886 deletions(-)
diff --git a/build.xml b/build.xml index ce3e337..7cba58c 100644 --- a/build.xml +++ b/build.xml @@ -1,7 +1,6 @@ <project default="run" name="ERNIE" basedir="."> <property name="sources" value="src/"/> <property name="classes" value="classes/"/> - <property name="tests" value="test"/> <property name="docs" value="javadoc/"/> <property name="name" value="ERNIE"/> <path id="classpath"> @@ -27,7 +26,7 @@ <target name="run" depends="compile"> <java fork="true" maxmemory="2048m" - classname="org.torproject.ernie.db.Main"> + classname="org.torproject.ernie.db.main.Main"> <classpath refid="classpath"/> </java> </target> @@ -38,26 +37,5 @@ <fileset dir="${sources}/" includes="**/*.java" /> </javadoc> </target> - <target name="test" depends="compile"> - <javac destdir="${classes}" - srcdir="${tests}" - source="1.5" - target="1.5" - debug="true" - deprecation="true" - optimize="false" - failonerror="true" - includeantruntime="false"> - <classpath refid="classpath"/> - </javac> - <junit haltonfailure="true" printsummary="off"> - <classpath refid="classpath"/> - <formatter type="plain" usefile="false"/> - <batchtest> - <fileset dir="${classes}" - includes="**/*Test.class"/> - </batchtest> - </junit> - </target> </project>
diff --git a/src/org/torproject/ernie/db/ArchiveReader.java b/src/org/torproject/ernie/db/ArchiveReader.java deleted file mode 100644 index 06abf6c..0000000 --- a/src/org/torproject/ernie/db/ArchiveReader.java +++ /dev/null @@ -1,146 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.SortedSet; -import java.util.Stack; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; - -/** - * Read in all files in a given directory and pass buffered readers of - * them to the relay descriptor parser. - */ -public class ArchiveReader { - public ArchiveReader(RelayDescriptorParser rdp, File archivesDirectory, - File statsDirectory, boolean keepImportHistory) { - - if (rdp == null || archivesDirectory == null || - statsDirectory == null) { - throw new IllegalArgumentException(); - } - - int parsedFiles = 0, ignoredFiles = 0; - Logger logger = Logger.getLogger(ArchiveReader.class.getName()); - SortedSet<String> archivesImportHistory = new TreeSet<String>(); - File archivesImportHistoryFile = new File(statsDirectory, - "archives-import-history"); - if (keepImportHistory && archivesImportHistoryFile.exists()) { - try { - BufferedReader br = new BufferedReader(new FileReader( - archivesImportHistoryFile)); - String line = null; - while ((line = br.readLine()) != null) { - archivesImportHistory.add(line); - } - br.close(); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not read in archives import " - + "history file. 
Skipping."); - } - } - if (archivesDirectory.exists()) { - logger.fine("Importing files in directory " + archivesDirectory - + "/..."); - Stack<File> filesInInputDir = new Stack<File>(); - filesInInputDir.add(archivesDirectory); - List<File> problems = new ArrayList<File>(); - while (!filesInInputDir.isEmpty()) { - File pop = filesInInputDir.pop(); - if (pop.isDirectory()) { - for (File f : pop.listFiles()) { - filesInInputDir.add(f); - } - } else { - if (rdp != null) { - try { - BufferedInputStream bis = null; - if (keepImportHistory && - archivesImportHistory.contains(pop.getName())) { - ignoredFiles++; - continue; - } else if (pop.getName().endsWith(".tar.bz2")) { - logger.warning("Cannot parse compressed tarball " - + pop.getAbsolutePath() + ". Skipping."); - continue; - } else if (pop.getName().endsWith(".bz2")) { - FileInputStream fis = new FileInputStream(pop); - BZip2CompressorInputStream bcis = - new BZip2CompressorInputStream(fis); - bis = new BufferedInputStream(bcis); - } else { - FileInputStream fis = new FileInputStream(pop); - bis = new BufferedInputStream(fis); - } - if (keepImportHistory) { - archivesImportHistory.add(pop.getName()); - } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int len; - byte[] data = new byte[1024]; - while ((len = bis.read(data, 0, 1024)) >= 0) { - baos.write(data, 0, len); - } - bis.close(); - byte[] allData = baos.toByteArray(); - rdp.parse(allData); - parsedFiles++; - } catch (IOException e) { - problems.add(pop); - if (problems.size() > 3) { - break; - } - } - } - } - } - if (problems.isEmpty()) { - logger.fine("Finished importing files in directory " - + archivesDirectory + "/."); - } else { - StringBuilder sb = new StringBuilder("Failed importing files in " - + "directory " + archivesDirectory + "/:"); - int printed = 0; - for (File f : problems) { - sb.append("\n " + f.getAbsolutePath()); - if (++printed >= 3) { - sb.append("\n ... 
more"); - break; - } - } - } - } - if (keepImportHistory) { - try { - archivesImportHistoryFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - archivesImportHistoryFile)); - for (String line : archivesImportHistory) { - bw.write(line + "\n"); - } - bw.close(); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not write archives import " - + "history file."); - } - } - logger.info("Finished importing relay descriptors from local " - + "directory:\nParsed " + parsedFiles + ", ignored " - + ignoredFiles + " files."); - } -} - diff --git a/src/org/torproject/ernie/db/ArchiveWriter.java b/src/org/torproject/ernie/db/ArchiveWriter.java deleted file mode 100644 index d1b9499..0000000 --- a/src/org/torproject/ernie/db/ArchiveWriter.java +++ /dev/null @@ -1,339 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedOutputStream; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.IOException; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.SortedSet; -import java.util.Stack; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.codec.binary.Hex; -import org.torproject.descriptor.DescriptorParser; -import org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.impl.DescriptorParseException; - -public class ArchiveWriter { - private Logger logger; - private File outputDirectory; - private DescriptorParser descriptorParser; - private int storedConsensuses = 0, storedVotes = 0, storedCerts = 0, - storedServerDescriptors = 0, storedExtraInfoDescriptors = 0; - - public ArchiveWriter(File outputDirectory) { - - if 
(outputDirectory == null) { - throw new IllegalArgumentException(); - } - - this.logger = Logger.getLogger(ArchiveWriter.class.getName()); - this.outputDirectory = outputDirectory; - this.descriptorParser = - DescriptorSourceFactory.createDescriptorParser(); - } - - private boolean store(byte[] typeAnnotation, byte[] data, - String filename) { - try { - File file = new File(filename); - if (!file.exists()) { - this.logger.finer("Storing " + filename); - if (this.descriptorParser.parseDescriptors(data, filename).size() - != 1) { - this.logger.info("Relay descriptor file " + filename - + " doesn't contain exactly one descriptor. Not storing."); - return false; - } - file.getParentFile().mkdirs(); - BufferedOutputStream bos = new BufferedOutputStream( - new FileOutputStream(file)); - if (data.length > 0 && data[0] != '@') { - bos.write(typeAnnotation, 0, typeAnnotation.length); - } - bos.write(data, 0, data.length); - bos.close(); - return true; - } - } catch (DescriptorParseException e) { - this.logger.log(Level.WARNING, "Could not parse relay descriptor " - + filename + " before storing it to disk. 
Skipping.", e); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not store relay descriptor " - + filename, e); - } - return false; - } - - private static final byte[] CONSENSUS_ANNOTATION = - "@type network-status-consensus-3 1.0\n".getBytes(); - public void storeConsensus(byte[] data, long validAfter) { - SimpleDateFormat printFormat = new SimpleDateFormat( - "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - String filename = outputDirectory + "/consensus/" - + printFormat.format(new Date(validAfter)) + "-consensus"; - if (this.store(CONSENSUS_ANNOTATION, data, filename)) { - this.storedConsensuses++; - } - } - - private static final byte[] VOTE_ANNOTATION = - "@type network-status-vote-3 1.0\n".getBytes(); - public void storeVote(byte[] data, long validAfter, - String fingerprint, String digest) { - SimpleDateFormat printFormat = new SimpleDateFormat( - "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - String filename = outputDirectory + "/vote/" - + printFormat.format(new Date(validAfter)) + "-vote-" - + fingerprint + "-" + digest; - if (this.store(VOTE_ANNOTATION, data, filename)) { - this.storedVotes++; - } - } - - private static final byte[] CERTIFICATE_ANNOTATION = - "@type dir-key-certificate-3 1.0\n".getBytes(); - public void storeCertificate(byte[] data, String fingerprint, - long published) { - SimpleDateFormat printFormat = new SimpleDateFormat( - "yyyy-MM-dd-HH-mm-ss"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - String filename = outputDirectory + "/certs/" - + fingerprint + "-" + printFormat.format(new Date(published)); - if (this.store(CERTIFICATE_ANNOTATION, data, filename)) { - this.storedCerts++; - } - } - - private static final byte[] SERVER_DESCRIPTOR_ANNOTATION = - "@type server-descriptor 1.0\n".getBytes(); - public void storeServerDescriptor(byte[] data, String digest, - long published) { - SimpleDateFormat printFormat = 
new SimpleDateFormat("yyyy/MM/"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - String filename = outputDirectory + "/server-descriptor/" - + printFormat.format(new Date(published)) - + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/" - + digest; - if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, filename)) { - this.storedServerDescriptors++; - } - } - - private static final byte[] EXTRA_INFO_ANNOTATION = - "@type extra-info 1.0\n".getBytes(); - public void storeExtraInfoDescriptor(byte[] data, - String extraInfoDigest, long published) { - SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/"); - descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - String filename = outputDirectory + "/extra-info/" - + descriptorFormat.format(new Date(published)) - + extraInfoDigest.substring(0, 1) + "/" - + extraInfoDigest.substring(1, 2) + "/" - + extraInfoDigest; - if (this.store(EXTRA_INFO_ANNOTATION, data, filename)) { - this.storedExtraInfoDescriptors++; - } - } - - private StringBuilder intermediateStats = new StringBuilder(); - public void intermediateStats(String event) { - intermediateStats.append("While " + event + ", we stored " - + this.storedConsensuses + " consensus(es), " + this.storedVotes - + " vote(s), " + this.storedCerts + " certificate(s), " - + this.storedServerDescriptors + " server descriptor(s), and " - + this.storedExtraInfoDescriptors - + " extra-info descriptor(s) to disk.\n"); - this.storedConsensuses = 0; - this.storedVotes = 0; - this.storedCerts = 0; - this.storedServerDescriptors = 0; - this.storedExtraInfoDescriptors = 0; - } - /** - * Dump some statistics on the completeness of descriptors to the logs - * on level INFO. 
- */ - public void dumpStats() { - StringBuilder sb = new StringBuilder("Finished writing relay " - + "descriptors to disk.\n"); - sb.append(intermediateStats.toString()); - sb.append("Statistics on the completeness of written relay " - + "descriptors of the last 3 consensuses (Consensus/Vote, " - + "valid-after, votes, server descriptors, extra-infos):"); - try { - SimpleDateFormat validAfterFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - validAfterFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - SimpleDateFormat consensusVoteFormat = - new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); - consensusVoteFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - SimpleDateFormat descriptorFormat = - new SimpleDateFormat("yyyy/MM/"); - descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - - SortedSet<File> consensuses = new TreeSet<File>(); - Stack<File> leftToParse = new Stack<File>(); - leftToParse.add(new File(outputDirectory + "/consensus")); - while (!leftToParse.isEmpty()) { - File pop = leftToParse.pop(); - if (pop.isDirectory()) { - for (File f : pop.listFiles()) { - leftToParse.add(f); - } - } else if (pop.length() > 0) { - consensuses.add(pop); - } - while (consensuses.size() > 3) { - consensuses.remove(consensuses.first()); - } - } - for (File f : consensuses) { - BufferedReader br = new BufferedReader(new FileReader(f)); - String line = null, validAfterTime = null, - voteFilenamePrefix = null, dirSource = null; - int allVotes = 0, foundVotes = 0, - allServerDescs = 0, foundServerDescs = 0, - allExtraInfos = 0, foundExtraInfos = 0; - while ((line = br.readLine()) != null) { - if (line.startsWith("valid-after ")) { - validAfterTime = line.substring("valid-after ".length()); - long validAfter = validAfterFormat.parse( - validAfterTime).getTime(); - voteFilenamePrefix = outputDirectory + "/vote/" - + consensusVoteFormat.format(new Date(validAfter)) - + "-vote-"; - } else if (line.startsWith("dir-source ")) { - dirSource = line.split(" ")[2]; - 
} else if (line.startsWith("vote-digest ")) { - allVotes++; - File voteFile = new File(voteFilenamePrefix + dirSource + "-" - + line.split(" ")[1]); - if (voteFile.exists()) { - foundVotes++; - BufferedReader vbr = new BufferedReader(new FileReader( - voteFile)); - String line3 = null; - int voteAllServerDescs = 0, voteFoundServerDescs = 0, - voteAllExtraInfos = 0, voteFoundExtraInfos = 0; - while ((line3 = vbr.readLine()) != null) { - if (line3.startsWith("r ")) { - voteAllServerDescs++; - String digest = Hex.encodeHexString(Base64.decodeBase64( - line3.split(" ")[3] + "=")).toLowerCase(); - long published = validAfterFormat.parse( - line3.split(" ")[4] + " " - + line3.split(" ")[5]).getTime(); - String filename = outputDirectory - + "/server-descriptor/" - + descriptorFormat.format(new Date(published)) - + digest.substring(0, 1) + "/" - + digest.substring(1, 2) + "/" + digest; - if (new File(filename).exists()) { - BufferedReader sbr = new BufferedReader(new FileReader( - new File(filename))); - String line2 = null; - while ((line2 = sbr.readLine()) != null) { - if (line2.startsWith("opt extra-info-digest ") || - line2.startsWith("extra-info-digest ")) { - voteAllExtraInfos++; - String extraInfoDigest = line2.startsWith("opt ") ? 
- line2.split(" ")[2].toLowerCase() : - line2.split(" ")[1].toLowerCase(); - String filename2 = - outputDirectory.getAbsolutePath() - + "/extra-info/" - + descriptorFormat.format(new Date(published)) - + extraInfoDigest.substring(0, 1) + "/" - + extraInfoDigest.substring(1, 2) + "/" - + extraInfoDigest; - if (new File(filename2).exists()) { - voteFoundExtraInfos++; - } - } - } - sbr.close(); - voteFoundServerDescs++; - } - } - } - vbr.close(); - sb.append(String.format("%nV, %s, NA, %d/%d (%.1f%%), " - + "%d/%d (%.1f%%)", validAfterTime, - voteFoundServerDescs, voteAllServerDescs, - 100.0D * (double) voteFoundServerDescs / - (double) voteAllServerDescs, - voteFoundExtraInfos, voteAllExtraInfos, - 100.0D * (double) voteFoundExtraInfos / - (double) voteAllExtraInfos)); - } - } else if (line.startsWith("r ")) { - allServerDescs++; - String digest = Hex.encodeHexString(Base64.decodeBase64( - line.split(" ")[3] + "=")).toLowerCase(); - long published = validAfterFormat.parse( - line.split(" ")[4] + " " + line.split(" ")[5]).getTime(); - String filename = outputDirectory.getAbsolutePath() - + "/server-descriptor/" - + descriptorFormat.format(new Date(published)) - + digest.substring(0, 1) + "/" - + digest.substring(1, 2) + "/" + digest; - if (new File (filename).exists()) { - BufferedReader sbr = new BufferedReader(new FileReader( - new File(filename))); - String line2 = null; - while ((line2 = sbr.readLine()) != null) { - if (line2.startsWith("opt extra-info-digest ") || - line2.startsWith("extra-info-digest ")) { - allExtraInfos++; - String extraInfoDigest = line2.startsWith("opt ") ? 
- line2.split(" ")[2].toLowerCase() : - line2.split(" ")[1].toLowerCase(); - String filename2 = outputDirectory.getAbsolutePath() - + "/extra-info/" - + descriptorFormat.format(new Date(published)) - + extraInfoDigest.substring(0, 1) + "/" - + extraInfoDigest.substring(1, 2) + "/" - + extraInfoDigest; - if (new File (filename2).exists()) { - foundExtraInfos++; - } - } - } - sbr.close(); - foundServerDescs++; - } - } - } - br.close(); - sb.append(String.format("%nC, %s, %d/%d (%.1f%%), " - + "%d/%d (%.1f%%), %d/%d (%.1f%%)", - validAfterTime, foundVotes, allVotes, - 100.0D * (double) foundVotes / (double) allVotes, - foundServerDescs, allServerDescs, - 100.0D * (double) foundServerDescs / (double) allServerDescs, - foundExtraInfos, allExtraInfos, - 100.0D * (double) foundExtraInfos / (double) allExtraInfos)); - } - this.logger.info(sb.toString()); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not dump statistics to disk.", - e); - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Could not dump statistics to disk.", - e); - } - } -} diff --git a/src/org/torproject/ernie/db/BridgeDescriptorParser.java b/src/org/torproject/ernie/db/BridgeDescriptorParser.java deleted file mode 100644 index 7773525..0000000 --- a/src/org/torproject/ernie/db/BridgeDescriptorParser.java +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.StringReader; -import java.util.logging.Level; -import java.util.logging.Logger; - -public class BridgeDescriptorParser { - private SanitizedBridgesWriter sbw; - private Logger logger; - public BridgeDescriptorParser(SanitizedBridgesWriter sbw) { - this.sbw = sbw; - this.logger = - Logger.getLogger(BridgeDescriptorParser.class.getName()); - } - public void parse(byte[] allData, String dateTime) { - try { - BufferedReader br = new 
BufferedReader(new StringReader( - new String(allData, "US-ASCII"))); - String line = br.readLine(); - if (line == null) { - return; - } else if (line.startsWith("r ")) { - if (this.sbw != null) { - this.sbw.sanitizeAndStoreNetworkStatus(allData, dateTime); - } - } else if (line.startsWith("router ")) { - if (this.sbw != null) { - this.sbw.sanitizeAndStoreServerDescriptor(allData); - } - } else if (line.startsWith("extra-info ")) { - if (this.sbw != null) { - this.sbw.sanitizeAndStoreExtraInfoDescriptor(allData); - } - } - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not parse bridge descriptor.", - e); - return; - } - } -} - diff --git a/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java deleted file mode 100644 index d03dcaf..0000000 --- a/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java +++ /dev/null @@ -1,174 +0,0 @@ -/* Copyright 2011--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.SortedSet; -import java.util.Stack; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.codec.digest.DigestUtils; -import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; - -public class BridgePoolAssignmentsProcessor { - - public BridgePoolAssignmentsProcessor(File assignmentsDirectory, - File 
sanitizedAssignmentsDirectory) { - - Logger logger = - Logger.getLogger(BridgePoolAssignmentsProcessor.class.getName()); - if (assignmentsDirectory == null || - sanitizedAssignmentsDirectory == null) { - IllegalArgumentException e = new IllegalArgumentException("Neither " - + "assignmentsDirectory nor sanitizedAssignmentsDirectory may " - + "be null!"); - throw e; - } - - List<File> assignmentFiles = new ArrayList<File>(); - Stack<File> files = new Stack<File>(); - files.add(assignmentsDirectory); - while (!files.isEmpty()) { - File file = files.pop(); - if (file.isDirectory()) { - files.addAll(Arrays.asList(file.listFiles())); - } else if (!file.getName().endsWith(".gz")) { - assignmentFiles.add(file); - } - } - - SimpleDateFormat assignmentFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - assignmentFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - SimpleDateFormat filenameFormat = - new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); - filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - for (File assignmentFile : assignmentFiles) { - logger.info("Processing bridge pool assignment file '" - + assignmentFile.getAbsolutePath() + "'..."); - try { - BufferedReader br = null; - if (assignmentFile.getName().endsWith(".gz")) { - br = new BufferedReader(new InputStreamReader( - new GzipCompressorInputStream(new FileInputStream( - assignmentFile)))); - } else { - br = new BufferedReader(new FileReader(assignmentFile)); - } - String line, bridgePoolAssignmentLine = null; - SortedSet<String> sanitizedAssignments = new TreeSet<String>(); - boolean wroteLastLine = false, skipBefore20120504125947 = true; - while ((line = br.readLine()) != null || !wroteLastLine) { - if (line != null && line.startsWith("bridge-pool-assignment ")) { - String[] parts = line.split(" "); - if (parts.length != 3) { - continue; - } - /* TODO Take out this temporary hack to ignore all assignments - * coming from ponticum when byblos was still the official - * BridgeDB host. 
*/ - if (line.compareTo( - "bridge-pool-assignment 2012-05-04 12:59:47") >= 0) { - skipBefore20120504125947 = false; - } - } - if (skipBefore20120504125947) { - if (line == null) { - break; - } else { - continue; - } - } - if (line == null || - line.startsWith("bridge-pool-assignment ")) { - if (bridgePoolAssignmentLine != null) { - try { - long bridgePoolAssignmentTime = assignmentFormat.parse( - bridgePoolAssignmentLine.substring( - "bridge-pool-assignment ".length())).getTime(); - File sanitizedAssignmentsFile = new File( - sanitizedAssignmentsDirectory, filenameFormat.format( - bridgePoolAssignmentTime)); - if (!sanitizedAssignmentsFile.exists()) { - sanitizedAssignmentsFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - sanitizedAssignmentsFile)); - bw.write("@type bridge-pool-assignment 1.0\n"); - bw.write(bridgePoolAssignmentLine + "\n"); - for (String assignmentLine : sanitizedAssignments) { - bw.write(assignmentLine + "\n"); - } - bw.close(); - } - } catch (IOException e) { - logger.log(Level.WARNING, "Could not write sanitized " - + "bridge pool assignment file for line '" - + bridgePoolAssignmentLine + "' to disk. Skipping " - + "bridge pool assignment file '" - + assignmentFile.getAbsolutePath() + "'.", e); - break; - } catch (ParseException e) { - logger.log(Level.WARNING, "Could not write sanitized " - + "bridge pool assignment file for line '" - + bridgePoolAssignmentLine + "' to disk. Skipping " - + "bridge pool assignment file '" - + assignmentFile.getAbsolutePath() + "'.", e); - break; - } - sanitizedAssignments.clear(); - } - if (line == null) { - wroteLastLine = true; - } else { - bridgePoolAssignmentLine = line; - } - } else { - String[] parts = line.split(" "); - if (parts.length < 2 || parts[0].length() < 40) { - logger.warning("Unrecognized line '" + line - + "'. 
Aborting."); - break; - } - String hashedFingerprint = null; - try { - hashedFingerprint = DigestUtils.shaHex(Hex.decodeHex( - line.split(" ")[0].toCharArray())).toLowerCase(); - } catch (DecoderException e) { - logger.warning("Unable to decode hex fingerprint in line '" - + line + "'. Aborting."); - break; - } - String assignmentDetails = line.substring(40); - sanitizedAssignments.add(hashedFingerprint - + assignmentDetails); - } - } - br.close(); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not read bridge pool assignment " - + "file '" + assignmentFile.getAbsolutePath() - + "'. Skipping.", e); - } - } - - logger.info("Finished processing bridge pool assignment file(s)."); - } -} - diff --git a/src/org/torproject/ernie/db/BridgeSnapshotReader.java b/src/org/torproject/ernie/db/BridgeSnapshotReader.java deleted file mode 100644 index f21794d..0000000 --- a/src/org/torproject/ernie/db/BridgeSnapshotReader.java +++ /dev/null @@ -1,220 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.StringReader; -import java.util.HashSet; -import java.util.Set; -import java.util.SortedSet; -import java.util.Stack; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.codec.digest.DigestUtils; -import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; -import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; - -/** - * Reads the half-hourly snapshots of bridge descriptors from Tonga. 
- */ -public class BridgeSnapshotReader { - public BridgeSnapshotReader(BridgeDescriptorParser bdp, - File bridgeDirectoriesDir, File statsDirectory) { - - if (bdp == null || bridgeDirectoriesDir == null || - statsDirectory == null) { - throw new IllegalArgumentException(); - } - - Logger logger = - Logger.getLogger(BridgeSnapshotReader.class.getName()); - SortedSet<String> parsed = new TreeSet<String>(); - File bdDir = bridgeDirectoriesDir; - File pbdFile = new File(statsDirectory, "parsed-bridge-directories"); - boolean modified = false; - if (bdDir.exists()) { - if (pbdFile.exists()) { - logger.fine("Reading file " + pbdFile.getAbsolutePath() + "..."); - try { - BufferedReader br = new BufferedReader(new FileReader(pbdFile)); - String line = null; - while ((line = br.readLine()) != null) { - parsed.add(line); - } - br.close(); - logger.fine("Finished reading file " - + pbdFile.getAbsolutePath() + "."); - } catch (IOException e) { - logger.log(Level.WARNING, "Failed reading file " - + pbdFile.getAbsolutePath() + "!", e); - return; - } - } - logger.fine("Importing files in directory " + bridgeDirectoriesDir - + "/..."); - Set<String> descriptorImportHistory = new HashSet<String>(); - int parsedFiles = 0, skippedFiles = 0, parsedStatuses = 0, - parsedServerDescriptors = 0, skippedServerDescriptors = 0, - parsedExtraInfoDescriptors = 0, skippedExtraInfoDescriptors = 0; - Stack<File> filesInInputDir = new Stack<File>(); - filesInInputDir.add(bdDir); - while (!filesInInputDir.isEmpty()) { - File pop = filesInInputDir.pop(); - if (pop.isDirectory()) { - for (File f : pop.listFiles()) { - filesInInputDir.add(f); - } - } else if (!parsed.contains(pop.getName())) { - try { - FileInputStream in = new FileInputStream(pop); - if (in.available() > 0) { - TarArchiveInputStream tais = null; - if (pop.getName().endsWith(".tar.gz")) { - GzipCompressorInputStream gcis = - new GzipCompressorInputStream(in); - tais = new TarArchiveInputStream(gcis); - } else if 
(pop.getName().endsWith(".tar")) { - tais = new TarArchiveInputStream(in); - } else { - continue; - } - BufferedInputStream bis = new BufferedInputStream(tais); - String fn = pop.getName(); - String dateTime = fn.substring(11, 21) + " " - + fn.substring(22, 24) + ":" + fn.substring(24, 26) - + ":" + fn.substring(26, 28); - while ((tais.getNextTarEntry()) != null) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int len; - byte[] data = new byte[1024]; - while ((len = bis.read(data, 0, 1024)) >= 0) { - baos.write(data, 0, len); - } - byte[] allData = baos.toByteArray(); - if (allData.length == 0) { - continue; - } - String fileDigest = Hex.encodeHexString(DigestUtils.sha( - allData)); - String ascii = new String(allData, "US-ASCII"); - BufferedReader br3 = new BufferedReader(new StringReader( - ascii)); - String firstLine = null; - while ((firstLine = br3.readLine()) != null) { - if (firstLine.startsWith("@")) { - continue; - } else { - break; - } - } - if (firstLine.startsWith("r ")) { - bdp.parse(allData, dateTime); - parsedStatuses++; - } else if (descriptorImportHistory.contains(fileDigest)) { - /* Skip server descriptors or extra-info descriptors if - * we parsed them before. */ - skippedFiles++; - continue; - } else { - int start = -1, sig = -1, end = -1; - String startToken = - firstLine.startsWith("router ") ? 
- "router " : "extra-info "; - String sigToken = "\nrouter-signature\n"; - String endToken = "\n-----END SIGNATURE-----\n"; - while (end < ascii.length()) { - start = ascii.indexOf(startToken, end); - if (start < 0) { - break; - } - sig = ascii.indexOf(sigToken, start); - if (sig < 0) { - break; - } - sig += sigToken.length(); - end = ascii.indexOf(endToken, sig); - if (end < 0) { - break; - } - end += endToken.length(); - byte[] descBytes = new byte[end - start]; - System.arraycopy(allData, start, descBytes, 0, - end - start); - String descriptorDigest = Hex.encodeHexString( - DigestUtils.sha(descBytes)); - if (!descriptorImportHistory.contains( - descriptorDigest)) { - bdp.parse(descBytes, dateTime); - descriptorImportHistory.add(descriptorDigest); - if (firstLine.startsWith("router ")) { - parsedServerDescriptors++; - } else { - parsedExtraInfoDescriptors++; - } - } else { - if (firstLine.startsWith("router ")) { - skippedServerDescriptors++; - } else { - skippedExtraInfoDescriptors++; - } - } - } - } - descriptorImportHistory.add(fileDigest); - parsedFiles++; - } - bis.close(); - } - in.close(); - - /* Let's give some memory back, or we'll run out of it. */ - System.gc(); - - parsed.add(pop.getName()); - modified = true; - } catch (IOException e) { - logger.log(Level.WARNING, "Could not parse bridge snapshot " - + pop.getName() + "!", e); - continue; - } - } - } - logger.fine("Finished importing files in directory " - + bridgeDirectoriesDir + "/. 
In total, we parsed " - + parsedFiles + " files (skipped " + skippedFiles - + ") containing " + parsedStatuses + " statuses, " - + parsedServerDescriptors + " server descriptors (skipped " - + skippedServerDescriptors + "), and " - + parsedExtraInfoDescriptors + " extra-info descriptors " - + "(skipped " + skippedExtraInfoDescriptors + ")."); - if (!parsed.isEmpty() && modified) { - logger.fine("Writing file " + pbdFile.getAbsolutePath() + "..."); - try { - pbdFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile)); - for (String f : parsed) { - bw.append(f + "\n"); - } - bw.close(); - logger.fine("Finished writing file " + pbdFile.getAbsolutePath() - + "."); - } catch (IOException e) { - logger.log(Level.WARNING, "Failed writing file " - + pbdFile.getAbsolutePath() + "!", e); - } - } - } - } -} - diff --git a/src/org/torproject/ernie/db/CachedRelayDescriptorReader.java b/src/org/torproject/ernie/db/CachedRelayDescriptorReader.java deleted file mode 100644 index 4da3e44..0000000 --- a/src/org/torproject/ernie/db/CachedRelayDescriptorReader.java +++ /dev/null @@ -1,235 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.StringReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.TimeZone; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.codec.digest.DigestUtils; - -/** - * Parses all descriptors in local directory cacheddesc/ and sorts them - * into 
directory structure in directory-archive/. - */ -public class CachedRelayDescriptorReader { - public CachedRelayDescriptorReader(RelayDescriptorParser rdp, - List<String> inputDirectories, File statsDirectory) { - - if (rdp == null || inputDirectories == null || - inputDirectories.isEmpty() || statsDirectory == null) { - throw new IllegalArgumentException(); - } - - StringBuilder dumpStats = new StringBuilder("Finished importing " - + "relay descriptors from local Tor data directories:"); - Logger logger = Logger.getLogger( - CachedRelayDescriptorReader.class.getName()); - - /* Read import history containing SHA-1 digests of previously parsed - * statuses and descriptors, so that we can skip them in this run. */ - Set<String> lastImportHistory = new HashSet<String>(), - currentImportHistory = new HashSet<String>(); - File importHistoryFile = new File(statsDirectory, - "cacheddesc-import-history"); - if (importHistoryFile.exists()) { - try { - BufferedReader br = new BufferedReader(new FileReader( - importHistoryFile)); - String line; - while ((line = br.readLine()) != null) { - lastImportHistory.add(line); - } - br.close(); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not read import history from " - + importHistoryFile.getAbsolutePath() + ".", e); - } - } - - /* Read cached descriptors directories. */ - for (String inputDirectory : inputDirectories) { - File cachedDescDir = new File(inputDirectory); - if (!cachedDescDir.exists()) { - logger.warning("Directory " + cachedDescDir.getAbsolutePath() - + " does not exist. 
Skipping."); - continue; - } - logger.fine("Reading " + cachedDescDir.getAbsolutePath() - + " directory."); - for (File f : cachedDescDir.listFiles()) { - try { - // descriptors may contain non-ASCII chars; read as bytes to - // determine digests - BufferedInputStream bis = - new BufferedInputStream(new FileInputStream(f)); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int len; - byte[] data = new byte[1024]; - while ((len = bis.read(data, 0, 1024)) >= 0) { - baos.write(data, 0, len); - } - bis.close(); - byte[] allData = baos.toByteArray(); - if (f.getName().equals("cached-consensus")) { - /* Check if directory information is stale. */ - BufferedReader br = new BufferedReader(new StringReader( - new String(allData, "US-ASCII"))); - String line = null; - while ((line = br.readLine()) != null) { - if (line.startsWith("valid-after ")) { - dumpStats.append("\n" + f.getName() + ": " + line.substring( - "valid-after ".length())); - SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - if (dateTimeFormat.parse(line.substring("valid-after ". - length())).getTime() < System.currentTimeMillis() - - 6L * 60L * 60L * 1000L) { - logger.warning("Cached descriptor files in " - + cachedDescDir.getAbsolutePath() + " are stale. " - + "The valid-after line in cached-consensus is '" - + line + "'."); - dumpStats.append(" (stale!)"); - } - break; - } - } - br.close(); - - /* Parse the cached consensus if we haven't parsed it before - * (but regardless of whether it's stale or not). 
*/ - if (rdp != null) { - String digest = Hex.encodeHexString(DigestUtils.sha( - allData)); - if (!lastImportHistory.contains(digest) && - !currentImportHistory.contains(digest)) { - rdp.parse(allData); - } else { - dumpStats.append(" (skipped)"); - } - currentImportHistory.add(digest); - } - } else if (f.getName().equals("v3-status-votes")) { - int parsedNum = 0, skippedNum = 0; - String ascii = new String(allData, "US-ASCII"); - String startToken = "network-status-version "; - int end = ascii.length(); - int start = ascii.indexOf(startToken); - while (start >= 0 && start < end) { - int next = ascii.indexOf(startToken, start + 1); - if (next < 0) { - next = end; - } - if (start < next) { - byte[] rawNetworkStatusBytes = new byte[next - start]; - System.arraycopy(allData, start, rawNetworkStatusBytes, 0, - next - start); - if (rdp != null) { - String digest = Hex.encodeHexString(DigestUtils.sha( - rawNetworkStatusBytes)); - if (!lastImportHistory.contains(digest) && - !currentImportHistory.contains(digest)) { - rdp.parse(rawNetworkStatusBytes); - parsedNum++; - } else { - skippedNum++; - } - currentImportHistory.add(digest); - } - } - start = next; - } - dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum - + ", skipped " + skippedNum + " votes"); - } else if (f.getName().startsWith("cached-descriptors") || - f.getName().startsWith("cached-extrainfo")) { - String ascii = new String(allData, "US-ASCII"); - int start = -1, sig = -1, end = -1; - String startToken = - f.getName().startsWith("cached-descriptors") ? 
- "router " : "extra-info "; - String sigToken = "\nrouter-signature\n"; - String endToken = "\n-----END SIGNATURE-----\n"; - int parsedNum = 0, skippedNum = 0; - while (end < ascii.length()) { - start = ascii.indexOf(startToken, end); - if (start < 0) { - break; - } - sig = ascii.indexOf(sigToken, start); - if (sig < 0) { - break; - } - sig += sigToken.length(); - end = ascii.indexOf(endToken, sig); - if (end < 0) { - break; - } - end += endToken.length(); - byte[] descBytes = new byte[end - start]; - System.arraycopy(allData, start, descBytes, 0, end - start); - if (rdp != null) { - String digest = Hex.encodeHexString(DigestUtils.sha( - descBytes)); - if (!lastImportHistory.contains(digest) && - !currentImportHistory.contains(digest)) { - rdp.parse(descBytes); - parsedNum++; - } else { - skippedNum++; - } - currentImportHistory.add(digest); - } - } - dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum - + ", skipped " + skippedNum + " " - + (f.getName().startsWith("cached-descriptors") ? - "server" : "extra-info") + " descriptors"); - } - } catch (IOException e) { - logger.log(Level.WARNING, "Failed reading " - + cachedDescDir.getAbsolutePath() + " directory.", e); - } catch (ParseException e) { - logger.log(Level.WARNING, "Failed reading " - + cachedDescDir.getAbsolutePath() + " directory.", e); - } - } - logger.fine("Finished reading " - + cachedDescDir.getAbsolutePath() + " directory."); - } - - /* Write import history containing SHA-1 digests to disk. 
*/ - try { - importHistoryFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - importHistoryFile)); - for (String digest : currentImportHistory) { - bw.write(digest + "\n"); - } - bw.close(); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not write import history to " - + importHistoryFile.getAbsolutePath() + ".", e); - } - - logger.info(dumpStats.toString()); - } -} - diff --git a/src/org/torproject/ernie/db/Configuration.java b/src/org/torproject/ernie/db/Configuration.java deleted file mode 100644 index e130dab..0000000 --- a/src/org/torproject/ernie/db/Configuration.java +++ /dev/null @@ -1,359 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Initialize configuration with hard-coded defaults, overwrite with - * configuration in config file, if exists, and answer Main.java about our - * configuration. 
- */ -public class Configuration { - private boolean writeDirectoryArchives = false; - private String directoryArchivesOutputDirectory = "directory-archive/"; - private boolean importCachedRelayDescriptors = false; - private List<String> cachedRelayDescriptorsDirectory = - new ArrayList<String>(Arrays.asList("cacheddesc/".split(","))); - private boolean importDirectoryArchives = false; - private String directoryArchivesDirectory = "archives/"; - private boolean keepDirectoryArchiveImportHistory = false; - private boolean writeSanitizedBridges = false; - private boolean replaceIPAddressesWithHashes = false; - private long limitBridgeDescriptorMappings = -1L; - private String sanitizedBridgesWriteDirectory = "sanitized-bridges/"; - private boolean importBridgeSnapshots = false; - private String bridgeSnapshotsDirectory = "bridge-directories/"; - private boolean downloadRelayDescriptors = false; - private List<String> downloadFromDirectoryAuthorities = Arrays.asList(( - "86.59.21.38,76.73.17.194:9030,213.115.239.118:443," - + "193.23.244.244,208.83.223.34:443,128.31.0.34:9131," - + "194.109.206.212,212.112.245.170").split(",")); - private boolean downloadCurrentConsensus = true; - private boolean downloadCurrentVotes = true; - private boolean downloadMissingServerDescriptors = true; - private boolean downloadMissingExtraInfoDescriptors = true; - private boolean downloadAllServerDescriptors = false; - private boolean downloadAllExtraInfoDescriptors = false; - private boolean compressRelayDescriptorDownloads; - private boolean downloadExitList = false; - private boolean processBridgePoolAssignments = false; - private String assignmentsDirectory = "assignments/"; - private String sanitizedAssignmentsDirectory = "sanitized-assignments/"; - private boolean processTorperfFiles = false; - private String torperfOutputDirectory = "torperf/"; - private SortedMap<String, String> torperfSources = null; - private List<String> torperfFiles = null; - private boolean 
provideFilesViaRsync = false; - private String rsyncDirectory = "rsync"; - public Configuration() { - - /* Initialize logger. */ - Logger logger = Logger.getLogger(Configuration.class.getName()); - - /* Read config file, if present. */ - File configFile = new File("config"); - if (!configFile.exists()) { - logger.warning("Could not find config file. In the default " - + "configuration, we are not configured to read data from any " - + "data source or write data to any data sink. You need to " - + "create a config file (" + configFile.getAbsolutePath() - + ") and provide at least one data source and one data sink. " - + "Refer to the manual for more information."); - return; - } - String line = null; - boolean containsCachedRelayDescriptorsDirectory = false; - try { - BufferedReader br = new BufferedReader(new FileReader(configFile)); - while ((line = br.readLine()) != null) { - if (line.startsWith("#") || line.length() < 1) { - continue; - } else if (line.startsWith("WriteDirectoryArchives")) { - this.writeDirectoryArchives = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DirectoryArchivesOutputDirectory")) { - this.directoryArchivesOutputDirectory = line.split(" ")[1]; - } else if (line.startsWith("ImportCachedRelayDescriptors")) { - this.importCachedRelayDescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("CachedRelayDescriptorsDirectory")) { - if (!containsCachedRelayDescriptorsDirectory) { - this.cachedRelayDescriptorsDirectory.clear(); - containsCachedRelayDescriptorsDirectory = true; - } - this.cachedRelayDescriptorsDirectory.add(line.split(" ")[1]); - } else if (line.startsWith("ImportDirectoryArchives")) { - this.importDirectoryArchives = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DirectoryArchivesDirectory")) { - this.directoryArchivesDirectory = line.split(" ")[1]; - } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) { - 
this.keepDirectoryArchiveImportHistory = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("WriteSanitizedBridges")) { - this.writeSanitizedBridges = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("ReplaceIPAddressesWithHashes")) { - this.replaceIPAddressesWithHashes = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("LimitBridgeDescriptorMappings")) { - this.limitBridgeDescriptorMappings = Long.parseLong( - line.split(" ")[1]); - } else if (line.startsWith("SanitizedBridgesWriteDirectory")) { - this.sanitizedBridgesWriteDirectory = line.split(" ")[1]; - } else if (line.startsWith("ImportBridgeSnapshots")) { - this.importBridgeSnapshots = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("BridgeSnapshotsDirectory")) { - this.bridgeSnapshotsDirectory = line.split(" ")[1]; - } else if (line.startsWith("DownloadRelayDescriptors")) { - this.downloadRelayDescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DownloadFromDirectoryAuthorities")) { - this.downloadFromDirectoryAuthorities = new ArrayList<String>(); - for (String dir : line.split(" ")[1].split(",")) { - // test if IP:port pair has correct format - if (dir.length() < 1) { - logger.severe("Configuration file contains directory " - + "authority IP:port of length 0 in line '" + line - + "'! 
Exiting!"); - System.exit(1); - } - new URL("http://" + dir + "/"); - this.downloadFromDirectoryAuthorities.add(dir); - } - } else if (line.startsWith("DownloadCurrentConsensus")) { - this.downloadCurrentConsensus = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DownloadCurrentVotes")) { - this.downloadCurrentVotes = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DownloadMissingServerDescriptors")) { - this.downloadMissingServerDescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith( - "DownloadMissingExtraInfoDescriptors")) { - this.downloadMissingExtraInfoDescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DownloadAllServerDescriptors")) { - this.downloadAllServerDescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DownloadAllExtraInfoDescriptors")) { - this.downloadAllExtraInfoDescriptors = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("CompressRelayDescriptorDownloads")) { - this.compressRelayDescriptorDownloads = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("DownloadExitList")) { - this.downloadExitList = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("ProcessBridgePoolAssignments")) { - this.processBridgePoolAssignments = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("AssignmentsDirectory")) { - this.assignmentsDirectory = line.split(" ")[1]; - } else if (line.startsWith("SanitizedAssignmentsDirectory")) { - this.sanitizedAssignmentsDirectory = line.split(" ")[1]; - } else if (line.startsWith("ProcessTorperfFiles")) { - this.processTorperfFiles = Integer.parseInt(line.split(" ")[1]) - != 0; - } else if (line.startsWith("TorperfOutputDirectory")) { - } else if (line.startsWith("TorperfSource")) { - if (this.torperfSources == null) { - this.torperfSources = new TreeMap<String, 
String>(); - } - String[] parts = line.split(" "); - String sourceName = parts[1]; - String baseUrl = parts[2]; - this.torperfSources.put(sourceName, baseUrl); - } else if (line.startsWith("TorperfFiles")) { - if (this.torperfFiles == null) { - this.torperfFiles = new ArrayList<String>(); - } - String[] parts = line.split(" "); - if (parts.length != 5) { - logger.severe("Configuration file contains TorperfFiles " - + "option with wrong number of values in line '" + line - + "'! Exiting!"); - System.exit(1); - } - this.torperfFiles.add(line); - } else if (line.startsWith("ProvideFilesViaRsync")) { - this.provideFilesViaRsync = Integer.parseInt( - line.split(" ")[1]) != 0; - } else if (line.startsWith("RsyncDirectory")) { - this.rsyncDirectory = line.split(" ")[1]; - } else { - logger.severe("Configuration file contains unrecognized " - + "configuration key in line '" + line + "'! Exiting!"); - System.exit(1); - } - } - br.close(); - } catch (ArrayIndexOutOfBoundsException e) { - logger.severe("Configuration file contains configuration key " - + "without value in line '" + line + "'. Exiting!"); - System.exit(1); - } catch (MalformedURLException e) { - logger.severe("Configuration file contains illegal URL or IP:port " - + "pair in line '" + line + "'. Exiting!"); - System.exit(1); - } catch (NumberFormatException e) { - logger.severe("Configuration file contains illegal value in line '" - + line + "' with legal values being 0 or 1. Exiting!"); - System.exit(1); - } catch (IOException e) { - logger.log(Level.SEVERE, "Unknown problem while reading config " - + "file! Exiting!", e); - System.exit(1); - } - - /** Make some checks if configuration is valid. 
*/ - if (!this.importCachedRelayDescriptors && - !this.importDirectoryArchives && !this.downloadRelayDescriptors && - !this.importBridgeSnapshots && - !this.downloadExitList && !this.processBridgePoolAssignments && - !this.writeDirectoryArchives && !this.writeSanitizedBridges && - !this.processTorperfFiles) { - logger.warning("We have not been configured to read data from any " - + "data source or write data to any data sink. You need to " - + "edit your config file (" + configFile.getAbsolutePath() - + ") and provide at least one data source and one data sink. " - + "Refer to the manual for more information."); - } - if ((this.importCachedRelayDescriptors || - this.importDirectoryArchives || this.downloadRelayDescriptors) && - !this.writeDirectoryArchives) { - logger.warning("We are configured to import/download relay " - + "descriptors, but we don't have a single data sink to write " - + "relay descriptors to."); - } - if (!(this.importCachedRelayDescriptors || - this.importDirectoryArchives || this.downloadRelayDescriptors) && - this.writeDirectoryArchives) { - logger.warning("We are configured to write relay descriptor to at " - + "least one data sink, but we don't have a single data source " - + "containing relay descriptors."); - } - if (this.importBridgeSnapshots && !this.writeSanitizedBridges) { - logger.warning("We are configured to import/download bridge " - + "descriptors, but we don't have a single data sink to write " - + "bridge descriptors to."); - } - if (!this.importBridgeSnapshots && this.writeSanitizedBridges) { - logger.warning("We are configured to write bridge descriptor to at " - + "least one data sink, but we don't have a single data source " - + "containing bridge descriptors."); - } - } - public boolean getWriteDirectoryArchives() { - return this.writeDirectoryArchives; - } - public String getDirectoryArchivesOutputDirectory() { - return this.directoryArchivesOutputDirectory; - } - public boolean getImportCachedRelayDescriptors() { - 
return this.importCachedRelayDescriptors; - } - public List<String> getCachedRelayDescriptorDirectory() { - return this.cachedRelayDescriptorsDirectory; - } - public boolean getImportDirectoryArchives() { - return this.importDirectoryArchives; - } - public String getDirectoryArchivesDirectory() { - return this.directoryArchivesDirectory; - } - public boolean getKeepDirectoryArchiveImportHistory() { - return this.keepDirectoryArchiveImportHistory; - } - public boolean getWriteSanitizedBridges() { - return this.writeSanitizedBridges; - } - public boolean getReplaceIPAddressesWithHashes() { - return this.replaceIPAddressesWithHashes; - } - public long getLimitBridgeDescriptorMappings() { - return this.limitBridgeDescriptorMappings; - } - public String getSanitizedBridgesWriteDirectory() { - return this.sanitizedBridgesWriteDirectory; - } - public boolean getImportBridgeSnapshots() { - return this.importBridgeSnapshots; - } - public String getBridgeSnapshotsDirectory() { - return this.bridgeSnapshotsDirectory; - } - public boolean getDownloadRelayDescriptors() { - return this.downloadRelayDescriptors; - } - public List<String> getDownloadFromDirectoryAuthorities() { - return this.downloadFromDirectoryAuthorities; - } - public boolean getDownloadCurrentConsensus() { - return this.downloadCurrentConsensus; - } - public boolean getDownloadCurrentVotes() { - return this.downloadCurrentVotes; - } - public boolean getDownloadMissingServerDescriptors() { - return this.downloadMissingServerDescriptors; - } - public boolean getDownloadMissingExtraInfoDescriptors() { - return this.downloadMissingExtraInfoDescriptors; - } - public boolean getDownloadAllServerDescriptors() { - return this.downloadAllServerDescriptors; - } - public boolean getDownloadAllExtraInfoDescriptors() { - return this.downloadAllExtraInfoDescriptors; - } - public boolean getCompressRelayDescriptorDownloads() { - return this.compressRelayDescriptorDownloads; - } - public boolean getDownloadExitList() { - 
return this.downloadExitList; - } - public boolean getProcessBridgePoolAssignments() { - return processBridgePoolAssignments; - } - public String getAssignmentsDirectory() { - return assignmentsDirectory; - } - public String getSanitizedAssignmentsDirectory() { - return sanitizedAssignmentsDirectory; - } - public boolean getProcessTorperfFiles() { - return this.processTorperfFiles; - } - public String getTorperfOutputDirectory() { - return this.torperfOutputDirectory; - } - public SortedMap<String, String> getTorperfSources() { - return this.torperfSources; - } - public List<String> getTorperfFiles() { - return this.torperfFiles; - } - public boolean getProvideFilesViaRsync() { - return this.provideFilesViaRsync; - } - public String getRsyncDirectory() { - return this.rsyncDirectory; - } -} - diff --git a/src/org/torproject/ernie/db/ExitListDownloader.java b/src/org/torproject/ernie/db/ExitListDownloader.java deleted file mode 100644 index 01a554f..0000000 --- a/src/org/torproject/ernie/db/ExitListDownloader.java +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedInputStream; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.net.HttpURLConnection; -import java.net.URL; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.SortedSet; -import java.util.Stack; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -public class ExitListDownloader { - public ExitListDownloader() { - Logger logger = Logger.getLogger(ExitListDownloader.class.getName()); - try { - logger.fine("Downloading exit list..."); - String exitAddressesUrl = - "http://exitlist.torproject.org/exit-addresses"; - URL u = new URL(exitAddressesUrl); - HttpURLConnection huc = (HttpURLConnection) u.openConnection(); 
- huc.setRequestMethod("GET"); - huc.connect(); - int response = huc.getResponseCode(); - if (response != 200) { - logger.warning("Could not download exit list. Response code " + - response); - return; - } - BufferedInputStream in = new BufferedInputStream( - huc.getInputStream()); - SimpleDateFormat printFormat = - new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - Date downloadedDate = new Date(); - File exitListFile = new File("exitlist/" + printFormat.format( - downloadedDate)); - exitListFile.getParentFile().mkdirs(); - SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - BufferedWriter bw = new BufferedWriter(new FileWriter( - exitListFile)); - bw.write("@type tordnsel 1.0\n"); - bw.write("Downloaded " + dateTimeFormat.format(downloadedDate) - + "\n"); - int len; - byte[] data = new byte[1024]; - while ((len = in.read(data, 0, 1024)) >= 0) { - bw.write(new String(data, 0, len)); - } - in.close(); - bw.close(); - logger.fine("Finished downloading exit list."); - } catch (IOException e) { - logger.log(Level.WARNING, "Failed downloading exit list", e); - return; - } - - /* Write stats. 
*/ - StringBuilder dumpStats = new StringBuilder("Finished downloading " - + "exit list.\nLast three exit lists are:"); - Stack<File> filesInInputDir = new Stack<File>(); - filesInInputDir.add(new File("exitlist")); - SortedSet<File> lastThreeExitLists = new TreeSet<File>(); - while (!filesInInputDir.isEmpty()) { - File pop = filesInInputDir.pop(); - if (pop.isDirectory()) { - SortedSet<File> lastThreeElements = new TreeSet<File>(); - for (File f : pop.listFiles()) { - lastThreeElements.add(f); - } - while (lastThreeElements.size() > 3) { - lastThreeElements.remove(lastThreeElements.first()); - } - for (File f : lastThreeElements) { - filesInInputDir.add(f); - } - } else { - lastThreeExitLists.add(pop); - while (lastThreeExitLists.size() > 3) { - lastThreeExitLists.remove(lastThreeExitLists.first()); - } - } - } - for (File f : lastThreeExitLists) { - dumpStats.append("\n" + f.getName()); - } - logger.info(dumpStats.toString()); - } -} - diff --git a/src/org/torproject/ernie/db/LockFile.java b/src/org/torproject/ernie/db/LockFile.java deleted file mode 100644 index 3255620..0000000 --- a/src/org/torproject/ernie/db/LockFile.java +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.logging.Logger; - -public class LockFile { - - private File lockFile; - private Logger logger; - - public LockFile() { - this.lockFile = new File("lock"); - this.logger = Logger.getLogger(LockFile.class.getName()); - } - - public boolean acquireLock() { - this.logger.fine("Trying to acquire lock..."); - try { - if (this.lockFile.exists()) { - BufferedReader br = new BufferedReader(new FileReader("lock")); - long runStarted = Long.parseLong(br.readLine()); - br.close(); - if (System.currentTimeMillis() - 
runStarted < 55L * 60L * 1000L) { - return false; - } - } - BufferedWriter bw = new BufferedWriter(new FileWriter("lock")); - bw.append("" + System.currentTimeMillis() + "\n"); - bw.close(); - this.logger.fine("Acquired lock."); - return true; - } catch (IOException e) { - this.logger.warning("Caught exception while trying to acquire " - + "lock!"); - return false; - } - } - - public void releaseLock() { - this.logger.fine("Releasing lock..."); - this.lockFile.delete(); - this.logger.fine("Released lock."); - } -} - diff --git a/src/org/torproject/ernie/db/LoggingConfiguration.java b/src/org/torproject/ernie/db/LoggingConfiguration.java deleted file mode 100644 index b83ef53..0000000 --- a/src/org/torproject/ernie/db/LoggingConfiguration.java +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedWriter; -import java.io.FileWriter; -import java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.TimeZone; -import java.util.logging.ConsoleHandler; -import java.util.logging.FileHandler; -import java.util.logging.Formatter; -import java.util.logging.Handler; -import java.util.logging.Level; -import java.util.logging.LogRecord; -import java.util.logging.Logger; -/** - * Initialize logging configuration. - * - * Log levels used by ERNIE: - * - * - SEVERE: An event made it impossible to continue program execution. - * - WARNING: A potential problem occurred that requires the operator to - * look after the otherwise unattended setup - * - INFO: Messages on INFO level are meant to help the operator in making - * sure that operation works as expected. - * - FINE: Debug messages that are used to identify problems and which are - * turned on by default. - * - FINER: More detailed debug messages to investigate problems in more - * detail. Not turned on by default. Increase log file limit when using - * FINER. 
- * - FINEST: Most detailed debug messages. Not used. - */ -public class LoggingConfiguration { - public LoggingConfiguration() { - - /* Remove default console handler. */ - for (Handler h : Logger.getLogger("").getHandlers()) { - Logger.getLogger("").removeHandler(h); - } - - /* Disable logging of internal Sun classes. */ - Logger.getLogger("sun").setLevel(Level.OFF); - - /* Set minimum log level we care about from INFO to FINER. */ - Logger.getLogger("").setLevel(Level.FINER); - - /* Create log handler that writes messages on WARNING or higher to the - * console. */ - final SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - Formatter cf = new Formatter() { - public String format(LogRecord record) { - return dateTimeFormat.format(new Date(record.getMillis())) + " " - + record.getMessage() + "\n"; - } - }; - Handler ch = new ConsoleHandler(); - ch.setFormatter(cf); - ch.setLevel(Level.WARNING); - Logger.getLogger("").addHandler(ch); - - /* Initialize own logger for this class. */ - Logger logger = Logger.getLogger( - LoggingConfiguration.class.getName()); - - /* Create log handler that writes all messages on FINE or higher to a - * local file. */ - Formatter ff = new Formatter() { - public String format(LogRecord record) { - return dateTimeFormat.format(new Date(record.getMillis())) + " " - + record.getLevel() + " " + record.getSourceClassName() + " " - + record.getSourceMethodName() + " " + record.getMessage() - + (record.getThrown() != null ? " " + record.getThrown() : "") - + "\n"; - } - }; - try { - FileHandler fh = new FileHandler("log", 5000000, 5, true); - fh.setFormatter(ff); - fh.setLevel(Level.FINE); - Logger.getLogger("").addHandler(fh); - } catch (SecurityException e) { - logger.log(Level.WARNING, "No permission to create log file. " - + "Logging to file is disabled.", e); - } catch (IOException e) { - logger.log(Level.WARNING, "Could not write to log file. 
Logging to " - + "file is disabled.", e); - } - } -} diff --git a/src/org/torproject/ernie/db/Main.java b/src/org/torproject/ernie/db/Main.java deleted file mode 100644 index 04cc868..0000000 --- a/src/org/torproject/ernie/db/Main.java +++ /dev/null @@ -1,160 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.File; -import java.util.List; -import java.util.logging.Logger; - -/** - * Coordinate downloading and parsing of descriptors and extraction of - * statistically relevant data for later processing with R. - */ -public class Main { - public static void main(String[] args) { - - /* Initialize logging configuration. */ - new LoggingConfiguration(); - - Logger logger = Logger.getLogger(Main.class.getName()); - logger.info("Starting ERNIE."); - - // Initialize configuration - Configuration config = new Configuration(); - - // Use lock file to avoid overlapping runs - LockFile lf = new LockFile(); - if (!lf.acquireLock()) { - logger.severe("Warning: ERNIE is already running or has not exited " - + "cleanly! Exiting!"); - System.exit(1); - } - - // Define stats directory for temporary files - File statsDirectory = new File("stats"); - - // Prepare writing relay descriptor archive to disk - ArchiveWriter aw = config.getWriteDirectoryArchives() ? - new ArchiveWriter( - new File(config.getDirectoryArchivesOutputDirectory())) : null; - - // Prepare relay descriptor parser (only if we are writing stats or - // directory archives to disk) - RelayDescriptorParser rdp = aw != null ? 
- new RelayDescriptorParser(aw) : null; - - // Import/download relay descriptors from the various sources - if (rdp != null) { - RelayDescriptorDownloader rdd = null; - if (config.getDownloadRelayDescriptors()) { - List<String> dirSources = - config.getDownloadFromDirectoryAuthorities(); - rdd = new RelayDescriptorDownloader(rdp, dirSources, - config.getDownloadCurrentConsensus(), - config.getDownloadCurrentVotes(), - config.getDownloadMissingServerDescriptors(), - config.getDownloadMissingExtraInfoDescriptors(), - config.getDownloadAllServerDescriptors(), - config.getDownloadAllExtraInfoDescriptors(), - config.getCompressRelayDescriptorDownloads()); - rdp.setRelayDescriptorDownloader(rdd); - } - if (config.getImportCachedRelayDescriptors()) { - new CachedRelayDescriptorReader(rdp, - config.getCachedRelayDescriptorDirectory(), statsDirectory); - if (aw != null) { - aw.intermediateStats("importing relay descriptors from local " - + "Tor data directories"); - } - } - if (config.getImportDirectoryArchives()) { - new ArchiveReader(rdp, - new File(config.getDirectoryArchivesDirectory()), - statsDirectory, - config.getKeepDirectoryArchiveImportHistory()); - if (aw != null) { - aw.intermediateStats("importing relay descriptors from local " - + "directory"); - } - } - if (rdd != null) { - rdd.downloadDescriptors(); - rdd.writeFile(); - rdd = null; - if (aw != null) { - aw.intermediateStats("downloading relay descriptors from the " - + "directory authorities"); - } - } - } - - // Write output to disk that only depends on relay descriptors - if (aw != null) { - aw.dumpStats(); - aw = null; - } - - // Prepare sanitized bridge descriptor writer - SanitizedBridgesWriter sbw = config.getWriteSanitizedBridges() ? 
- new SanitizedBridgesWriter( - new File(config.getSanitizedBridgesWriteDirectory()), - statsDirectory, config.getReplaceIPAddressesWithHashes(), - config.getLimitBridgeDescriptorMappings()) : null; - - // Prepare bridge descriptor parser - BridgeDescriptorParser bdp = config.getWriteSanitizedBridges() - ? new BridgeDescriptorParser(sbw) : null; - - // Import bridge descriptors - if (bdp != null && config.getImportBridgeSnapshots()) { - new BridgeSnapshotReader(bdp, - new File(config.getBridgeSnapshotsDirectory()), - statsDirectory); - } - - // Finish writing sanitized bridge descriptors to disk - if (sbw != null) { - sbw.finishWriting(); - sbw = null; - } - - // Download exit list and store it to disk - if (config.getDownloadExitList()) { - new ExitListDownloader(); - } - - // Process bridge pool assignments - if (config.getProcessBridgePoolAssignments()) { - new BridgePoolAssignmentsProcessor( - new File(config.getAssignmentsDirectory()), - new File(config.getSanitizedAssignmentsDirectory())); - } - - // Process Torperf files - if (config.getProcessTorperfFiles()) { - new TorperfDownloader(new File(config.getTorperfOutputDirectory()), - config.getTorperfSources(), config.getTorperfFiles()); - } - - // Copy recently published files to a local directory that can then - // be served via rsync. - if (config.getProvideFilesViaRsync()) { - new RsyncDataProvider( - !config.getWriteDirectoryArchives() ? null : - new File(config.getDirectoryArchivesOutputDirectory()), - !config.getWriteSanitizedBridges() ? null : - new File(config.getSanitizedBridgesWriteDirectory()), - !config.getProcessBridgePoolAssignments() ? null : - new File(config.getSanitizedAssignmentsDirectory()), - config.getDownloadExitList(), - !config.getProcessTorperfFiles() ? 
null : - new File(config.getTorperfOutputDirectory()), - new File(config.getRsyncDirectory())); - } - - // Remove lock file - lf.releaseLock(); - - logger.info("Terminating ERNIE."); - } -} diff --git a/src/org/torproject/ernie/db/RelayDescriptorDownloader.java b/src/org/torproject/ernie/db/RelayDescriptorDownloader.java deleted file mode 100644 index f7e9468..0000000 --- a/src/org/torproject/ernie/db/RelayDescriptorDownloader.java +++ /dev/null @@ -1,821 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.net.HttpURLConnection; -import java.net.URL; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; -import java.util.zip.InflaterInputStream; - -/** - * Downloads relay descriptors from the directory authorities via HTTP. - * Keeps a list of missing descriptors that gets updated by parse results - * from <code>RelayDescriptorParser</code> and downloads all missing - * descriptors that have been published in the last 24 hours. Also - * downloads all server and extra-info descriptors known to a directory - * authority at most once a day. - */ -public class RelayDescriptorDownloader { - - /** - * Text file containing the descriptors that we are missing and that we - * want to download. 
Lines are formatted as: - * - * - "consensus,<validafter>,<parsed>", - * - "vote,<validafter>,<fingerprint>,<parsed>", - * - "server,<published>,<relayid>,<descid>,<parsed>", or - * - "extra,<published>,<relayid>,<descid>,<parsed>". - */ - private File missingDescriptorsFile; - - /** - * Relay descriptors that we are missing and that we want to download - * either in this execution or write to disk and try next time. Map keys - * contain comma-separated values as in the missing descriptors files - * without the "parsed" column. Map values contain the "parsed" column. - */ - private SortedMap<String, String> missingDescriptors; - - /** - * Text file containing the IP addresses (and Dir ports if not 80) of - * directory authorities and when we last downloaded all server and - * extra-info descriptors from them, so that we can avoid downloading - * them too often. - */ - private File lastDownloadedAllDescriptorsFile; - - /** - * Map of directory authorities and when we last downloaded all server - * and extra-info descriptors from them. Map keys are IP addresses (and - * Dir ports if not 80), map values are timestamps. - */ - private Map<String, String> lastDownloadedAllDescriptors; - - /** - * <code>RelayDescriptorParser</code> that we will hand over the - * downloaded descriptors for parsing. - */ - private RelayDescriptorParser rdp; - - /** - * Directory authorities that we will try to download missing - * descriptors from. - */ - private List<String> authorities; - - /** - * Should we try to download the current consensus if we don't have it? - */ - private boolean downloadCurrentConsensus; - - /** - * Should we try to download current votes if we don't have them? - */ - private boolean downloadCurrentVotes; - - /** - * Should we try to download missing server descriptors that have been - * published within the past 24 hours? 
- */ - private boolean downloadMissingServerDescriptors; - - /** - * Should we try to download missing extra-info descriptors that have - * been published within the past 24 hours? - */ - private boolean downloadMissingExtraInfos; - - /** - * Should we try to download all server descriptors from the authorities - * once every 24 hours? - */ - private boolean downloadAllServerDescriptors; - - /** - * Should we try to download all extra-info descriptors from the - * authorities once every 24 hours? - */ - private boolean downloadAllExtraInfos; - - /** - * Should we download zlib-compressed versions of descriptors by adding - * ".z" to URLs? - */ - private boolean downloadCompressed; - - /** - * valid-after time that we expect the current consensus and votes to - * have, formatted "yyyy-MM-dd HH:mm:ss". We only expect to find - * consensuses and votes with this valid-after time on the directory - * authorities. This time is initialized as the beginning of the current - * hour. - */ - private String currentValidAfter; - - /** - * Cut-off time for missing server and extra-info descriptors, formatted - * "yyyy-MM-dd HH:mm:ss". This time is initialized as the current system - * time minus 24 hours. - */ - private String descriptorCutOff; - - /** - * Cut-off time for downloading all server and extra-info descriptors - * from the directory authorities, formatted "yyyy-MM-dd HH:mm:ss". This - * time is initialized as the current system time minus 23:30 hours. - */ - private String downloadAllDescriptorsCutOff; - - /** - * Directory authorities that we plan to download all server and - * extra-info descriptors from in this execution. - */ - private Set<String> downloadAllDescriptorsFromAuthorities; - - /** - * Current timestamp that is written to the missing list for descriptors - * that we parsed in this execution and for authorities that we - * downloaded all server and extra-info descriptors from. - */ - private String currentTimestamp; - - /** - * Logger for this class. 
- */ - private Logger logger; - - /** - * Number of descriptors requested by directory authority to be included - * in logs. - */ - private Map<String, Integer> requestsByAuthority; - - /** - * Counters for descriptors that we had on the missing list at the - * beginning of the execution, that we added to the missing list, - * that we requested, and that we successfully downloaded in this - * execution. - */ - private int oldMissingConsensuses = 0, oldMissingVotes = 0, - oldMissingServerDescriptors = 0, oldMissingExtraInfoDescriptors = 0, - newMissingConsensuses = 0, newMissingVotes = 0, - newMissingServerDescriptors = 0, newMissingExtraInfoDescriptors = 0, - requestedConsensuses = 0, requestedVotes = 0, - requestedMissingServerDescriptors = 0, - requestedAllServerDescriptors = 0, - requestedMissingExtraInfoDescriptors = 0, - requestedAllExtraInfoDescriptors = 0, downloadedConsensuses = 0, - downloadedVotes = 0, downloadedMissingServerDescriptors = 0, - downloadedAllServerDescriptors = 0, - downloadedMissingExtraInfoDescriptors = 0, - downloadedAllExtraInfoDescriptors = 0; - - /** - * Initializes this class, including reading in missing descriptors from - * <code>stats/missing-relay-descriptors</code> and the times when we - * last downloaded all server and extra-info descriptors from - * <code>stats/last-downloaded-all-descriptors</code>. - */ - public RelayDescriptorDownloader(RelayDescriptorParser rdp, - List<String> authorities, boolean downloadCurrentConsensus, - boolean downloadCurrentVotes, - boolean downloadMissingServerDescriptors, - boolean downloadMissingExtraInfos, - boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos, - boolean downloadCompressed) { - - /* Memorize argument values. 
*/ - this.rdp = rdp; - this.authorities = new ArrayList<String>(authorities); - this.downloadCurrentConsensus = downloadCurrentConsensus; - this.downloadCurrentVotes = downloadCurrentVotes; - this.downloadMissingServerDescriptors = - downloadMissingServerDescriptors; - this.downloadMissingExtraInfos = downloadMissingExtraInfos; - this.downloadAllServerDescriptors = downloadAllServerDescriptors; - this.downloadAllExtraInfos = downloadAllExtraInfos; - this.downloadCompressed = downloadCompressed; - - /* Shuffle list of authorities for better load balancing over time. */ - Collections.shuffle(this.authorities); - - /* Initialize logger. */ - this.logger = Logger.getLogger( - RelayDescriptorDownloader.class.getName()); - - /* Prepare cut-off times and timestamp for the missing descriptors - * list and the list of authorities to download all server and - * extra-info descriptors from. */ - SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - format.setTimeZone(TimeZone.getTimeZone("UTC")); - long now = System.currentTimeMillis(); - this.currentValidAfter = format.format((now / (60L * 60L * 1000L)) * - (60L * 60L * 1000L)); - this.descriptorCutOff = format.format(now - 24L * 60L * 60L * 1000L); - this.currentTimestamp = format.format(now); - this.downloadAllDescriptorsCutOff = format.format(now - - 23L * 60L * 60L * 1000L - 30L * 60L * 1000L); - - /* Read list of missing descriptors from disk and memorize those that - * we are interested in and that are likely to be found on the - * directory authorities. 
*/ - this.missingDescriptors = new TreeMap<String, String>(); - this.missingDescriptorsFile = new File( - "stats/missing-relay-descriptors"); - if (this.missingDescriptorsFile.exists()) { - try { - this.logger.fine("Reading file " - + this.missingDescriptorsFile.getAbsolutePath() + "..."); - BufferedReader br = new BufferedReader(new FileReader( - this.missingDescriptorsFile)); - String line; - while ((line = br.readLine()) != null) { - if (line.split(",").length > 2) { - String published = line.split(",")[1]; - if (((line.startsWith("consensus,") || - line.startsWith("vote,")) && - this.currentValidAfter.equals(published)) || - ((line.startsWith("server,") || - line.startsWith("extra,")) && - this.descriptorCutOff.compareTo(published) < 0)) { - if (!line.endsWith("NA")) { - /* Not missing. */ - } else if (line.startsWith("consensus,")) { - oldMissingConsensuses++; - } else if (line.startsWith("vote,")) { - oldMissingVotes++; - } else if (line.startsWith("server,")) { - oldMissingServerDescriptors++; - } else if (line.startsWith("extra,")) { - oldMissingExtraInfoDescriptors++; - } - int separateAt = line.lastIndexOf(","); - this.missingDescriptors.put(line.substring(0, - separateAt), line.substring(separateAt + 1)); - } - } else { - this.logger.fine("Invalid line '" + line + "' in " - + this.missingDescriptorsFile.getAbsolutePath() - + ". Ignoring."); - } - } - br.close(); - this.logger.fine("Finished reading file " - + this.missingDescriptorsFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to read file " - + this.missingDescriptorsFile.getAbsolutePath() - + "! This means that we might forget to dowload relay " - + "descriptors we are missing.", e); - } - } - - /* Read list of directory authorities and when we last downloaded all - * server and extra-info descriptors from them. 
*/ - this.lastDownloadedAllDescriptors = new HashMap<String, String>(); - this.lastDownloadedAllDescriptorsFile = new File( - "stats/last-downloaded-all-descriptors"); - if (this.lastDownloadedAllDescriptorsFile.exists()) { - try { - this.logger.fine("Reading file " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() - + "..."); - BufferedReader br = new BufferedReader(new FileReader( - this.lastDownloadedAllDescriptorsFile)); - String line; - while ((line = br.readLine()) != null) { - if (line.split(",").length != 2) { - this.logger.fine("Invalid line '" + line + "' in " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() - + ". Ignoring."); - } else { - String[] parts = line.split(","); - String authority = parts[0]; - String lastDownloaded = parts[1]; - this.lastDownloadedAllDescriptors.put(authority, - lastDownloaded); - } - } - br.close(); - this.logger.fine("Finished reading file " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() - + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to read file " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() - + "! This means that we might download all server and " - + "extra-info descriptors more often than we should.", e); - } - } - - /* Make a list of at most two directory authorities that we want to - * download all server and extra-info descriptors from. */ - this.downloadAllDescriptorsFromAuthorities = new HashSet<String>(); - for (String authority : this.authorities) { - if (!this.lastDownloadedAllDescriptors.containsKey(authority) || - this.lastDownloadedAllDescriptors.get(authority).compareTo( - this.downloadAllDescriptorsCutOff) < 0) { - this.downloadAllDescriptorsFromAuthorities.add(authority); - } - if (this.downloadAllDescriptorsFromAuthorities.size() >= 2) { - break; - } - } - - /* Prepare statistics on this execution. 
*/ - this.requestsByAuthority = new HashMap<String, Integer>(); - for (String authority : this.authorities) { - this.requestsByAuthority.put(authority, 0); - } - } - - /** - * We have parsed a consensus. Take this consensus off the missing list - * and add the votes created by the given <code>authorities</code> and - * the <code>serverDescriptors</code> which are in the format - * "<published>,<relayid>,<descid>" to that list. - */ - public void haveParsedConsensus(String validAfter, - Set<String> authorities, Set<String> serverDescriptors) { - - /* Mark consensus as parsed. */ - if (this.currentValidAfter.equals(validAfter)) { - String consensusKey = "consensus," + validAfter; - this.missingDescriptors.put(consensusKey, this.currentTimestamp); - - /* Add votes to missing list. */ - for (String authority : authorities) { - String voteKey = "vote," + validAfter + "," + authority; - if (!this.missingDescriptors.containsKey(voteKey)) { - this.missingDescriptors.put(voteKey, "NA"); - this.newMissingVotes++; - } - } - } - - /* Add server descriptors to missing list. */ - for (String serverDescriptor : serverDescriptors) { - String published = serverDescriptor.split(",")[0]; - if (this.descriptorCutOff.compareTo(published) < 0) { - String serverDescriptorKey = "server," + serverDescriptor; - if (!this.missingDescriptors.containsKey( - serverDescriptorKey)) { - this.missingDescriptors.put(serverDescriptorKey, "NA"); - this.newMissingServerDescriptors++; - } - } - } - } - - /** - * We have parsed a vote. Take this vote off the missing list and add - * the <code>serverDescriptors</code> which are in the format - * "<published>,<relayid>,<descid>" to that list. - */ - public void haveParsedVote(String validAfter, String fingerprint, - Set<String> serverDescriptors) { - - /* Mark vote as parsed. 
*/ - if (this.currentValidAfter.equals(validAfter)) { - String voteKey = "vote," + validAfter + "," + fingerprint; - this.missingDescriptors.put(voteKey, this.currentTimestamp); - } - - /* Add server descriptors to missing list. */ - for (String serverDescriptor : serverDescriptors) { - String published = serverDescriptor.split(",")[0]; - if (this.descriptorCutOff.compareTo(published) < 0) { - String serverDescriptorKey = "server," + serverDescriptor; - if (!this.missingDescriptors.containsKey( - serverDescriptorKey)) { - this.missingDescriptors.put(serverDescriptorKey, "NA"); - this.newMissingServerDescriptors++; - } - } - } - } - - /** - * We have parsed a server descriptor. Take this server descriptor off - * the missing list and put the extra-info descriptor digest on that - * list. - */ - public void haveParsedServerDescriptor(String published, - String relayIdentity, String serverDescriptorDigest, - String extraInfoDigest) { - - /* Mark server descriptor as parsed. */ - if (this.descriptorCutOff.compareTo(published) < 0) { - String serverDescriptorKey = "server," + published + "," - + relayIdentity + "," + serverDescriptorDigest; - this.missingDescriptors.put(serverDescriptorKey, - this.currentTimestamp); - - /* Add extra-info descriptor to missing list. */ - if (extraInfoDigest != null) { - String extraInfoKey = "extra," + published + "," - + relayIdentity + "," + extraInfoDigest; - if (!this.missingDescriptors.containsKey(extraInfoKey)) { - this.missingDescriptors.put(extraInfoKey, "NA"); - this.newMissingExtraInfoDescriptors++; - } - } - } - } - - /** - * We have parsed an extra-info descriptor. Take it off the missing - * list. 
- */ - public void haveParsedExtraInfoDescriptor(String published, - String relayIdentity, String extraInfoDigest) { - if (this.descriptorCutOff.compareTo(published) < 0) { - String extraInfoKey = "extra," + published + "," - + relayIdentity + "," + extraInfoDigest; - this.missingDescriptors.put(extraInfoKey, this.currentTimestamp); - } - } - - /** - * Downloads missing descriptors that we think might still be available - * on the directory authorities as well as all server and extra-info - * descriptors once per day. - */ - public void downloadDescriptors() { - - /* Put the current consensus on the missing list, unless we already - * have it. */ - String consensusKey = "consensus," + this.currentValidAfter; - if (!this.missingDescriptors.containsKey(consensusKey)) { - this.missingDescriptors.put(consensusKey, "NA"); - this.newMissingConsensuses++; - } - - /* Download descriptors from authorities which are in random order, so - * that we distribute the load somewhat fairly over time. */ - for (String authority : authorities) { - - /* Make all requests to an authority in a single try block. If - * something goes wrong with this authority, we give up on all - * downloads and continue with the next authority. */ - /* TODO Some authorities provide very little bandwidth and could - * slow down the entire download process. Ponder adding a timeout of - * 3 or 5 minutes per authority to avoid getting in the way of the - * next execution. */ - try { - - /* Start with downloading the current consensus, unless we already - * have it. */ - if (downloadCurrentConsensus) { - if (this.missingDescriptors.containsKey(consensusKey) && - this.missingDescriptors.get(consensusKey).equals("NA")) { - this.requestedConsensuses++; - this.downloadedConsensuses += - this.downloadResourceFromAuthority(authority, - "/tor/status-vote/current/consensus"); - } - } - - /* Next, try to download current votes that we're missing. 
*/ - if (downloadCurrentVotes) { - String voteKeyPrefix = "vote," + this.currentValidAfter; - SortedSet<String> fingerprints = new TreeSet<String>(); - for (Map.Entry<String, String> e : - this.missingDescriptors.entrySet()) { - if (e.getValue().equals("NA") && - e.getKey().startsWith(voteKeyPrefix)) { - String fingerprint = e.getKey().split(",")[2]; - fingerprints.add(fingerprint); - } - } - for (String fingerprint : fingerprints) { - this.requestedVotes++; - this.downloadedVotes += - this.downloadResourceFromAuthority(authority, - "/tor/status-vote/current/" + fingerprint); - } - } - - /* Download either all server and extra-info descriptors or only - * those that we're missing. Start with server descriptors, then - * request extra-info descriptors. */ - List<String> types = new ArrayList<String>(Arrays.asList( - "server,extra".split(","))); - for (String type : types) { - - /* Download all server or extra-info descriptors from this - * authority if we haven't done so for 24 hours and if we're - * configured to do so. */ - if (this.downloadAllDescriptorsFromAuthorities.contains( - authority) && ((type.equals("server") && - this.downloadAllServerDescriptors) || - (type.equals("extra") && this.downloadAllExtraInfos))) { - int downloadedAllDescriptors = - this.downloadResourceFromAuthority(authority, "/tor/" - + type + "/all"); - if (type.equals("server")) { - this.requestedAllServerDescriptors++; - this.downloadedAllServerDescriptors += - downloadedAllDescriptors; - } else { - this.requestedAllExtraInfoDescriptors++; - this.downloadedAllExtraInfoDescriptors += - downloadedAllDescriptors; - } - - /* Download missing server or extra-info descriptors if we're - * configured to do so. 
*/ - } else if ((type.equals("server") && - this.downloadMissingServerDescriptors) || - (type.equals("extra") && this.downloadMissingExtraInfos)) { - - /* Go through the list of missing descriptors of this type - * and combine the descriptor identifiers to a URL of up to - * 96 descriptors that we can download at once. */ - SortedSet<String> descriptorIdentifiers = - new TreeSet<String>(); - for (Map.Entry<String, String> e : - this.missingDescriptors.entrySet()) { - if (e.getValue().equals("NA") && - e.getKey().startsWith(type + ",") && - this.descriptorCutOff.compareTo( - e.getKey().split(",")[1]) < 0) { - String descriptorIdentifier = e.getKey().split(",")[3]; - descriptorIdentifiers.add(descriptorIdentifier); - } - } - StringBuilder combinedResource = null; - int descriptorsInCombinedResource = 0, - requestedDescriptors = 0, downloadedDescriptors = 0; - for (String descriptorIdentifier : descriptorIdentifiers) { - if (descriptorsInCombinedResource >= 96) { - requestedDescriptors += descriptorsInCombinedResource; - downloadedDescriptors += - this.downloadResourceFromAuthority(authority, - combinedResource.toString()); - combinedResource = null; - descriptorsInCombinedResource = 0; - } - if (descriptorsInCombinedResource == 0) { - combinedResource = new StringBuilder("/tor/" + type - + "/d/" + descriptorIdentifier); - } else { - combinedResource.append("+" + descriptorIdentifier); - } - descriptorsInCombinedResource++; - } - if (descriptorsInCombinedResource > 0) { - requestedDescriptors += descriptorsInCombinedResource; - downloadedDescriptors += - this.downloadResourceFromAuthority(authority, - combinedResource.toString()); - } - if (type.equals("server")) { - this.requestedMissingServerDescriptors += - requestedDescriptors; - this.downloadedMissingServerDescriptors += - downloadedDescriptors; - } else { - this.requestedMissingExtraInfoDescriptors += - requestedDescriptors; - this.downloadedMissingExtraInfoDescriptors += - downloadedDescriptors; - } - } - } - - 
/* If a download failed, stop requesting descriptors from this - * authority and move on to the next. */ - } catch (IOException e) { - logger.log(Level.FINE, "Failed downloading from " + authority - + "!", e); - } - } - } - - /** - * Attempts to download one or more descriptors identified by a resource - * string from a directory authority and passes the returned - * descriptor(s) to the <code>RelayDescriptorParser</code> upon success. - * Returns the number of descriptors contained in the reply. Throws an - * <code>IOException</code> if something goes wrong while downloading. - */ - private int downloadResourceFromAuthority(String authority, - String resource) throws IOException { - byte[] allData = null; - this.requestsByAuthority.put(authority, - this.requestsByAuthority.get(authority) + 1); - /* TODO Disable compressed downloads for extra-info descriptors, - * because zlib decompression doesn't work correctly. Figure out why - * this is and fix it. */ - String fullUrl = "http://" + authority + resource - + (this.downloadCompressed && !resource.startsWith("/tor/extra/") - ? ".z" : ""); - URL u = new URL(fullUrl); - HttpURLConnection huc = (HttpURLConnection) u.openConnection(); - huc.setRequestMethod("GET"); - huc.connect(); - int response = huc.getResponseCode(); - if (response == 200) { - BufferedInputStream in = this.downloadCompressed && - !resource.startsWith("/tor/extra/") - ? new BufferedInputStream(new InflaterInputStream( - huc.getInputStream())) - : new BufferedInputStream(huc.getInputStream()); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int len; - byte[] data = new byte[1024]; - while ((len = in.read(data, 0, 1024)) >= 0) { - baos.write(data, 0, len); - } - in.close(); - allData = baos.toByteArray(); - } - logger.fine("Downloaded " + fullUrl + " -> " + response + " (" - + (allData == null ? 
0 : allData.length) + " bytes)"); - int receivedDescriptors = 0; - if (allData != null) { - if (resource.startsWith("/tor/status-vote/current/")) { - this.rdp.parse(allData); - receivedDescriptors = 1; - } else if (resource.startsWith("/tor/server/") || - resource.startsWith("/tor/extra/")) { - if (resource.equals("/tor/server/all")) { - this.lastDownloadedAllDescriptors.put(authority, - this.currentTimestamp); - } - String ascii = null; - try { - ascii = new String(allData, "US-ASCII"); - } catch (UnsupportedEncodingException e) { - /* No way that US-ASCII is not supported. */ - } - int start = -1, sig = -1, end = -1; - String startToken = resource.startsWith("/tor/server/") ? - "router " : "extra-info "; - String sigToken = "\nrouter-signature\n"; - String endToken = "\n-----END SIGNATURE-----\n"; - while (end < ascii.length()) { - start = ascii.indexOf(startToken, end); - if (start < 0) { - break; - } - sig = ascii.indexOf(sigToken, start); - if (sig < 0) { - break; - } - sig += sigToken.length(); - end = ascii.indexOf(endToken, sig); - if (end < 0) { - break; - } - end += endToken.length(); - byte[] descBytes = new byte[end - start]; - System.arraycopy(allData, start, descBytes, 0, end - start); - this.rdp.parse(descBytes); - receivedDescriptors++; - } - } - } - return receivedDescriptors; - } - - /** - * Writes status files to disk and logs statistics about downloading - * relay descriptors in this execution. - */ - public void writeFile() { - - /* Write missing descriptors file to disk. 
*/ - int missingConsensuses = 0, missingVotes = 0, - missingServerDescriptors = 0, missingExtraInfoDescriptors = 0; - try { - this.logger.fine("Writing file " - + this.missingDescriptorsFile.getAbsolutePath() + "..."); - this.missingDescriptorsFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.missingDescriptorsFile)); - for (Map.Entry<String, String> e : - this.missingDescriptors.entrySet()) { - String key = e.getKey(), value = e.getValue(); - if (!value.equals("NA")) { - /* Not missing. */ - } else if (key.startsWith("consensus,")) { - missingConsensuses++; - } else if (key.startsWith("vote,")) { - missingVotes++; - } else if (key.startsWith("server,")) { - missingServerDescriptors++; - } else if (key.startsWith("extra,")) { - missingExtraInfoDescriptors++; - } - bw.write(key + "," + value + "\n"); - } - bw.close(); - this.logger.fine("Finished writing file " - + this.missingDescriptorsFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed writing " - + this.missingDescriptorsFile.getAbsolutePath() + "!", e); - } - - /* Write text file containing the directory authorities and when we - * last downloaded all server and extra-info descriptors from them to - * disk. 
*/ - try { - this.logger.fine("Writing file " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() - + "..."); - this.lastDownloadedAllDescriptorsFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.lastDownloadedAllDescriptorsFile)); - for (Map.Entry<String, String> e : - this.lastDownloadedAllDescriptors.entrySet()) { - String authority = e.getKey(); - String lastDownloaded = e.getValue(); - bw.write(authority + "," + lastDownloaded + "\n"); - } - bw.close(); - this.logger.fine("Finished writing file " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() - + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed writing " - + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + "!", - e); - } - - /* Log statistics about this execution. */ - this.logger.info("Finished downloading relay descriptors from the " - + "directory authorities."); - this.logger.info("At the beginning of this execution, we were " - + "missing " + oldMissingConsensuses + " consensus(es), " - + oldMissingVotes + " vote(s), " + oldMissingServerDescriptors - + " server descriptor(s), and " + oldMissingExtraInfoDescriptors - + " extra-info descriptor(s)."); - this.logger.info("During this execution, we added " - + this.newMissingConsensuses + " consensus(es), " - + this.newMissingVotes + " vote(s), " - + this.newMissingServerDescriptors + " server descriptor(s), and " - + this.newMissingExtraInfoDescriptors + " extra-info " - + "descriptor(s) to the missing list, some of which we also " - + "requested and removed from the list again."); - this.logger.info("We requested " + this.requestedConsensuses - + " consensus(es), " + this.requestedVotes + " vote(s), " - + this.requestedMissingServerDescriptors + " missing server " - + "descriptor(s), " + this.requestedAllServerDescriptors - + " times all server descriptors, " - + this.requestedMissingExtraInfoDescriptors + " missing " - + "extra-info descriptor(s), and " - + 
this.requestedAllExtraInfoDescriptors + " times all extra-info " - + "descriptors from the directory authorities."); - StringBuilder sb = new StringBuilder(); - for (String authority : this.authorities) { - sb.append(" " + authority + "=" - + this.requestsByAuthority.get(authority)); - } - this.logger.info("We sent these numbers of requests to the directory " - + "authorities:" + sb.toString()); - this.logger.info("We successfully downloaded " - + this.downloadedConsensuses + " consensus(es), " - + this.downloadedVotes + " vote(s), " - + this.downloadedMissingServerDescriptors + " missing server " - + "descriptor(s), " + this.downloadedAllServerDescriptors - + " server descriptor(s) when downloading all descriptors, " - + this.downloadedMissingExtraInfoDescriptors + " missing " - + "extra-info descriptor(s) and " - + this.downloadedAllExtraInfoDescriptors + " extra-info " - + "descriptor(s) when downloading all descriptors."); - this.logger.info("At the end of this execution, we are missing " - + missingConsensuses + " consensus(es), " + missingVotes - + " vote(s), " + missingServerDescriptors + " server " - + "descriptor(s), and " + missingExtraInfoDescriptors - + " extra-info descriptor(s), some of which we may try in the next " - + "execution."); - } -} - diff --git a/src/org/torproject/ernie/db/RelayDescriptorParser.java b/src/org/torproject/ernie/db/RelayDescriptorParser.java deleted file mode 100644 index be54656..0000000 --- a/src/org/torproject/ernie/db/RelayDescriptorParser.java +++ /dev/null @@ -1,265 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.StringReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.SortedSet; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -import 
org.apache.commons.codec.binary.Base64; -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.codec.digest.DigestUtils; - -/** - * Parses relay descriptors including network status consensuses and - * votes, server and extra-info descriptors, and passes the results to the - * stats handlers, to the archive writer, or to the relay descriptor - * downloader. - */ -public class RelayDescriptorParser { - - /** - * File writer that writes descriptor contents to files in a - * directory-archive directory structure. - */ - private ArchiveWriter aw; - - /** - * Missing descriptor downloader that uses the parse results to learn - * which descriptors we are missing and want to download. - */ - private RelayDescriptorDownloader rdd; - - /** - * Logger for this class. - */ - private Logger logger; - - private SimpleDateFormat dateTimeFormat; - - /** - * Initializes this class. - */ - public RelayDescriptorParser(ArchiveWriter aw) { - this.aw = aw; - - /* Initialize logger. */ - this.logger = Logger.getLogger(RelayDescriptorParser.class.getName()); - - this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - } - - public void setRelayDescriptorDownloader( - RelayDescriptorDownloader rdd) { - this.rdd = rdd; - } - - public void parse(byte[] data) { - try { - /* Convert descriptor to ASCII for parsing. This means we'll lose - * the non-ASCII chars, but we don't care about them for parsing - * anyway. */ - BufferedReader br = new BufferedReader(new StringReader(new String( - data, "US-ASCII"))); - String line; - do { - line = br.readLine(); - } while (line != null && line.startsWith("@")); - if (line == null) { - this.logger.fine("We were given an empty descriptor for " - + "parsing. 
Ignoring."); - return; - } - SimpleDateFormat parseFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - parseFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - if (line.equals("network-status-version 3")) { - // TODO when parsing the current consensus, check the fresh-until - // time to see when we switch from hourly to half-hourly - // consensuses - boolean isConsensus = true; - String validAfterTime = null, fingerprint = null, - dirSource = null; - long validAfter = -1L, dirKeyPublished = -1L; - SortedSet<String> dirSources = new TreeSet<String>(); - SortedSet<String> serverDescriptors = new TreeSet<String>(); - SortedSet<String> hashedRelayIdentities = new TreeSet<String>(); - StringBuilder certificateStringBuilder = null; - String certificateString = null; - while ((line = br.readLine()) != null) { - if (certificateStringBuilder != null) { - if (line.startsWith("r ")) { - certificateString = certificateStringBuilder.toString(); - certificateStringBuilder = null; - } else { - certificateStringBuilder.append(line + "\n"); - } - } - if (line.equals("vote-status vote")) { - isConsensus = false; - } else if (line.startsWith("valid-after ")) { - validAfterTime = line.substring("valid-after ".length()); - validAfter = parseFormat.parse(validAfterTime).getTime(); - } else if (line.startsWith("dir-source ")) { - dirSource = line.split(" ")[2]; - } else if (line.startsWith("vote-digest ")) { - dirSources.add(dirSource); - } else if (line.startsWith("dir-key-certificate-version ")) { - certificateStringBuilder = new StringBuilder(); - certificateStringBuilder.append(line + "\n"); - } else if (line.startsWith("fingerprint ")) { - fingerprint = line.split(" ")[1]; - } else if (line.startsWith("dir-key-published ")) { - String dirKeyPublishedTime = line.substring( - "dir-key-published ".length()); - dirKeyPublished = parseFormat.parse(dirKeyPublishedTime). 
- getTime(); - } else if (line.startsWith("r ")) { - String[] parts = line.split(" "); - if (parts.length < 9) { - this.logger.log(Level.WARNING, "Could not parse r line '" - + line + "' in descriptor. Skipping."); - break; - } - String publishedTime = parts[4] + " " + parts[5]; - String relayIdentity = Hex.encodeHexString( - Base64.decodeBase64(parts[2] + "=")). - toLowerCase(); - String serverDesc = Hex.encodeHexString(Base64.decodeBase64( - parts[3] + "=")).toLowerCase(); - serverDescriptors.add(publishedTime + "," + relayIdentity - + "," + serverDesc); - hashedRelayIdentities.add(DigestUtils.shaHex( - Base64.decodeBase64(parts[2] + "=")). - toUpperCase()); - } - } - if (isConsensus) { - if (this.rdd != null) { - this.rdd.haveParsedConsensus(validAfterTime, dirSources, - serverDescriptors); - } - if (this.aw != null) { - this.aw.storeConsensus(data, validAfter); - } - } else { - if (this.aw != null || this.rdd != null) { - String ascii = new String(data, "US-ASCII"); - String startToken = "network-status-version "; - String sigToken = "directory-signature "; - int start = ascii.indexOf(startToken); - int sig = ascii.indexOf(sigToken); - if (start >= 0 && sig >= 0 && sig > start) { - sig += sigToken.length(); - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - String digest = DigestUtils.shaHex(forDigest).toUpperCase(); - if (this.aw != null) { - this.aw.storeVote(data, validAfter, dirSource, digest); - } - if (this.rdd != null) { - this.rdd.haveParsedVote(validAfterTime, fingerprint, - serverDescriptors); - } - } - if (certificateString != null) { - if (this.aw != null) { - this.aw.storeCertificate(certificateString.getBytes(), - dirSource, dirKeyPublished); - } - } - } - } - } else if (line.startsWith("router ")) { - String publishedTime = null, extraInfoDigest = null, - relayIdentifier = null; - long published = -1L; - while ((line = br.readLine()) != null) { - if (line.startsWith("published ")) { - 
publishedTime = line.substring("published ".length()); - published = parseFormat.parse(publishedTime).getTime(); - } else if (line.startsWith("opt fingerprint") || - line.startsWith("fingerprint")) { - relayIdentifier = line.substring(line.startsWith("opt ") ? - "opt fingerprint".length() : "fingerprint".length()). - replaceAll(" ", "").toLowerCase(); - } else if (line.startsWith("opt extra-info-digest ") || - line.startsWith("extra-info-digest ")) { - extraInfoDigest = line.startsWith("opt ") ? - line.split(" ")[2].toLowerCase() : - line.split(" ")[1].toLowerCase(); - } - } - String ascii = new String(data, "US-ASCII"); - String startToken = "router "; - String sigToken = "\nrouter-signature\n"; - int start = ascii.indexOf(startToken); - int sig = ascii.indexOf(sigToken) + sigToken.length(); - String digest = null; - if (start >= 0 || sig >= 0 || sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - digest = DigestUtils.shaHex(forDigest); - } - if (this.aw != null && digest != null) { - this.aw.storeServerDescriptor(data, digest, published); - } - if (this.rdd != null && digest != null) { - this.rdd.haveParsedServerDescriptor(publishedTime, - relayIdentifier, digest, extraInfoDigest); - } - } else if (line.startsWith("extra-info ")) { - String publishedTime = null, relayIdentifier = line.split(" ")[2]; - long published = -1L; - while ((line = br.readLine()) != null) { - if (line.startsWith("published ")) { - publishedTime = line.substring("published ".length()); - published = parseFormat.parse(publishedTime).getTime(); - } - } - String ascii = new String(data, "US-ASCII"); - String startToken = "extra-info "; - String sigToken = "\nrouter-signature\n"; - String digest = null; - int start = ascii.indexOf(startToken); - if (start > 0) { - /* Do not confuse "extra-info " in "@type extra-info 1.0" with - * "extra-info 0000...". 
TODO This is a hack that should be - * solved by using metrics-lib some day. */ - start = ascii.indexOf("\n" + startToken); - if (start > 0) { - start++; - } - } - int sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 || sig >= 0 || sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - digest = DigestUtils.shaHex(forDigest); - } - if (this.aw != null && digest != null) { - this.aw.storeExtraInfoDescriptor(data, digest, published); - } - if (this.rdd != null && digest != null) { - this.rdd.haveParsedExtraInfoDescriptor(publishedTime, - relayIdentifier.toLowerCase(), digest); - } - } - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not parse descriptor. " - + "Skipping.", e); - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Could not parse descriptor. " - + "Skipping.", e); - } - } -} - diff --git a/src/org/torproject/ernie/db/RsyncDataProvider.java b/src/org/torproject/ernie/db/RsyncDataProvider.java deleted file mode 100644 index 2f9632e..0000000 --- a/src/org/torproject/ernie/db/RsyncDataProvider.java +++ /dev/null @@ -1,217 +0,0 @@ -/* Copyright 2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; -import java.util.Stack; -import java.util.logging.Logger; - -/** - * Copy files published in the last 3 days to a local directory that can - * then be served via rsync. - */ -public class RsyncDataProvider { - public RsyncDataProvider(File directoryArchivesOutputDirectory, - File sanitizedBridgesWriteDirectory, - File sanitizedAssignmentsDirectory, - boolean downloadExitList, - File torperfOutputDirectory, File rsyncDirectory) { - - /* Initialize logger. 
*/ - Logger logger = Logger.getLogger(RsyncDataProvider.class.getName()); - - /* Determine the cut-off time for files in rsync/. */ - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - - /* Create rsync/ directory if it doesn't exist. */ - if (!rsyncDirectory.exists()) { - rsyncDirectory.mkdirs(); - } - - /* Make a list of all files in the rsync/ directory to delete those - * that we didn't copy in this run. */ - Set<String> fileNamesInRsync = new HashSet<String>(); - Stack<File> files = new Stack<File>(); - files.add(rsyncDirectory); - while (!files.isEmpty()) { - File pop = files.pop(); - if (pop.isDirectory()) { - files.addAll(Arrays.asList(pop.listFiles())); - } else { - fileNamesInRsync.add(pop.getName()); - } - } - logger.info("Found " + fileNamesInRsync.size() + " files in " - + rsyncDirectory.getAbsolutePath() + " that we're either " - + "overwriting or deleting in this execution."); - - /* Copy relay descriptors from the last 3 days. */ - if (directoryArchivesOutputDirectory != null) { - files.add(directoryArchivesOutputDirectory); - while (!files.isEmpty()) { - File pop = files.pop(); - if (pop.isDirectory()) { - files.addAll(Arrays.asList(pop.listFiles())); - } else if (pop.lastModified() >= cutOffMillis) { - String fileName = pop.getName(); - if (pop.getAbsolutePath().contains("/consensus/")) { - this.copyFile(pop, new File(rsyncDirectory, - "relay-descriptors/consensuses/" + fileName)); - } else if (pop.getAbsolutePath().contains("/vote/")) { - this.copyFile(pop, new File(rsyncDirectory, - "relay-descriptors/votes/" + fileName)); - } else if (pop.getAbsolutePath().contains( - "/server-descriptor/")) { - this.copyFile(pop, new File(rsyncDirectory, - "relay-descriptors/server-descriptors/" + fileName)); - } else if (pop.getAbsolutePath().contains("/extra-info/")) { - this.copyFile(pop, new File(rsyncDirectory, - "relay-descriptors/extra-infos/" + fileName)); - } else { - continue; - } - 
fileNamesInRsync.remove(pop.getName()); - } - } - } - logger.info("After copying relay descriptors, there are still " - + fileNamesInRsync.size() + " files left in " - + rsyncDirectory.getAbsolutePath() + "."); - - /* Copy sanitized bridge descriptors from the last 3 days. */ - if (sanitizedBridgesWriteDirectory != null) { - files.add(sanitizedBridgesWriteDirectory); - while (!files.isEmpty()) { - File pop = files.pop(); - if (pop.isDirectory()) { - files.addAll(Arrays.asList(pop.listFiles())); - } else if (pop.lastModified() >= cutOffMillis) { - String fileName = pop.getName(); - if (pop.getAbsolutePath().contains("/statuses/")) { - this.copyFile(pop, new File(rsyncDirectory, - "bridge-descriptors/statuses/" + fileName)); - } else if (pop.getAbsolutePath().contains( - "/server-descriptors/")) { - this.copyFile(pop, new File(rsyncDirectory, - "bridge-descriptors/server-descriptors/" + fileName)); - } else if (pop.getAbsolutePath().contains("/extra-infos/")) { - this.copyFile(pop, new File(rsyncDirectory, - "bridge-descriptors/extra-infos/" + fileName)); - } else { - continue; - } - fileNamesInRsync.remove(pop.getName()); - } - } - } - logger.info("After copying sanitized bridge descriptors, there are " - + "still " + fileNamesInRsync.size() + " files left in " - + rsyncDirectory.getAbsolutePath() + "."); - - /* Copy sanitized bridge pool assignments from the last 3 days. 
*/ - if (sanitizedAssignmentsDirectory != null) { - files.add(sanitizedAssignmentsDirectory); - while (!files.isEmpty()) { - File pop = files.pop(); - if (pop.isDirectory()) { - files.addAll(Arrays.asList(pop.listFiles())); - } else if (pop.lastModified() >= cutOffMillis) { - String fileName = pop.getName(); - this.copyFile(pop, new File(rsyncDirectory, - "bridge-pool-assignments/" + fileName)); - fileNamesInRsync.remove(pop.getName()); - } - } - } - logger.info("After copying sanitized bridge pool assignments, there " - + "are still " + fileNamesInRsync.size() + " files left in " - + rsyncDirectory.getAbsolutePath() + "."); - - /* Copy exit lists from the last 3 days. */ - if (downloadExitList) { - files.add(new File("exitlist")); - while (!files.isEmpty()) { - File pop = files.pop(); - if (pop.isDirectory()) { - files.addAll(Arrays.asList(pop.listFiles())); - } else if (pop.lastModified() >= cutOffMillis) { - String fileName = pop.getName(); - this.copyFile(pop, new File(rsyncDirectory, - "exit-lists/" + fileName)); - fileNamesInRsync.remove(pop.getName()); - } - } - } - logger.info("After copying exit lists, there are still " - + fileNamesInRsync.size() + " files left in " - + rsyncDirectory.getAbsolutePath() + "."); - - /* Copy Torperf files. */ - if (torperfOutputDirectory != null) { - files.add(torperfOutputDirectory); - while (!files.isEmpty()) { - File pop = files.pop(); - if (pop.isDirectory()) { - files.addAll(Arrays.asList(pop.listFiles())); - } else if (pop.getName().endsWith(".tpf") && - pop.lastModified() >= cutOffMillis) { - String fileName = pop.getName(); - this.copyFile(pop, new File(rsyncDirectory, - "torperf/" + fileName)); - fileNamesInRsync.remove(pop.getName()); - } - } - } - logger.info("After copying Torperf files, there are still " - + fileNamesInRsync.size() + " files left in " - + rsyncDirectory.getAbsolutePath() + "."); - - /* Delete all files that we didn't (over-)write in this run. 
*/ - files.add(rsyncDirectory); - while (!files.isEmpty()) { - File pop = files.pop(); - if (pop.isDirectory()) { - files.addAll(Arrays.asList(pop.listFiles())); - } else if (fileNamesInRsync.contains(pop.getName())) { - fileNamesInRsync.remove(pop.getName()); - pop.delete(); - } - } - logger.info("After deleting files that we didn't overwrite in this " - + "run, there are " + fileNamesInRsync.size() + " files left in " - + rsyncDirectory.getAbsolutePath() + "."); - } - - private void copyFile(File from, File to) { - if (from.exists() && to.exists() && - from.lastModified() == to.lastModified() && - from.length() == to.length()) { - return; - } - try { - to.getParentFile().mkdirs(); - FileInputStream fis = new FileInputStream(from); - BufferedInputStream bis = new BufferedInputStream(fis); - FileOutputStream fos = new FileOutputStream(to); - int len; - byte[] data = new byte[1024]; - while ((len = bis.read(data, 0, 1024)) >= 0) { - fos.write(data, 0, len); - } - bis.close(); - fos.close(); - to.setLastModified(from.lastModified()); - } catch (IOException e) { - e.printStackTrace(); - } - } -} - diff --git a/src/org/torproject/ernie/db/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/SanitizedBridgesWriter.java deleted file mode 100644 index afafe11..0000000 --- a/src/org/torproject/ernie/db/SanitizedBridgesWriter.java +++ /dev/null @@ -1,911 +0,0 @@ -/* Copyright 2010--2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.*; -import java.security.*; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.*; -import java.util.logging.*; - -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.digest.*; -import org.apache.commons.codec.binary.*; - -/** - * Sanitizes bridge descriptors, i.e., removes all possibly sensitive - * information from them, and writes them to a local directory structure. 
- * During the sanitizing process, all information about the bridge - * identity or IP address are removed or replaced. The goal is to keep the - * sanitized bridge descriptors useful for statistical analysis while not - * making it easier for an adversary to enumerate bridges. - * - * There are three types of bridge descriptors: bridge network statuses - * (lists of all bridges at a given time), server descriptors (published - * by the bridge to advertise their capabilities), and extra-info - * descriptors (published by the bridge, mainly for statistical analysis). - */ -public class SanitizedBridgesWriter { - - /** - * Logger for this class. - */ - private Logger logger; - - /** - * Output directory for writing sanitized bridge descriptors. - */ - private File sanitizedBridgesDirectory; - - private boolean replaceIPAddressesWithHashes; - - private boolean persistenceProblemWithSecrets; - - private SortedMap<String, byte[]> secretsForHashingIPAddresses; - - private String bridgeSanitizingCutOffTimestamp; - - private boolean haveWarnedAboutInterval; - - private File bridgeIpSecretsFile; - - private SecureRandom secureRandom; - - /** - * Initializes this class. - */ - public SanitizedBridgesWriter(File sanitizedBridgesDirectory, - File statsDirectory, boolean replaceIPAddressesWithHashes, - long limitBridgeSanitizingInterval) { - - if (sanitizedBridgesDirectory == null || statsDirectory == null) { - throw new IllegalArgumentException(); - } - - /* Memorize argument values. */ - this.sanitizedBridgesDirectory = sanitizedBridgesDirectory; - this.replaceIPAddressesWithHashes = replaceIPAddressesWithHashes; - - /* Initialize logger. */ - this.logger = Logger.getLogger( - SanitizedBridgesWriter.class.getName()); - - /* Initialize secure random number generator if we need it. 
*/ - if (this.replaceIPAddressesWithHashes) { - try { - this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN"); - } catch (GeneralSecurityException e) { - this.logger.log(Level.WARNING, "Could not initialize secure " - + "random number generator! Not calculating any IP address " - + "hashes in this execution!", e); - this.persistenceProblemWithSecrets = true; - } - } - - /* Read hex-encoded secrets for replacing IP addresses with hashes - * from disk. */ - this.secretsForHashingIPAddresses = new TreeMap<String, byte[]>(); - this.bridgeIpSecretsFile = new File(statsDirectory, - "bridge-ip-secrets"); - if (this.bridgeIpSecretsFile.exists()) { - try { - BufferedReader br = new BufferedReader(new FileReader( - this.bridgeIpSecretsFile)); - String line; - while ((line = br.readLine()) != null) { - String[] parts = line.split(","); - if ((line.length() != ("yyyy-MM,".length() + 31 * 2) && - line.length() != ("yyyy-MM,".length() + 50 * 2)) || - parts.length != 2) { - this.logger.warning("Invalid line in bridge-ip-secrets file " - + "starting with '" + line.substring(0, 7) + "'! " - + "Not calculating any IP address hashes in this " - + "execution!"); - this.persistenceProblemWithSecrets = true; - break; - } - String month = parts[0]; - byte[] secret = Hex.decodeHex(parts[1].toCharArray()); - this.secretsForHashingIPAddresses.put(month, secret); - } - br.close(); - if (!this.persistenceProblemWithSecrets) { - this.logger.fine("Read " - + this.secretsForHashingIPAddresses.size() + " secrets for " - + "hashing bridge IP addresses."); - } - } catch (DecoderException e) { - this.logger.log(Level.WARNING, "Failed to decode hex string in " - + this.bridgeIpSecretsFile + "! Not calculating any IP " - + "address hashes in this execution!", e); - this.persistenceProblemWithSecrets = true; - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to read " - + this.bridgeIpSecretsFile + "! 
Not calculating any IP " - + "address hashes in this execution!", e); - this.persistenceProblemWithSecrets = true; - } - } - - /* If we're configured to keep secrets only for a limited time, define - * the cut-off day and time. */ - if (limitBridgeSanitizingInterval >= 0L) { - SimpleDateFormat formatter = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); - this.bridgeSanitizingCutOffTimestamp = formatter.format( - System.currentTimeMillis() - 24L * 60L * 60L * 1000L - * limitBridgeSanitizingInterval); - } else { - this.bridgeSanitizingCutOffTimestamp = "1999-12-31 23:59:59"; - } - } - - private String scrubOrAddress(String orAddress, byte[] fingerprintBytes, - String published) throws IOException { - if (!orAddress.contains(":")) { - /* Malformed or-address or a line. */ - return null; - } - String addressPart = orAddress.substring(0, - orAddress.lastIndexOf(":")); - String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1); - String scrubbedAddressPart = null; - if (addressPart.startsWith("[")) { - scrubbedAddressPart = this.scrubIpv6Address(addressPart, - fingerprintBytes, published); - } else { - scrubbedAddressPart = this.scrubIpv4Address(addressPart, - fingerprintBytes, published); - } - return (scrubbedAddressPart == null ? null : - scrubbedAddressPart + ":" + portPart); - } - - private String scrubIpv4Address(String address, byte[] fingerprintBytes, - String published) throws IOException { - if (this.replaceIPAddressesWithHashes) { - if (this.persistenceProblemWithSecrets) { - /* There's a persistence problem, so we shouldn't scrub more IP - * addresses in this execution. 
*/ - return null; - } - byte[] hashInput = new byte[4 + 20 + 31]; - String[] ipParts = address.split("\."); - for (int i = 0; i < 4; i++) { - hashInput[i] = (byte) Integer.parseInt(ipParts[i]); - } - System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20); - String month = published.substring(0, "yyyy-MM".length()); - byte[] secret = this.getSecretForMonth(month); - System.arraycopy(secret, 0, hashInput, 24, 31); - byte[] hashOutput = DigestUtils.sha256(hashInput); - String hashedAddress = "10." - + (((int) hashOutput[0] + 256) % 256) + "." - + (((int) hashOutput[1] + 256) % 256) + "." - + (((int) hashOutput[2] + 256) % 256); - return hashedAddress; - } else { - return "127.0.0.1"; - } - } - - private String scrubIpv6Address(String address, byte[] fingerprintBytes, - String published) throws IOException { - StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::"); - if (this.replaceIPAddressesWithHashes) { - if (this.persistenceProblemWithSecrets) { - /* There's a persistence problem, so we shouldn't scrub more IP - * addresses in this execution. */ - return null; - } - byte[] hashInput = new byte[16 + 20 + 19]; - String[] doubleColonSeparatedParts = address.substring(1, - address.length() - 1).split("::", -1); - if (doubleColonSeparatedParts.length > 2) { - /* Invalid IPv6 address. */ - return null; - } - List<String> hexParts = new ArrayList<String>(); - for (String doubleColonSeparatedPart : doubleColonSeparatedParts) { - StringBuilder hexPart = new StringBuilder(); - String[] parts = doubleColonSeparatedPart.split(":", -1); - if (parts.length < 1 || parts.length > 8) { - /* Invalid IPv6 address. */ - return null; - } - for (int i = 0; i < parts.length; i++) { - String part = parts[i]; - if (part.contains(".")) { - String[] ipParts = part.split("\."); - byte[] ipv4Bytes = new byte[4]; - if (ipParts.length != 4) { - /* Invalid IPv4 part in IPv6 address. 
*/ - return null; - } - for (int m = 0; m < 4; m++) { - ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]); - } - hexPart.append(Hex.encodeHexString(ipv4Bytes)); - } else if (part.length() > 4) { - /* Invalid IPv6 address. */ - return null; - } else { - for (int k = part.length(); k < 4; k++) { - hexPart.append("0"); - } - hexPart.append(part); - } - } - hexParts.add(hexPart.toString()); - } - StringBuilder hex = new StringBuilder(); - hex.append(hexParts.get(0)); - if (hexParts.size() == 2) { - for (int i = 32 - hexParts.get(0).length() - - hexParts.get(1).length(); i > 0; i--) { - hex.append("0"); - } - hex.append(hexParts.get(1)); - } - byte[] ipBytes = null; - try { - ipBytes = Hex.decodeHex(hex.toString().toCharArray()); - } catch (DecoderException e) { - /* TODO Invalid IPv6 address. */ - return null; - } - if (ipBytes.length != 16) { - /* TODO Invalid IPv6 address. */ - return null; - } - System.arraycopy(ipBytes, 0, hashInput, 0, 16); - System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20); - String month = published.substring(0, "yyyy-MM".length()); - byte[] secret = this.getSecretForMonth(month); - System.arraycopy(secret, 31, hashInput, 36, 19); - String hashOutput = DigestUtils.sha256Hex(hashInput); - sb.append(hashOutput.substring(hashOutput.length() - 6, - hashOutput.length() - 4)); - sb.append(":"); - sb.append(hashOutput.substring(hashOutput.length() - 4)); - } - sb.append("]"); - return sb.toString(); - } - - private byte[] getSecretForMonth(String month) throws IOException { - if (!this.secretsForHashingIPAddresses.containsKey(month) || - this.secretsForHashingIPAddresses.get(month).length == 31) { - byte[] secret = new byte[50]; - this.secureRandom.nextBytes(secret); - if (this.secretsForHashingIPAddresses.containsKey(month)) { - System.arraycopy(this.secretsForHashingIPAddresses.get(month), 0, - secret, 0, 31); - } - if (month.compareTo( - this.bridgeSanitizingCutOffTimestamp) < 0) { - this.logger.warning("Generated a secret that we won't 
make " - + "persistent, because it's outside our bridge descriptor " - + "sanitizing interval."); - } else { - /* Append secret to file on disk immediately before using it, or - * we might end with inconsistently sanitized bridges. */ - try { - if (!this.bridgeIpSecretsFile.exists()) { - this.bridgeIpSecretsFile.getParentFile().mkdirs(); - } - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.bridgeIpSecretsFile, - this.bridgeIpSecretsFile.exists())); - bw.write(month + "," + Hex.encodeHexString(secret) + "\n"); - bw.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not store new secret " - + "to disk! Not calculating any IP address hashes in " - + "this execution!", e); - this.persistenceProblemWithSecrets = true; - throw new IOException(e); - } - } - this.secretsForHashingIPAddresses.put(month, secret); - } - return this.secretsForHashingIPAddresses.get(month); - } - - /** - * Sanitizes a network status and writes it to disk. - */ - public void sanitizeAndStoreNetworkStatus(byte[] data, - String publicationTime) { - - if (this.persistenceProblemWithSecrets) { - /* There's a persistence problem, so we shouldn't scrub more IP - * addresses in this execution. */ - return; - } - - if (this.bridgeSanitizingCutOffTimestamp. - compareTo(publicationTime) > 0) { - this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING - : Level.FINE, "Sanitizing and storing network status with " - + "publication time outside our descriptor sanitizing " - + "interval."); - this.haveWarnedAboutInterval = true; - } - - /* Parse the given network status line by line. 
*/ - SortedMap<String, String> scrubbedLines = - new TreeMap<String, String>(); - try { - StringBuilder scrubbed = new StringBuilder(); - BufferedReader br = new BufferedReader(new StringReader(new String( - data, "US-ASCII"))); - String line = null; - String mostRecentDescPublished = null; - byte[] fingerprintBytes = null; - String descPublicationTime = null; - String hashedBridgeIdentityHex = null; - while ((line = br.readLine()) != null) { - - /* r lines contain sensitive information that needs to be removed - * or replaced. */ - if (line.startsWith("r ")) { - - /* Clear buffer from previously scrubbed lines. */ - if (scrubbed.length() > 0) { - String scrubbedLine = scrubbed.toString(); - scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine); - scrubbed = new StringBuilder(); - } - - /* Parse the relevant parts of this r line. */ - String[] parts = line.split(" "); - String nickname = parts[1]; - fingerprintBytes = Base64.decodeBase64(parts[2] + "=="); - String descriptorIdentifier = parts[3]; - descPublicationTime = parts[4] + " " + parts[5]; - String address = parts[6]; - String orPort = parts[7]; - String dirPort = parts[8]; - - /* Determine most recent descriptor publication time. */ - if (descPublicationTime.compareTo(publicationTime) <= 0 && - (mostRecentDescPublished == null || - descPublicationTime.compareTo( - mostRecentDescPublished) > 0)) { - mostRecentDescPublished = descPublicationTime; - } - - /* Write scrubbed r line to buffer. 
*/ - byte[] hashedBridgeIdentity = DigestUtils.sha(fingerprintBytes); - String hashedBridgeIdentityBase64 = Base64.encodeBase64String( - hashedBridgeIdentity).substring(0, 27); - hashedBridgeIdentityHex = Hex.encodeHexString( - hashedBridgeIdentity); - String hashedDescriptorIdentifier = Base64.encodeBase64String( - DigestUtils.sha(Base64.decodeBase64(descriptorIdentifier - + "=="))).substring(0, 27); - String scrubbedAddress = scrubIpv4Address(address, - fingerprintBytes, - descPublicationTime); - scrubbed.append("r " + nickname + " " - + hashedBridgeIdentityBase64 + " " - + hashedDescriptorIdentifier + " " + descPublicationTime - + " " + scrubbedAddress + " " + orPort + " " + dirPort - + "\n"); - - /* Sanitize any addresses in a lines using the fingerprint and - * descriptor publication time from the previous r line. */ - } else if (line.startsWith("a ")) { - String scrubbedOrAddress = scrubOrAddress( - line.substring("a ".length()), fingerprintBytes, - descPublicationTime); - if (scrubbedOrAddress != null) { - scrubbed.append("a " + scrubbedOrAddress + "\n"); - } else { - this.logger.warning("Invalid address in line '" + line - + "' in bridge network status. Skipping line!"); - } - - /* Nothing special about s, w, and p lines; just copy them. */ - } else if (line.startsWith("s ") || line.equals("s") || - line.startsWith("w ") || line.equals("w") || - line.startsWith("p ") || line.equals("p")) { - scrubbed.append(line + "\n"); - - /* There should be nothing else but r, w, p, and s lines in the - * network status. If there is, we should probably learn before - * writing anything to the sanitized descriptors. */ - } else { - this.logger.fine("Unknown line '" + line + "' in bridge " - + "network status. 
Not writing to disk!"); - return; - } - } - br.close(); - if (scrubbed.length() > 0) { - String scrubbedLine = scrubbed.toString(); - scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine); - scrubbed = new StringBuilder(); - } - - /* Check if we can tell from the descriptor publication times - * whether this status is possibly stale. */ - SimpleDateFormat formatter = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); - if (formatter.parse(publicationTime).getTime() - - formatter.parse(mostRecentDescPublished).getTime() > - 60L * 60L * 1000L) { - this.logger.warning("The most recent descriptor in the bridge " - + "network status published at " + publicationTime + " was " - + "published at " + mostRecentDescPublished + " which is " - + "more than 1 hour before the status. This is a sign for " - + "the status being stale. Please check!"); - } - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Could not parse timestamp in " - + "bridge network status.", e); - return; - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not parse bridge network " - + "status.", e); - return; - } - - /* Write the sanitized network status to disk. */ - try { - - /* Determine file name. */ - String syear = publicationTime.substring(0, 4); - String smonth = publicationTime.substring(5, 7); - String sday = publicationTime.substring(8, 10); - String stime = publicationTime.substring(11, 13) - + publicationTime.substring(14, 16) - + publicationTime.substring(17, 19); - File statusFile = new File( - this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear - + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth - + sday + "-" + stime + "-" - + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D"); - - /* Create all parent directories to write this network status. */ - statusFile.getParentFile().mkdirs(); - - /* Write sanitized network status to disk. 
*/ - BufferedWriter bw = new BufferedWriter(new FileWriter(statusFile)); - bw.write("@type bridge-network-status 1.0\n"); - bw.write("published " + publicationTime + "\n"); - for (String scrubbed : scrubbedLines.values()) { - bw.write(scrubbed); - } - bw.close(); - - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not write sanitized bridge " - + "network status to disk.", e); - return; - } - } - - /** - * Sanitizes a bridge server descriptor and writes it to disk. - */ - public void sanitizeAndStoreServerDescriptor(byte[] data) { - - if (this.persistenceProblemWithSecrets) { - /* There's a persistence problem, so we shouldn't scrub more IP - * addresses in this execution. */ - return; - } - - /* Parse descriptor to generate a sanitized version. */ - String scrubbedDesc = null, published = null; - try { - BufferedReader br = new BufferedReader(new StringReader( - new String(data, "US-ASCII"))); - StringBuilder scrubbed = new StringBuilder(); - String line = null, hashedBridgeIdentity = null, address = null, - routerLine = null, scrubbedAddress = null; - List<String> orAddresses = null, scrubbedOrAddresses = null; - boolean skipCrypto = false; - while ((line = br.readLine()) != null) { - - /* Skip all crypto parts that might be used to derive the bridge's - * identity fingerprint. */ - if (skipCrypto && !line.startsWith("-----END ")) { - continue; - - /* Store the router line for later processing, because we may need - * the bridge identity fingerprint for replacing the IP address in - * the scrubbed version. */ - } else if (line.startsWith("router ")) { - address = line.split(" ")[2]; - routerLine = line; - - /* Store or-address parts in a list and sanitize them when we have - * read the fingerprint. 
*/ - } else if (line.startsWith("or-address ")) { - if (orAddresses == null) { - orAddresses = new ArrayList<String>(); - } - orAddresses.add(line.substring("or-address ".length())); - - /* Parse the publication time to see if we're still inside the - * sanitizing interval. */ - } else if (line.startsWith("published ")) { - published = line.substring("published ".length()); - if (this.bridgeSanitizingCutOffTimestamp. - compareTo(published) > 0) { - this.logger.log(!this.haveWarnedAboutInterval - ? Level.WARNING : Level.FINE, "Sanitizing and storing " - + "server descriptor with publication time outside our " - + "descriptor sanitizing interval."); - this.haveWarnedAboutInterval = true; - } - scrubbed.append(line + "\n"); - - /* Parse the fingerprint to determine the hashed bridge - * identity. */ - } else if (line.startsWith("opt fingerprint ") || - line.startsWith("fingerprint ")) { - String fingerprint = line.substring(line.startsWith("opt ") ? - "opt fingerprint".length() : "fingerprint".length()). - replaceAll(" ", "").toLowerCase(); - byte[] fingerprintBytes = Hex.decodeHex( - fingerprint.toCharArray()); - hashedBridgeIdentity = DigestUtils.shaHex(fingerprintBytes). - toLowerCase(); - try { - scrubbedAddress = scrubIpv4Address(address, fingerprintBytes, - published); - if (orAddresses != null) { - scrubbedOrAddresses = new ArrayList<String>(); - for (String orAddress : orAddresses) { - String scrubbedOrAddress = scrubOrAddress(orAddress, - fingerprintBytes, published); - if (scrubbedOrAddress != null) { - scrubbedOrAddresses.add(scrubbedOrAddress); - } else { - this.logger.warning("Invalid address in line " - + "'or-address " + orAddress + "' in bridge server " - + "descriptor. Skipping line!"); - } - } - } - } catch (IOException e) { - /* There's a persistence problem, so we shouldn't scrub more - * IP addresses in this execution. */ - this.persistenceProblemWithSecrets = true; - return; - } - scrubbed.append((line.startsWith("opt ") ? 
"opt " : "") - + "fingerprint"); - for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++) - scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i, - 4 * (i + 1)).toUpperCase()); - scrubbed.append("\n"); - - /* Replace the contact line (if present) with a generic one. */ - } else if (line.startsWith("contact ")) { - scrubbed.append("contact somebody\n"); - - /* When we reach the signature, we're done. Write the sanitized - * descriptor to disk below. */ - } else if (line.startsWith("router-signature")) { - String[] routerLineParts = routerLine.split(" "); - scrubbedDesc = "router " + routerLineParts[1] + " " - + scrubbedAddress + " " + routerLineParts[3] + " " - + routerLineParts[4] + " " + routerLineParts[5] + "\n"; - if (scrubbedOrAddresses != null) { - for (String scrubbedOrAddress : scrubbedOrAddresses) { - scrubbedDesc = scrubbedDesc += "or-address " - + scrubbedOrAddress + "\n"; - } - } - scrubbedDesc += scrubbed.toString(); - break; - - /* Replace extra-info digest with the hashed digest of the - * non-scrubbed descriptor. */ - } else if (line.startsWith("opt extra-info-digest ") || - line.startsWith("extra-info-digest ")) { - String extraInfoDescriptorIdentifier = line.substring( - line.indexOf("extra-info-digest ") - + "extra-info-digest ".length()); - String hashedExtraInfoDescriptorIdentifier = - DigestUtils.shaHex(Hex.decodeHex( - extraInfoDescriptorIdentifier.toCharArray())).toUpperCase(); - scrubbed.append((line.startsWith("opt ") ? "opt " : "") - + "extra-info-digest " + hashedExtraInfoDescriptorIdentifier - + "\n"); - - /* Possibly sanitize reject lines if they contain the bridge's own - * IP address. 
*/ - } else if (line.startsWith("reject ")) { - if (address != null && line.startsWith("reject " + address)) { - scrubbed.append("reject " + scrubbedAddress - + line.substring("reject ".length() + address.length()) - + "\n"); - } else { - scrubbed.append(line + "\n"); - } - - /* Write the following lines unmodified to the sanitized - * descriptor. */ - } else if (line.startsWith("accept ") - || line.startsWith("platform ") - || line.startsWith("opt protocols ") - || line.startsWith("protocols ") - || line.startsWith("uptime ") - || line.startsWith("bandwidth ") - || line.startsWith("opt hibernating ") - || line.startsWith("hibernating ") - || line.equals("opt hidden-service-dir") - || line.equals("hidden-service-dir") - || line.equals("opt caches-extra-info") - || line.equals("caches-extra-info") - || line.equals("opt allow-single-hop-exits") - || line.equals("allow-single-hop-exits")) { - scrubbed.append(line + "\n"); - - /* Replace node fingerprints in the family line with their hashes - * and leave nicknames unchanged. */ - } else if (line.startsWith("family ")) { - StringBuilder familyLine = new StringBuilder("family"); - for (String s : line.substring(7).split(" ")) { - if (s.startsWith("$")) { - familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex( - s.substring(1).toCharArray())).toUpperCase()); - } else { - familyLine.append(" " + s); - } - } - scrubbed.append(familyLine.toString() + "\n"); - - /* Skip the purpose line that the bridge authority adds to its - * cached-descriptors file. */ - } else if (line.startsWith("@purpose ")) { - continue; - - /* Skip all crypto parts that might leak the bridge's identity - * fingerprint. */ - } else if (line.startsWith("-----BEGIN ") - || line.equals("onion-key") || line.equals("signing-key")) { - skipCrypto = true; - - /* Stop skipping lines when the crypto parts are over. 
*/ - } else if (line.startsWith("-----END ")) { - skipCrypto = false; - - /* If we encounter an unrecognized line, stop parsing and print - * out a warning. We might have overlooked sensitive information - * that we need to remove or replace for the sanitized descriptor - * version. */ - } else { - this.logger.fine("Unrecognized line '" + line + "'. Skipping."); - return; - } - } - br.close(); - } catch (Exception e) { - this.logger.log(Level.WARNING, "Could not parse server " - + "descriptor.", e); - return; - } - - /* Determine filename of sanitized server descriptor. */ - String descriptorDigest = null; - try { - String ascii = new String(data, "US-ASCII"); - String startToken = "router "; - String sigToken = "\nrouter-signature\n"; - int start = ascii.indexOf(startToken); - int sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 && sig >= 0 && sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest)); - } - } catch (UnsupportedEncodingException e) { - /* Handle below. */ - } - if (descriptorDigest == null) { - this.logger.log(Level.WARNING, "Could not calculate server " - + "descriptor digest."); - return; - } - String dyear = published.substring(0, 4); - String dmonth = published.substring(5, 7); - File newFile = new File( - this.sanitizedBridgesDirectory.getAbsolutePath() + "/" - + dyear + "/" + dmonth + "/server-descriptors/" - + "/" + descriptorDigest.charAt(0) + "/" - + descriptorDigest.charAt(1) + "/" - + descriptorDigest); - - /* Write sanitized server descriptor to disk, including all its parent - * directories. 
*/ - try { - newFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter(newFile)); - bw.write("@type bridge-server-descriptor 1.0\n"); - bw.write(scrubbedDesc); - bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n"); - bw.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not write sanitized server " - + "descriptor to disk.", e); - return; - } - } - - /** - * Sanitizes an extra-info descriptor and writes it to disk. - */ - public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) { - - /* Parse descriptor to generate a sanitized version. */ - String scrubbedDesc = null, published = null; - try { - BufferedReader br = new BufferedReader(new StringReader(new String( - data, "US-ASCII"))); - String line = null; - StringBuilder scrubbed = null; - String hashedBridgeIdentity = null; - while ((line = br.readLine()) != null) { - - /* Parse bridge identity from extra-info line and replace it with - * its hash in the sanitized descriptor. */ - String[] parts = line.split(" "); - if (line.startsWith("extra-info ")) { - hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex( - parts[2].toCharArray())).toLowerCase(); - scrubbed = new StringBuilder("extra-info " + parts[1] + " " - + hashedBridgeIdentity.toUpperCase() + "\n"); - - /* Parse the publication time to determine the file name. */ - } else if (line.startsWith("published ")) { - scrubbed.append(line + "\n"); - published = line.substring("published ".length()); - - /* Remove everything from transport lines except the transport - * name. */ - } else if (line.startsWith("transport ")) { - if (parts.length < 3) { - this.logger.fine("Illegal line in extra-info descriptor: '" - + line + "'. Skipping descriptor."); - return; - } - scrubbed.append("transport " + parts[1] + "\n"); - - /* Skip transport-info lines entirely. */ - } else if (line.startsWith("transport-info ")) { - - /* Write the following lines unmodified to the sanitized - * descriptor. 
*/ - } else if (line.startsWith("write-history ") - || line.startsWith("read-history ") - || line.startsWith("geoip-start-time ") - || line.startsWith("geoip-client-origins ") - || line.startsWith("geoip-db-digest ") - || line.startsWith("conn-bi-direct ") - || line.startsWith("bridge-") - || line.startsWith("dirreq-") - || line.startsWith("cell-") - || line.startsWith("entry-") - || line.startsWith("exit-")) { - scrubbed.append(line + "\n"); - - /* When we reach the signature, we're done. Write the sanitized - * descriptor to disk below. */ - } else if (line.startsWith("router-signature")) { - scrubbedDesc = scrubbed.toString(); - break; - - /* If we encounter an unrecognized line, stop parsing and print - * out a warning. We might have overlooked sensitive information - * that we need to remove or replace for the sanitized descriptor - * version. */ - } else { - this.logger.fine("Unrecognized line '" + line + "'. Skipping."); - return; - } - } - br.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not parse extra-info " - + "descriptor.", e); - return; - } catch (DecoderException e) { - this.logger.log(Level.WARNING, "Could not parse extra-info " - + "descriptor.", e); - return; - } - - /* Determine filename of sanitized extra-info descriptor. */ - String descriptorDigest = null; - try { - String ascii = new String(data, "US-ASCII"); - String startToken = "extra-info "; - String sigToken = "\nrouter-signature\n"; - int start = ascii.indexOf(startToken); - int sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 && sig >= 0 && sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest)); - } - } catch (UnsupportedEncodingException e) { - /* Handle below. 
*/ - } - if (descriptorDigest == null) { - this.logger.log(Level.WARNING, "Could not calculate extra-info " - + "descriptor digest."); - return; - } - String dyear = published.substring(0, 4); - String dmonth = published.substring(5, 7); - File newFile = new File( - this.sanitizedBridgesDirectory.getAbsolutePath() + "/" - + dyear + "/" + dmonth + "/extra-infos/" - + descriptorDigest.charAt(0) + "/" - + descriptorDigest.charAt(1) + "/" - + descriptorDigest); - - /* Write sanitized extra-info descriptor to disk, including all its - * parent directories. */ - try { - newFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter(newFile)); - bw.write("@type bridge-extra-info 1.1\n"); - bw.write(scrubbedDesc); - bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n"); - bw.close(); - } catch (Exception e) { - this.logger.log(Level.WARNING, "Could not write sanitized " - + "extra-info descriptor to disk.", e); - } - } - - /** - * Rewrite all network statuses that might contain references to server - * descriptors we added or updated in this execution. This applies to - * all statuses that have been published up to 24 hours after any added - * or updated server descriptor. - */ - public void finishWriting() { - - /* Delete secrets that we don't need anymore. 
*/ - if (!this.secretsForHashingIPAddresses.isEmpty() && - this.secretsForHashingIPAddresses.firstKey().compareTo( - this.bridgeSanitizingCutOffTimestamp) < 0) { - try { - int kept = 0, deleted = 0; - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.bridgeIpSecretsFile)); - for (Map.Entry<String, byte[]> e : - this.secretsForHashingIPAddresses.entrySet()) { - if (e.getKey().compareTo( - this.bridgeSanitizingCutOffTimestamp) < 0) { - deleted++; - } else { - bw.write(e.getKey() + "," + Hex.encodeHexString(e.getValue()) - + "\n"); - kept++; - } - } - bw.close(); - this.logger.info("Deleted " + deleted + " secrets that we don't " - + "need anymore and kept " + kept + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not store reduced set of " - + "secrets to disk! This is a bad sign, better check what's " - + "going on!", e); - } - } - } -} - diff --git a/src/org/torproject/ernie/db/TorperfDownloader.java b/src/org/torproject/ernie/db/TorperfDownloader.java deleted file mode 100644 index 058e29b..0000000 --- a/src/org/torproject/ernie/db/TorperfDownloader.java +++ /dev/null @@ -1,573 +0,0 @@ -/* Copyright 2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.HttpURLConnection; -import java.net.URL; -import java.text.SimpleDateFormat; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.logging.Level; -import java.util.logging.Logger; - -/* Download possibly truncated Torperf .data and .extradata files from - * configured sources, append them to the files we already have, and merge - * the two files into the .tpf format. 
*/ -public class TorperfDownloader { - - private File torperfOutputDirectory = null; - private SortedMap<String, String> torperfSources = null; - private List<String> torperfFilesLines = null; - private Logger logger = null; - private SimpleDateFormat dateFormat; - - public TorperfDownloader(File torperfOutputDirectory, - SortedMap<String, String> torperfSources, - List<String> torperfFilesLines) { - if (torperfOutputDirectory == null) { - throw new IllegalArgumentException(); - } - this.torperfOutputDirectory = torperfOutputDirectory; - this.torperfSources = torperfSources; - this.torperfFilesLines = torperfFilesLines; - if (!this.torperfOutputDirectory.exists()) { - this.torperfOutputDirectory.mkdirs(); - } - this.logger = Logger.getLogger(TorperfDownloader.class.getName()); - this.dateFormat = new SimpleDateFormat("yyyy-MM-dd"); - this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - this.readLastMergedTimestamps(); - for (String torperfFilesLine : this.torperfFilesLines) { - this.downloadAndMergeFiles(torperfFilesLine); - } - this.writeLastMergedTimestamps(); - } - - private File torperfLastMergedFile = - new File("stats/torperf-last-merged"); - SortedMap<String, String> lastMergedTimestamps = - new TreeMap<String, String>(); - private void readLastMergedTimestamps() { - if (!this.torperfLastMergedFile.exists()) { - return; - } - try { - BufferedReader br = new BufferedReader(new FileReader( - this.torperfLastMergedFile)); - String line; - while ((line = br.readLine()) != null) { - String[] parts = line.split(" "); - String fileName = null, timestamp = null; - if (parts.length == 2) { - try { - Double.parseDouble(parts[1]); - fileName = parts[0]; - timestamp = parts[1]; - } catch (NumberFormatException e) { - /* Handle below. */ - } - } - if (fileName == null || timestamp == null) { - this.logger.log(Level.WARNING, "Invalid line '" + line + "' in " - + this.torperfLastMergedFile.getAbsolutePath() + ". 
" - + "Ignoring past history of merging .data and .extradata " - + "files."); - this.lastMergedTimestamps.clear(); - break; - } - this.lastMergedTimestamps.put(fileName, timestamp); - } - br.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Error while reading '" - + this.torperfLastMergedFile.getAbsolutePath() + ". Ignoring " - + "past history of merging .data and .extradata files."); - this.lastMergedTimestamps.clear(); - } - } - - private void writeLastMergedTimestamps() { - try { - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.torperfLastMergedFile)); - for (Map.Entry<String, String> e : - this.lastMergedTimestamps.entrySet()) { - String fileName = e.getKey(); - String timestamp = e.getValue(); - bw.write(fileName + " " + timestamp + "\n"); - } - bw.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Error while writing '" - + this.torperfLastMergedFile.getAbsolutePath() + ". This may " - + "result in ignoring history of merging .data and .extradata " - + "files in the next execution.", e); - } - } - - private void downloadAndMergeFiles(String torperfFilesLine) { - String[] parts = torperfFilesLine.split(" "); - String sourceName = parts[1]; - int fileSize = -1; - try { - fileSize = Integer.parseInt(parts[2]); - } catch (NumberFormatException e) { - this.logger.log(Level.WARNING, "Could not parse file size in " - + "TorperfFiles configuration line '" + torperfFilesLine - + "'."); - return; - } - - /* Download and append the .data file. */ - String dataFileName = parts[3]; - String sourceBaseUrl = torperfSources.get(sourceName); - String dataUrl = sourceBaseUrl + dataFileName; - String dataOutputFileName = sourceName + "-" + dataFileName; - File dataOutputFile = new File(torperfOutputDirectory, - dataOutputFileName); - boolean downloadedDataFile = this.downloadAndAppendFile(dataUrl, - dataOutputFile, true); - - /* Download and append the .extradata file. 
*/ - String extradataFileName = parts[4]; - String extradataUrl = sourceBaseUrl + extradataFileName; - String extradataOutputFileName = sourceName + "-" + extradataFileName; - File extradataOutputFile = new File(torperfOutputDirectory, - extradataOutputFileName); - boolean downloadedExtradataFile = this.downloadAndAppendFile( - extradataUrl, extradataOutputFile, false); - - /* Merge both files into .tpf format. */ - if (!downloadedDataFile && !downloadedExtradataFile) { - return; - } - String skipUntil = null; - if (this.lastMergedTimestamps.containsKey(dataOutputFileName)) { - skipUntil = this.lastMergedTimestamps.get(dataOutputFileName); - } - try { - skipUntil = this.mergeFiles(dataOutputFile, extradataOutputFile, - sourceName, fileSize, skipUntil); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed merging " + dataOutputFile - + " and " + extradataOutputFile + ".", e); - } - if (skipUntil != null) { - this.lastMergedTimestamps.put(dataOutputFileName, skipUntil); - } - } - - private boolean downloadAndAppendFile(String url, File outputFile, - boolean isDataFile) { - - /* Read an existing output file to determine which line will be the - * first to append to it. 
*/ - String lastTimestampLine = null; - int linesAfterLastTimestampLine = 0; - if (outputFile.exists() && outputFile.lastModified() > - System.currentTimeMillis() - 330L * 60L * 1000L) { - return false; - } else if (outputFile.exists()) { - try { - BufferedReader br = new BufferedReader(new FileReader( - outputFile)); - String line; - while ((line = br.readLine()) != null) { - if (isDataFile || line.contains(" LAUNCH")) { - lastTimestampLine = line; - linesAfterLastTimestampLine = 0; - } else { - linesAfterLastTimestampLine++; - } - } - br.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed reading '" - + outputFile.getAbsolutePath() + "' to determine the first " - + "line to append to it.", e); - return false; - } - } - try { - this.logger.fine("Downloading " + (isDataFile ? ".data" : - ".extradata") + " file from '" + url + "' and merging it into " - + "'" + outputFile.getAbsolutePath() + "'."); - URL u = new URL(url); - HttpURLConnection huc = (HttpURLConnection) u.openConnection(); - huc.setRequestMethod("GET"); - huc.connect(); - BufferedReader br = new BufferedReader(new InputStreamReader( - huc.getInputStream())); - String line; - BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile, - true)); - boolean copyLines = lastTimestampLine == null; - while ((line = br.readLine()) != null) { - if (copyLines && linesAfterLastTimestampLine == 0) { - if (isDataFile || line.contains(" LAUNCH")) { - lastTimestampLine = line; - } - bw.write(line + "\n"); - } else if (copyLines && linesAfterLastTimestampLine > 0) { - linesAfterLastTimestampLine--; - } else if (line.equals(lastTimestampLine)) { - copyLines = true; - } - } - bw.close(); - br.close(); - if (!copyLines) { - this.logger.warning("The last timestamp line in '" - + outputFile.getAbsolutePath() + "' is not contained in the " - + "new file downloaded from '" + url + "'. Cannot append " - + "new lines without possibly leaving a gap. 
Skipping."); - return false; - } - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed downloading and/or merging '" - + url + "'.", e); - return false; - } - if (lastTimestampLine == null) { - this.logger.warning("'" + outputFile.getAbsolutePath() - + "' doesn't contain any timestamp lines. Unable to check " - + "whether that file is stale or not."); - } else { - long lastTimestampMillis = -1L; - if (isDataFile) { - lastTimestampMillis = Long.parseLong(lastTimestampLine.substring( - 0, lastTimestampLine.indexOf(" "))) * 1000L; - } else { - lastTimestampMillis = Long.parseLong(lastTimestampLine.substring( - lastTimestampLine.indexOf(" LAUNCH=") + " LAUNCH=".length(), - lastTimestampLine.indexOf(".", - lastTimestampLine.indexOf(" LAUNCH=")))) * 1000L; - } - if (lastTimestampMillis < System.currentTimeMillis() - - 330L * 60L * 1000L) { - this.logger.warning("The last timestamp in '" - + outputFile.getAbsolutePath() + "' is more than 5:30 hours " - + "old: " + lastTimestampMillis); - } - } - return true; - } - - private String mergeFiles(File dataFile, File extradataFile, - String source, int fileSize, String skipUntil) throws IOException { - SortedMap<String, String> config = new TreeMap<String, String>(); - config.put("SOURCE", source); - config.put("FILESIZE", String.valueOf(fileSize)); - if (!dataFile.exists() || !extradataFile.exists()) { - this.logger.warning("File " + dataFile.getAbsolutePath() + " or " - + extradataFile.getAbsolutePath() + " is missing."); - return null; - } - this.logger.fine("Merging " + dataFile.getAbsolutePath() + " and " - + extradataFile.getAbsolutePath() + " into .tpf format."); - BufferedReader brD = new BufferedReader(new FileReader(dataFile)), - brE = new BufferedReader(new FileReader(extradataFile)); - String lineD = brD.readLine(), lineE = brE.readLine(); - int d = 1, e = 1; - String maxDataComplete = null, maxUsedAt = null; - while (lineD != null) { - - /* Parse .data line. 
Every valid .data line will go into the .tpf - * format, either with additional information from the .extradata - * file or without it. */ - if (lineD.isEmpty()) { - this.logger.finer("Skipping empty line " + dataFile.getName() - + ":" + d++ + "."); - lineD = brD.readLine(); - continue; - } - SortedMap<String, String> data = this.parseDataLine(lineD); - if (data == null) { - this.logger.finer("Skipping illegal line " + dataFile.getName() - + ":" + d++ + " '" + lineD + "'."); - lineD = brD.readLine(); - continue; - } - String dataComplete = data.get("DATACOMPLETE"); - double dataCompleteSeconds = Double.parseDouble(dataComplete); - if (skipUntil != null && dataComplete.compareTo(skipUntil) < 0) { - this.logger.finer("Skipping " + dataFile.getName() + ":" - + d++ + " which we already processed before."); - lineD = brD.readLine(); - continue; - } - maxDataComplete = dataComplete; - - /* Parse .extradata line if available and try to find the one that - * matches the .data line. */ - SortedMap<String, String> extradata = null; - while (lineE != null) { - if (lineE.isEmpty()) { - this.logger.finer("Skipping " + extradataFile.getName() + ":" - + e++ + " which is empty."); - lineE = brE.readLine(); - continue; - } - if (lineE.startsWith("BUILDTIMEOUT_SET ")) { - this.logger.finer("Skipping " + extradataFile.getName() + ":" - + e++ + " which is a BUILDTIMEOUT_SET line."); - lineE = brE.readLine(); - continue; - } else if (lineE.startsWith("ok ") || - lineE.startsWith("error ")) { - this.logger.finer("Skipping " + extradataFile.getName() + ":" - + e++ + " which is in the old format."); - lineE = brE.readLine(); - continue; - } - extradata = this.parseExtradataLine(lineE); - if (extradata == null) { - this.logger.finer("Skipping Illegal line " - + extradataFile.getName() + ":" + e++ + " '" + lineE - + "'."); - lineE = brE.readLine(); - continue; - } - if (!extradata.containsKey("USED_AT")) { - this.logger.finer("Skipping " + extradataFile.getName() + ":" - + e++ + " which 
doesn't contain a USED_AT element."); - lineE = brE.readLine(); - continue; - } - String usedAt = extradata.get("USED_AT"); - double usedAtSeconds = Double.parseDouble(usedAt); - if (skipUntil != null && usedAt.compareTo(skipUntil) < 0) { - this.logger.finer("Skipping " + extradataFile.getName() + ":" - + e++ + " which we already processed before."); - lineE = brE.readLine(); - continue; - } - maxUsedAt = usedAt; - if (Math.abs(usedAtSeconds - dataCompleteSeconds) <= 1.0) { - this.logger.fine("Merging " + extradataFile.getName() + ":" - + e++ + " into the current .data line."); - lineE = brE.readLine(); - break; - } else if (usedAtSeconds > dataCompleteSeconds) { - this.logger.finer("Comparing " + extradataFile.getName() - + " to the next .data line."); - extradata = null; - break; - } else { - this.logger.finer("Skipping " + extradataFile.getName() + ":" - + e++ + " which is too old to be merged with " - + dataFile.getName() + ":" + d + "."); - lineE = brE.readLine(); - continue; - } - } - - /* Write output line to .tpf file. */ - SortedMap<String, String> keysAndValues = - new TreeMap<String, String>(); - if (extradata != null) { - keysAndValues.putAll(extradata); - } - keysAndValues.putAll(data); - keysAndValues.putAll(config); - this.logger.fine("Writing " + dataFile.getName() + ":" + d++ + "."); - lineD = brD.readLine(); - try { - this.writeTpfLine(source, fileSize, keysAndValues); - } catch (IOException ex) { - this.logger.log(Level.WARNING, "Error writing output line. 
" - + "Aborting to merge " + dataFile.getName() + " and " - + extradataFile.getName() + ".", e); - break; - } - } - brD.close(); - brE.close(); - this.writeCachedTpfLines(); - if (maxDataComplete == null) { - return maxUsedAt; - } else if (maxUsedAt == null) { - return maxDataComplete; - } else if (maxDataComplete.compareTo(maxUsedAt) > 0) { - return maxUsedAt; - } else { - return maxDataComplete; - } - } - - private SortedMap<Integer, String> dataTimestamps; - private SortedMap<String, String> parseDataLine(String line) { - String[] parts = line.trim().split(" "); - if (line.length() == 0 || parts.length < 20) { - return null; - } - if (this.dataTimestamps == null) { - this.dataTimestamps = new TreeMap<Integer, String>(); - this.dataTimestamps.put(0, "START"); - this.dataTimestamps.put(2, "SOCKET"); - this.dataTimestamps.put(4, "CONNECT"); - this.dataTimestamps.put(6, "NEGOTIATE"); - this.dataTimestamps.put(8, "REQUEST"); - this.dataTimestamps.put(10, "RESPONSE"); - this.dataTimestamps.put(12, "DATAREQUEST"); - this.dataTimestamps.put(14, "DATARESPONSE"); - this.dataTimestamps.put(16, "DATACOMPLETE"); - this.dataTimestamps.put(21, "DATAPERC10"); - this.dataTimestamps.put(23, "DATAPERC20"); - this.dataTimestamps.put(25, "DATAPERC30"); - this.dataTimestamps.put(27, "DATAPERC40"); - this.dataTimestamps.put(29, "DATAPERC50"); - this.dataTimestamps.put(31, "DATAPERC60"); - this.dataTimestamps.put(33, "DATAPERC70"); - this.dataTimestamps.put(35, "DATAPERC80"); - this.dataTimestamps.put(37, "DATAPERC90"); - } - SortedMap<String, String> data = new TreeMap<String, String>(); - try { - for (Map.Entry<Integer, String> e : this.dataTimestamps.entrySet()) { - int i = e.getKey(); - if (parts.length > i + 1) { - String key = e.getValue(); - String value = String.format("%s.%02d", parts[i], - Integer.parseInt(parts[i + 1]) / 10000); - data.put(key, value); - } - } - } catch (NumberFormatException e) { - return null; - } - data.put("WRITEBYTES", parts[18]); - 
data.put("READBYTES", parts[19]); - if (parts.length >= 21) { - data.put("DIDTIMEOUT", parts[20]); - } - return data; - } - - private SortedMap<String, String> parseExtradataLine(String line) { - String[] parts = line.split(" "); - SortedMap<String, String> extradata = new TreeMap<String, String>(); - String previousKey = null; - for (String part : parts) { - String[] keyAndValue = part.split("=", -1); - if (keyAndValue.length == 2) { - String key = keyAndValue[0]; - previousKey = key; - String value = keyAndValue[1]; - if (value.contains(".") && value.lastIndexOf(".") == - value.length() - 2) { - /* Make sure that all floats have two trailing digits. */ - value += "0"; - } - extradata.put(key, value); - } else if (keyAndValue.length == 1 && previousKey != null) { - String value = keyAndValue[0]; - if (previousKey.equals("STREAM_FAIL_REASONS") && - (value.equals("MISC") || value.equals("EXITPOLICY") || - value.equals("RESOURCELIMIT") || - value.equals("RESOLVEFAILED"))) { - extradata.put(previousKey, extradata.get(previousKey) + ":" - + value); - } else { - return null; - } - } else { - return null; - } - } - return extradata; - } - - private String cachedSource; - private int cachedFileSize; - private String cachedStartDate; - private SortedMap<String, String> cachedTpfLines; - private void writeTpfLine(String source, int fileSize, - SortedMap<String, String> keysAndValues) throws IOException { - StringBuilder sb = new StringBuilder(); - int written = 0; - for (Map.Entry<String, String> keyAndValue : - keysAndValues.entrySet()) { - String key = keyAndValue.getKey(); - String value = keyAndValue.getValue(); - sb.append((written++ > 0 ? 
" " : "") + key + "=" + value); - } - String line = sb.toString(); - String startString = keysAndValues.get("START"); - long startMillis = Long.parseLong(startString.substring(0, - startString.indexOf("."))) * 1000L; - String startDate = dateFormat.format(startMillis); - if (this.cachedTpfLines == null || !source.equals(this.cachedSource) || - fileSize != this.cachedFileSize || - !startDate.equals(this.cachedStartDate)) { - this.writeCachedTpfLines(); - this.readTpfLinesToCache(source, fileSize, startDate); - } - if (!this.cachedTpfLines.containsKey(startString) || - line.length() > this.cachedTpfLines.get(startString).length()) { - this.cachedTpfLines.put(startString, line); - } - } - - private void readTpfLinesToCache(String source, int fileSize, - String startDate) throws IOException { - this.cachedTpfLines = new TreeMap<String, String>(); - this.cachedSource = source; - this.cachedFileSize = fileSize; - this.cachedStartDate = startDate; - File tpfFile = new File(torperfOutputDirectory, - startDate.replaceAll("-", "/") + "/" - + source + "-" + String.valueOf(fileSize) + "-" + startDate - + ".tpf"); - if (!tpfFile.exists()) { - return; - } - BufferedReader br = new BufferedReader(new FileReader(tpfFile)); - String line; - while ((line = br.readLine()) != null) { - if (line.startsWith("@type ")) { - continue; - } - if (line.contains("START=")) { - String startString = line.substring(line.indexOf("START=") - + "START=".length()).split(" ")[0]; - this.cachedTpfLines.put(startString, line); - } - } - br.close(); - } - - private void writeCachedTpfLines() throws IOException { - if (this.cachedSource == null || this.cachedFileSize == 0 || - this.cachedStartDate == null || this.cachedTpfLines == null) { - return; - } - File tpfFile = new File(torperfOutputDirectory, - this.cachedStartDate.replaceAll("-", "/") - + "/" + this.cachedSource + "-" - + String.valueOf(this.cachedFileSize) + "-" - + this.cachedStartDate + ".tpf"); - tpfFile.getParentFile().mkdirs(); - 
BufferedWriter bw = new BufferedWriter(new FileWriter(tpfFile)); - bw.write("@type torperf 1.0\n"); - for (String line : this.cachedTpfLines.values()) { - bw.write(line + "\n"); - } - bw.close(); - this.cachedSource = null; - this.cachedFileSize = 0; - this.cachedStartDate = null; - this.cachedTpfLines = null; - } -} - diff --git a/src/org/torproject/ernie/db/bridgedescs/BridgeDescriptorParser.java b/src/org/torproject/ernie/db/bridgedescs/BridgeDescriptorParser.java new file mode 100644 index 0000000..f0b617a --- /dev/null +++ b/src/org/torproject/ernie/db/bridgedescs/BridgeDescriptorParser.java @@ -0,0 +1,46 @@ +/* Copyright 2010--2012 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.db.bridgedescs; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.StringReader; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class BridgeDescriptorParser { + private SanitizedBridgesWriter sbw; + private Logger logger; + public BridgeDescriptorParser(SanitizedBridgesWriter sbw) { + this.sbw = sbw; + this.logger = + Logger.getLogger(BridgeDescriptorParser.class.getName()); + } + public void parse(byte[] allData, String dateTime) { + try { + BufferedReader br = new BufferedReader(new StringReader( + new String(allData, "US-ASCII"))); + String line = br.readLine(); + if (line == null) { + return; + } else if (line.startsWith("r ")) { + if (this.sbw != null) { + this.sbw.sanitizeAndStoreNetworkStatus(allData, dateTime); + } + } else if (line.startsWith("router ")) { + if (this.sbw != null) { + this.sbw.sanitizeAndStoreServerDescriptor(allData); + } + } else if (line.startsWith("extra-info ")) { + if (this.sbw != null) { + this.sbw.sanitizeAndStoreExtraInfoDescriptor(allData); + } + } + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not parse bridge descriptor.", + e); + return; + } + } +} + diff --git a/src/org/torproject/ernie/db/bridgedescs/BridgeSnapshotReader.java 
b/src/org/torproject/ernie/db/bridgedescs/BridgeSnapshotReader.java new file mode 100644 index 0000000..783775c --- /dev/null +++ b/src/org/torproject/ernie/db/bridgedescs/BridgeSnapshotReader.java @@ -0,0 +1,220 @@ +/* Copyright 2010--2012 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.db.bridgedescs; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.StringReader; +import java.util.HashSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.Stack; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; + +/** + * Reads the half-hourly snapshots of bridge descriptors from Tonga. 
+ */ +public class BridgeSnapshotReader { + public BridgeSnapshotReader(BridgeDescriptorParser bdp, + File bridgeDirectoriesDir, File statsDirectory) { + + if (bdp == null || bridgeDirectoriesDir == null || + statsDirectory == null) { + throw new IllegalArgumentException(); + } + + Logger logger = + Logger.getLogger(BridgeSnapshotReader.class.getName()); + SortedSet<String> parsed = new TreeSet<String>(); + File bdDir = bridgeDirectoriesDir; + File pbdFile = new File(statsDirectory, "parsed-bridge-directories"); + boolean modified = false; + if (bdDir.exists()) { + if (pbdFile.exists()) { + logger.fine("Reading file " + pbdFile.getAbsolutePath() + "..."); + try { + BufferedReader br = new BufferedReader(new FileReader(pbdFile)); + String line = null; + while ((line = br.readLine()) != null) { + parsed.add(line); + } + br.close(); + logger.fine("Finished reading file " + + pbdFile.getAbsolutePath() + "."); + } catch (IOException e) { + logger.log(Level.WARNING, "Failed reading file " + + pbdFile.getAbsolutePath() + "!", e); + return; + } + } + logger.fine("Importing files in directory " + bridgeDirectoriesDir + + "/..."); + Set<String> descriptorImportHistory = new HashSet<String>(); + int parsedFiles = 0, skippedFiles = 0, parsedStatuses = 0, + parsedServerDescriptors = 0, skippedServerDescriptors = 0, + parsedExtraInfoDescriptors = 0, skippedExtraInfoDescriptors = 0; + Stack<File> filesInInputDir = new Stack<File>(); + filesInInputDir.add(bdDir); + while (!filesInInputDir.isEmpty()) { + File pop = filesInInputDir.pop(); + if (pop.isDirectory()) { + for (File f : pop.listFiles()) { + filesInInputDir.add(f); + } + } else if (!parsed.contains(pop.getName())) { + try { + FileInputStream in = new FileInputStream(pop); + if (in.available() > 0) { + TarArchiveInputStream tais = null; + if (pop.getName().endsWith(".tar.gz")) { + GzipCompressorInputStream gcis = + new GzipCompressorInputStream(in); + tais = new TarArchiveInputStream(gcis); + } else if 
(pop.getName().endsWith(".tar")) { + tais = new TarArchiveInputStream(in); + } else { + continue; + } + BufferedInputStream bis = new BufferedInputStream(tais); + String fn = pop.getName(); + String dateTime = fn.substring(11, 21) + " " + + fn.substring(22, 24) + ":" + fn.substring(24, 26) + + ":" + fn.substring(26, 28); + while ((tais.getNextTarEntry()) != null) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int len; + byte[] data = new byte[1024]; + while ((len = bis.read(data, 0, 1024)) >= 0) { + baos.write(data, 0, len); + } + byte[] allData = baos.toByteArray(); + if (allData.length == 0) { + continue; + } + String fileDigest = Hex.encodeHexString(DigestUtils.sha( + allData)); + String ascii = new String(allData, "US-ASCII"); + BufferedReader br3 = new BufferedReader(new StringReader( + ascii)); + String firstLine = null; + while ((firstLine = br3.readLine()) != null) { + if (firstLine.startsWith("@")) { + continue; + } else { + break; + } + } + if (firstLine.startsWith("r ")) { + bdp.parse(allData, dateTime); + parsedStatuses++; + } else if (descriptorImportHistory.contains(fileDigest)) { + /* Skip server descriptors or extra-info descriptors if + * we parsed them before. */ + skippedFiles++; + continue; + } else { + int start = -1, sig = -1, end = -1; + String startToken = + firstLine.startsWith("router ") ? 
+ "router " : "extra-info "; + String sigToken = "\nrouter-signature\n"; + String endToken = "\n-----END SIGNATURE-----\n"; + while (end < ascii.length()) { + start = ascii.indexOf(startToken, end); + if (start < 0) { + break; + } + sig = ascii.indexOf(sigToken, start); + if (sig < 0) { + break; + } + sig += sigToken.length(); + end = ascii.indexOf(endToken, sig); + if (end < 0) { + break; + } + end += endToken.length(); + byte[] descBytes = new byte[end - start]; + System.arraycopy(allData, start, descBytes, 0, + end - start); + String descriptorDigest = Hex.encodeHexString( + DigestUtils.sha(descBytes)); + if (!descriptorImportHistory.contains( + descriptorDigest)) { + bdp.parse(descBytes, dateTime); + descriptorImportHistory.add(descriptorDigest); + if (firstLine.startsWith("router ")) { + parsedServerDescriptors++; + } else { + parsedExtraInfoDescriptors++; + } + } else { + if (firstLine.startsWith("router ")) { + skippedServerDescriptors++; + } else { + skippedExtraInfoDescriptors++; + } + } + } + } + descriptorImportHistory.add(fileDigest); + parsedFiles++; + } + bis.close(); + } + in.close(); + + /* Let's give some memory back, or we'll run out of it. */ + System.gc(); + + parsed.add(pop.getName()); + modified = true; + } catch (IOException e) { + logger.log(Level.WARNING, "Could not parse bridge snapshot " + + pop.getName() + "!", e); + continue; + } + } + } + logger.fine("Finished importing files in directory " + + bridgeDirectoriesDir + "/. 
In total, we parsed " + + parsedFiles + " files (skipped " + skippedFiles + + ") containing " + parsedStatuses + " statuses, " + + parsedServerDescriptors + " server descriptors (skipped " + + skippedServerDescriptors + "), and " + + parsedExtraInfoDescriptors + " extra-info descriptors " + + "(skipped " + skippedExtraInfoDescriptors + ")."); + if (!parsed.isEmpty() && modified) { + logger.fine("Writing file " + pbdFile.getAbsolutePath() + "..."); + try { + pbdFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile)); + for (String f : parsed) { + bw.append(f + "\n"); + } + bw.close(); + logger.fine("Finished writing file " + pbdFile.getAbsolutePath() + + "."); + } catch (IOException e) { + logger.log(Level.WARNING, "Failed writing file " + + pbdFile.getAbsolutePath() + "!", e); + } + } + } + } +} + diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java new file mode 100644 index 0000000..ec7ad4b --- /dev/null +++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java @@ -0,0 +1,911 @@ +/* Copyright 2010--2012 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.db.bridgedescs; + +import java.io.*; +import java.security.*; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.logging.*; + +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.digest.*; +import org.apache.commons.codec.binary.*; + +/** + * Sanitizes bridge descriptors, i.e., removes all possibly sensitive + * information from them, and writes them to a local directory structure. + * During the sanitizing process, all information about the bridge + * identity or IP address are removed or replaced. 
The goal is to keep the + * sanitized bridge descriptors useful for statistical analysis while not + * making it easier for an adversary to enumerate bridges. + * + * There are three types of bridge descriptors: bridge network statuses + * (lists of all bridges at a given time), server descriptors (published + * by the bridge to advertise their capabilities), and extra-info + * descriptors (published by the bridge, mainly for statistical analysis). + */ +public class SanitizedBridgesWriter { + + /** + * Logger for this class. + */ + private Logger logger; + + /** + * Output directory for writing sanitized bridge descriptors. + */ + private File sanitizedBridgesDirectory; + + private boolean replaceIPAddressesWithHashes; + + private boolean persistenceProblemWithSecrets; + + private SortedMap<String, byte[]> secretsForHashingIPAddresses; + + private String bridgeSanitizingCutOffTimestamp; + + private boolean haveWarnedAboutInterval; + + private File bridgeIpSecretsFile; + + private SecureRandom secureRandom; + + /** + * Initializes this class. + */ + public SanitizedBridgesWriter(File sanitizedBridgesDirectory, + File statsDirectory, boolean replaceIPAddressesWithHashes, + long limitBridgeSanitizingInterval) { + + if (sanitizedBridgesDirectory == null || statsDirectory == null) { + throw new IllegalArgumentException(); + } + + /* Memorize argument values. */ + this.sanitizedBridgesDirectory = sanitizedBridgesDirectory; + this.replaceIPAddressesWithHashes = replaceIPAddressesWithHashes; + + /* Initialize logger. */ + this.logger = Logger.getLogger( + SanitizedBridgesWriter.class.getName()); + + /* Initialize secure random number generator if we need it. */ + if (this.replaceIPAddressesWithHashes) { + try { + this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN"); + } catch (GeneralSecurityException e) { + this.logger.log(Level.WARNING, "Could not initialize secure " + + "random number generator! 
Not calculating any IP address " + + "hashes in this execution!", e); + this.persistenceProblemWithSecrets = true; + } + } + + /* Read hex-encoded secrets for replacing IP addresses with hashes + * from disk. */ + this.secretsForHashingIPAddresses = new TreeMap<String, byte[]>(); + this.bridgeIpSecretsFile = new File(statsDirectory, + "bridge-ip-secrets"); + if (this.bridgeIpSecretsFile.exists()) { + try { + BufferedReader br = new BufferedReader(new FileReader( + this.bridgeIpSecretsFile)); + String line; + while ((line = br.readLine()) != null) { + String[] parts = line.split(","); + if ((line.length() != ("yyyy-MM,".length() + 31 * 2) && + line.length() != ("yyyy-MM,".length() + 50 * 2)) || + parts.length != 2) { + this.logger.warning("Invalid line in bridge-ip-secrets file " + + "starting with '" + line.substring(0, 7) + "'! " + + "Not calculating any IP address hashes in this " + + "execution!"); + this.persistenceProblemWithSecrets = true; + break; + } + String month = parts[0]; + byte[] secret = Hex.decodeHex(parts[1].toCharArray()); + this.secretsForHashingIPAddresses.put(month, secret); + } + br.close(); + if (!this.persistenceProblemWithSecrets) { + this.logger.fine("Read " + + this.secretsForHashingIPAddresses.size() + " secrets for " + + "hashing bridge IP addresses."); + } + } catch (DecoderException e) { + this.logger.log(Level.WARNING, "Failed to decode hex string in " + + this.bridgeIpSecretsFile + "! Not calculating any IP " + + "address hashes in this execution!", e); + this.persistenceProblemWithSecrets = true; + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed to read " + + this.bridgeIpSecretsFile + "! Not calculating any IP " + + "address hashes in this execution!", e); + this.persistenceProblemWithSecrets = true; + } + } + + /* If we're configured to keep secrets only for a limited time, define + * the cut-off day and time. 
*/ + if (limitBridgeSanitizingInterval >= 0L) { + SimpleDateFormat formatter = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + this.bridgeSanitizingCutOffTimestamp = formatter.format( + System.currentTimeMillis() - 24L * 60L * 60L * 1000L + * limitBridgeSanitizingInterval); + } else { + this.bridgeSanitizingCutOffTimestamp = "1999-12-31 23:59:59"; + } + } + + private String scrubOrAddress(String orAddress, byte[] fingerprintBytes, + String published) throws IOException { + if (!orAddress.contains(":")) { + /* Malformed or-address or a line. */ + return null; + } + String addressPart = orAddress.substring(0, + orAddress.lastIndexOf(":")); + String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1); + String scrubbedAddressPart = null; + if (addressPart.startsWith("[")) { + scrubbedAddressPart = this.scrubIpv6Address(addressPart, + fingerprintBytes, published); + } else { + scrubbedAddressPart = this.scrubIpv4Address(addressPart, + fingerprintBytes, published); + } + return (scrubbedAddressPart == null ? null : + scrubbedAddressPart + ":" + portPart); + } + + private String scrubIpv4Address(String address, byte[] fingerprintBytes, + String published) throws IOException { + if (this.replaceIPAddressesWithHashes) { + if (this.persistenceProblemWithSecrets) { + /* There's a persistence problem, so we shouldn't scrub more IP + * addresses in this execution. */ + return null; + } + byte[] hashInput = new byte[4 + 20 + 31]; + String[] ipParts = address.split("\."); + for (int i = 0; i < 4; i++) { + hashInput[i] = (byte) Integer.parseInt(ipParts[i]); + } + System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20); + String month = published.substring(0, "yyyy-MM".length()); + byte[] secret = this.getSecretForMonth(month); + System.arraycopy(secret, 0, hashInput, 24, 31); + byte[] hashOutput = DigestUtils.sha256(hashInput); + String hashedAddress = "10." + + (((int) hashOutput[0] + 256) % 256) + "." 
+ + (((int) hashOutput[1] + 256) % 256) + "." + + (((int) hashOutput[2] + 256) % 256); + return hashedAddress; + } else { + return "127.0.0.1"; + } + } + + private String scrubIpv6Address(String address, byte[] fingerprintBytes, + String published) throws IOException { + StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::"); + if (this.replaceIPAddressesWithHashes) { + if (this.persistenceProblemWithSecrets) { + /* There's a persistence problem, so we shouldn't scrub more IP + * addresses in this execution. */ + return null; + } + byte[] hashInput = new byte[16 + 20 + 19]; + String[] doubleColonSeparatedParts = address.substring(1, + address.length() - 1).split("::", -1); + if (doubleColonSeparatedParts.length > 2) { + /* Invalid IPv6 address. */ + return null; + } + List<String> hexParts = new ArrayList<String>(); + for (String doubleColonSeparatedPart : doubleColonSeparatedParts) { + StringBuilder hexPart = new StringBuilder(); + String[] parts = doubleColonSeparatedPart.split(":", -1); + if (parts.length < 1 || parts.length > 8) { + /* Invalid IPv6 address. */ + return null; + } + for (int i = 0; i < parts.length; i++) { + String part = parts[i]; + if (part.contains(".")) { + String[] ipParts = part.split("\."); + byte[] ipv4Bytes = new byte[4]; + if (ipParts.length != 4) { + /* Invalid IPv4 part in IPv6 address. */ + return null; + } + for (int m = 0; m < 4; m++) { + ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]); + } + hexPart.append(Hex.encodeHexString(ipv4Bytes)); + } else if (part.length() > 4) { + /* Invalid IPv6 address. 
*/ + return null; + } else { + for (int k = part.length(); k < 4; k++) { + hexPart.append("0"); + } + hexPart.append(part); + } + } + hexParts.add(hexPart.toString()); + } + StringBuilder hex = new StringBuilder(); + hex.append(hexParts.get(0)); + if (hexParts.size() == 2) { + for (int i = 32 - hexParts.get(0).length() + - hexParts.get(1).length(); i > 0; i--) { + hex.append("0"); + } + hex.append(hexParts.get(1)); + } + byte[] ipBytes = null; + try { + ipBytes = Hex.decodeHex(hex.toString().toCharArray()); + } catch (DecoderException e) { + /* TODO Invalid IPv6 address. */ + return null; + } + if (ipBytes.length != 16) { + /* TODO Invalid IPv6 address. */ + return null; + } + System.arraycopy(ipBytes, 0, hashInput, 0, 16); + System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20); + String month = published.substring(0, "yyyy-MM".length()); + byte[] secret = this.getSecretForMonth(month); + System.arraycopy(secret, 31, hashInput, 36, 19); + String hashOutput = DigestUtils.sha256Hex(hashInput); + sb.append(hashOutput.substring(hashOutput.length() - 6, + hashOutput.length() - 4)); + sb.append(":"); + sb.append(hashOutput.substring(hashOutput.length() - 4)); + } + sb.append("]"); + return sb.toString(); + } + + private byte[] getSecretForMonth(String month) throws IOException { + if (!this.secretsForHashingIPAddresses.containsKey(month) || + this.secretsForHashingIPAddresses.get(month).length == 31) { + byte[] secret = new byte[50]; + this.secureRandom.nextBytes(secret); + if (this.secretsForHashingIPAddresses.containsKey(month)) { + System.arraycopy(this.secretsForHashingIPAddresses.get(month), 0, + secret, 0, 31); + } + if (month.compareTo( + this.bridgeSanitizingCutOffTimestamp) < 0) { + this.logger.warning("Generated a secret that we won't make " + + "persistent, because it's outside our bridge descriptor " + + "sanitizing interval."); + } else { + /* Append secret to file on disk immediately before using it, or + * we might end with inconsistently sanitized 
bridges. */ + try { + if (!this.bridgeIpSecretsFile.exists()) { + this.bridgeIpSecretsFile.getParentFile().mkdirs(); + } + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.bridgeIpSecretsFile, + this.bridgeIpSecretsFile.exists())); + bw.write(month + "," + Hex.encodeHexString(secret) + "\n"); + bw.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not store new secret " + + "to disk! Not calculating any IP address hashes in " + + "this execution!", e); + this.persistenceProblemWithSecrets = true; + throw new IOException(e); + } + } + this.secretsForHashingIPAddresses.put(month, secret); + } + return this.secretsForHashingIPAddresses.get(month); + } + + /** + * Sanitizes a network status and writes it to disk. + */ + public void sanitizeAndStoreNetworkStatus(byte[] data, + String publicationTime) { + + if (this.persistenceProblemWithSecrets) { + /* There's a persistence problem, so we shouldn't scrub more IP + * addresses in this execution. */ + return; + } + + if (this.bridgeSanitizingCutOffTimestamp. + compareTo(publicationTime) > 0) { + this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING + : Level.FINE, "Sanitizing and storing network status with " + + "publication time outside our descriptor sanitizing " + + "interval."); + this.haveWarnedAboutInterval = true; + } + + /* Parse the given network status line by line. */ + SortedMap<String, String> scrubbedLines = + new TreeMap<String, String>(); + try { + StringBuilder scrubbed = new StringBuilder(); + BufferedReader br = new BufferedReader(new StringReader(new String( + data, "US-ASCII"))); + String line = null; + String mostRecentDescPublished = null; + byte[] fingerprintBytes = null; + String descPublicationTime = null; + String hashedBridgeIdentityHex = null; + while ((line = br.readLine()) != null) { + + /* r lines contain sensitive information that needs to be removed + * or replaced. 
*/ + if (line.startsWith("r ")) { + + /* Clear buffer from previously scrubbed lines. */ + if (scrubbed.length() > 0) { + String scrubbedLine = scrubbed.toString(); + scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine); + scrubbed = new StringBuilder(); + } + + /* Parse the relevant parts of this r line. */ + String[] parts = line.split(" "); + String nickname = parts[1]; + fingerprintBytes = Base64.decodeBase64(parts[2] + "=="); + String descriptorIdentifier = parts[3]; + descPublicationTime = parts[4] + " " + parts[5]; + String address = parts[6]; + String orPort = parts[7]; + String dirPort = parts[8]; + + /* Determine most recent descriptor publication time. */ + if (descPublicationTime.compareTo(publicationTime) <= 0 && + (mostRecentDescPublished == null || + descPublicationTime.compareTo( + mostRecentDescPublished) > 0)) { + mostRecentDescPublished = descPublicationTime; + } + + /* Write scrubbed r line to buffer. */ + byte[] hashedBridgeIdentity = DigestUtils.sha(fingerprintBytes); + String hashedBridgeIdentityBase64 = Base64.encodeBase64String( + hashedBridgeIdentity).substring(0, 27); + hashedBridgeIdentityHex = Hex.encodeHexString( + hashedBridgeIdentity); + String hashedDescriptorIdentifier = Base64.encodeBase64String( + DigestUtils.sha(Base64.decodeBase64(descriptorIdentifier + + "=="))).substring(0, 27); + String scrubbedAddress = scrubIpv4Address(address, + fingerprintBytes, + descPublicationTime); + scrubbed.append("r " + nickname + " " + + hashedBridgeIdentityBase64 + " " + + hashedDescriptorIdentifier + " " + descPublicationTime + + " " + scrubbedAddress + " " + orPort + " " + dirPort + + "\n"); + + /* Sanitize any addresses in a lines using the fingerprint and + * descriptor publication time from the previous r line. 
*/ + } else if (line.startsWith("a ")) { + String scrubbedOrAddress = scrubOrAddress( + line.substring("a ".length()), fingerprintBytes, + descPublicationTime); + if (scrubbedOrAddress != null) { + scrubbed.append("a " + scrubbedOrAddress + "\n"); + } else { + this.logger.warning("Invalid address in line '" + line + + "' in bridge network status. Skipping line!"); + } + + /* Nothing special about s, w, and p lines; just copy them. */ + } else if (line.startsWith("s ") || line.equals("s") || + line.startsWith("w ") || line.equals("w") || + line.startsWith("p ") || line.equals("p")) { + scrubbed.append(line + "\n"); + + /* There should be nothing else but r, w, p, and s lines in the + * network status. If there is, we should probably learn before + * writing anything to the sanitized descriptors. */ + } else { + this.logger.fine("Unknown line '" + line + "' in bridge " + + "network status. Not writing to disk!"); + return; + } + } + br.close(); + if (scrubbed.length() > 0) { + String scrubbedLine = scrubbed.toString(); + scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine); + scrubbed = new StringBuilder(); + } + + /* Check if we can tell from the descriptor publication times + * whether this status is possibly stale. */ + SimpleDateFormat formatter = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + if (formatter.parse(publicationTime).getTime() - + formatter.parse(mostRecentDescPublished).getTime() > + 60L * 60L * 1000L) { + this.logger.warning("The most recent descriptor in the bridge " + + "network status published at " + publicationTime + " was " + + "published at " + mostRecentDescPublished + " which is " + + "more than 1 hour before the status. This is a sign for " + + "the status being stale. 
Please check!"); + } + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Could not parse timestamp in " + + "bridge network status.", e); + return; + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not parse bridge network " + + "status.", e); + return; + } + + /* Write the sanitized network status to disk. */ + try { + + /* Determine file name. */ + String syear = publicationTime.substring(0, 4); + String smonth = publicationTime.substring(5, 7); + String sday = publicationTime.substring(8, 10); + String stime = publicationTime.substring(11, 13) + + publicationTime.substring(14, 16) + + publicationTime.substring(17, 19); + File statusFile = new File( + this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear + + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth + + sday + "-" + stime + "-" + + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D"); + + /* Create all parent directories to write this network status. */ + statusFile.getParentFile().mkdirs(); + + /* Write sanitized network status to disk. */ + BufferedWriter bw = new BufferedWriter(new FileWriter(statusFile)); + bw.write("@type bridge-network-status 1.0\n"); + bw.write("published " + publicationTime + "\n"); + for (String scrubbed : scrubbedLines.values()) { + bw.write(scrubbed); + } + bw.close(); + + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not write sanitized bridge " + + "network status to disk.", e); + return; + } + } + + /** + * Sanitizes a bridge server descriptor and writes it to disk. + */ + public void sanitizeAndStoreServerDescriptor(byte[] data) { + + if (this.persistenceProblemWithSecrets) { + /* There's a persistence problem, so we shouldn't scrub more IP + * addresses in this execution. */ + return; + } + + /* Parse descriptor to generate a sanitized version. 
*/ + String scrubbedDesc = null, published = null; + try { + BufferedReader br = new BufferedReader(new StringReader( + new String(data, "US-ASCII"))); + StringBuilder scrubbed = new StringBuilder(); + String line = null, hashedBridgeIdentity = null, address = null, + routerLine = null, scrubbedAddress = null; + List<String> orAddresses = null, scrubbedOrAddresses = null; + boolean skipCrypto = false; + while ((line = br.readLine()) != null) { + + /* Skip all crypto parts that might be used to derive the bridge's + * identity fingerprint. */ + if (skipCrypto && !line.startsWith("-----END ")) { + continue; + + /* Store the router line for later processing, because we may need + * the bridge identity fingerprint for replacing the IP address in + * the scrubbed version. */ + } else if (line.startsWith("router ")) { + address = line.split(" ")[2]; + routerLine = line; + + /* Store or-address parts in a list and sanitize them when we have + * read the fingerprint. */ + } else if (line.startsWith("or-address ")) { + if (orAddresses == null) { + orAddresses = new ArrayList<String>(); + } + orAddresses.add(line.substring("or-address ".length())); + + /* Parse the publication time to see if we're still inside the + * sanitizing interval. */ + } else if (line.startsWith("published ")) { + published = line.substring("published ".length()); + if (this.bridgeSanitizingCutOffTimestamp. + compareTo(published) > 0) { + this.logger.log(!this.haveWarnedAboutInterval + ? Level.WARNING : Level.FINE, "Sanitizing and storing " + + "server descriptor with publication time outside our " + + "descriptor sanitizing interval."); + this.haveWarnedAboutInterval = true; + } + scrubbed.append(line + "\n"); + + /* Parse the fingerprint to determine the hashed bridge + * identity. */ + } else if (line.startsWith("opt fingerprint ") || + line.startsWith("fingerprint ")) { + String fingerprint = line.substring(line.startsWith("opt ") ? + "opt fingerprint".length() : "fingerprint".length()). 
+ replaceAll(" ", "").toLowerCase(); + byte[] fingerprintBytes = Hex.decodeHex( + fingerprint.toCharArray()); + hashedBridgeIdentity = DigestUtils.shaHex(fingerprintBytes). + toLowerCase(); + try { + scrubbedAddress = scrubIpv4Address(address, fingerprintBytes, + published); + if (orAddresses != null) { + scrubbedOrAddresses = new ArrayList<String>(); + for (String orAddress : orAddresses) { + String scrubbedOrAddress = scrubOrAddress(orAddress, + fingerprintBytes, published); + if (scrubbedOrAddress != null) { + scrubbedOrAddresses.add(scrubbedOrAddress); + } else { + this.logger.warning("Invalid address in line " + + "'or-address " + orAddress + "' in bridge server " + + "descriptor. Skipping line!"); + } + } + } + } catch (IOException e) { + /* There's a persistence problem, so we shouldn't scrub more + * IP addresses in this execution. */ + this.persistenceProblemWithSecrets = true; + return; + } + scrubbed.append((line.startsWith("opt ") ? "opt " : "") + + "fingerprint"); + for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++) + scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i, + 4 * (i + 1)).toUpperCase()); + scrubbed.append("\n"); + + /* Replace the contact line (if present) with a generic one. */ + } else if (line.startsWith("contact ")) { + scrubbed.append("contact somebody\n"); + + /* When we reach the signature, we're done. Write the sanitized + * descriptor to disk below. 
*/ + } else if (line.startsWith("router-signature")) { + String[] routerLineParts = routerLine.split(" "); + scrubbedDesc = "router " + routerLineParts[1] + " " + + scrubbedAddress + " " + routerLineParts[3] + " " + + routerLineParts[4] + " " + routerLineParts[5] + "\n"; + if (scrubbedOrAddresses != null) { + for (String scrubbedOrAddress : scrubbedOrAddresses) { + scrubbedDesc = scrubbedDesc += "or-address " + + scrubbedOrAddress + "\n"; + } + } + scrubbedDesc += scrubbed.toString(); + break; + + /* Replace extra-info digest with the hashed digest of the + * non-scrubbed descriptor. */ + } else if (line.startsWith("opt extra-info-digest ") || + line.startsWith("extra-info-digest ")) { + String extraInfoDescriptorIdentifier = line.substring( + line.indexOf("extra-info-digest ") + + "extra-info-digest ".length()); + String hashedExtraInfoDescriptorIdentifier = + DigestUtils.shaHex(Hex.decodeHex( + extraInfoDescriptorIdentifier.toCharArray())).toUpperCase(); + scrubbed.append((line.startsWith("opt ") ? "opt " : "") + + "extra-info-digest " + hashedExtraInfoDescriptorIdentifier + + "\n"); + + /* Possibly sanitize reject lines if they contain the bridge's own + * IP address. */ + } else if (line.startsWith("reject ")) { + if (address != null && line.startsWith("reject " + address)) { + scrubbed.append("reject " + scrubbedAddress + + line.substring("reject ".length() + address.length()) + + "\n"); + } else { + scrubbed.append(line + "\n"); + } + + /* Write the following lines unmodified to the sanitized + * descriptor. 
*/ + } else if (line.startsWith("accept ") + || line.startsWith("platform ") + || line.startsWith("opt protocols ") + || line.startsWith("protocols ") + || line.startsWith("uptime ") + || line.startsWith("bandwidth ") + || line.startsWith("opt hibernating ") + || line.startsWith("hibernating ") + || line.equals("opt hidden-service-dir") + || line.equals("hidden-service-dir") + || line.equals("opt caches-extra-info") + || line.equals("caches-extra-info") + || line.equals("opt allow-single-hop-exits") + || line.equals("allow-single-hop-exits")) { + scrubbed.append(line + "\n"); + + /* Replace node fingerprints in the family line with their hashes + * and leave nicknames unchanged. */ + } else if (line.startsWith("family ")) { + StringBuilder familyLine = new StringBuilder("family"); + for (String s : line.substring(7).split(" ")) { + if (s.startsWith("$")) { + familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex( + s.substring(1).toCharArray())).toUpperCase()); + } else { + familyLine.append(" " + s); + } + } + scrubbed.append(familyLine.toString() + "\n"); + + /* Skip the purpose line that the bridge authority adds to its + * cached-descriptors file. */ + } else if (line.startsWith("@purpose ")) { + continue; + + /* Skip all crypto parts that might leak the bridge's identity + * fingerprint. */ + } else if (line.startsWith("-----BEGIN ") + || line.equals("onion-key") || line.equals("signing-key")) { + skipCrypto = true; + + /* Stop skipping lines when the crypto parts are over. */ + } else if (line.startsWith("-----END ")) { + skipCrypto = false; + + /* If we encounter an unrecognized line, stop parsing and print + * out a warning. We might have overlooked sensitive information + * that we need to remove or replace for the sanitized descriptor + * version. */ + } else { + this.logger.fine("Unrecognized line '" + line + "'. 
Skipping."); + return; + } + } + br.close(); + } catch (Exception e) { + this.logger.log(Level.WARNING, "Could not parse server " + + "descriptor.", e); + return; + } + + /* Determine filename of sanitized server descriptor. */ + String descriptorDigest = null; + try { + String ascii = new String(data, "US-ASCII"); + String startToken = "router "; + String sigToken = "\nrouter-signature\n"; + int start = ascii.indexOf(startToken); + int sig = ascii.indexOf(sigToken) + sigToken.length(); + if (start >= 0 && sig >= 0 && sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(data, start, forDigest, 0, sig - start); + descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest)); + } + } catch (UnsupportedEncodingException e) { + /* Handle below. */ + } + if (descriptorDigest == null) { + this.logger.log(Level.WARNING, "Could not calculate server " + + "descriptor digest."); + return; + } + String dyear = published.substring(0, 4); + String dmonth = published.substring(5, 7); + File newFile = new File( + this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + + dyear + "/" + dmonth + "/server-descriptors/" + + "/" + descriptorDigest.charAt(0) + "/" + + descriptorDigest.charAt(1) + "/" + + descriptorDigest); + + /* Write sanitized server descriptor to disk, including all its parent + * directories. */ + try { + newFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter(newFile)); + bw.write("@type bridge-server-descriptor 1.0\n"); + bw.write(scrubbedDesc); + bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n"); + bw.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not write sanitized server " + + "descriptor to disk.", e); + return; + } + } + + /** + * Sanitizes an extra-info descriptor and writes it to disk. + */ + public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) { + + /* Parse descriptor to generate a sanitized version. 
*/ + String scrubbedDesc = null, published = null; + try { + BufferedReader br = new BufferedReader(new StringReader(new String( + data, "US-ASCII"))); + String line = null; + StringBuilder scrubbed = null; + String hashedBridgeIdentity = null; + while ((line = br.readLine()) != null) { + + /* Parse bridge identity from extra-info line and replace it with + * its hash in the sanitized descriptor. */ + String[] parts = line.split(" "); + if (line.startsWith("extra-info ")) { + hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex( + parts[2].toCharArray())).toLowerCase(); + scrubbed = new StringBuilder("extra-info " + parts[1] + " " + + hashedBridgeIdentity.toUpperCase() + "\n"); + + /* Parse the publication time to determine the file name. */ + } else if (line.startsWith("published ")) { + scrubbed.append(line + "\n"); + published = line.substring("published ".length()); + + /* Remove everything from transport lines except the transport + * name. */ + } else if (line.startsWith("transport ")) { + if (parts.length < 3) { + this.logger.fine("Illegal line in extra-info descriptor: '" + + line + "'. Skipping descriptor."); + return; + } + scrubbed.append("transport " + parts[1] + "\n"); + + /* Skip transport-info lines entirely. */ + } else if (line.startsWith("transport-info ")) { + + /* Write the following lines unmodified to the sanitized + * descriptor. */ + } else if (line.startsWith("write-history ") + || line.startsWith("read-history ") + || line.startsWith("geoip-start-time ") + || line.startsWith("geoip-client-origins ") + || line.startsWith("geoip-db-digest ") + || line.startsWith("conn-bi-direct ") + || line.startsWith("bridge-") + || line.startsWith("dirreq-") + || line.startsWith("cell-") + || line.startsWith("entry-") + || line.startsWith("exit-")) { + scrubbed.append(line + "\n"); + + /* When we reach the signature, we're done. Write the sanitized + * descriptor to disk below. 
*/ + } else if (line.startsWith("router-signature")) { + scrubbedDesc = scrubbed.toString(); + break; + + /* If we encounter an unrecognized line, stop parsing and print + * out a warning. We might have overlooked sensitive information + * that we need to remove or replace for the sanitized descriptor + * version. */ + } else { + this.logger.fine("Unrecognized line '" + line + "'. Skipping."); + return; + } + } + br.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not parse extra-info " + + "descriptor.", e); + return; + } catch (DecoderException e) { + this.logger.log(Level.WARNING, "Could not parse extra-info " + + "descriptor.", e); + return; + } + + /* Determine filename of sanitized extra-info descriptor. */ + String descriptorDigest = null; + try { + String ascii = new String(data, "US-ASCII"); + String startToken = "extra-info "; + String sigToken = "\nrouter-signature\n"; + int start = ascii.indexOf(startToken); + int sig = ascii.indexOf(sigToken) + sigToken.length(); + if (start >= 0 && sig >= 0 && sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(data, start, forDigest, 0, sig - start); + descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest)); + } + } catch (UnsupportedEncodingException e) { + /* Handle below. */ + } + if (descriptorDigest == null) { + this.logger.log(Level.WARNING, "Could not calculate extra-info " + + "descriptor digest."); + return; + } + String dyear = published.substring(0, 4); + String dmonth = published.substring(5, 7); + File newFile = new File( + this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + + dyear + "/" + dmonth + "/extra-infos/" + + descriptorDigest.charAt(0) + "/" + + descriptorDigest.charAt(1) + "/" + + descriptorDigest); + + /* Write sanitized extra-info descriptor to disk, including all its + * parent directories. 
*/ + try { + newFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter(newFile)); + bw.write("@type bridge-extra-info 1.1\n"); + bw.write(scrubbedDesc); + bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n"); + bw.close(); + } catch (Exception e) { + this.logger.log(Level.WARNING, "Could not write sanitized " + + "extra-info descriptor to disk.", e); + } + } + + /** + * Rewrite all network statuses that might contain references to server + * descriptors we added or updated in this execution. This applies to + * all statuses that have been published up to 24 hours after any added + * or updated server descriptor. + */ + public void finishWriting() { + + /* Delete secrets that we don't need anymore. */ + if (!this.secretsForHashingIPAddresses.isEmpty() && + this.secretsForHashingIPAddresses.firstKey().compareTo( + this.bridgeSanitizingCutOffTimestamp) < 0) { + try { + int kept = 0, deleted = 0; + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.bridgeIpSecretsFile)); + for (Map.Entry<String, byte[]> e : + this.secretsForHashingIPAddresses.entrySet()) { + if (e.getKey().compareTo( + this.bridgeSanitizingCutOffTimestamp) < 0) { + deleted++; + } else { + bw.write(e.getKey() + "," + Hex.encodeHexString(e.getValue()) + + "\n"); + kept++; + } + } + bw.close(); + this.logger.info("Deleted " + deleted + " secrets that we don't " + + "need anymore and kept " + kept + "."); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not store reduced set of " + + "secrets to disk! 
This is a bad sign, better check what's " + + "going on!", e); + } + } + } +} + diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java new file mode 100644 index 0000000..470f6ab --- /dev/null +++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java @@ -0,0 +1,174 @@ +/* Copyright 2011--2012 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.db.bridgepools; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.SortedSet; +import java.util.Stack; +import java.util.TimeZone; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; + +public class BridgePoolAssignmentsProcessor { + + public BridgePoolAssignmentsProcessor(File assignmentsDirectory, + File sanitizedAssignmentsDirectory) { + + Logger logger = + Logger.getLogger(BridgePoolAssignmentsProcessor.class.getName()); + if (assignmentsDirectory == null || + sanitizedAssignmentsDirectory == null) { + IllegalArgumentException e = new IllegalArgumentException("Neither " + + "assignmentsDirectory nor sanitizedAssignmentsDirectory may " + + "be null!"); + throw e; + } + + List<File> assignmentFiles = new ArrayList<File>(); + Stack<File> files = new Stack<File>(); + files.add(assignmentsDirectory); + while (!files.isEmpty()) { + File 
file = files.pop(); + if (file.isDirectory()) { + files.addAll(Arrays.asList(file.listFiles())); + } else if (!file.getName().endsWith(".gz")) { + assignmentFiles.add(file); + } + } + + SimpleDateFormat assignmentFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + assignmentFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + SimpleDateFormat filenameFormat = + new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); + filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + for (File assignmentFile : assignmentFiles) { + logger.info("Processing bridge pool assignment file '" + + assignmentFile.getAbsolutePath() + "'..."); + try { + BufferedReader br = null; + if (assignmentFile.getName().endsWith(".gz")) { + br = new BufferedReader(new InputStreamReader( + new GzipCompressorInputStream(new FileInputStream( + assignmentFile)))); + } else { + br = new BufferedReader(new FileReader(assignmentFile)); + } + String line, bridgePoolAssignmentLine = null; + SortedSet<String> sanitizedAssignments = new TreeSet<String>(); + boolean wroteLastLine = false, skipBefore20120504125947 = true; + while ((line = br.readLine()) != null || !wroteLastLine) { + if (line != null && line.startsWith("bridge-pool-assignment ")) { + String[] parts = line.split(" "); + if (parts.length != 3) { + continue; + } + /* TODO Take out this temporary hack to ignore all assignments + * coming from ponticum when byblos was still the official + * BridgeDB host. 
*/ + if (line.compareTo( + "bridge-pool-assignment 2012-05-04 12:59:47") >= 0) { + skipBefore20120504125947 = false; + } + } + if (skipBefore20120504125947) { + if (line == null) { + break; + } else { + continue; + } + } + if (line == null || + line.startsWith("bridge-pool-assignment ")) { + if (bridgePoolAssignmentLine != null) { + try { + long bridgePoolAssignmentTime = assignmentFormat.parse( + bridgePoolAssignmentLine.substring( + "bridge-pool-assignment ".length())).getTime(); + File sanitizedAssignmentsFile = new File( + sanitizedAssignmentsDirectory, filenameFormat.format( + bridgePoolAssignmentTime)); + if (!sanitizedAssignmentsFile.exists()) { + sanitizedAssignmentsFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + sanitizedAssignmentsFile)); + bw.write("@type bridge-pool-assignment 1.0\n"); + bw.write(bridgePoolAssignmentLine + "\n"); + for (String assignmentLine : sanitizedAssignments) { + bw.write(assignmentLine + "\n"); + } + bw.close(); + } + } catch (IOException e) { + logger.log(Level.WARNING, "Could not write sanitized " + + "bridge pool assignment file for line '" + + bridgePoolAssignmentLine + "' to disk. Skipping " + + "bridge pool assignment file '" + + assignmentFile.getAbsolutePath() + "'.", e); + break; + } catch (ParseException e) { + logger.log(Level.WARNING, "Could not write sanitized " + + "bridge pool assignment file for line '" + + bridgePoolAssignmentLine + "' to disk. Skipping " + + "bridge pool assignment file '" + + assignmentFile.getAbsolutePath() + "'.", e); + break; + } + sanitizedAssignments.clear(); + } + if (line == null) { + wroteLastLine = true; + } else { + bridgePoolAssignmentLine = line; + } + } else { + String[] parts = line.split(" "); + if (parts.length < 2 || parts[0].length() < 40) { + logger.warning("Unrecognized line '" + line + + "'. 
Aborting."); + break; + } + String hashedFingerprint = null; + try { + hashedFingerprint = DigestUtils.shaHex(Hex.decodeHex( + line.split(" ")[0].toCharArray())).toLowerCase(); + } catch (DecoderException e) { + logger.warning("Unable to decode hex fingerprint in line '" + + line + "'. Aborting."); + break; + } + String assignmentDetails = line.substring(40); + sanitizedAssignments.add(hashedFingerprint + + assignmentDetails); + } + } + br.close(); + } catch (IOException e) { + logger.log(Level.WARNING, "Could not read bridge pool assignment " + + "file '" + assignmentFile.getAbsolutePath() + + "'. Skipping.", e); + } + } + + logger.info("Finished processing bridge pool assignment file(s)."); + } +} + diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java new file mode 100644 index 0000000..64f6a3b --- /dev/null +++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java @@ -0,0 +1,100 @@ +/* Copyright 2010--2012 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.db.exitlists; + +import java.io.BufferedInputStream; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.SortedSet; +import java.util.Stack; +import java.util.TimeZone; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class ExitListDownloader { + public ExitListDownloader() { + Logger logger = Logger.getLogger(ExitListDownloader.class.getName()); + try { + logger.fine("Downloading exit list..."); + String exitAddressesUrl = + "http://exitlist.torproject.org/exit-addresses"; + URL u = new URL(exitAddressesUrl); + HttpURLConnection huc = (HttpURLConnection) u.openConnection(); + huc.setRequestMethod("GET"); + huc.connect(); + int 
response = huc.getResponseCode(); + if (response != 200) { + logger.warning("Could not download exit list. Response code " + + response); + return; + } + BufferedInputStream in = new BufferedInputStream( + huc.getInputStream()); + SimpleDateFormat printFormat = + new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + Date downloadedDate = new Date(); + File exitListFile = new File("exitlist/" + printFormat.format( + downloadedDate)); + exitListFile.getParentFile().mkdirs(); + SimpleDateFormat dateTimeFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + BufferedWriter bw = new BufferedWriter(new FileWriter( + exitListFile)); + bw.write("@type tordnsel 1.0\n"); + bw.write("Downloaded " + dateTimeFormat.format(downloadedDate) + + "\n"); + int len; + byte[] data = new byte[1024]; + while ((len = in.read(data, 0, 1024)) >= 0) { + bw.write(new String(data, 0, len)); + } + in.close(); + bw.close(); + logger.fine("Finished downloading exit list."); + } catch (IOException e) { + logger.log(Level.WARNING, "Failed downloading exit list", e); + return; + } + + /* Write stats. 
*/ + StringBuilder dumpStats = new StringBuilder("Finished downloading " + + "exit list.\nLast three exit lists are:"); + Stack<File> filesInInputDir = new Stack<File>(); + filesInInputDir.add(new File("exitlist")); + SortedSet<File> lastThreeExitLists = new TreeSet<File>(); + while (!filesInInputDir.isEmpty()) { + File pop = filesInInputDir.pop(); + if (pop.isDirectory()) { + SortedSet<File> lastThreeElements = new TreeSet<File>(); + for (File f : pop.listFiles()) { + lastThreeElements.add(f); + } + while (lastThreeElements.size() > 3) { + lastThreeElements.remove(lastThreeElements.first()); + } + for (File f : lastThreeElements) { + filesInInputDir.add(f); + } + } else { + lastThreeExitLists.add(pop); + while (lastThreeExitLists.size() > 3) { + lastThreeExitLists.remove(lastThreeExitLists.first()); + } + } + } + for (File f : lastThreeExitLists) { + dumpStats.append("\n" + f.getName()); + } + logger.info(dumpStats.toString()); + } +} + diff --git a/src/org/torproject/ernie/db/main/Configuration.java b/src/org/torproject/ernie/db/main/Configuration.java new file mode 100644 index 0000000..adf22cc --- /dev/null +++ b/src/org/torproject/ernie/db/main/Configuration.java @@ -0,0 +1,359 @@ +/* Copyright 2010--2012 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.db.main; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Initialize configuration with hard-coded defaults, overwrite with + * configuration in config file, if exists, and answer Main.java about our + * configuration. 
+ */ +public class Configuration { + private boolean writeDirectoryArchives = false; + private String directoryArchivesOutputDirectory = "directory-archive/"; + private boolean importCachedRelayDescriptors = false; + private List<String> cachedRelayDescriptorsDirectory = + new ArrayList<String>(Arrays.asList("cacheddesc/".split(","))); + private boolean importDirectoryArchives = false; + private String directoryArchivesDirectory = "archives/"; + private boolean keepDirectoryArchiveImportHistory = false; + private boolean writeSanitizedBridges = false; + private boolean replaceIPAddressesWithHashes = false; + private long limitBridgeDescriptorMappings = -1L; + private String sanitizedBridgesWriteDirectory = "sanitized-bridges/"; + private boolean importBridgeSnapshots = false; + private String bridgeSnapshotsDirectory = "bridge-directories/"; + private boolean downloadRelayDescriptors = false; + private List<String> downloadFromDirectoryAuthorities = Arrays.asList(( + "86.59.21.38,76.73.17.194:9030,213.115.239.118:443," + + "193.23.244.244,208.83.223.34:443,128.31.0.34:9131," + + "194.109.206.212,212.112.245.170").split(",")); + private boolean downloadCurrentConsensus = true; + private boolean downloadCurrentVotes = true; + private boolean downloadMissingServerDescriptors = true; + private boolean downloadMissingExtraInfoDescriptors = true; + private boolean downloadAllServerDescriptors = false; + private boolean downloadAllExtraInfoDescriptors = false; + private boolean compressRelayDescriptorDownloads; + private boolean downloadExitList = false; + private boolean processBridgePoolAssignments = false; + private String assignmentsDirectory = "assignments/"; + private String sanitizedAssignmentsDirectory = "sanitized-assignments/"; + private boolean processTorperfFiles = false; + private String torperfOutputDirectory = "torperf/"; + private SortedMap<String, String> torperfSources = null; + private List<String> torperfFiles = null; + private boolean 
provideFilesViaRsync = false; + private String rsyncDirectory = "rsync"; + public Configuration() { + + /* Initialize logger. */ + Logger logger = Logger.getLogger(Configuration.class.getName()); + + /* Read config file, if present. */ + File configFile = new File("config"); + if (!configFile.exists()) { + logger.warning("Could not find config file. In the default " + + "configuration, we are not configured to read data from any " + + "data source or write data to any data sink. You need to " + + "create a config file (" + configFile.getAbsolutePath() + + ") and provide at least one data source and one data sink. " + + "Refer to the manual for more information."); + return; + } + String line = null; + boolean containsCachedRelayDescriptorsDirectory = false; + try { + BufferedReader br = new BufferedReader(new FileReader(configFile)); + while ((line = br.readLine()) != null) { + if (line.startsWith("#") || line.length() < 1) { + continue; + } else if (line.startsWith("WriteDirectoryArchives")) { + this.writeDirectoryArchives = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DirectoryArchivesOutputDirectory")) { + this.directoryArchivesOutputDirectory = line.split(" ")[1]; + } else if (line.startsWith("ImportCachedRelayDescriptors")) { + this.importCachedRelayDescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("CachedRelayDescriptorsDirectory")) { + if (!containsCachedRelayDescriptorsDirectory) { + this.cachedRelayDescriptorsDirectory.clear(); + containsCachedRelayDescriptorsDirectory = true; + } + this.cachedRelayDescriptorsDirectory.add(line.split(" ")[1]); + } else if (line.startsWith("ImportDirectoryArchives")) { + this.importDirectoryArchives = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DirectoryArchivesDirectory")) { + this.directoryArchivesDirectory = line.split(" ")[1]; + } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) { + 
this.keepDirectoryArchiveImportHistory = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("WriteSanitizedBridges")) { + this.writeSanitizedBridges = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("ReplaceIPAddressesWithHashes")) { + this.replaceIPAddressesWithHashes = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("LimitBridgeDescriptorMappings")) { + this.limitBridgeDescriptorMappings = Long.parseLong( + line.split(" ")[1]); + } else if (line.startsWith("SanitizedBridgesWriteDirectory")) { + this.sanitizedBridgesWriteDirectory = line.split(" ")[1]; + } else if (line.startsWith("ImportBridgeSnapshots")) { + this.importBridgeSnapshots = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("BridgeSnapshotsDirectory")) { + this.bridgeSnapshotsDirectory = line.split(" ")[1]; + } else if (line.startsWith("DownloadRelayDescriptors")) { + this.downloadRelayDescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadFromDirectoryAuthorities")) { + this.downloadFromDirectoryAuthorities = new ArrayList<String>(); + for (String dir : line.split(" ")[1].split(",")) { + // test if IP:port pair has correct format + if (dir.length() < 1) { + logger.severe("Configuration file contains directory " + + "authority IP:port of length 0 in line '" + line + + "'! 
Exiting!"); + System.exit(1); + } + new URL("http://" + dir + "/"); + this.downloadFromDirectoryAuthorities.add(dir); + } + } else if (line.startsWith("DownloadCurrentConsensus")) { + this.downloadCurrentConsensus = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadCurrentVotes")) { + this.downloadCurrentVotes = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadMissingServerDescriptors")) { + this.downloadMissingServerDescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith( + "DownloadMissingExtraInfoDescriptors")) { + this.downloadMissingExtraInfoDescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadAllServerDescriptors")) { + this.downloadAllServerDescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadAllExtraInfoDescriptors")) { + this.downloadAllExtraInfoDescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("CompressRelayDescriptorDownloads")) { + this.compressRelayDescriptorDownloads = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadExitList")) { + this.downloadExitList = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("ProcessBridgePoolAssignments")) { + this.processBridgePoolAssignments = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("AssignmentsDirectory")) { + this.assignmentsDirectory = line.split(" ")[1]; + } else if (line.startsWith("SanitizedAssignmentsDirectory")) { + this.sanitizedAssignmentsDirectory = line.split(" ")[1]; + } else if (line.startsWith("ProcessTorperfFiles")) { + this.processTorperfFiles = Integer.parseInt(line.split(" ")[1]) + != 0; + } else if (line.startsWith("TorperfOutputDirectory")) { + } else if (line.startsWith("TorperfSource")) { + if (this.torperfSources == null) { + this.torperfSources = new TreeMap<String, 
String>(); + } + String[] parts = line.split(" "); + String sourceName = parts[1]; + String baseUrl = parts[2]; + this.torperfSources.put(sourceName, baseUrl); + } else if (line.startsWith("TorperfFiles")) { + if (this.torperfFiles == null) { + this.torperfFiles = new ArrayList<String>(); + } + String[] parts = line.split(" "); + if (parts.length != 5) { + logger.severe("Configuration file contains TorperfFiles " + + "option with wrong number of values in line '" + line + + "'! Exiting!"); + System.exit(1); + } + this.torperfFiles.add(line); + } else if (line.startsWith("ProvideFilesViaRsync")) { + this.provideFilesViaRsync = Integer.parseInt( + line.split(" ")[1]) != 0; + } else if (line.startsWith("RsyncDirectory")) { + this.rsyncDirectory = line.split(" ")[1]; + } else { + logger.severe("Configuration file contains unrecognized " + + "configuration key in line '" + line + "'! Exiting!"); + System.exit(1); + } + } + br.close(); + } catch (ArrayIndexOutOfBoundsException e) { + logger.severe("Configuration file contains configuration key " + + "without value in line '" + line + "'. Exiting!"); + System.exit(1); + } catch (MalformedURLException e) { + logger.severe("Configuration file contains illegal URL or IP:port " + + "pair in line '" + line + "'. Exiting!"); + System.exit(1); + } catch (NumberFormatException e) { + logger.severe("Configuration file contains illegal value in line '" + + line + "' with legal values being 0 or 1. Exiting!"); + System.exit(1); + } catch (IOException e) { + logger.log(Level.SEVERE, "Unknown problem while reading config " + + "file! Exiting!", e); + System.exit(1); + } + + /** Make some checks if configuration is valid. 
*/ + if (!this.importCachedRelayDescriptors && + !this.importDirectoryArchives && !this.downloadRelayDescriptors && + !this.importBridgeSnapshots && + !this.downloadExitList && !this.processBridgePoolAssignments && + !this.writeDirectoryArchives && !this.writeSanitizedBridges && + !this.processTorperfFiles) { + logger.warning("We have not been configured to read data from any " + + "data source or write data to any data sink. You need to " + + "edit your config file (" + configFile.getAbsolutePath() + + ") and provide at least one data source and one data sink. " + + "Refer to the manual for more information."); + } + if ((this.importCachedRelayDescriptors || + this.importDirectoryArchives || this.downloadRelayDescriptors) && + !this.writeDirectoryArchives) { + logger.warning("We are configured to import/download relay " + + "descriptors, but we don't have a single data sink to write " + + "relay descriptors to."); + } + if (!(this.importCachedRelayDescriptors || + this.importDirectoryArchives || this.downloadRelayDescriptors) && + this.writeDirectoryArchives) { + logger.warning("We are configured to write relay descriptor to at " + + "least one data sink, but we don't have a single data source " + + "containing relay descriptors."); + } + if (this.importBridgeSnapshots && !this.writeSanitizedBridges) { + logger.warning("We are configured to import/download bridge " + + "descriptors, but we don't have a single data sink to write " + + "bridge descriptors to."); + } + if (!this.importBridgeSnapshots && this.writeSanitizedBridges) { + logger.warning("We are configured to write bridge descriptor to at " + + "least one data sink, but we don't have a single data source " + + "containing bridge descriptors."); + } + } + public boolean getWriteDirectoryArchives() { + return this.writeDirectoryArchives; + } + public String getDirectoryArchivesOutputDirectory() { + return this.directoryArchivesOutputDirectory; + } + public boolean getImportCachedRelayDescriptors() { + 
return this.importCachedRelayDescriptors; + } + public List<String> getCachedRelayDescriptorDirectory() { + return this.cachedRelayDescriptorsDirectory; + } + public boolean getImportDirectoryArchives() { + return this.importDirectoryArchives; + } + public String getDirectoryArchivesDirectory() { + return this.directoryArchivesDirectory; + } + public boolean getKeepDirectoryArchiveImportHistory() { + return this.keepDirectoryArchiveImportHistory; + } + public boolean getWriteSanitizedBridges() { + return this.writeSanitizedBridges; + } + public boolean getReplaceIPAddressesWithHashes() { + return this.replaceIPAddressesWithHashes; + } + public long getLimitBridgeDescriptorMappings() { + return this.limitBridgeDescriptorMappings; + } + public String getSanitizedBridgesWriteDirectory() { + return this.sanitizedBridgesWriteDirectory; + } + public boolean getImportBridgeSnapshots() { + return this.importBridgeSnapshots; + } + public String getBridgeSnapshotsDirectory() { + return this.bridgeSnapshotsDirectory; + } + public boolean getDownloadRelayDescriptors() { + return this.downloadRelayDescriptors; + } + public List<String> getDownloadFromDirectoryAuthorities() { + return this.downloadFromDirectoryAuthorities; + } + public boolean getDownloadCurrentConsensus() { + return this.downloadCurrentConsensus; + } + public boolean getDownloadCurrentVotes() { + return this.downloadCurrentVotes; + } + public boolean getDownloadMissingServerDescriptors() { + return this.downloadMissingServerDescriptors; + } + public boolean getDownloadMissingExtraInfoDescriptors() { + return this.downloadMissingExtraInfoDescriptors; + } + public boolean getDownloadAllServerDescriptors() { + return this.downloadAllServerDescriptors; + } + public boolean getDownloadAllExtraInfoDescriptors() { + return this.downloadAllExtraInfoDescriptors; + } + public boolean getCompressRelayDescriptorDownloads() { + return this.compressRelayDescriptorDownloads; + } + public boolean getDownloadExitList() { + 
/**
 * Single-instance lock based on a file in the working directory.
 *
 * The lock file contains the start time of the current execution in
 * milliseconds since the epoch.  A lock older than 55 minutes is
 * considered stale and may be overwritten, so that a crashed run does
 * not block all subsequent runs forever.
 */
public class LockFile {

  /* Lock file in the current working directory. */
  private File lockFile;
  private Logger logger;

  public LockFile() {
    this.lockFile = new File("lock");
    this.logger = Logger.getLogger(LockFile.class.getName());
  }

  /**
   * Tries to acquire the lock by writing the current time to the lock
   * file.
   *
   * @return true if the lock was acquired, false if another execution
   * appears to be in progress or the lock file could not be written.
   */
  public boolean acquireLock() {
    this.logger.fine("Trying to acquire lock...");
    try {
      if (this.lockFile.exists()) {
        /* Consistently reference this.lockFile instead of hard-coding
         * the "lock" file name a second time. */
        BufferedReader br = new BufferedReader(new FileReader(
            this.lockFile));
        String content = br.readLine();
        br.close();
        /* An empty or corrupt lock file previously crashed the run
         * with an uncaught NumberFormatException.  Treat it as a stale
         * lock instead and overwrite it. */
        long runStarted = 0L;
        try {
          runStarted = Long.parseLong(content);
        } catch (NumberFormatException e) {
          this.logger.warning("Lock file does not contain a valid "
              + "timestamp.  Overwriting it.");
        }
        if (System.currentTimeMillis() - runStarted
            < 55L * 60L * 1000L) {
          return false;
        }
      }
      BufferedWriter bw = new BufferedWriter(new FileWriter(
          this.lockFile));
      bw.append("" + System.currentTimeMillis() + "\n");
      bw.close();
      this.logger.fine("Acquired lock.");
      return true;
    } catch (IOException e) {
      this.logger.warning("Caught exception while trying to acquire "
          + "lock!");
      return false;
    }
  }

  /** Releases the lock by deleting the lock file. */
  public void releaseLock() {
    this.logger.fine("Releasing lock...");
    this.lockFile.delete();
    this.logger.fine("Released lock.");
  }
}
/**
 * Initialize logging configuration.
 *
 * Log levels used by ERNIE:
 *
 * - SEVERE: An event made it impossible to continue program execution.
 * - WARNING: A potential problem occurred that requires the operator to
 *   look after the otherwise unattended setup.
 * - INFO: Messages on INFO level are meant to help the operator in
 *   making sure that operation works as expected.
 * - FINE: Debug messages that are used to identify problems and which
 *   are turned on by default.
 * - FINER: More detailed debug messages to investigate problems in more
 *   detail.  Not turned on by default.
 * - FINEST: Most detailed debug messages.  Not used.
 */
public class LoggingConfiguration {

  public LoggingConfiguration() {

    /* Drop the handlers that java.util.logging installs by default. */
    Logger rootLogger = Logger.getLogger("");
    for (Handler handler : rootLogger.getHandlers()) {
      rootLogger.removeHandler(handler);
    }

    /* Silence logging of internal Sun classes. */
    Logger.getLogger("sun").setLevel(Level.OFF);

    /* Lower the minimum log level we care about from INFO to FINER. */
    rootLogger.setLevel(Level.FINER);

    /* All timestamps in log lines are formatted in UTC. */
    final SimpleDateFormat dateTimeFormat =
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));

    /* Write messages on WARNING or higher to the console, using a short
     * format consisting of timestamp and message only. */
    Formatter consoleFormatter = new Formatter() {
      public String format(LogRecord record) {
        return dateTimeFormat.format(new Date(record.getMillis())) + " "
            + record.getMessage() + "\n";
      }
    };
    Handler consoleHandler = new ConsoleHandler();
    consoleHandler.setFormatter(consoleFormatter);
    consoleHandler.setLevel(Level.WARNING);
    rootLogger.addHandler(consoleHandler);

    /* Logger for reporting problems with the file handler below. */
    Logger logger = Logger.getLogger(
        LoggingConfiguration.class.getName());

    /* Write all messages on FINE or higher to a local file, using a
     * detailed format that includes level, class, and method. */
    Formatter fileFormatter = new Formatter() {
      public String format(LogRecord record) {
        return dateTimeFormat.format(new Date(record.getMillis())) + " "
            + record.getLevel() + " " + record.getSourceClassName() + " "
            + record.getSourceMethodName() + " " + record.getMessage()
            + (record.getThrown() != null ? " " + record.getThrown() : "")
            + "\n";
      }
    };
    try {
      FileHandler fileHandler = new FileHandler("log", 5000000, 5, true);
      fileHandler.setFormatter(fileFormatter);
      fileHandler.setLevel(Level.FINE);
      rootLogger.addHandler(fileHandler);
    } catch (SecurityException e) {
      logger.log(Level.WARNING, "No permission to create log file.  "
          + "Logging to file is disabled.", e);
    } catch (IOException e) {
      logger.log(Level.WARNING, "Could not write to log file.  Logging "
          + "to file is disabled.", e);
    }
  }
}
Logging to " + + "file is disabled.", e); + } + } +} diff --git a/src/org/torproject/ernie/db/main/Main.java b/src/org/torproject/ernie/db/main/Main.java new file mode 100644 index 0000000..e008eca --- /dev/null +++ b/src/org/torproject/ernie/db/main/Main.java @@ -0,0 +1,172 @@ +/* Copyright 2010--2012 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.db.main; + +import java.io.File; +import java.util.List; +import java.util.logging.Logger; + +import org.torproject.ernie.db.bridgedescs.BridgeDescriptorParser; +import org.torproject.ernie.db.bridgedescs.BridgeSnapshotReader; +import org.torproject.ernie.db.bridgedescs.SanitizedBridgesWriter; +import org.torproject.ernie.db.bridgepools.BridgePoolAssignmentsProcessor; +import org.torproject.ernie.db.exitlists.ExitListDownloader; +import org.torproject.ernie.db.relaydescs.ArchiveReader; +import org.torproject.ernie.db.relaydescs.ArchiveWriter; +import org.torproject.ernie.db.relaydescs.CachedRelayDescriptorReader; +import org.torproject.ernie.db.relaydescs.RelayDescriptorDownloader; +import org.torproject.ernie.db.relaydescs.RelayDescriptorParser; +import org.torproject.ernie.db.torperf.TorperfDownloader; + +/** + * Coordinate downloading and parsing of descriptors and extraction of + * statistically relevant data for later processing with R. + */ +public class Main { + public static void main(String[] args) { + + /* Initialize logging configuration. */ + new LoggingConfiguration(); + + Logger logger = Logger.getLogger(Main.class.getName()); + logger.info("Starting ERNIE."); + + // Initialize configuration + Configuration config = new Configuration(); + + // Use lock file to avoid overlapping runs + LockFile lf = new LockFile(); + if (!lf.acquireLock()) { + logger.severe("Warning: ERNIE is already running or has not exited " + + "cleanly! 
Exiting!"); + System.exit(1); + } + + // Define stats directory for temporary files + File statsDirectory = new File("stats"); + + // Prepare writing relay descriptor archive to disk + ArchiveWriter aw = config.getWriteDirectoryArchives() ? + new ArchiveWriter( + new File(config.getDirectoryArchivesOutputDirectory())) : null; + + // Prepare relay descriptor parser (only if we are writing stats or + // directory archives to disk) + RelayDescriptorParser rdp = aw != null ? + new RelayDescriptorParser(aw) : null; + + // Import/download relay descriptors from the various sources + if (rdp != null) { + RelayDescriptorDownloader rdd = null; + if (config.getDownloadRelayDescriptors()) { + List<String> dirSources = + config.getDownloadFromDirectoryAuthorities(); + rdd = new RelayDescriptorDownloader(rdp, dirSources, + config.getDownloadCurrentConsensus(), + config.getDownloadCurrentVotes(), + config.getDownloadMissingServerDescriptors(), + config.getDownloadMissingExtraInfoDescriptors(), + config.getDownloadAllServerDescriptors(), + config.getDownloadAllExtraInfoDescriptors(), + config.getCompressRelayDescriptorDownloads()); + rdp.setRelayDescriptorDownloader(rdd); + } + if (config.getImportCachedRelayDescriptors()) { + new CachedRelayDescriptorReader(rdp, + config.getCachedRelayDescriptorDirectory(), statsDirectory); + if (aw != null) { + aw.intermediateStats("importing relay descriptors from local " + + "Tor data directories"); + } + } + if (config.getImportDirectoryArchives()) { + new ArchiveReader(rdp, + new File(config.getDirectoryArchivesDirectory()), + statsDirectory, + config.getKeepDirectoryArchiveImportHistory()); + if (aw != null) { + aw.intermediateStats("importing relay descriptors from local " + + "directory"); + } + } + if (rdd != null) { + rdd.downloadDescriptors(); + rdd.writeFile(); + rdd = null; + if (aw != null) { + aw.intermediateStats("downloading relay descriptors from the " + + "directory authorities"); + } + } + } + + // Write output to disk that 
only depends on relay descriptors + if (aw != null) { + aw.dumpStats(); + aw = null; + } + + // Prepare sanitized bridge descriptor writer + SanitizedBridgesWriter sbw = config.getWriteSanitizedBridges() ? + new SanitizedBridgesWriter( + new File(config.getSanitizedBridgesWriteDirectory()), + statsDirectory, config.getReplaceIPAddressesWithHashes(), + config.getLimitBridgeDescriptorMappings()) : null; + + // Prepare bridge descriptor parser + BridgeDescriptorParser bdp = config.getWriteSanitizedBridges() + ? new BridgeDescriptorParser(sbw) : null; + + // Import bridge descriptors + if (bdp != null && config.getImportBridgeSnapshots()) { + new BridgeSnapshotReader(bdp, + new File(config.getBridgeSnapshotsDirectory()), + statsDirectory); + } + + // Finish writing sanitized bridge descriptors to disk + if (sbw != null) { + sbw.finishWriting(); + sbw = null; + } + + // Download exit list and store it to disk + if (config.getDownloadExitList()) { + new ExitListDownloader(); + } + + // Process bridge pool assignments + if (config.getProcessBridgePoolAssignments()) { + new BridgePoolAssignmentsProcessor( + new File(config.getAssignmentsDirectory()), + new File(config.getSanitizedAssignmentsDirectory())); + } + + // Process Torperf files + if (config.getProcessTorperfFiles()) { + new TorperfDownloader(new File(config.getTorperfOutputDirectory()), + config.getTorperfSources(), config.getTorperfFiles()); + } + + // Copy recently published files to a local directory that can then + // be served via rsync. + if (config.getProvideFilesViaRsync()) { + new RsyncDataProvider( + !config.getWriteDirectoryArchives() ? null : + new File(config.getDirectoryArchivesOutputDirectory()), + !config.getWriteSanitizedBridges() ? null : + new File(config.getSanitizedBridgesWriteDirectory()), + !config.getProcessBridgePoolAssignments() ? null : + new File(config.getSanitizedAssignmentsDirectory()), + config.getDownloadExitList(), + !config.getProcessTorperfFiles() ? 
/**
 * Copy files published in the last 3 days to a local directory that can
 * then be served via rsync.
 */
public class RsyncDataProvider {

  /**
   * Copies all recently published files from the given source
   * directories to subdirectories of rsyncDirectory and afterwards
   * deletes all files in rsyncDirectory that were not copied in this
   * run.  Any of the source directory arguments may be null to skip
   * that data type.
   */
  public RsyncDataProvider(File directoryArchivesOutputDirectory,
      File sanitizedBridgesWriteDirectory,
      File sanitizedAssignmentsDirectory,
      boolean downloadExitList,
      File torperfOutputDirectory, File rsyncDirectory) {

    /* Initialize logger. */
    Logger logger = Logger.getLogger(RsyncDataProvider.class.getName());

    /* Determine the cut-off time for files in rsync/. */
    long cutOffMillis = System.currentTimeMillis()
        - 3L * 24L * 60L * 60L * 1000L;

    /* Create rsync/ directory if it doesn't exist. */
    if (!rsyncDirectory.exists()) {
      rsyncDirectory.mkdirs();
    }

    /* Make a list of all files in the rsync/ directory to delete those
     * that we didn't copy in this run. */
    Set<String> fileNamesInRsync = new HashSet<String>();
    Stack<File> files = new Stack<File>();
    files.add(rsyncDirectory);
    while (!files.isEmpty()) {
      File pop = files.pop();
      if (pop.isDirectory()) {
        files.addAll(Arrays.asList(pop.listFiles()));
      } else {
        fileNamesInRsync.add(pop.getName());
      }
    }
    logger.info("Found " + fileNamesInRsync.size() + " files in "
        + rsyncDirectory.getAbsolutePath() + " that we're either "
        + "overwriting or deleting in this execution.");

    /* Copy relay descriptors from the last 3 days. */
    if (directoryArchivesOutputDirectory != null) {
      files.add(directoryArchivesOutputDirectory);
      while (!files.isEmpty()) {
        File pop = files.pop();
        if (pop.isDirectory()) {
          files.addAll(Arrays.asList(pop.listFiles()));
        } else if (pop.lastModified() >= cutOffMillis) {
          String fileName = pop.getName();
          /* The source path component tells us which descriptor type
           * this file contains. */
          if (pop.getAbsolutePath().contains("/consensus/")) {
            this.copyFile(pop, new File(rsyncDirectory,
                "relay-descriptors/consensuses/" + fileName));
          } else if (pop.getAbsolutePath().contains("/vote/")) {
            this.copyFile(pop, new File(rsyncDirectory,
                "relay-descriptors/votes/" + fileName));
          } else if (pop.getAbsolutePath().contains(
              "/server-descriptor/")) {
            this.copyFile(pop, new File(rsyncDirectory,
                "relay-descriptors/server-descriptors/" + fileName));
          } else if (pop.getAbsolutePath().contains("/extra-info/")) {
            this.copyFile(pop, new File(rsyncDirectory,
                "relay-descriptors/extra-infos/" + fileName));
          } else {
            continue;
          }
          fileNamesInRsync.remove(pop.getName());
        }
      }
    }
    logger.info("After copying relay descriptors, there are still "
        + fileNamesInRsync.size() + " files left in "
        + rsyncDirectory.getAbsolutePath() + ".");

    /* Copy sanitized bridge descriptors from the last 3 days. */
    if (sanitizedBridgesWriteDirectory != null) {
      files.add(sanitizedBridgesWriteDirectory);
      while (!files.isEmpty()) {
        File pop = files.pop();
        if (pop.isDirectory()) {
          files.addAll(Arrays.asList(pop.listFiles()));
        } else if (pop.lastModified() >= cutOffMillis) {
          String fileName = pop.getName();
          if (pop.getAbsolutePath().contains("/statuses/")) {
            this.copyFile(pop, new File(rsyncDirectory,
                "bridge-descriptors/statuses/" + fileName));
          } else if (pop.getAbsolutePath().contains(
              "/server-descriptors/")) {
            this.copyFile(pop, new File(rsyncDirectory,
                "bridge-descriptors/server-descriptors/" + fileName));
          } else if (pop.getAbsolutePath().contains("/extra-infos/")) {
            this.copyFile(pop, new File(rsyncDirectory,
                "bridge-descriptors/extra-infos/" + fileName));
          } else {
            continue;
          }
          fileNamesInRsync.remove(pop.getName());
        }
      }
    }
    logger.info("After copying sanitized bridge descriptors, there are "
        + "still " + fileNamesInRsync.size() + " files left in "
        + rsyncDirectory.getAbsolutePath() + ".");

    /* Copy sanitized bridge pool assignments from the last 3 days. */
    if (sanitizedAssignmentsDirectory != null) {
      files.add(sanitizedAssignmentsDirectory);
      while (!files.isEmpty()) {
        File pop = files.pop();
        if (pop.isDirectory()) {
          files.addAll(Arrays.asList(pop.listFiles()));
        } else if (pop.lastModified() >= cutOffMillis) {
          String fileName = pop.getName();
          this.copyFile(pop, new File(rsyncDirectory,
              "bridge-pool-assignments/" + fileName));
          fileNamesInRsync.remove(pop.getName());
        }
      }
    }
    logger.info("After copying sanitized bridge pool assignments, there "
        + "are still " + fileNamesInRsync.size() + " files left in "
        + rsyncDirectory.getAbsolutePath() + ".");

    /* Copy exit lists from the last 3 days. */
    if (downloadExitList) {
      files.add(new File("exitlist"));
      while (!files.isEmpty()) {
        File pop = files.pop();
        if (pop.isDirectory()) {
          files.addAll(Arrays.asList(pop.listFiles()));
        } else if (pop.lastModified() >= cutOffMillis) {
          String fileName = pop.getName();
          this.copyFile(pop, new File(rsyncDirectory,
              "exit-lists/" + fileName));
          fileNamesInRsync.remove(pop.getName());
        }
      }
    }
    logger.info("After copying exit lists, there are still "
        + fileNamesInRsync.size() + " files left in "
        + rsyncDirectory.getAbsolutePath() + ".");

    /* Copy Torperf files. */
    if (torperfOutputDirectory != null) {
      files.add(torperfOutputDirectory);
      while (!files.isEmpty()) {
        File pop = files.pop();
        if (pop.isDirectory()) {
          files.addAll(Arrays.asList(pop.listFiles()));
        } else if (pop.getName().endsWith(".tpf") &&
            pop.lastModified() >= cutOffMillis) {
          String fileName = pop.getName();
          this.copyFile(pop, new File(rsyncDirectory,
              "torperf/" + fileName));
          fileNamesInRsync.remove(pop.getName());
        }
      }
    }
    logger.info("After copying Torperf files, there are still "
        + fileNamesInRsync.size() + " files left in "
        + rsyncDirectory.getAbsolutePath() + ".");

    /* Delete all files that we didn't (over-)write in this run. */
    files.add(rsyncDirectory);
    while (!files.isEmpty()) {
      File pop = files.pop();
      if (pop.isDirectory()) {
        files.addAll(Arrays.asList(pop.listFiles()));
      } else if (fileNamesInRsync.contains(pop.getName())) {
        fileNamesInRsync.remove(pop.getName());
        pop.delete();
      }
    }
    logger.info("After deleting files that we didn't overwrite in this "
        + "run, there are " + fileNamesInRsync.size() + " files left in "
        + rsyncDirectory.getAbsolutePath() + ".");
  }

  /**
   * Copies a single file unless an apparently identical copy (same
   * last-modified time and length) already exists at the target
   * location.  The copy keeps the source's last-modified time.
   */
  private void copyFile(File from, File to) {
    if (from.exists() && to.exists() &&
        from.lastModified() == to.lastModified() &&
        from.length() == to.length()) {
      return;
    }
    FileInputStream fis = null;
    FileOutputStream fos = null;
    boolean copied = false;
    try {
      to.getParentFile().mkdirs();
      fis = new FileInputStream(from);
      BufferedInputStream bis = new BufferedInputStream(fis);
      fos = new FileOutputStream(to);
      int len;
      byte[] data = new byte[1024];
      while ((len = bis.read(data, 0, 1024)) >= 0) {
        fos.write(data, 0, len);
      }
      copied = true;
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      /* Close both streams even if the copy failed halfway, so that we
       * don't leak file descriptors.  (The original code only closed
       * them on the success path.) */
      if (fis != null) {
        try {
          fis.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
      if (fos != null) {
        try {
          fos.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
    /* Only preserve the source timestamp if the copy succeeded, so that
     * a truncated target file is re-copied in the next run. */
    if (copied) {
      to.setLastModified(from.lastModified());
    }
  }
}
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; + +/** + * Read in all files in a given directory and pass buffered readers of + * them to the relay descriptor parser. + */ +public class ArchiveReader { + public ArchiveReader(RelayDescriptorParser rdp, File archivesDirectory, + File statsDirectory, boolean keepImportHistory) { + + if (rdp == null || archivesDirectory == null || + statsDirectory == null) { + throw new IllegalArgumentException(); + } + + int parsedFiles = 0, ignoredFiles = 0; + Logger logger = Logger.getLogger(ArchiveReader.class.getName()); + SortedSet<String> archivesImportHistory = new TreeSet<String>(); + File archivesImportHistoryFile = new File(statsDirectory, + "archives-import-history"); + if (keepImportHistory && archivesImportHistoryFile.exists()) { + try { + BufferedReader br = new BufferedReader(new FileReader( + archivesImportHistoryFile)); + String line = null; + while ((line = br.readLine()) != null) { + archivesImportHistory.add(line); + } + br.close(); + } catch (IOException e) { + logger.log(Level.WARNING, "Could not read in archives import " + + "history file. Skipping."); + } + } + if (archivesDirectory.exists()) { + logger.fine("Importing files in directory " + archivesDirectory + + "/..."); + Stack<File> filesInInputDir = new Stack<File>(); + filesInInputDir.add(archivesDirectory); + List<File> problems = new ArrayList<File>(); + while (!filesInInputDir.isEmpty()) { + File pop = filesInInputDir.pop(); + if (pop.isDirectory()) { + for (File f : pop.listFiles()) { + filesInInputDir.add(f); + } + } else { + if (rdp != null) { + try { + BufferedInputStream bis = null; + if (keepImportHistory && + archivesImportHistory.contains(pop.getName())) { + ignoredFiles++; + continue; + } else if (pop.getName().endsWith(".tar.bz2")) { + logger.warning("Cannot parse compressed tarball " + + pop.getAbsolutePath() + ". 
Skipping."); + continue; + } else if (pop.getName().endsWith(".bz2")) { + FileInputStream fis = new FileInputStream(pop); + BZip2CompressorInputStream bcis = + new BZip2CompressorInputStream(fis); + bis = new BufferedInputStream(bcis); + } else { + FileInputStream fis = new FileInputStream(pop); + bis = new BufferedInputStream(fis); + } + if (keepImportHistory) { + archivesImportHistory.add(pop.getName()); + } + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int len; + byte[] data = new byte[1024]; + while ((len = bis.read(data, 0, 1024)) >= 0) { + baos.write(data, 0, len); + } + bis.close(); + byte[] allData = baos.toByteArray(); + rdp.parse(allData); + parsedFiles++; + } catch (IOException e) { + problems.add(pop); + if (problems.size() > 3) { + break; + } + } + } + } + } + if (problems.isEmpty()) { + logger.fine("Finished importing files in directory " + + archivesDirectory + "/."); + } else { + StringBuilder sb = new StringBuilder("Failed importing files in " + + "directory " + archivesDirectory + "/:"); + int printed = 0; + for (File f : problems) { + sb.append("\n " + f.getAbsolutePath()); + if (++printed >= 3) { + sb.append("\n ... 
more"); + break; + } + } + } + } + if (keepImportHistory) { + try { + archivesImportHistoryFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + archivesImportHistoryFile)); + for (String line : archivesImportHistory) { + bw.write(line + "\n"); + } + bw.close(); + } catch (IOException e) { + logger.log(Level.WARNING, "Could not write archives import " + + "history file."); + } + } + logger.info("Finished importing relay descriptors from local " + + "directory:\nParsed " + parsedFiles + ", ignored " + + ignoredFiles + " files."); + } +} + diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java new file mode 100644 index 0000000..c632656 --- /dev/null +++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java @@ -0,0 +1,339 @@ +/* Copyright 2010--2012 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.db.relaydescs; + +import java.io.BufferedOutputStream; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.IOException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.SortedSet; +import java.util.Stack; +import java.util.TimeZone; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.binary.Hex; +import org.torproject.descriptor.DescriptorParser; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.impl.DescriptorParseException; + +public class ArchiveWriter { + private Logger logger; + private File outputDirectory; + private DescriptorParser descriptorParser; + private int storedConsensuses = 0, storedVotes = 0, storedCerts = 0, + storedServerDescriptors = 0, storedExtraInfoDescriptors = 0; + + public 
ArchiveWriter(File outputDirectory) { + + if (outputDirectory == null) { + throw new IllegalArgumentException(); + } + + this.logger = Logger.getLogger(ArchiveWriter.class.getName()); + this.outputDirectory = outputDirectory; + this.descriptorParser = + DescriptorSourceFactory.createDescriptorParser(); + } + + private boolean store(byte[] typeAnnotation, byte[] data, + String filename) { + try { + File file = new File(filename); + if (!file.exists()) { + this.logger.finer("Storing " + filename); + if (this.descriptorParser.parseDescriptors(data, filename).size() + != 1) { + this.logger.info("Relay descriptor file " + filename + + " doesn't contain exactly one descriptor. Not storing."); + return false; + } + file.getParentFile().mkdirs(); + BufferedOutputStream bos = new BufferedOutputStream( + new FileOutputStream(file)); + if (data.length > 0 && data[0] != '@') { + bos.write(typeAnnotation, 0, typeAnnotation.length); + } + bos.write(data, 0, data.length); + bos.close(); + return true; + } + } catch (DescriptorParseException e) { + this.logger.log(Level.WARNING, "Could not parse relay descriptor " + + filename + " before storing it to disk. 
Skipping.", e); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not store relay descriptor " + + filename, e); + } + return false; + } + + private static final byte[] CONSENSUS_ANNOTATION = + "@type network-status-consensus-3 1.0\n".getBytes(); + public void storeConsensus(byte[] data, long validAfter) { + SimpleDateFormat printFormat = new SimpleDateFormat( + "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String filename = outputDirectory + "/consensus/" + + printFormat.format(new Date(validAfter)) + "-consensus"; + if (this.store(CONSENSUS_ANNOTATION, data, filename)) { + this.storedConsensuses++; + } + } + + private static final byte[] VOTE_ANNOTATION = + "@type network-status-vote-3 1.0\n".getBytes(); + public void storeVote(byte[] data, long validAfter, + String fingerprint, String digest) { + SimpleDateFormat printFormat = new SimpleDateFormat( + "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String filename = outputDirectory + "/vote/" + + printFormat.format(new Date(validAfter)) + "-vote-" + + fingerprint + "-" + digest; + if (this.store(VOTE_ANNOTATION, data, filename)) { + this.storedVotes++; + } + } + + private static final byte[] CERTIFICATE_ANNOTATION = + "@type dir-key-certificate-3 1.0\n".getBytes(); + public void storeCertificate(byte[] data, String fingerprint, + long published) { + SimpleDateFormat printFormat = new SimpleDateFormat( + "yyyy-MM-dd-HH-mm-ss"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String filename = outputDirectory + "/certs/" + + fingerprint + "-" + printFormat.format(new Date(published)); + if (this.store(CERTIFICATE_ANNOTATION, data, filename)) { + this.storedCerts++; + } + } + + private static final byte[] SERVER_DESCRIPTOR_ANNOTATION = + "@type server-descriptor 1.0\n".getBytes(); + public void storeServerDescriptor(byte[] data, String digest, + long published) { + SimpleDateFormat printFormat = 
new SimpleDateFormat("yyyy/MM/"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String filename = outputDirectory + "/server-descriptor/" + + printFormat.format(new Date(published)) + + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/" + + digest; + if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, filename)) { + this.storedServerDescriptors++; + } + } + + private static final byte[] EXTRA_INFO_ANNOTATION = + "@type extra-info 1.0\n".getBytes(); + public void storeExtraInfoDescriptor(byte[] data, + String extraInfoDigest, long published) { + SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/"); + descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String filename = outputDirectory + "/extra-info/" + + descriptorFormat.format(new Date(published)) + + extraInfoDigest.substring(0, 1) + "/" + + extraInfoDigest.substring(1, 2) + "/" + + extraInfoDigest; + if (this.store(EXTRA_INFO_ANNOTATION, data, filename)) { + this.storedExtraInfoDescriptors++; + } + } + + private StringBuilder intermediateStats = new StringBuilder(); + public void intermediateStats(String event) { + intermediateStats.append("While " + event + ", we stored " + + this.storedConsensuses + " consensus(es), " + this.storedVotes + + " vote(s), " + this.storedCerts + " certificate(s), " + + this.storedServerDescriptors + " server descriptor(s), and " + + this.storedExtraInfoDescriptors + + " extra-info descriptor(s) to disk.\n"); + this.storedConsensuses = 0; + this.storedVotes = 0; + this.storedCerts = 0; + this.storedServerDescriptors = 0; + this.storedExtraInfoDescriptors = 0; + } + /** + * Dump some statistics on the completeness of descriptors to the logs + * on level INFO. 
+ */ + public void dumpStats() { + StringBuilder sb = new StringBuilder("Finished writing relay " + + "descriptors to disk.\n"); + sb.append(intermediateStats.toString()); + sb.append("Statistics on the completeness of written relay " + + "descriptors of the last 3 consensuses (Consensus/Vote, " + + "valid-after, votes, server descriptors, extra-infos):"); + try { + SimpleDateFormat validAfterFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + validAfterFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + SimpleDateFormat consensusVoteFormat = + new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); + consensusVoteFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + SimpleDateFormat descriptorFormat = + new SimpleDateFormat("yyyy/MM/"); + descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + + SortedSet<File> consensuses = new TreeSet<File>(); + Stack<File> leftToParse = new Stack<File>(); + leftToParse.add(new File(outputDirectory + "/consensus")); + while (!leftToParse.isEmpty()) { + File pop = leftToParse.pop(); + if (pop.isDirectory()) { + for (File f : pop.listFiles()) { + leftToParse.add(f); + } + } else if (pop.length() > 0) { + consensuses.add(pop); + } + while (consensuses.size() > 3) { + consensuses.remove(consensuses.first()); + } + } + for (File f : consensuses) { + BufferedReader br = new BufferedReader(new FileReader(f)); + String line = null, validAfterTime = null, + voteFilenamePrefix = null, dirSource = null; + int allVotes = 0, foundVotes = 0, + allServerDescs = 0, foundServerDescs = 0, + allExtraInfos = 0, foundExtraInfos = 0; + while ((line = br.readLine()) != null) { + if (line.startsWith("valid-after ")) { + validAfterTime = line.substring("valid-after ".length()); + long validAfter = validAfterFormat.parse( + validAfterTime).getTime(); + voteFilenamePrefix = outputDirectory + "/vote/" + + consensusVoteFormat.format(new Date(validAfter)) + + "-vote-"; + } else if (line.startsWith("dir-source ")) { + dirSource = line.split(" ")[2]; + 
} else if (line.startsWith("vote-digest ")) { + allVotes++; + File voteFile = new File(voteFilenamePrefix + dirSource + "-" + + line.split(" ")[1]); + if (voteFile.exists()) { + foundVotes++; + BufferedReader vbr = new BufferedReader(new FileReader( + voteFile)); + String line3 = null; + int voteAllServerDescs = 0, voteFoundServerDescs = 0, + voteAllExtraInfos = 0, voteFoundExtraInfos = 0; + while ((line3 = vbr.readLine()) != null) { + if (line3.startsWith("r ")) { + voteAllServerDescs++; + String digest = Hex.encodeHexString(Base64.decodeBase64( + line3.split(" ")[3] + "=")).toLowerCase(); + long published = validAfterFormat.parse( + line3.split(" ")[4] + " " + + line3.split(" ")[5]).getTime(); + String filename = outputDirectory + + "/server-descriptor/" + + descriptorFormat.format(new Date(published)) + + digest.substring(0, 1) + "/" + + digest.substring(1, 2) + "/" + digest; + if (new File(filename).exists()) { + BufferedReader sbr = new BufferedReader(new FileReader( + new File(filename))); + String line2 = null; + while ((line2 = sbr.readLine()) != null) { + if (line2.startsWith("opt extra-info-digest ") || + line2.startsWith("extra-info-digest ")) { + voteAllExtraInfos++; + String extraInfoDigest = line2.startsWith("opt ") ? 
+ line2.split(" ")[2].toLowerCase() : + line2.split(" ")[1].toLowerCase(); + String filename2 = + outputDirectory.getAbsolutePath() + + "/extra-info/" + + descriptorFormat.format(new Date(published)) + + extraInfoDigest.substring(0, 1) + "/" + + extraInfoDigest.substring(1, 2) + "/" + + extraInfoDigest; + if (new File(filename2).exists()) { + voteFoundExtraInfos++; + } + } + } + sbr.close(); + voteFoundServerDescs++; + } + } + } + vbr.close(); + sb.append(String.format("%nV, %s, NA, %d/%d (%.1f%%), " + + "%d/%d (%.1f%%)", validAfterTime, + voteFoundServerDescs, voteAllServerDescs, + 100.0D * (double) voteFoundServerDescs / + (double) voteAllServerDescs, + voteFoundExtraInfos, voteAllExtraInfos, + 100.0D * (double) voteFoundExtraInfos / + (double) voteAllExtraInfos)); + } + } else if (line.startsWith("r ")) { + allServerDescs++; + String digest = Hex.encodeHexString(Base64.decodeBase64( + line.split(" ")[3] + "=")).toLowerCase(); + long published = validAfterFormat.parse( + line.split(" ")[4] + " " + line.split(" ")[5]).getTime(); + String filename = outputDirectory.getAbsolutePath() + + "/server-descriptor/" + + descriptorFormat.format(new Date(published)) + + digest.substring(0, 1) + "/" + + digest.substring(1, 2) + "/" + digest; + if (new File (filename).exists()) { + BufferedReader sbr = new BufferedReader(new FileReader( + new File(filename))); + String line2 = null; + while ((line2 = sbr.readLine()) != null) { + if (line2.startsWith("opt extra-info-digest ") || + line2.startsWith("extra-info-digest ")) { + allExtraInfos++; + String extraInfoDigest = line2.startsWith("opt ") ? 
+ line2.split(" ")[2].toLowerCase() : + line2.split(" ")[1].toLowerCase(); + String filename2 = outputDirectory.getAbsolutePath() + + "/extra-info/" + + descriptorFormat.format(new Date(published)) + + extraInfoDigest.substring(0, 1) + "/" + + extraInfoDigest.substring(1, 2) + "/" + + extraInfoDigest; + if (new File (filename2).exists()) { + foundExtraInfos++; + } + } + } + sbr.close(); + foundServerDescs++; + } + } + } + br.close(); + sb.append(String.format("%nC, %s, %d/%d (%.1f%%), " + + "%d/%d (%.1f%%), %d/%d (%.1f%%)", + validAfterTime, foundVotes, allVotes, + 100.0D * (double) foundVotes / (double) allVotes, + foundServerDescs, allServerDescs, + 100.0D * (double) foundServerDescs / (double) allServerDescs, + foundExtraInfos, allExtraInfos, + 100.0D * (double) foundExtraInfos / (double) allExtraInfos)); + } + this.logger.info(sb.toString()); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not dump statistics to disk.", + e); + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Could not dump statistics to disk.", + e); + } + } +} diff --git a/src/org/torproject/ernie/db/relaydescs/CachedRelayDescriptorReader.java b/src/org/torproject/ernie/db/relaydescs/CachedRelayDescriptorReader.java new file mode 100644 index 0000000..194e0a2 --- /dev/null +++ b/src/org/torproject/ernie/db/relaydescs/CachedRelayDescriptorReader.java @@ -0,0 +1,235 @@ +/* Copyright 2010--2012 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.db.relaydescs; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.StringReader; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import 
java.util.TimeZone; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; + +/** + * Parses all descriptors in local directory cacheddesc/ and sorts them + * into directory structure in directory-archive/. + */ +public class CachedRelayDescriptorReader { + public CachedRelayDescriptorReader(RelayDescriptorParser rdp, + List<String> inputDirectories, File statsDirectory) { + + if (rdp == null || inputDirectories == null || + inputDirectories.isEmpty() || statsDirectory == null) { + throw new IllegalArgumentException(); + } + + StringBuilder dumpStats = new StringBuilder("Finished importing " + + "relay descriptors from local Tor data directories:"); + Logger logger = Logger.getLogger( + CachedRelayDescriptorReader.class.getName()); + + /* Read import history containing SHA-1 digests of previously parsed + * statuses and descriptors, so that we can skip them in this run. */ + Set<String> lastImportHistory = new HashSet<String>(), + currentImportHistory = new HashSet<String>(); + File importHistoryFile = new File(statsDirectory, + "cacheddesc-import-history"); + if (importHistoryFile.exists()) { + try { + BufferedReader br = new BufferedReader(new FileReader( + importHistoryFile)); + String line; + while ((line = br.readLine()) != null) { + lastImportHistory.add(line); + } + br.close(); + } catch (IOException e) { + logger.log(Level.WARNING, "Could not read import history from " + + importHistoryFile.getAbsolutePath() + ".", e); + } + } + + /* Read cached descriptors directories. */ + for (String inputDirectory : inputDirectories) { + File cachedDescDir = new File(inputDirectory); + if (!cachedDescDir.exists()) { + logger.warning("Directory " + cachedDescDir.getAbsolutePath() + + " does not exist. 
Skipping."); + continue; + } + logger.fine("Reading " + cachedDescDir.getAbsolutePath() + + " directory."); + for (File f : cachedDescDir.listFiles()) { + try { + // descriptors may contain non-ASCII chars; read as bytes to + // determine digests + BufferedInputStream bis = + new BufferedInputStream(new FileInputStream(f)); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int len; + byte[] data = new byte[1024]; + while ((len = bis.read(data, 0, 1024)) >= 0) { + baos.write(data, 0, len); + } + bis.close(); + byte[] allData = baos.toByteArray(); + if (f.getName().equals("cached-consensus")) { + /* Check if directory information is stale. */ + BufferedReader br = new BufferedReader(new StringReader( + new String(allData, "US-ASCII"))); + String line = null; + while ((line = br.readLine()) != null) { + if (line.startsWith("valid-after ")) { + dumpStats.append("\n" + f.getName() + ": " + line.substring( + "valid-after ".length())); + SimpleDateFormat dateTimeFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + if (dateTimeFormat.parse(line.substring("valid-after ". + length())).getTime() < System.currentTimeMillis() + - 6L * 60L * 60L * 1000L) { + logger.warning("Cached descriptor files in " + + cachedDescDir.getAbsolutePath() + " are stale. " + + "The valid-after line in cached-consensus is '" + + line + "'."); + dumpStats.append(" (stale!)"); + } + break; + } + } + br.close(); + + /* Parse the cached consensus if we haven't parsed it before + * (but regardless of whether it's stale or not). 
*/ + if (rdp != null) { + String digest = Hex.encodeHexString(DigestUtils.sha( + allData)); + if (!lastImportHistory.contains(digest) && + !currentImportHistory.contains(digest)) { + rdp.parse(allData); + } else { + dumpStats.append(" (skipped)"); + } + currentImportHistory.add(digest); + } + } else if (f.getName().equals("v3-status-votes")) { + int parsedNum = 0, skippedNum = 0; + String ascii = new String(allData, "US-ASCII"); + String startToken = "network-status-version "; + int end = ascii.length(); + int start = ascii.indexOf(startToken); + while (start >= 0 && start < end) { + int next = ascii.indexOf(startToken, start + 1); + if (next < 0) { + next = end; + } + if (start < next) { + byte[] rawNetworkStatusBytes = new byte[next - start]; + System.arraycopy(allData, start, rawNetworkStatusBytes, 0, + next - start); + if (rdp != null) { + String digest = Hex.encodeHexString(DigestUtils.sha( + rawNetworkStatusBytes)); + if (!lastImportHistory.contains(digest) && + !currentImportHistory.contains(digest)) { + rdp.parse(rawNetworkStatusBytes); + parsedNum++; + } else { + skippedNum++; + } + currentImportHistory.add(digest); + } + } + start = next; + } + dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum + + ", skipped " + skippedNum + " votes"); + } else if (f.getName().startsWith("cached-descriptors") || + f.getName().startsWith("cached-extrainfo")) { + String ascii = new String(allData, "US-ASCII"); + int start = -1, sig = -1, end = -1; + String startToken = + f.getName().startsWith("cached-descriptors") ? 
+ "router " : "extra-info "; + String sigToken = "\nrouter-signature\n"; + String endToken = "\n-----END SIGNATURE-----\n"; + int parsedNum = 0, skippedNum = 0; + while (end < ascii.length()) { + start = ascii.indexOf(startToken, end); + if (start < 0) { + break; + } + sig = ascii.indexOf(sigToken, start); + if (sig < 0) { + break; + } + sig += sigToken.length(); + end = ascii.indexOf(endToken, sig); + if (end < 0) { + break; + } + end += endToken.length(); + byte[] descBytes = new byte[end - start]; + System.arraycopy(allData, start, descBytes, 0, end - start); + if (rdp != null) { + String digest = Hex.encodeHexString(DigestUtils.sha( + descBytes)); + if (!lastImportHistory.contains(digest) && + !currentImportHistory.contains(digest)) { + rdp.parse(descBytes); + parsedNum++; + } else { + skippedNum++; + } + currentImportHistory.add(digest); + } + } + dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum + + ", skipped " + skippedNum + " " + + (f.getName().startsWith("cached-descriptors") ? + "server" : "extra-info") + " descriptors"); + } + } catch (IOException e) { + logger.log(Level.WARNING, "Failed reading " + + cachedDescDir.getAbsolutePath() + " directory.", e); + } catch (ParseException e) { + logger.log(Level.WARNING, "Failed reading " + + cachedDescDir.getAbsolutePath() + " directory.", e); + } + } + logger.fine("Finished reading " + + cachedDescDir.getAbsolutePath() + " directory."); + } + + /* Write import history containing SHA-1 digests to disk. 
*/ + try { + importHistoryFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + importHistoryFile)); + for (String digest : currentImportHistory) { + bw.write(digest + "\n"); + } + bw.close(); + } catch (IOException e) { + logger.log(Level.WARNING, "Could not write import history to " + + importHistoryFile.getAbsolutePath() + ".", e); + } + + logger.info(dumpStats.toString()); + } +} + diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java new file mode 100644 index 0000000..0bea50a --- /dev/null +++ b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java @@ -0,0 +1,821 @@ +/* Copyright 2010--2012 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.db.relaydescs; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.zip.InflaterInputStream; + +/** + * Downloads relay descriptors from the directory authorities via HTTP. + * Keeps a list of missing descriptors that gets updated by parse results + * from <code>RelayDescriptorParser</code> and downloads all missing + * descriptors that have been published in the last 24 hours. 
Also + * downloads all server and extra-info descriptors known to a directory + * authority at most once a day. + */ +public class RelayDescriptorDownloader { + + /** + * Text file containing the descriptors that we are missing and that we + * want to download. Lines are formatted as: + * + * - "consensus,<validafter>,<parsed>", + * - "vote,<validafter>,<fingerprint>,<parsed>", + * - "server,<published>,<relayid>,<descid>,<parsed>", or + * - "extra,<published>,<relayid>,<descid>,<parsed>". + */ + private File missingDescriptorsFile; + + /** + * Relay descriptors that we are missing and that we want to download + * either in this execution or write to disk and try next time. Map keys + * contain comma-separated values as in the missing descriptors files + * without the "parsed" column. Map values contain the "parsed" column. + */ + private SortedMap<String, String> missingDescriptors; + + /** + * Text file containing the IP addresses (and Dir ports if not 80) of + * directory authorities and when we last downloaded all server and + * extra-info descriptors from them, so that we can avoid downloading + * them too often. + */ + private File lastDownloadedAllDescriptorsFile; + + /** + * Map of directory authorities and when we last downloaded all server + * and extra-info descriptors from them. Map keys are IP addresses (and + * Dir ports if not 80), map values are timestamps. + */ + private Map<String, String> lastDownloadedAllDescriptors; + + /** + * <code>RelayDescriptorParser</code> that we will hand over the + * downloaded descriptors for parsing. + */ + private RelayDescriptorParser rdp; + + /** + * Directory authorities that we will try to download missing + * descriptors from. + */ + private List<String> authorities; + + /** + * Should we try to download the current consensus if we don't have it? + */ + private boolean downloadCurrentConsensus; + + /** + * Should we try to download current votes if we don't have them? 
+ */ + private boolean downloadCurrentVotes; + + /** + * Should we try to download missing server descriptors that have been + * published within the past 24 hours? + */ + private boolean downloadMissingServerDescriptors; + + /** + * Should we try to download missing extra-info descriptors that have + * been published within the past 24 hours? + */ + private boolean downloadMissingExtraInfos; + + /** + * Should we try to download all server descriptors from the authorities + * once every 24 hours? + */ + private boolean downloadAllServerDescriptors; + + /** + * Should we try to download all extra-info descriptors from the + * authorities once every 24 hours? + */ + private boolean downloadAllExtraInfos; + + /** + * Should we download zlib-compressed versions of descriptors by adding + * ".z" to URLs? + */ + private boolean downloadCompressed; + + /** + * valid-after time that we expect the current consensus and votes to + * have, formatted "yyyy-MM-dd HH:mm:ss". We only expect to find + * consensuses and votes with this valid-after time on the directory + * authorities. This time is initialized as the beginning of the current + * hour. + */ + private String currentValidAfter; + + /** + * Cut-off time for missing server and extra-info descriptors, formatted + * "yyyy-MM-dd HH:mm:ss". This time is initialized as the current system + * time minus 24 hours. + */ + private String descriptorCutOff; + + /** + * Cut-off time for downloading all server and extra-info descriptors + * from the directory authorities, formatted "yyyy-MM-dd HH:mm:ss". This + * time is initialized as the current system time minus 23:30 hours. + */ + private String downloadAllDescriptorsCutOff; + + /** + * Directory authorities that we plan to download all server and + * extra-info descriptors from in this execution. 
+ */ + private Set<String> downloadAllDescriptorsFromAuthorities; + + /** + * Current timestamp that is written to the missing list for descriptors + * that we parsed in this execution and for authorities that we + * downloaded all server and extra-info descriptors from. + */ + private String currentTimestamp; + + /** + * Logger for this class. + */ + private Logger logger; + + /** + * Number of descriptors requested by directory authority to be included + * in logs. + */ + private Map<String, Integer> requestsByAuthority; + + /** + * Counters for descriptors that we had on the missing list at the + * beginning of the execution, that we added to the missing list, + * that we requested, and that we successfully downloaded in this + * execution. + */ + private int oldMissingConsensuses = 0, oldMissingVotes = 0, + oldMissingServerDescriptors = 0, oldMissingExtraInfoDescriptors = 0, + newMissingConsensuses = 0, newMissingVotes = 0, + newMissingServerDescriptors = 0, newMissingExtraInfoDescriptors = 0, + requestedConsensuses = 0, requestedVotes = 0, + requestedMissingServerDescriptors = 0, + requestedAllServerDescriptors = 0, + requestedMissingExtraInfoDescriptors = 0, + requestedAllExtraInfoDescriptors = 0, downloadedConsensuses = 0, + downloadedVotes = 0, downloadedMissingServerDescriptors = 0, + downloadedAllServerDescriptors = 0, + downloadedMissingExtraInfoDescriptors = 0, + downloadedAllExtraInfoDescriptors = 0; + + /** + * Initializes this class, including reading in missing descriptors from + * <code>stats/missing-relay-descriptors</code> and the times when we + * last downloaded all server and extra-info descriptors from + * <code>stats/last-downloaded-all-descriptors</code>. 
+ */ + public RelayDescriptorDownloader(RelayDescriptorParser rdp, + List<String> authorities, boolean downloadCurrentConsensus, + boolean downloadCurrentVotes, + boolean downloadMissingServerDescriptors, + boolean downloadMissingExtraInfos, + boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos, + boolean downloadCompressed) { + + /* Memorize argument values. */ + this.rdp = rdp; + this.authorities = new ArrayList<String>(authorities); + this.downloadCurrentConsensus = downloadCurrentConsensus; + this.downloadCurrentVotes = downloadCurrentVotes; + this.downloadMissingServerDescriptors = + downloadMissingServerDescriptors; + this.downloadMissingExtraInfos = downloadMissingExtraInfos; + this.downloadAllServerDescriptors = downloadAllServerDescriptors; + this.downloadAllExtraInfos = downloadAllExtraInfos; + this.downloadCompressed = downloadCompressed; + + /* Shuffle list of authorities for better load balancing over time. */ + Collections.shuffle(this.authorities); + + /* Initialize logger. */ + this.logger = Logger.getLogger( + RelayDescriptorDownloader.class.getName()); + + /* Prepare cut-off times and timestamp for the missing descriptors + * list and the list of authorities to download all server and + * extra-info descriptors from. */ + SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + format.setTimeZone(TimeZone.getTimeZone("UTC")); + long now = System.currentTimeMillis(); + this.currentValidAfter = format.format((now / (60L * 60L * 1000L)) * + (60L * 60L * 1000L)); + this.descriptorCutOff = format.format(now - 24L * 60L * 60L * 1000L); + this.currentTimestamp = format.format(now); + this.downloadAllDescriptorsCutOff = format.format(now + - 23L * 60L * 60L * 1000L - 30L * 60L * 1000L); + + /* Read list of missing descriptors from disk and memorize those that + * we are interested in and that are likely to be found on the + * directory authorities. 
*/ + this.missingDescriptors = new TreeMap<String, String>(); + this.missingDescriptorsFile = new File( + "stats/missing-relay-descriptors"); + if (this.missingDescriptorsFile.exists()) { + try { + this.logger.fine("Reading file " + + this.missingDescriptorsFile.getAbsolutePath() + "..."); + BufferedReader br = new BufferedReader(new FileReader( + this.missingDescriptorsFile)); + String line; + while ((line = br.readLine()) != null) { + if (line.split(",").length > 2) { + String published = line.split(",")[1]; + if (((line.startsWith("consensus,") || + line.startsWith("vote,")) && + this.currentValidAfter.equals(published)) || + ((line.startsWith("server,") || + line.startsWith("extra,")) && + this.descriptorCutOff.compareTo(published) < 0)) { + if (!line.endsWith("NA")) { + /* Not missing. */ + } else if (line.startsWith("consensus,")) { + oldMissingConsensuses++; + } else if (line.startsWith("vote,")) { + oldMissingVotes++; + } else if (line.startsWith("server,")) { + oldMissingServerDescriptors++; + } else if (line.startsWith("extra,")) { + oldMissingExtraInfoDescriptors++; + } + int separateAt = line.lastIndexOf(","); + this.missingDescriptors.put(line.substring(0, + separateAt), line.substring(separateAt + 1)); + } + } else { + this.logger.fine("Invalid line '" + line + "' in " + + this.missingDescriptorsFile.getAbsolutePath() + + ". Ignoring."); + } + } + br.close(); + this.logger.fine("Finished reading file " + + this.missingDescriptorsFile.getAbsolutePath() + "."); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed to read file " + + this.missingDescriptorsFile.getAbsolutePath() + + "! This means that we might forget to download relay " + + "descriptors we are missing.", e); + } + } + + /* Read list of directory authorities and when we last downloaded all + * server and extra-info descriptors from them. 
*/ + this.lastDownloadedAllDescriptors = new HashMap<String, String>(); + this.lastDownloadedAllDescriptorsFile = new File( + "stats/last-downloaded-all-descriptors"); + if (this.lastDownloadedAllDescriptorsFile.exists()) { + try { + this.logger.fine("Reading file " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + + "..."); + BufferedReader br = new BufferedReader(new FileReader( + this.lastDownloadedAllDescriptorsFile)); + String line; + while ((line = br.readLine()) != null) { + if (line.split(",").length != 2) { + this.logger.fine("Invalid line '" + line + "' in " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + + ". Ignoring."); + } else { + String[] parts = line.split(","); + String authority = parts[0]; + String lastDownloaded = parts[1]; + this.lastDownloadedAllDescriptors.put(authority, + lastDownloaded); + } + } + br.close(); + this.logger.fine("Finished reading file " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + + "."); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed to read file " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + + "! This means that we might download all server and " + + "extra-info descriptors more often than we should.", e); + } + } + + /* Make a list of at most two directory authorities that we want to + * download all server and extra-info descriptors from. */ + this.downloadAllDescriptorsFromAuthorities = new HashSet<String>(); + for (String authority : this.authorities) { + if (!this.lastDownloadedAllDescriptors.containsKey(authority) || + this.lastDownloadedAllDescriptors.get(authority).compareTo( + this.downloadAllDescriptorsCutOff) < 0) { + this.downloadAllDescriptorsFromAuthorities.add(authority); + } + if (this.downloadAllDescriptorsFromAuthorities.size() >= 2) { + break; + } + } + + /* Prepare statistics on this execution. 
*/ + this.requestsByAuthority = new HashMap<String, Integer>(); + for (String authority : this.authorities) { + this.requestsByAuthority.put(authority, 0); + } + } + + /** + * We have parsed a consensus. Take this consensus off the missing list + * and add the votes created by the given <code>authorities</code> and + * the <code>serverDescriptors</code> which are in the format + * "<published>,<relayid>,<descid>" to that list. + */ + public void haveParsedConsensus(String validAfter, + Set<String> authorities, Set<String> serverDescriptors) { + + /* Mark consensus as parsed. */ + if (this.currentValidAfter.equals(validAfter)) { + String consensusKey = "consensus," + validAfter; + this.missingDescriptors.put(consensusKey, this.currentTimestamp); + + /* Add votes to missing list. */ + for (String authority : authorities) { + String voteKey = "vote," + validAfter + "," + authority; + if (!this.missingDescriptors.containsKey(voteKey)) { + this.missingDescriptors.put(voteKey, "NA"); + this.newMissingVotes++; + } + } + } + + /* Add server descriptors to missing list. */ + for (String serverDescriptor : serverDescriptors) { + String published = serverDescriptor.split(",")[0]; + if (this.descriptorCutOff.compareTo(published) < 0) { + String serverDescriptorKey = "server," + serverDescriptor; + if (!this.missingDescriptors.containsKey( + serverDescriptorKey)) { + this.missingDescriptors.put(serverDescriptorKey, "NA"); + this.newMissingServerDescriptors++; + } + } + } + } + + /** + * We have parsed a vote. Take this vote off the missing list and add + * the <code>serverDescriptors</code> which are in the format + * "<published>,<relayid>,<descid>" to that list. + */ + public void haveParsedVote(String validAfter, String fingerprint, + Set<String> serverDescriptors) { + + /* Mark vote as parsed. 
*/ + if (this.currentValidAfter.equals(validAfter)) { + String voteKey = "vote," + validAfter + "," + fingerprint; + this.missingDescriptors.put(voteKey, this.currentTimestamp); + } + + /* Add server descriptors to missing list. */ + for (String serverDescriptor : serverDescriptors) { + String published = serverDescriptor.split(",")[0]; + if (this.descriptorCutOff.compareTo(published) < 0) { + String serverDescriptorKey = "server," + serverDescriptor; + if (!this.missingDescriptors.containsKey( + serverDescriptorKey)) { + this.missingDescriptors.put(serverDescriptorKey, "NA"); + this.newMissingServerDescriptors++; + } + } + } + } + + /** + * We have parsed a server descriptor. Take this server descriptor off + * the missing list and put the extra-info descriptor digest on that + * list. + */ + public void haveParsedServerDescriptor(String published, + String relayIdentity, String serverDescriptorDigest, + String extraInfoDigest) { + + /* Mark server descriptor as parsed. */ + if (this.descriptorCutOff.compareTo(published) < 0) { + String serverDescriptorKey = "server," + published + "," + + relayIdentity + "," + serverDescriptorDigest; + this.missingDescriptors.put(serverDescriptorKey, + this.currentTimestamp); + + /* Add extra-info descriptor to missing list. */ + if (extraInfoDigest != null) { + String extraInfoKey = "extra," + published + "," + + relayIdentity + "," + extraInfoDigest; + if (!this.missingDescriptors.containsKey(extraInfoKey)) { + this.missingDescriptors.put(extraInfoKey, "NA"); + this.newMissingExtraInfoDescriptors++; + } + } + } + } + + /** + * We have parsed an extra-info descriptor. Take it off the missing + * list. 
+ */ + public void haveParsedExtraInfoDescriptor(String published, + String relayIdentity, String extraInfoDigest) { + if (this.descriptorCutOff.compareTo(published) < 0) { + String extraInfoKey = "extra," + published + "," + + relayIdentity + "," + extraInfoDigest; + this.missingDescriptors.put(extraInfoKey, this.currentTimestamp); + } + } + + /** + * Downloads missing descriptors that we think might still be available + * on the directory authorities as well as all server and extra-info + * descriptors once per day. + */ + public void downloadDescriptors() { + + /* Put the current consensus on the missing list, unless we already + * have it. */ + String consensusKey = "consensus," + this.currentValidAfter; + if (!this.missingDescriptors.containsKey(consensusKey)) { + this.missingDescriptors.put(consensusKey, "NA"); + this.newMissingConsensuses++; + } + + /* Download descriptors from authorities which are in random order, so + * that we distribute the load somewhat fairly over time. */ + for (String authority : authorities) { + + /* Make all requests to an authority in a single try block. If + * something goes wrong with this authority, we give up on all + * downloads and continue with the next authority. */ + /* TODO Some authorities provide very little bandwidth and could + * slow down the entire download process. Ponder adding a timeout of + * 3 or 5 minutes per authority to avoid getting in the way of the + * next execution. */ + try { + + /* Start with downloading the current consensus, unless we already + * have it. */ + if (downloadCurrentConsensus) { + if (this.missingDescriptors.containsKey(consensusKey) && + this.missingDescriptors.get(consensusKey).equals("NA")) { + this.requestedConsensuses++; + this.downloadedConsensuses += + this.downloadResourceFromAuthority(authority, + "/tor/status-vote/current/consensus"); + } + } + + /* Next, try to download current votes that we're missing. 
*/ + if (downloadCurrentVotes) { + String voteKeyPrefix = "vote," + this.currentValidAfter; + SortedSet<String> fingerprints = new TreeSet<String>(); + for (Map.Entry<String, String> e : + this.missingDescriptors.entrySet()) { + if (e.getValue().equals("NA") && + e.getKey().startsWith(voteKeyPrefix)) { + String fingerprint = e.getKey().split(",")[2]; + fingerprints.add(fingerprint); + } + } + for (String fingerprint : fingerprints) { + this.requestedVotes++; + this.downloadedVotes += + this.downloadResourceFromAuthority(authority, + "/tor/status-vote/current/" + fingerprint); + } + } + + /* Download either all server and extra-info descriptors or only + * those that we're missing. Start with server descriptors, then + * request extra-info descriptors. */ + List<String> types = new ArrayList<String>(Arrays.asList( + "server,extra".split(","))); + for (String type : types) { + + /* Download all server or extra-info descriptors from this + * authority if we haven't done so for 24 hours and if we're + * configured to do so. */ + if (this.downloadAllDescriptorsFromAuthorities.contains( + authority) && ((type.equals("server") && + this.downloadAllServerDescriptors) || + (type.equals("extra") && this.downloadAllExtraInfos))) { + int downloadedAllDescriptors = + this.downloadResourceFromAuthority(authority, "/tor/" + + type + "/all"); + if (type.equals("server")) { + this.requestedAllServerDescriptors++; + this.downloadedAllServerDescriptors += + downloadedAllDescriptors; + } else { + this.requestedAllExtraInfoDescriptors++; + this.downloadedAllExtraInfoDescriptors += + downloadedAllDescriptors; + } + + /* Download missing server or extra-info descriptors if we're + * configured to do so. 
*/ + } else if ((type.equals("server") && + this.downloadMissingServerDescriptors) || + (type.equals("extra") && this.downloadMissingExtraInfos)) { + + /* Go through the list of missing descriptors of this type + * and combine the descriptor identifiers to a URL of up to + * 96 descriptors that we can download at once. */ + SortedSet<String> descriptorIdentifiers = + new TreeSet<String>(); + for (Map.Entry<String, String> e : + this.missingDescriptors.entrySet()) { + if (e.getValue().equals("NA") && + e.getKey().startsWith(type + ",") && + this.descriptorCutOff.compareTo( + e.getKey().split(",")[1]) < 0) { + String descriptorIdentifier = e.getKey().split(",")[3]; + descriptorIdentifiers.add(descriptorIdentifier); + } + } + StringBuilder combinedResource = null; + int descriptorsInCombinedResource = 0, + requestedDescriptors = 0, downloadedDescriptors = 0; + for (String descriptorIdentifier : descriptorIdentifiers) { + if (descriptorsInCombinedResource >= 96) { + requestedDescriptors += descriptorsInCombinedResource; + downloadedDescriptors += + this.downloadResourceFromAuthority(authority, + combinedResource.toString()); + combinedResource = null; + descriptorsInCombinedResource = 0; + } + if (descriptorsInCombinedResource == 0) { + combinedResource = new StringBuilder("/tor/" + type + + "/d/" + descriptorIdentifier); + } else { + combinedResource.append("+" + descriptorIdentifier); + } + descriptorsInCombinedResource++; + } + if (descriptorsInCombinedResource > 0) { + requestedDescriptors += descriptorsInCombinedResource; + downloadedDescriptors += + this.downloadResourceFromAuthority(authority, + combinedResource.toString()); + } + if (type.equals("server")) { + this.requestedMissingServerDescriptors += + requestedDescriptors; + this.downloadedMissingServerDescriptors += + downloadedDescriptors; + } else { + this.requestedMissingExtraInfoDescriptors += + requestedDescriptors; + this.downloadedMissingExtraInfoDescriptors += + downloadedDescriptors; + } + } + } + + 
/* If a download failed, stop requesting descriptors from this + * authority and move on to the next. */ + } catch (IOException e) { + logger.log(Level.FINE, "Failed downloading from " + authority + + "!", e); + } + } + } + + /** + * Attempts to download one or more descriptors identified by a resource + * string from a directory authority and passes the returned + * descriptor(s) to the <code>RelayDescriptorParser</code> upon success. + * Returns the number of descriptors contained in the reply. Throws an + * <code>IOException</code> if something goes wrong while downloading. + */ + private int downloadResourceFromAuthority(String authority, + String resource) throws IOException { + byte[] allData = null; + this.requestsByAuthority.put(authority, + this.requestsByAuthority.get(authority) + 1); + /* TODO Disable compressed downloads for extra-info descriptors, + * because zlib decompression doesn't work correctly. Figure out why + * this is and fix it. */ + String fullUrl = "http://" + authority + resource + + (this.downloadCompressed && !resource.startsWith("/tor/extra/") + ? ".z" : ""); + URL u = new URL(fullUrl); + HttpURLConnection huc = (HttpURLConnection) u.openConnection(); + huc.setRequestMethod("GET"); + huc.connect(); + int response = huc.getResponseCode(); + if (response == 200) { + BufferedInputStream in = this.downloadCompressed && + !resource.startsWith("/tor/extra/") + ? new BufferedInputStream(new InflaterInputStream( + huc.getInputStream())) + : new BufferedInputStream(huc.getInputStream()); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int len; + byte[] data = new byte[1024]; + while ((len = in.read(data, 0, 1024)) >= 0) { + baos.write(data, 0, len); + } + in.close(); + allData = baos.toByteArray(); + } + logger.fine("Downloaded " + fullUrl + " -> " + response + " (" + + (allData == null ? 
0 : allData.length) + " bytes)"); + int receivedDescriptors = 0; + if (allData != null) { + if (resource.startsWith("/tor/status-vote/current/")) { + this.rdp.parse(allData); + receivedDescriptors = 1; + } else if (resource.startsWith("/tor/server/") || + resource.startsWith("/tor/extra/")) { + if (resource.equals("/tor/server/all")) { + this.lastDownloadedAllDescriptors.put(authority, + this.currentTimestamp); + } + String ascii = null; + try { + ascii = new String(allData, "US-ASCII"); + } catch (UnsupportedEncodingException e) { + /* No way that US-ASCII is not supported. */ + } + int start = -1, sig = -1, end = -1; + String startToken = resource.startsWith("/tor/server/") ? + "router " : "extra-info "; + String sigToken = "\nrouter-signature\n"; + String endToken = "\n-----END SIGNATURE-----\n"; + while (end < ascii.length()) { + start = ascii.indexOf(startToken, end); + if (start < 0) { + break; + } + sig = ascii.indexOf(sigToken, start); + if (sig < 0) { + break; + } + sig += sigToken.length(); + end = ascii.indexOf(endToken, sig); + if (end < 0) { + break; + } + end += endToken.length(); + byte[] descBytes = new byte[end - start]; + System.arraycopy(allData, start, descBytes, 0, end - start); + this.rdp.parse(descBytes); + receivedDescriptors++; + } + } + } + return receivedDescriptors; + } + + /** + * Writes status files to disk and logs statistics about downloading + * relay descriptors in this execution. + */ + public void writeFile() { + + /* Write missing descriptors file to disk. 
*/ + int missingConsensuses = 0, missingVotes = 0, + missingServerDescriptors = 0, missingExtraInfoDescriptors = 0; + try { + this.logger.fine("Writing file " + + this.missingDescriptorsFile.getAbsolutePath() + "..."); + this.missingDescriptorsFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.missingDescriptorsFile)); + for (Map.Entry<String, String> e : + this.missingDescriptors.entrySet()) { + String key = e.getKey(), value = e.getValue(); + if (!value.equals("NA")) { + /* Not missing. */ + } else if (key.startsWith("consensus,")) { + missingConsensuses++; + } else if (key.startsWith("vote,")) { + missingVotes++; + } else if (key.startsWith("server,")) { + missingServerDescriptors++; + } else if (key.startsWith("extra,")) { + missingExtraInfoDescriptors++; + } + bw.write(key + "," + value + "\n"); + } + bw.close(); + this.logger.fine("Finished writing file " + + this.missingDescriptorsFile.getAbsolutePath() + "."); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed writing " + + this.missingDescriptorsFile.getAbsolutePath() + "!", e); + } + + /* Write text file containing the directory authorities and when we + * last downloaded all server and extra-info descriptors from them to + * disk. 
*/ + try { + this.logger.fine("Writing file " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + + "..."); + this.lastDownloadedAllDescriptorsFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.lastDownloadedAllDescriptorsFile)); + for (Map.Entry<String, String> e : + this.lastDownloadedAllDescriptors.entrySet()) { + String authority = e.getKey(); + String lastDownloaded = e.getValue(); + bw.write(authority + "," + lastDownloaded + "\n"); + } + bw.close(); + this.logger.fine("Finished writing file " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + + "."); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed writing " + + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + "!", + e); + } + + /* Log statistics about this execution. */ + this.logger.info("Finished downloading relay descriptors from the " + + "directory authorities."); + this.logger.info("At the beginning of this execution, we were " + + "missing " + oldMissingConsensuses + " consensus(es), " + + oldMissingVotes + " vote(s), " + oldMissingServerDescriptors + + " server descriptor(s), and " + oldMissingExtraInfoDescriptors + + " extra-info descriptor(s)."); + this.logger.info("During this execution, we added " + + this.newMissingConsensuses + " consensus(es), " + + this.newMissingVotes + " vote(s), " + + this.newMissingServerDescriptors + " server descriptor(s), and " + + this.newMissingExtraInfoDescriptors + " extra-info " + + "descriptor(s) to the missing list, some of which we also " + + "requested and removed from the list again."); + this.logger.info("We requested " + this.requestedConsensuses + + " consensus(es), " + this.requestedVotes + " vote(s), " + + this.requestedMissingServerDescriptors + " missing server " + + "descriptor(s), " + this.requestedAllServerDescriptors + + " times all server descriptors, " + + this.requestedMissingExtraInfoDescriptors + " missing " + + "extra-info descriptor(s), and " + + 
this.requestedAllExtraInfoDescriptors + " times all extra-info " + + "descriptors from the directory authorities."); + StringBuilder sb = new StringBuilder(); + for (String authority : this.authorities) { + sb.append(" " + authority + "=" + + this.requestsByAuthority.get(authority)); + } + this.logger.info("We sent these numbers of requests to the directory " + + "authorities:" + sb.toString()); + this.logger.info("We successfully downloaded " + + this.downloadedConsensuses + " consensus(es), " + + this.downloadedVotes + " vote(s), " + + this.downloadedMissingServerDescriptors + " missing server " + + "descriptor(s), " + this.downloadedAllServerDescriptors + + " server descriptor(s) when downloading all descriptors, " + + this.downloadedMissingExtraInfoDescriptors + " missing " + + "extra-info descriptor(s) and " + + this.downloadedAllExtraInfoDescriptors + " extra-info " + + "descriptor(s) when downloading all descriptors."); + this.logger.info("At the end of this execution, we are missing " + + missingConsensuses + " consensus(es), " + missingVotes + + " vote(s), " + missingServerDescriptors + " server " + + "descriptor(s), and " + missingExtraInfoDescriptors + + " extra-info descriptor(s), some of which we may try in the next " + + "execution."); + } +} + diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java new file mode 100644 index 0000000..6f04c20 --- /dev/null +++ b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java @@ -0,0 +1,265 @@ +/* Copyright 2010--2012 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.db.relaydescs; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.StringReader; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeSet; +import java.util.logging.Level; +import 
java.util.logging.Logger; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; + +/** + * Parses relay descriptors including network status consensuses and + * votes, server and extra-info descriptors, and passes the results to the + * stats handlers, to the archive writer, or to the relay descriptor + * downloader. + */ +public class RelayDescriptorParser { + + /** + * File writer that writes descriptor contents to files in a + * directory-archive directory structure. + */ + private ArchiveWriter aw; + + /** + * Missing descriptor downloader that uses the parse results to learn + * which descriptors we are missing and want to download. + */ + private RelayDescriptorDownloader rdd; + + /** + * Logger for this class. + */ + private Logger logger; + + private SimpleDateFormat dateTimeFormat; + + /** + * Initializes this class. + */ + public RelayDescriptorParser(ArchiveWriter aw) { + this.aw = aw; + + /* Initialize logger. */ + this.logger = Logger.getLogger(RelayDescriptorParser.class.getName()); + + this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + public void setRelayDescriptorDownloader( + RelayDescriptorDownloader rdd) { + this.rdd = rdd; + } + + public void parse(byte[] data) { + try { + /* Convert descriptor to ASCII for parsing. This means we'll lose + * the non-ASCII chars, but we don't care about them for parsing + * anyway. */ + BufferedReader br = new BufferedReader(new StringReader(new String( + data, "US-ASCII"))); + String line; + do { + line = br.readLine(); + } while (line != null && line.startsWith("@")); + if (line == null) { + this.logger.fine("We were given an empty descriptor for " + + "parsing. 
Ignoring."); + return; + } + SimpleDateFormat parseFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + parseFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + if (line.equals("network-status-version 3")) { + // TODO when parsing the current consensus, check the fresh-until + // time to see when we switch from hourly to half-hourly + // consensuses + boolean isConsensus = true; + String validAfterTime = null, fingerprint = null, + dirSource = null; + long validAfter = -1L, dirKeyPublished = -1L; + SortedSet<String> dirSources = new TreeSet<String>(); + SortedSet<String> serverDescriptors = new TreeSet<String>(); + SortedSet<String> hashedRelayIdentities = new TreeSet<String>(); + StringBuilder certificateStringBuilder = null; + String certificateString = null; + while ((line = br.readLine()) != null) { + if (certificateStringBuilder != null) { + if (line.startsWith("r ")) { + certificateString = certificateStringBuilder.toString(); + certificateStringBuilder = null; + } else { + certificateStringBuilder.append(line + "\n"); + } + } + if (line.equals("vote-status vote")) { + isConsensus = false; + } else if (line.startsWith("valid-after ")) { + validAfterTime = line.substring("valid-after ".length()); + validAfter = parseFormat.parse(validAfterTime).getTime(); + } else if (line.startsWith("dir-source ")) { + dirSource = line.split(" ")[2]; + } else if (line.startsWith("vote-digest ")) { + dirSources.add(dirSource); + } else if (line.startsWith("dir-key-certificate-version ")) { + certificateStringBuilder = new StringBuilder(); + certificateStringBuilder.append(line + "\n"); + } else if (line.startsWith("fingerprint ")) { + fingerprint = line.split(" ")[1]; + } else if (line.startsWith("dir-key-published ")) { + String dirKeyPublishedTime = line.substring( + "dir-key-published ".length()); + dirKeyPublished = parseFormat.parse(dirKeyPublishedTime). 
+ getTime(); + } else if (line.startsWith("r ")) { + String[] parts = line.split(" "); + if (parts.length < 9) { + this.logger.log(Level.WARNING, "Could not parse r line '" + + line + "' in descriptor. Skipping."); + break; + } + String publishedTime = parts[4] + " " + parts[5]; + String relayIdentity = Hex.encodeHexString( + Base64.decodeBase64(parts[2] + "=")). + toLowerCase(); + String serverDesc = Hex.encodeHexString(Base64.decodeBase64( + parts[3] + "=")).toLowerCase(); + serverDescriptors.add(publishedTime + "," + relayIdentity + + "," + serverDesc); + hashedRelayIdentities.add(DigestUtils.shaHex( + Base64.decodeBase64(parts[2] + "=")). + toUpperCase()); + } + } + if (isConsensus) { + if (this.rdd != null) { + this.rdd.haveParsedConsensus(validAfterTime, dirSources, + serverDescriptors); + } + if (this.aw != null) { + this.aw.storeConsensus(data, validAfter); + } + } else { + if (this.aw != null || this.rdd != null) { + String ascii = new String(data, "US-ASCII"); + String startToken = "network-status-version "; + String sigToken = "directory-signature "; + int start = ascii.indexOf(startToken); + int sig = ascii.indexOf(sigToken); + if (start >= 0 && sig >= 0 && sig > start) { + sig += sigToken.length(); + byte[] forDigest = new byte[sig - start]; + System.arraycopy(data, start, forDigest, 0, sig - start); + String digest = DigestUtils.shaHex(forDigest).toUpperCase(); + if (this.aw != null) { + this.aw.storeVote(data, validAfter, dirSource, digest); + } + if (this.rdd != null) { + this.rdd.haveParsedVote(validAfterTime, fingerprint, + serverDescriptors); + } + } + if (certificateString != null) { + if (this.aw != null) { + this.aw.storeCertificate(certificateString.getBytes(), + dirSource, dirKeyPublished); + } + } + } + } + } else if (line.startsWith("router ")) { + String publishedTime = null, extraInfoDigest = null, + relayIdentifier = null; + long published = -1L; + while ((line = br.readLine()) != null) { + if (line.startsWith("published ")) { + 
publishedTime = line.substring("published ".length()); + published = parseFormat.parse(publishedTime).getTime(); + } else if (line.startsWith("opt fingerprint") || + line.startsWith("fingerprint")) { + relayIdentifier = line.substring(line.startsWith("opt ") ? + "opt fingerprint".length() : "fingerprint".length()). + replaceAll(" ", "").toLowerCase(); + } else if (line.startsWith("opt extra-info-digest ") || + line.startsWith("extra-info-digest ")) { + extraInfoDigest = line.startsWith("opt ") ? + line.split(" ")[2].toLowerCase() : + line.split(" ")[1].toLowerCase(); + } + } + String ascii = new String(data, "US-ASCII"); + String startToken = "router "; + String sigToken = "\nrouter-signature\n"; + int start = ascii.indexOf(startToken); + int sig = ascii.indexOf(sigToken) + sigToken.length(); + String digest = null; + if (start >= 0 || sig >= 0 || sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(data, start, forDigest, 0, sig - start); + digest = DigestUtils.shaHex(forDigest); + } + if (this.aw != null && digest != null) { + this.aw.storeServerDescriptor(data, digest, published); + } + if (this.rdd != null && digest != null) { + this.rdd.haveParsedServerDescriptor(publishedTime, + relayIdentifier, digest, extraInfoDigest); + } + } else if (line.startsWith("extra-info ")) { + String publishedTime = null, relayIdentifier = line.split(" ")[2]; + long published = -1L; + while ((line = br.readLine()) != null) { + if (line.startsWith("published ")) { + publishedTime = line.substring("published ".length()); + published = parseFormat.parse(publishedTime).getTime(); + } + } + String ascii = new String(data, "US-ASCII"); + String startToken = "extra-info "; + String sigToken = "\nrouter-signature\n"; + String digest = null; + int start = ascii.indexOf(startToken); + if (start > 0) { + /* Do not confuse "extra-info " in "@type extra-info 1.0" with + * "extra-info 0000...". 
TODO This is a hack that should be + * solved by using metrics-lib some day. */ + start = ascii.indexOf("\n" + startToken); + if (start > 0) { + start++; + } + } + int sig = ascii.indexOf(sigToken) + sigToken.length(); + if (start >= 0 || sig >= 0 || sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(data, start, forDigest, 0, sig - start); + digest = DigestUtils.shaHex(forDigest); + } + if (this.aw != null && digest != null) { + this.aw.storeExtraInfoDescriptor(data, digest, published); + } + if (this.rdd != null && digest != null) { + this.rdd.haveParsedExtraInfoDescriptor(publishedTime, + relayIdentifier.toLowerCase(), digest); + } + } + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not parse descriptor. " + + "Skipping.", e); + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Could not parse descriptor. " + + "Skipping.", e); + } + } +} + diff --git a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java new file mode 100644 index 0000000..1ac593a --- /dev/null +++ b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java @@ -0,0 +1,573 @@ +/* Copyright 2012 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.db.torperf; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; +import java.text.SimpleDateFormat; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.logging.Level; +import java.util.logging.Logger; + +/* Download possibly truncated Torperf .data and .extradata files from + * configured sources, append them to the files we already have, and merge + * the two files into the .tpf 
format. */ +public class TorperfDownloader { + + private File torperfOutputDirectory = null; + private SortedMap<String, String> torperfSources = null; + private List<String> torperfFilesLines = null; + private Logger logger = null; + private SimpleDateFormat dateFormat; + + public TorperfDownloader(File torperfOutputDirectory, + SortedMap<String, String> torperfSources, + List<String> torperfFilesLines) { + if (torperfOutputDirectory == null) { + throw new IllegalArgumentException(); + } + this.torperfOutputDirectory = torperfOutputDirectory; + this.torperfSources = torperfSources; + this.torperfFilesLines = torperfFilesLines; + if (!this.torperfOutputDirectory.exists()) { + this.torperfOutputDirectory.mkdirs(); + } + this.logger = Logger.getLogger(TorperfDownloader.class.getName()); + this.dateFormat = new SimpleDateFormat("yyyy-MM-dd"); + this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + this.readLastMergedTimestamps(); + for (String torperfFilesLine : this.torperfFilesLines) { + this.downloadAndMergeFiles(torperfFilesLine); + } + this.writeLastMergedTimestamps(); + } + + private File torperfLastMergedFile = + new File("stats/torperf-last-merged"); + SortedMap<String, String> lastMergedTimestamps = + new TreeMap<String, String>(); + private void readLastMergedTimestamps() { + if (!this.torperfLastMergedFile.exists()) { + return; + } + try { + BufferedReader br = new BufferedReader(new FileReader( + this.torperfLastMergedFile)); + String line; + while ((line = br.readLine()) != null) { + String[] parts = line.split(" "); + String fileName = null, timestamp = null; + if (parts.length == 2) { + try { + Double.parseDouble(parts[1]); + fileName = parts[0]; + timestamp = parts[1]; + } catch (NumberFormatException e) { + /* Handle below. */ + } + } + if (fileName == null || timestamp == null) { + this.logger.log(Level.WARNING, "Invalid line '" + line + "' in " + + this.torperfLastMergedFile.getAbsolutePath() + ". 
" + + "Ignoring past history of merging .data and .extradata " + + "files."); + this.lastMergedTimestamps.clear(); + break; + } + this.lastMergedTimestamps.put(fileName, timestamp); + } + br.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Error while reading '" + + this.torperfLastMergedFile.getAbsolutePath() + ". Ignoring " + + "past history of merging .data and .extradata files."); + this.lastMergedTimestamps.clear(); + } + } + + private void writeLastMergedTimestamps() { + try { + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.torperfLastMergedFile)); + for (Map.Entry<String, String> e : + this.lastMergedTimestamps.entrySet()) { + String fileName = e.getKey(); + String timestamp = e.getValue(); + bw.write(fileName + " " + timestamp + "\n"); + } + bw.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Error while writing '" + + this.torperfLastMergedFile.getAbsolutePath() + ". This may " + + "result in ignoring history of merging .data and .extradata " + + "files in the next execution.", e); + } + } + + private void downloadAndMergeFiles(String torperfFilesLine) { + String[] parts = torperfFilesLine.split(" "); + String sourceName = parts[1]; + int fileSize = -1; + try { + fileSize = Integer.parseInt(parts[2]); + } catch (NumberFormatException e) { + this.logger.log(Level.WARNING, "Could not parse file size in " + + "TorperfFiles configuration line '" + torperfFilesLine + + "'."); + return; + } + + /* Download and append the .data file. */ + String dataFileName = parts[3]; + String sourceBaseUrl = torperfSources.get(sourceName); + String dataUrl = sourceBaseUrl + dataFileName; + String dataOutputFileName = sourceName + "-" + dataFileName; + File dataOutputFile = new File(torperfOutputDirectory, + dataOutputFileName); + boolean downloadedDataFile = this.downloadAndAppendFile(dataUrl, + dataOutputFile, true); + + /* Download and append the .extradata file. 
*/ + String extradataFileName = parts[4]; + String extradataUrl = sourceBaseUrl + extradataFileName; + String extradataOutputFileName = sourceName + "-" + extradataFileName; + File extradataOutputFile = new File(torperfOutputDirectory, + extradataOutputFileName); + boolean downloadedExtradataFile = this.downloadAndAppendFile( + extradataUrl, extradataOutputFile, false); + + /* Merge both files into .tpf format. */ + if (!downloadedDataFile && !downloadedExtradataFile) { + return; + } + String skipUntil = null; + if (this.lastMergedTimestamps.containsKey(dataOutputFileName)) { + skipUntil = this.lastMergedTimestamps.get(dataOutputFileName); + } + try { + skipUntil = this.mergeFiles(dataOutputFile, extradataOutputFile, + sourceName, fileSize, skipUntil); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed merging " + dataOutputFile + + " and " + extradataOutputFile + ".", e); + } + if (skipUntil != null) { + this.lastMergedTimestamps.put(dataOutputFileName, skipUntil); + } + } + + private boolean downloadAndAppendFile(String url, File outputFile, + boolean isDataFile) { + + /* Read an existing output file to determine which line will be the + * first to append to it. 
*/ + String lastTimestampLine = null; + int linesAfterLastTimestampLine = 0; + if (outputFile.exists() && outputFile.lastModified() > + System.currentTimeMillis() - 330L * 60L * 1000L) { + return false; + } else if (outputFile.exists()) { + try { + BufferedReader br = new BufferedReader(new FileReader( + outputFile)); + String line; + while ((line = br.readLine()) != null) { + if (isDataFile || line.contains(" LAUNCH")) { + lastTimestampLine = line; + linesAfterLastTimestampLine = 0; + } else { + linesAfterLastTimestampLine++; + } + } + br.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed reading '" + + outputFile.getAbsolutePath() + "' to determine the first " + + "line to append to it.", e); + return false; + } + } + try { + this.logger.fine("Downloading " + (isDataFile ? ".data" : + ".extradata") + " file from '" + url + "' and merging it into " + + "'" + outputFile.getAbsolutePath() + "'."); + URL u = new URL(url); + HttpURLConnection huc = (HttpURLConnection) u.openConnection(); + huc.setRequestMethod("GET"); + huc.connect(); + BufferedReader br = new BufferedReader(new InputStreamReader( + huc.getInputStream())); + String line; + BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile, + true)); + boolean copyLines = lastTimestampLine == null; + while ((line = br.readLine()) != null) { + if (copyLines && linesAfterLastTimestampLine == 0) { + if (isDataFile || line.contains(" LAUNCH")) { + lastTimestampLine = line; + } + bw.write(line + "\n"); + } else if (copyLines && linesAfterLastTimestampLine > 0) { + linesAfterLastTimestampLine--; + } else if (line.equals(lastTimestampLine)) { + copyLines = true; + } + } + bw.close(); + br.close(); + if (!copyLines) { + this.logger.warning("The last timestamp line in '" + + outputFile.getAbsolutePath() + "' is not contained in the " + + "new file downloaded from '" + url + "'. Cannot append " + + "new lines without possibly leaving a gap. 
Skipping."); + return false; + } + } catch (IOException e) { + this.logger.log(Level.WARNING, "Failed downloading and/or merging '" + + url + "'.", e); + return false; + } + if (lastTimestampLine == null) { + this.logger.warning("'" + outputFile.getAbsolutePath() + + "' doesn't contain any timestamp lines. Unable to check " + + "whether that file is stale or not."); + } else { + long lastTimestampMillis = -1L; + if (isDataFile) { + lastTimestampMillis = Long.parseLong(lastTimestampLine.substring( + 0, lastTimestampLine.indexOf(" "))) * 1000L; + } else { + lastTimestampMillis = Long.parseLong(lastTimestampLine.substring( + lastTimestampLine.indexOf(" LAUNCH=") + " LAUNCH=".length(), + lastTimestampLine.indexOf(".", + lastTimestampLine.indexOf(" LAUNCH=")))) * 1000L; + } + if (lastTimestampMillis < System.currentTimeMillis() + - 330L * 60L * 1000L) { + this.logger.warning("The last timestamp in '" + + outputFile.getAbsolutePath() + "' is more than 5:30 hours " + + "old: " + lastTimestampMillis); + } + } + return true; + } + + private String mergeFiles(File dataFile, File extradataFile, + String source, int fileSize, String skipUntil) throws IOException { + SortedMap<String, String> config = new TreeMap<String, String>(); + config.put("SOURCE", source); + config.put("FILESIZE", String.valueOf(fileSize)); + if (!dataFile.exists() || !extradataFile.exists()) { + this.logger.warning("File " + dataFile.getAbsolutePath() + " or " + + extradataFile.getAbsolutePath() + " is missing."); + return null; + } + this.logger.fine("Merging " + dataFile.getAbsolutePath() + " and " + + extradataFile.getAbsolutePath() + " into .tpf format."); + BufferedReader brD = new BufferedReader(new FileReader(dataFile)), + brE = new BufferedReader(new FileReader(extradataFile)); + String lineD = brD.readLine(), lineE = brE.readLine(); + int d = 1, e = 1; + String maxDataComplete = null, maxUsedAt = null; + while (lineD != null) { + + /* Parse .data line. 
Every valid .data line will go into the .tpf + * format, either with additional information from the .extradata + * file or without it. */ + if (lineD.isEmpty()) { + this.logger.finer("Skipping empty line " + dataFile.getName() + + ":" + d++ + "."); + lineD = brD.readLine(); + continue; + } + SortedMap<String, String> data = this.parseDataLine(lineD); + if (data == null) { + this.logger.finer("Skipping illegal line " + dataFile.getName() + + ":" + d++ + " '" + lineD + "'."); + lineD = brD.readLine(); + continue; + } + String dataComplete = data.get("DATACOMPLETE"); + double dataCompleteSeconds = Double.parseDouble(dataComplete); + if (skipUntil != null && dataComplete.compareTo(skipUntil) < 0) { + this.logger.finer("Skipping " + dataFile.getName() + ":" + + d++ + " which we already processed before."); + lineD = brD.readLine(); + continue; + } + maxDataComplete = dataComplete; + + /* Parse .extradata line if available and try to find the one that + * matches the .data line. */ + SortedMap<String, String> extradata = null; + while (lineE != null) { + if (lineE.isEmpty()) { + this.logger.finer("Skipping " + extradataFile.getName() + ":" + + e++ + " which is empty."); + lineE = brE.readLine(); + continue; + } + if (lineE.startsWith("BUILDTIMEOUT_SET ")) { + this.logger.finer("Skipping " + extradataFile.getName() + ":" + + e++ + " which is a BUILDTIMEOUT_SET line."); + lineE = brE.readLine(); + continue; + } else if (lineE.startsWith("ok ") || + lineE.startsWith("error ")) { + this.logger.finer("Skipping " + extradataFile.getName() + ":" + + e++ + " which is in the old format."); + lineE = brE.readLine(); + continue; + } + extradata = this.parseExtradataLine(lineE); + if (extradata == null) { + this.logger.finer("Skipping Illegal line " + + extradataFile.getName() + ":" + e++ + " '" + lineE + + "'."); + lineE = brE.readLine(); + continue; + } + if (!extradata.containsKey("USED_AT")) { + this.logger.finer("Skipping " + extradataFile.getName() + ":" + + e++ + " which 
doesn't contain a USED_AT element."); + lineE = brE.readLine(); + continue; + } + String usedAt = extradata.get("USED_AT"); + double usedAtSeconds = Double.parseDouble(usedAt); + if (skipUntil != null && usedAt.compareTo(skipUntil) < 0) { + this.logger.finer("Skipping " + extradataFile.getName() + ":" + + e++ + " which we already processed before."); + lineE = brE.readLine(); + continue; + } + maxUsedAt = usedAt; + if (Math.abs(usedAtSeconds - dataCompleteSeconds) <= 1.0) { + this.logger.fine("Merging " + extradataFile.getName() + ":" + + e++ + " into the current .data line."); + lineE = brE.readLine(); + break; + } else if (usedAtSeconds > dataCompleteSeconds) { + this.logger.finer("Comparing " + extradataFile.getName() + + " to the next .data line."); + extradata = null; + break; + } else { + this.logger.finer("Skipping " + extradataFile.getName() + ":" + + e++ + " which is too old to be merged with " + + dataFile.getName() + ":" + d + "."); + lineE = brE.readLine(); + continue; + } + } + + /* Write output line to .tpf file. */ + SortedMap<String, String> keysAndValues = + new TreeMap<String, String>(); + if (extradata != null) { + keysAndValues.putAll(extradata); + } + keysAndValues.putAll(data); + keysAndValues.putAll(config); + this.logger.fine("Writing " + dataFile.getName() + ":" + d++ + "."); + lineD = brD.readLine(); + try { + this.writeTpfLine(source, fileSize, keysAndValues); + } catch (IOException ex) { + this.logger.log(Level.WARNING, "Error writing output line. 
" + + "Aborting to merge " + dataFile.getName() + " and " + + extradataFile.getName() + ".", e); + break; + } + } + brD.close(); + brE.close(); + this.writeCachedTpfLines(); + if (maxDataComplete == null) { + return maxUsedAt; + } else if (maxUsedAt == null) { + return maxDataComplete; + } else if (maxDataComplete.compareTo(maxUsedAt) > 0) { + return maxUsedAt; + } else { + return maxDataComplete; + } + } + + private SortedMap<Integer, String> dataTimestamps; + private SortedMap<String, String> parseDataLine(String line) { + String[] parts = line.trim().split(" "); + if (line.length() == 0 || parts.length < 20) { + return null; + } + if (this.dataTimestamps == null) { + this.dataTimestamps = new TreeMap<Integer, String>(); + this.dataTimestamps.put(0, "START"); + this.dataTimestamps.put(2, "SOCKET"); + this.dataTimestamps.put(4, "CONNECT"); + this.dataTimestamps.put(6, "NEGOTIATE"); + this.dataTimestamps.put(8, "REQUEST"); + this.dataTimestamps.put(10, "RESPONSE"); + this.dataTimestamps.put(12, "DATAREQUEST"); + this.dataTimestamps.put(14, "DATARESPONSE"); + this.dataTimestamps.put(16, "DATACOMPLETE"); + this.dataTimestamps.put(21, "DATAPERC10"); + this.dataTimestamps.put(23, "DATAPERC20"); + this.dataTimestamps.put(25, "DATAPERC30"); + this.dataTimestamps.put(27, "DATAPERC40"); + this.dataTimestamps.put(29, "DATAPERC50"); + this.dataTimestamps.put(31, "DATAPERC60"); + this.dataTimestamps.put(33, "DATAPERC70"); + this.dataTimestamps.put(35, "DATAPERC80"); + this.dataTimestamps.put(37, "DATAPERC90"); + } + SortedMap<String, String> data = new TreeMap<String, String>(); + try { + for (Map.Entry<Integer, String> e : this.dataTimestamps.entrySet()) { + int i = e.getKey(); + if (parts.length > i + 1) { + String key = e.getValue(); + String value = String.format("%s.%02d", parts[i], + Integer.parseInt(parts[i + 1]) / 10000); + data.put(key, value); + } + } + } catch (NumberFormatException e) { + return null; + } + data.put("WRITEBYTES", parts[18]); + 
data.put("READBYTES", parts[19]); + if (parts.length >= 21) { + data.put("DIDTIMEOUT", parts[20]); + } + return data; + } + + private SortedMap<String, String> parseExtradataLine(String line) { + String[] parts = line.split(" "); + SortedMap<String, String> extradata = new TreeMap<String, String>(); + String previousKey = null; + for (String part : parts) { + String[] keyAndValue = part.split("=", -1); + if (keyAndValue.length == 2) { + String key = keyAndValue[0]; + previousKey = key; + String value = keyAndValue[1]; + if (value.contains(".") && value.lastIndexOf(".") == + value.length() - 2) { + /* Make sure that all floats have two trailing digits. */ + value += "0"; + } + extradata.put(key, value); + } else if (keyAndValue.length == 1 && previousKey != null) { + String value = keyAndValue[0]; + if (previousKey.equals("STREAM_FAIL_REASONS") && + (value.equals("MISC") || value.equals("EXITPOLICY") || + value.equals("RESOURCELIMIT") || + value.equals("RESOLVEFAILED"))) { + extradata.put(previousKey, extradata.get(previousKey) + ":" + + value); + } else { + return null; + } + } else { + return null; + } + } + return extradata; + } + + private String cachedSource; + private int cachedFileSize; + private String cachedStartDate; + private SortedMap<String, String> cachedTpfLines; + private void writeTpfLine(String source, int fileSize, + SortedMap<String, String> keysAndValues) throws IOException { + StringBuilder sb = new StringBuilder(); + int written = 0; + for (Map.Entry<String, String> keyAndValue : + keysAndValues.entrySet()) { + String key = keyAndValue.getKey(); + String value = keyAndValue.getValue(); + sb.append((written++ > 0 ? 
" " : "") + key + "=" + value); + } + String line = sb.toString(); + String startString = keysAndValues.get("START"); + long startMillis = Long.parseLong(startString.substring(0, + startString.indexOf("."))) * 1000L; + String startDate = dateFormat.format(startMillis); + if (this.cachedTpfLines == null || !source.equals(this.cachedSource) || + fileSize != this.cachedFileSize || + !startDate.equals(this.cachedStartDate)) { + this.writeCachedTpfLines(); + this.readTpfLinesToCache(source, fileSize, startDate); + } + if (!this.cachedTpfLines.containsKey(startString) || + line.length() > this.cachedTpfLines.get(startString).length()) { + this.cachedTpfLines.put(startString, line); + } + } + + private void readTpfLinesToCache(String source, int fileSize, + String startDate) throws IOException { + this.cachedTpfLines = new TreeMap<String, String>(); + this.cachedSource = source; + this.cachedFileSize = fileSize; + this.cachedStartDate = startDate; + File tpfFile = new File(torperfOutputDirectory, + startDate.replaceAll("-", "/") + "/" + + source + "-" + String.valueOf(fileSize) + "-" + startDate + + ".tpf"); + if (!tpfFile.exists()) { + return; + } + BufferedReader br = new BufferedReader(new FileReader(tpfFile)); + String line; + while ((line = br.readLine()) != null) { + if (line.startsWith("@type ")) { + continue; + } + if (line.contains("START=")) { + String startString = line.substring(line.indexOf("START=") + + "START=".length()).split(" ")[0]; + this.cachedTpfLines.put(startString, line); + } + } + br.close(); + } + + private void writeCachedTpfLines() throws IOException { + if (this.cachedSource == null || this.cachedFileSize == 0 || + this.cachedStartDate == null || this.cachedTpfLines == null) { + return; + } + File tpfFile = new File(torperfOutputDirectory, + this.cachedStartDate.replaceAll("-", "/") + + "/" + this.cachedSource + "-" + + String.valueOf(this.cachedFileSize) + "-" + + this.cachedStartDate + ".tpf"); + tpfFile.getParentFile().mkdirs(); + 
BufferedWriter bw = new BufferedWriter(new FileWriter(tpfFile)); + bw.write("@type torperf 1.0\n"); + for (String line : this.cachedTpfLines.values()) { + bw.write(line + "\n"); + } + bw.close(); + this.cachedSource = null; + this.cachedFileSize = 0; + this.cachedStartDate = null; + this.cachedTpfLines = null; + } +} + diff --git a/test/org/torproject/ernie/db/ArchiveReaderTest.java b/test/org/torproject/ernie/db/ArchiveReaderTest.java deleted file mode 100644 index cb3dea7..0000000 --- a/test/org/torproject/ernie/db/ArchiveReaderTest.java +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright 2011 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.File; - -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class ArchiveReaderTest { - - private File tempArchivesDirectory; - private File tempStatsDirectory; - - @Rule - public TemporaryFolder folder = new TemporaryFolder(); - - @Before - public void createTempDirectories() { - this.tempArchivesDirectory = folder.newFolder("sanitized-bridges"); - this.tempStatsDirectory = folder.newFolder("stats"); - } - - @Test(expected = IllegalArgumentException.class) - public void testRelayDescriptorParserNull() { - new ArchiveReader(null, this.tempArchivesDirectory, - this.tempStatsDirectory, false); - } -} - diff --git a/test/org/torproject/ernie/db/ArchiveWriterTest.java b/test/org/torproject/ernie/db/ArchiveWriterTest.java deleted file mode 100644 index 2a18e74..0000000 --- a/test/org/torproject/ernie/db/ArchiveWriterTest.java +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright 2011 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class ArchiveWriterTest { - - @Rule - public TemporaryFolder folder = new TemporaryFolder(); - - @Test(expected = 
IllegalArgumentException.class) - public void testArchivesDirectoryNull() { - new ArchiveWriter(null); - } -} - diff --git a/test/org/torproject/ernie/db/BridgeSnapshotReaderTest.java b/test/org/torproject/ernie/db/BridgeSnapshotReaderTest.java deleted file mode 100644 index f1ad03b..0000000 --- a/test/org/torproject/ernie/db/BridgeSnapshotReaderTest.java +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright 2011 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.File; - -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class BridgeSnapshotReaderTest { - - private File tempBridgeDirectoriesDirectory; - private File tempStatsDirectory; - - @Rule - public TemporaryFolder folder = new TemporaryFolder(); - - @Before - public void createTempDirectories() { - this.tempBridgeDirectoriesDirectory = folder.newFolder("bridges"); - this.tempStatsDirectory = folder.newFolder("stats"); - } - - @Test(expected = IllegalArgumentException.class) - public void testBridgeDescriptorParserNull() { - new BridgeSnapshotReader(null, this.tempBridgeDirectoriesDirectory, - this.tempStatsDirectory); - } -} - diff --git a/test/org/torproject/ernie/db/CachedRelayDescriptorReaderTest.java b/test/org/torproject/ernie/db/CachedRelayDescriptorReaderTest.java deleted file mode 100644 index f101249..0000000 --- a/test/org/torproject/ernie/db/CachedRelayDescriptorReaderTest.java +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright 2011 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.File; -import java.util.ArrayList; - -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class CachedRelayDescriptorReaderTest { - - private File tempStatsDirectory; - - @Rule - public TemporaryFolder folder = new TemporaryFolder(); - - @Before - public void 
createTempDirectories() { - this.tempStatsDirectory = folder.newFolder("stats"); - } - - @Test(expected = IllegalArgumentException.class) - public void testRelayDescriptorParserNull() { - new CachedRelayDescriptorReader(null, new ArrayList<String>(), - this.tempStatsDirectory); - } -} - diff --git a/test/org/torproject/ernie/db/SanitizedBridgesWriterTest.java b/test/org/torproject/ernie/db/SanitizedBridgesWriterTest.java deleted file mode 100644 index f8b4cdd..0000000 --- a/test/org/torproject/ernie/db/SanitizedBridgesWriterTest.java +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright 2011 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.db; - -import java.io.File; - -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class SanitizedBridgesWriterTest { - - private File tempSanitizedBridgesDirectory; - private File tempStatsDirectory; - - @Rule - public TemporaryFolder folder = new TemporaryFolder(); - - @Before - public void createTempDirectories() { - this.tempSanitizedBridgesDirectory = - folder.newFolder("sanitized-bridges"); - this.tempStatsDirectory = folder.newFolder("stats"); - } - - @Test(expected = IllegalArgumentException.class) - public void testSanitizedBridgesDirectoryNull() { - new SanitizedBridgesWriter(null, this.tempStatsDirectory, false, -1L); - } - - @Test(expected = IllegalArgumentException.class) - public void testStatsDirectoryNull() { - new SanitizedBridgesWriter(this.tempSanitizedBridgesDirectory, null, - false, -1L); - } -} -
tor-commits@lists.torproject.org