tor-commits

[metrics-db/master] Group classes by kind of processed metrics data.
by karsten@torproject.org, 27 Oct '12
commit 8746badd1bfd8cb05983159be2336f1cf72cbd44
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Oct 26 12:44:42 2012 -0400
Group classes by kind of processed metrics data.
Also remove unused unit tests.
---
build.xml | 24 +-
src/org/torproject/ernie/db/ArchiveReader.java | 146 ----
src/org/torproject/ernie/db/ArchiveWriter.java | 339 --------
.../ernie/db/BridgeDescriptorParser.java | 46 -
.../ernie/db/BridgePoolAssignmentsProcessor.java | 174 ----
.../torproject/ernie/db/BridgeSnapshotReader.java | 220 -----
.../ernie/db/CachedRelayDescriptorReader.java | 235 -----
src/org/torproject/ernie/db/Configuration.java | 359 --------
.../torproject/ernie/db/ExitListDownloader.java | 100 ---
src/org/torproject/ernie/db/LockFile.java | 52 --
.../torproject/ernie/db/LoggingConfiguration.java | 93 --
src/org/torproject/ernie/db/Main.java | 160 ----
.../ernie/db/RelayDescriptorDownloader.java | 821 ------------------
.../torproject/ernie/db/RelayDescriptorParser.java | 265 ------
src/org/torproject/ernie/db/RsyncDataProvider.java | 217 -----
.../ernie/db/SanitizedBridgesWriter.java | 911 --------------------
src/org/torproject/ernie/db/TorperfDownloader.java | 573 ------------
.../db/bridgedescs/BridgeDescriptorParser.java | 46 +
.../ernie/db/bridgedescs/BridgeSnapshotReader.java | 220 +++++
.../db/bridgedescs/SanitizedBridgesWriter.java | 911 ++++++++++++++++++++
.../BridgePoolAssignmentsProcessor.java | 174 ++++
.../ernie/db/exitlists/ExitListDownloader.java | 100 +++
.../torproject/ernie/db/main/Configuration.java | 359 ++++++++
src/org/torproject/ernie/db/main/LockFile.java | 52 ++
.../ernie/db/main/LoggingConfiguration.java | 93 ++
src/org/torproject/ernie/db/main/Main.java | 172 ++++
.../ernie/db/main/RsyncDataProvider.java | 217 +++++
.../ernie/db/relaydescs/ArchiveReader.java | 146 ++++
.../ernie/db/relaydescs/ArchiveWriter.java | 339 ++++++++
.../db/relaydescs/CachedRelayDescriptorReader.java | 235 +++++
.../db/relaydescs/RelayDescriptorDownloader.java | 821 ++++++++++++++++++
.../ernie/db/relaydescs/RelayDescriptorParser.java | 265 ++++++
.../ernie/db/torperf/TorperfDownloader.java | 573 ++++++++++++
.../org/torproject/ernie/db/ArchiveReaderTest.java | 32 -
.../org/torproject/ernie/db/ArchiveWriterTest.java | 19 -
.../ernie/db/BridgeSnapshotReaderTest.java | 32 -
.../ernie/db/CachedRelayDescriptorReaderTest.java | 31 -
.../ernie/db/SanitizedBridgesWriterTest.java | 38 -
38 files changed, 4724 insertions(+), 4886 deletions(-)
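
[Editor's illustration, not part of the commit: the diffstat shows the classes being regrouped into subpackages such as bridgedescs, exitlists, main, relaydescs, and torperf, mostly with identical line counts on both sides of the move. For a moved class the visible change reduces to its package declaration; a skeleton sketch for ArchiveReader, whose 146-line body is otherwise unchanged:]

// Sketch only: relay-descriptor classes move from the flat
// org.torproject.ernie.db package into the relaydescs subpackage.
package org.torproject.ernie.db.relaydescs;

public class ArchiveReader {
  // ... implementation unchanged; see the deleted file in the diff below ...
}
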
diff --git a/build.xml b/build.xml
index ce3e337..7cba58c 100644
--- a/build.xml
+++ b/build.xml
@@ -1,7 +1,6 @@
<project default="run" name="ERNIE" basedir=".">
<property name="sources" value="src/"/>
<property name="classes" value="classes/"/>
- <property name="tests" value="test"/>
<property name="docs" value="javadoc/"/>
<property name="name" value="ERNIE"/>
<path id="classpath">
@@ -27,7 +26,7 @@
<target name="run" depends="compile">
<java fork="true"
maxmemory="2048m"
- classname="org.torproject.ernie.db.Main">
+ classname="org.torproject.ernie.db.main.Main">
<classpath refid="classpath"/>
</java>
</target>
@@ -38,26 +37,5 @@
<fileset dir="${sources}/" includes="**/*.java" />
</javadoc>
</target>
- <target name="test" depends="compile">
- <javac destdir="${classes}"
- srcdir="${tests}"
- source="1.5"
- target="1.5"
- debug="true"
- deprecation="true"
- optimize="false"
- failonerror="true"
- includeantruntime="false">
- <classpath refid="classpath"/>
- </javac>
- <junit haltonfailure="true" printsummary="off">
- <classpath refid="classpath"/>
- <formatter type="plain" usefile="false"/>
- <batchtest>
- <fileset dir="${classes}"
- includes="**/*Test.class"/>
- </batchtest>
- </junit>
- </target>
</project>
diff --git a/src/org/torproject/ernie/db/ArchiveReader.java b/src/org/torproject/ernie/db/ArchiveReader.java
deleted file mode 100644
index 06abf6c..0000000
--- a/src/org/torproject/ernie/db/ArchiveReader.java
+++ /dev/null
@@ -1,146 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
-
-/**
- * Read in all files in a given directory and pass buffered readers of
- * them to the relay descriptor parser.
- */
-public class ArchiveReader {
- public ArchiveReader(RelayDescriptorParser rdp, File archivesDirectory,
- File statsDirectory, boolean keepImportHistory) {
-
- if (rdp == null || archivesDirectory == null ||
- statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- int parsedFiles = 0, ignoredFiles = 0;
- Logger logger = Logger.getLogger(ArchiveReader.class.getName());
- SortedSet<String> archivesImportHistory = new TreeSet<String>();
- File archivesImportHistoryFile = new File(statsDirectory,
- "archives-import-history");
- if (keepImportHistory && archivesImportHistoryFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- archivesImportHistoryFile));
- String line = null;
- while ((line = br.readLine()) != null) {
- archivesImportHistory.add(line);
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not read in archives import "
- + "history file. Skipping.");
- }
- }
- if (archivesDirectory.exists()) {
- logger.fine("Importing files in directory " + archivesDirectory
- + "/...");
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(archivesDirectory);
- List<File> problems = new ArrayList<File>();
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- for (File f : pop.listFiles()) {
- filesInInputDir.add(f);
- }
- } else {
- if (rdp != null) {
- try {
- BufferedInputStream bis = null;
- if (keepImportHistory &&
- archivesImportHistory.contains(pop.getName())) {
- ignoredFiles++;
- continue;
- } else if (pop.getName().endsWith(".tar.bz2")) {
- logger.warning("Cannot parse compressed tarball "
- + pop.getAbsolutePath() + ". Skipping.");
- continue;
- } else if (pop.getName().endsWith(".bz2")) {
- FileInputStream fis = new FileInputStream(pop);
- BZip2CompressorInputStream bcis =
- new BZip2CompressorInputStream(fis);
- bis = new BufferedInputStream(bcis);
- } else {
- FileInputStream fis = new FileInputStream(pop);
- bis = new BufferedInputStream(fis);
- }
- if (keepImportHistory) {
- archivesImportHistory.add(pop.getName());
- }
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- bis.close();
- byte[] allData = baos.toByteArray();
- rdp.parse(allData);
- parsedFiles++;
- } catch (IOException e) {
- problems.add(pop);
- if (problems.size() > 3) {
- break;
- }
- }
- }
- }
- }
- if (problems.isEmpty()) {
- logger.fine("Finished importing files in directory "
- + archivesDirectory + "/.");
- } else {
- StringBuilder sb = new StringBuilder("Failed importing files in "
- + "directory " + archivesDirectory + "/:");
- int printed = 0;
- for (File f : problems) {
- sb.append("\n " + f.getAbsolutePath());
- if (++printed >= 3) {
- sb.append("\n ... more");
- break;
- }
- }
- }
- }
- if (keepImportHistory) {
- try {
- archivesImportHistoryFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- archivesImportHistoryFile));
- for (String line : archivesImportHistory) {
- bw.write(line + "\n");
- }
- bw.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write archives import "
- + "history file.");
- }
- }
- logger.info("Finished importing relay descriptors from local "
- + "directory:\nParsed " + parsedFiles + ", ignored "
- + ignoredFiles + " files.");
- }
-}
-
diff --git a/src/org/torproject/ernie/db/ArchiveWriter.java b/src/org/torproject/ernie/db/ArchiveWriter.java
deleted file mode 100644
index d1b9499..0000000
--- a/src/org/torproject/ernie/db/ArchiveWriter.java
+++ /dev/null
@@ -1,339 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedOutputStream;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.IOException;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.codec.binary.Hex;
-import org.torproject.descriptor.DescriptorParser;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.impl.DescriptorParseException;
-
-public class ArchiveWriter {
- private Logger logger;
- private File outputDirectory;
- private DescriptorParser descriptorParser;
- private int storedConsensuses = 0, storedVotes = 0, storedCerts = 0,
- storedServerDescriptors = 0, storedExtraInfoDescriptors = 0;
-
- public ArchiveWriter(File outputDirectory) {
-
- if (outputDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- this.logger = Logger.getLogger(ArchiveWriter.class.getName());
- this.outputDirectory = outputDirectory;
- this.descriptorParser =
- DescriptorSourceFactory.createDescriptorParser();
- }
-
- private boolean store(byte[] typeAnnotation, byte[] data,
- String filename) {
- try {
- File file = new File(filename);
- if (!file.exists()) {
- this.logger.finer("Storing " + filename);
- if (this.descriptorParser.parseDescriptors(data, filename).size()
- != 1) {
- this.logger.info("Relay descriptor file " + filename
- + " doesn't contain exactly one descriptor. Not storing.");
- return false;
- }
- file.getParentFile().mkdirs();
- BufferedOutputStream bos = new BufferedOutputStream(
- new FileOutputStream(file));
- if (data.length > 0 && data[0] != '@') {
- bos.write(typeAnnotation, 0, typeAnnotation.length);
- }
- bos.write(data, 0, data.length);
- bos.close();
- return true;
- }
- } catch (DescriptorParseException e) {
- this.logger.log(Level.WARNING, "Could not parse relay descriptor "
- + filename + " before storing it to disk. Skipping.", e);
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not store relay descriptor "
- + filename, e);
- }
- return false;
- }
-
- private static final byte[] CONSENSUS_ANNOTATION =
- "@type network-status-consensus-3 1.0\n".getBytes();
- public void storeConsensus(byte[] data, long validAfter) {
- SimpleDateFormat printFormat = new SimpleDateFormat(
- "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/consensus/"
- + printFormat.format(new Date(validAfter)) + "-consensus";
- if (this.store(CONSENSUS_ANNOTATION, data, filename)) {
- this.storedConsensuses++;
- }
- }
-
- private static final byte[] VOTE_ANNOTATION =
- "@type network-status-vote-3 1.0\n".getBytes();
- public void storeVote(byte[] data, long validAfter,
- String fingerprint, String digest) {
- SimpleDateFormat printFormat = new SimpleDateFormat(
- "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/vote/"
- + printFormat.format(new Date(validAfter)) + "-vote-"
- + fingerprint + "-" + digest;
- if (this.store(VOTE_ANNOTATION, data, filename)) {
- this.storedVotes++;
- }
- }
-
- private static final byte[] CERTIFICATE_ANNOTATION =
- "@type dir-key-certificate-3 1.0\n".getBytes();
- public void storeCertificate(byte[] data, String fingerprint,
- long published) {
- SimpleDateFormat printFormat = new SimpleDateFormat(
- "yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/certs/"
- + fingerprint + "-" + printFormat.format(new Date(published));
- if (this.store(CERTIFICATE_ANNOTATION, data, filename)) {
- this.storedCerts++;
- }
- }
-
- private static final byte[] SERVER_DESCRIPTOR_ANNOTATION =
- "@type server-descriptor 1.0\n".getBytes();
- public void storeServerDescriptor(byte[] data, String digest,
- long published) {
- SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/server-descriptor/"
- + printFormat.format(new Date(published))
- + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
- + digest;
- if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, filename)) {
- this.storedServerDescriptors++;
- }
- }
-
- private static final byte[] EXTRA_INFO_ANNOTATION =
- "@type extra-info 1.0\n".getBytes();
- public void storeExtraInfoDescriptor(byte[] data,
- String extraInfoDigest, long published) {
- SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
- descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/extra-info/"
- + descriptorFormat.format(new Date(published))
- + extraInfoDigest.substring(0, 1) + "/"
- + extraInfoDigest.substring(1, 2) + "/"
- + extraInfoDigest;
- if (this.store(EXTRA_INFO_ANNOTATION, data, filename)) {
- this.storedExtraInfoDescriptors++;
- }
- }
-
- private StringBuilder intermediateStats = new StringBuilder();
- public void intermediateStats(String event) {
- intermediateStats.append("While " + event + ", we stored "
- + this.storedConsensuses + " consensus(es), " + this.storedVotes
- + " vote(s), " + this.storedCerts + " certificate(s), "
- + this.storedServerDescriptors + " server descriptor(s), and "
- + this.storedExtraInfoDescriptors
- + " extra-info descriptor(s) to disk.\n");
- this.storedConsensuses = 0;
- this.storedVotes = 0;
- this.storedCerts = 0;
- this.storedServerDescriptors = 0;
- this.storedExtraInfoDescriptors = 0;
- }
- /**
- * Dump some statistics on the completeness of descriptors to the logs
- * on level INFO.
- */
- public void dumpStats() {
- StringBuilder sb = new StringBuilder("Finished writing relay "
- + "descriptors to disk.\n");
- sb.append(intermediateStats.toString());
- sb.append("Statistics on the completeness of written relay "
- + "descriptors of the last 3 consensuses (Consensus/Vote, "
- + "valid-after, votes, server descriptors, extra-infos):");
- try {
- SimpleDateFormat validAfterFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- validAfterFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- SimpleDateFormat consensusVoteFormat =
- new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- consensusVoteFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- SimpleDateFormat descriptorFormat =
- new SimpleDateFormat("yyyy/MM/");
- descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-
- SortedSet<File> consensuses = new TreeSet<File>();
- Stack<File> leftToParse = new Stack<File>();
- leftToParse.add(new File(outputDirectory + "/consensus"));
- while (!leftToParse.isEmpty()) {
- File pop = leftToParse.pop();
- if (pop.isDirectory()) {
- for (File f : pop.listFiles()) {
- leftToParse.add(f);
- }
- } else if (pop.length() > 0) {
- consensuses.add(pop);
- }
- while (consensuses.size() > 3) {
- consensuses.remove(consensuses.first());
- }
- }
- for (File f : consensuses) {
- BufferedReader br = new BufferedReader(new FileReader(f));
- String line = null, validAfterTime = null,
- voteFilenamePrefix = null, dirSource = null;
- int allVotes = 0, foundVotes = 0,
- allServerDescs = 0, foundServerDescs = 0,
- allExtraInfos = 0, foundExtraInfos = 0;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("valid-after ")) {
- validAfterTime = line.substring("valid-after ".length());
- long validAfter = validAfterFormat.parse(
- validAfterTime).getTime();
- voteFilenamePrefix = outputDirectory + "/vote/"
- + consensusVoteFormat.format(new Date(validAfter))
- + "-vote-";
- } else if (line.startsWith("dir-source ")) {
- dirSource = line.split(" ")[2];
- } else if (line.startsWith("vote-digest ")) {
- allVotes++;
- File voteFile = new File(voteFilenamePrefix + dirSource + "-"
- + line.split(" ")[1]);
- if (voteFile.exists()) {
- foundVotes++;
- BufferedReader vbr = new BufferedReader(new FileReader(
- voteFile));
- String line3 = null;
- int voteAllServerDescs = 0, voteFoundServerDescs = 0,
- voteAllExtraInfos = 0, voteFoundExtraInfos = 0;
- while ((line3 = vbr.readLine()) != null) {
- if (line3.startsWith("r ")) {
- voteAllServerDescs++;
- String digest = Hex.encodeHexString(Base64.decodeBase64(
- line3.split(" ")[3] + "=")).toLowerCase();
- long published = validAfterFormat.parse(
- line3.split(" ")[4] + " "
- + line3.split(" ")[5]).getTime();
- String filename = outputDirectory
- + "/server-descriptor/"
- + descriptorFormat.format(new Date(published))
- + digest.substring(0, 1) + "/"
- + digest.substring(1, 2) + "/" + digest;
- if (new File(filename).exists()) {
- BufferedReader sbr = new BufferedReader(new FileReader(
- new File(filename)));
- String line2 = null;
- while ((line2 = sbr.readLine()) != null) {
- if (line2.startsWith("opt extra-info-digest ") ||
- line2.startsWith("extra-info-digest ")) {
- voteAllExtraInfos++;
- String extraInfoDigest = line2.startsWith("opt ") ?
- line2.split(" ")[2].toLowerCase() :
- line2.split(" ")[1].toLowerCase();
- String filename2 =
- outputDirectory.getAbsolutePath()
- + "/extra-info/"
- + descriptorFormat.format(new Date(published))
- + extraInfoDigest.substring(0, 1) + "/"
- + extraInfoDigest.substring(1, 2) + "/"
- + extraInfoDigest;
- if (new File(filename2).exists()) {
- voteFoundExtraInfos++;
- }
- }
- }
- sbr.close();
- voteFoundServerDescs++;
- }
- }
- }
- vbr.close();
- sb.append(String.format("%nV, %s, NA, %d/%d (%.1f%%), "
- + "%d/%d (%.1f%%)", validAfterTime,
- voteFoundServerDescs, voteAllServerDescs,
- 100.0D * (double) voteFoundServerDescs /
- (double) voteAllServerDescs,
- voteFoundExtraInfos, voteAllExtraInfos,
- 100.0D * (double) voteFoundExtraInfos /
- (double) voteAllExtraInfos));
- }
- } else if (line.startsWith("r ")) {
- allServerDescs++;
- String digest = Hex.encodeHexString(Base64.decodeBase64(
- line.split(" ")[3] + "=")).toLowerCase();
- long published = validAfterFormat.parse(
- line.split(" ")[4] + " " + line.split(" ")[5]).getTime();
- String filename = outputDirectory.getAbsolutePath()
- + "/server-descriptor/"
- + descriptorFormat.format(new Date(published))
- + digest.substring(0, 1) + "/"
- + digest.substring(1, 2) + "/" + digest;
- if (new File (filename).exists()) {
- BufferedReader sbr = new BufferedReader(new FileReader(
- new File(filename)));
- String line2 = null;
- while ((line2 = sbr.readLine()) != null) {
- if (line2.startsWith("opt extra-info-digest ") ||
- line2.startsWith("extra-info-digest ")) {
- allExtraInfos++;
- String extraInfoDigest = line2.startsWith("opt ") ?
- line2.split(" ")[2].toLowerCase() :
- line2.split(" ")[1].toLowerCase();
- String filename2 = outputDirectory.getAbsolutePath()
- + "/extra-info/"
- + descriptorFormat.format(new Date(published))
- + extraInfoDigest.substring(0, 1) + "/"
- + extraInfoDigest.substring(1, 2) + "/"
- + extraInfoDigest;
- if (new File (filename2).exists()) {
- foundExtraInfos++;
- }
- }
- }
- sbr.close();
- foundServerDescs++;
- }
- }
- }
- br.close();
- sb.append(String.format("%nC, %s, %d/%d (%.1f%%), "
- + "%d/%d (%.1f%%), %d/%d (%.1f%%)",
- validAfterTime, foundVotes, allVotes,
- 100.0D * (double) foundVotes / (double) allVotes,
- foundServerDescs, allServerDescs,
- 100.0D * (double) foundServerDescs / (double) allServerDescs,
- foundExtraInfos, allExtraInfos,
- 100.0D * (double) foundExtraInfos / (double) allExtraInfos));
- }
- this.logger.info(sb.toString());
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not dump statistics to disk.",
- e);
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not dump statistics to disk.",
- e);
- }
- }
-}
diff --git a/src/org/torproject/ernie/db/BridgeDescriptorParser.java b/src/org/torproject/ernie/db/BridgeDescriptorParser.java
deleted file mode 100644
index 7773525..0000000
--- a/src/org/torproject/ernie/db/BridgeDescriptorParser.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-public class BridgeDescriptorParser {
- private SanitizedBridgesWriter sbw;
- private Logger logger;
- public BridgeDescriptorParser(SanitizedBridgesWriter sbw) {
- this.sbw = sbw;
- this.logger =
- Logger.getLogger(BridgeDescriptorParser.class.getName());
- }
- public void parse(byte[] allData, String dateTime) {
- try {
- BufferedReader br = new BufferedReader(new StringReader(
- new String(allData, "US-ASCII")));
- String line = br.readLine();
- if (line == null) {
- return;
- } else if (line.startsWith("r ")) {
- if (this.sbw != null) {
- this.sbw.sanitizeAndStoreNetworkStatus(allData, dateTime);
- }
- } else if (line.startsWith("router ")) {
- if (this.sbw != null) {
- this.sbw.sanitizeAndStoreServerDescriptor(allData);
- }
- } else if (line.startsWith("extra-info ")) {
- if (this.sbw != null) {
- this.sbw.sanitizeAndStoreExtraInfoDescriptor(allData);
- }
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
- e);
- return;
- }
- }
-}
-
diff --git a/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java
deleted file mode 100644
index d03dcaf..0000000
--- a/src/org/torproject/ernie/db/BridgePoolAssignmentsProcessor.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/* Copyright 2011--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.DecoderException;
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-
-public class BridgePoolAssignmentsProcessor {
-
- public BridgePoolAssignmentsProcessor(File assignmentsDirectory,
- File sanitizedAssignmentsDirectory) {
-
- Logger logger =
- Logger.getLogger(BridgePoolAssignmentsProcessor.class.getName());
- if (assignmentsDirectory == null ||
- sanitizedAssignmentsDirectory == null) {
- IllegalArgumentException e = new IllegalArgumentException("Neither "
- + "assignmentsDirectory nor sanitizedAssignmentsDirectory may "
- + "be null!");
- throw e;
- }
-
- List<File> assignmentFiles = new ArrayList<File>();
- Stack<File> files = new Stack<File>();
- files.add(assignmentsDirectory);
- while (!files.isEmpty()) {
- File file = files.pop();
- if (file.isDirectory()) {
- files.addAll(Arrays.asList(file.listFiles()));
- } else if (!file.getName().endsWith(".gz")) {
- assignmentFiles.add(file);
- }
- }
-
- SimpleDateFormat assignmentFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- assignmentFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- SimpleDateFormat filenameFormat =
- new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- for (File assignmentFile : assignmentFiles) {
- logger.info("Processing bridge pool assignment file '"
- + assignmentFile.getAbsolutePath() + "'...");
- try {
- BufferedReader br = null;
- if (assignmentFile.getName().endsWith(".gz")) {
- br = new BufferedReader(new InputStreamReader(
- new GzipCompressorInputStream(new FileInputStream(
- assignmentFile))));
- } else {
- br = new BufferedReader(new FileReader(assignmentFile));
- }
- String line, bridgePoolAssignmentLine = null;
- SortedSet<String> sanitizedAssignments = new TreeSet<String>();
- boolean wroteLastLine = false, skipBefore20120504125947 = true;
- while ((line = br.readLine()) != null || !wroteLastLine) {
- if (line != null && line.startsWith("bridge-pool-assignment ")) {
- String[] parts = line.split(" ");
- if (parts.length != 3) {
- continue;
- }
- /* TODO Take out this temporary hack to ignore all assignments
- * coming from ponticum when byblos was still the official
- * BridgeDB host. */
- if (line.compareTo(
- "bridge-pool-assignment 2012-05-04 12:59:47") >= 0) {
- skipBefore20120504125947 = false;
- }
- }
- if (skipBefore20120504125947) {
- if (line == null) {
- break;
- } else {
- continue;
- }
- }
- if (line == null ||
- line.startsWith("bridge-pool-assignment ")) {
- if (bridgePoolAssignmentLine != null) {
- try {
- long bridgePoolAssignmentTime = assignmentFormat.parse(
- bridgePoolAssignmentLine.substring(
- "bridge-pool-assignment ".length())).getTime();
- File sanitizedAssignmentsFile = new File(
- sanitizedAssignmentsDirectory, filenameFormat.format(
- bridgePoolAssignmentTime));
- if (!sanitizedAssignmentsFile.exists()) {
- sanitizedAssignmentsFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- sanitizedAssignmentsFile));
- bw.write("@type bridge-pool-assignment 1.0\n");
- bw.write(bridgePoolAssignmentLine + "\n");
- for (String assignmentLine : sanitizedAssignments) {
- bw.write(assignmentLine + "\n");
- }
- bw.close();
- }
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write sanitized "
- + "bridge pool assignment file for line '"
- + bridgePoolAssignmentLine + "' to disk. Skipping "
- + "bridge pool assignment file '"
- + assignmentFile.getAbsolutePath() + "'.", e);
- break;
- } catch (ParseException e) {
- logger.log(Level.WARNING, "Could not write sanitized "
- + "bridge pool assignment file for line '"
- + bridgePoolAssignmentLine + "' to disk. Skipping "
- + "bridge pool assignment file '"
- + assignmentFile.getAbsolutePath() + "'.", e);
- break;
- }
- sanitizedAssignments.clear();
- }
- if (line == null) {
- wroteLastLine = true;
- } else {
- bridgePoolAssignmentLine = line;
- }
- } else {
- String[] parts = line.split(" ");
- if (parts.length < 2 || parts[0].length() < 40) {
- logger.warning("Unrecognized line '" + line
- + "'. Aborting.");
- break;
- }
- String hashedFingerprint = null;
- try {
- hashedFingerprint = DigestUtils.shaHex(Hex.decodeHex(
- line.split(" ")[0].toCharArray())).toLowerCase();
- } catch (DecoderException e) {
- logger.warning("Unable to decode hex fingerprint in line '"
- + line + "'. Aborting.");
- break;
- }
- String assignmentDetails = line.substring(40);
- sanitizedAssignments.add(hashedFingerprint
- + assignmentDetails);
- }
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not read bridge pool assignment "
- + "file '" + assignmentFile.getAbsolutePath()
- + "'. Skipping.", e);
- }
- }
-
- logger.info("Finished processing bridge pool assignment file(s).");
- }
-}
-
diff --git a/src/org/torproject/ernie/db/BridgeSnapshotReader.java b/src/org/torproject/ernie/db/BridgeSnapshotReader.java
deleted file mode 100644
index f21794d..0000000
--- a/src/org/torproject/ernie/db/BridgeSnapshotReader.java
+++ /dev/null
@@ -1,220 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-
-/**
- * Reads the half-hourly snapshots of bridge descriptors from Tonga.
- */
-public class BridgeSnapshotReader {
- public BridgeSnapshotReader(BridgeDescriptorParser bdp,
- File bridgeDirectoriesDir, File statsDirectory) {
-
- if (bdp == null || bridgeDirectoriesDir == null ||
- statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- Logger logger =
- Logger.getLogger(BridgeSnapshotReader.class.getName());
- SortedSet<String> parsed = new TreeSet<String>();
- File bdDir = bridgeDirectoriesDir;
- File pbdFile = new File(statsDirectory, "parsed-bridge-directories");
- boolean modified = false;
- if (bdDir.exists()) {
- if (pbdFile.exists()) {
- logger.fine("Reading file " + pbdFile.getAbsolutePath() + "...");
- try {
- BufferedReader br = new BufferedReader(new FileReader(pbdFile));
- String line = null;
- while ((line = br.readLine()) != null) {
- parsed.add(line);
- }
- br.close();
- logger.fine("Finished reading file "
- + pbdFile.getAbsolutePath() + ".");
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed reading file "
- + pbdFile.getAbsolutePath() + "!", e);
- return;
- }
- }
- logger.fine("Importing files in directory " + bridgeDirectoriesDir
- + "/...");
- Set<String> descriptorImportHistory = new HashSet<String>();
- int parsedFiles = 0, skippedFiles = 0, parsedStatuses = 0,
- parsedServerDescriptors = 0, skippedServerDescriptors = 0,
- parsedExtraInfoDescriptors = 0, skippedExtraInfoDescriptors = 0;
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(bdDir);
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- for (File f : pop.listFiles()) {
- filesInInputDir.add(f);
- }
- } else if (!parsed.contains(pop.getName())) {
- try {
- FileInputStream in = new FileInputStream(pop);
- if (in.available() > 0) {
- TarArchiveInputStream tais = null;
- if (pop.getName().endsWith(".tar.gz")) {
- GzipCompressorInputStream gcis =
- new GzipCompressorInputStream(in);
- tais = new TarArchiveInputStream(gcis);
- } else if (pop.getName().endsWith(".tar")) {
- tais = new TarArchiveInputStream(in);
- } else {
- continue;
- }
- BufferedInputStream bis = new BufferedInputStream(tais);
- String fn = pop.getName();
- String dateTime = fn.substring(11, 21) + " "
- + fn.substring(22, 24) + ":" + fn.substring(24, 26)
- + ":" + fn.substring(26, 28);
- while ((tais.getNextTarEntry()) != null) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- byte[] allData = baos.toByteArray();
- if (allData.length == 0) {
- continue;
- }
- String fileDigest = Hex.encodeHexString(DigestUtils.sha(
- allData));
- String ascii = new String(allData, "US-ASCII");
- BufferedReader br3 = new BufferedReader(new StringReader(
- ascii));
- String firstLine = null;
- while ((firstLine = br3.readLine()) != null) {
- if (firstLine.startsWith("@")) {
- continue;
- } else {
- break;
- }
- }
- if (firstLine.startsWith("r ")) {
- bdp.parse(allData, dateTime);
- parsedStatuses++;
- } else if (descriptorImportHistory.contains(fileDigest)) {
- /* Skip server descriptors or extra-info descriptors if
- * we parsed them before. */
- skippedFiles++;
- continue;
- } else {
- int start = -1, sig = -1, end = -1;
- String startToken =
- firstLine.startsWith("router ") ?
- "router " : "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String endToken = "\n-----END SIGNATURE-----\n";
- while (end < ascii.length()) {
- start = ascii.indexOf(startToken, end);
- if (start < 0) {
- break;
- }
- sig = ascii.indexOf(sigToken, start);
- if (sig < 0) {
- break;
- }
- sig += sigToken.length();
- end = ascii.indexOf(endToken, sig);
- if (end < 0) {
- break;
- }
- end += endToken.length();
- byte[] descBytes = new byte[end - start];
- System.arraycopy(allData, start, descBytes, 0,
- end - start);
- String descriptorDigest = Hex.encodeHexString(
- DigestUtils.sha(descBytes));
- if (!descriptorImportHistory.contains(
- descriptorDigest)) {
- bdp.parse(descBytes, dateTime);
- descriptorImportHistory.add(descriptorDigest);
- if (firstLine.startsWith("router ")) {
- parsedServerDescriptors++;
- } else {
- parsedExtraInfoDescriptors++;
- }
- } else {
- if (firstLine.startsWith("router ")) {
- skippedServerDescriptors++;
- } else {
- skippedExtraInfoDescriptors++;
- }
- }
- }
- }
- descriptorImportHistory.add(fileDigest);
- parsedFiles++;
- }
- bis.close();
- }
- in.close();
-
- /* Let's give some memory back, or we'll run out of it. */
- System.gc();
-
- parsed.add(pop.getName());
- modified = true;
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not parse bridge snapshot "
- + pop.getName() + "!", e);
- continue;
- }
- }
- }
- logger.fine("Finished importing files in directory "
- + bridgeDirectoriesDir + "/. In total, we parsed "
- + parsedFiles + " files (skipped " + skippedFiles
- + ") containing " + parsedStatuses + " statuses, "
- + parsedServerDescriptors + " server descriptors (skipped "
- + skippedServerDescriptors + "), and "
- + parsedExtraInfoDescriptors + " extra-info descriptors "
- + "(skipped " + skippedExtraInfoDescriptors + ").");
- if (!parsed.isEmpty() && modified) {
- logger.fine("Writing file " + pbdFile.getAbsolutePath() + "...");
- try {
- pbdFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile));
- for (String f : parsed) {
- bw.append(f + "\n");
- }
- bw.close();
- logger.fine("Finished writing file " + pbdFile.getAbsolutePath()
- + ".");
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed writing file "
- + pbdFile.getAbsolutePath() + "!", e);
- }
- }
- }
- }
-}
-
diff --git a/src/org/torproject/ernie/db/CachedRelayDescriptorReader.java b/src/org/torproject/ernie/db/CachedRelayDescriptorReader.java
deleted file mode 100644
index 4da3e44..0000000
--- a/src/org/torproject/ernie/db/CachedRelayDescriptorReader.java
+++ /dev/null
@@ -1,235 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.TimeZone;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-
-/**
- * Parses all descriptors in local directory cacheddesc/ and sorts them
- * into directory structure in directory-archive/.
- */
-public class CachedRelayDescriptorReader {
- public CachedRelayDescriptorReader(RelayDescriptorParser rdp,
- List<String> inputDirectories, File statsDirectory) {
-
- if (rdp == null || inputDirectories == null ||
- inputDirectories.isEmpty() || statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- StringBuilder dumpStats = new StringBuilder("Finished importing "
- + "relay descriptors from local Tor data directories:");
- Logger logger = Logger.getLogger(
- CachedRelayDescriptorReader.class.getName());
-
- /* Read import history containing SHA-1 digests of previously parsed
- * statuses and descriptors, so that we can skip them in this run. */
- Set<String> lastImportHistory = new HashSet<String>(),
- currentImportHistory = new HashSet<String>();
- File importHistoryFile = new File(statsDirectory,
- "cacheddesc-import-history");
- if (importHistoryFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- importHistoryFile));
- String line;
- while ((line = br.readLine()) != null) {
- lastImportHistory.add(line);
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not read import history from "
- + importHistoryFile.getAbsolutePath() + ".", e);
- }
- }
-
- /* Read cached descriptors directories. */
- for (String inputDirectory : inputDirectories) {
- File cachedDescDir = new File(inputDirectory);
- if (!cachedDescDir.exists()) {
- logger.warning("Directory " + cachedDescDir.getAbsolutePath()
- + " does not exist. Skipping.");
- continue;
- }
- logger.fine("Reading " + cachedDescDir.getAbsolutePath()
- + " directory.");
- for (File f : cachedDescDir.listFiles()) {
- try {
- // descriptors may contain non-ASCII chars; read as bytes to
- // determine digests
- BufferedInputStream bis =
- new BufferedInputStream(new FileInputStream(f));
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- bis.close();
- byte[] allData = baos.toByteArray();
- if (f.getName().equals("cached-consensus")) {
- /* Check if directory information is stale. */
- BufferedReader br = new BufferedReader(new StringReader(
- new String(allData, "US-ASCII")));
- String line = null;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("valid-after ")) {
- dumpStats.append("\n" + f.getName() + ": " + line.substring(
- "valid-after ".length()));
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (dateTimeFormat.parse(line.substring("valid-after ".
- length())).getTime() < System.currentTimeMillis()
- - 6L * 60L * 60L * 1000L) {
- logger.warning("Cached descriptor files in "
- + cachedDescDir.getAbsolutePath() + " are stale. "
- + "The valid-after line in cached-consensus is '"
- + line + "'.");
- dumpStats.append(" (stale!)");
- }
- break;
- }
- }
- br.close();
-
- /* Parse the cached consensus if we haven't parsed it before
- * (but regardless of whether it's stale or not). */
- if (rdp != null) {
- String digest = Hex.encodeHexString(DigestUtils.sha(
- allData));
- if (!lastImportHistory.contains(digest) &&
- !currentImportHistory.contains(digest)) {
- rdp.parse(allData);
- } else {
- dumpStats.append(" (skipped)");
- }
- currentImportHistory.add(digest);
- }
- } else if (f.getName().equals("v3-status-votes")) {
- int parsedNum = 0, skippedNum = 0;
- String ascii = new String(allData, "US-ASCII");
- String startToken = "network-status-version ";
- int end = ascii.length();
- int start = ascii.indexOf(startToken);
- while (start >= 0 && start < end) {
- int next = ascii.indexOf(startToken, start + 1);
- if (next < 0) {
- next = end;
- }
- if (start < next) {
- byte[] rawNetworkStatusBytes = new byte[next - start];
- System.arraycopy(allData, start, rawNetworkStatusBytes, 0,
- next - start);
- if (rdp != null) {
- String digest = Hex.encodeHexString(DigestUtils.sha(
- rawNetworkStatusBytes));
- if (!lastImportHistory.contains(digest) &&
- !currentImportHistory.contains(digest)) {
- rdp.parse(rawNetworkStatusBytes);
- parsedNum++;
- } else {
- skippedNum++;
- }
- currentImportHistory.add(digest);
- }
- }
- start = next;
- }
- dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum
- + ", skipped " + skippedNum + " votes");
- } else if (f.getName().startsWith("cached-descriptors") ||
- f.getName().startsWith("cached-extrainfo")) {
- String ascii = new String(allData, "US-ASCII");
- int start = -1, sig = -1, end = -1;
- String startToken =
- f.getName().startsWith("cached-descriptors") ?
- "router " : "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String endToken = "\n-----END SIGNATURE-----\n";
- int parsedNum = 0, skippedNum = 0;
- while (end < ascii.length()) {
- start = ascii.indexOf(startToken, end);
- if (start < 0) {
- break;
- }
- sig = ascii.indexOf(sigToken, start);
- if (sig < 0) {
- break;
- }
- sig += sigToken.length();
- end = ascii.indexOf(endToken, sig);
- if (end < 0) {
- break;
- }
- end += endToken.length();
- byte[] descBytes = new byte[end - start];
- System.arraycopy(allData, start, descBytes, 0, end - start);
- if (rdp != null) {
- String digest = Hex.encodeHexString(DigestUtils.sha(
- descBytes));
- if (!lastImportHistory.contains(digest) &&
- !currentImportHistory.contains(digest)) {
- rdp.parse(descBytes);
- parsedNum++;
- } else {
- skippedNum++;
- }
- currentImportHistory.add(digest);
- }
- }
- dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum
- + ", skipped " + skippedNum + " "
- + (f.getName().startsWith("cached-descriptors") ?
- "server" : "extra-info") + " descriptors");
- }
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed reading "
- + cachedDescDir.getAbsolutePath() + " directory.", e);
- } catch (ParseException e) {
- logger.log(Level.WARNING, "Failed reading "
- + cachedDescDir.getAbsolutePath() + " directory.", e);
- }
- }
- logger.fine("Finished reading "
- + cachedDescDir.getAbsolutePath() + " directory.");
- }
-
- /* Write import history containing SHA-1 digests to disk. */
- try {
- importHistoryFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- importHistoryFile));
- for (String digest : currentImportHistory) {
- bw.write(digest + "\n");
- }
- bw.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write import history to "
- + importHistoryFile.getAbsolutePath() + ".", e);
- }
-
- logger.info(dumpStats.toString());
- }
-}
-
diff --git a/src/org/torproject/ernie/db/Configuration.java b/src/org/torproject/ernie/db/Configuration.java
deleted file mode 100644
index e130dab..0000000
--- a/src/org/torproject/ernie/db/Configuration.java
+++ /dev/null
@@ -1,359 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.SortedMap;
-import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/**
- * Initialize configuration with hard-coded defaults, overwrite with
- * configuration in config file, if exists, and answer Main.java about our
- * configuration.
- */
-public class Configuration {
- private boolean writeDirectoryArchives = false;
- private String directoryArchivesOutputDirectory = "directory-archive/";
- private boolean importCachedRelayDescriptors = false;
- private List<String> cachedRelayDescriptorsDirectory =
- new ArrayList<String>(Arrays.asList("cacheddesc/".split(",")));
- private boolean importDirectoryArchives = false;
- private String directoryArchivesDirectory = "archives/";
- private boolean keepDirectoryArchiveImportHistory = false;
- private boolean writeSanitizedBridges = false;
- private boolean replaceIPAddressesWithHashes = false;
- private long limitBridgeDescriptorMappings = -1L;
- private String sanitizedBridgesWriteDirectory = "sanitized-bridges/";
- private boolean importBridgeSnapshots = false;
- private String bridgeSnapshotsDirectory = "bridge-directories/";
- private boolean downloadRelayDescriptors = false;
- private List<String> downloadFromDirectoryAuthorities = Arrays.asList((
- "86.59.21.38,76.73.17.194:9030,213.115.239.118:443,"
- + "193.23.244.244,208.83.223.34:443,128.31.0.34:9131,"
- + "194.109.206.212,212.112.245.170").split(","));
- private boolean downloadCurrentConsensus = true;
- private boolean downloadCurrentVotes = true;
- private boolean downloadMissingServerDescriptors = true;
- private boolean downloadMissingExtraInfoDescriptors = true;
- private boolean downloadAllServerDescriptors = false;
- private boolean downloadAllExtraInfoDescriptors = false;
- private boolean compressRelayDescriptorDownloads;
- private boolean downloadExitList = false;
- private boolean processBridgePoolAssignments = false;
- private String assignmentsDirectory = "assignments/";
- private String sanitizedAssignmentsDirectory = "sanitized-assignments/";
- private boolean processTorperfFiles = false;
- private String torperfOutputDirectory = "torperf/";
- private SortedMap<String, String> torperfSources = null;
- private List<String> torperfFiles = null;
- private boolean provideFilesViaRsync = false;
- private String rsyncDirectory = "rsync";
- public Configuration() {
-
- /* Initialize logger. */
- Logger logger = Logger.getLogger(Configuration.class.getName());
-
- /* Read config file, if present. */
- File configFile = new File("config");
- if (!configFile.exists()) {
- logger.warning("Could not find config file. In the default "
- + "configuration, we are not configured to read data from any "
- + "data source or write data to any data sink. You need to "
- + "create a config file (" + configFile.getAbsolutePath()
- + ") and provide at least one data source and one data sink. "
- + "Refer to the manual for more information.");
- return;
- }
- String line = null;
- boolean containsCachedRelayDescriptorsDirectory = false;
- try {
- BufferedReader br = new BufferedReader(new FileReader(configFile));
- while ((line = br.readLine()) != null) {
- if (line.startsWith("#") || line.length() < 1) {
- continue;
- } else if (line.startsWith("WriteDirectoryArchives")) {
- this.writeDirectoryArchives = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DirectoryArchivesOutputDirectory")) {
- this.directoryArchivesOutputDirectory = line.split(" ")[1];
- } else if (line.startsWith("ImportCachedRelayDescriptors")) {
- this.importCachedRelayDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("CachedRelayDescriptorsDirectory")) {
- if (!containsCachedRelayDescriptorsDirectory) {
- this.cachedRelayDescriptorsDirectory.clear();
- containsCachedRelayDescriptorsDirectory = true;
- }
- this.cachedRelayDescriptorsDirectory.add(line.split(" ")[1]);
- } else if (line.startsWith("ImportDirectoryArchives")) {
- this.importDirectoryArchives = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DirectoryArchivesDirectory")) {
- this.directoryArchivesDirectory = line.split(" ")[1];
- } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) {
- this.keepDirectoryArchiveImportHistory = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("WriteSanitizedBridges")) {
- this.writeSanitizedBridges = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("ReplaceIPAddressesWithHashes")) {
- this.replaceIPAddressesWithHashes = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("LimitBridgeDescriptorMappings")) {
- this.limitBridgeDescriptorMappings = Long.parseLong(
- line.split(" ")[1]);
- } else if (line.startsWith("SanitizedBridgesWriteDirectory")) {
- this.sanitizedBridgesWriteDirectory = line.split(" ")[1];
- } else if (line.startsWith("ImportBridgeSnapshots")) {
- this.importBridgeSnapshots = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("BridgeSnapshotsDirectory")) {
- this.bridgeSnapshotsDirectory = line.split(" ")[1];
- } else if (line.startsWith("DownloadRelayDescriptors")) {
- this.downloadRelayDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadFromDirectoryAuthorities")) {
- this.downloadFromDirectoryAuthorities = new ArrayList<String>();
- for (String dir : line.split(" ")[1].split(",")) {
- // test if IP:port pair has correct format
- if (dir.length() < 1) {
- logger.severe("Configuration file contains directory "
- + "authority IP:port of length 0 in line '" + line
- + "'! Exiting!");
- System.exit(1);
- }
- new URL("http://" + dir + "/");
- this.downloadFromDirectoryAuthorities.add(dir);
- }
- } else if (line.startsWith("DownloadCurrentConsensus")) {
- this.downloadCurrentConsensus = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadCurrentVotes")) {
- this.downloadCurrentVotes = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadMissingServerDescriptors")) {
- this.downloadMissingServerDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith(
- "DownloadMissingExtraInfoDescriptors")) {
- this.downloadMissingExtraInfoDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadAllServerDescriptors")) {
- this.downloadAllServerDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadAllExtraInfoDescriptors")) {
- this.downloadAllExtraInfoDescriptors = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("CompressRelayDescriptorDownloads")) {
- this.compressRelayDescriptorDownloads = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("DownloadExitList")) {
- this.downloadExitList = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("ProcessBridgePoolAssignments")) {
- this.processBridgePoolAssignments = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("AssignmentsDirectory")) {
- this.assignmentsDirectory = line.split(" ")[1];
- } else if (line.startsWith("SanitizedAssignmentsDirectory")) {
- this.sanitizedAssignmentsDirectory = line.split(" ")[1];
- } else if (line.startsWith("ProcessTorperfFiles")) {
- this.processTorperfFiles = Integer.parseInt(line.split(" ")[1])
- != 0;
- } else if (line.startsWith("TorperfOutputDirectory")) {
- } else if (line.startsWith("TorperfSource")) {
- if (this.torperfSources == null) {
- this.torperfSources = new TreeMap<String, String>();
- }
- String[] parts = line.split(" ");
- String sourceName = parts[1];
- String baseUrl = parts[2];
- this.torperfSources.put(sourceName, baseUrl);
- } else if (line.startsWith("TorperfFiles")) {
- if (this.torperfFiles == null) {
- this.torperfFiles = new ArrayList<String>();
- }
- String[] parts = line.split(" ");
- if (parts.length != 5) {
- logger.severe("Configuration file contains TorperfFiles "
- + "option with wrong number of values in line '" + line
- + "'! Exiting!");
- System.exit(1);
- }
- this.torperfFiles.add(line);
- } else if (line.startsWith("ProvideFilesViaRsync")) {
- this.provideFilesViaRsync = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("RsyncDirectory")) {
- this.rsyncDirectory = line.split(" ")[1];
- } else {
- logger.severe("Configuration file contains unrecognized "
- + "configuration key in line '" + line + "'! Exiting!");
- System.exit(1);
- }
- }
- br.close();
- } catch (ArrayIndexOutOfBoundsException e) {
- logger.severe("Configuration file contains configuration key "
- + "without value in line '" + line + "'. Exiting!");
- System.exit(1);
- } catch (MalformedURLException e) {
- logger.severe("Configuration file contains illegal URL or IP:port "
- + "pair in line '" + line + "'. Exiting!");
- System.exit(1);
- } catch (NumberFormatException e) {
- logger.severe("Configuration file contains illegal value in line '"
- + line + "' with legal values being 0 or 1. Exiting!");
- System.exit(1);
- } catch (IOException e) {
- logger.log(Level.SEVERE, "Unknown problem while reading config "
- + "file! Exiting!", e);
- System.exit(1);
- }
-
- /** Make some checks if configuration is valid. */
- if (!this.importCachedRelayDescriptors &&
- !this.importDirectoryArchives && !this.downloadRelayDescriptors &&
- !this.importBridgeSnapshots &&
- !this.downloadExitList && !this.processBridgePoolAssignments &&
- !this.writeDirectoryArchives && !this.writeSanitizedBridges &&
- !this.processTorperfFiles) {
- logger.warning("We have not been configured to read data from any "
- + "data source or write data to any data sink. You need to "
- + "edit your config file (" + configFile.getAbsolutePath()
- + ") and provide at least one data source and one data sink. "
- + "Refer to the manual for more information.");
- }
- if ((this.importCachedRelayDescriptors ||
- this.importDirectoryArchives || this.downloadRelayDescriptors) &&
- !this.writeDirectoryArchives) {
- logger.warning("We are configured to import/download relay "
- + "descriptors, but we don't have a single data sink to write "
- + "relay descriptors to.");
- }
- if (!(this.importCachedRelayDescriptors ||
- this.importDirectoryArchives || this.downloadRelayDescriptors) &&
- this.writeDirectoryArchives) {
- logger.warning("We are configured to write relay descriptor to at "
- + "least one data sink, but we don't have a single data source "
- + "containing relay descriptors.");
- }
- if (this.importBridgeSnapshots && !this.writeSanitizedBridges) {
- logger.warning("We are configured to import/download bridge "
- + "descriptors, but we don't have a single data sink to write "
- + "bridge descriptors to.");
- }
- if (!this.importBridgeSnapshots && this.writeSanitizedBridges) {
- logger.warning("We are configured to write bridge descriptor to at "
- + "least one data sink, but we don't have a single data source "
- + "containing bridge descriptors.");
- }
- }
- public boolean getWriteDirectoryArchives() {
- return this.writeDirectoryArchives;
- }
- public String getDirectoryArchivesOutputDirectory() {
- return this.directoryArchivesOutputDirectory;
- }
- public boolean getImportCachedRelayDescriptors() {
- return this.importCachedRelayDescriptors;
- }
- public List<String> getCachedRelayDescriptorDirectory() {
- return this.cachedRelayDescriptorsDirectory;
- }
- public boolean getImportDirectoryArchives() {
- return this.importDirectoryArchives;
- }
- public String getDirectoryArchivesDirectory() {
- return this.directoryArchivesDirectory;
- }
- public boolean getKeepDirectoryArchiveImportHistory() {
- return this.keepDirectoryArchiveImportHistory;
- }
- public boolean getWriteSanitizedBridges() {
- return this.writeSanitizedBridges;
- }
- public boolean getReplaceIPAddressesWithHashes() {
- return this.replaceIPAddressesWithHashes;
- }
- public long getLimitBridgeDescriptorMappings() {
- return this.limitBridgeDescriptorMappings;
- }
- public String getSanitizedBridgesWriteDirectory() {
- return this.sanitizedBridgesWriteDirectory;
- }
- public boolean getImportBridgeSnapshots() {
- return this.importBridgeSnapshots;
- }
- public String getBridgeSnapshotsDirectory() {
- return this.bridgeSnapshotsDirectory;
- }
- public boolean getDownloadRelayDescriptors() {
- return this.downloadRelayDescriptors;
- }
- public List<String> getDownloadFromDirectoryAuthorities() {
- return this.downloadFromDirectoryAuthorities;
- }
- public boolean getDownloadCurrentConsensus() {
- return this.downloadCurrentConsensus;
- }
- public boolean getDownloadCurrentVotes() {
- return this.downloadCurrentVotes;
- }
- public boolean getDownloadMissingServerDescriptors() {
- return this.downloadMissingServerDescriptors;
- }
- public boolean getDownloadMissingExtraInfoDescriptors() {
- return this.downloadMissingExtraInfoDescriptors;
- }
- public boolean getDownloadAllServerDescriptors() {
- return this.downloadAllServerDescriptors;
- }
- public boolean getDownloadAllExtraInfoDescriptors() {
- return this.downloadAllExtraInfoDescriptors;
- }
- public boolean getCompressRelayDescriptorDownloads() {
- return this.compressRelayDescriptorDownloads;
- }
- public boolean getDownloadExitList() {
- return this.downloadExitList;
- }
- public boolean getProcessBridgePoolAssignments() {
- return processBridgePoolAssignments;
- }
- public String getAssignmentsDirectory() {
- return assignmentsDirectory;
- }
- public String getSanitizedAssignmentsDirectory() {
- return sanitizedAssignmentsDirectory;
- }
- public boolean getProcessTorperfFiles() {
- return this.processTorperfFiles;
- }
- public String getTorperfOutputDirectory() {
- return this.torperfOutputDirectory;
- }
- public SortedMap<String, String> getTorperfSources() {
- return this.torperfSources;
- }
- public List<String> getTorperfFiles() {
- return this.torperfFiles;
- }
- public boolean getProvideFilesViaRsync() {
- return this.provideFilesViaRsync;
- }
- public String getRsyncDirectory() {
- return this.rsyncDirectory;
- }
-}
-
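For context, the Configuration class removed above expects one whitespace-separated key/value pair per line and exits on any unrecognized key. A minimal, self-contained sketch of that parsing convention (the class name ConfigSketch is hypothetical; the two keys shown are real keys from the removed class):

    import java.io.BufferedReader;
    import java.io.StringReader;

    public class ConfigSketch {
      public static void main(String[] args) throws Exception {
        String example = "RsyncDirectory rsync\nProvideFilesViaRsync 1\n";
        BufferedReader br = new BufferedReader(new StringReader(example));
        String line;
        while ((line = br.readLine()) != null) {
          if (line.startsWith("RsyncDirectory")) {
            System.out.println("rsync dir: " + line.split(" ")[1]);
          } else if (line.startsWith("ProvideFilesViaRsync")) {
            /* Boolean options are encoded as 0 or 1. */
            System.out.println("provide via rsync: "
                + (Integer.parseInt(line.split(" ")[1]) != 0));
          } else {
            /* The removed class logs this case and exits. */
            throw new IllegalArgumentException("Unrecognized key: " + line);
          }
        }
        br.close();
      }
    }
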
diff --git a/src/org/torproject/ernie/db/ExitListDownloader.java b/src/org/torproject/ernie/db/ExitListDownloader.java
deleted file mode 100644
index 01a554f..0000000
--- a/src/org/torproject/ernie/db/ExitListDownloader.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-public class ExitListDownloader {
- public ExitListDownloader() {
- Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
- try {
- logger.fine("Downloading exit list...");
- String exitAddressesUrl =
- "http://exitlist.torproject.org/exit-addresses";
- URL u = new URL(exitAddressesUrl);
- HttpURLConnection huc = (HttpURLConnection) u.openConnection();
- huc.setRequestMethod("GET");
- huc.connect();
- int response = huc.getResponseCode();
- if (response != 200) {
- logger.warning("Could not download exit list. Response code " +
- response);
- return;
- }
- BufferedInputStream in = new BufferedInputStream(
- huc.getInputStream());
- SimpleDateFormat printFormat =
- new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- Date downloadedDate = new Date();
- File exitListFile = new File("exitlist/" + printFormat.format(
- downloadedDate));
- exitListFile.getParentFile().mkdirs();
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- exitListFile));
- bw.write("@type tordnsel 1.0\n");
- bw.write("Downloaded " + dateTimeFormat.format(downloadedDate)
- + "\n");
- int len;
- byte[] data = new byte[1024];
- while ((len = in.read(data, 0, 1024)) >= 0) {
- bw.write(new String(data, 0, len));
- }
- in.close();
- bw.close();
- logger.fine("Finished downloading exit list.");
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed downloading exit list", e);
- return;
- }
-
- /* Write stats. */
- StringBuilder dumpStats = new StringBuilder("Finished downloading "
- + "exit list.\nLast three exit lists are:");
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(new File("exitlist"));
- SortedSet<File> lastThreeExitLists = new TreeSet<File>();
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- SortedSet<File> lastThreeElements = new TreeSet<File>();
- for (File f : pop.listFiles()) {
- lastThreeElements.add(f);
- }
- while (lastThreeElements.size() > 3) {
- lastThreeElements.remove(lastThreeElements.first());
- }
- for (File f : lastThreeElements) {
- filesInInputDir.add(f);
- }
- } else {
- lastThreeExitLists.add(pop);
- while (lastThreeExitLists.size() > 3) {
- lastThreeExitLists.remove(lastThreeExitLists.first());
- }
- }
- }
- for (File f : lastThreeExitLists) {
- dumpStats.append("\n" + f.getName());
- }
- logger.info(dumpStats.toString());
- }
-}
-
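The removed ExitListDownloader names its output files by UTC download time, with one directory level per date component. A small standalone sketch of that path scheme (class name is hypothetical):

    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.TimeZone;

    public class ExitListPathSketch {
      public static void main(String[] args) {
        /* Same pattern as the removed downloader: directories by date,
         * file name by download time, all in UTC. */
        SimpleDateFormat printFormat =
            new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
        printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
        System.out.println("exitlist/" + printFormat.format(new Date()));
        /* e.g., exitlist/2012/10/26/2012-10-26-12-44-42 */
      }
    }
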
diff --git a/src/org/torproject/ernie/db/LockFile.java b/src/org/torproject/ernie/db/LockFile.java
deleted file mode 100644
index 3255620..0000000
--- a/src/org/torproject/ernie/db/LockFile.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.logging.Logger;
-
-public class LockFile {
-
- private File lockFile;
- private Logger logger;
-
- public LockFile() {
- this.lockFile = new File("lock");
- this.logger = Logger.getLogger(LockFile.class.getName());
- }
-
- public boolean acquireLock() {
- this.logger.fine("Trying to acquire lock...");
- try {
- if (this.lockFile.exists()) {
- BufferedReader br = new BufferedReader(new FileReader("lock"));
- long runStarted = Long.parseLong(br.readLine());
- br.close();
- if (System.currentTimeMillis() - runStarted < 55L * 60L * 1000L) {
- return false;
- }
- }
- BufferedWriter bw = new BufferedWriter(new FileWriter("lock"));
- bw.append("" + System.currentTimeMillis() + "\n");
- bw.close();
- this.logger.fine("Acquired lock.");
- return true;
- } catch (IOException e) {
- this.logger.warning("Caught exception while trying to acquire "
- + "lock!");
- return false;
- }
- }
-
- public void releaseLock() {
- this.logger.fine("Releasing lock...");
- this.lockFile.delete();
- this.logger.fine("Released lock.");
- }
-}
-
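The removed LockFile treats a lock as stale once its timestamp is 55 minutes old, so a crashed run cannot block the next hourly execution for long. A minimal sketch of that rule (class and method names are hypothetical):

    public class LockSketch {
      /* A lock counts as stale after 55 minutes, matching the constant
       * in the removed acquireLock(). */
      static boolean isLockStale(long lockTimestampMillis) {
        return System.currentTimeMillis() - lockTimestampMillis
            >= 55L * 60L * 1000L;
      }
      public static void main(String[] args) {
        long twoHoursAgo = System.currentTimeMillis()
            - 2L * 60L * 60L * 1000L;
        System.out.println(isLockStale(twoHoursAgo));                 /* true */
        System.out.println(isLockStale(System.currentTimeMillis())); /* false */
      }
    }
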
diff --git a/src/org/torproject/ernie/db/LoggingConfiguration.java b/src/org/torproject/ernie/db/LoggingConfiguration.java
deleted file mode 100644
index b83ef53..0000000
--- a/src/org/torproject/ernie/db/LoggingConfiguration.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedWriter;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.TimeZone;
-import java.util.logging.ConsoleHandler;
-import java.util.logging.FileHandler;
-import java.util.logging.Formatter;
-import java.util.logging.Handler;
-import java.util.logging.Level;
-import java.util.logging.LogRecord;
-import java.util.logging.Logger;
-/**
- * Initialize logging configuration.
- *
- * Log levels used by ERNIE:
- *
- * - SEVERE: An event made it impossible to continue program execution.
- * - WARNING: A potential problem occurred that requires the operator to
- * look after the otherwise unattended setup.
- * - INFO: Messages on INFO level are meant to help the operator in making
- * sure that operation works as expected.
- * - FINE: Debug messages that are used to identify problems and which are
- * turned on by default.
- * - FINER: More detailed debug messages to investigate problems in more
- * detail. Not turned on by default. Increase log file limit when using
- * FINER.
- * - FINEST: Most detailed debug messages. Not used.
- */
-public class LoggingConfiguration {
- public LoggingConfiguration() {
-
- /* Remove default console handler. */
- for (Handler h : Logger.getLogger("").getHandlers()) {
- Logger.getLogger("").removeHandler(h);
- }
-
- /* Disable logging of internal Sun classes. */
- Logger.getLogger("sun").setLevel(Level.OFF);
-
- /* Set minimum log level we care about from INFO to FINER. */
- Logger.getLogger("").setLevel(Level.FINER);
-
- /* Create log handler that writes messages on WARNING or higher to the
- * console. */
- final SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- Formatter cf = new Formatter() {
- public String format(LogRecord record) {
- return dateTimeFormat.format(new Date(record.getMillis())) + " "
- + record.getMessage() + "\n";
- }
- };
- Handler ch = new ConsoleHandler();
- ch.setFormatter(cf);
- ch.setLevel(Level.WARNING);
- Logger.getLogger("").addHandler(ch);
-
- /* Initialize own logger for this class. */
- Logger logger = Logger.getLogger(
- LoggingConfiguration.class.getName());
-
- /* Create log handler that writes all messages on FINE or higher to a
- * local file. */
- Formatter ff = new Formatter() {
- public String format(LogRecord record) {
- return dateTimeFormat.format(new Date(record.getMillis())) + " "
- + record.getLevel() + " " + record.getSourceClassName() + " "
- + record.getSourceMethodName() + " " + record.getMessage()
- + (record.getThrown() != null ? " " + record.getThrown() : "")
- + "\n";
- }
- };
- try {
- FileHandler fh = new FileHandler("log", 5000000, 5, true);
- fh.setFormatter(ff);
- fh.setLevel(Level.FINE);
- Logger.getLogger("").addHandler(fh);
- } catch (SecurityException e) {
- logger.log(Level.WARNING, "No permission to create log file. "
- + "Logging to file is disabled.", e);
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write to log file. Logging to "
- + "file is disabled.", e);
- }
- }
-}
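
The removed LoggingConfiguration builds its console and file handlers around small custom java.util.logging Formatters. A self-contained sketch of the console-style formatter (class name is hypothetical; the log message in main is a placeholder):

    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.TimeZone;
    import java.util.logging.Formatter;
    import java.util.logging.Level;
    import java.util.logging.LogRecord;

    public class UtcLineFormatter extends Formatter {
      /* One "yyyy-MM-dd HH:mm:ss <message>" line per record, in UTC, as
       * in the console formatter above. */
      private final SimpleDateFormat dateTimeFormat;
      public UtcLineFormatter() {
        this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
      }
      public String format(LogRecord record) {
        return this.dateTimeFormat.format(new Date(record.getMillis()))
            + " " + record.getMessage() + "\n";
      }
      public static void main(String[] args) {
        System.out.print(new UtcLineFormatter().format(
            new LogRecord(Level.INFO, "Started ERNIE.")));
      }
    }
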
diff --git a/src/org/torproject/ernie/db/Main.java b/src/org/torproject/ernie/db/Main.java
deleted file mode 100644
index 04cc868..0000000
--- a/src/org/torproject/ernie/db/Main.java
+++ /dev/null
@@ -1,160 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.File;
-import java.util.List;
-import java.util.logging.Logger;
-
-/**
- * Coordinate downloading and parsing of descriptors and extraction of
- * statistically relevant data for later processing with R.
- */
-public class Main {
- public static void main(String[] args) {
-
- /* Initialize logging configuration. */
- new LoggingConfiguration();
-
- Logger logger = Logger.getLogger(Main.class.getName());
- logger.info("Starting ERNIE.");
-
- // Initialize configuration
- Configuration config = new Configuration();
-
- // Use lock file to avoid overlapping runs
- LockFile lf = new LockFile();
- if (!lf.acquireLock()) {
- logger.severe("Warning: ERNIE is already running or has not exited "
- + "cleanly! Exiting!");
- System.exit(1);
- }
-
- // Define stats directory for temporary files
- File statsDirectory = new File("stats");
-
- // Prepare writing relay descriptor archive to disk
- ArchiveWriter aw = config.getWriteDirectoryArchives() ?
- new ArchiveWriter(
- new File(config.getDirectoryArchivesOutputDirectory())) : null;
-
- // Prepare relay descriptor parser (only if we are writing stats or
- // directory archives to disk)
- RelayDescriptorParser rdp = aw != null ?
- new RelayDescriptorParser(aw) : null;
-
- // Import/download relay descriptors from the various sources
- if (rdp != null) {
- RelayDescriptorDownloader rdd = null;
- if (config.getDownloadRelayDescriptors()) {
- List<String> dirSources =
- config.getDownloadFromDirectoryAuthorities();
- rdd = new RelayDescriptorDownloader(rdp, dirSources,
- config.getDownloadCurrentConsensus(),
- config.getDownloadCurrentVotes(),
- config.getDownloadMissingServerDescriptors(),
- config.getDownloadMissingExtraInfoDescriptors(),
- config.getDownloadAllServerDescriptors(),
- config.getDownloadAllExtraInfoDescriptors(),
- config.getCompressRelayDescriptorDownloads());
- rdp.setRelayDescriptorDownloader(rdd);
- }
- if (config.getImportCachedRelayDescriptors()) {
- new CachedRelayDescriptorReader(rdp,
- config.getCachedRelayDescriptorDirectory(), statsDirectory);
- if (aw != null) {
- aw.intermediateStats("importing relay descriptors from local "
- + "Tor data directories");
- }
- }
- if (config.getImportDirectoryArchives()) {
- new ArchiveReader(rdp,
- new File(config.getDirectoryArchivesDirectory()),
- statsDirectory,
- config.getKeepDirectoryArchiveImportHistory());
- if (aw != null) {
- aw.intermediateStats("importing relay descriptors from local "
- + "directory");
- }
- }
- if (rdd != null) {
- rdd.downloadDescriptors();
- rdd.writeFile();
- rdd = null;
- if (aw != null) {
- aw.intermediateStats("downloading relay descriptors from the "
- + "directory authorities");
- }
- }
- }
-
- // Write output to disk that only depends on relay descriptors
- if (aw != null) {
- aw.dumpStats();
- aw = null;
- }
-
- // Prepare sanitized bridge descriptor writer
- SanitizedBridgesWriter sbw = config.getWriteSanitizedBridges() ?
- new SanitizedBridgesWriter(
- new File(config.getSanitizedBridgesWriteDirectory()),
- statsDirectory, config.getReplaceIPAddressesWithHashes(),
- config.getLimitBridgeDescriptorMappings()) : null;
-
- // Prepare bridge descriptor parser
- BridgeDescriptorParser bdp = config.getWriteSanitizedBridges()
- ? new BridgeDescriptorParser(sbw) : null;
-
- // Import bridge descriptors
- if (bdp != null && config.getImportBridgeSnapshots()) {
- new BridgeSnapshotReader(bdp,
- new File(config.getBridgeSnapshotsDirectory()),
- statsDirectory);
- }
-
- // Finish writing sanitized bridge descriptors to disk
- if (sbw != null) {
- sbw.finishWriting();
- sbw = null;
- }
-
- // Download exit list and store it to disk
- if (config.getDownloadExitList()) {
- new ExitListDownloader();
- }
-
- // Process bridge pool assignments
- if (config.getProcessBridgePoolAssignments()) {
- new BridgePoolAssignmentsProcessor(
- new File(config.getAssignmentsDirectory()),
- new File(config.getSanitizedAssignmentsDirectory()));
- }
-
- // Process Torperf files
- if (config.getProcessTorperfFiles()) {
- new TorperfDownloader(new File(config.getTorperfOutputDirectory()),
- config.getTorperfSources(), config.getTorperfFiles());
- }
-
- // Copy recently published files to a local directory that can then
- // be served via rsync.
- if (config.getProvideFilesViaRsync()) {
- new RsyncDataProvider(
- !config.getWriteDirectoryArchives() ? null :
- new File(config.getDirectoryArchivesOutputDirectory()),
- !config.getWriteSanitizedBridges() ? null :
- new File(config.getSanitizedBridgesWriteDirectory()),
- !config.getProcessBridgePoolAssignments() ? null :
- new File(config.getSanitizedAssignmentsDirectory()),
- config.getDownloadExitList(),
- !config.getProcessTorperfFiles() ? null :
- new File(config.getTorperfOutputDirectory()),
- new File(config.getRsyncDirectory()));
- }
-
- // Remove lock file
- lf.releaseLock();
-
- logger.info("Terminating ERNIE.");
- }
-}
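
Main constructs each data sink only when it is configured, feeds it from whichever sources are enabled, and closes it before moving on. A condensed, hypothetical sketch of that wiring pattern (all names here are invented stand-ins, not the removed classes):

    public class PipelineSketch {
      interface Sink {
        void write(String descriptor);
        void finishWriting();
      }
      static class PrintingSink implements Sink {
        public void write(String descriptor) {
          System.out.println("sink <- " + descriptor);
        }
        public void finishWriting() {
          System.out.println("sink closed");
        }
      }
      public static void main(String[] args) {
        /* Stand-ins for Configuration getters. */
        boolean writeSanitizedBridges = true, importBridgeSnapshots = true;
        /* Only construct the sink if configured, as Main does. */
        Sink sbw = writeSanitizedBridges ? new PrintingSink() : null;
        /* Only read the source if there is a sink to feed. */
        if (sbw != null && importBridgeSnapshots) {
          sbw.write("bridge descriptor");
        }
        /* Finish writing before the next pipeline stage. */
        if (sbw != null) {
          sbw.finishWriting();
        }
      }
    }
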
diff --git a/src/org/torproject/ernie/db/RelayDescriptorDownloader.java b/src/org/torproject/ernie/db/RelayDescriptorDownloader.java
deleted file mode 100644
index f7e9468..0000000
--- a/src/org/torproject/ernie/db/RelayDescriptorDownloader.java
+++ /dev/null
@@ -1,821 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import java.util.zip.InflaterInputStream;
-
-/**
- * Downloads relay descriptors from the directory authorities via HTTP.
- * Keeps a list of missing descriptors that gets updated by parse results
- * from <code>RelayDescriptorParser</code> and downloads all missing
- * descriptors that have been published in the last 24 hours. Also
- * downloads all server and extra-info descriptors known to a directory
- * authority at most once a day.
- */
-public class RelayDescriptorDownloader {
-
- /**
- * Text file containing the descriptors that we are missing and that we
- * want to download. Lines are formatted as:
- *
- * - "consensus,<validafter>,<parsed>",
- * - "vote,<validafter>,<fingerprint>,<parsed>",
- * - "server,<published>,<relayid>,<descid>,<parsed>", or
- * - "extra,<published>,<relayid>,<descid>,<parsed>".
- */
- private File missingDescriptorsFile;
-
- /**
- * Relay descriptors that we are missing and that we want to download
- * either in this execution or write to disk and try next time. Map keys
- * contain comma-separated values as in the missing descriptors files
- * without the "parsed" column. Map values contain the "parsed" column.
- */
- private SortedMap<String, String> missingDescriptors;
-
- /**
- * Text file containing the IP addresses (and Dir ports if not 80) of
- * directory authorities and when we last downloaded all server and
- * extra-info descriptors from them, so that we can avoid downloading
- * them too often.
- */
- private File lastDownloadedAllDescriptorsFile;
-
- /**
- * Map of directory authorities and when we last downloaded all server
- * and extra-info descriptors from them. Map keys are IP addresses (and
- * Dir ports if not 80), map values are timestamps.
- */
- private Map<String, String> lastDownloadedAllDescriptors;
-
- /**
- * <code>RelayDescriptorParser</code> that we will hand over the
- * downloaded descriptors for parsing.
- */
- private RelayDescriptorParser rdp;
-
- /**
- * Directory authorities that we will try to download missing
- * descriptors from.
- */
- private List<String> authorities;
-
- /**
- * Should we try to download the current consensus if we don't have it?
- */
- private boolean downloadCurrentConsensus;
-
- /**
- * Should we try to download current votes if we don't have them?
- */
- private boolean downloadCurrentVotes;
-
- /**
- * Should we try to download missing server descriptors that have been
- * published within the past 24 hours?
- */
- private boolean downloadMissingServerDescriptors;
-
- /**
- * Should we try to download missing extra-info descriptors that have
- * been published within the past 24 hours?
- */
- private boolean downloadMissingExtraInfos;
-
- /**
- * Should we try to download all server descriptors from the authorities
- * once every 24 hours?
- */
- private boolean downloadAllServerDescriptors;
-
- /**
- * Should we try to download all extra-info descriptors from the
- * authorities once every 24 hours?
- */
- private boolean downloadAllExtraInfos;
-
- /**
- * Should we download zlib-compressed versions of descriptors by adding
- * ".z" to URLs?
- */
- private boolean downloadCompressed;
-
- /**
- * valid-after time that we expect the current consensus and votes to
- * have, formatted "yyyy-MM-dd HH:mm:ss". We only expect to find
- * consensuses and votes with this valid-after time on the directory
- * authorities. This time is initialized as the beginning of the current
- * hour.
- */
- private String currentValidAfter;
-
- /**
- * Cut-off time for missing server and extra-info descriptors, formatted
- * "yyyy-MM-dd HH:mm:ss". This time is initialized as the current system
- * time minus 24 hours.
- */
- private String descriptorCutOff;
-
- /**
- * Cut-off time for downloading all server and extra-info descriptors
- * from the directory authorities, formatted "yyyy-MM-dd HH:mm:ss". This
- * time is initialized as the current system time minus 23:30 hours.
- */
- private String downloadAllDescriptorsCutOff;
-
- /**
- * Directory authorities that we plan to download all server and
- * extra-info descriptors from in this execution.
- */
- private Set<String> downloadAllDescriptorsFromAuthorities;
-
- /**
- * Current timestamp that is written to the missing list for descriptors
- * that we parsed in this execution and for authorities that we
- * downloaded all server and extra-info descriptors from.
- */
- private String currentTimestamp;
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- /**
- * Number of requests sent to each directory authority, for inclusion
- * in the log statistics.
- */
- private Map<String, Integer> requestsByAuthority;
-
- /**
- * Counters for descriptors that we had on the missing list at the
- * beginning of the execution, that we added to the missing list,
- * that we requested, and that we successfully downloaded in this
- * execution.
- */
- private int oldMissingConsensuses = 0, oldMissingVotes = 0,
- oldMissingServerDescriptors = 0, oldMissingExtraInfoDescriptors = 0,
- newMissingConsensuses = 0, newMissingVotes = 0,
- newMissingServerDescriptors = 0, newMissingExtraInfoDescriptors = 0,
- requestedConsensuses = 0, requestedVotes = 0,
- requestedMissingServerDescriptors = 0,
- requestedAllServerDescriptors = 0,
- requestedMissingExtraInfoDescriptors = 0,
- requestedAllExtraInfoDescriptors = 0, downloadedConsensuses = 0,
- downloadedVotes = 0, downloadedMissingServerDescriptors = 0,
- downloadedAllServerDescriptors = 0,
- downloadedMissingExtraInfoDescriptors = 0,
- downloadedAllExtraInfoDescriptors = 0;
-
- /**
- * Initializes this class, including reading in missing descriptors from
- * <code>stats/missing-relay-descriptors</code> and the times when we
- * last downloaded all server and extra-info descriptors from
- * <code>stats/last-downloaded-all-descriptors</code>.
- */
- public RelayDescriptorDownloader(RelayDescriptorParser rdp,
- List<String> authorities, boolean downloadCurrentConsensus,
- boolean downloadCurrentVotes,
- boolean downloadMissingServerDescriptors,
- boolean downloadMissingExtraInfos,
- boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos,
- boolean downloadCompressed) {
-
- /* Memorize argument values. */
- this.rdp = rdp;
- this.authorities = new ArrayList<String>(authorities);
- this.downloadCurrentConsensus = downloadCurrentConsensus;
- this.downloadCurrentVotes = downloadCurrentVotes;
- this.downloadMissingServerDescriptors =
- downloadMissingServerDescriptors;
- this.downloadMissingExtraInfos = downloadMissingExtraInfos;
- this.downloadAllServerDescriptors = downloadAllServerDescriptors;
- this.downloadAllExtraInfos = downloadAllExtraInfos;
- this.downloadCompressed = downloadCompressed;
-
- /* Shuffle list of authorities for better load balancing over time. */
- Collections.shuffle(this.authorities);
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(
- RelayDescriptorDownloader.class.getName());
-
- /* Prepare cut-off times and timestamp for the missing descriptors
- * list and the list of authorities to download all server and
- * extra-info descriptors from. */
- SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- format.setTimeZone(TimeZone.getTimeZone("UTC"));
- long now = System.currentTimeMillis();
- this.currentValidAfter = format.format((now / (60L * 60L * 1000L)) *
- (60L * 60L * 1000L));
- this.descriptorCutOff = format.format(now - 24L * 60L * 60L * 1000L);
- this.currentTimestamp = format.format(now);
- this.downloadAllDescriptorsCutOff = format.format(now
- - 23L * 60L * 60L * 1000L - 30L * 60L * 1000L);
-
- /* Read list of missing descriptors from disk and memorize those that
- * we are interested in and that are likely to be found on the
- * directory authorities. */
- this.missingDescriptors = new TreeMap<String, String>();
- this.missingDescriptorsFile = new File(
- "stats/missing-relay-descriptors");
- if (this.missingDescriptorsFile.exists()) {
- try {
- this.logger.fine("Reading file "
- + this.missingDescriptorsFile.getAbsolutePath() + "...");
- BufferedReader br = new BufferedReader(new FileReader(
- this.missingDescriptorsFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (line.split(",").length > 2) {
- String published = line.split(",")[1];
- if (((line.startsWith("consensus,") ||
- line.startsWith("vote,")) &&
- this.currentValidAfter.equals(published)) ||
- ((line.startsWith("server,") ||
- line.startsWith("extra,")) &&
- this.descriptorCutOff.compareTo(published) < 0)) {
- if (!line.endsWith("NA")) {
- /* Not missing. */
- } else if (line.startsWith("consensus,")) {
- oldMissingConsensuses++;
- } else if (line.startsWith("vote,")) {
- oldMissingVotes++;
- } else if (line.startsWith("server,")) {
- oldMissingServerDescriptors++;
- } else if (line.startsWith("extra,")) {
- oldMissingExtraInfoDescriptors++;
- }
- int separateAt = line.lastIndexOf(",");
- this.missingDescriptors.put(line.substring(0,
- separateAt), line.substring(separateAt + 1));
- }
- } else {
- this.logger.fine("Invalid line '" + line + "' in "
- + this.missingDescriptorsFile.getAbsolutePath()
- + ". Ignoring.");
- }
- }
- br.close();
- this.logger.fine("Finished reading file "
- + this.missingDescriptorsFile.getAbsolutePath() + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed to read file "
- + this.missingDescriptorsFile.getAbsolutePath()
- + "! This means that we might forget to dowload relay "
- + "descriptors we are missing.", e);
- }
- }
-
- /* Read list of directory authorities and when we last downloaded all
- * server and extra-info descriptors from them. */
- this.lastDownloadedAllDescriptors = new HashMap<String, String>();
- this.lastDownloadedAllDescriptorsFile = new File(
- "stats/last-downloaded-all-descriptors");
- if (this.lastDownloadedAllDescriptorsFile.exists()) {
- try {
- this.logger.fine("Reading file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + "...");
- BufferedReader br = new BufferedReader(new FileReader(
- this.lastDownloadedAllDescriptorsFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (line.split(",").length != 2) {
- this.logger.fine("Invalid line '" + line + "' in "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + ". Ignoring.");
- } else {
- String[] parts = line.split(",");
- String authority = parts[0];
- String lastDownloaded = parts[1];
- this.lastDownloadedAllDescriptors.put(authority,
- lastDownloaded);
- }
- }
- br.close();
- this.logger.fine("Finished reading file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed to read file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + "! This means that we might download all server and "
- + "extra-info descriptors more often than we should.", e);
- }
- }
-
- /* Make a list of at most two directory authorities that we want to
- * download all server and extra-info descriptors from. */
- this.downloadAllDescriptorsFromAuthorities = new HashSet<String>();
- for (String authority : this.authorities) {
- if (!this.lastDownloadedAllDescriptors.containsKey(authority) ||
- this.lastDownloadedAllDescriptors.get(authority).compareTo(
- this.downloadAllDescriptorsCutOff) < 0) {
- this.downloadAllDescriptorsFromAuthorities.add(authority);
- }
- if (this.downloadAllDescriptorsFromAuthorities.size() >= 2) {
- break;
- }
- }
-
- /* Prepare statistics on this execution. */
- this.requestsByAuthority = new HashMap<String, Integer>();
- for (String authority : this.authorities) {
- this.requestsByAuthority.put(authority, 0);
- }
- }
-
- /**
- * We have parsed a consensus. Take this consensus off the missing list
- * and add the votes created by the given <code>authorities</code> and
- * the <code>serverDescriptors</code> which are in the format
- * "<published>,<relayid>,<descid>" to that list.
- */
- public void haveParsedConsensus(String validAfter,
- Set<String> authorities, Set<String> serverDescriptors) {
-
- /* Mark consensus as parsed. */
- if (this.currentValidAfter.equals(validAfter)) {
- String consensusKey = "consensus," + validAfter;
- this.missingDescriptors.put(consensusKey, this.currentTimestamp);
-
- /* Add votes to missing list. */
- for (String authority : authorities) {
- String voteKey = "vote," + validAfter + "," + authority;
- if (!this.missingDescriptors.containsKey(voteKey)) {
- this.missingDescriptors.put(voteKey, "NA");
- this.newMissingVotes++;
- }
- }
- }
-
- /* Add server descriptors to missing list. */
- for (String serverDescriptor : serverDescriptors) {
- String published = serverDescriptor.split(",")[0];
- if (this.descriptorCutOff.compareTo(published) < 0) {
- String serverDescriptorKey = "server," + serverDescriptor;
- if (!this.missingDescriptors.containsKey(
- serverDescriptorKey)) {
- this.missingDescriptors.put(serverDescriptorKey, "NA");
- this.newMissingServerDescriptors++;
- }
- }
- }
- }
-
- /**
- * We have parsed a vote. Take this vote off the missing list and add
- * the <code>serverDescriptors</code> which are in the format
- * "<published>,<relayid>,<descid>" to that list.
- */
- public void haveParsedVote(String validAfter, String fingerprint,
- Set<String> serverDescriptors) {
-
- /* Mark vote as parsed. */
- if (this.currentValidAfter.equals(validAfter)) {
- String voteKey = "vote," + validAfter + "," + fingerprint;
- this.missingDescriptors.put(voteKey, this.currentTimestamp);
- }
-
- /* Add server descriptors to missing list. */
- for (String serverDescriptor : serverDescriptors) {
- String published = serverDescriptor.split(",")[0];
- if (this.descriptorCutOff.compareTo(published) < 0) {
- String serverDescriptorKey = "server," + serverDescriptor;
- if (!this.missingDescriptors.containsKey(
- serverDescriptorKey)) {
- this.missingDescriptors.put(serverDescriptorKey, "NA");
- this.newMissingServerDescriptors++;
- }
- }
- }
- }
-
- /**
- * We have parsed a server descriptor. Take this server descriptor off
- * the missing list and put the extra-info descriptor digest on that
- * list.
- */
- public void haveParsedServerDescriptor(String published,
- String relayIdentity, String serverDescriptorDigest,
- String extraInfoDigest) {
-
- /* Mark server descriptor as parsed. */
- if (this.descriptorCutOff.compareTo(published) < 0) {
- String serverDescriptorKey = "server," + published + ","
- + relayIdentity + "," + serverDescriptorDigest;
- this.missingDescriptors.put(serverDescriptorKey,
- this.currentTimestamp);
-
- /* Add extra-info descriptor to missing list. */
- if (extraInfoDigest != null) {
- String extraInfoKey = "extra," + published + ","
- + relayIdentity + "," + extraInfoDigest;
- if (!this.missingDescriptors.containsKey(extraInfoKey)) {
- this.missingDescriptors.put(extraInfoKey, "NA");
- this.newMissingExtraInfoDescriptors++;
- }
- }
- }
- }
-
- /**
- * We have parsed an extra-info descriptor. Take it off the missing
- * list.
- */
- public void haveParsedExtraInfoDescriptor(String published,
- String relayIdentity, String extraInfoDigest) {
- if (this.descriptorCutOff.compareTo(published) < 0) {
- String extraInfoKey = "extra," + published + ","
- + relayIdentity + "," + extraInfoDigest;
- this.missingDescriptors.put(extraInfoKey, this.currentTimestamp);
- }
- }
-
- /**
- * Downloads missing descriptors that we think might still be available
- * on the directory authorities as well as all server and extra-info
- * descriptors once per day.
- */
- public void downloadDescriptors() {
-
- /* Put the current consensus on the missing list, unless we already
- * have it. */
- String consensusKey = "consensus," + this.currentValidAfter;
- if (!this.missingDescriptors.containsKey(consensusKey)) {
- this.missingDescriptors.put(consensusKey, "NA");
- this.newMissingConsensuses++;
- }
-
- /* Download descriptors from the authorities in random order, so
- * that we distribute the load somewhat fairly over time. */
- for (String authority : authorities) {
-
- /* Make all requests to an authority in a single try block. If
- * something goes wrong with this authority, we give up on all
- * downloads and continue with the next authority. */
- /* TODO Some authorities provide very little bandwidth and could
- * slow down the entire download process. Ponder adding a timeout of
- * 3 or 5 minutes per authority to avoid getting in the way of the
- * next execution. */
- try {
-
- /* Start with downloading the current consensus, unless we already
- * have it. */
- if (downloadCurrentConsensus) {
- if (this.missingDescriptors.containsKey(consensusKey) &&
- this.missingDescriptors.get(consensusKey).equals("NA")) {
- this.requestedConsensuses++;
- this.downloadedConsensuses +=
- this.downloadResourceFromAuthority(authority,
- "/tor/status-vote/current/consensus");
- }
- }
-
- /* Next, try to download current votes that we're missing. */
- if (downloadCurrentVotes) {
- String voteKeyPrefix = "vote," + this.currentValidAfter;
- SortedSet<String> fingerprints = new TreeSet<String>();
- for (Map.Entry<String, String> e :
- this.missingDescriptors.entrySet()) {
- if (e.getValue().equals("NA") &&
- e.getKey().startsWith(voteKeyPrefix)) {
- String fingerprint = e.getKey().split(",")[2];
- fingerprints.add(fingerprint);
- }
- }
- for (String fingerprint : fingerprints) {
- this.requestedVotes++;
- this.downloadedVotes +=
- this.downloadResourceFromAuthority(authority,
- "/tor/status-vote/current/" + fingerprint);
- }
- }
-
- /* Download either all server and extra-info descriptors or only
- * those that we're missing. Start with server descriptors, then
- * request extra-info descriptors. */
- List<String> types = Arrays.asList("server", "extra");
- for (String type : types) {
-
- /* Download all server or extra-info descriptors from this
- * authority if we haven't done so for 24 hours and if we're
- * configured to do so. */
- if (this.downloadAllDescriptorsFromAuthorities.contains(
- authority) && ((type.equals("server") &&
- this.downloadAllServerDescriptors) ||
- (type.equals("extra") && this.downloadAllExtraInfos))) {
- int downloadedAllDescriptors =
- this.downloadResourceFromAuthority(authority, "/tor/"
- + type + "/all");
- if (type.equals("server")) {
- this.requestedAllServerDescriptors++;
- this.downloadedAllServerDescriptors +=
- downloadedAllDescriptors;
- } else {
- this.requestedAllExtraInfoDescriptors++;
- this.downloadedAllExtraInfoDescriptors +=
- downloadedAllDescriptors;
- }
-
- /* Download missing server or extra-info descriptors if we're
- * configured to do so. */
- } else if ((type.equals("server") &&
- this.downloadMissingServerDescriptors) ||
- (type.equals("extra") && this.downloadMissingExtraInfos)) {
-
- /* Go through the list of missing descriptors of this type
- * and combine the descriptor identifiers into a URL of up to
- * 96 descriptors that we can download at once. */
- SortedSet<String> descriptorIdentifiers =
- new TreeSet<String>();
- for (Map.Entry<String, String> e :
- this.missingDescriptors.entrySet()) {
- if (e.getValue().equals("NA") &&
- e.getKey().startsWith(type + ",") &&
- this.descriptorCutOff.compareTo(
- e.getKey().split(",")[1]) < 0) {
- String descriptorIdentifier = e.getKey().split(",")[3];
- descriptorIdentifiers.add(descriptorIdentifier);
- }
- }
- StringBuilder combinedResource = null;
- int descriptorsInCombinedResource = 0,
- requestedDescriptors = 0, downloadedDescriptors = 0;
- for (String descriptorIdentifier : descriptorIdentifiers) {
- if (descriptorsInCombinedResource >= 96) {
- requestedDescriptors += descriptorsInCombinedResource;
- downloadedDescriptors +=
- this.downloadResourceFromAuthority(authority,
- combinedResource.toString());
- combinedResource = null;
- descriptorsInCombinedResource = 0;
- }
- if (descriptorsInCombinedResource == 0) {
- combinedResource = new StringBuilder("/tor/" + type
- + "/d/" + descriptorIdentifier);
- } else {
- combinedResource.append("+" + descriptorIdentifier);
- }
- descriptorsInCombinedResource++;
- }
- if (descriptorsInCombinedResource > 0) {
- requestedDescriptors += descriptorsInCombinedResource;
- downloadedDescriptors +=
- this.downloadResourceFromAuthority(authority,
- combinedResource.toString());
- }
- if (type.equals("server")) {
- this.requestedMissingServerDescriptors +=
- requestedDescriptors;
- this.downloadedMissingServerDescriptors +=
- downloadedDescriptors;
- } else {
- this.requestedMissingExtraInfoDescriptors +=
- requestedDescriptors;
- this.downloadedMissingExtraInfoDescriptors +=
- downloadedDescriptors;
- }
- }
- }
-
- /* If a download failed, stop requesting descriptors from this
- * authority and move on to the next. */
- } catch (IOException e) {
- logger.log(Level.FINE, "Failed downloading from " + authority
- + "!", e);
- }
- }
- }
-
- /**
- * Attempts to download one or more descriptors identified by a resource
- * string from a directory authority and passes the returned
- * descriptor(s) to the <code>RelayDescriptorParser</code> upon success.
- * Returns the number of descriptors contained in the reply. Throws an
- * <code>IOException</code> if something goes wrong while downloading.
- */
- private int downloadResourceFromAuthority(String authority,
- String resource) throws IOException {
- byte[] allData = null;
- this.requestsByAuthority.put(authority,
- this.requestsByAuthority.get(authority) + 1);
- /* TODO Disable compressed downloads for extra-info descriptors,
- * because zlib decompression doesn't work correctly. Figure out why
- * this is and fix it. */
- String fullUrl = "http://" + authority + resource
- + (this.downloadCompressed && !resource.startsWith("/tor/extra/")
- ? ".z" : "");
- URL u = new URL(fullUrl);
- HttpURLConnection huc = (HttpURLConnection) u.openConnection();
- huc.setRequestMethod("GET");
- huc.connect();
- int response = huc.getResponseCode();
- if (response == 200) {
- BufferedInputStream in = this.downloadCompressed &&
- !resource.startsWith("/tor/extra/")
- ? new BufferedInputStream(new InflaterInputStream(
- huc.getInputStream()))
- : new BufferedInputStream(huc.getInputStream());
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = in.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- in.close();
- allData = baos.toByteArray();
- }
- logger.fine("Downloaded " + fullUrl + " -> " + response + " ("
- + (allData == null ? 0 : allData.length) + " bytes)");
- int receivedDescriptors = 0;
- if (allData != null) {
- if (resource.startsWith("/tor/status-vote/current/")) {
- this.rdp.parse(allData);
- receivedDescriptors = 1;
- } else if (resource.startsWith("/tor/server/") ||
- resource.startsWith("/tor/extra/")) {
- if (resource.equals("/tor/server/all")) {
- this.lastDownloadedAllDescriptors.put(authority,
- this.currentTimestamp);
- }
- String ascii = null;
- try {
- ascii = new String(allData, "US-ASCII");
- } catch (UnsupportedEncodingException e) {
- /* No way that US-ASCII is not supported. */
- }
- int start = -1, sig = -1, end = -1;
- String startToken = resource.startsWith("/tor/server/") ?
- "router " : "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String endToken = "\n-----END SIGNATURE-----\n";
- while (end < ascii.length()) {
- start = ascii.indexOf(startToken, end);
- if (start < 0) {
- break;
- }
- sig = ascii.indexOf(sigToken, start);
- if (sig < 0) {
- break;
- }
- sig += sigToken.length();
- end = ascii.indexOf(endToken, sig);
- if (end < 0) {
- break;
- }
- end += endToken.length();
- byte[] descBytes = new byte[end - start];
- System.arraycopy(allData, start, descBytes, 0, end - start);
- this.rdp.parse(descBytes);
- receivedDescriptors++;
- }
- }
- }
- return receivedDescriptors;
- }
-
- /**
- * Writes status files to disk and logs statistics about downloading
- * relay descriptors in this execution.
- */
- public void writeFile() {
-
- /* Write missing descriptors file to disk. */
- int missingConsensuses = 0, missingVotes = 0,
- missingServerDescriptors = 0, missingExtraInfoDescriptors = 0;
- try {
- this.logger.fine("Writing file "
- + this.missingDescriptorsFile.getAbsolutePath() + "...");
- this.missingDescriptorsFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.missingDescriptorsFile));
- for (Map.Entry<String, String> e :
- this.missingDescriptors.entrySet()) {
- String key = e.getKey(), value = e.getValue();
- if (!value.equals("NA")) {
- /* Not missing. */
- } else if (key.startsWith("consensus,")) {
- missingConsensuses++;
- } else if (key.startsWith("vote,")) {
- missingVotes++;
- } else if (key.startsWith("server,")) {
- missingServerDescriptors++;
- } else if (key.startsWith("extra,")) {
- missingExtraInfoDescriptors++;
- }
- bw.write(key + "," + value + "\n");
- }
- bw.close();
- this.logger.fine("Finished writing file "
- + this.missingDescriptorsFile.getAbsolutePath() + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed writing "
- + this.missingDescriptorsFile.getAbsolutePath() + "!", e);
- }
-
- /* Write text file containing the directory authorities and when we
- * last downloaded all server and extra-info descriptors from them to
- * disk. */
- try {
- this.logger.fine("Writing file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + "...");
- this.lastDownloadedAllDescriptorsFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.lastDownloadedAllDescriptorsFile));
- for (Map.Entry<String, String> e :
- this.lastDownloadedAllDescriptors.entrySet()) {
- String authority = e.getKey();
- String lastDownloaded = e.getValue();
- bw.write(authority + "," + lastDownloaded + "\n");
- }
- bw.close();
- this.logger.fine("Finished writing file "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
- + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed writing "
- + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + "!",
- e);
- }
-
- /* Log statistics about this execution. */
- this.logger.info("Finished downloading relay descriptors from the "
- + "directory authorities.");
- this.logger.info("At the beginning of this execution, we were "
- + "missing " + oldMissingConsensuses + " consensus(es), "
- + oldMissingVotes + " vote(s), " + oldMissingServerDescriptors
- + " server descriptor(s), and " + oldMissingExtraInfoDescriptors
- + " extra-info descriptor(s).");
- this.logger.info("During this execution, we added "
- + this.newMissingConsensuses + " consensus(es), "
- + this.newMissingVotes + " vote(s), "
- + this.newMissingServerDescriptors + " server descriptor(s), and "
- + this.newMissingExtraInfoDescriptors + " extra-info "
- + "descriptor(s) to the missing list, some of which we also "
- + "requested and removed from the list again.");
- this.logger.info("We requested " + this.requestedConsensuses
- + " consensus(es), " + this.requestedVotes + " vote(s), "
- + this.requestedMissingServerDescriptors + " missing server "
- + "descriptor(s), " + this.requestedAllServerDescriptors
- + " times all server descriptors, "
- + this.requestedMissingExtraInfoDescriptors + " missing "
- + "extra-info descriptor(s), and "
- + this.requestedAllExtraInfoDescriptors + " times all extra-info "
- + "descriptors from the directory authorities.");
- StringBuilder sb = new StringBuilder();
- for (String authority : this.authorities) {
- sb.append(" " + authority + "="
- + this.requestsByAuthority.get(authority));
- }
- this.logger.info("We sent these numbers of requests to the directory "
- + "authorities:" + sb.toString());
- this.logger.info("We successfully downloaded "
- + this.downloadedConsensuses + " consensus(es), "
- + this.downloadedVotes + " vote(s), "
- + this.downloadedMissingServerDescriptors + " missing server "
- + "descriptor(s), " + this.downloadedAllServerDescriptors
- + " server descriptor(s) when downloading all descriptors, "
- + this.downloadedMissingExtraInfoDescriptors + " missing "
- + "extra-info descriptor(s) and "
- + this.downloadedAllExtraInfoDescriptors + " extra-info "
- + "descriptor(s) when downloading all descriptors.");
- this.logger.info("At the end of this execution, we are missing "
- + missingConsensuses + " consensus(es), " + missingVotes
- + " vote(s), " + missingServerDescriptors + " server "
- + "descriptor(s), and " + missingExtraInfoDescriptors
- + " extra-info descriptor(s), some of which we may try in the next "
- + "execution.");
- }
-}
-
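The removed download loop batches missing descriptor digests into resources of at most 96 digests each, of the form "/tor/server/d/<d1>+<d2>+...". A standalone sketch of just that batching step (class and method names are hypothetical; the digests in main are dummies):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.SortedSet;
    import java.util.TreeSet;

    public class BatchUrlSketch {
      /* Combine digests into "/tor/server/d/<d1>+<d2>+..." resources of
       * at most 96 digests each, mirroring the removed download loop. */
      static List<String> combine(SortedSet<String> digests) {
        List<String> resources = new ArrayList<String>();
        StringBuilder current = null;
        int inCurrent = 0;
        for (String digest : digests) {
          if (inCurrent >= 96) {
            resources.add(current.toString());
            current = null;
            inCurrent = 0;
          }
          if (inCurrent == 0) {
            current = new StringBuilder("/tor/server/d/" + digest);
          } else {
            current.append("+" + digest);
          }
          inCurrent++;
        }
        if (inCurrent > 0) {
          resources.add(current.toString());
        }
        return resources;
      }
      public static void main(String[] args) {
        SortedSet<String> digests = new TreeSet<String>();
        for (int i = 0; i < 200; i++) {
          digests.add(String.format("%040x", i));  /* dummy digests */
        }
        /* 200 digests yield three resources: 96 + 96 + 8. */
        System.out.println(combine(digests).size() + " resources");
      }
    }
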
diff --git a/src/org/torproject/ernie/db/RelayDescriptorParser.java b/src/org/torproject/ernie/db/RelayDescriptorParser.java
deleted file mode 100644
index be54656..0000000
--- a/src/org/torproject/ernie/db/RelayDescriptorParser.java
+++ /dev/null
@@ -1,265 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.SortedSet;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-
-/**
- * Parses relay descriptors including network status consensuses and
- * votes, server and extra-info descriptors, and passes the results to the
- * stats handlers, to the archive writer, or to the relay descriptor
- * downloader.
- */
-public class RelayDescriptorParser {
-
- /**
- * File writer that writes descriptor contents to files in a
- * directory-archive directory structure.
- */
- private ArchiveWriter aw;
-
- /**
- * Missing descriptor downloader that uses the parse results to learn
- * which descriptors we are missing and want to download.
- */
- private RelayDescriptorDownloader rdd;
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- private SimpleDateFormat dateTimeFormat;
-
- /**
- * Initializes this class.
- */
- public RelayDescriptorParser(ArchiveWriter aw) {
- this.aw = aw;
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
-
- this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- }
-
- public void setRelayDescriptorDownloader(
- RelayDescriptorDownloader rdd) {
- this.rdd = rdd;
- }
-
- public void parse(byte[] data) {
- try {
- /* Convert descriptor to ASCII for parsing. This means we'll lose
- * the non-ASCII chars, but we don't care about them for parsing
- * anyway. */
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line;
- do {
- line = br.readLine();
- } while (line != null && line.startsWith("@"));
- if (line == null) {
- this.logger.fine("We were given an empty descriptor for "
- + "parsing. Ignoring.");
- return;
- }
- SimpleDateFormat parseFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (line.equals("network-status-version 3")) {
- // TODO when parsing the current consensus, check the fresh-until
- // time to see when we switch from hourly to half-hourly
- // consensuses
- boolean isConsensus = true;
- String validAfterTime = null, fingerprint = null,
- dirSource = null;
- long validAfter = -1L, dirKeyPublished = -1L;
- SortedSet<String> dirSources = new TreeSet<String>();
- SortedSet<String> serverDescriptors = new TreeSet<String>();
- SortedSet<String> hashedRelayIdentities = new TreeSet<String>();
- StringBuilder certificateStringBuilder = null;
- String certificateString = null;
- while ((line = br.readLine()) != null) {
- if (certificateStringBuilder != null) {
- if (line.startsWith("r ")) {
- certificateString = certificateStringBuilder.toString();
- certificateStringBuilder = null;
- } else {
- certificateStringBuilder.append(line + "\n");
- }
- }
- if (line.equals("vote-status vote")) {
- isConsensus = false;
- } else if (line.startsWith("valid-after ")) {
- validAfterTime = line.substring("valid-after ".length());
- validAfter = parseFormat.parse(validAfterTime).getTime();
- } else if (line.startsWith("dir-source ")) {
- dirSource = line.split(" ")[2];
- } else if (line.startsWith("vote-digest ")) {
- dirSources.add(dirSource);
- } else if (line.startsWith("dir-key-certificate-version ")) {
- certificateStringBuilder = new StringBuilder();
- certificateStringBuilder.append(line + "\n");
- } else if (line.startsWith("fingerprint ")) {
- fingerprint = line.split(" ")[1];
- } else if (line.startsWith("dir-key-published ")) {
- String dirKeyPublishedTime = line.substring(
- "dir-key-published ".length());
- dirKeyPublished = parseFormat.parse(dirKeyPublishedTime).
- getTime();
- } else if (line.startsWith("r ")) {
- String[] parts = line.split(" ");
- if (parts.length < 9) {
- this.logger.log(Level.WARNING, "Could not parse r line '"
- + line + "' in descriptor. Skipping.");
- break;
- }
- String publishedTime = parts[4] + " " + parts[5];
- String relayIdentity = Hex.encodeHexString(
- Base64.decodeBase64(parts[2] + "=")).
- toLowerCase();
- String serverDesc = Hex.encodeHexString(Base64.decodeBase64(
- parts[3] + "=")).toLowerCase();
- serverDescriptors.add(publishedTime + "," + relayIdentity
- + "," + serverDesc);
- hashedRelayIdentities.add(DigestUtils.shaHex(
- Base64.decodeBase64(parts[2] + "=")).
- toUpperCase());
- }
- }
- if (isConsensus) {
- if (this.rdd != null) {
- this.rdd.haveParsedConsensus(validAfterTime, dirSources,
- serverDescriptors);
- }
- if (this.aw != null) {
- this.aw.storeConsensus(data, validAfter);
- }
- } else {
- if (this.aw != null || this.rdd != null) {
- String ascii = new String(data, "US-ASCII");
- String startToken = "network-status-version ";
- String sigToken = "directory-signature ";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken);
- if (start >= 0 && sig >= 0 && sig > start) {
- sig += sigToken.length();
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- String digest = DigestUtils.shaHex(forDigest).toUpperCase();
- if (this.aw != null) {
- this.aw.storeVote(data, validAfter, dirSource, digest);
- }
- if (this.rdd != null) {
- this.rdd.haveParsedVote(validAfterTime, fingerprint,
- serverDescriptors);
- }
- }
- if (certificateString != null) {
- if (this.aw != null) {
- this.aw.storeCertificate(certificateString.getBytes(),
- dirSource, dirKeyPublished);
- }
- }
- }
- }
- } else if (line.startsWith("router ")) {
- String publishedTime = null, extraInfoDigest = null,
- relayIdentifier = null;
- long published = -1L;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("published ")) {
- publishedTime = line.substring("published ".length());
- published = parseFormat.parse(publishedTime).getTime();
- } else if (line.startsWith("opt fingerprint") ||
- line.startsWith("fingerprint")) {
- relayIdentifier = line.substring(line.startsWith("opt ") ?
- "opt fingerprint".length() : "fingerprint".length()).
- replaceAll(" ", "").toLowerCase();
- } else if (line.startsWith("opt extra-info-digest ") ||
- line.startsWith("extra-info-digest ")) {
- extraInfoDigest = line.startsWith("opt ") ?
- line.split(" ")[2].toLowerCase() :
- line.split(" ")[1].toLowerCase();
- }
- }
- String ascii = new String(data, "US-ASCII");
- String startToken = "router ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- String digest = null;
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- digest = DigestUtils.shaHex(forDigest);
- }
- if (this.aw != null && digest != null) {
- this.aw.storeServerDescriptor(data, digest, published);
- }
- if (this.rdd != null && digest != null) {
- this.rdd.haveParsedServerDescriptor(publishedTime,
- relayIdentifier, digest, extraInfoDigest);
- }
- } else if (line.startsWith("extra-info ")) {
- String publishedTime = null, relayIdentifier = line.split(" ")[2];
- long published = -1L;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("published ")) {
- publishedTime = line.substring("published ".length());
- published = parseFormat.parse(publishedTime).getTime();
- }
- }
- String ascii = new String(data, "US-ASCII");
- String startToken = "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String digest = null;
- int start = ascii.indexOf(startToken);
- if (start > 0) {
- /* Do not confuse "extra-info " in "@type extra-info 1.0" with
- * "extra-info 0000...". TODO This is a hack that should be
- * solved by using metrics-lib some day. */
- start = ascii.indexOf("\n" + startToken);
- if (start > 0) {
- start++;
- }
- }
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- digest = DigestUtils.shaHex(forDigest);
- }
- if (this.aw != null && digest != null) {
- this.aw.storeExtraInfoDescriptor(data, digest, published);
- }
- if (this.rdd != null && digest != null) {
- this.rdd.haveParsedExtraInfoDescriptor(publishedTime,
- relayIdentifier.toLowerCase(), digest);
- }
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse descriptor. "
- + "Skipping.", e);
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not parse descriptor. "
- + "Skipping.", e);
- }
- }
-}
-
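The removed parser computes each server descriptor's digest over the byte range from the first "router " keyword through the end of "\nrouter-signature\n". A self-contained sketch of that extraction, using java.security.MessageDigest instead of commons-codec so the example has no dependencies (class and method names are hypothetical; the descriptor in main is a minimal placeholder):

    import java.security.MessageDigest;

    public class DigestSketch {
      static String serverDescriptorDigest(byte[] data) throws Exception {
        String ascii = new String(data, "US-ASCII");
        String startToken = "router ";
        String sigToken = "\nrouter-signature\n";
        int start = ascii.indexOf(startToken);
        int sig = ascii.indexOf(sigToken);
        if (start < 0 || sig < 0 || sig < start) {
          return null;  /* not a parseable server descriptor */
        }
        sig += sigToken.length();
        /* Digest covers "router " through the signature keyword line. */
        byte[] forDigest = new byte[sig - start];
        System.arraycopy(data, start, forDigest, 0, sig - start);
        byte[] digest = MessageDigest.getInstance("SHA-1").digest(forDigest);
        StringBuilder hex = new StringBuilder();
        for (byte b : digest) {
          hex.append(String.format("%02x", b));
        }
        return hex.toString();
      }
      public static void main(String[] args) throws Exception {
        byte[] data = ("router example 1.2.3.4 9001 0 0\n"
            + "published 2012-10-26 12:44:42\nrouter-signature\n"
            + "-----BEGIN SIGNATURE-----\n-----END SIGNATURE-----\n")
            .getBytes("US-ASCII");
        System.out.println(serverDescriptorDigest(data));
      }
    }
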
diff --git a/src/org/torproject/ernie/db/RsyncDataProvider.java b/src/org/torproject/ernie/db/RsyncDataProvider.java
deleted file mode 100644
index 2f9632e..0000000
--- a/src/org/torproject/ernie/db/RsyncDataProvider.java
+++ /dev/null
@@ -1,217 +0,0 @@
-/* Copyright 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.Stack;
-import java.util.logging.Logger;
-
-/**
- * Copy files published in the last 3 days to a local directory that can
- * then be served via rsync.
- */
-public class RsyncDataProvider {
- public RsyncDataProvider(File directoryArchivesOutputDirectory,
- File sanitizedBridgesWriteDirectory,
- File sanitizedAssignmentsDirectory,
- boolean downloadExitList,
- File torperfOutputDirectory, File rsyncDirectory) {
-
- /* Initialize logger. */
- Logger logger = Logger.getLogger(RsyncDataProvider.class.getName());
-
- /* Determine the cut-off time for files in rsync/. */
- long cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
-
- /* Create rsync/ directory if it doesn't exist. */
- if (!rsyncDirectory.exists()) {
- rsyncDirectory.mkdirs();
- }
-
- /* Make a list of all files in the rsync/ directory to delete those
- * that we didn't copy in this run. */
- Set<String> fileNamesInRsync = new HashSet<String>();
- Stack<File> files = new Stack<File>();
- files.add(rsyncDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else {
- fileNamesInRsync.add(pop.getName());
- }
- }
- logger.info("Found " + fileNamesInRsync.size() + " files in "
- + rsyncDirectory.getAbsolutePath() + " that we're either "
- + "overwriting or deleting in this execution.");
-
- /* Copy relay descriptors from the last 3 days. */
- if (directoryArchivesOutputDirectory != null) {
- files.add(directoryArchivesOutputDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- if (pop.getAbsolutePath().contains("/consensus/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "relay-descriptors/consensuses/" + fileName));
- } else if (pop.getAbsolutePath().contains("/vote/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "relay-descriptors/votes/" + fileName));
- } else if (pop.getAbsolutePath().contains(
- "/server-descriptor/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "relay-descriptors/server-descriptors/" + fileName));
- } else if (pop.getAbsolutePath().contains("/extra-info/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "relay-descriptors/extra-infos/" + fileName));
- } else {
- continue;
- }
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying relay descriptors, there are still "
- + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Copy sanitized bridge descriptors from the last 3 days. */
- if (sanitizedBridgesWriteDirectory != null) {
- files.add(sanitizedBridgesWriteDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- if (pop.getAbsolutePath().contains("/statuses/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "bridge-descriptors/statuses/" + fileName));
- } else if (pop.getAbsolutePath().contains(
- "/server-descriptors/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "bridge-descriptors/server-descriptors/" + fileName));
- } else if (pop.getAbsolutePath().contains("/extra-infos/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "bridge-descriptors/extra-infos/" + fileName));
- } else {
- continue;
- }
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying sanitized bridge descriptors, there are "
- + "still " + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Copy sanitized bridge pool assignments from the last 3 days. */
- if (sanitizedAssignmentsDirectory != null) {
- files.add(sanitizedAssignmentsDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- this.copyFile(pop, new File(rsyncDirectory,
- "bridge-pool-assignments/" + fileName));
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying sanitized bridge pool assignments, there "
- + "are still " + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Copy exit lists from the last 3 days. */
- if (downloadExitList) {
- files.add(new File("exitlist"));
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- this.copyFile(pop, new File(rsyncDirectory,
- "exit-lists/" + fileName));
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying exit lists, there are still "
- + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Copy Torperf files. */
- if (torperfOutputDirectory != null) {
- files.add(torperfOutputDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.getName().endsWith(".tpf") &&
- pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- this.copyFile(pop, new File(rsyncDirectory,
- "torperf/" + fileName));
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying Torperf files, there are still "
- + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Delete all files that we didn't (over-)write in this run. */
- files.add(rsyncDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (fileNamesInRsync.contains(pop.getName())) {
- fileNamesInRsync.remove(pop.getName());
- pop.delete();
- }
- }
- logger.info("After deleting files that we didn't overwrite in this "
- + "run, there are " + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
- }
-
- private void copyFile(File from, File to) {
- if (from.exists() && to.exists() &&
- from.lastModified() == to.lastModified() &&
- from.length() == to.length()) {
- return;
- }
- try {
- to.getParentFile().mkdirs();
- FileInputStream fis = new FileInputStream(from);
- BufferedInputStream bis = new BufferedInputStream(fis);
- FileOutputStream fos = new FileOutputStream(to);
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- fos.write(data, 0, len);
- }
- bis.close();
- fos.close();
- to.setLastModified(from.lastModified());
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-}
-
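RsyncDataProvider repeats one pattern for each data source: walk a directory tree with an explicit stack and select the files modified within the last three days. A self-contained sketch of just that walk, assuming the directory to scan is passed as the first argument (the println stands in for the copy step):

    import java.io.File;
    import java.util.Arrays;
    import java.util.Stack;

    public class RecentFileWalkSketch {
      public static void main(String[] args) {
        /* Same three-day cut-off as in RsyncDataProvider. */
        long cutOffMillis = System.currentTimeMillis()
            - 3L * 24L * 60L * 60L * 1000L;
        Stack<File> files = new Stack<File>();
        files.add(new File(args[0]));
        while (!files.isEmpty()) {
          File pop = files.pop();
          if (pop.isDirectory()) {
            files.addAll(Arrays.asList(pop.listFiles()));
          } else if (pop.lastModified() >= cutOffMillis) {
            System.out.println(pop.getAbsolutePath());
          }
        }
      }
    }

An explicit stack avoids recursion and makes it easy to mix files and directories in one work list, which is why the class reuses the same Stack<File> instance across its successive passes.
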
diff --git a/src/org/torproject/ernie/db/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/SanitizedBridgesWriter.java
deleted file mode 100644
index afafe11..0000000
--- a/src/org/torproject/ernie/db/SanitizedBridgesWriter.java
+++ /dev/null
@@ -1,911 +0,0 @@
-/* Copyright 2010--2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.*;
-import java.security.*;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.*;
-import java.util.logging.*;
-
-import org.apache.commons.codec.DecoderException;
-import org.apache.commons.codec.digest.*;
-import org.apache.commons.codec.binary.*;
-
-/**
- * Sanitizes bridge descriptors, i.e., removes all possibly sensitive
- * information from them, and writes them to a local directory structure.
- * During the sanitizing process, all information about the bridge
- * identity or IP address is removed or replaced. The goal is to keep
- * the sanitized bridge descriptors useful for statistical analysis
- * while not making it easier for an adversary to enumerate bridges.
- *
- * There are three types of bridge descriptors: bridge network statuses
- * (lists of all bridges at a given time), server descriptors (published
- * by a bridge to advertise its capabilities), and extra-info
- * descriptors (published by a bridge, mainly for statistical analysis).
- */
-public class SanitizedBridgesWriter {
-
- /**
- * Logger for this class.
- */
- private Logger logger;
-
- /**
- * Output directory for writing sanitized bridge descriptors.
- */
- private File sanitizedBridgesDirectory;
-
- private boolean replaceIPAddressesWithHashes;
-
- private boolean persistenceProblemWithSecrets;
-
- private SortedMap<String, byte[]> secretsForHashingIPAddresses;
-
- private String bridgeSanitizingCutOffTimestamp;
-
- private boolean haveWarnedAboutInterval;
-
- private File bridgeIpSecretsFile;
-
- private SecureRandom secureRandom;
-
- /**
- * Initializes this class.
- */
- public SanitizedBridgesWriter(File sanitizedBridgesDirectory,
- File statsDirectory, boolean replaceIPAddressesWithHashes,
- long limitBridgeSanitizingInterval) {
-
- if (sanitizedBridgesDirectory == null || statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- /* Memorize argument values. */
- this.sanitizedBridgesDirectory = sanitizedBridgesDirectory;
- this.replaceIPAddressesWithHashes = replaceIPAddressesWithHashes;
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(
- SanitizedBridgesWriter.class.getName());
-
- /* Initialize secure random number generator if we need it. */
- if (this.replaceIPAddressesWithHashes) {
- try {
- this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN");
- } catch (GeneralSecurityException e) {
- this.logger.log(Level.WARNING, "Could not initialize secure "
- + "random number generator! Not calculating any IP address "
- + "hashes in this execution!", e);
- this.persistenceProblemWithSecrets = true;
- }
- }
-
- /* Read hex-encoded secrets for replacing IP addresses with hashes
- * from disk. */
- this.secretsForHashingIPAddresses = new TreeMap<String, byte[]>();
- this.bridgeIpSecretsFile = new File(statsDirectory,
- "bridge-ip-secrets");
- if (this.bridgeIpSecretsFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- this.bridgeIpSecretsFile));
- String line;
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(",");
- if ((line.length() != ("yyyy-MM,".length() + 31 * 2) &&
- line.length() != ("yyyy-MM,".length() + 50 * 2)) ||
- parts.length != 2) {
- this.logger.warning("Invalid line in bridge-ip-secrets file "
- + "starting with '" + line.substring(0, 7) + "'! "
- + "Not calculating any IP address hashes in this "
- + "execution!");
- this.persistenceProblemWithSecrets = true;
- break;
- }
- String month = parts[0];
- byte[] secret = Hex.decodeHex(parts[1].toCharArray());
- this.secretsForHashingIPAddresses.put(month, secret);
- }
- br.close();
- if (!this.persistenceProblemWithSecrets) {
- this.logger.fine("Read "
- + this.secretsForHashingIPAddresses.size() + " secrets for "
- + "hashing bridge IP addresses.");
- }
- } catch (DecoderException e) {
- this.logger.log(Level.WARNING, "Failed to decode hex string in "
- + this.bridgeIpSecretsFile + "! Not calculating any IP "
- + "address hashes in this execution!", e);
- this.persistenceProblemWithSecrets = true;
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed to read "
- + this.bridgeIpSecretsFile + "! Not calculating any IP "
- + "address hashes in this execution!", e);
- this.persistenceProblemWithSecrets = true;
- }
- }
-
- /* If we're configured to keep secrets only for a limited time, define
- * the cut-off day and time. */
- if (limitBridgeSanitizingInterval >= 0L) {
- SimpleDateFormat formatter = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
- this.bridgeSanitizingCutOffTimestamp = formatter.format(
- System.currentTimeMillis() - 24L * 60L * 60L * 1000L
- * limitBridgeSanitizingInterval);
- } else {
- this.bridgeSanitizingCutOffTimestamp = "1999-12-31 23:59:59";
- }
- }
-
- private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
- String published) throws IOException {
- if (!orAddress.contains(":")) {
- /* Malformed or-address line or "a" line. */
- return null;
- }
- String addressPart = orAddress.substring(0,
- orAddress.lastIndexOf(":"));
- String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1);
- String scrubbedAddressPart = null;
- if (addressPart.startsWith("[")) {
- scrubbedAddressPart = this.scrubIpv6Address(addressPart,
- fingerprintBytes, published);
- } else {
- scrubbedAddressPart = this.scrubIpv4Address(addressPart,
- fingerprintBytes, published);
- }
- return (scrubbedAddressPart == null ? null :
- scrubbedAddressPart + ":" + portPart);
- }
-
- private String scrubIpv4Address(String address, byte[] fingerprintBytes,
- String published) throws IOException {
- if (this.replaceIPAddressesWithHashes) {
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return null;
- }
- byte[] hashInput = new byte[4 + 20 + 31];
- String[] ipParts = address.split("\\.");
- for (int i = 0; i < 4; i++) {
- hashInput[i] = (byte) Integer.parseInt(ipParts[i]);
- }
- System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
- String month = published.substring(0, "yyyy-MM".length());
- byte[] secret = this.getSecretForMonth(month);
- System.arraycopy(secret, 0, hashInput, 24, 31);
- byte[] hashOutput = DigestUtils.sha256(hashInput);
- String hashedAddress = "10."
- + (((int) hashOutput[0] + 256) % 256) + "."
- + (((int) hashOutput[1] + 256) % 256) + "."
- + (((int) hashOutput[2] + 256) % 256);
- return hashedAddress;
- } else {
- return "127.0.0.1";
- }
- }
-
- private String scrubIpv6Address(String address, byte[] fingerprintBytes,
- String published) throws IOException {
- StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::");
- if (this.replaceIPAddressesWithHashes) {
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return null;
- }
- byte[] hashInput = new byte[16 + 20 + 19];
- String[] doubleColonSeparatedParts = address.substring(1,
- address.length() - 1).split("::", -1);
- if (doubleColonSeparatedParts.length > 2) {
- /* Invalid IPv6 address. */
- return null;
- }
- List<String> hexParts = new ArrayList<String>();
- for (String doubleColonSeparatedPart : doubleColonSeparatedParts) {
- StringBuilder hexPart = new StringBuilder();
- String[] parts = doubleColonSeparatedPart.split(":", -1);
- if (parts.length < 1 || parts.length > 8) {
- /* Invalid IPv6 address. */
- return null;
- }
- for (int i = 0; i < parts.length; i++) {
- String part = parts[i];
- if (part.contains(".")) {
- String[] ipParts = part.split("\\.");
- byte[] ipv4Bytes = new byte[4];
- if (ipParts.length != 4) {
- /* Invalid IPv4 part in IPv6 address. */
- return null;
- }
- for (int m = 0; m < 4; m++) {
- ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]);
- }
- hexPart.append(Hex.encodeHexString(ipv4Bytes));
- } else if (part.length() > 4) {
- /* Invalid IPv6 address. */
- return null;
- } else {
- for (int k = part.length(); k < 4; k++) {
- hexPart.append("0");
- }
- hexPart.append(part);
- }
- }
- hexParts.add(hexPart.toString());
- }
- StringBuilder hex = new StringBuilder();
- hex.append(hexParts.get(0));
- if (hexParts.size() == 2) {
- for (int i = 32 - hexParts.get(0).length()
- - hexParts.get(1).length(); i > 0; i--) {
- hex.append("0");
- }
- hex.append(hexParts.get(1));
- }
- byte[] ipBytes = null;
- try {
- ipBytes = Hex.decodeHex(hex.toString().toCharArray());
- } catch (DecoderException e) {
- /* TODO Invalid IPv6 address. */
- return null;
- }
- if (ipBytes.length != 16) {
- /* TODO Invalid IPv6 address. */
- return null;
- }
- System.arraycopy(ipBytes, 0, hashInput, 0, 16);
- System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20);
- String month = published.substring(0, "yyyy-MM".length());
- byte[] secret = this.getSecretForMonth(month);
- System.arraycopy(secret, 31, hashInput, 36, 19);
- String hashOutput = DigestUtils.sha256Hex(hashInput);
- sb.append(hashOutput.substring(hashOutput.length() - 6,
- hashOutput.length() - 4));
- sb.append(":");
- sb.append(hashOutput.substring(hashOutput.length() - 4));
- }
- sb.append("]");
- return sb.toString();
- }
-
- private byte[] getSecretForMonth(String month) throws IOException {
- if (!this.secretsForHashingIPAddresses.containsKey(month) ||
- this.secretsForHashingIPAddresses.get(month).length == 31) {
- byte[] secret = new byte[50];
- this.secureRandom.nextBytes(secret);
- if (this.secretsForHashingIPAddresses.containsKey(month)) {
- System.arraycopy(this.secretsForHashingIPAddresses.get(month), 0,
- secret, 0, 31);
- }
- if (month.compareTo(
- this.bridgeSanitizingCutOffTimestamp) < 0) {
- this.logger.warning("Generated a secret that we won't make "
- + "persistent, because it's outside our bridge descriptor "
- + "sanitizing interval.");
- } else {
- /* Append secret to file on disk immediately before using it, or
- * we might end up with inconsistently sanitized bridges. */
- try {
- if (!this.bridgeIpSecretsFile.exists()) {
- this.bridgeIpSecretsFile.getParentFile().mkdirs();
- }
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.bridgeIpSecretsFile,
- this.bridgeIpSecretsFile.exists()));
- bw.write(month + "," + Hex.encodeHexString(secret) + "\n");
- bw.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not store new secret "
- + "to disk! Not calculating any IP address hashes in "
- + "this execution!", e);
- this.persistenceProblemWithSecrets = true;
- throw new IOException(e);
- }
- }
- this.secretsForHashingIPAddresses.put(month, secret);
- }
- return this.secretsForHashingIPAddresses.get(month);
- }
-
- /**
- * Sanitizes a network status and writes it to disk.
- */
- public void sanitizeAndStoreNetworkStatus(byte[] data,
- String publicationTime) {
-
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return;
- }
-
- if (this.bridgeSanitizingCutOffTimestamp.
- compareTo(publicationTime) > 0) {
- this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING
- : Level.FINE, "Sanitizing and storing network status with "
- + "publication time outside our descriptor sanitizing "
- + "interval.");
- this.haveWarnedAboutInterval = true;
- }
-
- /* Parse the given network status line by line. */
- SortedMap<String, String> scrubbedLines =
- new TreeMap<String, String>();
- try {
- StringBuilder scrubbed = new StringBuilder();
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line = null;
- String mostRecentDescPublished = null;
- byte[] fingerprintBytes = null;
- String descPublicationTime = null;
- String hashedBridgeIdentityHex = null;
- while ((line = br.readLine()) != null) {
-
- /* r lines contain sensitive information that needs to be removed
- * or replaced. */
- if (line.startsWith("r ")) {
-
- /* Clear buffer from previously scrubbed lines. */
- if (scrubbed.length() > 0) {
- String scrubbedLine = scrubbed.toString();
- scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
- scrubbed = new StringBuilder();
- }
-
- /* Parse the relevant parts of this r line. */
- String[] parts = line.split(" ");
- String nickname = parts[1];
- fingerprintBytes = Base64.decodeBase64(parts[2] + "==");
- String descriptorIdentifier = parts[3];
- descPublicationTime = parts[4] + " " + parts[5];
- String address = parts[6];
- String orPort = parts[7];
- String dirPort = parts[8];
-
- /* Determine most recent descriptor publication time. */
- if (descPublicationTime.compareTo(publicationTime) <= 0 &&
- (mostRecentDescPublished == null ||
- descPublicationTime.compareTo(
- mostRecentDescPublished) > 0)) {
- mostRecentDescPublished = descPublicationTime;
- }
-
- /* Write scrubbed r line to buffer. */
- byte[] hashedBridgeIdentity = DigestUtils.sha(fingerprintBytes);
- String hashedBridgeIdentityBase64 = Base64.encodeBase64String(
- hashedBridgeIdentity).substring(0, 27);
- hashedBridgeIdentityHex = Hex.encodeHexString(
- hashedBridgeIdentity);
- String hashedDescriptorIdentifier = Base64.encodeBase64String(
- DigestUtils.sha(Base64.decodeBase64(descriptorIdentifier
- + "=="))).substring(0, 27);
- String scrubbedAddress = scrubIpv4Address(address,
- fingerprintBytes,
- descPublicationTime);
- scrubbed.append("r " + nickname + " "
- + hashedBridgeIdentityBase64 + " "
- + hashedDescriptorIdentifier + " " + descPublicationTime
- + " " + scrubbedAddress + " " + orPort + " " + dirPort
- + "\n");
-
- /* Sanitize any addresses in a lines using the fingerprint and
- * descriptor publication time from the previous r line. */
- } else if (line.startsWith("a ")) {
- String scrubbedOrAddress = scrubOrAddress(
- line.substring("a ".length()), fingerprintBytes,
- descPublicationTime);
- if (scrubbedOrAddress != null) {
- scrubbed.append("a " + scrubbedOrAddress + "\n");
- } else {
- this.logger.warning("Invalid address in line '" + line
- + "' in bridge network status. Skipping line!");
- }
-
- /* Nothing special about s, w, and p lines; just copy them. */
- } else if (line.startsWith("s ") || line.equals("s") ||
- line.startsWith("w ") || line.equals("w") ||
- line.startsWith("p ") || line.equals("p")) {
- scrubbed.append(line + "\n");
-
- /* There should be nothing else but r, w, p, and s lines in the
- * network status. If there is, we should probably learn about it
- * before writing anything to the sanitized descriptors. */
- } else {
- this.logger.fine("Unknown line '" + line + "' in bridge "
- + "network status. Not writing to disk!");
- return;
- }
- }
- br.close();
- if (scrubbed.length() > 0) {
- String scrubbedLine = scrubbed.toString();
- scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
- scrubbed = new StringBuilder();
- }
-
- /* Check if we can tell from the descriptor publication times
- * whether this status is possibly stale. */
- SimpleDateFormat formatter = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (mostRecentDescPublished != null &&
- formatter.parse(publicationTime).getTime() -
- formatter.parse(mostRecentDescPublished).getTime() >
- 60L * 60L * 1000L) {
- this.logger.warning("The most recent descriptor in the bridge "
- + "network status published at " + publicationTime + " was "
- + "published at " + mostRecentDescPublished + " which is "
- + "more than 1 hour before the status. This is a sign for "
- + "the status being stale. Please check!");
- }
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not parse timestamp in "
- + "bridge network status.", e);
- return;
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse bridge network "
- + "status.", e);
- return;
- }
-
- /* Write the sanitized network status to disk. */
- try {
-
- /* Determine file name. */
- String syear = publicationTime.substring(0, 4);
- String smonth = publicationTime.substring(5, 7);
- String sday = publicationTime.substring(8, 10);
- String stime = publicationTime.substring(11, 13)
- + publicationTime.substring(14, 16)
- + publicationTime.substring(17, 19);
- File statusFile = new File(
- this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear
- + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth
- + sday + "-" + stime + "-"
- + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
-
- /* Create all parent directories to write this network status. */
- statusFile.getParentFile().mkdirs();
-
- /* Write sanitized network status to disk. */
- BufferedWriter bw = new BufferedWriter(new FileWriter(statusFile));
- bw.write("@type bridge-network-status 1.0\n");
- bw.write("published " + publicationTime + "\n");
- for (String scrubbed : scrubbedLines.values()) {
- bw.write(scrubbed);
- }
- bw.close();
-
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not write sanitized bridge "
- + "network status to disk.", e);
- return;
- }
- }
-
- /**
- * Sanitizes a bridge server descriptor and writes it to disk.
- */
- public void sanitizeAndStoreServerDescriptor(byte[] data) {
-
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return;
- }
-
- /* Parse descriptor to generate a sanitized version. */
- String scrubbedDesc = null, published = null;
- try {
- BufferedReader br = new BufferedReader(new StringReader(
- new String(data, "US-ASCII")));
- StringBuilder scrubbed = new StringBuilder();
- String line = null, hashedBridgeIdentity = null, address = null,
- routerLine = null, scrubbedAddress = null;
- List<String> orAddresses = null, scrubbedOrAddresses = null;
- boolean skipCrypto = false;
- while ((line = br.readLine()) != null) {
-
- /* Skip all crypto parts that might be used to derive the bridge's
- * identity fingerprint. */
- if (skipCrypto && !line.startsWith("-----END ")) {
- continue;
-
- /* Store the router line for later processing, because we may need
- * the bridge identity fingerprint for replacing the IP address in
- * the scrubbed version. */
- } else if (line.startsWith("router ")) {
- address = line.split(" ")[2];
- routerLine = line;
-
- /* Store or-address parts in a list and sanitize them when we have
- * read the fingerprint. */
- } else if (line.startsWith("or-address ")) {
- if (orAddresses == null) {
- orAddresses = new ArrayList<String>();
- }
- orAddresses.add(line.substring("or-address ".length()));
-
- /* Parse the publication time to see if we're still inside the
- * sanitizing interval. */
- } else if (line.startsWith("published ")) {
- published = line.substring("published ".length());
- if (this.bridgeSanitizingCutOffTimestamp.
- compareTo(published) > 0) {
- this.logger.log(!this.haveWarnedAboutInterval
- ? Level.WARNING : Level.FINE, "Sanitizing and storing "
- + "server descriptor with publication time outside our "
- + "descriptor sanitizing interval.");
- this.haveWarnedAboutInterval = true;
- }
- scrubbed.append(line + "\n");
-
- /* Parse the fingerprint to determine the hashed bridge
- * identity. */
- } else if (line.startsWith("opt fingerprint ") ||
- line.startsWith("fingerprint ")) {
- String fingerprint = line.substring(line.startsWith("opt ") ?
- "opt fingerprint".length() : "fingerprint".length()).
- replaceAll(" ", "").toLowerCase();
- byte[] fingerprintBytes = Hex.decodeHex(
- fingerprint.toCharArray());
- hashedBridgeIdentity = DigestUtils.shaHex(fingerprintBytes).
- toLowerCase();
- try {
- scrubbedAddress = scrubIpv4Address(address, fingerprintBytes,
- published);
- if (orAddresses != null) {
- scrubbedOrAddresses = new ArrayList<String>();
- for (String orAddress : orAddresses) {
- String scrubbedOrAddress = scrubOrAddress(orAddress,
- fingerprintBytes, published);
- if (scrubbedOrAddress != null) {
- scrubbedOrAddresses.add(scrubbedOrAddress);
- } else {
- this.logger.warning("Invalid address in line "
- + "'or-address " + orAddress + "' in bridge server "
- + "descriptor. Skipping line!");
- }
- }
- }
- } catch (IOException e) {
- /* There's a persistence problem, so we shouldn't scrub more
- * IP addresses in this execution. */
- this.persistenceProblemWithSecrets = true;
- return;
- }
- scrubbed.append((line.startsWith("opt ") ? "opt " : "")
- + "fingerprint");
- for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++)
- scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i,
- 4 * (i + 1)).toUpperCase());
- scrubbed.append("\n");
-
- /* Replace the contact line (if present) with a generic one. */
- } else if (line.startsWith("contact ")) {
- scrubbed.append("contact somebody\n");
-
- /* When we reach the signature, we're done. Write the sanitized
- * descriptor to disk below. */
- } else if (line.startsWith("router-signature")) {
- String[] routerLineParts = routerLine.split(" ");
- scrubbedDesc = "router " + routerLineParts[1] + " "
- + scrubbedAddress + " " + routerLineParts[3] + " "
- + routerLineParts[4] + " " + routerLineParts[5] + "\n";
- if (scrubbedOrAddresses != null) {
- for (String scrubbedOrAddress : scrubbedOrAddresses) {
- scrubbedDesc += "or-address "
- + scrubbedOrAddress + "\n";
- }
- }
- scrubbedDesc += scrubbed.toString();
- break;
-
- /* Replace extra-info digest with the hashed digest of the
- * non-scrubbed descriptor. */
- } else if (line.startsWith("opt extra-info-digest ") ||
- line.startsWith("extra-info-digest ")) {
- String extraInfoDescriptorIdentifier = line.substring(
- line.indexOf("extra-info-digest ")
- + "extra-info-digest ".length());
- String hashedExtraInfoDescriptorIdentifier =
- DigestUtils.shaHex(Hex.decodeHex(
- extraInfoDescriptorIdentifier.toCharArray())).toUpperCase();
- scrubbed.append((line.startsWith("opt ") ? "opt " : "")
- + "extra-info-digest " + hashedExtraInfoDescriptorIdentifier
- + "\n");
-
- /* Possibly sanitize reject lines if they contain the bridge's own
- * IP address. */
- } else if (line.startsWith("reject ")) {
- if (address != null && line.startsWith("reject " + address)) {
- scrubbed.append("reject " + scrubbedAddress
- + line.substring("reject ".length() + address.length())
- + "\n");
- } else {
- scrubbed.append(line + "\n");
- }
-
- /* Write the following lines unmodified to the sanitized
- * descriptor. */
- } else if (line.startsWith("accept ")
- || line.startsWith("platform ")
- || line.startsWith("opt protocols ")
- || line.startsWith("protocols ")
- || line.startsWith("uptime ")
- || line.startsWith("bandwidth ")
- || line.startsWith("opt hibernating ")
- || line.startsWith("hibernating ")
- || line.equals("opt hidden-service-dir")
- || line.equals("hidden-service-dir")
- || line.equals("opt caches-extra-info")
- || line.equals("caches-extra-info")
- || line.equals("opt allow-single-hop-exits")
- || line.equals("allow-single-hop-exits")) {
- scrubbed.append(line + "\n");
-
- /* Replace node fingerprints in the family line with their hashes
- * and leave nicknames unchanged. */
- } else if (line.startsWith("family ")) {
- StringBuilder familyLine = new StringBuilder("family");
- for (String s : line.substring(7).split(" ")) {
- if (s.startsWith("$")) {
- familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex(
- s.substring(1).toCharArray())).toUpperCase());
- } else {
- familyLine.append(" " + s);
- }
- }
- scrubbed.append(familyLine.toString() + "\n");
-
- /* Skip the purpose line that the bridge authority adds to its
- * cached-descriptors file. */
- } else if (line.startsWith("@purpose ")) {
- continue;
-
- /* Skip all crypto parts that might leak the bridge's identity
- * fingerprint. */
- } else if (line.startsWith("-----BEGIN ")
- || line.equals("onion-key") || line.equals("signing-key")) {
- skipCrypto = true;
-
- /* Stop skipping lines when the crypto parts are over. */
- } else if (line.startsWith("-----END ")) {
- skipCrypto = false;
-
- /* If we encounter an unrecognized line, stop parsing and print
- * out a warning. We might have overlooked sensitive information
- * that we need to remove or replace for the sanitized descriptor
- * version. */
- } else {
- this.logger.fine("Unrecognized line '" + line + "'. Skipping.");
- return;
- }
- }
- br.close();
- } catch (Exception e) {
- this.logger.log(Level.WARNING, "Could not parse server "
- + "descriptor.", e);
- return;
- }
-
- /* Determine filename of sanitized server descriptor. */
- String descriptorDigest = null;
- try {
- String ascii = new String(data, "US-ASCII");
- String startToken = "router ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest));
- }
- } catch (UnsupportedEncodingException e) {
- /* Handle below. */
- }
- if (descriptorDigest == null) {
- this.logger.log(Level.WARNING, "Could not calculate server "
- + "descriptor digest.");
- return;
- }
- String dyear = published.substring(0, 4);
- String dmonth = published.substring(5, 7);
- File newFile = new File(
- this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
- + dyear + "/" + dmonth + "/server-descriptors/"
- + "/" + descriptorDigest.charAt(0) + "/"
- + descriptorDigest.charAt(1) + "/"
- + descriptorDigest);
-
- /* Write sanitized server descriptor to disk, including all its parent
- * directories. */
- try {
- newFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
- bw.write("@type bridge-server-descriptor 1.0\n");
- bw.write(scrubbedDesc);
- bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
- bw.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not write sanitized server "
- + "descriptor to disk.", e);
- return;
- }
- }
-
- /**
- * Sanitizes an extra-info descriptor and writes it to disk.
- */
- public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) {
-
- /* Parse descriptor to generate a sanitized version. */
- String scrubbedDesc = null, published = null;
- try {
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, "US-ASCII")));
- String line = null;
- StringBuilder scrubbed = null;
- String hashedBridgeIdentity = null;
- while ((line = br.readLine()) != null) {
-
- /* Parse bridge identity from extra-info line and replace it with
- * its hash in the sanitized descriptor. */
- String[] parts = line.split(" ");
- if (line.startsWith("extra-info ")) {
- hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex(
- parts[2].toCharArray())).toLowerCase();
- scrubbed = new StringBuilder("extra-info " + parts[1] + " "
- + hashedBridgeIdentity.toUpperCase() + "\n");
-
- /* Parse the publication time to determine the file name. */
- } else if (line.startsWith("published ")) {
- scrubbed.append(line + "\n");
- published = line.substring("published ".length());
-
- /* Remove everything from transport lines except the transport
- * name. */
- } else if (line.startsWith("transport ")) {
- if (parts.length < 3) {
- this.logger.fine("Illegal line in extra-info descriptor: '"
- + line + "'. Skipping descriptor.");
- return;
- }
- scrubbed.append("transport " + parts[1] + "\n");
-
- /* Skip transport-info lines entirely. */
- } else if (line.startsWith("transport-info ")) {
-
- /* Write the following lines unmodified to the sanitized
- * descriptor. */
- } else if (line.startsWith("write-history ")
- || line.startsWith("read-history ")
- || line.startsWith("geoip-start-time ")
- || line.startsWith("geoip-client-origins ")
- || line.startsWith("geoip-db-digest ")
- || line.startsWith("conn-bi-direct ")
- || line.startsWith("bridge-")
- || line.startsWith("dirreq-")
- || line.startsWith("cell-")
- || line.startsWith("entry-")
- || line.startsWith("exit-")) {
- scrubbed.append(line + "\n");
-
- /* When we reach the signature, we're done. Write the sanitized
- * descriptor to disk below. */
- } else if (line.startsWith("router-signature")) {
- scrubbedDesc = scrubbed.toString();
- break;
-
- /* If we encounter an unrecognized line, stop parsing and print
- * out a warning. We might have overlooked sensitive information
- * that we need to remove or replace for the sanitized descriptor
- * version. */
- } else {
- this.logger.fine("Unrecognized line '" + line + "'. Skipping.");
- return;
- }
- }
- br.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse extra-info "
- + "descriptor.", e);
- return;
- } catch (DecoderException e) {
- this.logger.log(Level.WARNING, "Could not parse extra-info "
- + "descriptor.", e);
- return;
- }
-
- /* Determine filename of sanitized extra-info descriptor. */
- String descriptorDigest = null;
- try {
- String ascii = new String(data, "US-ASCII");
- String startToken = "extra-info ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest));
- }
- } catch (UnsupportedEncodingException e) {
- /* Handle below. */
- }
- if (descriptorDigest == null) {
- this.logger.log(Level.WARNING, "Could not calculate extra-info "
- + "descriptor digest.");
- return;
- }
- String dyear = published.substring(0, 4);
- String dmonth = published.substring(5, 7);
- File newFile = new File(
- this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
- + dyear + "/" + dmonth + "/extra-infos/"
- + descriptorDigest.charAt(0) + "/"
- + descriptorDigest.charAt(1) + "/"
- + descriptorDigest);
-
- /* Write sanitized extra-info descriptor to disk, including all its
- * parent directories. */
- try {
- newFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
- bw.write("@type bridge-extra-info 1.1\n");
- bw.write(scrubbedDesc);
- bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
- bw.close();
- } catch (Exception e) {
- this.logger.log(Level.WARNING, "Could not write sanitized "
- + "extra-info descriptor to disk.", e);
- }
- }
-
- /**
- * Rewrite all network statuses that might contain references to server
- * descriptors we added or updated in this execution. This applies to
- * all statuses that have been published up to 24 hours after any added
- * or updated server descriptor.
- */
- public void finishWriting() {
-
- /* Delete secrets that we don't need anymore. */
- if (!this.secretsForHashingIPAddresses.isEmpty() &&
- this.secretsForHashingIPAddresses.firstKey().compareTo(
- this.bridgeSanitizingCutOffTimestamp) < 0) {
- try {
- int kept = 0, deleted = 0;
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.bridgeIpSecretsFile));
- for (Map.Entry<String, byte[]> e :
- this.secretsForHashingIPAddresses.entrySet()) {
- if (e.getKey().compareTo(
- this.bridgeSanitizingCutOffTimestamp) < 0) {
- deleted++;
- } else {
- bw.write(e.getKey() + "," + Hex.encodeHexString(e.getValue())
- + "\n");
- kept++;
- }
- }
- bw.close();
- this.logger.info("Deleted " + deleted + " secrets that we don't "
- + "need anymore and kept " + kept + ".");
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not store reduced set of "
- + "secrets to disk! This is a bad sign, better check what's "
- + "going on!", e);
- }
- }
- }
-}
-
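The heart of the sanitizing code is the keyed hash in scrubIpv4Address(): SHA-256 over the 4 address bytes, the 20-byte bridge identity fingerprint, and a 31-byte secret that changes monthly, with the first three digest bytes mapped into 10.0.0.0/8. A minimal standalone sketch, assuming the monthly secret is obtained elsewhere (class and parameter names are illustrative):

    import org.apache.commons.codec.digest.DigestUtils;

    public class ScrubIpv4Sketch {

      /* address like "192.0.2.1", fingerprintBytes of length 20,
       * monthlySecret of length 31. */
      public static String scrub(String address,
          byte[] fingerprintBytes, byte[] monthlySecret) {
        byte[] hashInput = new byte[4 + 20 + 31];
        String[] ipParts = address.split("\\.");
        for (int i = 0; i < 4; i++) {
          hashInput[i] = (byte) Integer.parseInt(ipParts[i]);
        }
        System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
        System.arraycopy(monthlySecret, 0, hashInput, 24, 31);
        byte[] hashOutput = DigestUtils.sha256(hashInput);
        return "10." + ((hashOutput[0] + 256) % 256) + "."
            + ((hashOutput[1] + 256) % 256) + "."
            + ((hashOutput[2] + 256) % 256);
      }
    }

Including the fingerprint in the hash input means that two bridges sharing an IP address map to different sanitized addresses, and rotating the secret every month limits how long a leaked secret can be used to link addresses.
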
diff --git a/src/org/torproject/ernie/db/TorperfDownloader.java b/src/org/torproject/ernie/db/TorperfDownloader.java
deleted file mode 100644
index 058e29b..0000000
--- a/src/org/torproject/ernie/db/TorperfDownloader.java
+++ /dev/null
@@ -1,573 +0,0 @@
-/* Copyright 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.text.SimpleDateFormat;
-import java.util.List;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/* Download possibly truncated Torperf .data and .extradata files from
- * configured sources, append them to the files we already have, and merge
- * the two files into the .tpf format. */
-public class TorperfDownloader {
-
- private File torperfOutputDirectory = null;
- private SortedMap<String, String> torperfSources = null;
- private List<String> torperfFilesLines = null;
- private Logger logger = null;
- private SimpleDateFormat dateFormat;
-
- public TorperfDownloader(File torperfOutputDirectory,
- SortedMap<String, String> torperfSources,
- List<String> torperfFilesLines) {
- if (torperfOutputDirectory == null) {
- throw new IllegalArgumentException();
- }
- this.torperfOutputDirectory = torperfOutputDirectory;
- this.torperfSources = torperfSources;
- this.torperfFilesLines = torperfFilesLines;
- if (!this.torperfOutputDirectory.exists()) {
- this.torperfOutputDirectory.mkdirs();
- }
- this.logger = Logger.getLogger(TorperfDownloader.class.getName());
- this.dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- this.readLastMergedTimestamps();
- for (String torperfFilesLine : this.torperfFilesLines) {
- this.downloadAndMergeFiles(torperfFilesLine);
- }
- this.writeLastMergedTimestamps();
- }
-
- private File torperfLastMergedFile =
- new File("stats/torperf-last-merged");
- SortedMap<String, String> lastMergedTimestamps =
- new TreeMap<String, String>();
- private void readLastMergedTimestamps() {
- if (!this.torperfLastMergedFile.exists()) {
- return;
- }
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- this.torperfLastMergedFile));
- String line;
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(" ");
- String fileName = null, timestamp = null;
- if (parts.length == 2) {
- try {
- Double.parseDouble(parts[1]);
- fileName = parts[0];
- timestamp = parts[1];
- } catch (NumberFormatException e) {
- /* Handle below. */
- }
- }
- if (fileName == null || timestamp == null) {
- this.logger.log(Level.WARNING, "Invalid line '" + line + "' in "
- + this.torperfLastMergedFile.getAbsolutePath() + ". "
- + "Ignoring past history of merging .data and .extradata "
- + "files.");
- this.lastMergedTimestamps.clear();
- break;
- }
- this.lastMergedTimestamps.put(fileName, timestamp);
- }
- br.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Error while reading '"
- + this.torperfLastMergedFile.getAbsolutePath() + "'. Ignoring "
- + "past history of merging .data and .extradata files.");
- this.lastMergedTimestamps.clear();
- }
- }
-
- private void writeLastMergedTimestamps() {
- try {
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.torperfLastMergedFile));
- for (Map.Entry<String, String> e :
- this.lastMergedTimestamps.entrySet()) {
- String fileName = e.getKey();
- String timestamp = e.getValue();
- bw.write(fileName + " " + timestamp + "\n");
- }
- bw.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Error while writing '"
- + this.torperfLastMergedFile.getAbsolutePath() + "'. This may "
- + "result in ignoring history of merging .data and .extradata "
- + "files in the next execution.", e);
- }
- }
-
- private void downloadAndMergeFiles(String torperfFilesLine) {
- String[] parts = torperfFilesLine.split(" ");
- String sourceName = parts[1];
- int fileSize = -1;
- try {
- fileSize = Integer.parseInt(parts[2]);
- } catch (NumberFormatException e) {
- this.logger.log(Level.WARNING, "Could not parse file size in "
- + "TorperfFiles configuration line '" + torperfFilesLine
- + "'.");
- return;
- }
-
- /* Download and append the .data file. */
- String dataFileName = parts[3];
- String sourceBaseUrl = torperfSources.get(sourceName);
- String dataUrl = sourceBaseUrl + dataFileName;
- String dataOutputFileName = sourceName + "-" + dataFileName;
- File dataOutputFile = new File(torperfOutputDirectory,
- dataOutputFileName);
- boolean downloadedDataFile = this.downloadAndAppendFile(dataUrl,
- dataOutputFile, true);
-
- /* Download and append the .extradata file. */
- String extradataFileName = parts[4];
- String extradataUrl = sourceBaseUrl + extradataFileName;
- String extradataOutputFileName = sourceName + "-" + extradataFileName;
- File extradataOutputFile = new File(torperfOutputDirectory,
- extradataOutputFileName);
- boolean downloadedExtradataFile = this.downloadAndAppendFile(
- extradataUrl, extradataOutputFile, false);
-
- /* Merge both files into .tpf format. */
- if (!downloadedDataFile && !downloadedExtradataFile) {
- return;
- }
- String skipUntil = null;
- if (this.lastMergedTimestamps.containsKey(dataOutputFileName)) {
- skipUntil = this.lastMergedTimestamps.get(dataOutputFileName);
- }
- try {
- skipUntil = this.mergeFiles(dataOutputFile, extradataOutputFile,
- sourceName, fileSize, skipUntil);
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed merging " + dataOutputFile
- + " and " + extradataOutputFile + ".", e);
- }
- if (skipUntil != null) {
- this.lastMergedTimestamps.put(dataOutputFileName, skipUntil);
- }
- }
-
- private boolean downloadAndAppendFile(String url, File outputFile,
- boolean isDataFile) {
-
- /* Read an existing output file to determine which line will be the
- * first to append to it. */
- String lastTimestampLine = null;
- int linesAfterLastTimestampLine = 0;
- if (outputFile.exists() && outputFile.lastModified() >
- System.currentTimeMillis() - 330L * 60L * 1000L) {
- return false;
- } else if (outputFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- outputFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (isDataFile || line.contains(" LAUNCH")) {
- lastTimestampLine = line;
- linesAfterLastTimestampLine = 0;
- } else {
- linesAfterLastTimestampLine++;
- }
- }
- br.close();
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed reading '"
- + outputFile.getAbsolutePath() + "' to determine the first "
- + "line to append to it.", e);
- return false;
- }
- }
- try {
- this.logger.fine("Downloading " + (isDataFile ? ".data" :
- ".extradata") + " file from '" + url + "' and merging it into "
- + "'" + outputFile.getAbsolutePath() + "'.");
- URL u = new URL(url);
- HttpURLConnection huc = (HttpURLConnection) u.openConnection();
- huc.setRequestMethod("GET");
- huc.connect();
- BufferedReader br = new BufferedReader(new InputStreamReader(
- huc.getInputStream()));
- String line;
- BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile,
- true));
- boolean copyLines = lastTimestampLine == null;
- while ((line = br.readLine()) != null) {
- if (copyLines && linesAfterLastTimestampLine == 0) {
- if (isDataFile || line.contains(" LAUNCH")) {
- lastTimestampLine = line;
- }
- bw.write(line + "\n");
- } else if (copyLines && linesAfterLastTimestampLine > 0) {
- linesAfterLastTimestampLine--;
- } else if (line.equals(lastTimestampLine)) {
- copyLines = true;
- }
- }
- bw.close();
- br.close();
- if (!copyLines) {
- this.logger.warning("The last timestamp line in '"
- + outputFile.getAbsolutePath() + "' is not contained in the "
- + "new file downloaded from '" + url + "'. Cannot append "
- + "new lines without possibly leaving a gap. Skipping.");
- return false;
- }
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Failed downloading and/or merging '"
- + url + "'.", e);
- return false;
- }
- if (lastTimestampLine == null) {
- this.logger.warning("'" + outputFile.getAbsolutePath()
- + "' doesn't contain any timestamp lines. Unable to check "
- + "whether that file is stale or not.");
- } else {
- long lastTimestampMillis = -1L;
- if (isDataFile) {
- lastTimestampMillis = Long.parseLong(lastTimestampLine.substring(
- 0, lastTimestampLine.indexOf(" "))) * 1000L;
- } else {
- lastTimestampMillis = Long.parseLong(lastTimestampLine.substring(
- lastTimestampLine.indexOf(" LAUNCH=") + " LAUNCH=".length(),
- lastTimestampLine.indexOf(".",
- lastTimestampLine.indexOf(" LAUNCH=")))) * 1000L;
- }
- if (lastTimestampMillis < System.currentTimeMillis()
- - 330L * 60L * 1000L) {
- this.logger.warning("The last timestamp in '"
- + outputFile.getAbsolutePath() + "' is more than 5:30 hours "
- + "old: " + lastTimestampMillis);
- }
- }
- return true;
- }
-
- private String mergeFiles(File dataFile, File extradataFile,
- String source, int fileSize, String skipUntil) throws IOException {
- SortedMap<String, String> config = new TreeMap<String, String>();
- config.put("SOURCE", source);
- config.put("FILESIZE", String.valueOf(fileSize));
- if (!dataFile.exists() || !extradataFile.exists()) {
- this.logger.warning("File " + dataFile.getAbsolutePath() + " or "
- + extradataFile.getAbsolutePath() + " is missing.");
- return null;
- }
- this.logger.fine("Merging " + dataFile.getAbsolutePath() + " and "
- + extradataFile.getAbsolutePath() + " into .tpf format.");
- BufferedReader brD = new BufferedReader(new FileReader(dataFile)),
- brE = new BufferedReader(new FileReader(extradataFile));
- String lineD = brD.readLine(), lineE = brE.readLine();
- int d = 1, e = 1;
- String maxDataComplete = null, maxUsedAt = null;
- while (lineD != null) {
-
- /* Parse .data line. Every valid .data line will go into the .tpf
- * format, either with additional information from the .extradata
- * file or without it. */
- if (lineD.isEmpty()) {
- this.logger.finer("Skipping empty line " + dataFile.getName()
- + ":" + d++ + ".");
- lineD = brD.readLine();
- continue;
- }
- SortedMap<String, String> data = this.parseDataLine(lineD);
- if (data == null) {
- this.logger.finer("Skipping illegal line " + dataFile.getName()
- + ":" + d++ + " '" + lineD + "'.");
- lineD = brD.readLine();
- continue;
- }
- String dataComplete = data.get("DATACOMPLETE");
- double dataCompleteSeconds = Double.parseDouble(dataComplete);
- if (skipUntil != null && dataComplete.compareTo(skipUntil) < 0) {
- this.logger.finer("Skipping " + dataFile.getName() + ":"
- + d++ + " which we already processed before.");
- lineD = brD.readLine();
- continue;
- }
- maxDataComplete = dataComplete;
-
- /* Parse .extradata line if available and try to find the one that
- * matches the .data line. */
- SortedMap<String, String> extradata = null;
- while (lineE != null) {
- if (lineE.isEmpty()) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which is empty.");
- lineE = brE.readLine();
- continue;
- }
- if (lineE.startsWith("BUILDTIMEOUT_SET ")) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which is a BUILDTIMEOUT_SET line.");
- lineE = brE.readLine();
- continue;
- } else if (lineE.startsWith("ok ") ||
- lineE.startsWith("error ")) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which is in the old format.");
- lineE = brE.readLine();
- continue;
- }
- extradata = this.parseExtradataLine(lineE);
- if (extradata == null) {
- this.logger.finer("Skipping Illegal line "
- + extradataFile.getName() + ":" + e++ + " '" + lineE
- + "'.");
- lineE = brE.readLine();
- continue;
- }
- if (!extradata.containsKey("USED_AT")) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which doesn't contain a USED_AT element.");
- lineE = brE.readLine();
- continue;
- }
- String usedAt = extradata.get("USED_AT");
- double usedAtSeconds = Double.parseDouble(usedAt);
- if (skipUntil != null && usedAt.compareTo(skipUntil) < 0) {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which we already processed before.");
- lineE = brE.readLine();
- continue;
- }
- maxUsedAt = usedAt;
- if (Math.abs(usedAtSeconds - dataCompleteSeconds) <= 1.0) {
- this.logger.fine("Merging " + extradataFile.getName() + ":"
- + e++ + " into the current .data line.");
- lineE = brE.readLine();
- break;
- } else if (usedAtSeconds > dataCompleteSeconds) {
- this.logger.finer("Comparing " + extradataFile.getName()
- + " to the next .data line.");
- extradata = null;
- break;
- } else {
- this.logger.finer("Skipping " + extradataFile.getName() + ":"
- + e++ + " which is too old to be merged with "
- + dataFile.getName() + ":" + d + ".");
- lineE = brE.readLine();
- continue;
- }
- }
-
- /* Write output line to .tpf file. */
- SortedMap<String, String> keysAndValues =
- new TreeMap<String, String>();
- if (extradata != null) {
- keysAndValues.putAll(extradata);
- }
- keysAndValues.putAll(data);
- keysAndValues.putAll(config);
- this.logger.fine("Writing " + dataFile.getName() + ":" + d++ + ".");
- lineD = brD.readLine();
- try {
- this.writeTpfLine(source, fileSize, keysAndValues);
- } catch (IOException ex) {
- this.logger.log(Level.WARNING, "Error writing output line. "
- + "Aborting to merge " + dataFile.getName() + " and "
- + extradataFile.getName() + ".", e);
- break;
- }
- }
- brD.close();
- brE.close();
- this.writeCachedTpfLines();
- if (maxDataComplete == null) {
- return maxUsedAt;
- } else if (maxUsedAt == null) {
- return maxDataComplete;
- } else if (maxDataComplete.compareTo(maxUsedAt) > 0) {
- return maxUsedAt;
- } else {
- return maxDataComplete;
- }
- }
-
- private SortedMap<Integer, String> dataTimestamps;
- private SortedMap<String, String> parseDataLine(String line) {
- String[] parts = line.trim().split(" ");
- if (line.length() == 0 || parts.length < 20) {
- return null;
- }
- if (this.dataTimestamps == null) {
- this.dataTimestamps = new TreeMap<Integer, String>();
- this.dataTimestamps.put(0, "START");
- this.dataTimestamps.put(2, "SOCKET");
- this.dataTimestamps.put(4, "CONNECT");
- this.dataTimestamps.put(6, "NEGOTIATE");
- this.dataTimestamps.put(8, "REQUEST");
- this.dataTimestamps.put(10, "RESPONSE");
- this.dataTimestamps.put(12, "DATAREQUEST");
- this.dataTimestamps.put(14, "DATARESPONSE");
- this.dataTimestamps.put(16, "DATACOMPLETE");
- this.dataTimestamps.put(21, "DATAPERC10");
- this.dataTimestamps.put(23, "DATAPERC20");
- this.dataTimestamps.put(25, "DATAPERC30");
- this.dataTimestamps.put(27, "DATAPERC40");
- this.dataTimestamps.put(29, "DATAPERC50");
- this.dataTimestamps.put(31, "DATAPERC60");
- this.dataTimestamps.put(33, "DATAPERC70");
- this.dataTimestamps.put(35, "DATAPERC80");
- this.dataTimestamps.put(37, "DATAPERC90");
- }
- SortedMap<String, String> data = new TreeMap<String, String>();
- try {
- for (Map.Entry<Integer, String> e : this.dataTimestamps.entrySet()) {
- int i = e.getKey();
- if (parts.length > i + 1) {
- String key = e.getValue();
- String value = String.format("%s.%02d", parts[i],
- Integer.parseInt(parts[i + 1]) / 10000);
- data.put(key, value);
- }
- }
- } catch (NumberFormatException e) {
- return null;
- }
- data.put("WRITEBYTES", parts[18]);
- data.put("READBYTES", parts[19]);
- if (parts.length >= 21) {
- data.put("DIDTIMEOUT", parts[20]);
- }
- return data;
- }
-
- private SortedMap<String, String> parseExtradataLine(String line) {
- String[] parts = line.split(" ");
- SortedMap<String, String> extradata = new TreeMap<String, String>();
- String previousKey = null;
- for (String part : parts) {
- String[] keyAndValue = part.split("=", -1);
- if (keyAndValue.length == 2) {
- String key = keyAndValue[0];
- previousKey = key;
- String value = keyAndValue[1];
- if (value.contains(".") && value.lastIndexOf(".") ==
- value.length() - 2) {
- /* Make sure that all floats have two trailing digits. */
- value += "0";
- }
- extradata.put(key, value);
- } else if (keyAndValue.length == 1 && previousKey != null) {
- String value = keyAndValue[0];
- if (previousKey.equals("STREAM_FAIL_REASONS") &&
- (value.equals("MISC") || value.equals("EXITPOLICY") ||
- value.equals("RESOURCELIMIT") ||
- value.equals("RESOLVEFAILED"))) {
- extradata.put(previousKey, extradata.get(previousKey) + ":"
- + value);
- } else {
- return null;
- }
- } else {
- return null;
- }
- }
- return extradata;
- }
-
- private String cachedSource;
- private int cachedFileSize;
- private String cachedStartDate;
- private SortedMap<String, String> cachedTpfLines;
- private void writeTpfLine(String source, int fileSize,
- SortedMap<String, String> keysAndValues) throws IOException {
- StringBuilder sb = new StringBuilder();
- int written = 0;
- for (Map.Entry<String, String> keyAndValue :
- keysAndValues.entrySet()) {
- String key = keyAndValue.getKey();
- String value = keyAndValue.getValue();
- sb.append((written++ > 0 ? " " : "") + key + "=" + value);
- }
- String line = sb.toString();
- String startString = keysAndValues.get("START");
- long startMillis = Long.parseLong(startString.substring(0,
- startString.indexOf("."))) * 1000L;
- String startDate = dateFormat.format(startMillis);
- if (this.cachedTpfLines == null || !source.equals(this.cachedSource) ||
- fileSize != this.cachedFileSize ||
- !startDate.equals(this.cachedStartDate)) {
- this.writeCachedTpfLines();
- this.readTpfLinesToCache(source, fileSize, startDate);
- }
- if (!this.cachedTpfLines.containsKey(startString) ||
- line.length() > this.cachedTpfLines.get(startString).length()) {
- this.cachedTpfLines.put(startString, line);
- }
- }
-
- private void readTpfLinesToCache(String source, int fileSize,
- String startDate) throws IOException {
- this.cachedTpfLines = new TreeMap<String, String>();
- this.cachedSource = source;
- this.cachedFileSize = fileSize;
- this.cachedStartDate = startDate;
- File tpfFile = new File(torperfOutputDirectory,
- startDate.replaceAll("-", "/") + "/"
- + source + "-" + String.valueOf(fileSize) + "-" + startDate
- + ".tpf");
- if (!tpfFile.exists()) {
- return;
- }
- BufferedReader br = new BufferedReader(new FileReader(tpfFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("@type ")) {
- continue;
- }
- if (line.contains("START=")) {
- String startString = line.substring(line.indexOf("START=")
- + "START=".length()).split(" ")[0];
- this.cachedTpfLines.put(startString, line);
- }
- }
- br.close();
- }
-
- private void writeCachedTpfLines() throws IOException {
- if (this.cachedSource == null || this.cachedFileSize == 0 ||
- this.cachedStartDate == null || this.cachedTpfLines == null) {
- return;
- }
- File tpfFile = new File(torperfOutputDirectory,
- this.cachedStartDate.replaceAll("-", "/")
- + "/" + this.cachedSource + "-"
- + String.valueOf(this.cachedFileSize) + "-"
- + this.cachedStartDate + ".tpf");
- tpfFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(tpfFile));
- bw.write("@type torperf 1.0\n");
- for (String line : this.cachedTpfLines.values()) {
- bw.write(line + "\n");
- }
- bw.close();
- this.cachedSource = null;
- this.cachedFileSize = 0;
- this.cachedStartDate = null;
- this.cachedTpfLines = null;
- }
-}
-
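For reference, the core rule in the writeTpfLine code removed above: measurement lines are keyed by their START timestamp, and when two lines share a key, the longer (presumably more complete) line wins. A minimal standalone sketch of that rule, with hypothetical class and method names:

  import java.util.SortedMap;
  import java.util.TreeMap;

  public class TpfMergeSketch {
    /* Cached lines for one (source, fileSize, date) combination, keyed
     * by the START timestamp string, as in the code above. */
    private final SortedMap<String, String> cachedTpfLines =
        new TreeMap<String, String>();

    /* Keep the longer of two lines sharing the same START key; more
     * characters usually means more key=value pairs. */
    void mergeTpfLine(String startString, String line) {
      String previous = this.cachedTpfLines.get(startString);
      if (previous == null || line.length() > previous.length()) {
        this.cachedTpfLines.put(startString, line);
      }
    }
  }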
diff --git a/src/org/torproject/ernie/db/bridgedescs/BridgeDescriptorParser.java b/src/org/torproject/ernie/db/bridgedescs/BridgeDescriptorParser.java
new file mode 100644
index 0000000..f0b617a
--- /dev/null
+++ b/src/org/torproject/ernie/db/bridgedescs/BridgeDescriptorParser.java
@@ -0,0 +1,46 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.bridgedescs;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+public class BridgeDescriptorParser {
+ private SanitizedBridgesWriter sbw;
+ private Logger logger;
+ public BridgeDescriptorParser(SanitizedBridgesWriter sbw) {
+ this.sbw = sbw;
+ this.logger =
+ Logger.getLogger(BridgeDescriptorParser.class.getName());
+ }
+ public void parse(byte[] allData, String dateTime) {
+ try {
+ BufferedReader br = new BufferedReader(new StringReader(
+ new String(allData, "US-ASCII")));
+ String line = br.readLine();
+ if (line == null) {
+ return;
+ } else if (line.startsWith("r ")) {
+ if (this.sbw != null) {
+ this.sbw.sanitizeAndStoreNetworkStatus(allData, dateTime);
+ }
+ } else if (line.startsWith("router ")) {
+ if (this.sbw != null) {
+ this.sbw.sanitizeAndStoreServerDescriptor(allData);
+ }
+ } else if (line.startsWith("extra-info ")) {
+ if (this.sbw != null) {
+ this.sbw.sanitizeAndStoreExtraInfoDescriptor(allData);
+ }
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
+ e);
+ return;
+ }
+ }
+}
+
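The parser above dispatches purely on the first line of a descriptor blob. A hedged usage sketch (assuming the caller lives in the same package; the class name and example content are made up):

  import java.io.UnsupportedEncodingException;

  public class ParserDispatchSketch {
    public static void main(String[] args)
        throws UnsupportedEncodingException {
      /* A parser without a writer still exercises the dispatch:
       *   "r "          -> bridge network status
       *   "router "     -> server descriptor
       *   "extra-info " -> extra-info descriptor */
      BridgeDescriptorParser parser = new BridgeDescriptorParser(null);
      String status = "r Unnamed ...";  /* truncated example line */
      parser.parse(status.getBytes("US-ASCII"), "2012-10-26 12:00:00");
    }
  }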
diff --git a/src/org/torproject/ernie/db/bridgedescs/BridgeSnapshotReader.java b/src/org/torproject/ernie/db/bridgedescs/BridgeSnapshotReader.java
new file mode 100644
index 0000000..783775c
--- /dev/null
+++ b/src/org/torproject/ernie/db/bridgedescs/BridgeSnapshotReader.java
@@ -0,0 +1,220 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.bridgedescs;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+
+/**
+ * Reads the half-hourly snapshots of bridge descriptors from Tonga.
+ */
+public class BridgeSnapshotReader {
+ public BridgeSnapshotReader(BridgeDescriptorParser bdp,
+ File bridgeDirectoriesDir, File statsDirectory) {
+
+ if (bdp == null || bridgeDirectoriesDir == null ||
+ statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ Logger logger =
+ Logger.getLogger(BridgeSnapshotReader.class.getName());
+ SortedSet<String> parsed = new TreeSet<String>();
+ File bdDir = bridgeDirectoriesDir;
+ File pbdFile = new File(statsDirectory, "parsed-bridge-directories");
+ boolean modified = false;
+ if (bdDir.exists()) {
+ if (pbdFile.exists()) {
+ logger.fine("Reading file " + pbdFile.getAbsolutePath() + "...");
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(pbdFile));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ parsed.add(line);
+ }
+ br.close();
+ logger.fine("Finished reading file "
+ + pbdFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed reading file "
+ + pbdFile.getAbsolutePath() + "!", e);
+ return;
+ }
+ }
+ logger.fine("Importing files in directory " + bridgeDirectoriesDir
+ + "/...");
+ Set<String> descriptorImportHistory = new HashSet<String>();
+ int parsedFiles = 0, skippedFiles = 0, parsedStatuses = 0,
+ parsedServerDescriptors = 0, skippedServerDescriptors = 0,
+ parsedExtraInfoDescriptors = 0, skippedExtraInfoDescriptors = 0;
+ Stack<File> filesInInputDir = new Stack<File>();
+ filesInInputDir.add(bdDir);
+ while (!filesInInputDir.isEmpty()) {
+ File pop = filesInInputDir.pop();
+ if (pop.isDirectory()) {
+ for (File f : pop.listFiles()) {
+ filesInInputDir.add(f);
+ }
+ } else if (!parsed.contains(pop.getName())) {
+ try {
+ FileInputStream in = new FileInputStream(pop);
+ if (in.available() > 0) {
+ TarArchiveInputStream tais = null;
+ if (pop.getName().endsWith(".tar.gz")) {
+ GzipCompressorInputStream gcis =
+ new GzipCompressorInputStream(in);
+ tais = new TarArchiveInputStream(gcis);
+ } else if (pop.getName().endsWith(".tar")) {
+ tais = new TarArchiveInputStream(in);
+ } else {
+ continue;
+ }
+ BufferedInputStream bis = new BufferedInputStream(tais);
+ String fn = pop.getName();
+ String dateTime = fn.substring(11, 21) + " "
+ + fn.substring(22, 24) + ":" + fn.substring(24, 26)
+ + ":" + fn.substring(26, 28);
+ while ((tais.getNextTarEntry()) != null) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ byte[] allData = baos.toByteArray();
+ if (allData.length == 0) {
+ continue;
+ }
+ String fileDigest = Hex.encodeHexString(DigestUtils.sha(
+ allData));
+ String ascii = new String(allData, "US-ASCII");
+ BufferedReader br3 = new BufferedReader(new StringReader(
+ ascii));
+ String firstLine = null;
+ while ((firstLine = br3.readLine()) != null) {
+ if (firstLine.startsWith("@")) {
+ continue;
+ } else {
+ break;
+ }
+ }
+ if (firstLine == null) {
+ continue;
+ } else if (firstLine.startsWith("r ")) {
+ bdp.parse(allData, dateTime);
+ parsedStatuses++;
+ } else if (descriptorImportHistory.contains(fileDigest)) {
+ /* Skip server descriptors or extra-info descriptors if
+ * we parsed them before. */
+ skippedFiles++;
+ continue;
+ } else {
+ int start = -1, sig = -1, end = -1;
+ String startToken =
+ firstLine.startsWith("router ") ?
+ "router " : "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String endToken = "\n-----END SIGNATURE-----\n";
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ sig = ascii.indexOf(sigToken, start);
+ if (sig < 0) {
+ break;
+ }
+ sig += sigToken.length();
+ end = ascii.indexOf(endToken, sig);
+ if (end < 0) {
+ break;
+ }
+ end += endToken.length();
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0,
+ end - start);
+ String descriptorDigest = Hex.encodeHexString(
+ DigestUtils.sha(descBytes));
+ if (!descriptorImportHistory.contains(
+ descriptorDigest)) {
+ bdp.parse(descBytes, dateTime);
+ descriptorImportHistory.add(descriptorDigest);
+ if (firstLine.startsWith("router ")) {
+ parsedServerDescriptors++;
+ } else {
+ parsedExtraInfoDescriptors++;
+ }
+ } else {
+ if (firstLine.startsWith("router ")) {
+ skippedServerDescriptors++;
+ } else {
+ skippedExtraInfoDescriptors++;
+ }
+ }
+ }
+ }
+ descriptorImportHistory.add(fileDigest);
+ parsedFiles++;
+ }
+ bis.close();
+ }
+ in.close();
+
+ /* Let's give some memory back, or we'll run out of it. */
+ System.gc();
+
+ parsed.add(pop.getName());
+ modified = true;
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not parse bridge snapshot "
+ + pop.getName() + "!", e);
+ continue;
+ }
+ }
+ }
+ logger.fine("Finished importing files in directory "
+ + bridgeDirectoriesDir + "/. In total, we parsed "
+ + parsedFiles + " files (skipped " + skippedFiles
+ + ") containing " + parsedStatuses + " statuses, "
+ + parsedServerDescriptors + " server descriptors (skipped "
+ + skippedServerDescriptors + "), and "
+ + parsedExtraInfoDescriptors + " extra-info descriptors "
+ + "(skipped " + skippedExtraInfoDescriptors + ").");
+ if (!parsed.isEmpty() && modified) {
+ logger.fine("Writing file " + pbdFile.getAbsolutePath() + "...");
+ try {
+ pbdFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile));
+ for (String f : parsed) {
+ bw.append(f + "\n");
+ }
+ bw.close();
+ logger.fine("Finished writing file " + pbdFile.getAbsolutePath()
+ + ".");
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed writing file "
+ + pbdFile.getAbsolutePath() + "!", e);
+ }
+ }
+ }
+ }
+}
+
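The inner loop above slices a concatenated descriptor blob into individual descriptors by scanning for a start token, the router-signature keyword, and the end of the signature block. The same scan in isolation, as a self-contained sketch (class and method names are hypothetical):

  import java.util.ArrayList;
  import java.util.List;

  public class DescriptorSliceSketch {
    /* Slice one concatenated ASCII blob into individual descriptors,
     * each running from a start token ("router " or "extra-info ")
     * through the end of its signature block. */
    static List<String> sliceDescriptors(String ascii,
        String startToken) {
      List<String> descriptors = new ArrayList<String>();
      String sigToken = "\nrouter-signature\n";
      String endToken = "\n-----END SIGNATURE-----\n";
      int end = 0;
      while (end < ascii.length()) {
        int start = ascii.indexOf(startToken, end);
        if (start < 0) {
          break;
        }
        int sig = ascii.indexOf(sigToken, start);
        if (sig < 0) {
          break;
        }
        sig += sigToken.length();
        end = ascii.indexOf(endToken, sig);
        if (end < 0) {
          break;
        }
        end += endToken.length();
        descriptors.add(ascii.substring(start, end));
      }
      return descriptors;
    }
  }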
diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
new file mode 100644
index 0000000..ec7ad4b
--- /dev/null
+++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
@@ -0,0 +1,911 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.bridgedescs;
+
+import java.io.*;
+import java.security.*;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.*;
+import java.util.logging.*;
+
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.digest.*;
+import org.apache.commons.codec.binary.*;
+
+/**
+ * Sanitizes bridge descriptors, i.e., removes all possibly sensitive
+ * information from them, and writes them to a local directory structure.
+ * During the sanitizing process, all information about the bridge
+ * identity or IP address is removed or replaced. The goal is to keep the
+ * sanitized bridge descriptors useful for statistical analysis while not
+ * making it easier for an adversary to enumerate bridges.
+ *
+ * There are three types of bridge descriptors: bridge network statuses
+ * (lists of all bridges at a given time), server descriptors (published
+ * by each bridge to advertise its capabilities), and extra-info
+ * descriptors (published by each bridge, mainly for statistical analysis).
+ */
+public class SanitizedBridgesWriter {
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ /**
+ * Output directory for writing sanitized bridge descriptors.
+ */
+ private File sanitizedBridgesDirectory;
+
+ private boolean replaceIPAddressesWithHashes;
+
+ private boolean persistenceProblemWithSecrets;
+
+ private SortedMap<String, byte[]> secretsForHashingIPAddresses;
+
+ private String bridgeSanitizingCutOffTimestamp;
+
+ private boolean haveWarnedAboutInterval;
+
+ private File bridgeIpSecretsFile;
+
+ private SecureRandom secureRandom;
+
+ /**
+ * Initializes this class.
+ */
+ public SanitizedBridgesWriter(File sanitizedBridgesDirectory,
+ File statsDirectory, boolean replaceIPAddressesWithHashes,
+ long limitBridgeSanitizingInterval) {
+
+ if (sanitizedBridgesDirectory == null || statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ /* Memorize argument values. */
+ this.sanitizedBridgesDirectory = sanitizedBridgesDirectory;
+ this.replaceIPAddressesWithHashes = replaceIPAddressesWithHashes;
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(
+ SanitizedBridgesWriter.class.getName());
+
+ /* Initialize secure random number generator if we need it. */
+ if (this.replaceIPAddressesWithHashes) {
+ try {
+ this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN");
+ } catch (GeneralSecurityException e) {
+ this.logger.log(Level.WARNING, "Could not initialize secure "
+ + "random number generator! Not calculating any IP address "
+ + "hashes in this execution!", e);
+ this.persistenceProblemWithSecrets = true;
+ }
+ }
+
+ /* Read hex-encoded secrets for replacing IP addresses with hashes
+ * from disk. */
+ this.secretsForHashingIPAddresses = new TreeMap<String, byte[]>();
+ this.bridgeIpSecretsFile = new File(statsDirectory,
+ "bridge-ip-secrets");
+ if (this.bridgeIpSecretsFile.exists()) {
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.bridgeIpSecretsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(",");
+ if ((line.length() != ("yyyy-MM,".length() + 31 * 2) &&
+ line.length() != ("yyyy-MM,".length() + 50 * 2)) ||
+ parts.length != 2) {
+ this.logger.warning("Invalid line in bridge-ip-secrets file "
+ + "starting with '" + line.substring(0, 7) + "'! "
+ + "Not calculating any IP address hashes in this "
+ + "execution!");
+ this.persistenceProblemWithSecrets = true;
+ break;
+ }
+ String month = parts[0];
+ byte[] secret = Hex.decodeHex(parts[1].toCharArray());
+ this.secretsForHashingIPAddresses.put(month, secret);
+ }
+ br.close();
+ if (!this.persistenceProblemWithSecrets) {
+ this.logger.fine("Read "
+ + this.secretsForHashingIPAddresses.size() + " secrets for "
+ + "hashing bridge IP addresses.");
+ }
+ } catch (DecoderException e) {
+ this.logger.log(Level.WARNING, "Failed to decode hex string in "
+ + this.bridgeIpSecretsFile + "! Not calculating any IP "
+ + "address hashes in this execution!", e);
+ this.persistenceProblemWithSecrets = true;
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read "
+ + this.bridgeIpSecretsFile + "! Not calculating any IP "
+ + "address hashes in this execution!", e);
+ this.persistenceProblemWithSecrets = true;
+ }
+ }
+
+ /* If we're configured to keep secrets only for a limited time, define
+ * the cut-off day and time. */
+ if (limitBridgeSanitizingInterval >= 0L) {
+ SimpleDateFormat formatter = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ this.bridgeSanitizingCutOffTimestamp = formatter.format(
+ System.currentTimeMillis() - 24L * 60L * 60L * 1000L
+ * limitBridgeSanitizingInterval);
+ } else {
+ this.bridgeSanitizingCutOffTimestamp = "1999-12-31 23:59:59";
+ }
+ }
+
+ private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
+ String published) throws IOException {
+ if (!orAddress.contains(":")) {
+ /* Malformed or-address or "a" line. */
+ return null;
+ }
+ String addressPart = orAddress.substring(0,
+ orAddress.lastIndexOf(":"));
+ String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1);
+ String scrubbedAddressPart = null;
+ if (addressPart.startsWith("[")) {
+ scrubbedAddressPart = this.scrubIpv6Address(addressPart,
+ fingerprintBytes, published);
+ } else {
+ scrubbedAddressPart = this.scrubIpv4Address(addressPart,
+ fingerprintBytes, published);
+ }
+ return (scrubbedAddressPart == null ? null :
+ scrubbedAddressPart + ":" + portPart);
+ }
+
+ private String scrubIpv4Address(String address, byte[] fingerprintBytes,
+ String published) throws IOException {
+ if (this.replaceIPAddressesWithHashes) {
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return null;
+ }
+ byte[] hashInput = new byte[4 + 20 + 31];
+ String[] ipParts = address.split("\\.");
+ for (int i = 0; i < 4; i++) {
+ hashInput[i] = (byte) Integer.parseInt(ipParts[i]);
+ }
+ System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
+ String month = published.substring(0, "yyyy-MM".length());
+ byte[] secret = this.getSecretForMonth(month);
+ System.arraycopy(secret, 0, hashInput, 24, 31);
+ byte[] hashOutput = DigestUtils.sha256(hashInput);
+ String hashedAddress = "10."
+ + (((int) hashOutput[0] + 256) % 256) + "."
+ + (((int) hashOutput[1] + 256) % 256) + "."
+ + (((int) hashOutput[2] + 256) % 256);
+ return hashedAddress;
+ } else {
+ return "127.0.0.1";
+ }
+ }
+
+ private String scrubIpv6Address(String address, byte[] fingerprintBytes,
+ String published) throws IOException {
+ StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::");
+ if (this.replaceIPAddressesWithHashes) {
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return null;
+ }
+ byte[] hashInput = new byte[16 + 20 + 19];
+ String[] doubleColonSeparatedParts = address.substring(1,
+ address.length() - 1).split("::", -1);
+ if (doubleColonSeparatedParts.length > 2) {
+ /* Invalid IPv6 address. */
+ return null;
+ }
+ List<String> hexParts = new ArrayList<String>();
+ for (String doubleColonSeparatedPart : doubleColonSeparatedParts) {
+ StringBuilder hexPart = new StringBuilder();
+ String[] parts = doubleColonSeparatedPart.split(":", -1);
+ if (parts.length < 1 || parts.length > 8) {
+ /* Invalid IPv6 address. */
+ return null;
+ }
+ for (int i = 0; i < parts.length; i++) {
+ String part = parts[i];
+ if (part.contains(".")) {
+ String[] ipParts = part.split("\\.");
+ byte[] ipv4Bytes = new byte[4];
+ if (ipParts.length != 4) {
+ /* Invalid IPv4 part in IPv6 address. */
+ return null;
+ }
+ for (int m = 0; m < 4; m++) {
+ ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]);
+ }
+ hexPart.append(Hex.encodeHexString(ipv4Bytes));
+ } else if (part.length() > 4) {
+ /* Invalid IPv6 address. */
+ return null;
+ } else {
+ for (int k = part.length(); k < 4; k++) {
+ hexPart.append("0");
+ }
+ hexPart.append(part);
+ }
+ }
+ hexParts.add(hexPart.toString());
+ }
+ StringBuilder hex = new StringBuilder();
+ hex.append(hexParts.get(0));
+ if (hexParts.size() == 2) {
+ for (int i = 32 - hexParts.get(0).length()
+ - hexParts.get(1).length(); i > 0; i--) {
+ hex.append("0");
+ }
+ hex.append(hexParts.get(1));
+ }
+ byte[] ipBytes = null;
+ try {
+ ipBytes = Hex.decodeHex(hex.toString().toCharArray());
+ } catch (DecoderException e) {
+ /* TODO Invalid IPv6 address. */
+ return null;
+ }
+ if (ipBytes.length != 16) {
+ /* TODO Invalid IPv6 address. */
+ return null;
+ }
+ System.arraycopy(ipBytes, 0, hashInput, 0, 16);
+ System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20);
+ String month = published.substring(0, "yyyy-MM".length());
+ byte[] secret = this.getSecretForMonth(month);
+ System.arraycopy(secret, 31, hashInput, 36, 19);
+ String hashOutput = DigestUtils.sha256Hex(hashInput);
+ sb.append(hashOutput.substring(hashOutput.length() - 6,
+ hashOutput.length() - 4));
+ sb.append(":");
+ sb.append(hashOutput.substring(hashOutput.length() - 4));
+ }
+ sb.append("]");
+ return sb.toString();
+ }
+
+ private byte[] getSecretForMonth(String month) throws IOException {
+ if (!this.secretsForHashingIPAddresses.containsKey(month) ||
+ this.secretsForHashingIPAddresses.get(month).length == 31) {
+ byte[] secret = new byte[50];
+ this.secureRandom.nextBytes(secret);
+ if (this.secretsForHashingIPAddresses.containsKey(month)) {
+ System.arraycopy(this.secretsForHashingIPAddresses.get(month), 0,
+ secret, 0, 31);
+ }
+ if (month.compareTo(
+ this.bridgeSanitizingCutOffTimestamp) < 0) {
+ this.logger.warning("Generated a secret that we won't make "
+ + "persistent, because it's outside our bridge descriptor "
+ + "sanitizing interval.");
+ } else {
+ /* Append secret to file on disk immediately before using it, or
+ * we might end with inconsistently sanitized bridges. */
+ try {
+ if (!this.bridgeIpSecretsFile.exists()) {
+ this.bridgeIpSecretsFile.getParentFile().mkdirs();
+ }
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.bridgeIpSecretsFile,
+ this.bridgeIpSecretsFile.exists()));
+ bw.write(month + "," + Hex.encodeHexString(secret) + "\n");
+ bw.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not store new secret "
+ + "to disk! Not calculating any IP address hashes in "
+ + "this execution!", e);
+ this.persistenceProblemWithSecrets = true;
+ throw new IOException(e);
+ }
+ }
+ this.secretsForHashingIPAddresses.put(month, secret);
+ }
+ return this.secretsForHashingIPAddresses.get(month);
+ }
+
+ /**
+ * Sanitizes a network status and writes it to disk.
+ */
+ public void sanitizeAndStoreNetworkStatus(byte[] data,
+ String publicationTime) {
+
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return;
+ }
+
+ if (this.bridgeSanitizingCutOffTimestamp.
+ compareTo(publicationTime) > 0) {
+ this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING
+ : Level.FINE, "Sanitizing and storing network status with "
+ + "publication time outside our descriptor sanitizing "
+ + "interval.");
+ this.haveWarnedAboutInterval = true;
+ }
+
+ /* Parse the given network status line by line. */
+ SortedMap<String, String> scrubbedLines =
+ new TreeMap<String, String>();
+ try {
+ StringBuilder scrubbed = new StringBuilder();
+ BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, "US-ASCII")));
+ String line = null;
+ String mostRecentDescPublished = null;
+ byte[] fingerprintBytes = null;
+ String descPublicationTime = null;
+ String hashedBridgeIdentityHex = null;
+ while ((line = br.readLine()) != null) {
+
+ /* r lines contain sensitive information that needs to be removed
+ * or replaced. */
+ if (line.startsWith("r ")) {
+
+ /* Clear buffer from previously scrubbed lines. */
+ if (scrubbed.length() > 0) {
+ String scrubbedLine = scrubbed.toString();
+ scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
+ scrubbed = new StringBuilder();
+ }
+
+ /* Parse the relevant parts of this r line. */
+ String[] parts = line.split(" ");
+ String nickname = parts[1];
+ fingerprintBytes = Base64.decodeBase64(parts[2] + "==");
+ String descriptorIdentifier = parts[3];
+ descPublicationTime = parts[4] + " " + parts[5];
+ String address = parts[6];
+ String orPort = parts[7];
+ String dirPort = parts[8];
+
+ /* Determine most recent descriptor publication time. */
+ if (descPublicationTime.compareTo(publicationTime) <= 0 &&
+ (mostRecentDescPublished == null ||
+ descPublicationTime.compareTo(
+ mostRecentDescPublished) > 0)) {
+ mostRecentDescPublished = descPublicationTime;
+ }
+
+ /* Write scrubbed r line to buffer. */
+ byte[] hashedBridgeIdentity = DigestUtils.sha(fingerprintBytes);
+ String hashedBridgeIdentityBase64 = Base64.encodeBase64String(
+ hashedBridgeIdentity).substring(0, 27);
+ hashedBridgeIdentityHex = Hex.encodeHexString(
+ hashedBridgeIdentity);
+ String hashedDescriptorIdentifier = Base64.encodeBase64String(
+ DigestUtils.sha(Base64.decodeBase64(descriptorIdentifier
+ + "=="))).substring(0, 27);
+ String scrubbedAddress = scrubIpv4Address(address,
+ fingerprintBytes,
+ descPublicationTime);
+ scrubbed.append("r " + nickname + " "
+ + hashedBridgeIdentityBase64 + " "
+ + hashedDescriptorIdentifier + " " + descPublicationTime
+ + " " + scrubbedAddress + " " + orPort + " " + dirPort
+ + "\n");
+
+ /* Sanitize any addresses in a lines using the fingerprint and
+ * descriptor publication time from the previous r line. */
+ } else if (line.startsWith("a ")) {
+ String scrubbedOrAddress = scrubOrAddress(
+ line.substring("a ".length()), fingerprintBytes,
+ descPublicationTime);
+ if (scrubbedOrAddress != null) {
+ scrubbed.append("a " + scrubbedOrAddress + "\n");
+ } else {
+ this.logger.warning("Invalid address in line '" + line
+ + "' in bridge network status. Skipping line!");
+ }
+
+ /* Nothing special about s, w, and p lines; just copy them. */
+ } else if (line.startsWith("s ") || line.equals("s") ||
+ line.startsWith("w ") || line.equals("w") ||
+ line.startsWith("p ") || line.equals("p")) {
+ scrubbed.append(line + "\n");
+
+ /* There should be nothing else but r, a, s, w, and p lines in the
+ * network status. If there is, we should probably learn before
+ * writing anything to the sanitized descriptors. */
+ } else {
+ this.logger.fine("Unknown line '" + line + "' in bridge "
+ + "network status. Not writing to disk!");
+ return;
+ }
+ }
+ br.close();
+ if (scrubbed.length() > 0) {
+ String scrubbedLine = scrubbed.toString();
+ scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
+ scrubbed = new StringBuilder();
+ }
+
+ /* Check if we can tell from the descriptor publication times
+ * whether this status is possibly stale. */
+ SimpleDateFormat formatter = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ if (formatter.parse(publicationTime).getTime() -
+ formatter.parse(mostRecentDescPublished).getTime() >
+ 60L * 60L * 1000L) {
+ this.logger.warning("The most recent descriptor in the bridge "
+ + "network status published at " + publicationTime + " was "
+ + "published at " + mostRecentDescPublished + " which is "
+ + "more than 1 hour before the status. This is a sign for "
+ + "the status being stale. Please check!");
+ }
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse timestamp in "
+ + "bridge network status.", e);
+ return;
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse bridge network "
+ + "status.", e);
+ return;
+ }
+
+ /* Write the sanitized network status to disk. */
+ try {
+
+ /* Determine file name. */
+ String syear = publicationTime.substring(0, 4);
+ String smonth = publicationTime.substring(5, 7);
+ String sday = publicationTime.substring(8, 10);
+ String stime = publicationTime.substring(11, 13)
+ + publicationTime.substring(14, 16)
+ + publicationTime.substring(17, 19);
+ File statusFile = new File(
+ this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear
+ + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth
+ + sday + "-" + stime + "-"
+ + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
+
+ /* Create all parent directories to write this network status. */
+ statusFile.getParentFile().mkdirs();
+
+ /* Write sanitized network status to disk. */
+ BufferedWriter bw = new BufferedWriter(new FileWriter(statusFile));
+ bw.write("@type bridge-network-status 1.0\n");
+ bw.write("published " + publicationTime + "\n");
+ for (String scrubbed : scrubbedLines.values()) {
+ bw.write(scrubbed);
+ }
+ bw.close();
+
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not write sanitized bridge "
+ + "network status to disk.", e);
+ return;
+ }
+ }
+
+ /**
+ * Sanitizes a bridge server descriptor and writes it to disk.
+ */
+ public void sanitizeAndStoreServerDescriptor(byte[] data) {
+
+ if (this.persistenceProblemWithSecrets) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return;
+ }
+
+ /* Parse descriptor to generate a sanitized version. */
+ String scrubbedDesc = null, published = null;
+ try {
+ BufferedReader br = new BufferedReader(new StringReader(
+ new String(data, "US-ASCII")));
+ StringBuilder scrubbed = new StringBuilder();
+ String line = null, hashedBridgeIdentity = null, address = null,
+ routerLine = null, scrubbedAddress = null;
+ List<String> orAddresses = null, scrubbedOrAddresses = null;
+ boolean skipCrypto = false;
+ while ((line = br.readLine()) != null) {
+
+ /* Skip all crypto parts that might be used to derive the bridge's
+ * identity fingerprint. */
+ if (skipCrypto && !line.startsWith("-----END ")) {
+ continue;
+
+ /* Store the router line for later processing, because we may need
+ * the bridge identity fingerprint for replacing the IP address in
+ * the scrubbed version. */
+ } else if (line.startsWith("router ")) {
+ address = line.split(" ")[2];
+ routerLine = line;
+
+ /* Store or-address parts in a list and sanitize them when we have
+ * read the fingerprint. */
+ } else if (line.startsWith("or-address ")) {
+ if (orAddresses == null) {
+ orAddresses = new ArrayList<String>();
+ }
+ orAddresses.add(line.substring("or-address ".length()));
+
+ /* Parse the publication time to see if we're still inside the
+ * sanitizing interval. */
+ } else if (line.startsWith("published ")) {
+ published = line.substring("published ".length());
+ if (this.bridgeSanitizingCutOffTimestamp.
+ compareTo(published) > 0) {
+ this.logger.log(!this.haveWarnedAboutInterval
+ ? Level.WARNING : Level.FINE, "Sanitizing and storing "
+ + "server descriptor with publication time outside our "
+ + "descriptor sanitizing interval.");
+ this.haveWarnedAboutInterval = true;
+ }
+ scrubbed.append(line + "\n");
+
+ /* Parse the fingerprint to determine the hashed bridge
+ * identity. */
+ } else if (line.startsWith("opt fingerprint ") ||
+ line.startsWith("fingerprint ")) {
+ String fingerprint = line.substring(line.startsWith("opt ") ?
+ "opt fingerprint".length() : "fingerprint".length()).
+ replaceAll(" ", "").toLowerCase();
+ byte[] fingerprintBytes = Hex.decodeHex(
+ fingerprint.toCharArray());
+ hashedBridgeIdentity = DigestUtils.shaHex(fingerprintBytes).
+ toLowerCase();
+ try {
+ scrubbedAddress = scrubIpv4Address(address, fingerprintBytes,
+ published);
+ if (orAddresses != null) {
+ scrubbedOrAddresses = new ArrayList<String>();
+ for (String orAddress : orAddresses) {
+ String scrubbedOrAddress = scrubOrAddress(orAddress,
+ fingerprintBytes, published);
+ if (scrubbedOrAddress != null) {
+ scrubbedOrAddresses.add(scrubbedOrAddress);
+ } else {
+ this.logger.warning("Invalid address in line "
+ + "'or-address " + orAddress + "' in bridge server "
+ + "descriptor. Skipping line!");
+ }
+ }
+ }
+ } catch (IOException e) {
+ /* There's a persistence problem, so we shouldn't scrub more
+ * IP addresses in this execution. */
+ this.persistenceProblemWithSecrets = true;
+ return;
+ }
+ scrubbed.append((line.startsWith("opt ") ? "opt " : "")
+ + "fingerprint");
+ for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++)
+ scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i,
+ 4 * (i + 1)).toUpperCase());
+ scrubbed.append("\n");
+
+ /* Replace the contact line (if present) with a generic one. */
+ } else if (line.startsWith("contact ")) {
+ scrubbed.append("contact somebody\n");
+
+ /* When we reach the signature, we're done. Write the sanitized
+ * descriptor to disk below. */
+ } else if (line.startsWith("router-signature")) {
+ String[] routerLineParts = routerLine.split(" ");
+ scrubbedDesc = "router " + routerLineParts[1] + " "
+ + scrubbedAddress + " " + routerLineParts[3] + " "
+ + routerLineParts[4] + " " + routerLineParts[5] + "\n";
+ if (scrubbedOrAddresses != null) {
+ for (String scrubbedOrAddress : scrubbedOrAddresses) {
+ scrubbedDesc += "or-address "
+ + scrubbedOrAddress + "\n";
+ }
+ }
+ scrubbedDesc += scrubbed.toString();
+ break;
+
+ /* Replace extra-info digest with the hashed digest of the
+ * non-scrubbed descriptor. */
+ } else if (line.startsWith("opt extra-info-digest ") ||
+ line.startsWith("extra-info-digest ")) {
+ String extraInfoDescriptorIdentifier = line.substring(
+ line.indexOf("extra-info-digest ")
+ + "extra-info-digest ".length());
+ String hashedExtraInfoDescriptorIdentifier =
+ DigestUtils.shaHex(Hex.decodeHex(
+ extraInfoDescriptorIdentifier.toCharArray())).toUpperCase();
+ scrubbed.append((line.startsWith("opt ") ? "opt " : "")
+ + "extra-info-digest " + hashedExtraInfoDescriptorIdentifier
+ + "\n");
+
+ /* Possibly sanitize reject lines if they contain the bridge's own
+ * IP address. */
+ } else if (line.startsWith("reject ")) {
+ if (address != null && line.startsWith("reject " + address)) {
+ scrubbed.append("reject " + scrubbedAddress
+ + line.substring("reject ".length() + address.length())
+ + "\n");
+ } else {
+ scrubbed.append(line + "\n");
+ }
+
+ /* Write the following lines unmodified to the sanitized
+ * descriptor. */
+ } else if (line.startsWith("accept ")
+ || line.startsWith("platform ")
+ || line.startsWith("opt protocols ")
+ || line.startsWith("protocols ")
+ || line.startsWith("uptime ")
+ || line.startsWith("bandwidth ")
+ || line.startsWith("opt hibernating ")
+ || line.startsWith("hibernating ")
+ || line.equals("opt hidden-service-dir")
+ || line.equals("hidden-service-dir")
+ || line.equals("opt caches-extra-info")
+ || line.equals("caches-extra-info")
+ || line.equals("opt allow-single-hop-exits")
+ || line.equals("allow-single-hop-exits")) {
+ scrubbed.append(line + "\n");
+
+ /* Replace node fingerprints in the family line with their hashes
+ * and leave nicknames unchanged. */
+ } else if (line.startsWith("family ")) {
+ StringBuilder familyLine = new StringBuilder("family");
+ for (String s : line.substring(7).split(" ")) {
+ if (s.startsWith("$")) {
+ familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex(
+ s.substring(1).toCharArray())).toUpperCase());
+ } else {
+ familyLine.append(" " + s);
+ }
+ }
+ scrubbed.append(familyLine.toString() + "\n");
+
+ /* Skip the purpose line that the bridge authority adds to its
+ * cached-descriptors file. */
+ } else if (line.startsWith("@purpose ")) {
+ continue;
+
+ /* Skip all crypto parts that might leak the bridge's identity
+ * fingerprint. */
+ } else if (line.startsWith("-----BEGIN ")
+ || line.equals("onion-key") || line.equals("signing-key")) {
+ skipCrypto = true;
+
+ /* Stop skipping lines when the crypto parts are over. */
+ } else if (line.startsWith("-----END ")) {
+ skipCrypto = false;
+
+ /* If we encounter an unrecognized line, stop parsing and print
+ * out a warning. We might have overlooked sensitive information
+ * that we need to remove or replace for the sanitized descriptor
+ * version. */
+ } else {
+ this.logger.fine("Unrecognized line '" + line + "'. Skipping.");
+ return;
+ }
+ }
+ br.close();
+ } catch (Exception e) {
+ this.logger.log(Level.WARNING, "Could not parse server "
+ + "descriptor.", e);
+ return;
+ }
+
+ /* Determine filename of sanitized server descriptor. */
+ String descriptorDigest = null;
+ try {
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "router ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 && sig >= 0 && sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest));
+ }
+ } catch (UnsupportedEncodingException e) {
+ /* Handle below. */
+ }
+ if (descriptorDigest == null) {
+ this.logger.log(Level.WARNING, "Could not calculate server "
+ + "descriptor digest.");
+ return;
+ }
+ String dyear = published.substring(0, 4);
+ String dmonth = published.substring(5, 7);
+ File newFile = new File(
+ this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
+ + dyear + "/" + dmonth + "/server-descriptors/"
+ + descriptorDigest.charAt(0) + "/"
+ + descriptorDigest.charAt(1) + "/"
+ + descriptorDigest);
+
+ /* Write sanitized server descriptor to disk, including all its parent
+ * directories. */
+ try {
+ newFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
+ bw.write("@type bridge-server-descriptor 1.0\n");
+ bw.write(scrubbedDesc);
+ bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
+ bw.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not write sanitized server "
+ + "descriptor to disk.", e);
+ return;
+ }
+ }
+
+ /**
+ * Sanitizes an extra-info descriptor and writes it to disk.
+ */
+ public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) {
+
+ /* Parse descriptor to generate a sanitized version. */
+ String scrubbedDesc = null, published = null;
+ try {
+ BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, "US-ASCII")));
+ String line = null;
+ StringBuilder scrubbed = null;
+ String hashedBridgeIdentity = null;
+ while ((line = br.readLine()) != null) {
+
+ /* Parse bridge identity from extra-info line and replace it with
+ * its hash in the sanitized descriptor. */
+ String[] parts = line.split(" ");
+ if (line.startsWith("extra-info ")) {
+ hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex(
+ parts[2].toCharArray())).toLowerCase();
+ scrubbed = new StringBuilder("extra-info " + parts[1] + " "
+ + hashedBridgeIdentity.toUpperCase() + "\n");
+
+ /* Parse the publication time to determine the file name. */
+ } else if (line.startsWith("published ")) {
+ scrubbed.append(line + "\n");
+ published = line.substring("published ".length());
+
+ /* Remove everything from transport lines except the transport
+ * name. */
+ } else if (line.startsWith("transport ")) {
+ if (parts.length < 3) {
+ this.logger.fine("Illegal line in extra-info descriptor: '"
+ + line + "'. Skipping descriptor.");
+ return;
+ }
+ scrubbed.append("transport " + parts[1] + "\n");
+
+ /* Skip transport-info lines entirely. */
+ } else if (line.startsWith("transport-info ")) {
+
+ /* Write the following lines unmodified to the sanitized
+ * descriptor. */
+ } else if (line.startsWith("write-history ")
+ || line.startsWith("read-history ")
+ || line.startsWith("geoip-start-time ")
+ || line.startsWith("geoip-client-origins ")
+ || line.startsWith("geoip-db-digest ")
+ || line.startsWith("conn-bi-direct ")
+ || line.startsWith("bridge-")
+ || line.startsWith("dirreq-")
+ || line.startsWith("cell-")
+ || line.startsWith("entry-")
+ || line.startsWith("exit-")) {
+ scrubbed.append(line + "\n");
+
+ /* When we reach the signature, we're done. Write the sanitized
+ * descriptor to disk below. */
+ } else if (line.startsWith("router-signature")) {
+ scrubbedDesc = scrubbed.toString();
+ break;
+
+ /* If we encounter an unrecognized line, stop parsing and print
+ * out a warning. We might have overlooked sensitive information
+ * that we need to remove or replace for the sanitized descriptor
+ * version. */
+ } else {
+ this.logger.fine("Unrecognized line '" + line + "'. Skipping.");
+ return;
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse extra-info "
+ + "descriptor.", e);
+ return;
+ } catch (DecoderException e) {
+ this.logger.log(Level.WARNING, "Could not parse extra-info "
+ + "descriptor.", e);
+ return;
+ }
+
+ /* Determine filename of sanitized extra-info descriptor. */
+ String descriptorDigest = null;
+ try {
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 && sig >= 0 && sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ descriptorDigest = DigestUtils.shaHex(DigestUtils.sha(forDigest));
+ }
+ } catch (UnsupportedEncodingException e) {
+ /* Handle below. */
+ }
+ if (descriptorDigest == null) {
+ this.logger.log(Level.WARNING, "Could not calculate extra-info "
+ + "descriptor digest.");
+ return;
+ }
+ String dyear = published.substring(0, 4);
+ String dmonth = published.substring(5, 7);
+ File newFile = new File(
+ this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
+ + dyear + "/" + dmonth + "/extra-infos/"
+ + descriptorDigest.charAt(0) + "/"
+ + descriptorDigest.charAt(1) + "/"
+ + descriptorDigest);
+
+ /* Write sanitized extra-info descriptor to disk, including all its
+ * parent directories. */
+ try {
+ newFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
+ bw.write("@type bridge-extra-info 1.1\n");
+ bw.write(scrubbedDesc);
+ bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
+ bw.close();
+ } catch (Exception e) {
+ this.logger.log(Level.WARNING, "Could not write sanitized "
+ + "extra-info descriptor to disk.", e);
+ }
+ }
+
+ /**
+ * Rewrite all network statuses that might contain references to server
+ * descriptors we added or updated in this execution. This applies to
+ * all statuses that have been published up to 24 hours after any added
+ * or updated server descriptor.
+ */
+ public void finishWriting() {
+
+ /* Delete secrets that we don't need anymore. */
+ if (!this.secretsForHashingIPAddresses.isEmpty() &&
+ this.secretsForHashingIPAddresses.firstKey().compareTo(
+ this.bridgeSanitizingCutOffTimestamp) < 0) {
+ try {
+ int kept = 0, deleted = 0;
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.bridgeIpSecretsFile));
+ for (Map.Entry<String, byte[]> e :
+ this.secretsForHashingIPAddresses.entrySet()) {
+ if (e.getKey().compareTo(
+ this.bridgeSanitizingCutOffTimestamp) < 0) {
+ deleted++;
+ } else {
+ bw.write(e.getKey() + "," + Hex.encodeHexString(e.getValue())
+ + "\n");
+ kept++;
+ }
+ }
+ bw.close();
+ this.logger.info("Deleted " + deleted + " secrets that we don't "
+ + "need anymore and kept " + kept + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not store reduced set of "
+ + "secrets to disk! This is a bad sign, better check what's "
+ + "going on!", e);
+ }
+ }
+ }
+}
+
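Numerically, the IPv4 scrubbing above hashes 4 address bytes, the 20-byte bridge fingerprint, and a 31-byte monthly secret with SHA-256, then maps the first three digest bytes into 10.0.0.0/8. A standalone restatement of that computation, using java.security.MessageDigest in place of commons-codec (the class name is hypothetical):

  import java.security.MessageDigest;
  import java.security.NoSuchAlgorithmException;

  public class ScrubIpv4Sketch {
    static String scrub(String address, byte[] fingerprint,
        byte[] secret) throws NoSuchAlgorithmException {
      /* Hash input: 4 IP bytes + 20 fingerprint bytes + 31 secret
       * bytes, exactly as in scrubIpv4Address above. */
      byte[] hashInput = new byte[4 + 20 + 31];
      String[] ipParts = address.split("\\.");
      for (int i = 0; i < 4; i++) {
        hashInput[i] = (byte) Integer.parseInt(ipParts[i]);
      }
      System.arraycopy(fingerprint, 0, hashInput, 4, 20);
      System.arraycopy(secret, 0, hashInput, 24, 31);
      byte[] digest = MessageDigest.getInstance("SHA-256")
          .digest(hashInput);
      /* (b & 0xff) is equivalent to ((b + 256) % 256) above. */
      return "10." + (digest[0] & 0xff) + "." + (digest[1] & 0xff)
          + "." + (digest[2] & 0xff);
    }
  }

The same secret and fingerprint always map a given address to the same 10.x.y.z value within a month, so per-bridge statistics remain consistent without revealing the real address.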
diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
new file mode 100644
index 0000000..470f6ab
--- /dev/null
+++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -0,0 +1,174 @@
+/* Copyright 2011--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.bridgepools;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+
+public class BridgePoolAssignmentsProcessor {
+
+ public BridgePoolAssignmentsProcessor(File assignmentsDirectory,
+ File sanitizedAssignmentsDirectory) {
+
+ Logger logger =
+ Logger.getLogger(BridgePoolAssignmentsProcessor.class.getName());
+ if (assignmentsDirectory == null ||
+ sanitizedAssignmentsDirectory == null) {
+ throw new IllegalArgumentException("Neither "
+ + "assignmentsDirectory nor sanitizedAssignmentsDirectory may "
+ + "be null!");
+ }
+
+ List<File> assignmentFiles = new ArrayList<File>();
+ Stack<File> files = new Stack<File>();
+ files.add(assignmentsDirectory);
+ while (!files.isEmpty()) {
+ File file = files.pop();
+ if (file.isDirectory()) {
+ files.addAll(Arrays.asList(file.listFiles()));
+ } else if (!file.getName().endsWith(".gz")) {
+ assignmentFiles.add(file);
+ }
+ }
+
+ SimpleDateFormat assignmentFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ assignmentFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat filenameFormat =
+ new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ for (File assignmentFile : assignmentFiles) {
+ logger.info("Processing bridge pool assignment file '"
+ + assignmentFile.getAbsolutePath() + "'...");
+ try {
+ BufferedReader br = null;
+ if (assignmentFile.getName().endsWith(".gz")) {
+ br = new BufferedReader(new InputStreamReader(
+ new GzipCompressorInputStream(new FileInputStream(
+ assignmentFile))));
+ } else {
+ br = new BufferedReader(new FileReader(assignmentFile));
+ }
+ String line, bridgePoolAssignmentLine = null;
+ SortedSet<String> sanitizedAssignments = new TreeSet<String>();
+ boolean wroteLastLine = false, skipBefore20120504125947 = true;
+ while ((line = br.readLine()) != null || !wroteLastLine) {
+ if (line != null && line.startsWith("bridge-pool-assignment ")) {
+ String[] parts = line.split(" ");
+ if (parts.length != 3) {
+ continue;
+ }
+ /* TODO Take out this temporary hack to ignore all assignments
+ * coming from ponticum when byblos was still the official
+ * BridgeDB host. */
+ if (line.compareTo(
+ "bridge-pool-assignment 2012-05-04 12:59:47") >= 0) {
+ skipBefore20120504125947 = false;
+ }
+ }
+ if (skipBefore20120504125947) {
+ if (line == null) {
+ break;
+ } else {
+ continue;
+ }
+ }
+ if (line == null ||
+ line.startsWith("bridge-pool-assignment ")) {
+ if (bridgePoolAssignmentLine != null) {
+ try {
+ long bridgePoolAssignmentTime = assignmentFormat.parse(
+ bridgePoolAssignmentLine.substring(
+ "bridge-pool-assignment ".length())).getTime();
+ File sanitizedAssignmentsFile = new File(
+ sanitizedAssignmentsDirectory, filenameFormat.format(
+ bridgePoolAssignmentTime));
+ if (!sanitizedAssignmentsFile.exists()) {
+ sanitizedAssignmentsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ sanitizedAssignmentsFile));
+ bw.write("@type bridge-pool-assignment 1.0\n");
+ bw.write(bridgePoolAssignmentLine + "\n");
+ for (String assignmentLine : sanitizedAssignments) {
+ bw.write(assignmentLine + "\n");
+ }
+ bw.close();
+ }
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write sanitized "
+ + "bridge pool assignment file for line '"
+ + bridgePoolAssignmentLine + "' to disk. Skipping "
+ + "bridge pool assignment file '"
+ + assignmentFile.getAbsolutePath() + "'.", e);
+ break;
+ } catch (ParseException e) {
+ logger.log(Level.WARNING, "Could not write sanitized "
+ + "bridge pool assignment file for line '"
+ + bridgePoolAssignmentLine + "' to disk. Skipping "
+ + "bridge pool assignment file '"
+ + assignmentFile.getAbsolutePath() + "'.", e);
+ break;
+ }
+ sanitizedAssignments.clear();
+ }
+ if (line == null) {
+ wroteLastLine = true;
+ } else {
+ bridgePoolAssignmentLine = line;
+ }
+ } else {
+ String[] parts = line.split(" ");
+ if (parts.length < 2 || parts[0].length() < 40) {
+ logger.warning("Unrecognized line '" + line
+ + "'. Aborting.");
+ break;
+ }
+ String hashedFingerprint = null;
+ try {
+ hashedFingerprint = DigestUtils.shaHex(Hex.decodeHex(
+ line.split(" ")[0].toCharArray())).toLowerCase();
+ } catch (DecoderException e) {
+ logger.warning("Unable to decode hex fingerprint in line '"
+ + line + "'. Aborting.");
+ break;
+ }
+ String assignmentDetails = line.substring(40);
+ sanitizedAssignments.add(hashedFingerprint
+ + assignmentDetails);
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not read bridge pool assignment "
+ + "file '" + assignmentFile.getAbsolutePath()
+ + "'. Skipping.", e);
+ }
+ }
+
+ logger.info("Finished processing bridge pool assignment file(s).");
+ }
+}
+
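The only transformation applied to assignment lines above is replacing the 40-character hex fingerprint with the SHA-1 hash of its decoded bytes. That step in isolation (hypothetical class name, same commons-codec calls as above):

  import org.apache.commons.codec.DecoderException;
  import org.apache.commons.codec.binary.Hex;
  import org.apache.commons.codec.digest.DigestUtils;

  public class HashFingerprintSketch {
    /* Decode 40 hex characters to 20 bytes, SHA-1 them, and re-encode
     * as lowercase hex, so sanitized assignments cannot be linked back
     * to the original bridge fingerprint. */
    static String hashFingerprint(String hexFingerprint)
        throws DecoderException {
      return DigestUtils.shaHex(
          Hex.decodeHex(hexFingerprint.toCharArray())).toLowerCase();
    }
  }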
diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
new file mode 100644
index 0000000..64f6a3b
--- /dev/null
+++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
@@ -0,0 +1,100 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.exitlists;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+public class ExitListDownloader {
+ public ExitListDownloader() {
+ Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
+ try {
+ logger.fine("Downloading exit list...");
+ String exitAddressesUrl =
+ "http://exitlist.torproject.org/exit-addresses";
+ URL u = new URL(exitAddressesUrl);
+ HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+ huc.setRequestMethod("GET");
+ huc.connect();
+ int response = huc.getResponseCode();
+ if (response != 200) {
+ logger.warning("Could not download exit list. Response code " +
+ response);
+ return;
+ }
+ BufferedInputStream in = new BufferedInputStream(
+ huc.getInputStream());
+ SimpleDateFormat printFormat =
+ new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ Date downloadedDate = new Date();
+ File exitListFile = new File("exitlist/" + printFormat.format(
+ downloadedDate));
+ exitListFile.getParentFile().mkdirs();
+ SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ exitListFile));
+ bw.write("@type tordnsel 1.0\n");
+ bw.write("Downloaded " + dateTimeFormat.format(downloadedDate)
+ + "\n");
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = in.read(data, 0, 1024)) >= 0) {
+ bw.write(new String(data, 0, len));
+ }
+ in.close();
+ bw.close();
+ logger.fine("Finished downloading exit list.");
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed downloading exit list", e);
+ return;
+ }
+
+ /* Write stats. */
+ StringBuilder dumpStats = new StringBuilder("Finished downloading "
+ + "exit list.\nLast three exit lists are:");
+ Stack<File> filesInInputDir = new Stack<File>();
+ filesInInputDir.add(new File("exitlist"));
+ SortedSet<File> lastThreeExitLists = new TreeSet<File>();
+ while (!filesInInputDir.isEmpty()) {
+ File pop = filesInInputDir.pop();
+ if (pop.isDirectory()) {
+ SortedSet<File> lastThreeElements = new TreeSet<File>();
+ for (File f : pop.listFiles()) {
+ lastThreeElements.add(f);
+ }
+ while (lastThreeElements.size() > 3) {
+ lastThreeElements.remove(lastThreeElements.first());
+ }
+ for (File f : lastThreeElements) {
+ filesInInputDir.add(f);
+ }
+ } else {
+ lastThreeExitLists.add(pop);
+ while (lastThreeExitLists.size() > 3) {
+ lastThreeExitLists.remove(lastThreeExitLists.first());
+ }
+ }
+ }
+ for (File f : lastThreeExitLists) {
+ dumpStats.append("\n" + f.getName());
+ }
+ logger.info(dumpStats.toString());
+ }
+}
+
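The stats code above keeps only the three most recent entries at each directory level by repeatedly evicting the smallest element of a TreeSet; file names embed timestamps, so lexicographic order equals chronological order. The idiom on its own (hypothetical class name):

  import java.io.File;
  import java.util.SortedSet;
  import java.util.TreeSet;

  public class LastThreeSketch {
    /* Keep the three lexicographically largest files, which for
     * timestamp-named exit lists are the three most recent ones. */
    static SortedSet<File> lastThree(Iterable<File> files) {
      SortedSet<File> result = new TreeSet<File>();
      for (File f : files) {
        result.add(f);
        while (result.size() > 3) {
          result.remove(result.first());
        }
      }
      return result;
    }
  }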
diff --git a/src/org/torproject/ernie/db/main/Configuration.java b/src/org/torproject/ernie/db/main/Configuration.java
new file mode 100644
index 0000000..adf22cc
--- /dev/null
+++ b/src/org/torproject/ernie/db/main/Configuration.java
@@ -0,0 +1,359 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.main;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Initialize the configuration with hard-coded defaults, overwrite it
+ * with settings from the config file if one exists, and answer
+ * Main.java's requests for configuration values.
+ */
+public class Configuration {
+ private boolean writeDirectoryArchives = false;
+ private String directoryArchivesOutputDirectory = "directory-archive/";
+ private boolean importCachedRelayDescriptors = false;
+ private List<String> cachedRelayDescriptorsDirectory =
+ new ArrayList<String>(Arrays.asList("cacheddesc/".split(",")));
+ private boolean importDirectoryArchives = false;
+ private String directoryArchivesDirectory = "archives/";
+ private boolean keepDirectoryArchiveImportHistory = false;
+ private boolean writeSanitizedBridges = false;
+ private boolean replaceIPAddressesWithHashes = false;
+ private long limitBridgeDescriptorMappings = -1L;
+ private String sanitizedBridgesWriteDirectory = "sanitized-bridges/";
+ private boolean importBridgeSnapshots = false;
+ private String bridgeSnapshotsDirectory = "bridge-directories/";
+ private boolean downloadRelayDescriptors = false;
+ private List<String> downloadFromDirectoryAuthorities = Arrays.asList((
+ "86.59.21.38,76.73.17.194:9030,213.115.239.118:443,"
+ + "193.23.244.244,208.83.223.34:443,128.31.0.34:9131,"
+ + "194.109.206.212,212.112.245.170").split(","));
+ private boolean downloadCurrentConsensus = true;
+ private boolean downloadCurrentVotes = true;
+ private boolean downloadMissingServerDescriptors = true;
+ private boolean downloadMissingExtraInfoDescriptors = true;
+ private boolean downloadAllServerDescriptors = false;
+ private boolean downloadAllExtraInfoDescriptors = false;
+ private boolean compressRelayDescriptorDownloads = false;
+ private boolean downloadExitList = false;
+ private boolean processBridgePoolAssignments = false;
+ private String assignmentsDirectory = "assignments/";
+ private String sanitizedAssignmentsDirectory = "sanitized-assignments/";
+ private boolean processTorperfFiles = false;
+ private String torperfOutputDirectory = "torperf/";
+ private SortedMap<String, String> torperfSources = null;
+ private List<String> torperfFiles = null;
+ private boolean provideFilesViaRsync = false;
+ private String rsyncDirectory = "rsync";
+ public Configuration() {
+
+ /* Initialize logger. */
+ Logger logger = Logger.getLogger(Configuration.class.getName());
+
+ /* Read config file, if present. */
+ File configFile = new File("config");
+ if (!configFile.exists()) {
+ logger.warning("Could not find config file. In the default "
+ + "configuration, we are not configured to read data from any "
+ + "data source or write data to any data sink. You need to "
+ + "create a config file (" + configFile.getAbsolutePath()
+ + ") and provide at least one data source and one data sink. "
+ + "Refer to the manual for more information.");
+ return;
+ }
+ String line = null;
+ boolean containsCachedRelayDescriptorsDirectory = false;
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(configFile));
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("#") || line.length() < 1) {
+ continue;
+ } else if (line.startsWith("WriteDirectoryArchives")) {
+ this.writeDirectoryArchives = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DirectoryArchivesOutputDirectory")) {
+ this.directoryArchivesOutputDirectory = line.split(" ")[1];
+ } else if (line.startsWith("ImportCachedRelayDescriptors")) {
+ this.importCachedRelayDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("CachedRelayDescriptorsDirectory")) {
+ if (!containsCachedRelayDescriptorsDirectory) {
+ this.cachedRelayDescriptorsDirectory.clear();
+ containsCachedRelayDescriptorsDirectory = true;
+ }
+ this.cachedRelayDescriptorsDirectory.add(line.split(" ")[1]);
+ } else if (line.startsWith("ImportDirectoryArchives")) {
+ this.importDirectoryArchives = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DirectoryArchivesDirectory")) {
+ this.directoryArchivesDirectory = line.split(" ")[1];
+ } else if (line.startsWith("KeepDirectoryArchiveImportHistory")) {
+ this.keepDirectoryArchiveImportHistory = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("WriteSanitizedBridges")) {
+ this.writeSanitizedBridges = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("ReplaceIPAddressesWithHashes")) {
+ this.replaceIPAddressesWithHashes = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("LimitBridgeDescriptorMappings")) {
+ this.limitBridgeDescriptorMappings = Long.parseLong(
+ line.split(" ")[1]);
+ } else if (line.startsWith("SanitizedBridgesWriteDirectory")) {
+ this.sanitizedBridgesWriteDirectory = line.split(" ")[1];
+ } else if (line.startsWith("ImportBridgeSnapshots")) {
+ this.importBridgeSnapshots = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("BridgeSnapshotsDirectory")) {
+ this.bridgeSnapshotsDirectory = line.split(" ")[1];
+ } else if (line.startsWith("DownloadRelayDescriptors")) {
+ this.downloadRelayDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadFromDirectoryAuthorities")) {
+ this.downloadFromDirectoryAuthorities = new ArrayList<String>();
+ for (String dir : line.split(" ")[1].split(",")) {
+ // test if IP:port pair has correct format
+ if (dir.length() < 1) {
+ logger.severe("Configuration file contains directory "
+ + "authority IP:port of length 0 in line '" + line
+ + "'! Exiting!");
+ System.exit(1);
+ }
+ new URL("http://" + dir + "/");
+ this.downloadFromDirectoryAuthorities.add(dir);
+ }
+ } else if (line.startsWith("DownloadCurrentConsensus")) {
+ this.downloadCurrentConsensus = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadCurrentVotes")) {
+ this.downloadCurrentVotes = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadMissingServerDescriptors")) {
+ this.downloadMissingServerDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith(
+ "DownloadMissingExtraInfoDescriptors")) {
+ this.downloadMissingExtraInfoDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadAllServerDescriptors")) {
+ this.downloadAllServerDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadAllExtraInfoDescriptors")) {
+ this.downloadAllExtraInfoDescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("CompressRelayDescriptorDownloads")) {
+ this.compressRelayDescriptorDownloads = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadExitList")) {
+ this.downloadExitList = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("ProcessBridgePoolAssignments")) {
+ this.processBridgePoolAssignments = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("AssignmentsDirectory")) {
+ this.assignmentsDirectory = line.split(" ")[1];
+ } else if (line.startsWith("SanitizedAssignmentsDirectory")) {
+ this.sanitizedAssignmentsDirectory = line.split(" ")[1];
+ } else if (line.startsWith("ProcessTorperfFiles")) {
+ this.processTorperfFiles = Integer.parseInt(line.split(" ")[1])
+ != 0;
+ } else if (line.startsWith("TorperfOutputDirectory")) {
+ } else if (line.startsWith("TorperfSource")) {
+ if (this.torperfSources == null) {
+ this.torperfSources = new TreeMap<String, String>();
+ }
+ String[] parts = line.split(" ");
+ String sourceName = parts[1];
+ String baseUrl = parts[2];
+ this.torperfSources.put(sourceName, baseUrl);
+ } else if (line.startsWith("TorperfFiles")) {
+ if (this.torperfFiles == null) {
+ this.torperfFiles = new ArrayList<String>();
+ }
+ String[] parts = line.split(" ");
+ if (parts.length != 5) {
+ logger.severe("Configuration file contains TorperfFiles "
+ + "option with wrong number of values in line '" + line
+ + "'! Exiting!");
+ System.exit(1);
+ }
+ this.torperfFiles.add(line);
+ } else if (line.startsWith("ProvideFilesViaRsync")) {
+ this.provideFilesViaRsync = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("RsyncDirectory")) {
+ this.rsyncDirectory = line.split(" ")[1];
+ } else {
+ logger.severe("Configuration file contains unrecognized "
+ + "configuration key in line '" + line + "'! Exiting!");
+ System.exit(1);
+ }
+ }
+ br.close();
+ } catch (ArrayIndexOutOfBoundsException e) {
+ logger.severe("Configuration file contains configuration key "
+ + "without value in line '" + line + "'. Exiting!");
+ System.exit(1);
+ } catch (MalformedURLException e) {
+ logger.severe("Configuration file contains illegal URL or IP:port "
+ + "pair in line '" + line + "'. Exiting!");
+ System.exit(1);
+ } catch (NumberFormatException e) {
+ logger.severe("Configuration file contains illegal value in line '"
+ + line + "' with legal values being 0 or 1. Exiting!");
+ System.exit(1);
+ } catch (IOException e) {
+ logger.log(Level.SEVERE, "Unknown problem while reading config "
+ + "file! Exiting!", e);
+ System.exit(1);
+ }
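+
+ /* For reference, a minimal example configuration that this parser
+ * accepts, with hypothetical values; each line holds one option name
+ * and its value(s) separated by single spaces, and boolean options
+ * take 0 or 1:
+ *
+ * WriteDirectoryArchives 1
+ * DirectoryArchivesOutputDirectory directory-archive/
+ * ImportBridgeSnapshots 0
+ * TorperfSource torperf http://example.org/torperf/
+ */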
+
+ /* Run a few checks to see whether the configuration is valid. */
+ if (!this.importCachedRelayDescriptors &&
+ !this.importDirectoryArchives && !this.downloadRelayDescriptors &&
+ !this.importBridgeSnapshots &&
+ !this.downloadExitList && !this.processBridgePoolAssignments &&
+ !this.writeDirectoryArchives && !this.writeSanitizedBridges &&
+ !this.processTorperfFiles) {
+ logger.warning("We have not been configured to read data from any "
+ + "data source or write data to any data sink. You need to "
+ + "edit your config file (" + configFile.getAbsolutePath()
+ + ") and provide at least one data source and one data sink. "
+ + "Refer to the manual for more information.");
+ }
+ if ((this.importCachedRelayDescriptors ||
+ this.importDirectoryArchives || this.downloadRelayDescriptors) &&
+ !this.writeDirectoryArchives) {
+ logger.warning("We are configured to import/download relay "
+ + "descriptors, but we don't have a single data sink to write "
+ + "relay descriptors to.");
+ }
+ if (!(this.importCachedRelayDescriptors ||
+ this.importDirectoryArchives || this.downloadRelayDescriptors) &&
+ this.writeDirectoryArchives) {
+ logger.warning("We are configured to write relay descriptor to at "
+ + "least one data sink, but we don't have a single data source "
+ + "containing relay descriptors.");
+ }
+ if (this.importBridgeSnapshots && !this.writeSanitizedBridges) {
+ logger.warning("We are configured to import/download bridge "
+ + "descriptors, but we don't have a single data sink to write "
+ + "bridge descriptors to.");
+ }
+ if (!this.importBridgeSnapshots && this.writeSanitizedBridges) {
+ logger.warning("We are configured to write bridge descriptor to at "
+ + "least one data sink, but we don't have a single data source "
+ + "containing bridge descriptors.");
+ }
+ }
+ public boolean getWriteDirectoryArchives() {
+ return this.writeDirectoryArchives;
+ }
+ public String getDirectoryArchivesOutputDirectory() {
+ return this.directoryArchivesOutputDirectory;
+ }
+ public boolean getImportCachedRelayDescriptors() {
+ return this.importCachedRelayDescriptors;
+ }
+ public List<String> getCachedRelayDescriptorDirectory() {
+ return this.cachedRelayDescriptorsDirectory;
+ }
+ public boolean getImportDirectoryArchives() {
+ return this.importDirectoryArchives;
+ }
+ public String getDirectoryArchivesDirectory() {
+ return this.directoryArchivesDirectory;
+ }
+ public boolean getKeepDirectoryArchiveImportHistory() {
+ return this.keepDirectoryArchiveImportHistory;
+ }
+ public boolean getWriteSanitizedBridges() {
+ return this.writeSanitizedBridges;
+ }
+ public boolean getReplaceIPAddressesWithHashes() {
+ return this.replaceIPAddressesWithHashes;
+ }
+ public long getLimitBridgeDescriptorMappings() {
+ return this.limitBridgeDescriptorMappings;
+ }
+ public String getSanitizedBridgesWriteDirectory() {
+ return this.sanitizedBridgesWriteDirectory;
+ }
+ public boolean getImportBridgeSnapshots() {
+ return this.importBridgeSnapshots;
+ }
+ public String getBridgeSnapshotsDirectory() {
+ return this.bridgeSnapshotsDirectory;
+ }
+ public boolean getDownloadRelayDescriptors() {
+ return this.downloadRelayDescriptors;
+ }
+ public List<String> getDownloadFromDirectoryAuthorities() {
+ return this.downloadFromDirectoryAuthorities;
+ }
+ public boolean getDownloadCurrentConsensus() {
+ return this.downloadCurrentConsensus;
+ }
+ public boolean getDownloadCurrentVotes() {
+ return this.downloadCurrentVotes;
+ }
+ public boolean getDownloadMissingServerDescriptors() {
+ return this.downloadMissingServerDescriptors;
+ }
+ public boolean getDownloadMissingExtraInfoDescriptors() {
+ return this.downloadMissingExtraInfoDescriptors;
+ }
+ public boolean getDownloadAllServerDescriptors() {
+ return this.downloadAllServerDescriptors;
+ }
+ public boolean getDownloadAllExtraInfoDescriptors() {
+ return this.downloadAllExtraInfoDescriptors;
+ }
+ public boolean getCompressRelayDescriptorDownloads() {
+ return this.compressRelayDescriptorDownloads;
+ }
+ public boolean getDownloadExitList() {
+ return this.downloadExitList;
+ }
+ public boolean getProcessBridgePoolAssignments() {
+ return processBridgePoolAssignments;
+ }
+ public String getAssignmentsDirectory() {
+ return assignmentsDirectory;
+ }
+ public String getSanitizedAssignmentsDirectory() {
+ return sanitizedAssignmentsDirectory;
+ }
+ public boolean getProcessTorperfFiles() {
+ return this.processTorperfFiles;
+ }
+ public String getTorperfOutputDirectory() {
+ return this.torperfOutputDirectory;
+ }
+ public SortedMap<String, String> getTorperfSources() {
+ return this.torperfSources;
+ }
+ public List<String> getTorperfFiles() {
+ return this.torperfFiles;
+ }
+ public boolean getProvideFilesViaRsync() {
+ return this.provideFilesViaRsync;
+ }
+ public String getRsyncDirectory() {
+ return this.rsyncDirectory;
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/main/LockFile.java b/src/org/torproject/ernie/db/main/LockFile.java
new file mode 100644
index 0000000..68375ec
--- /dev/null
+++ b/src/org/torproject/ernie/db/main/LockFile.java
@@ -0,0 +1,52 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.main;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.logging.Logger;
+
+public class LockFile {
+
+ private File lockFile;
+ private Logger logger;
+
+ public LockFile() {
+ this.lockFile = new File("lock");
+ this.logger = Logger.getLogger(LockFile.class.getName());
+ }
+
+ public boolean acquireLock() {
+ this.logger.fine("Trying to acquire lock...");
+ try {
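+ /* If a lock file exists and was written less than 55 minutes ago,
+ * assume that another run is still active (presumably because runs
+ * are scheduled roughly hourly) and give up; an older lock file is
+ * considered stale and gets overwritten. */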
+ if (this.lockFile.exists()) {
+ BufferedReader br = new BufferedReader(new FileReader("lock"));
+ long runStarted = Long.parseLong(br.readLine());
+ br.close();
+ if (System.currentTimeMillis() - runStarted < 55L * 60L * 1000L) {
+ return false;
+ }
+ }
+ BufferedWriter bw = new BufferedWriter(new FileWriter("lock"));
+ bw.append("" + System.currentTimeMillis() + "\n");
+ bw.close();
+ this.logger.fine("Acquired lock.");
+ return true;
+ } catch (IOException e) {
+ this.logger.warning("Caught exception while trying to acquire "
+ + "lock!");
+ return false;
+ }
+ }
+
+ public void releaseLock() {
+ this.logger.fine("Releasing lock...");
+ this.lockFile.delete();
+ this.logger.fine("Released lock.");
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/main/LoggingConfiguration.java b/src/org/torproject/ernie/db/main/LoggingConfiguration.java
new file mode 100644
index 0000000..b0ddeaa
--- /dev/null
+++ b/src/org/torproject/ernie/db/main/LoggingConfiguration.java
@@ -0,0 +1,93 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.main;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+import java.util.logging.ConsoleHandler;
+import java.util.logging.FileHandler;
+import java.util.logging.Formatter;
+import java.util.logging.Handler;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+/**
+ * Initialize logging configuration.
+ *
+ * Log levels used by ERNIE:
+ *
+ * - SEVERE: An event made it impossible to continue program execution.
+ * - WARNING: A potential problem occurred that requires the operator to
+ * look after the otherwise unattended setup.
+ * - INFO: Messages on INFO level are meant to help the operator in making
+ * sure that operation works as expected.
+ * - FINE: Debug messages that are used to identify problems and which are
+ * turned on by default.
+ * - FINER: More detailed debug messages for investigating problems.
+ * Not turned on by default. Increase the log file size limit when
+ * using FINER.
+ * - FINEST: Most detailed debug messages. Not used.
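+ *
+ * Classes participating in this scheme typically obtain their logger
+ * via Logger.getLogger(SomeClass.class.getName()) (SomeClass being a
+ * placeholder) and rely on the root handlers installed here.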
+ */
+public class LoggingConfiguration {
+ public LoggingConfiguration() {
+
+ /* Remove default console handler. */
+ for (Handler h : Logger.getLogger("").getHandlers()) {
+ Logger.getLogger("").removeHandler(h);
+ }
+
+ /* Disable logging of internal Sun classes. */
+ Logger.getLogger("sun").setLevel(Level.OFF);
+
+ /* Lower the minimum log level we care about from INFO to FINER. */
+ Logger.getLogger("").setLevel(Level.FINER);
+
+ /* Create log handler that writes messages on WARNING or higher to the
+ * console. */
+ final SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ Formatter cf = new Formatter() {
+ public String format(LogRecord record) {
+ return dateTimeFormat.format(new Date(record.getMillis())) + " "
+ + record.getMessage() + "\n";
+ }
+ };
+ Handler ch = new ConsoleHandler();
+ ch.setFormatter(cf);
+ ch.setLevel(Level.WARNING);
+ Logger.getLogger("").addHandler(ch);
+
+ /* Initialize own logger for this class. */
+ Logger logger = Logger.getLogger(
+ LoggingConfiguration.class.getName());
+
+ /* Create log handler that writes all messages on FINE or higher to a
+ * local file. */
+ Formatter ff = new Formatter() {
+ public String format(LogRecord record) {
+ return dateTimeFormat.format(new Date(record.getMillis())) + " "
+ + record.getLevel() + " " + record.getSourceClassName() + " "
+ + record.getSourceMethodName() + " " + record.getMessage()
+ + (record.getThrown() != null ? " " + record.getThrown() : "")
+ + "\n";
+ }
+ };
+ try {
+ FileHandler fh = new FileHandler("log", 5000000, 5, true);
+ fh.setFormatter(ff);
+ fh.setLevel(Level.FINE);
+ Logger.getLogger("").addHandler(fh);
+ } catch (SecurityException e) {
+ logger.log(Level.WARNING, "No permission to create log file. "
+ + "Logging to file is disabled.", e);
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write to log file. Logging to "
+ + "file is disabled.", e);
+ }
+ }
+}
diff --git a/src/org/torproject/ernie/db/main/Main.java b/src/org/torproject/ernie/db/main/Main.java
new file mode 100644
index 0000000..e008eca
--- /dev/null
+++ b/src/org/torproject/ernie/db/main/Main.java
@@ -0,0 +1,172 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.main;
+
+import java.io.File;
+import java.util.List;
+import java.util.logging.Logger;
+
+import org.torproject.ernie.db.bridgedescs.BridgeDescriptorParser;
+import org.torproject.ernie.db.bridgedescs.BridgeSnapshotReader;
+import org.torproject.ernie.db.bridgedescs.SanitizedBridgesWriter;
+import org.torproject.ernie.db.bridgepools.BridgePoolAssignmentsProcessor;
+import org.torproject.ernie.db.exitlists.ExitListDownloader;
+import org.torproject.ernie.db.relaydescs.ArchiveReader;
+import org.torproject.ernie.db.relaydescs.ArchiveWriter;
+import org.torproject.ernie.db.relaydescs.CachedRelayDescriptorReader;
+import org.torproject.ernie.db.relaydescs.RelayDescriptorDownloader;
+import org.torproject.ernie.db.relaydescs.RelayDescriptorParser;
+import org.torproject.ernie.db.torperf.TorperfDownloader;
+
+/**
+ * Coordinate downloading and parsing of descriptors and extraction of
+ * statistically relevant data for later processing with R.
+ */
+public class Main {
+ public static void main(String[] args) {
+
+ /* Initialize logging configuration. */
+ new LoggingConfiguration();
+
+ Logger logger = Logger.getLogger(Main.class.getName());
+ logger.info("Starting ERNIE.");
+
+ // Initialize configuration
+ Configuration config = new Configuration();
+
+ // Use lock file to avoid overlapping runs
+ LockFile lf = new LockFile();
+ if (!lf.acquireLock()) {
+ logger.severe("Warning: ERNIE is already running or has not exited "
+ + "cleanly! Exiting!");
+ System.exit(1);
+ }
+
+ // Define stats directory for temporary files
+ File statsDirectory = new File("stats");
+
+ // Prepare writing relay descriptor archive to disk
+ ArchiveWriter aw = config.getWriteDirectoryArchives() ?
+ new ArchiveWriter(
+ new File(config.getDirectoryArchivesOutputDirectory())) : null;
+
+ // Prepare relay descriptor parser (only if we are writing stats or
+ // directory archives to disk)
+ RelayDescriptorParser rdp = aw != null ?
+ new RelayDescriptorParser(aw) : null;
+
+ // Import/download relay descriptors from the various sources
+ if (rdp != null) {
+ RelayDescriptorDownloader rdd = null;
+ if (config.getDownloadRelayDescriptors()) {
+ List<String> dirSources =
+ config.getDownloadFromDirectoryAuthorities();
+ rdd = new RelayDescriptorDownloader(rdp, dirSources,
+ config.getDownloadCurrentConsensus(),
+ config.getDownloadCurrentVotes(),
+ config.getDownloadMissingServerDescriptors(),
+ config.getDownloadMissingExtraInfoDescriptors(),
+ config.getDownloadAllServerDescriptors(),
+ config.getDownloadAllExtraInfoDescriptors(),
+ config.getCompressRelayDescriptorDownloads());
+ rdp.setRelayDescriptorDownloader(rdd);
+ }
+ if (config.getImportCachedRelayDescriptors()) {
+ new CachedRelayDescriptorReader(rdp,
+ config.getCachedRelayDescriptorDirectory(), statsDirectory);
+ if (aw != null) {
+ aw.intermediateStats("importing relay descriptors from local "
+ + "Tor data directories");
+ }
+ }
+ if (config.getImportDirectoryArchives()) {
+ new ArchiveReader(rdp,
+ new File(config.getDirectoryArchivesDirectory()),
+ statsDirectory,
+ config.getKeepDirectoryArchiveImportHistory());
+ if (aw != null) {
+ aw.intermediateStats("importing relay descriptors from local "
+ + "directory");
+ }
+ }
+ if (rdd != null) {
+ rdd.downloadDescriptors();
+ rdd.writeFile();
+ rdd = null;
+ if (aw != null) {
+ aw.intermediateStats("downloading relay descriptors from the "
+ + "directory authorities");
+ }
+ }
+ }
+
+ // Write output to disk that only depends on relay descriptors
+ if (aw != null) {
+ aw.dumpStats();
+ aw = null;
+ }
+
+ // Prepare sanitized bridge descriptor writer
+ SanitizedBridgesWriter sbw = config.getWriteSanitizedBridges() ?
+ new SanitizedBridgesWriter(
+ new File(config.getSanitizedBridgesWriteDirectory()),
+ statsDirectory, config.getReplaceIPAddressesWithHashes(),
+ config.getLimitBridgeDescriptorMappings()) : null;
+
+ // Prepare bridge descriptor parser
+ BridgeDescriptorParser bdp = config.getWriteSanitizedBridges()
+ ? new BridgeDescriptorParser(sbw) : null;
+
+ // Import bridge descriptors
+ if (bdp != null && config.getImportBridgeSnapshots()) {
+ new BridgeSnapshotReader(bdp,
+ new File(config.getBridgeSnapshotsDirectory()),
+ statsDirectory);
+ }
+
+ // Finish writing sanitized bridge descriptors to disk
+ if (sbw != null) {
+ sbw.finishWriting();
+ sbw = null;
+ }
+
+ // Download exit list and store it to disk
+ if (config.getDownloadExitList()) {
+ new ExitListDownloader();
+ }
+
+ // Process bridge pool assignments
+ if (config.getProcessBridgePoolAssignments()) {
+ new BridgePoolAssignmentsProcessor(
+ new File(config.getAssignmentsDirectory()),
+ new File(config.getSanitizedAssignmentsDirectory()));
+ }
+
+ // Process Torperf files
+ if (config.getProcessTorperfFiles()) {
+ new TorperfDownloader(new File(config.getTorperfOutputDirectory()),
+ config.getTorperfSources(), config.getTorperfFiles());
+ }
+
+ // Copy recently published files to a local directory that can then
+ // be served via rsync.
+ if (config.getProvideFilesViaRsync()) {
+ new RsyncDataProvider(
+ !config.getWriteDirectoryArchives() ? null :
+ new File(config.getDirectoryArchivesOutputDirectory()),
+ !config.getWriteSanitizedBridges() ? null :
+ new File(config.getSanitizedBridgesWriteDirectory()),
+ !config.getProcessBridgePoolAssignments() ? null :
+ new File(config.getSanitizedAssignmentsDirectory()),
+ config.getDownloadExitList(),
+ !config.getProcessTorperfFiles() ? null :
+ new File(config.getTorperfOutputDirectory()),
+ new File(config.getRsyncDirectory()));
+ }
+
+ // Remove lock file
+ lf.releaseLock();
+
+ logger.info("Terminating ERNIE.");
+ }
+}
diff --git a/src/org/torproject/ernie/db/main/RsyncDataProvider.java b/src/org/torproject/ernie/db/main/RsyncDataProvider.java
new file mode 100644
index 0000000..cd4a6f9
--- /dev/null
+++ b/src/org/torproject/ernie/db/main/RsyncDataProvider.java
@@ -0,0 +1,217 @@
+/* Copyright 2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.main;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.Stack;
+import java.util.logging.Logger;
+
+/**
+ * Copy files published in the last 3 days to a local directory that can
+ * then be served via rsync.
+ */
+public class RsyncDataProvider {
+ public RsyncDataProvider(File directoryArchivesOutputDirectory,
+ File sanitizedBridgesWriteDirectory,
+ File sanitizedAssignmentsDirectory,
+ boolean downloadExitList,
+ File torperfOutputDirectory, File rsyncDirectory) {
+
+ /* Initialize logger. */
+ Logger logger = Logger.getLogger(RsyncDataProvider.class.getName());
+
+ /* Determine the cut-off time for files in rsync/. */
+ long cutOffMillis = System.currentTimeMillis()
+ - 3L * 24L * 60L * 60L * 1000L;
+
+ /* Create rsync/ directory if it doesn't exist. */
+ if (!rsyncDirectory.exists()) {
+ rsyncDirectory.mkdirs();
+ }
+
+ /* Make a list of all files in the rsync/ directory to delete those
+ * that we didn't copy in this run. */
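+ /* Note that files are tracked by file name only, not by relative
+ * path, which assumes that file names are unique across the copied
+ * subdirectories. */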
+ Set<String> fileNamesInRsync = new HashSet<String>();
+ Stack<File> files = new Stack<File>();
+ files.add(rsyncDirectory);
+ while (!files.isEmpty()) {
+ File pop = files.pop();
+ if (pop.isDirectory()) {
+ files.addAll(Arrays.asList(pop.listFiles()));
+ } else {
+ fileNamesInRsync.add(pop.getName());
+ }
+ }
+ logger.info("Found " + fileNamesInRsync.size() + " files in "
+ + rsyncDirectory.getAbsolutePath() + " that we're either "
+ + "overwriting or deleting in this execution.");
+
+ /* Copy relay descriptors from the last 3 days. */
+ if (directoryArchivesOutputDirectory != null) {
+ files.add(directoryArchivesOutputDirectory);
+ while (!files.isEmpty()) {
+ File pop = files.pop();
+ if (pop.isDirectory()) {
+ files.addAll(Arrays.asList(pop.listFiles()));
+ } else if (pop.lastModified() >= cutOffMillis) {
+ String fileName = pop.getName();
+ if (pop.getAbsolutePath().contains("/consensus/")) {
+ this.copyFile(pop, new File(rsyncDirectory,
+ "relay-descriptors/consensuses/" + fileName));
+ } else if (pop.getAbsolutePath().contains("/vote/")) {
+ this.copyFile(pop, new File(rsyncDirectory,
+ "relay-descriptors/votes/" + fileName));
+ } else if (pop.getAbsolutePath().contains(
+ "/server-descriptor/")) {
+ this.copyFile(pop, new File(rsyncDirectory,
+ "relay-descriptors/server-descriptors/" + fileName));
+ } else if (pop.getAbsolutePath().contains("/extra-info/")) {
+ this.copyFile(pop, new File(rsyncDirectory,
+ "relay-descriptors/extra-infos/" + fileName));
+ } else {
+ continue;
+ }
+ fileNamesInRsync.remove(pop.getName());
+ }
+ }
+ }
+ logger.info("After copying relay descriptors, there are still "
+ + fileNamesInRsync.size() + " files left in "
+ + rsyncDirectory.getAbsolutePath() + ".");
+
+ /* Copy sanitized bridge descriptors from the last 3 days. */
+ if (sanitizedBridgesWriteDirectory != null) {
+ files.add(sanitizedBridgesWriteDirectory);
+ while (!files.isEmpty()) {
+ File pop = files.pop();
+ if (pop.isDirectory()) {
+ files.addAll(Arrays.asList(pop.listFiles()));
+ } else if (pop.lastModified() >= cutOffMillis) {
+ String fileName = pop.getName();
+ if (pop.getAbsolutePath().contains("/statuses/")) {
+ this.copyFile(pop, new File(rsyncDirectory,
+ "bridge-descriptors/statuses/" + fileName));
+ } else if (pop.getAbsolutePath().contains(
+ "/server-descriptors/")) {
+ this.copyFile(pop, new File(rsyncDirectory,
+ "bridge-descriptors/server-descriptors/" + fileName));
+ } else if (pop.getAbsolutePath().contains("/extra-infos/")) {
+ this.copyFile(pop, new File(rsyncDirectory,
+ "bridge-descriptors/extra-infos/" + fileName));
+ } else {
+ continue;
+ }
+ fileNamesInRsync.remove(pop.getName());
+ }
+ }
+ }
+ logger.info("After copying sanitized bridge descriptors, there are "
+ + "still " + fileNamesInRsync.size() + " files left in "
+ + rsyncDirectory.getAbsolutePath() + ".");
+
+ /* Copy sanitized bridge pool assignments from the last 3 days. */
+ if (sanitizedAssignmentsDirectory != null) {
+ files.add(sanitizedAssignmentsDirectory);
+ while (!files.isEmpty()) {
+ File pop = files.pop();
+ if (pop.isDirectory()) {
+ files.addAll(Arrays.asList(pop.listFiles()));
+ } else if (pop.lastModified() >= cutOffMillis) {
+ String fileName = pop.getName();
+ this.copyFile(pop, new File(rsyncDirectory,
+ "bridge-pool-assignments/" + fileName));
+ fileNamesInRsync.remove(pop.getName());
+ }
+ }
+ }
+ logger.info("After copying sanitized bridge pool assignments, there "
+ + "are still " + fileNamesInRsync.size() + " files left in "
+ + rsyncDirectory.getAbsolutePath() + ".");
+
+ /* Copy exit lists from the last 3 days. */
+ if (downloadExitList) {
+ files.add(new File("exitlist"));
+ while (!files.isEmpty()) {
+ File pop = files.pop();
+ if (pop.isDirectory()) {
+ files.addAll(Arrays.asList(pop.listFiles()));
+ } else if (pop.lastModified() >= cutOffMillis) {
+ String fileName = pop.getName();
+ this.copyFile(pop, new File(rsyncDirectory,
+ "exit-lists/" + fileName));
+ fileNamesInRsync.remove(pop.getName());
+ }
+ }
+ }
+ logger.info("After copying exit lists, there are still "
+ + fileNamesInRsync.size() + " files left in "
+ + rsyncDirectory.getAbsolutePath() + ".");
+
+ /* Copy Torperf files. */
+ if (torperfOutputDirectory != null) {
+ files.add(torperfOutputDirectory);
+ while (!files.isEmpty()) {
+ File pop = files.pop();
+ if (pop.isDirectory()) {
+ files.addAll(Arrays.asList(pop.listFiles()));
+ } else if (pop.getName().endsWith(".tpf") &&
+ pop.lastModified() >= cutOffMillis) {
+ String fileName = pop.getName();
+ this.copyFile(pop, new File(rsyncDirectory,
+ "torperf/" + fileName));
+ fileNamesInRsync.remove(pop.getName());
+ }
+ }
+ }
+ logger.info("After copying Torperf files, there are still "
+ + fileNamesInRsync.size() + " files left in "
+ + rsyncDirectory.getAbsolutePath() + ".");
+
+ /* Delete all files that we didn't (over-)write in this run. */
+ files.add(rsyncDirectory);
+ while (!files.isEmpty()) {
+ File pop = files.pop();
+ if (pop.isDirectory()) {
+ files.addAll(Arrays.asList(pop.listFiles()));
+ } else if (fileNamesInRsync.contains(pop.getName())) {
+ fileNamesInRsync.remove(pop.getName());
+ pop.delete();
+ }
+ }
+ logger.info("After deleting files that we didn't overwrite in this "
+ + "run, there are " + fileNamesInRsync.size() + " files left in "
+ + rsyncDirectory.getAbsolutePath() + ".");
+ }
+
+ private void copyFile(File from, File to) {
+ if (from.exists() && to.exists() &&
+ from.lastModified() == to.lastModified() &&
+ from.length() == to.length()) {
+ return;
+ }
+ try {
+ to.getParentFile().mkdirs();
+ FileInputStream fis = new FileInputStream(from);
+ BufferedInputStream bis = new BufferedInputStream(fis);
+ FileOutputStream fos = new FileOutputStream(to);
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ fos.write(data, 0, len);
+ }
+ bis.close();
+ fos.close();
+ to.setLastModified(from.lastModified());
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java b/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java
new file mode 100644
index 0000000..fba0a9f
--- /dev/null
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java
@@ -0,0 +1,146 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.relaydescs;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+
+/**
+ * Read in all files in a given directory and pass buffered readers of
+ * them to the relay descriptor parser.
+ */
+public class ArchiveReader {
+ public ArchiveReader(RelayDescriptorParser rdp, File archivesDirectory,
+ File statsDirectory, boolean keepImportHistory) {
+
+ if (rdp == null || archivesDirectory == null ||
+ statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ int parsedFiles = 0, ignoredFiles = 0;
+ Logger logger = Logger.getLogger(ArchiveReader.class.getName());
+ SortedSet<String> archivesImportHistory = new TreeSet<String>();
+ File archivesImportHistoryFile = new File(statsDirectory,
+ "archives-import-history");
+ if (keepImportHistory && archivesImportHistoryFile.exists()) {
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ archivesImportHistoryFile));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ archivesImportHistory.add(line);
+ }
+ br.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not read in archives import "
+ + "history file. Skipping.");
+ }
+ }
+ if (archivesDirectory.exists()) {
+ logger.fine("Importing files in directory " + archivesDirectory
+ + "/...");
+ Stack<File> filesInInputDir = new Stack<File>();
+ filesInInputDir.add(archivesDirectory);
+ List<File> problems = new ArrayList<File>();
+ while (!filesInInputDir.isEmpty()) {
+ File pop = filesInInputDir.pop();
+ if (pop.isDirectory()) {
+ for (File f : pop.listFiles()) {
+ filesInInputDir.add(f);
+ }
+ } else {
+ if (rdp != null) {
+ try {
+ BufferedInputStream bis = null;
+ if (keepImportHistory &&
+ archivesImportHistory.contains(pop.getName())) {
+ ignoredFiles++;
+ continue;
+ } else if (pop.getName().endsWith(".tar.bz2")) {
+ logger.warning("Cannot parse compressed tarball "
+ + pop.getAbsolutePath() + ". Skipping.");
+ continue;
+ } else if (pop.getName().endsWith(".bz2")) {
+ FileInputStream fis = new FileInputStream(pop);
+ BZip2CompressorInputStream bcis =
+ new BZip2CompressorInputStream(fis);
+ bis = new BufferedInputStream(bcis);
+ } else {
+ FileInputStream fis = new FileInputStream(pop);
+ bis = new BufferedInputStream(fis);
+ }
+ if (keepImportHistory) {
+ archivesImportHistory.add(pop.getName());
+ }
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ bis.close();
+ byte[] allData = baos.toByteArray();
+ rdp.parse(allData);
+ parsedFiles++;
+ } catch (IOException e) {
+ problems.add(pop);
+ if (problems.size() > 3) {
+ break;
+ }
+ }
+ }
+ }
+ }
+ if (problems.isEmpty()) {
+ logger.fine("Finished importing files in directory "
+ + archivesDirectory + "/.");
+ } else {
+ StringBuilder sb = new StringBuilder("Failed importing files in "
+ + "directory " + archivesDirectory + "/:");
+ int printed = 0;
+ for (File f : problems) {
+ sb.append("\n " + f.getAbsolutePath());
+ if (++printed >= 3) {
+ sb.append("\n ... more");
+ break;
+ }
+ }
+ logger.warning(sb.toString());
+ }
+ }
+ if (keepImportHistory) {
+ try {
+ archivesImportHistoryFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ archivesImportHistoryFile));
+ for (String line : archivesImportHistory) {
+ bw.write(line + "\n");
+ }
+ bw.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write archives import "
+ + "history file.");
+ }
+ }
+ logger.info("Finished importing relay descriptors from local "
+ + "directory:\nParsed " + parsedFiles + ", ignored "
+ + ignoredFiles + " files.");
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
new file mode 100644
index 0000000..c632656
--- /dev/null
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -0,0 +1,339 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.relaydescs;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.SortedSet;
+import java.util.Stack;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
+import org.torproject.descriptor.DescriptorParser;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.impl.DescriptorParseException;
+
+public class ArchiveWriter {
+ private Logger logger;
+ private File outputDirectory;
+ private DescriptorParser descriptorParser;
+ private int storedConsensuses = 0, storedVotes = 0, storedCerts = 0,
+ storedServerDescriptors = 0, storedExtraInfoDescriptors = 0;
+
+ public ArchiveWriter(File outputDirectory) {
+
+ if (outputDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ this.logger = Logger.getLogger(ArchiveWriter.class.getName());
+ this.outputDirectory = outputDirectory;
+ this.descriptorParser =
+ DescriptorSourceFactory.createDescriptorParser();
+ }
+
+ private boolean store(byte[] typeAnnotation, byte[] data,
+ String filename) {
+ try {
+ File file = new File(filename);
+ if (!file.exists()) {
+ this.logger.finer("Storing " + filename);
+ if (this.descriptorParser.parseDescriptors(data, filename).size()
+ != 1) {
+ this.logger.info("Relay descriptor file " + filename
+ + " doesn't contain exactly one descriptor. Not storing.");
+ return false;
+ }
+ file.getParentFile().mkdirs();
+ BufferedOutputStream bos = new BufferedOutputStream(
+ new FileOutputStream(file));
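+ /* Prepend the @type annotation unless the descriptor already
+ * starts with one. */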
+ if (data.length > 0 && data[0] != '@') {
+ bos.write(typeAnnotation, 0, typeAnnotation.length);
+ }
+ bos.write(data, 0, data.length);
+ bos.close();
+ return true;
+ }
+ } catch (DescriptorParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse relay descriptor "
+ + filename + " before storing it to disk. Skipping.", e);
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not store relay descriptor "
+ + filename, e);
+ }
+ return false;
+ }
+
+ private static final byte[] CONSENSUS_ANNOTATION =
+ "@type network-status-consensus-3 1.0\n".getBytes();
+ public void storeConsensus(byte[] data, long validAfter) {
+ SimpleDateFormat printFormat = new SimpleDateFormat(
+ "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String filename = outputDirectory + "/consensus/"
+ + printFormat.format(new Date(validAfter)) + "-consensus";
+ if (this.store(CONSENSUS_ANNOTATION, data, filename)) {
+ this.storedConsensuses++;
+ }
+ }
+
+ private static final byte[] VOTE_ANNOTATION =
+ "@type network-status-vote-3 1.0\n".getBytes();
+ public void storeVote(byte[] data, long validAfter,
+ String fingerprint, String digest) {
+ SimpleDateFormat printFormat = new SimpleDateFormat(
+ "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String filename = outputDirectory + "/vote/"
+ + printFormat.format(new Date(validAfter)) + "-vote-"
+ + fingerprint + "-" + digest;
+ if (this.store(VOTE_ANNOTATION, data, filename)) {
+ this.storedVotes++;
+ }
+ }
+
+ private static final byte[] CERTIFICATE_ANNOTATION =
+ "@type dir-key-certificate-3 1.0\n".getBytes();
+ public void storeCertificate(byte[] data, String fingerprint,
+ long published) {
+ SimpleDateFormat printFormat = new SimpleDateFormat(
+ "yyyy-MM-dd-HH-mm-ss");
+ printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String filename = outputDirectory + "/certs/"
+ + fingerprint + "-" + printFormat.format(new Date(published));
+ if (this.store(CERTIFICATE_ANNOTATION, data, filename)) {
+ this.storedCerts++;
+ }
+ }
+
+ private static final byte[] SERVER_DESCRIPTOR_ANNOTATION =
+ "@type server-descriptor 1.0\n".getBytes();
+ public void storeServerDescriptor(byte[] data, String digest,
+ long published) {
+ SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
+ printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String filename = outputDirectory + "/server-descriptor/"
+ + printFormat.format(new Date(published))
+ + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
+ + digest;
+ if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, filename)) {
+ this.storedServerDescriptors++;
+ }
+ }
+
+ private static final byte[] EXTRA_INFO_ANNOTATION =
+ "@type extra-info 1.0\n".getBytes();
+ public void storeExtraInfoDescriptor(byte[] data,
+ String extraInfoDigest, long published) {
+ SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
+ descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String filename = outputDirectory + "/extra-info/"
+ + descriptorFormat.format(new Date(published))
+ + extraInfoDigest.substring(0, 1) + "/"
+ + extraInfoDigest.substring(1, 2) + "/"
+ + extraInfoDigest;
+ if (this.store(EXTRA_INFO_ANNOTATION, data, filename)) {
+ this.storedExtraInfoDescriptors++;
+ }
+ }
+
+ private StringBuilder intermediateStats = new StringBuilder();
+ public void intermediateStats(String event) {
+ intermediateStats.append("While " + event + ", we stored "
+ + this.storedConsensuses + " consensus(es), " + this.storedVotes
+ + " vote(s), " + this.storedCerts + " certificate(s), "
+ + this.storedServerDescriptors + " server descriptor(s), and "
+ + this.storedExtraInfoDescriptors
+ + " extra-info descriptor(s) to disk.\n");
+ this.storedConsensuses = 0;
+ this.storedVotes = 0;
+ this.storedCerts = 0;
+ this.storedServerDescriptors = 0;
+ this.storedExtraInfoDescriptors = 0;
+ }
+ /**
+ * Dump some statistics on the completeness of descriptors to the logs
+ * on level INFO.
+ */
+ public void dumpStats() {
+ StringBuilder sb = new StringBuilder("Finished writing relay "
+ + "descriptors to disk.\n");
+ sb.append(intermediateStats.toString());
+ sb.append("Statistics on the completeness of written relay "
+ + "descriptors of the last 3 consensuses (Consensus/Vote, "
+ + "valid-after, votes, server descriptors, extra-infos):");
+ try {
+ SimpleDateFormat validAfterFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ validAfterFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat consensusVoteFormat =
+ new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ consensusVoteFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat descriptorFormat =
+ new SimpleDateFormat("yyyy/MM/");
+ descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+ SortedSet<File> consensuses = new TreeSet<File>();
+ Stack<File> leftToParse = new Stack<File>();
+ leftToParse.add(new File(outputDirectory + "/consensus"));
+ while (!leftToParse.isEmpty()) {
+ File pop = leftToParse.pop();
+ if (pop.isDirectory()) {
+ for (File f : pop.listFiles()) {
+ leftToParse.add(f);
+ }
+ } else if (pop.length() > 0) {
+ consensuses.add(pop);
+ }
+ while (consensuses.size() > 3) {
+ consensuses.remove(consensuses.first());
+ }
+ }
+ for (File f : consensuses) {
+ BufferedReader br = new BufferedReader(new FileReader(f));
+ String line = null, validAfterTime = null,
+ voteFilenamePrefix = null, dirSource = null;
+ int allVotes = 0, foundVotes = 0,
+ allServerDescs = 0, foundServerDescs = 0,
+ allExtraInfos = 0, foundExtraInfos = 0;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("valid-after ")) {
+ validAfterTime = line.substring("valid-after ".length());
+ long validAfter = validAfterFormat.parse(
+ validAfterTime).getTime();
+ voteFilenamePrefix = outputDirectory + "/vote/"
+ + consensusVoteFormat.format(new Date(validAfter))
+ + "-vote-";
+ } else if (line.startsWith("dir-source ")) {
+ dirSource = line.split(" ")[2];
+ } else if (line.startsWith("vote-digest ")) {
+ allVotes++;
+ File voteFile = new File(voteFilenamePrefix + dirSource + "-"
+ + line.split(" ")[1]);
+ if (voteFile.exists()) {
+ foundVotes++;
+ BufferedReader vbr = new BufferedReader(new FileReader(
+ voteFile));
+ String line3 = null;
+ int voteAllServerDescs = 0, voteFoundServerDescs = 0,
+ voteAllExtraInfos = 0, voteFoundExtraInfos = 0;
+ while ((line3 = vbr.readLine()) != null) {
+ if (line3.startsWith("r ")) {
+ voteAllServerDescs++;
+ String digest = Hex.encodeHexString(Base64.decodeBase64(
+ line3.split(" ")[3] + "=")).toLowerCase();
+ long published = validAfterFormat.parse(
+ line3.split(" ")[4] + " "
+ + line3.split(" ")[5]).getTime();
+ String filename = outputDirectory
+ + "/server-descriptor/"
+ + descriptorFormat.format(new Date(published))
+ + digest.substring(0, 1) + "/"
+ + digest.substring(1, 2) + "/" + digest;
+ if (new File(filename).exists()) {
+ BufferedReader sbr = new BufferedReader(new FileReader(
+ new File(filename)));
+ String line2 = null;
+ while ((line2 = sbr.readLine()) != null) {
+ if (line2.startsWith("opt extra-info-digest ") ||
+ line2.startsWith("extra-info-digest ")) {
+ voteAllExtraInfos++;
+ String extraInfoDigest = line2.startsWith("opt ") ?
+ line2.split(" ")[2].toLowerCase() :
+ line2.split(" ")[1].toLowerCase();
+ String filename2 =
+ outputDirectory.getAbsolutePath()
+ + "/extra-info/"
+ + descriptorFormat.format(new Date(published))
+ + extraInfoDigest.substring(0, 1) + "/"
+ + extraInfoDigest.substring(1, 2) + "/"
+ + extraInfoDigest;
+ if (new File(filename2).exists()) {
+ voteFoundExtraInfos++;
+ }
+ }
+ }
+ sbr.close();
+ voteFoundServerDescs++;
+ }
+ }
+ }
+ vbr.close();
+ sb.append(String.format("%nV, %s, NA, %d/%d (%.1f%%), "
+ + "%d/%d (%.1f%%)", validAfterTime,
+ voteFoundServerDescs, voteAllServerDescs,
+ 100.0D * (double) voteFoundServerDescs /
+ (double) voteAllServerDescs,
+ voteFoundExtraInfos, voteAllExtraInfos,
+ 100.0D * (double) voteFoundExtraInfos /
+ (double) voteAllExtraInfos));
+ }
+ } else if (line.startsWith("r ")) {
+ allServerDescs++;
+ String digest = Hex.encodeHexString(Base64.decodeBase64(
+ line.split(" ")[3] + "=")).toLowerCase();
+ long published = validAfterFormat.parse(
+ line.split(" ")[4] + " " + line.split(" ")[5]).getTime();
+ String filename = outputDirectory.getAbsolutePath()
+ + "/server-descriptor/"
+ + descriptorFormat.format(new Date(published))
+ + digest.substring(0, 1) + "/"
+ + digest.substring(1, 2) + "/" + digest;
+ if (new File(filename).exists()) {
+ BufferedReader sbr = new BufferedReader(new FileReader(
+ new File(filename)));
+ String line2 = null;
+ while ((line2 = sbr.readLine()) != null) {
+ if (line2.startsWith("opt extra-info-digest ") ||
+ line2.startsWith("extra-info-digest ")) {
+ allExtraInfos++;
+ String extraInfoDigest = line2.startsWith("opt ") ?
+ line2.split(" ")[2].toLowerCase() :
+ line2.split(" ")[1].toLowerCase();
+ String filename2 = outputDirectory.getAbsolutePath()
+ + "/extra-info/"
+ + descriptorFormat.format(new Date(published))
+ + extraInfoDigest.substring(0, 1) + "/"
+ + extraInfoDigest.substring(1, 2) + "/"
+ + extraInfoDigest;
+ if (new File(filename2).exists()) {
+ foundExtraInfos++;
+ }
+ }
+ }
+ sbr.close();
+ foundServerDescs++;
+ }
+ }
+ }
+ br.close();
+ sb.append(String.format("%nC, %s, %d/%d (%.1f%%), "
+ + "%d/%d (%.1f%%), %d/%d (%.1f%%)",
+ validAfterTime, foundVotes, allVotes,
+ 100.0D * (double) foundVotes / (double) allVotes,
+ foundServerDescs, allServerDescs,
+ 100.0D * (double) foundServerDescs / (double) allServerDescs,
+ foundExtraInfos, allExtraInfos,
+ 100.0D * (double) foundExtraInfos / (double) allExtraInfos));
+ }
+ this.logger.info(sb.toString());
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not dump statistics to disk.",
+ e);
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not dump statistics to disk.",
+ e);
+ }
+ }
+}
diff --git a/src/org/torproject/ernie/db/relaydescs/CachedRelayDescriptorReader.java b/src/org/torproject/ernie/db/relaydescs/CachedRelayDescriptorReader.java
new file mode 100644
index 0000000..194e0a2
--- /dev/null
+++ b/src/org/torproject/ernie/db/relaydescs/CachedRelayDescriptorReader.java
@@ -0,0 +1,235 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.relaydescs;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.TimeZone;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+
+/**
+ * Parses all descriptors in local directory cacheddesc/ and sorts them
+ * into directory structure in directory-archive/.
+ */
+public class CachedRelayDescriptorReader {
+ public CachedRelayDescriptorReader(RelayDescriptorParser rdp,
+ List<String> inputDirectories, File statsDirectory) {
+
+ if (rdp == null || inputDirectories == null ||
+ inputDirectories.isEmpty() || statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ StringBuilder dumpStats = new StringBuilder("Finished importing "
+ + "relay descriptors from local Tor data directories:");
+ Logger logger = Logger.getLogger(
+ CachedRelayDescriptorReader.class.getName());
+
+ /* Read import history containing SHA-1 digests of previously parsed
+ * statuses and descriptors, so that we can skip them in this run. */
+ Set<String> lastImportHistory = new HashSet<String>(),
+ currentImportHistory = new HashSet<String>();
+ File importHistoryFile = new File(statsDirectory,
+ "cacheddesc-import-history");
+ if (importHistoryFile.exists()) {
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ importHistoryFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ lastImportHistory.add(line);
+ }
+ br.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not read import history from "
+ + importHistoryFile.getAbsolutePath() + ".", e);
+ }
+ }
+
+ /* Read cached descriptors directories. */
+ for (String inputDirectory : inputDirectories) {
+ File cachedDescDir = new File(inputDirectory);
+ if (!cachedDescDir.exists()) {
+ logger.warning("Directory " + cachedDescDir.getAbsolutePath()
+ + " does not exist. Skipping.");
+ continue;
+ }
+ logger.fine("Reading " + cachedDescDir.getAbsolutePath()
+ + " directory.");
+ for (File f : cachedDescDir.listFiles()) {
+ try {
+ // descriptors may contain non-ASCII chars; read as bytes to
+ // determine digests
+ BufferedInputStream bis =
+ new BufferedInputStream(new FileInputStream(f));
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ bis.close();
+ byte[] allData = baos.toByteArray();
+ if (f.getName().equals("cached-consensus")) {
+ /* Check if directory information is stale. */
+ BufferedReader br = new BufferedReader(new StringReader(
+ new String(allData, "US-ASCII")));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("valid-after ")) {
+ dumpStats.append("\n" + f.getName() + ": " + line.substring(
+ "valid-after ".length()));
+ SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ if (dateTimeFormat.parse(line.substring("valid-after ".
+ length())).getTime() < System.currentTimeMillis()
+ - 6L * 60L * 60L * 1000L) {
+ logger.warning("Cached descriptor files in "
+ + cachedDescDir.getAbsolutePath() + " are stale. "
+ + "The valid-after line in cached-consensus is '"
+ + line + "'.");
+ dumpStats.append(" (stale!)");
+ }
+ break;
+ }
+ }
+ br.close();
+
+ /* Parse the cached consensus if we haven't parsed it before
+ * (but regardless of whether it's stale or not). */
+ if (rdp != null) {
+ String digest = Hex.encodeHexString(DigestUtils.sha(
+ allData));
+ if (!lastImportHistory.contains(digest) &&
+ !currentImportHistory.contains(digest)) {
+ rdp.parse(allData);
+ } else {
+ dumpStats.append(" (skipped)");
+ }
+ currentImportHistory.add(digest);
+ }
+ } else if (f.getName().equals("v3-status-votes")) {
+ int parsedNum = 0, skippedNum = 0;
+ String ascii = new String(allData, "US-ASCII");
+ String startToken = "network-status-version ";
+ int end = ascii.length();
+ int start = ascii.indexOf(startToken);
+ while (start >= 0 && start < end) {
+ int next = ascii.indexOf(startToken, start + 1);
+ if (next < 0) {
+ next = end;
+ }
+ if (start < next) {
+ byte[] rawNetworkStatusBytes = new byte[next - start];
+ System.arraycopy(allData, start, rawNetworkStatusBytes, 0,
+ next - start);
+ if (rdp != null) {
+ String digest = Hex.encodeHexString(DigestUtils.sha(
+ rawNetworkStatusBytes));
+ if (!lastImportHistory.contains(digest) &&
+ !currentImportHistory.contains(digest)) {
+ rdp.parse(rawNetworkStatusBytes);
+ parsedNum++;
+ } else {
+ skippedNum++;
+ }
+ currentImportHistory.add(digest);
+ }
+ }
+ start = next;
+ }
+ dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum
+ + ", skipped " + skippedNum + " votes");
+ } else if (f.getName().startsWith("cached-descriptors") ||
+ f.getName().startsWith("cached-extrainfo")) {
+ String ascii = new String(allData, "US-ASCII");
+ int start = -1, sig = -1, end = -1;
+ String startToken =
+ f.getName().startsWith("cached-descriptors") ?
+ "router " : "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String endToken = "\n-----END SIGNATURE-----\n";
+ int parsedNum = 0, skippedNum = 0;
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ sig = ascii.indexOf(sigToken, start);
+ if (sig < 0) {
+ break;
+ }
+ sig += sigToken.length();
+ end = ascii.indexOf(endToken, sig);
+ if (end < 0) {
+ break;
+ }
+ end += endToken.length();
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0, end - start);
+ if (rdp != null) {
+ String digest = Hex.encodeHexString(DigestUtils.sha(
+ descBytes));
+ if (!lastImportHistory.contains(digest) &&
+ !currentImportHistory.contains(digest)) {
+ rdp.parse(descBytes);
+ parsedNum++;
+ } else {
+ skippedNum++;
+ }
+ currentImportHistory.add(digest);
+ }
+ }
+ dumpStats.append("\n" + f.getName() + ": parsed " + parsedNum
+ + ", skipped " + skippedNum + " "
+ + (f.getName().startsWith("cached-descriptors") ?
+ "server" : "extra-info") + " descriptors");
+ }
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed reading "
+ + cachedDescDir.getAbsolutePath() + " directory.", e);
+ } catch (ParseException e) {
+ logger.log(Level.WARNING, "Failed reading "
+ + cachedDescDir.getAbsolutePath() + " directory.", e);
+ }
+ }
+ logger.fine("Finished reading "
+ + cachedDescDir.getAbsolutePath() + " directory.");
+ }
+
+ /* Write import history containing SHA-1 digests to disk. */
+ try {
+ importHistoryFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ importHistoryFile));
+ for (String digest : currentImportHistory) {
+ bw.write(digest + "\n");
+ }
+ bw.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write import history to "
+ + importHistoryFile.getAbsolutePath() + ".", e);
+ }
+
+ logger.info(dumpStats.toString());
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java
new file mode 100644
index 0000000..0bea50a
--- /dev/null
+++ b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java
@@ -0,0 +1,821 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.relaydescs;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.zip.InflaterInputStream;
+
+/**
+ * Downloads relay descriptors from the directory authorities via HTTP.
+ * Keeps a list of missing descriptors that gets updated by parse results
+ * from <code>RelayDescriptorParser</code> and downloads all missing
+ * descriptors that have been published in the last 24 hours. Also
+ * downloads all server and extra-info descriptors known to a directory
+ * authority at most once a day.
+ */
+public class RelayDescriptorDownloader {
+
+ /**
+ * Text file containing the descriptors that we are missing and that we
+ * want to download. Lines are formatted as:
+ *
+ * - "consensus,<validafter>,<parsed>",
+ * - "vote,<validafter>,<fingerprint>,<parsed>",
+ * - "server,<published>,<relayid>,<descid>,<parsed>", or
+ * - "extra,<published>,<relayid>,<descid>,<parsed>".
+ */
+ private File missingDescriptorsFile;
+
+ /**
+ * Relay descriptors that we are missing and that we want to download
+ * either in this execution or write to disk and try next time. Map keys
+ * contain comma-separated values as in the missing descriptors files
+ * without the "parsed" column. Map values contain the "parsed" column.
+ */
+ private SortedMap<String, String> missingDescriptors;
+
+ /**
+ * Text file containing the IP addresses (and Dir ports if not 80) of
+ * directory authorities and when we last downloaded all server and
+ * extra-info descriptors from them, so that we can avoid downloading
+ * them too often.
+ */
+ private File lastDownloadedAllDescriptorsFile;
+
+ /**
+ * Map of directory authorities and when we last downloaded all server
+ * and extra-info descriptors from them. Map keys are IP addresses (and
+ * Dir ports if not 80), map values are timestamps.
+ */
+ private Map<String, String> lastDownloadedAllDescriptors;
+
+ /**
+ * <code>RelayDescriptorParser</code> that we will hand over the
+ * downloaded descriptors for parsing.
+ */
+ private RelayDescriptorParser rdp;
+
+ /**
+ * Directory authorities that we will try to download missing
+ * descriptors from.
+ */
+ private List<String> authorities;
+
+ /**
+ * Should we try to download the current consensus if we don't have it?
+ */
+ private boolean downloadCurrentConsensus;
+
+ /**
+ * Should we try to download current votes if we don't have them?
+ */
+ private boolean downloadCurrentVotes;
+
+ /**
+ * Should we try to download missing server descriptors that have been
+ * published within the past 24 hours?
+ */
+ private boolean downloadMissingServerDescriptors;
+
+ /**
+ * Should we try to download missing extra-info descriptors that have
+ * been published within the past 24 hours?
+ */
+ private boolean downloadMissingExtraInfos;
+
+ /**
+ * Should we try to download all server descriptors from the authorities
+ * once every 24 hours?
+ */
+ private boolean downloadAllServerDescriptors;
+
+ /**
+ * Should we try to download all extra-info descriptors from the
+ * authorities once every 24 hours?
+ */
+ private boolean downloadAllExtraInfos;
+
+ /**
+ * Should we download zlib-compressed versions of descriptors by adding
+ * ".z" to URLs?
+ */
+ private boolean downloadCompressed;
+
+ /**
+ * valid-after time that we expect the current consensus and votes to
+ * have, formatted "yyyy-MM-dd HH:mm:ss". We only expect to find
+ * consensuses and votes with this valid-after time on the directory
+ * authorities. This time is initialized as the beginning of the current
+ * hour.
+ */
+ private String currentValidAfter;
+
+ /**
+ * Cut-off time for missing server and extra-info descriptors, formatted
+ * "yyyy-MM-dd HH:mm:ss". This time is initialized as the current system
+ * time minus 24 hours.
+ */
+ private String descriptorCutOff;
+
+ /**
+ * Cut-off time for downloading all server and extra-info descriptors
+ * from the directory authorities, formatted "yyyy-MM-dd HH:mm:ss". This
+ * time is initialized as the current system time minus 23:30 hours.
+ */
+ private String downloadAllDescriptorsCutOff;
+
+ /**
+ * Directory authorities that we plan to download all server and
+ * extra-info descriptors from in this execution.
+ */
+ private Set<String> downloadAllDescriptorsFromAuthorities;
+
+ /**
+ * Current timestamp that is written to the missing list for descriptors
+ * that we parsed in this execution and for authorities that we
+ * downloaded all server and extra-info descriptors from.
+ */
+ private String currentTimestamp;
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ /**
+ * Number of descriptor requests sent to each directory authority, to
+ * be included in the logs.
+ */
+ private Map<String, Integer> requestsByAuthority;
+
+ /**
+ * Counters for descriptors that we had on the missing list at the
+ * beginning of the execution, that we added to the missing list,
+ * that we requested, and that we successfully downloaded in this
+ * execution.
+ */
+ private int oldMissingConsensuses = 0, oldMissingVotes = 0,
+ oldMissingServerDescriptors = 0, oldMissingExtraInfoDescriptors = 0,
+ newMissingConsensuses = 0, newMissingVotes = 0,
+ newMissingServerDescriptors = 0, newMissingExtraInfoDescriptors = 0,
+ requestedConsensuses = 0, requestedVotes = 0,
+ requestedMissingServerDescriptors = 0,
+ requestedAllServerDescriptors = 0,
+ requestedMissingExtraInfoDescriptors = 0,
+ requestedAllExtraInfoDescriptors = 0, downloadedConsensuses = 0,
+ downloadedVotes = 0, downloadedMissingServerDescriptors = 0,
+ downloadedAllServerDescriptors = 0,
+ downloadedMissingExtraInfoDescriptors = 0,
+ downloadedAllExtraInfoDescriptors = 0;
+
+ /**
+ * Initializes this class, including reading in missing descriptors from
+ * <code>stats/missing-relay-descriptors</code> and the times when we
+ * last downloaded all server and extra-info descriptors from
+ * <code>stats/last-downloaded-all-descriptors</code>.
+ */
+ public RelayDescriptorDownloader(RelayDescriptorParser rdp,
+ List<String> authorities, boolean downloadCurrentConsensus,
+ boolean downloadCurrentVotes,
+ boolean downloadMissingServerDescriptors,
+ boolean downloadMissingExtraInfos,
+ boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos,
+ boolean downloadCompressed) {
+
+ /* Memorize argument values. */
+ this.rdp = rdp;
+ this.authorities = new ArrayList<String>(authorities);
+ this.downloadCurrentConsensus = downloadCurrentConsensus;
+ this.downloadCurrentVotes = downloadCurrentVotes;
+ this.downloadMissingServerDescriptors =
+ downloadMissingServerDescriptors;
+ this.downloadMissingExtraInfos = downloadMissingExtraInfos;
+ this.downloadAllServerDescriptors = downloadAllServerDescriptors;
+ this.downloadAllExtraInfos = downloadAllExtraInfos;
+ this.downloadCompressed = downloadCompressed;
+
+ /* Shuffle list of authorities for better load balancing over time. */
+ Collections.shuffle(this.authorities);
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(
+ RelayDescriptorDownloader.class.getName());
+
+ /* Prepare cut-off times and timestamp for the missing descriptors
+ * list and the list of authorities to download all server and
+ * extra-info descriptors from. */
+ SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ format.setTimeZone(TimeZone.getTimeZone("UTC"));
+ long now = System.currentTimeMillis();
+ this.currentValidAfter = format.format((now / (60L * 60L * 1000L)) *
+ (60L * 60L * 1000L));
+ this.descriptorCutOff = format.format(now - 24L * 60L * 60L * 1000L);
+ this.currentTimestamp = format.format(now);
+ this.downloadAllDescriptorsCutOff = format.format(now
+ - 23L * 60L * 60L * 1000L - 30L * 60L * 1000L);
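+ /* Illustrative example (made-up values): if now is
+ * 2012-10-26 15:42:17 UTC, currentValidAfter becomes
+ * "2012-10-26 15:00:00", descriptorCutOff becomes
+ * "2012-10-25 15:42:17", and downloadAllDescriptorsCutOff becomes
+ * "2012-10-25 16:12:17". */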
+
+ /* Read list of missing descriptors from disk and memorize those that
+ * we are interested in and that are likely to be found on the
+ * directory authorities. */
+ this.missingDescriptors = new TreeMap<String, String>();
+ this.missingDescriptorsFile = new File(
+ "stats/missing-relay-descriptors");
+ if (this.missingDescriptorsFile.exists()) {
+ try {
+ this.logger.fine("Reading file "
+ + this.missingDescriptorsFile.getAbsolutePath() + "...");
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.missingDescriptorsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (line.split(",").length > 2) {
+ String published = line.split(",")[1];
+ if (((line.startsWith("consensus,") ||
+ line.startsWith("vote,")) &&
+ this.currentValidAfter.equals(published)) ||
+ ((line.startsWith("server,") ||
+ line.startsWith("extra,")) &&
+ this.descriptorCutOff.compareTo(published) < 0)) {
+ if (!line.endsWith("NA")) {
+ /* Not missing. */
+ } else if (line.startsWith("consensus,")) {
+ oldMissingConsensuses++;
+ } else if (line.startsWith("vote,")) {
+ oldMissingVotes++;
+ } else if (line.startsWith("server,")) {
+ oldMissingServerDescriptors++;
+ } else if (line.startsWith("extra,")) {
+ oldMissingExtraInfoDescriptors++;
+ }
+ int separateAt = line.lastIndexOf(",");
+ this.missingDescriptors.put(line.substring(0,
+ separateAt), line.substring(separateAt + 1));
+ }
+ } else {
+ this.logger.fine("Invalid line '" + line + "' in "
+ + this.missingDescriptorsFile.getAbsolutePath()
+ + ". Ignoring.");
+ }
+ }
+ br.close();
+ this.logger.fine("Finished reading file "
+ + this.missingDescriptorsFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.missingDescriptorsFile.getAbsolutePath()
+ + "! This means that we might forget to dowload relay "
+ + "descriptors we are missing.", e);
+ }
+ }
+
+ /* Read list of directory authorities and when we last downloaded all
+ * server and extra-info descriptors from them. */
+ this.lastDownloadedAllDescriptors = new HashMap<String, String>();
+ this.lastDownloadedAllDescriptorsFile = new File(
+ "stats/last-downloaded-all-descriptors");
+ if (this.lastDownloadedAllDescriptorsFile.exists()) {
+ try {
+ this.logger.fine("Reading file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + "...");
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.lastDownloadedAllDescriptorsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (line.split(",").length != 2) {
+ this.logger.fine("Invalid line '" + line + "' in "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + ". Ignoring.");
+ } else {
+ String[] parts = line.split(",");
+ String authority = parts[0];
+ String lastDownloaded = parts[1];
+ this.lastDownloadedAllDescriptors.put(authority,
+ lastDownloaded);
+ }
+ }
+ br.close();
+ this.logger.fine("Finished reading file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + "! This means that we might download all server and "
+ + "extra-info descriptors more often than we should.", e);
+ }
+ }
+
+ /* Make a list of at most two directory authorities that we want to
+ * download all server and extra-info descriptors from. */
+ this.downloadAllDescriptorsFromAuthorities = new HashSet<String>();
+ for (String authority : this.authorities) {
+ if (!this.lastDownloadedAllDescriptors.containsKey(authority) ||
+ this.lastDownloadedAllDescriptors.get(authority).compareTo(
+ this.downloadAllDescriptorsCutOff) < 0) {
+ this.downloadAllDescriptorsFromAuthorities.add(authority);
+ }
+ if (this.downloadAllDescriptorsFromAuthorities.size() >= 2) {
+ break;
+ }
+ }
+
+ /* Prepare statistics on this execution. */
+ this.requestsByAuthority = new HashMap<String, Integer>();
+ for (String authority : this.authorities) {
+ this.requestsByAuthority.put(authority, 0);
+ }
+ }
+
+ /**
+ * We have parsed a consensus. Take this consensus off the missing list
+ * and add the votes created by the given <code>authorities</code> and
+ * the <code>serverDescriptors</code>, which are given in the format
+ * "<published>,<relayid>,<descid>", to that list.
+ */
+ public void haveParsedConsensus(String validAfter,
+ Set<String> authorities, Set<String> serverDescriptors) {
+
+ /* Mark consensus as parsed. */
+ if (this.currentValidAfter.equals(validAfter)) {
+ String consensusKey = "consensus," + validAfter;
+ this.missingDescriptors.put(consensusKey, this.currentTimestamp);
+
+ /* Add votes to missing list. */
+ for (String authority : authorities) {
+ String voteKey = "vote," + validAfter + "," + authority;
+ if (!this.missingDescriptors.containsKey(voteKey)) {
+ this.missingDescriptors.put(voteKey, "NA");
+ this.newMissingVotes++;
+ }
+ }
+ }
+
+ /* Add server descriptors to missing list. */
+ for (String serverDescriptor : serverDescriptors) {
+ String published = serverDescriptor.split(",")[0];
+ if (this.descriptorCutOff.compareTo(published) < 0) {
+ String serverDescriptorKey = "server," + serverDescriptor;
+ if (!this.missingDescriptors.containsKey(
+ serverDescriptorKey)) {
+ this.missingDescriptors.put(serverDescriptorKey, "NA");
+ this.newMissingServerDescriptors++;
+ }
+ }
+ }
+ }
+
+ /**
+ * We have parsed a vote. Take this vote off the missing list and add
+ * the <code>serverDescriptors</code>, which are given in the format
+ * "<published>,<relayid>,<descid>", to that list.
+ */
+ public void haveParsedVote(String validAfter, String fingerprint,
+ Set<String> serverDescriptors) {
+
+ /* Mark vote as parsed. */
+ if (this.currentValidAfter.equals(validAfter)) {
+ String voteKey = "vote," + validAfter + "," + fingerprint;
+ this.missingDescriptors.put(voteKey, this.currentTimestamp);
+ }
+
+ /* Add server descriptors to missing list. */
+ for (String serverDescriptor : serverDescriptors) {
+ String published = serverDescriptor.split(",")[0];
+ if (this.descriptorCutOff.compareTo(published) < 0) {
+ String serverDescriptorKey = "server," + serverDescriptor;
+ if (!this.missingDescriptors.containsKey(
+ serverDescriptorKey)) {
+ this.missingDescriptors.put(serverDescriptorKey, "NA");
+ this.newMissingServerDescriptors++;
+ }
+ }
+ }
+ }
+
+ /**
+ * We have parsed a server descriptor. Take this server descriptor off
+ * the missing list and put the extra-info descriptor digest on that
+ * list.
+ */
+ public void haveParsedServerDescriptor(String published,
+ String relayIdentity, String serverDescriptorDigest,
+ String extraInfoDigest) {
+
+ /* Mark server descriptor as parsed. */
+ if (this.descriptorCutOff.compareTo(published) < 0) {
+ String serverDescriptorKey = "server," + published + ","
+ + relayIdentity + "," + serverDescriptorDigest;
+ this.missingDescriptors.put(serverDescriptorKey,
+ this.currentTimestamp);
+
+ /* Add extra-info descriptor to missing list. */
+ if (extraInfoDigest != null) {
+ String extraInfoKey = "extra," + published + ","
+ + relayIdentity + "," + extraInfoDigest;
+ if (!this.missingDescriptors.containsKey(extraInfoKey)) {
+ this.missingDescriptors.put(extraInfoKey, "NA");
+ this.newMissingExtraInfoDescriptors++;
+ }
+ }
+ }
+ }
+
+ /**
+ * We have parsed an extra-info descriptor. Take it off the missing
+ * list.
+ */
+ public void haveParsedExtraInfoDescriptor(String published,
+ String relayIdentity, String extraInfoDigest) {
+ if (this.descriptorCutOff.compareTo(published) < 0) {
+ String extraInfoKey = "extra," + published + ","
+ + relayIdentity + "," + extraInfoDigest;
+ this.missingDescriptors.put(extraInfoKey, this.currentTimestamp);
+ }
+ }
+
+ /**
+ * Downloads missing descriptors that we think might still be available
+ * on the directory authorities as well as all server and extra-info
+ * descriptors once per day.
+ */
+ public void downloadDescriptors() {
+
+ /* Put the current consensus on the missing list, unless we already
+ * have it. */
+ String consensusKey = "consensus," + this.currentValidAfter;
+ if (!this.missingDescriptors.containsKey(consensusKey)) {
+ this.missingDescriptors.put(consensusKey, "NA");
+ this.newMissingConsensuses++;
+ }
+
+ /* Download descriptors from the authorities, which are in random
+ * order, so that we distribute the load somewhat fairly over time. */
+ for (String authority : authorities) {
+
+ /* Make all requests to an authority in a single try block. If
+ * something goes wrong with this authority, we give up on all
+ * downloads and continue with the next authority. */
+ /* TODO Some authorities provide very little bandwidth and could
+ * slow down the entire download process. Ponder adding a timeout of
+ * 3 or 5 minutes per authority to avoid getting in the way of the
+ * next execution. */
+ try {
+
+ /* Start with downloading the current consensus, unless we already
+ * have it. */
+ if (downloadCurrentConsensus) {
+ if (this.missingDescriptors.containsKey(consensusKey) &&
+ this.missingDescriptors.get(consensusKey).equals("NA")) {
+ this.requestedConsensuses++;
+ this.downloadedConsensuses +=
+ this.downloadResourceFromAuthority(authority,
+ "/tor/status-vote/current/consensus");
+ }
+ }
+
+ /* Next, try to download current votes that we're missing. */
+ if (downloadCurrentVotes) {
+ String voteKeyPrefix = "vote," + this.currentValidAfter;
+ SortedSet<String> fingerprints = new TreeSet<String>();
+ for (Map.Entry<String, String> e :
+ this.missingDescriptors.entrySet()) {
+ if (e.getValue().equals("NA") &&
+ e.getKey().startsWith(voteKeyPrefix)) {
+ String fingerprint = e.getKey().split(",")[2];
+ fingerprints.add(fingerprint);
+ }
+ }
+ for (String fingerprint : fingerprints) {
+ this.requestedVotes++;
+ this.downloadedVotes +=
+ this.downloadResourceFromAuthority(authority,
+ "/tor/status-vote/current/" + fingerprint);
+ }
+ }
+
+ /* Download either all server and extra-info descriptors or only
+ * those that we're missing. Start with server descriptors, then
+ * request extra-info descriptors. */
+ List<String> types = Arrays.asList("server", "extra");
+ for (String type : types) {
+
+ /* Download all server or extra-info descriptors from this
+ * authority if we haven't done so for 24 hours and if we're
+ * configured to do so. */
+ if (this.downloadAllDescriptorsFromAuthorities.contains(
+ authority) && ((type.equals("server") &&
+ this.downloadAllServerDescriptors) ||
+ (type.equals("extra") && this.downloadAllExtraInfos))) {
+ int downloadedAllDescriptors =
+ this.downloadResourceFromAuthority(authority, "/tor/"
+ + type + "/all");
+ if (type.equals("server")) {
+ this.requestedAllServerDescriptors++;
+ this.downloadedAllServerDescriptors +=
+ downloadedAllDescriptors;
+ } else {
+ this.requestedAllExtraInfoDescriptors++;
+ this.downloadedAllExtraInfoDescriptors +=
+ downloadedAllDescriptors;
+ }
+
+ /* Download missing server or extra-info descriptors if we're
+ * configured to do so. */
+ } else if ((type.equals("server") &&
+ this.downloadMissingServerDescriptors) ||
+ (type.equals("extra") && this.downloadMissingExtraInfos)) {
+
+ /* Go through the list of missing descriptors of this type
+ * and combine the descriptor identifiers to a URL of up to
+ * 96 descriptors that we can download at once. */
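+ /* A combined resource might look like, e.g. (hypothetical
+ * digests), "/tor/server/d/<digest1>+<digest2>+..." with up to
+ * 96 "+"-separated digests. */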
+ SortedSet<String> descriptorIdentifiers =
+ new TreeSet<String>();
+ for (Map.Entry<String, String> e :
+ this.missingDescriptors.entrySet()) {
+ if (e.getValue().equals("NA") &&
+ e.getKey().startsWith(type + ",") &&
+ this.descriptorCutOff.compareTo(
+ e.getKey().split(",")[1]) < 0) {
+ String descriptorIdentifier = e.getKey().split(",")[3];
+ descriptorIdentifiers.add(descriptorIdentifier);
+ }
+ }
+ StringBuilder combinedResource = null;
+ int descriptorsInCombinedResource = 0,
+ requestedDescriptors = 0, downloadedDescriptors = 0;
+ for (String descriptorIdentifier : descriptorIdentifiers) {
+ if (descriptorsInCombinedResource >= 96) {
+ requestedDescriptors += descriptorsInCombinedResource;
+ downloadedDescriptors +=
+ this.downloadResourceFromAuthority(authority,
+ combinedResource.toString());
+ combinedResource = null;
+ descriptorsInCombinedResource = 0;
+ }
+ if (descriptorsInCombinedResource == 0) {
+ combinedResource = new StringBuilder("/tor/" + type
+ + "/d/" + descriptorIdentifier);
+ } else {
+ combinedResource.append("+" + descriptorIdentifier);
+ }
+ descriptorsInCombinedResource++;
+ }
+ if (descriptorsInCombinedResource > 0) {
+ requestedDescriptors += descriptorsInCombinedResource;
+ downloadedDescriptors +=
+ this.downloadResourceFromAuthority(authority,
+ combinedResource.toString());
+ }
+ if (type.equals("server")) {
+ this.requestedMissingServerDescriptors +=
+ requestedDescriptors;
+ this.downloadedMissingServerDescriptors +=
+ downloadedDescriptors;
+ } else {
+ this.requestedMissingExtraInfoDescriptors +=
+ requestedDescriptors;
+ this.downloadedMissingExtraInfoDescriptors +=
+ downloadedDescriptors;
+ }
+ }
+ }
+
+ /* If a download failed, stop requesting descriptors from this
+ * authority and move on to the next. */
+ } catch (IOException e) {
+ logger.log(Level.FINE, "Failed downloading from " + authority
+ + "!", e);
+ }
+ }
+ }
+
+ /**
+ * Attempts to download one or more descriptors identified by a resource
+ * string from a directory authority and passes the returned
+ * descriptor(s) to the <code>RelayDescriptorParser</code> upon success.
+ * Returns the number of descriptors contained in the reply. Throws an
+ * <code>IOException</code> if something goes wrong while downloading.
+ */
+ private int downloadResourceFromAuthority(String authority,
+ String resource) throws IOException {
+ byte[] allData = null;
+ this.requestsByAuthority.put(authority,
+ this.requestsByAuthority.get(authority) + 1);
+ /* TODO Disable compressed downloads for extra-info descriptors,
+ * because zlib decompression doesn't work correctly. Figure out why
+ * this is and fix it. */
+ String fullUrl = "http://" + authority + resource
+ + (this.downloadCompressed && !resource.startsWith("/tor/extra/")
+ ? ".z" : "");
+ URL u = new URL(fullUrl);
+ HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+ huc.setRequestMethod("GET");
+ huc.connect();
+ int response = huc.getResponseCode();
+ if (response == 200) {
+ BufferedInputStream in = this.downloadCompressed &&
+ !resource.startsWith("/tor/extra/")
+ ? new BufferedInputStream(new InflaterInputStream(
+ huc.getInputStream()))
+ : new BufferedInputStream(huc.getInputStream());
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = in.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ in.close();
+ allData = baos.toByteArray();
+ }
+ logger.fine("Downloaded " + fullUrl + " -> " + response + " ("
+ + (allData == null ? 0 : allData.length) + " bytes)");
+ int receivedDescriptors = 0;
+ if (allData != null) {
+ if (resource.startsWith("/tor/status-vote/current/")) {
+ this.rdp.parse(allData);
+ receivedDescriptors = 1;
+ } else if (resource.startsWith("/tor/server/") ||
+ resource.startsWith("/tor/extra/")) {
+ if (resource.equals("/tor/server/all")) {
+ this.lastDownloadedAllDescriptors.put(authority,
+ this.currentTimestamp);
+ }
+ String ascii = null;
+ try {
+ ascii = new String(allData, "US-ASCII");
+ } catch (UnsupportedEncodingException e) {
+ /* No way that US-ASCII is not supported. */
+ }
+ int start = -1, sig = -1, end = -1;
+ String startToken = resource.startsWith("/tor/server/") ?
+ "router " : "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String endToken = "\n-----END SIGNATURE-----\n";
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ sig = ascii.indexOf(sigToken, start);
+ if (sig < 0) {
+ break;
+ }
+ sig += sigToken.length();
+ end = ascii.indexOf(endToken, sig);
+ if (end < 0) {
+ break;
+ }
+ end += endToken.length();
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0, end - start);
+ this.rdp.parse(descBytes);
+ receivedDescriptors++;
+ }
+ }
+ }
+ return receivedDescriptors;
+ }
+
+ /**
+ * Writes status files to disk and logs statistics about downloading
+ * relay descriptors in this execution.
+ */
+ public void writeFile() {
+
+ /* Write missing descriptors file to disk. */
+ int missingConsensuses = 0, missingVotes = 0,
+ missingServerDescriptors = 0, missingExtraInfoDescriptors = 0;
+ try {
+ this.logger.fine("Writing file "
+ + this.missingDescriptorsFile.getAbsolutePath() + "...");
+ this.missingDescriptorsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.missingDescriptorsFile));
+ for (Map.Entry<String, String> e :
+ this.missingDescriptors.entrySet()) {
+ String key = e.getKey(), value = e.getValue();
+ if (!value.equals("NA")) {
+ /* Not missing. */
+ } else if (key.startsWith("consensus,")) {
+ missingConsensuses++;
+ } else if (key.startsWith("vote,")) {
+ missingVotes++;
+ } else if (key.startsWith("server,")) {
+ missingServerDescriptors++;
+ } else if (key.startsWith("extra,")) {
+ missingExtraInfoDescriptors++;
+ }
+ bw.write(key + "," + value + "\n");
+ }
+ bw.close();
+ this.logger.fine("Finished writing file "
+ + this.missingDescriptorsFile.getAbsolutePath() + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed writing "
+ + this.missingDescriptorsFile.getAbsolutePath() + "!", e);
+ }
+
+ /* Write text file containing the directory authorities and when we
+ * last downloaded all server and extra-info descriptors from them to
+ * disk. */
+ try {
+ this.logger.fine("Writing file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + "...");
+ this.lastDownloadedAllDescriptorsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.lastDownloadedAllDescriptorsFile));
+ for (Map.Entry<String, String> e :
+ this.lastDownloadedAllDescriptors.entrySet()) {
+ String authority = e.getKey();
+ String lastDownloaded = e.getValue();
+ bw.write(authority + "," + lastDownloaded + "\n");
+ }
+ bw.close();
+ this.logger.fine("Finished writing file "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath()
+ + ".");
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed writing "
+ + this.lastDownloadedAllDescriptorsFile.getAbsolutePath() + "!",
+ e);
+ }
+
+ /* Log statistics about this execution. */
+ this.logger.info("Finished downloading relay descriptors from the "
+ + "directory authorities.");
+ this.logger.info("At the beginning of this execution, we were "
+ + "missing " + oldMissingConsensuses + " consensus(es), "
+ + oldMissingVotes + " vote(s), " + oldMissingServerDescriptors
+ + " server descriptor(s), and " + oldMissingExtraInfoDescriptors
+ + " extra-info descriptor(s).");
+ this.logger.info("During this execution, we added "
+ + this.newMissingConsensuses + " consensus(es), "
+ + this.newMissingVotes + " vote(s), "
+ + this.newMissingServerDescriptors + " server descriptor(s), and "
+ + this.newMissingExtraInfoDescriptors + " extra-info "
+ + "descriptor(s) to the missing list, some of which we also "
+ + "requested and removed from the list again.");
+ this.logger.info("We requested " + this.requestedConsensuses
+ + " consensus(es), " + this.requestedVotes + " vote(s), "
+ + this.requestedMissingServerDescriptors + " missing server "
+ + "descriptor(s), " + this.requestedAllServerDescriptors
+ + " times all server descriptors, "
+ + this.requestedMissingExtraInfoDescriptors + " missing "
+ + "extra-info descriptor(s), and "
+ + this.requestedAllExtraInfoDescriptors + " times all extra-info "
+ + "descriptors from the directory authorities.");
+ StringBuilder sb = new StringBuilder();
+ for (String authority : this.authorities) {
+ sb.append(" " + authority + "="
+ + this.requestsByAuthority.get(authority));
+ }
+ this.logger.info("We sent these numbers of requests to the directory "
+ + "authorities:" + sb.toString());
+ this.logger.info("We successfully downloaded "
+ + this.downloadedConsensuses + " consensus(es), "
+ + this.downloadedVotes + " vote(s), "
+ + this.downloadedMissingServerDescriptors + " missing server "
+ + "descriptor(s), " + this.downloadedAllServerDescriptors
+ + " server descriptor(s) when downloading all descriptors, "
+ + this.downloadedMissingExtraInfoDescriptors + " missing "
+ + "extra-info descriptor(s) and "
+ + this.downloadedAllExtraInfoDescriptors + " extra-info "
+ + "descriptor(s) when downloading all descriptors.");
+ this.logger.info("At the end of this execution, we are missing "
+ + missingConsensuses + " consensus(es), " + missingVotes
+ + " vote(s), " + missingServerDescriptors + " server "
+ + "descriptor(s), and " + missingExtraInfoDescriptors
+ + " extra-info descriptor(s), some of which we may try in the next "
+ + "execution.");
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java
new file mode 100644
index 0000000..6f04c20
--- /dev/null
+++ b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java
@@ -0,0 +1,265 @@
+/* Copyright 2010--2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.relaydescs;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+
+/**
+ * Parses relay descriptors including network status consensuses and
+ * votes, server and extra-info descriptors, and passes the results to the
+ * stats handlers, to the archive writer, or to the relay descriptor
+ * downloader.
+ */
+public class RelayDescriptorParser {
+
+ /**
+ * File writer that writes descriptor contents to files in a
+ * directory-archive directory structure.
+ */
+ private ArchiveWriter aw;
+
+ /**
+ * Missing descriptor downloader that uses the parse results to learn
+ * which descriptors we are missing and want to download.
+ */
+ private RelayDescriptorDownloader rdd;
+
+ /**
+ * Logger for this class.
+ */
+ private Logger logger;
+
+ private SimpleDateFormat dateTimeFormat;
+
+ /**
+ * Initializes this class.
+ */
+ public RelayDescriptorParser(ArchiveWriter aw) {
+ this.aw = aw;
+
+ /* Initialize logger. */
+ this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
+
+ this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ }
+
+ public void setRelayDescriptorDownloader(
+ RelayDescriptorDownloader rdd) {
+ this.rdd = rdd;
+ }
+
+ public void parse(byte[] data) {
+ try {
+ /* Convert descriptor to ASCII for parsing. This means we'll lose
+ * the non-ASCII chars, but we don't care about them for parsing
+ * anyway. */
+ BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, "US-ASCII")));
+ String line;
+ do {
+ line = br.readLine();
+ } while (line != null && line.startsWith("@"));
+ if (line == null) {
+ this.logger.fine("We were given an empty descriptor for "
+ + "parsing. Ignoring.");
+ return;
+ }
+ SimpleDateFormat parseFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ if (line.equals("network-status-version 3")) {
+ // TODO when parsing the current consensus, check the fresh-until
+ // time to see when we switch from hourly to half-hourly
+ // consensuses
+ boolean isConsensus = true;
+ String validAfterTime = null, fingerprint = null,
+ dirSource = null;
+ long validAfter = -1L, dirKeyPublished = -1L;
+ SortedSet<String> dirSources = new TreeSet<String>();
+ SortedSet<String> serverDescriptors = new TreeSet<String>();
+ SortedSet<String> hashedRelayIdentities = new TreeSet<String>();
+ StringBuilder certificateStringBuilder = null;
+ String certificateString = null;
+ while ((line = br.readLine()) != null) {
+ if (certificateStringBuilder != null) {
+ if (line.startsWith("r ")) {
+ certificateString = certificateStringBuilder.toString();
+ certificateStringBuilder = null;
+ } else {
+ certificateStringBuilder.append(line + "\n");
+ }
+ }
+ if (line.equals("vote-status vote")) {
+ isConsensus = false;
+ } else if (line.startsWith("valid-after ")) {
+ validAfterTime = line.substring("valid-after ".length());
+ validAfter = parseFormat.parse(validAfterTime).getTime();
+ } else if (line.startsWith("dir-source ")) {
+ dirSource = line.split(" ")[2];
+ } else if (line.startsWith("vote-digest ")) {
+ dirSources.add(dirSource);
+ } else if (line.startsWith("dir-key-certificate-version ")) {
+ certificateStringBuilder = new StringBuilder();
+ certificateStringBuilder.append(line + "\n");
+ } else if (line.startsWith("fingerprint ")) {
+ fingerprint = line.split(" ")[1];
+ } else if (line.startsWith("dir-key-published ")) {
+ String dirKeyPublishedTime = line.substring(
+ "dir-key-published ".length());
+ dirKeyPublished = parseFormat.parse(dirKeyPublishedTime).
+ getTime();
+ } else if (line.startsWith("r ")) {
+ String[] parts = line.split(" ");
+ if (parts.length < 9) {
+ this.logger.log(Level.WARNING, "Could not parse r line '"
+ + line + "' in descriptor. Skipping.");
+ break;
+ }
+ String publishedTime = parts[4] + " " + parts[5];
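+ /* parts[2] and parts[3] contain the relay identity and server
+ * descriptor digest, base64-encoded without trailing "="
+ * padding; append the padding before decoding, then re-encode
+ * the 20 bytes as 40 hex characters. */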
+ String relayIdentity = Hex.encodeHexString(
+ Base64.decodeBase64(parts[2] + "=")).
+ toLowerCase();
+ String serverDesc = Hex.encodeHexString(Base64.decodeBase64(
+ parts[3] + "=")).toLowerCase();
+ serverDescriptors.add(publishedTime + "," + relayIdentity
+ + "," + serverDesc);
+ hashedRelayIdentities.add(DigestUtils.shaHex(
+ Base64.decodeBase64(parts[2] + "=")).
+ toUpperCase());
+ }
+ }
+ if (isConsensus) {
+ if (this.rdd != null) {
+ this.rdd.haveParsedConsensus(validAfterTime, dirSources,
+ serverDescriptors);
+ }
+ if (this.aw != null) {
+ this.aw.storeConsensus(data, validAfter);
+ }
+ } else {
+ if (this.aw != null || this.rdd != null) {
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "network-status-version ";
+ String sigToken = "directory-signature ";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken);
+ if (start >= 0 && sig >= 0 && sig > start) {
+ sig += sigToken.length();
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ String digest = DigestUtils.shaHex(forDigest).toUpperCase();
+ if (this.aw != null) {
+ this.aw.storeVote(data, validAfter, dirSource, digest);
+ }
+ if (this.rdd != null) {
+ this.rdd.haveParsedVote(validAfterTime, fingerprint,
+ serverDescriptors);
+ }
+ }
+ if (certificateString != null) {
+ if (this.aw != null) {
+ this.aw.storeCertificate(certificateString.getBytes(),
+ dirSource, dirKeyPublished);
+ }
+ }
+ }
+ }
+ } else if (line.startsWith("router ")) {
+ String publishedTime = null, extraInfoDigest = null,
+ relayIdentifier = null;
+ long published = -1L;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("published ")) {
+ publishedTime = line.substring("published ".length());
+ published = parseFormat.parse(publishedTime).getTime();
+ } else if (line.startsWith("opt fingerprint") ||
+ line.startsWith("fingerprint")) {
+ relayIdentifier = line.substring(line.startsWith("opt ") ?
+ "opt fingerprint".length() : "fingerprint".length()).
+ replaceAll(" ", "").toLowerCase();
+ } else if (line.startsWith("opt extra-info-digest ") ||
+ line.startsWith("extra-info-digest ")) {
+ extraInfoDigest = line.startsWith("opt ") ?
+ line.split(" ")[2].toLowerCase() :
+ line.split(" ")[1].toLowerCase();
+ }
+ }
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "router ";
+ String sigToken = "\nrouter-signature\n";
+ int start = ascii.indexOf(startToken);
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ String digest = null;
+ if (start >= 0 && sig >= 0 && sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ digest = DigestUtils.shaHex(forDigest);
+ }
+ if (this.aw != null && digest != null) {
+ this.aw.storeServerDescriptor(data, digest, published);
+ }
+ if (this.rdd != null && digest != null) {
+ this.rdd.haveParsedServerDescriptor(publishedTime,
+ relayIdentifier, digest, extraInfoDigest);
+ }
+ } else if (line.startsWith("extra-info ")) {
+ String publishedTime = null, relayIdentifier = line.split(" ")[2];
+ long published = -1L;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("published ")) {
+ publishedTime = line.substring("published ".length());
+ published = parseFormat.parse(publishedTime).getTime();
+ }
+ }
+ String ascii = new String(data, "US-ASCII");
+ String startToken = "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String digest = null;
+ int start = ascii.indexOf(startToken);
+ if (start > 0) {
+ /* Do not confuse "extra-info " in "@type extra-info 1.0" with
+ * "extra-info 0000...". TODO This is a hack that should be
+ * solved by using metrics-lib some day. */
+ start = ascii.indexOf("\n" + startToken);
+ if (start > 0) {
+ start++;
+ }
+ }
+ int sig = ascii.indexOf(sigToken) + sigToken.length();
+ if (start >= 0 && sig >= 0 && sig > start) {
+ byte[] forDigest = new byte[sig - start];
+ System.arraycopy(data, start, forDigest, 0, sig - start);
+ digest = DigestUtils.shaHex(forDigest);
+ }
+ if (this.aw != null && digest != null) {
+ this.aw.storeExtraInfoDescriptor(data, digest, published);
+ }
+ if (this.rdd != null && digest != null) {
+ this.rdd.haveParsedExtraInfoDescriptor(publishedTime,
+ relayIdentifier.toLowerCase(), digest);
+ }
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Could not parse descriptor. "
+ + "Skipping.", e);
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse descriptor. "
+ + "Skipping.", e);
+ }
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
new file mode 100644
index 0000000..1ac593a
--- /dev/null
+++ b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
@@ -0,0 +1,573 @@
+/* Copyright 2012 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db.torperf;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.text.SimpleDateFormat;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/* Download possibly truncated Torperf .data and .extradata files from
+ * configured sources, append them to the files we already have, and merge
+ * the two files into the .tpf format. */
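+/* A .data line consists of space-separated second/microsecond column
+ * pairs plus byte counters; an .extradata line consists of
+ * space-separated KEY=VALUE pairs (e.g., hypothetically,
+ * "LAUNCH=1351261800.00 USED_AT=1351261842.50"); a merged .tpf line
+ * combines both sets of KEY=VALUE pairs below an "@type torperf 1.0"
+ * header line. */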
+public class TorperfDownloader {
+
+ private File torperfOutputDirectory = null;
+ private SortedMap<String, String> torperfSources = null;
+ private List<String> torperfFilesLines = null;
+ private Logger logger = null;
+ private SimpleDateFormat dateFormat;
+
+ public TorperfDownloader(File torperfOutputDirectory,
+ SortedMap<String, String> torperfSources,
+ List<String> torperfFilesLines) {
+ if (torperfOutputDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+ this.torperfOutputDirectory = torperfOutputDirectory;
+ this.torperfSources = torperfSources;
+ this.torperfFilesLines = torperfFilesLines;
+ if (!this.torperfOutputDirectory.exists()) {
+ this.torperfOutputDirectory.mkdirs();
+ }
+ this.logger = Logger.getLogger(TorperfDownloader.class.getName());
+ this.dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+ this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ this.readLastMergedTimestamps();
+ for (String torperfFilesLine : this.torperfFilesLines) {
+ this.downloadAndMergeFiles(torperfFilesLine);
+ }
+ this.writeLastMergedTimestamps();
+ }
+
+ private File torperfLastMergedFile =
+ new File("stats/torperf-last-merged");
+ SortedMap<String, String> lastMergedTimestamps =
+ new TreeMap<String, String>();
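+ /* Each line in stats/torperf-last-merged is expected to contain a
+ * file name and a numeric timestamp separated by a single space,
+ * e.g. (hypothetical): "torperf-50kb.data 1351261800.00". */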
+ private void readLastMergedTimestamps() {
+ if (!this.torperfLastMergedFile.exists()) {
+ return;
+ }
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.torperfLastMergedFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(" ");
+ String fileName = null, timestamp = null;
+ if (parts.length == 2) {
+ try {
+ Double.parseDouble(parts[1]);
+ fileName = parts[0];
+ timestamp = parts[1];
+ } catch (NumberFormatException e) {
+ /* Handle below. */
+ }
+ }
+ if (fileName == null || timestamp == null) {
+ this.logger.log(Level.WARNING, "Invalid line '" + line + "' in "
+ + this.torperfLastMergedFile.getAbsolutePath() + ". "
+ + "Ignoring past history of merging .data and .extradata "
+ + "files.");
+ this.lastMergedTimestamps.clear();
+ break;
+ }
+ this.lastMergedTimestamps.put(fileName, timestamp);
+ }
+ br.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Error while reading '"
+ + this.torperfLastMergedFile.getAbsolutePath() + ". Ignoring "
+ + "past history of merging .data and .extradata files.");
+ this.lastMergedTimestamps.clear();
+ }
+ }
+
+ private void writeLastMergedTimestamps() {
+ try {
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ this.torperfLastMergedFile));
+ for (Map.Entry<String, String> e :
+ this.lastMergedTimestamps.entrySet()) {
+ String fileName = e.getKey();
+ String timestamp = e.getValue();
+ bw.write(fileName + " " + timestamp + "\n");
+ }
+ bw.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Error while writing '"
+ + this.torperfLastMergedFile.getAbsolutePath() + ". This may "
+ + "result in ignoring history of merging .data and .extradata "
+ + "files in the next execution.", e);
+ }
+ }
+
+ private void downloadAndMergeFiles(String torperfFilesLine) {
+ String[] parts = torperfFilesLine.split(" ");
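+ /* A TorperfFiles configuration line is expected to contain five
+ * space-separated fields: the configuration key, source name, file
+ * size in bytes, .data file name, and .extradata file name, e.g.
+ * (hypothetical): "TorperfFiles torperf 51200 50kb.data
+ * 50kb.extradata". */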
+ String sourceName = parts[1];
+ int fileSize = -1;
+ try {
+ fileSize = Integer.parseInt(parts[2]);
+ } catch (NumberFormatException e) {
+ this.logger.log(Level.WARNING, "Could not parse file size in "
+ + "TorperfFiles configuration line '" + torperfFilesLine
+ + "'.");
+ return;
+ }
+
+ /* Download and append the .data file. */
+ String dataFileName = parts[3];
+ String sourceBaseUrl = torperfSources.get(sourceName);
+ String dataUrl = sourceBaseUrl + dataFileName;
+ String dataOutputFileName = sourceName + "-" + dataFileName;
+ File dataOutputFile = new File(torperfOutputDirectory,
+ dataOutputFileName);
+ boolean downloadedDataFile = this.downloadAndAppendFile(dataUrl,
+ dataOutputFile, true);
+
+ /* Download and append the .extradata file. */
+ String extradataFileName = parts[4];
+ String extradataUrl = sourceBaseUrl + extradataFileName;
+ String extradataOutputFileName = sourceName + "-" + extradataFileName;
+ File extradataOutputFile = new File(torperfOutputDirectory,
+ extradataOutputFileName);
+ boolean downloadedExtradataFile = this.downloadAndAppendFile(
+ extradataUrl, extradataOutputFile, false);
+
+ /* Merge both files into .tpf format. */
+ if (!downloadedDataFile && !downloadedExtradataFile) {
+ return;
+ }
+ String skipUntil = null;
+ if (this.lastMergedTimestamps.containsKey(dataOutputFileName)) {
+ skipUntil = this.lastMergedTimestamps.get(dataOutputFileName);
+ }
+ try {
+ skipUntil = this.mergeFiles(dataOutputFile, extradataOutputFile,
+ sourceName, fileSize, skipUntil);
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed merging " + dataOutputFile
+ + " and " + extradataOutputFile + ".", e);
+ }
+ if (skipUntil != null) {
+ this.lastMergedTimestamps.put(dataOutputFileName, skipUntil);
+ }
+ }
+
+ private boolean downloadAndAppendFile(String url, File outputFile,
+ boolean isDataFile) {
+
+ /* Read an existing output file to determine which line will be the
+ * first to append to it. */
+ String lastTimestampLine = null;
+ int linesAfterLastTimestampLine = 0;
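+ /* Skip the download entirely if we last modified the output file
+ * less than 330 minutes (5:30 hours) ago. */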
+ if (outputFile.exists() && outputFile.lastModified() >
+ System.currentTimeMillis() - 330L * 60L * 1000L) {
+ return false;
+ } else if (outputFile.exists()) {
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(
+ outputFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (isDataFile || line.contains(" LAUNCH")) {
+ lastTimestampLine = line;
+ linesAfterLastTimestampLine = 0;
+ } else {
+ linesAfterLastTimestampLine++;
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed reading '"
+ + outputFile.getAbsolutePath() + "' to determine the first "
+ + "line to append to it.", e);
+ return false;
+ }
+ }
+ try {
+ this.logger.fine("Downloading " + (isDataFile ? ".data" :
+ ".extradata") + " file from '" + url + "' and merging it into "
+ + "'" + outputFile.getAbsolutePath() + "'.");
+ URL u = new URL(url);
+ HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+ huc.setRequestMethod("GET");
+ huc.connect();
+ BufferedReader br = new BufferedReader(new InputStreamReader(
+ huc.getInputStream()));
+ String line;
+ BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile,
+ true));
+ boolean copyLines = lastTimestampLine == null;
+ while ((line = br.readLine()) != null) {
+ if (copyLines && linesAfterLastTimestampLine == 0) {
+ if (isDataFile || line.contains(" LAUNCH")) {
+ lastTimestampLine = line;
+ }
+ bw.write(line + "\n");
+ } else if (copyLines && linesAfterLastTimestampLine > 0) {
+ linesAfterLastTimestampLine--;
+ } else if (line.equals(lastTimestampLine)) {
+ copyLines = true;
+ }
+ }
+ bw.close();
+ br.close();
+ if (!copyLines) {
+ this.logger.warning("The last timestamp line in '"
+ + outputFile.getAbsolutePath() + "' is not contained in the "
+ + "new file downloaded from '" + url + "'. Cannot append "
+ + "new lines without possibly leaving a gap. Skipping.");
+ return false;
+ }
+ } catch (IOException e) {
+ this.logger.log(Level.WARNING, "Failed downloading and/or merging '"
+ + url + "'.", e);
+ return false;
+ }
+ if (lastTimestampLine == null) {
+ this.logger.warning("'" + outputFile.getAbsolutePath()
+ + "' doesn't contain any timestamp lines. Unable to check "
+ + "whether that file is stale or not.");
+ } else {
+ long lastTimestampMillis = -1L;
+ if (isDataFile) {
+ lastTimestampMillis = Long.parseLong(lastTimestampLine.substring(
+ 0, lastTimestampLine.indexOf(" "))) * 1000L;
+ } else {
+ lastTimestampMillis = Long.parseLong(lastTimestampLine.substring(
+ lastTimestampLine.indexOf(" LAUNCH=") + " LAUNCH=".length(),
+ lastTimestampLine.indexOf(".",
+ lastTimestampLine.indexOf(" LAUNCH=")))) * 1000L;
+ }
+ if (lastTimestampMillis < System.currentTimeMillis()
+ - 330L * 60L * 1000L) {
+ this.logger.warning("The last timestamp in '"
+ + outputFile.getAbsolutePath() + "' is more than 5:30 hours "
+ + "old: " + lastTimestampMillis);
+ }
+ }
+ return true;
+ }
+
+ private String mergeFiles(File dataFile, File extradataFile,
+ String source, int fileSize, String skipUntil) throws IOException {
+ SortedMap<String, String> config = new TreeMap<String, String>();
+ config.put("SOURCE", source);
+ config.put("FILESIZE", String.valueOf(fileSize));
+ if (!dataFile.exists() || !extradataFile.exists()) {
+ this.logger.warning("File " + dataFile.getAbsolutePath() + " or "
+ + extradataFile.getAbsolutePath() + " is missing.");
+ return null;
+ }
+ this.logger.fine("Merging " + dataFile.getAbsolutePath() + " and "
+ + extradataFile.getAbsolutePath() + " into .tpf format.");
+ BufferedReader brD = new BufferedReader(new FileReader(dataFile)),
+ brE = new BufferedReader(new FileReader(extradataFile));
+ String lineD = brD.readLine(), lineE = brE.readLine();
+ int d = 1, e = 1;
+ String maxDataComplete = null, maxUsedAt = null;
+ while (lineD != null) {
+
+ /* Parse .data line. Every valid .data line will go into the .tpf
+ * format, either with additional information from the .extradata
+ * file or without it. */
+ if (lineD.isEmpty()) {
+ this.logger.finer("Skipping empty line " + dataFile.getName()
+ + ":" + d++ + ".");
+ lineD = brD.readLine();
+ continue;
+ }
+ SortedMap<String, String> data = this.parseDataLine(lineD);
+ if (data == null) {
+ this.logger.finer("Skipping illegal line " + dataFile.getName()
+ + ":" + d++ + " '" + lineD + "'.");
+ lineD = brD.readLine();
+ continue;
+ }
+ String dataComplete = data.get("DATACOMPLETE");
+ double dataCompleteSeconds = Double.parseDouble(dataComplete);
+ if (skipUntil != null && dataComplete.compareTo(skipUntil) < 0) {
+ this.logger.finer("Skipping " + dataFile.getName() + ":"
+ + d++ + " which we already processed before.");
+ lineD = brD.readLine();
+ continue;
+ }
+ maxDataComplete = dataComplete;
+
+ /* Parse .extradata line if available and try to find the one that
+ * matches the .data line. */
+ SortedMap<String, String> extradata = null;
+ while (lineE != null) {
+ if (lineE.isEmpty()) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which is empty.");
+ lineE = brE.readLine();
+ continue;
+ }
+ if (lineE.startsWith("BUILDTIMEOUT_SET ")) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which is a BUILDTIMEOUT_SET line.");
+ lineE = brE.readLine();
+ continue;
+ } else if (lineE.startsWith("ok ") ||
+ lineE.startsWith("error ")) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which is in the old format.");
+ lineE = brE.readLine();
+ continue;
+ }
+ extradata = this.parseExtradataLine(lineE);
+ if (extradata == null) {
+ this.logger.finer("Skipping Illegal line "
+ + extradataFile.getName() + ":" + e++ + " '" + lineE
+ + "'.");
+ lineE = brE.readLine();
+ continue;
+ }
+ if (!extradata.containsKey("USED_AT")) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which doesn't contain a USED_AT element.");
+ lineE = brE.readLine();
+ continue;
+ }
+ String usedAt = extradata.get("USED_AT");
+ double usedAtSeconds = Double.parseDouble(usedAt);
+ if (skipUntil != null && usedAt.compareTo(skipUntil) < 0) {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which we already processed before.");
+ lineE = brE.readLine();
+ continue;
+ }
+ maxUsedAt = usedAt;
+ if (Math.abs(usedAtSeconds - dataCompleteSeconds) <= 1.0) {
+ this.logger.fine("Merging " + extradataFile.getName() + ":"
+ + e++ + " into the current .data line.");
+ lineE = brE.readLine();
+ break;
+ } else if (usedAtSeconds > dataCompleteSeconds) {
+ this.logger.finer("Comparing " + extradataFile.getName()
+ + " to the next .data line.");
+ extradata = null;
+ break;
+ } else {
+ this.logger.finer("Skipping " + extradataFile.getName() + ":"
+ + e++ + " which is too old to be merged with "
+ + dataFile.getName() + ":" + d + ".");
+ lineE = brE.readLine();
+ continue;
+ }
+ }
+
+ /* Write output line to .tpf file. */
+ SortedMap<String, String> keysAndValues =
+ new TreeMap<String, String>();
+ if (extradata != null) {
+ keysAndValues.putAll(extradata);
+ }
+ keysAndValues.putAll(data);
+ keysAndValues.putAll(config);
+ this.logger.fine("Writing " + dataFile.getName() + ":" + d++ + ".");
+ lineD = brD.readLine();
+ try {
+ this.writeTpfLine(source, fileSize, keysAndValues);
+ } catch (IOException ex) {
+ this.logger.log(Level.WARNING, "Error writing output line. "
+ + "Aborting to merge " + dataFile.getName() + " and "
+ + extradataFile.getName() + ".", ex);
+ break;
+ }
+ }
+ brD.close();
+ brE.close();
+ this.writeCachedTpfLines();
+ if (maxDataComplete == null) {
+ return maxUsedAt;
+ } else if (maxUsedAt == null) {
+ return maxDataComplete;
+ } else if (maxDataComplete.compareTo(maxUsedAt) > 0) {
+ return maxUsedAt;
+ } else {
+ return maxDataComplete;
+ }
+ }
+
+ private SortedMap<Integer, String> dataTimestamps;
+ private SortedMap<String, String> parseDataLine(String line) {
+ String[] parts = line.trim().split(" ");
+ if (line.length() == 0 || parts.length < 20) {
+ return null;
+ }
+ if (this.dataTimestamps == null) {
+ this.dataTimestamps = new TreeMap<Integer, String>();
+ this.dataTimestamps.put(0, "START");
+ this.dataTimestamps.put(2, "SOCKET");
+ this.dataTimestamps.put(4, "CONNECT");
+ this.dataTimestamps.put(6, "NEGOTIATE");
+ this.dataTimestamps.put(8, "REQUEST");
+ this.dataTimestamps.put(10, "RESPONSE");
+ this.dataTimestamps.put(12, "DATAREQUEST");
+ this.dataTimestamps.put(14, "DATARESPONSE");
+ this.dataTimestamps.put(16, "DATACOMPLETE");
+ this.dataTimestamps.put(21, "DATAPERC10");
+ this.dataTimestamps.put(23, "DATAPERC20");
+ this.dataTimestamps.put(25, "DATAPERC30");
+ this.dataTimestamps.put(27, "DATAPERC40");
+ this.dataTimestamps.put(29, "DATAPERC50");
+ this.dataTimestamps.put(31, "DATAPERC60");
+ this.dataTimestamps.put(33, "DATAPERC70");
+ this.dataTimestamps.put(35, "DATAPERC80");
+ this.dataTimestamps.put(37, "DATAPERC90");
+ }
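+ /* Each entry maps a seconds column to a key; the following column
+ * holds microseconds, which we format as two centisecond digits,
+ * so that, e.g. (made-up values), columns "1351261800 123456"
+ * become START="1351261800.12". */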
+ SortedMap<String, String> data = new TreeMap<String, String>();
+ try {
+ for (Map.Entry<Integer, String> e : this.dataTimestamps.entrySet()) {
+ int i = e.getKey();
+ if (parts.length > i + 1) {
+ String key = e.getValue();
+ String value = String.format("%s.%02d", parts[i],
+ Integer.parseInt(parts[i + 1]) / 10000);
+ data.put(key, value);
+ }
+ }
+ } catch (NumberFormatException e) {
+ return null;
+ }
+ data.put("WRITEBYTES", parts[18]);
+ data.put("READBYTES", parts[19]);
+ if (parts.length >= 21) {
+ data.put("DIDTIMEOUT", parts[20]);
+ }
+ return data;
+ }
+
+ private SortedMap<String, String> parseExtradataLine(String line) {
+ String[] parts = line.split(" ");
+ SortedMap<String, String> extradata = new TreeMap<String, String>();
+ String previousKey = null;
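+ /* Parts normally have the form KEY=VALUE; a part without "=" is
+ * treated as a continuation of a preceding STREAM_FAIL_REASONS
+ * value and appended with ":", so that, e.g. (hypothetical),
+ * "STREAM_FAIL_REASONS=MISC EXITPOLICY" becomes
+ * STREAM_FAIL_REASONS=MISC:EXITPOLICY. */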
+ for (String part : parts) {
+ String[] keyAndValue = part.split("=", -1);
+ if (keyAndValue.length == 2) {
+ String key = keyAndValue[0];
+ previousKey = key;
+ String value = keyAndValue[1];
+ if (value.contains(".") && value.lastIndexOf(".") ==
+ value.length() - 2) {
+ /* Make sure that all floats have two trailing digits. */
+ value += "0";
+ }
+ extradata.put(key, value);
+ } else if (keyAndValue.length == 1 && previousKey != null) {
+ String value = keyAndValue[0];
+ if (previousKey.equals("STREAM_FAIL_REASONS") &&
+ (value.equals("MISC") || value.equals("EXITPOLICY") ||
+ value.equals("RESOURCELIMIT") ||
+ value.equals("RESOLVEFAILED"))) {
+ extradata.put(previousKey, extradata.get(previousKey) + ":"
+ + value);
+ } else {
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
+ return extradata;
+ }
+
+ private String cachedSource;
+ private int cachedFileSize;
+ private String cachedStartDate;
+ private SortedMap<String, String> cachedTpfLines;
+ private void writeTpfLine(String source, int fileSize,
+ SortedMap<String, String> keysAndValues) throws IOException {
+ StringBuilder sb = new StringBuilder();
+ int written = 0;
+ for (Map.Entry<String, String> keyAndValue :
+ keysAndValues.entrySet()) {
+ String key = keyAndValue.getKey();
+ String value = keyAndValue.getValue();
+ sb.append((written++ > 0 ? " " : "") + key + "=" + value);
+ }
+ String line = sb.toString();
+ String startString = keysAndValues.get("START");
+ long startMillis = Long.parseLong(startString.substring(0,
+ startString.indexOf("."))) * 1000L;
+ String startDate = dateFormat.format(startMillis);
+ if (this.cachedTpfLines == null || !source.equals(this.cachedSource) ||
+ fileSize != this.cachedFileSize ||
+ !startDate.equals(this.cachedStartDate)) {
+ this.writeCachedTpfLines();
+ this.readTpfLinesToCache(source, fileSize, startDate);
+ }
+ if (!this.cachedTpfLines.containsKey(startString) ||
+ line.length() > this.cachedTpfLines.get(startString).length()) {
+ this.cachedTpfLines.put(startString, line);
+ }
+ }
+
+ private void readTpfLinesToCache(String source, int fileSize,
+ String startDate) throws IOException {
+ this.cachedTpfLines = new TreeMap<String, String>();
+ this.cachedSource = source;
+ this.cachedFileSize = fileSize;
+ this.cachedStartDate = startDate;
+ File tpfFile = new File(torperfOutputDirectory,
+ startDate.replaceAll("-", "/") + "/"
+ + source + "-" + String.valueOf(fileSize) + "-" + startDate
+ + ".tpf");
+ if (!tpfFile.exists()) {
+ return;
+ }
+ BufferedReader br = new BufferedReader(new FileReader(tpfFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("@type ")) {
+ continue;
+ }
+ if (line.contains("START=")) {
+ String startString = line.substring(line.indexOf("START=")
+ + "START=".length()).split(" ")[0];
+ this.cachedTpfLines.put(startString, line);
+ }
+ }
+ br.close();
+ }
+
+ private void writeCachedTpfLines() throws IOException {
+ if (this.cachedSource == null || this.cachedFileSize == 0 ||
+ this.cachedStartDate == null || this.cachedTpfLines == null) {
+ return;
+ }
+ File tpfFile = new File(torperfOutputDirectory,
+ this.cachedStartDate.replaceAll("-", "/")
+ + "/" + this.cachedSource + "-"
+ + String.valueOf(this.cachedFileSize) + "-"
+ + this.cachedStartDate + ".tpf");
+ tpfFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(tpfFile));
+ bw.write("@type torperf 1.0\n");
+ for (String line : this.cachedTpfLines.values()) {
+ bw.write(line + "\n");
+ }
+ bw.close();
+ this.cachedSource = null;
+ this.cachedFileSize = 0;
+ this.cachedStartDate = null;
+ this.cachedTpfLines = null;
+ }
+}
+
diff --git a/test/org/torproject/ernie/db/ArchiveReaderTest.java b/test/org/torproject/ernie/db/ArchiveReaderTest.java
deleted file mode 100644
index cb3dea7..0000000
--- a/test/org/torproject/ernie/db/ArchiveReaderTest.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Copyright 2011 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.File;
-
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class ArchiveReaderTest {
-
- private File tempArchivesDirectory;
- private File tempStatsDirectory;
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder();
-
- @Before
- public void createTempDirectories() {
- this.tempArchivesDirectory = folder.newFolder("sanitized-bridges");
- this.tempStatsDirectory = folder.newFolder("stats");
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testRelayDescriptorParserNull() {
- new ArchiveReader(null, this.tempArchivesDirectory,
- this.tempStatsDirectory, false);
- }
-}
-
diff --git a/test/org/torproject/ernie/db/ArchiveWriterTest.java b/test/org/torproject/ernie/db/ArchiveWriterTest.java
deleted file mode 100644
index 2a18e74..0000000
--- a/test/org/torproject/ernie/db/ArchiveWriterTest.java
+++ /dev/null
@@ -1,19 +0,0 @@
-/* Copyright 2011 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class ArchiveWriterTest {
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder();
-
- @Test(expected = IllegalArgumentException.class)
- public void testArchivesDirectoryNull() {
- new ArchiveWriter(null);
- }
-}
-
diff --git a/test/org/torproject/ernie/db/BridgeSnapshotReaderTest.java b/test/org/torproject/ernie/db/BridgeSnapshotReaderTest.java
deleted file mode 100644
index f1ad03b..0000000
--- a/test/org/torproject/ernie/db/BridgeSnapshotReaderTest.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Copyright 2011 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.File;
-
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class BridgeSnapshotReaderTest {
-
- private File tempBridgeDirectoriesDirectory;
- private File tempStatsDirectory;
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder();
-
- @Before
- public void createTempDirectories() {
- this.tempBridgeDirectoriesDirectory = folder.newFolder("bridges");
- this.tempStatsDirectory = folder.newFolder("stats");
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testBridgeDescriptorParserNull() {
- new BridgeSnapshotReader(null, this.tempBridgeDirectoriesDirectory,
- this.tempStatsDirectory);
- }
-}
-
diff --git a/test/org/torproject/ernie/db/CachedRelayDescriptorReaderTest.java b/test/org/torproject/ernie/db/CachedRelayDescriptorReaderTest.java
deleted file mode 100644
index f101249..0000000
--- a/test/org/torproject/ernie/db/CachedRelayDescriptorReaderTest.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Copyright 2011 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.File;
-import java.util.ArrayList;
-
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class CachedRelayDescriptorReaderTest {
-
- private File tempStatsDirectory;
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder();
-
- @Before
- public void createTempDirectories() {
- this.tempStatsDirectory = folder.newFolder("stats");
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testRelayDescriptorParserNull() {
- new CachedRelayDescriptorReader(null, new ArrayList<String>(),
- this.tempStatsDirectory);
- }
-}
-
diff --git a/test/org/torproject/ernie/db/SanitizedBridgesWriterTest.java b/test/org/torproject/ernie/db/SanitizedBridgesWriterTest.java
deleted file mode 100644
index f8b4cdd..0000000
--- a/test/org/torproject/ernie/db/SanitizedBridgesWriterTest.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright 2011 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.File;
-
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-
-public class SanitizedBridgesWriterTest {
-
- private File tempSanitizedBridgesDirectory;
- private File tempStatsDirectory;
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder();
-
- @Before
- public void createTempDirectories() {
- this.tempSanitizedBridgesDirectory =
- folder.newFolder("sanitized-bridges");
- this.tempStatsDirectory = folder.newFolder("stats");
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testSanitizedBridgesDirectoryNull() {
- new SanitizedBridgesWriter(null, this.tempStatsDirectory, false, -1L);
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testStatsDirectoryNull() {
- new SanitizedBridgesWriter(this.tempSanitizedBridgesDirectory, null,
- false, -1L);
- }
-}
-
[metrics-db/master] Run five data-processing modules in parallel.
by karsten@torproject.org 27 Oct '12
commit 7db75dbedf2fc77a3855e4fdff051bcbdcccac5e
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Oct 26 15:13:21 2012 -0400
Run five data-processing modules in parallel.
---
.../db/bridgedescs/SanitizedBridgesWriter.java | 19 ++++++++++++-------
.../BridgePoolAssignmentsProcessor.java | 8 +++++++-
.../ernie/db/exitlists/ExitListDownloader.java | 6 +++++-
src/org/torproject/ernie/db/main/Main.java | 14 +++++---------
.../ernie/db/relaydescs/ArchiveWriter.java | 12 ++++++++++--
.../ernie/db/torperf/TorperfDownloader.java | 10 ++++++++--
6 files changed, 47 insertions(+), 22 deletions(-)
diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
index 10d2baa..cb08df1 100644
--- a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
@@ -43,7 +43,16 @@ import org.torproject.ernie.db.main.RsyncDataProvider;
* by the bridge to advertise their capabilities), and extra-info
* descriptors (published by the bridge, mainly for statistical analysis).
*/
-public class SanitizedBridgesWriter {
+public class SanitizedBridgesWriter extends Thread {
+
+ private Configuration config;
+
+ /**
+ * Initializes this class.
+ */
+ public SanitizedBridgesWriter(Configuration config) {
+ this.config = config;
+ }
/**
* Logger for this class.
@@ -71,12 +80,7 @@ public class SanitizedBridgesWriter {
private SecureRandom secureRandom;
- /**
- * Initializes this class.
- */
- public SanitizedBridgesWriter(Configuration config,
- File statsDirectory) {
-
+ public void run() {
File bridgeDirectoriesDirectory =
new File(config.getBridgeSnapshotsDirectory());
File sanitizedBridgesDirectory =
@@ -85,6 +89,7 @@ public class SanitizedBridgesWriter {
config.getReplaceIPAddressesWithHashes();
long limitBridgeSanitizingInterval =
config.getLimitBridgeDescriptorMappings();
+ File statsDirectory = new File("stats");
if (bridgeDirectoriesDirectory == null ||
sanitizedBridgesDirectory == null || statsDirectory == null) {
diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
index 3893495..d822d11 100644
--- a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
+++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -29,9 +29,15 @@ import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.torproject.ernie.db.main.Configuration;
import org.torproject.ernie.db.main.RsyncDataProvider;
-public class BridgePoolAssignmentsProcessor {
+public class BridgePoolAssignmentsProcessor extends Thread {
+
+ private Configuration config;
public BridgePoolAssignmentsProcessor(Configuration config) {
+ this.config = config;
+ }
+
+ public void run() {
File assignmentsDirectory =
new File(config.getAssignmentsDirectory());
File sanitizedAssignmentsDirectory =
diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
index f6f2865..bf67bb7 100644
--- a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
+++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
@@ -21,8 +21,12 @@ import java.util.logging.Logger;
import org.torproject.ernie.db.main.Configuration;
import org.torproject.ernie.db.main.RsyncDataProvider;
-public class ExitListDownloader {
+public class ExitListDownloader extends Thread {
+
public ExitListDownloader(Configuration config) {
+ }
+
+ public void run() {
Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
try {
logger.fine("Downloading exit list...");
diff --git a/src/org/torproject/ernie/db/main/Main.java b/src/org/torproject/ernie/db/main/Main.java
index 093f002..88f2372 100644
--- a/src/org/torproject/ernie/db/main/Main.java
+++ b/src/org/torproject/ernie/db/main/Main.java
@@ -2,7 +2,6 @@
* See LICENSE for licensing information */
package org.torproject.ernie.db.main;
-import java.io.File;
import java.util.logging.Logger;
import org.torproject.ernie.db.bridgedescs.SanitizedBridgesWriter;
@@ -35,33 +34,30 @@ public class Main {
System.exit(1);
}
- // Define stats directory for temporary files
- File statsDirectory = new File("stats");
-
// Import/download relay descriptors from the various sources
if (config.getWriteDirectoryArchives()) {
- new ArchiveWriter(config, statsDirectory);
+ new ArchiveWriter(config).start();
}
// Sanitize bridge descriptors
if (config.getImportBridgeSnapshots() &&
config.getWriteSanitizedBridges()) {
- new SanitizedBridgesWriter(config, statsDirectory);
+ new SanitizedBridgesWriter(config).start();
}
// Download exit list and store it to disk
if (config.getDownloadExitList()) {
- new ExitListDownloader(config);
+ new ExitListDownloader(config).start();
}
// Process bridge pool assignments
if (config.getProcessBridgePoolAssignments()) {
- new BridgePoolAssignmentsProcessor(config);
+ new BridgePoolAssignmentsProcessor(config).start();
}
// Process Torperf files
if (config.getProcessTorperfFiles()) {
- new TorperfDownloader(config);
+ new TorperfDownloader(config).start();
}
// Remove lock file
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
index a9cb604..5c3d2e9 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -27,17 +27,25 @@ import org.torproject.descriptor.impl.DescriptorParseException;
import org.torproject.ernie.db.main.Configuration;
import org.torproject.ernie.db.main.RsyncDataProvider;
-public class ArchiveWriter {
+public class ArchiveWriter extends Thread {
+
+ private Configuration config;
+
+ public ArchiveWriter(Configuration config) {
+ this.config = config;
+ }
+
private Logger logger;
private File outputDirectory;
private DescriptorParser descriptorParser;
private int storedConsensuses = 0, storedVotes = 0, storedCerts = 0,
storedServerDescriptors = 0, storedExtraInfoDescriptors = 0;
- public ArchiveWriter(Configuration config, File statsDirectory) {
+ public void run() {
File outputDirectory =
new File(config.getDirectoryArchivesOutputDirectory());
+ File statsDirectory = new File("stats");
this.logger = Logger.getLogger(ArchiveWriter.class.getName());
this.outputDirectory = outputDirectory;
diff --git a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
index 5f09038..ef67ae7 100644
--- a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
+++ b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
@@ -26,7 +26,13 @@ import org.torproject.ernie.db.main.RsyncDataProvider;
/* Download possibly truncated Torperf .data and .extradata files from
* configured sources, append them to the files we already have, and merge
* the two files into the .tpf format. */
-public class TorperfDownloader {
+public class TorperfDownloader extends Thread {
+
+ private Configuration config;
+
+ public TorperfDownloader(Configuration config) {
+ this.config = config;
+ }
private File torperfOutputDirectory = null;
private SortedMap<String, String> torperfSources = null;
@@ -34,7 +40,7 @@ public class TorperfDownloader {
private Logger logger = null;
private SimpleDateFormat dateFormat;
- public TorperfDownloader(Configuration config) {
+ public void run() {
File torperfOutputDirectory =
new File(config.getTorperfOutputDirectory());
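The refactoring above applies the same shape to all five modules: keep the Configuration reference in a field, move the former constructor body into run(), and let Main decide when to start() the thread. A minimal sketch of that pattern follows; the module name and the two config getters are hypothetical stand-ins, not the actual metrics-db API.

import java.io.File;
import java.util.logging.Logger;

public class ExampleModule extends Thread {

  private Configuration config;

  public ExampleModule(Configuration config) {
    // Only remember the config here; all work moves into run().
    this.config = config;
  }

  public void run() {
    // Resolve directories lazily, inside the worker thread, just as
    // the refactored modules do with their config getters.
    File outputDirectory = new File(config.getExampleOutputDirectory());
    Logger logger = Logger.getLogger(ExampleModule.class.getName());
    logger.fine("Processing " + outputDirectory.getAbsolutePath());
    // ... module-specific work goes here ...
  }
}

// Caller side, mirroring Main:
//   if (config.getProcessExampleData()) {
//     new ExampleModule(config).start();
//   }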
[metrics-db/master] Prepare for running relay descriptor downloader twice per hour.
by karsten@torproject.org 27 Oct '12
commit a2db139da101941d5c8e13b6c426093e62f3de3b
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Oct 26 15:32:02 2012 -0400
Prepare for running relay descriptor downloader twice per hour.
---
.../db/bridgedescs/SanitizedBridgesWriter.java | 7 +++++++
.../BridgePoolAssignmentsProcessor.java | 7 +++++++
.../ernie/db/exitlists/ExitListDownloader.java | 7 +++++++
.../ernie/db/torperf/TorperfDownloader.java | 6 ++++++
4 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
index cb08df1..8db267a 100644
--- a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
@@ -81,6 +81,13 @@ public class SanitizedBridgesWriter extends Thread {
private SecureRandom secureRandom;
public void run() {
+
+ if (((System.currentTimeMillis() / 60000L) % 60L) > 30L) {
+ /* Don't start in second half of an hour, when we only want to
+ * process other data. */
+ return;
+ }
+
File bridgeDirectoriesDirectory =
new File(config.getBridgeSnapshotsDirectory());
File sanitizedBridgesDirectory =
diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
index d822d11..4f8dcb6 100644
--- a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
+++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -38,6 +38,13 @@ public class BridgePoolAssignmentsProcessor extends Thread {
}
public void run() {
+
+ if (((System.currentTimeMillis() / 60000L) % 60L) > 30L) {
+ /* Don't start in second half of an hour, when we only want to
+ * process other data. */
+ return;
+ }
+
File assignmentsDirectory =
new File(config.getAssignmentsDirectory());
File sanitizedAssignmentsDirectory =
diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
index bf67bb7..bbd93ac 100644
--- a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
+++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
@@ -27,6 +27,13 @@ public class ExitListDownloader extends Thread {
}
public void run() {
+
+ if (((System.currentTimeMillis() / 60000L) % 60L) > 30L) {
+ /* Don't start in second half of an hour, when we only want to
+ * process other data. */
+ return;
+ }
+
Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
try {
logger.fine("Downloading exit list...");
diff --git a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
index ef67ae7..143010c 100644
--- a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
+++ b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
@@ -42,6 +42,12 @@ public class TorperfDownloader extends Thread {
public void run() {
+ if (((System.currentTimeMillis() / 60000L) % 60L) > 30L) {
+ /* Don't start in second half of an hour, when we only want to
+ * process other data. */
+ return;
+ }
+
File torperfOutputDirectory =
new File(config.getTorperfOutputDirectory());
SortedMap<String, String> torperfSources = config.getTorperfSources();
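The guard added to each run() method derives the minute of the current hour from epoch milliseconds: dividing by 60000 yields whole minutes since the epoch, and the remainder modulo 60 is the minute within the current UTC hour (0-59), since the epoch starts on an hour boundary. Any module started after minute 30 returns immediately. A self-contained sketch of the same arithmetic:

public class HalfHourGate {

  /** Returns the minute within the current UTC hour, 0-59. */
  static long minuteOfHour(long epochMillis) {
    return (epochMillis / 60000L) % 60L;
  }

  public static void main(String[] args) {
    long minute = minuteOfHour(System.currentTimeMillis());
    if (minute > 30L) {
      // Second half of the hour: skip this run, matching the
      // early-return guard added to the data-processing modules.
      System.out.println("minute " + minute + ": skipping");
      return;
    }
    System.out.println("minute " + minute + ": running");
  }
}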
[metrics-db/master] Wait until all threads are done before exiting.
by karsten@torproject.org 27 Oct '12
commit 0336ad311f7862553cdd8f8b73f056d3a7dc643a
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Oct 26 21:15:14 2012 -0400
Wait until all threads are done before exiting.
---
src/org/torproject/ernie/db/main/Main.java | 27 ++++++++++++++++++++++-----
1 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/src/org/torproject/ernie/db/main/Main.java b/src/org/torproject/ernie/db/main/Main.java
index 88f2372..d1361cf 100644
--- a/src/org/torproject/ernie/db/main/Main.java
+++ b/src/org/torproject/ernie/db/main/Main.java
@@ -2,6 +2,8 @@
* See LICENSE for licensing information */
package org.torproject.ernie.db.main;
+import java.util.ArrayList;
+import java.util.List;
import java.util.logging.Logger;
import org.torproject.ernie.db.bridgedescs.SanitizedBridgesWriter;
@@ -34,30 +36,45 @@ public class Main {
System.exit(1);
}
+ // Keep a list of all threads, so that we can join them all before
+ // exiting.
+ List<Thread> threads = new ArrayList<Thread>();
+
// Import/download relay descriptors from the various sources
if (config.getWriteDirectoryArchives()) {
- new ArchiveWriter(config).start();
+ threads.add(new ArchiveWriter(config));
}
// Sanitize bridge descriptors
if (config.getImportBridgeSnapshots() &&
config.getWriteSanitizedBridges()) {
- new SanitizedBridgesWriter(config).start();
+ threads.add(new SanitizedBridgesWriter(config));
}
// Download exit list and store it to disk
if (config.getDownloadExitList()) {
- new ExitListDownloader(config).start();
+ threads.add(new ExitListDownloader(config));
}
// Process bridge pool assignments
if (config.getProcessBridgePoolAssignments()) {
- new BridgePoolAssignmentsProcessor(config).start();
+ threads.add(new BridgePoolAssignmentsProcessor(config));
}
// Process Torperf files
if (config.getProcessTorperfFiles()) {
- new TorperfDownloader(config).start();
+ threads.add(new TorperfDownloader(config));
+ }
+
+ // Run threads
+ for (Thread thread : threads) {
+ thread.start();
+ }
+ for (Thread thread : threads) {
+ try {
+ thread.join();
+ } catch (InterruptedException e) {
+ }
}
// Remove lock file
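The resulting control flow in main() is the classic fork/join idiom: start every thread first so the modules run concurrently, then join them one by one before the lock file is removed. A reduced sketch of the same flow; note that the committed code leaves the InterruptedException catch block empty, whereas the stricter variant shown here re-asserts the interrupt flag.

import java.util.ArrayList;
import java.util.List;

public class ForkJoinSketch {
  public static void main(String[] args) {
    List<Thread> threads = new ArrayList<Thread>();
    for (int i = 0; i < 5; i++) {
      final int id = i;
      threads.add(new Thread() {
        public void run() {
          System.out.println("module " + id + " running");
        }
      });
    }
    for (Thread thread : threads) {
      thread.start();   // fork all modules first
    }
    for (Thread thread : threads) {
      try {
        thread.join();  // then wait for each one to finish
      } catch (InterruptedException e) {
        // Variant: restore the interrupt flag instead of swallowing it.
        Thread.currentThread().interrupt();
      }
    }
    System.out.println("all modules done; safe to remove lock file");
  }
}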
[metrics-db/master] Recognize when data sources become stale.
by karsten@torproject.org 27 Oct '12
commit f7b58361aa6df5fd2afe40c38f9dd111e2820f89
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Sat Oct 27 13:52:42 2012 -0400
Recognize when data sources become stale.
Implements #3850.
---
.../db/bridgedescs/SanitizedBridgesWriter.java | 58 +++++++++++
.../BridgePoolAssignmentsProcessor.java | 16 +++
.../ernie/db/exitlists/ExitListDownloader.java | 100 +++++++++++++++-----
.../ernie/db/relaydescs/ArchiveWriter.java | 46 +++++++++
4 files changed, 194 insertions(+), 26 deletions(-)
diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
index 87593bd..7de9961 100644
--- a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
@@ -193,6 +193,8 @@ public class SanitizedBridgesWriter extends Thread {
// Finish writing sanitized bridge descriptors to disk
this.finishWriting();
+ this.checkStaleDescriptors();
+
this.cleanUpRsyncDirectory();
}
@@ -368,6 +370,8 @@ public class SanitizedBridgesWriter extends Thread {
return this.secretsForHashingIPAddresses.get(month);
}
+ private String maxNetworkStatusPublishedTime = "1970-01-01 00:00:00";
+
/**
* Sanitizes a network status and writes it to disk.
*/
@@ -380,6 +384,10 @@ public class SanitizedBridgesWriter extends Thread {
return;
}
+ if (publicationTime.compareTo(maxNetworkStatusPublishedTime) > 0) {
+ maxNetworkStatusPublishedTime = publicationTime;
+ }
+
if (this.bridgeSanitizingCutOffTimestamp.
compareTo(publicationTime) > 0) {
this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING
@@ -543,6 +551,8 @@ public class SanitizedBridgesWriter extends Thread {
}
}
+ private String maxServerDescriptorPublishedTime = "1970-01-01 00:00:00";
+
/**
* Sanitizes a bridge server descriptor and writes it to disk.
*/
@@ -590,6 +600,9 @@ public class SanitizedBridgesWriter extends Thread {
* sanitizing interval. */
} else if (line.startsWith("published ")) {
published = line.substring("published ".length());
+ if (published.compareTo(maxServerDescriptorPublishedTime) > 0) {
+ maxServerDescriptorPublishedTime = published;
+ }
if (this.bridgeSanitizingCutOffTimestamp.
compareTo(published) > 0) {
this.logger.log(!this.haveWarnedAboutInterval
@@ -799,6 +812,9 @@ public class SanitizedBridgesWriter extends Thread {
}
}
+ private String maxExtraInfoDescriptorPublishedTime =
+ "1970-01-01 00:00:00";
+
/**
* Sanitizes an extra-info descriptor and writes it to disk.
*/
@@ -827,6 +843,10 @@ public class SanitizedBridgesWriter extends Thread {
} else if (line.startsWith("published ")) {
scrubbed.append(line + "\n");
published = line.substring("published ".length());
+ if (published.compareTo(maxExtraInfoDescriptorPublishedTime)
+ > 0) {
+ maxExtraInfoDescriptorPublishedTime = published;
+ }
/* Remove everything from transport lines except the transport
* name. */
@@ -968,6 +988,44 @@ public class SanitizedBridgesWriter extends Thread {
}
}
+ private void checkStaleDescriptors() {
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ long tooOldMillis = System.currentTimeMillis() - 330L * 60L * 1000L;
+ try {
+ long maxNetworkStatusPublishedMillis =
+ dateTimeFormat.parse(maxNetworkStatusPublishedTime).getTime();
+ if (maxNetworkStatusPublishedMillis > 0L &&
+ maxNetworkStatusPublishedMillis < tooOldMillis) {
+ this.logger.warning("The last known bridge network status was "
+ + "published " + maxNetworkStatusPublishedTime + ", which is "
+ + "more than 5:30 hours in the past.");
+ }
+ long maxServerDescriptorPublishedMillis =
+ dateTimeFormat.parse(maxServerDescriptorPublishedTime).
+ getTime();
+ if (maxServerDescriptorPublishedMillis > 0L &&
+ maxServerDescriptorPublishedMillis < tooOldMillis) {
+ this.logger.warning("The last known bridge server descriptor was "
+ + "published " + maxServerDescriptorPublishedTime + ", which "
+ + "is more than 5:30 hours in the past.");
+ }
+ long maxExtraInfoDescriptorPublishedMillis =
+ dateTimeFormat.parse(maxExtraInfoDescriptorPublishedTime).
+ getTime();
+ if (maxExtraInfoDescriptorPublishedMillis > 0L &&
+ maxExtraInfoDescriptorPublishedMillis < tooOldMillis) {
+ this.logger.warning("The last known bridge extra-info descriptor "
+ + "was published " + maxExtraInfoDescriptorPublishedTime
+ + ", which is more than 5:30 hours in the past.");
+ }
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Unable to parse timestamp for "
+ + "stale check.", e);
+ }
+ }
+
/* Delete all files from the rsync directory that have not been modified
* in the last three days. */
public void cleanUpRsyncDirectory() {
diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
index 0ac6f90..43d3427 100644
--- a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
+++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -77,6 +77,7 @@ public class BridgePoolAssignmentsProcessor extends Thread {
SimpleDateFormat filenameFormat =
new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ long maxBridgePoolAssignmentTime = 0L;
for (File assignmentFile : assignmentFiles) {
logger.info("Processing bridge pool assignment file '"
+ assignmentFile.getAbsolutePath() + "'...");
@@ -120,6 +121,9 @@ public class BridgePoolAssignmentsProcessor extends Thread {
long bridgePoolAssignmentTime = assignmentFormat.parse(
bridgePoolAssignmentLine.substring(
"bridge-pool-assignment ".length())).getTime();
+ maxBridgePoolAssignmentTime = Math.max(
+ maxBridgePoolAssignmentTime,
+ bridgePoolAssignmentTime);
File tarballFile = new File(
sanitizedAssignmentsDirectory, filenameFormat.format(
bridgePoolAssignmentTime));
@@ -192,6 +196,18 @@ public class BridgePoolAssignmentsProcessor extends Thread {
}
}
+ if (maxBridgePoolAssignmentTime > 0L &&
+ maxBridgePoolAssignmentTime + 330L * 60L * 1000L
+ < System.currentTimeMillis()) {
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ logger.warning("The last known bridge pool assignment list was "
+ + "published at "
+ + dateTimeFormat.format(maxBridgePoolAssignmentTime)
+ + ", which is more than 5:30 hours in the past.");
+ }
+
this.cleanUpRsyncDirectory();
logger.info("Finished processing bridge pool assignment file(s).");
diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
index 26e944c..9b1f40b 100644
--- a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
+++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
@@ -12,6 +12,7 @@ import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
+import java.util.List;
import java.util.SortedSet;
import java.util.Stack;
import java.util.TimeZone;
@@ -19,6 +20,12 @@ import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorParser;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExitList;
+import org.torproject.descriptor.ExitListEntry;
+import org.torproject.descriptor.impl.DescriptorParseException;
import org.torproject.ernie.db.main.Configuration;
public class ExitListDownloader extends Thread {
@@ -35,8 +42,19 @@ public class ExitListDownloader extends Thread {
}
Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
+
+ SimpleDateFormat dateTimeFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+ Date downloadedDate = new Date();
+ String downloadedExitList = null;
try {
logger.fine("Downloading exit list...");
+ StringBuilder sb = new StringBuilder();
+ sb.append("@type tordnsel 1.0\n");
+ sb.append("Downloaded " + dateTimeFormat.format(downloadedDate)
+ + "\n");
String exitAddressesUrl =
"http://exitlist.torproject.org/exit-addresses";
URL u = new URL(exitAddressesUrl);
@@ -51,42 +69,72 @@ public class ExitListDownloader extends Thread {
}
BufferedInputStream in = new BufferedInputStream(
huc.getInputStream());
- SimpleDateFormat printFormat =
- new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
- printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- Date downloadedDate = new Date();
- File tarballFile = new File("exitlist/" + printFormat.format(
- downloadedDate));
- tarballFile.getParentFile().mkdirs();
- File rsyncFile = new File("rsync/exit-lists/"
- + tarballFile.getName());
- rsyncFile.getParentFile().mkdirs();
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- BufferedWriter bwT = new BufferedWriter(new FileWriter(
- tarballFile));
- BufferedWriter bwR = new BufferedWriter(new FileWriter(
- rsyncFile));
- bwT.write("@type tordnsel 1.0\n");
- bwT.write("Downloaded " + dateTimeFormat.format(downloadedDate)
- + "\n");
- bwR.write("@type tordnsel 1.0\n");
- bwR.write("Downloaded " + dateTimeFormat.format(downloadedDate)
- + "\n");
int len;
byte[] data = new byte[1024];
while ((len = in.read(data, 0, 1024)) >= 0) {
- bwT.write(new String(data, 0, len));
- bwR.write(new String(data, 0, len));
+ sb.append(new String(data, 0, len));
}
in.close();
- bwT.close();
- bwR.close();
+ downloadedExitList = sb.toString();
logger.fine("Finished downloading exit list.");
} catch (IOException e) {
logger.log(Level.WARNING, "Failed downloading exit list", e);
return;
}
+ if (downloadedExitList == null) {
+ logger.warning("Failed downloading exit list");
+ return;
+ }
+
+ SimpleDateFormat tarballFormat =
+ new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+ tarballFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ File tarballFile = new File("exitlist/" + tarballFormat.format(
+ downloadedDate));
+
+ long maxScanMillis = 0L;
+ try {
+ DescriptorParser descriptorParser =
+ DescriptorSourceFactory.createDescriptorParser();
+ List<Descriptor> parsedDescriptors =
+ descriptorParser.parseDescriptors(downloadedExitList.getBytes(),
+ tarballFile.getName());
+ if (parsedDescriptors.size() != 1 ||
+ !(parsedDescriptors.get(0) instanceof ExitList)) {
+ logger.warning("Could not parse downloaded exit list");
+ return;
+ }
+ ExitList parsedExitList = (ExitList) parsedDescriptors.get(0);
+ for (ExitListEntry entry : parsedExitList.getExitListEntries()) {
+ maxScanMillis = Math.max(maxScanMillis, entry.getScanMillis());
+ }
+ } catch (DescriptorParseException e) {
+ logger.log(Level.WARNING, "Could not parse downloaded exit list",
+ e);
+ }
+ if (maxScanMillis > 0L &&
+ maxScanMillis + 330L * 60L * 1000L < System.currentTimeMillis()) {
+ logger.warning("The last reported scan in the downloaded exit list "
+ + "took place at " + dateTimeFormat.format(maxScanMillis)
+ + ", which is more than 5:30 hours in the past.");
+ }
+
+ /* Write to disk. */
+ File rsyncFile = new File("rsync/exit-lists/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ for (File outputFile : outputFiles) {
+ try {
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile));
+ bw.write(downloadedExitList);
+ bw.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Could not write downloaded exit list "
+ + "to " + outputFile.getAbsolutePath(), e);
+ }
+ }
/* Write stats. */
StringBuilder dumpStats = new StringBuilder("Finished downloading "
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
index f95bbf7..9a07ada 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -94,6 +94,8 @@ public class ArchiveWriter extends Thread {
// Write output to disk that only depends on relay descriptors
this.dumpStats();
+ this.checkStaledescriptors();
+
this.cleanUpRsyncDirectory();
}
@@ -128,9 +130,12 @@ public class ArchiveWriter extends Thread {
return false;
}
+ private long maxConsensusValidAfter = 0L;
private static final byte[] CONSENSUS_ANNOTATION =
"@type network-status-consensus-3 1.0\n".getBytes();
public void storeConsensus(byte[] data, long validAfter) {
+ this.maxConsensusValidAfter = Math.max(this.maxConsensusValidAfter,
+ validAfter);
SimpleDateFormat printFormat = new SimpleDateFormat(
"yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -144,10 +149,12 @@ public class ArchiveWriter extends Thread {
}
}
+ private long maxVoteValidAfter = 0L;
private static final byte[] VOTE_ANNOTATION =
"@type network-status-vote-3 1.0\n".getBytes();
public void storeVote(byte[] data, long validAfter,
String fingerprint, String digest) {
+ this.maxVoteValidAfter = Math.max(this.maxVoteValidAfter, validAfter);
SimpleDateFormat printFormat = new SimpleDateFormat(
"yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -177,10 +184,13 @@ public class ArchiveWriter extends Thread {
}
}
+ private long maxServerDescriptorPublished = 0L;
private static final byte[] SERVER_DESCRIPTOR_ANNOTATION =
"@type server-descriptor 1.0\n".getBytes();
public void storeServerDescriptor(byte[] data, String digest,
long published) {
+ this.maxServerDescriptorPublished = Math.max(
+ this.maxServerDescriptorPublished, published);
SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
File tarballFile = new File(this.outputDirectory
@@ -195,10 +205,13 @@ public class ArchiveWriter extends Thread {
}
}
+ private long maxExtraInfoDescriptorPublished = 0L;
private static final byte[] EXTRA_INFO_ANNOTATION =
"@type extra-info 1.0\n".getBytes();
public void storeExtraInfoDescriptor(byte[] data,
String extraInfoDigest, long published) {
+ this.maxExtraInfoDescriptorPublished = Math.max(
+ this.maxExtraInfoDescriptorPublished, published);
SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
File tarballFile = new File(this.outputDirectory + "/extra-info/"
@@ -403,6 +416,39 @@ public class ArchiveWriter extends Thread {
}
}
+ private void checkStaledescriptors() {
+ SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ long tooOldMillis = System.currentTimeMillis() - 330L * 60L * 1000L;
+ if (maxConsensusValidAfter > 0L &&
+ maxConsensusValidAfter < tooOldMillis) {
+ this.logger.warning("The last known relay network status "
+ + "consensus was valid after "
+ + dateTimeFormat.format(maxConsensusValidAfter)
+ + ", which is more than 5:30 hours in the past.");
+ }
+ if (maxVoteValidAfter > 0L && maxVoteValidAfter < tooOldMillis) {
+ this.logger.warning("The last known relay network status vote "
+ + "was valid after " + dateTimeFormat.format(maxVoteValidAfter)
+ + ", which is more than 5:30 hours in the past.");
+ }
+ if (maxServerDescriptorPublished > 0L &&
+ maxServerDescriptorPublished < tooOldMillis) {
+ this.logger.warning("The last known relay server descriptor was "
+ + "published at "
+ + dateTimeFormat.format(maxServerDescriptorPublished)
+ + ", which is more than 5:30 hours in the past.");
+ }
+ if (maxExtraInfoDescriptorPublished > 0L &&
+ maxExtraInfoDescriptorPublished < tooOldMillis) {
+ this.logger.warning("The last known relay extra-info descriptor "
+ + "was published at "
+ + dateTimeFormat.format(maxExtraInfoDescriptorPublished)
+ + ", which is more than 5:30 hours in the past.");
+ }
+ }
+
/* Delete all files from the rsync directory that have not been modified
* in the last three days. */
public void cleanUpRsyncDirectory() {
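The staleness checks above all follow one recipe: while processing, remember the most recent timestamp seen (either as a sortable "yyyy-MM-dd HH:mm:ss" string or as epoch millis), and at the end of the run warn if that maximum lies more than 330 minutes (5:30 hours) in the past. A condensed sketch of the millis-based variant, assuming the logger is wired up elsewhere:

import java.text.SimpleDateFormat;
import java.util.TimeZone;
import java.util.logging.Logger;

public class StaleCheckSketch {

  private static final long MAX_AGE_MILLIS = 330L * 60L * 1000L;

  private long maxPublishedMillis = 0L;

  /** Called for every stored descriptor. */
  void recordPublished(long publishedMillis) {
    this.maxPublishedMillis = Math.max(this.maxPublishedMillis,
        publishedMillis);
  }

  /** Called once at the end of a run. */
  void checkStale(Logger logger) {
    if (this.maxPublishedMillis > 0L &&
        this.maxPublishedMillis
        < System.currentTimeMillis() - MAX_AGE_MILLIS) {
      SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
          "yyyy-MM-dd HH:mm:ss");
      dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
      logger.warning("The last known descriptor was published at "
          + dateTimeFormat.format(this.maxPublishedMillis)
          + ", which is more than 5:30 hours in the past.");
    }
  }
}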
[metrics-db/master] Make copying to rsync/ more efficient.
by karsten@torproject.org 27 Oct '12
commit 663cdab7217ef66fec2c60757c88c42bd2e08d8f
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Oct 26 20:57:08 2012 -0400
Make copying to rsync/ more efficient.
Each module now writes to its own subdirectory in rsync/ in parallel to
writing to its output directory for making tarballs. This should be more
efficient than going over the output directories and copying files to
rsync/.
---
.../db/bridgedescs/SanitizedBridgesWriter.java | 107 ++++++++++--------
.../BridgePoolAssignmentsProcessor.java | 51 ++++++---
.../ernie/db/exitlists/ExitListDownloader.java | 50 ++++++--
.../ernie/db/main/RsyncDataProvider.java | 118 --------------------
.../ernie/db/relaydescs/ArchiveWriter.java | 109 ++++++++++--------
.../ernie/db/torperf/TorperfDownloader.java | 42 +++++--
6 files changed, 226 insertions(+), 251 deletions(-)
diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
index 8db267a..87593bd 100644
--- a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
@@ -15,9 +15,11 @@ import java.security.SecureRandom;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
+import java.util.Stack;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.logging.Level;
@@ -28,7 +30,6 @@ import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.RsyncDataProvider;
/**
* Sanitizes bridge descriptors, i.e., removes all possibly sensitive
@@ -192,16 +193,7 @@ public class SanitizedBridgesWriter extends Thread {
// Finish writing sanitized bridge descriptors to disk
this.finishWriting();
- // Copy sanitized bridge descriptors from the last 3 days to the rsync
- // directory.
- RsyncDataProvider rdp = new RsyncDataProvider();
- rdp.copyFiles(new File(sanitizedBridgesDirectory, "statuses"),
- "bridge-descriptors/statuses");
- rdp.copyFiles(
- new File(sanitizedBridgesDirectory, "server-descriptor"),
- "bridge-descriptors/server-descriptors");
- rdp.copyFiles(new File(sanitizedBridgesDirectory, "extra-info"),
- "bridge-descriptors/extra-infos");
+ this.cleanUpRsyncDirectory();
}
private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
@@ -519,32 +511,31 @@ public class SanitizedBridgesWriter extends Thread {
/* Write the sanitized network status to disk. */
try {
-
- /* Determine file name. */
String syear = publicationTime.substring(0, 4);
String smonth = publicationTime.substring(5, 7);
String sday = publicationTime.substring(8, 10);
String stime = publicationTime.substring(11, 13)
+ publicationTime.substring(14, 16)
+ publicationTime.substring(17, 19);
- File statusFile = new File(
+ File tarballFile = new File(
this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear
+ "/" + smonth + "/statuses/" + sday + "/" + syear + smonth
+ sday + "-" + stime + "-"
+ "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
-
- /* Create all parent directories to write this network status. */
- statusFile.getParentFile().mkdirs();
-
- /* Write sanitized network status to disk. */
- BufferedWriter bw = new BufferedWriter(new FileWriter(statusFile));
- bw.write("@type bridge-network-status 1.0\n");
- bw.write("published " + publicationTime + "\n");
- for (String scrubbed : scrubbedLines.values()) {
- bw.write(scrubbed);
+ File rsyncFile = new File("rsync/bridge-descriptors/statuses/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ for (File outputFile : outputFiles) {
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile));
+ bw.write("@type bridge-network-status 1.0\n");
+ bw.write("published " + publicationTime + "\n");
+ for (String scrubbed : scrubbedLines.values()) {
+ bw.write(scrubbed);
+ }
+ bw.close();
}
- bw.close();
-
} catch (IOException e) {
this.logger.log(Level.WARNING, "Could not write sanitized bridge "
+ "network status to disk.", e);
@@ -781,22 +772,26 @@ public class SanitizedBridgesWriter extends Thread {
}
String dyear = published.substring(0, 4);
String dmonth = published.substring(5, 7);
- File newFile = new File(
+ File tarballFile = new File(
this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
+ dyear + "/" + dmonth + "/server-descriptors/"
+ "/" + descriptorDigest.charAt(0) + "/"
+ descriptorDigest.charAt(1) + "/"
+ descriptorDigest);
-
- /* Write sanitized server descriptor to disk, including all its parent
- * directories. */
+ File rsyncFile = new File(
+ "rsync/bridge-descriptors/server-descriptors/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
try {
- newFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
- bw.write("@type bridge-server-descriptor 1.0\n");
- bw.write(scrubbedDesc);
- bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
- bw.close();
+ for (File outputFile : outputFiles) {
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile));
+ bw.write("@type bridge-server-descriptor 1.0\n");
+ bw.write(scrubbedDesc);
+ bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
+ bw.close();
+ }
} catch (IOException e) {
this.logger.log(Level.WARNING, "Could not write sanitized server "
+ "descriptor to disk.", e);
@@ -910,22 +905,25 @@ public class SanitizedBridgesWriter extends Thread {
}
String dyear = published.substring(0, 4);
String dmonth = published.substring(5, 7);
- File newFile = new File(
+ File tarballFile = new File(
this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
+ dyear + "/" + dmonth + "/extra-infos/"
+ descriptorDigest.charAt(0) + "/"
+ descriptorDigest.charAt(1) + "/"
+ descriptorDigest);
-
- /* Write sanitized extra-info descriptor to disk, including all its
- * parent directories. */
+ File rsyncFile = new File("rsync/bridge-descriptors/extra-infos/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
try {
- newFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
- bw.write("@type bridge-extra-info 1.1\n");
- bw.write(scrubbedDesc);
- bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
- bw.close();
+ for (File outputFile : outputFiles) {
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile));
+ bw.write("@type bridge-extra-info 1.1\n");
+ bw.write(scrubbedDesc);
+ bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
+ bw.close();
+ }
} catch (Exception e) {
this.logger.log(Level.WARNING, "Could not write sanitized "
+ "extra-info descriptor to disk.", e);
@@ -969,5 +967,22 @@ public class SanitizedBridgesWriter extends Thread {
}
}
}
+
+ /* Delete all files from the rsync directory that have not been modified
+ * in the last three days. */
+ public void cleanUpRsyncDirectory() {
+ long cutOffMillis = System.currentTimeMillis()
+ - 3L * 24L * 60L * 60L * 1000L;
+ Stack<File> allFiles = new Stack<File>();
+ allFiles.add(new File("rsync/bridge-descriptors"));
+ while (!allFiles.isEmpty()) {
+ File file = allFiles.pop();
+ if (file.isDirectory()) {
+ allFiles.addAll(Arrays.asList(file.listFiles()));
+ } else if (file.lastModified() < cutOffMillis) {
+ file.delete();
+ }
+ }
+ }
}
diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
index 4f8dcb6..0ac6f90 100644
--- a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
+++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -27,7 +27,6 @@ import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.RsyncDataProvider;
public class BridgePoolAssignmentsProcessor extends Thread {
@@ -121,19 +120,26 @@ public class BridgePoolAssignmentsProcessor extends Thread {
long bridgePoolAssignmentTime = assignmentFormat.parse(
bridgePoolAssignmentLine.substring(
"bridge-pool-assignment ".length())).getTime();
- File sanitizedAssignmentsFile = new File(
+ File tarballFile = new File(
sanitizedAssignmentsDirectory, filenameFormat.format(
bridgePoolAssignmentTime));
- if (!sanitizedAssignmentsFile.exists()) {
- sanitizedAssignmentsFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- sanitizedAssignmentsFile));
- bw.write("@type bridge-pool-assignment 1.0\n");
- bw.write(bridgePoolAssignmentLine + "\n");
- for (String assignmentLine : sanitizedAssignments) {
- bw.write(assignmentLine + "\n");
+ File rsyncFile = new File(
+ "rsync/bridge-pool-assignments/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile,
+ rsyncFile };
+ for (File outputFile : outputFiles) {
+ if (!outputFile.exists()) {
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ outputFile));
+ bw.write("@type bridge-pool-assignment 1.0\n");
+ bw.write(bridgePoolAssignmentLine + "\n");
+ for (String assignmentLine : sanitizedAssignments) {
+ bw.write(assignmentLine + "\n");
+ }
+ bw.close();
}
- bw.close();
}
} catch (IOException e) {
logger.log(Level.WARNING, "Could not write sanitized "
@@ -186,13 +192,26 @@ public class BridgePoolAssignmentsProcessor extends Thread {
}
}
- // Copy sanitized bridge pool assignments from the last 3 days to the
- // rsync directory.
- RsyncDataProvider rdp = new RsyncDataProvider();
- rdp.copyFiles(sanitizedAssignmentsDirectory,
- "bridge-pool-assignments");
+ this.cleanUpRsyncDirectory();
logger.info("Finished processing bridge pool assignment file(s).");
}
+
+ /* Delete all files from the rsync directory that have not been modified
+ * in the last three days. */
+ public void cleanUpRsyncDirectory() {
+ long cutOffMillis = System.currentTimeMillis()
+ - 3L * 24L * 60L * 60L * 1000L;
+ Stack<File> allFiles = new Stack<File>();
+ allFiles.add(new File("rsync/bridge-pool-assignments"));
+ while (!allFiles.isEmpty()) {
+ File file = allFiles.pop();
+ if (file.isDirectory()) {
+ allFiles.addAll(Arrays.asList(file.listFiles()));
+ } else if (file.lastModified() < cutOffMillis) {
+ file.delete();
+ }
+ }
+ }
}
diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
index bbd93ac..26e944c 100644
--- a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
+++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
@@ -10,6 +10,7 @@ import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.text.SimpleDateFormat;
+import java.util.Arrays;
import java.util.Date;
import java.util.SortedSet;
import java.util.Stack;
@@ -19,7 +20,6 @@ import java.util.logging.Level;
import java.util.logging.Logger;
import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.RsyncDataProvider;
public class ExitListDownloader extends Thread {
@@ -55,24 +55,33 @@ public class ExitListDownloader extends Thread {
new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
Date downloadedDate = new Date();
- File exitListFile = new File("exitlist/" + printFormat.format(
+ File tarballFile = new File("exitlist/" + printFormat.format(
downloadedDate));
- exitListFile.getParentFile().mkdirs();
+ tarballFile.getParentFile().mkdirs();
+ File rsyncFile = new File("rsync/exit-lists/"
+ + tarballFile.getName());
+ rsyncFile.getParentFile().mkdirs();
SimpleDateFormat dateTimeFormat =
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- exitListFile));
- bw.write("@type tordnsel 1.0\n");
- bw.write("Downloaded " + dateTimeFormat.format(downloadedDate)
+ BufferedWriter bwT = new BufferedWriter(new FileWriter(
+ tarballFile));
+ BufferedWriter bwR = new BufferedWriter(new FileWriter(
+ rsyncFile));
+ bwT.write("@type tordnsel 1.0\n");
+ bwT.write("Downloaded " + dateTimeFormat.format(downloadedDate)
+ + "\n");
+ bwR.write("@type tordnsel 1.0\n");
+ bwR.write("Downloaded " + dateTimeFormat.format(downloadedDate)
+ "\n");
int len;
byte[] data = new byte[1024];
while ((len = in.read(data, 0, 1024)) >= 0) {
- bw.write(new String(data, 0, len));
+ bwT.write(new String(data, 0, len));
+ bwR.write(new String(data, 0, len));
}
in.close();
- bw.close();
+ bwT.close();
+ bwR.close();
logger.fine("Finished downloading exit list.");
} catch (IOException e) {
logger.log(Level.WARNING, "Failed downloading exit list", e);
@@ -110,9 +119,24 @@ public class ExitListDownloader extends Thread {
}
logger.info(dumpStats.toString());
- /* Copy exit lists from the last 3 days to the rsync directory. */
- RsyncDataProvider rdp = new RsyncDataProvider();
- rdp.copyFiles(new File("exitlist"), "exit-lists");
+ this.cleanUpRsyncDirectory();
+ }
+
+ /* Delete all files from the rsync directory that have not been modified
+ * in the last three days. */
+ public void cleanUpRsyncDirectory() {
+ long cutOffMillis = System.currentTimeMillis()
+ - 3L * 24L * 60L * 60L * 1000L;
+ Stack<File> allFiles = new Stack<File>();
+ allFiles.add(new File("rsync/exit-lists"));
+ while (!allFiles.isEmpty()) {
+ File file = allFiles.pop();
+ if (file.isDirectory()) {
+ allFiles.addAll(Arrays.asList(file.listFiles()));
+ } else if (file.lastModified() < cutOffMillis) {
+ file.delete();
+ }
+ }
}
}
diff --git a/src/org/torproject/ernie/db/main/RsyncDataProvider.java b/src/org/torproject/ernie/db/main/RsyncDataProvider.java
deleted file mode 100644
index 7788584..0000000
--- a/src/org/torproject/ernie/db/main/RsyncDataProvider.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/* Copyright 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.main;
-
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.Stack;
-import java.util.logging.Logger;
-
-/**
- * Copy files published in the last 3 days to a local directory that can
- * then be served via rsync.
- */
-public class RsyncDataProvider {
-
- private Logger logger;
-
- private long cutOffMillis;
-
- private File rsyncDirectory;
-
- public RsyncDataProvider() {
-
- /* Initialize logger. */
- this.logger = Logger.getLogger(RsyncDataProvider.class.getName());
-
- /* Determine the cut-off time for files in rsync/. */
- this.cutOffMillis = System.currentTimeMillis()
- - 3L * 24L * 60L * 60L * 1000L;
-
- /* Create rsync/ directory if it doesn't exist. */
- this.rsyncDirectory = new File("rsync");
- if (!rsyncDirectory.exists()) {
- rsyncDirectory.mkdirs();
- }
- }
-
- public void copyFiles(File fromDirectory, String toRsyncSubDirectory) {
-
- File toDirectory = new File(this.rsyncDirectory, toRsyncSubDirectory);
-
- /* Make a list of all files in the rsync/ subdirectory to delete those
- * that we didn't copy in this run. */
- Set<String> fileNamesInRsync = new HashSet<String>();
- Stack<File> files = new Stack<File>();
- files.add(toDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else {
- fileNamesInRsync.add(pop.getName());
- }
- }
- logger.info("Found " + fileNamesInRsync.size() + " files in "
- + toDirectory.getAbsolutePath() + " that we're either "
- + "overwriting or deleting in this execution.");
-
- /* Copy files modified in the last 3 days. */
- files.add(fromDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= this.cutOffMillis) {
- String fileName = pop.getName();
- this.copyFile(pop, new File(toDirectory, fileName));
- fileNamesInRsync.remove(fileName);
- }
- }
-
- /* Delete all files that we didn't (over-)write in this run. */
- files.add(toDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (fileNamesInRsync.contains(pop.getName())) {
- fileNamesInRsync.remove(pop.getName());
- pop.delete();
- }
- }
- logger.info("After deleting files that we didn't overwrite in this "
- + "run, there are " + fileNamesInRsync.size() + " files left in "
- + toDirectory.getAbsolutePath() + ".");
- }
-
- private void copyFile(File from, File to) {
- if (from.exists() && to.exists() &&
- from.lastModified() == to.lastModified() &&
- from.length() == to.length()) {
- return;
- }
- try {
- to.getParentFile().mkdirs();
- FileInputStream fis = new FileInputStream(from);
- BufferedInputStream bis = new BufferedInputStream(fis);
- FileOutputStream fos = new FileOutputStream(to);
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- fos.write(data, 0, len);
- }
- bis.close();
- fos.close();
- to.setLastModified(from.lastModified());
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-}
-
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
index 5c3d2e9..f95bbf7 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -10,6 +10,7 @@ import java.io.FileReader;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.SortedSet;
@@ -25,7 +26,6 @@ import org.torproject.descriptor.DescriptorParser;
import org.torproject.descriptor.DescriptorSourceFactory;
import org.torproject.descriptor.impl.DescriptorParseException;
import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.RsyncDataProvider;
public class ArchiveWriter extends Thread {
@@ -94,51 +94,36 @@ public class ArchiveWriter extends Thread {
// Write output to disk that only depends on relay descriptors
this.dumpStats();
- /* Copy relay descriptors from the last 3 days to the rsync
- * directory. */
- RsyncDataProvider rsdp = new RsyncDataProvider();
- rsdp.copyFiles(
- new File(outputDirectory, "consensus"),
- "relay-descriptors/consensuses");
- rsdp.copyFiles(
- new File(outputDirectory, "vote"),
- "relay-descriptors/votes");
- rsdp.copyFiles(
- new File(outputDirectory, "server-descriptor"),
- "relay-descriptors/server-descriptors");
- rsdp.copyFiles(
- new File(outputDirectory, "extra-info"),
- "relay-descriptors/extra-infos");
+ this.cleanUpRsyncDirectory();
}
private boolean store(byte[] typeAnnotation, byte[] data,
- String filename) {
+ File[] outputFiles) {
try {
- File file = new File(filename);
- if (!file.exists()) {
- this.logger.finer("Storing " + filename);
- if (this.descriptorParser.parseDescriptors(data, filename).size()
- != 1) {
- this.logger.info("Relay descriptor file " + filename
- + " doesn't contain exactly one descriptor. Not storing.");
- return false;
- }
- file.getParentFile().mkdirs();
+ this.logger.finer("Storing " + outputFiles[0]);
+ if (this.descriptorParser.parseDescriptors(data,
+ outputFiles[0].getName()).size() != 1) {
+ this.logger.info("Relay descriptor file " + outputFiles[0]
+ + " doesn't contain exactly one descriptor. Not storing.");
+ return false;
+ }
+ for (File outputFile : outputFiles) {
+ outputFile.getParentFile().mkdirs();
BufferedOutputStream bos = new BufferedOutputStream(
- new FileOutputStream(file));
+ new FileOutputStream(outputFile));
if (data.length > 0 && data[0] != '@') {
bos.write(typeAnnotation, 0, typeAnnotation.length);
}
bos.write(data, 0, data.length);
bos.close();
- return true;
}
+ return true;
} catch (DescriptorParseException e) {
this.logger.log(Level.WARNING, "Could not parse relay descriptor "
- + filename + " before storing it to disk. Skipping.", e);
+ + outputFiles[0] + " before storing it to disk. Skipping.", e);
} catch (IOException e) {
this.logger.log(Level.WARNING, "Could not store relay descriptor "
- + filename, e);
+ + outputFiles[0], e);
}
return false;
}
@@ -149,9 +134,12 @@ public class ArchiveWriter extends Thread {
SimpleDateFormat printFormat = new SimpleDateFormat(
"yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/consensus/"
- + printFormat.format(new Date(validAfter)) + "-consensus";
- if (this.store(CONSENSUS_ANNOTATION, data, filename)) {
+ File tarballFile = new File(this.outputDirectory + "/consensus/"
+ + printFormat.format(new Date(validAfter)) + "-consensus");
+ File rsyncFile = new File("rsync/relay-descriptors/consensuses/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ if (this.store(CONSENSUS_ANNOTATION, data, outputFiles)) {
this.storedConsensuses++;
}
}
@@ -163,10 +151,13 @@ public class ArchiveWriter extends Thread {
SimpleDateFormat printFormat = new SimpleDateFormat(
"yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/vote/"
+ File tarballFile = new File(this.outputDirectory + "/vote/"
+ printFormat.format(new Date(validAfter)) + "-vote-"
- + fingerprint + "-" + digest;
- if (this.store(VOTE_ANNOTATION, data, filename)) {
+ + fingerprint + "-" + digest);
+ File rsyncFile = new File("rsync/relay-descriptors/votes/"
+ + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ if (this.store(VOTE_ANNOTATION, data, outputFiles)) {
this.storedVotes++;
}
}
@@ -178,9 +169,10 @@ public class ArchiveWriter extends Thread {
SimpleDateFormat printFormat = new SimpleDateFormat(
"yyyy-MM-dd-HH-mm-ss");
printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/certs/"
- + fingerprint + "-" + printFormat.format(new Date(published));
- if (this.store(CERTIFICATE_ANNOTATION, data, filename)) {
+ File tarballFile = new File(this.outputDirectory + "/certs/"
+ + fingerprint + "-" + printFormat.format(new Date(published)));
+ File[] outputFiles = new File[] { tarballFile };
+ if (this.store(CERTIFICATE_ANNOTATION, data, outputFiles)) {
this.storedCerts++;
}
}
@@ -191,11 +183,14 @@ public class ArchiveWriter extends Thread {
long published) {
SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/server-descriptor/"
- + printFormat.format(new Date(published))
+ File tarballFile = new File(this.outputDirectory
+ + "/server-descriptor/" + printFormat.format(new Date(published))
+ digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
- + digest;
- if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, filename)) {
+ + digest);
+ File rsyncFile = new File(
+ "rsync/relay-descriptors/server-descriptors/" + digest);
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, outputFiles)) {
this.storedServerDescriptors++;
}
}
@@ -206,12 +201,15 @@ public class ArchiveWriter extends Thread {
String extraInfoDigest, long published) {
SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String filename = outputDirectory + "/extra-info/"
+ File tarballFile = new File(this.outputDirectory + "/extra-info/"
+ descriptorFormat.format(new Date(published))
+ extraInfoDigest.substring(0, 1) + "/"
+ extraInfoDigest.substring(1, 2) + "/"
- + extraInfoDigest;
- if (this.store(EXTRA_INFO_ANNOTATION, data, filename)) {
+ + extraInfoDigest);
+ File rsyncFile = new File("rsync/relay-descriptors/extra-infos/"
+ + extraInfoDigest);
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ if (this.store(EXTRA_INFO_ANNOTATION, data, outputFiles)) {
this.storedExtraInfoDescriptors++;
}
}
@@ -404,4 +402,21 @@ public class ArchiveWriter extends Thread {
e);
}
}
+
+ /* Delete all files from the rsync directory that have not been modified
+ * in the last three days. */
+ public void cleanUpRsyncDirectory() {
+ long cutOffMillis = System.currentTimeMillis()
+ - 3L * 24L * 60L * 60L * 1000L;
+ Stack<File> allFiles = new Stack<File>();
+ allFiles.add(new File("rsync/relay-descriptors"));
+ while (!allFiles.isEmpty()) {
+ File file = allFiles.pop();
+ if (file.isDirectory()) {
+ allFiles.addAll(Arrays.asList(file.listFiles()));
+ } else if (file.lastModified() < cutOffMillis) {
+ file.delete();
+ }
+ }
+ }
}
diff --git a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
index 143010c..19b4a54 100644
--- a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
+++ b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
@@ -12,16 +12,17 @@ import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.text.SimpleDateFormat;
+import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
+import java.util.Stack;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.RsyncDataProvider;
/* Download possibly truncated Torperf .data and .extradata files from
* configured sources, append them to the files we already have, and merge
@@ -68,9 +69,7 @@ public class TorperfDownloader extends Thread {
}
this.writeLastMergedTimestamps();
- /* Copy Torperf files from the last 3 days to the rsync directory. */
- RsyncDataProvider rdp = new RsyncDataProvider();
- rdp.copyFiles(torperfOutputDirectory, "torperf");
+ this.cleanUpRsyncDirectory();
}
private File torperfLastMergedFile =
@@ -572,22 +571,43 @@ public class TorperfDownloader extends Thread {
this.cachedStartDate == null || this.cachedTpfLines == null) {
return;
}
- File tpfFile = new File(torperfOutputDirectory,
+ File tarballFile = new File(torperfOutputDirectory,
this.cachedStartDate.replaceAll("-", "/")
+ "/" + this.cachedSource + "-"
+ String.valueOf(this.cachedFileSize) + "-"
+ this.cachedStartDate + ".tpf");
- tpfFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(tpfFile));
- bw.write("@type torperf 1.0\n");
- for (String line : this.cachedTpfLines.values()) {
- bw.write(line + "\n");
+ File rsyncFile = new File("rsync/torperf/" + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ for (File outputFile : outputFiles) {
+ outputFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile));
+ bw.write("@type torperf 1.0\n");
+ for (String line : this.cachedTpfLines.values()) {
+ bw.write(line + "\n");
+ }
+ bw.close();
}
- bw.close();
this.cachedSource = null;
this.cachedFileSize = 0;
this.cachedStartDate = null;
this.cachedTpfLines = null;
}
+
+ /* Delete all files from the rsync directory that have not been modified
+ * in the last three days. */
+ public void cleanUpRsyncDirectory() {
+ long cutOffMillis = System.currentTimeMillis()
+ - 3L * 24L * 60L * 60L * 1000L;
+ Stack<File> allFiles = new Stack<File>();
+ allFiles.add(new File("rsync/torperf"));
+ while (!allFiles.isEmpty()) {
+ File file = allFiles.pop();
+ if (file.isDirectory()) {
+ allFiles.addAll(Arrays.asList(file.listFiles()));
+ } else if (file.lastModified() < cutOffMillis) {
+ file.delete();
+ }
+ }
+ }
}
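One caveat about the traversal in the two cleanUpRsyncDirectory() methods above: File.listFiles() returns null when a directory cannot be read, which would make Arrays.asList() throw a NullPointerException. A null-safe variant of the same walk, offered as a hardening sketch rather than as part of the commit:

    import java.io.File;
    import java.util.Arrays;
    import java.util.Stack;

    public class CleanupSketch {

      /* Delete all plain files under baseDir that are older than
       * cutOffMillis, skipping directories whose contents cannot be
       * listed. */
      static void deleteOldFiles(File baseDir, long cutOffMillis) {
        Stack<File> allFiles = new Stack<File>();
        allFiles.add(baseDir);
        while (!allFiles.isEmpty()) {
          File file = allFiles.pop();
          File[] children = file.listFiles();
          if (children != null) {
            /* Readable directory: descend into it. */
            allFiles.addAll(Arrays.asList(children));
          } else if (!file.isDirectory()
              && file.lastModified() < cutOffMillis) {
            /* Plain file older than the cut-off: delete it. */
            file.delete();
          }
        }
      }
    }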
[metrics-db/master] Take out options for copying to rsync directory.
by karsten@torproject.org 27 Oct '12
commit 5bdf0b012116bd3dddc257a463402824f92cacf3
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Oct 26 14:20:02 2012 -0400
Take out options for copying to rsync directory.
---
config.template | 10 -------
.../db/bridgedescs/SanitizedBridgesWriter.java | 21 ++++++--------
.../BridgePoolAssignmentsProcessor.java | 9 ++----
.../ernie/db/exitlists/ExitListDownloader.java | 7 +---
.../torproject/ernie/db/main/Configuration.java | 13 ---------
.../ernie/db/main/RsyncDataProvider.java | 4 +-
.../ernie/db/relaydescs/ArchiveWriter.java | 29 +++++++++-----------
.../ernie/db/torperf/TorperfDownloader.java | 7 +---
8 files changed, 31 insertions(+), 69 deletions(-)
diff --git a/config.template b/config.template
index 94e28e3..8f9888b 100644
--- a/config.template
+++ b/config.template
@@ -102,16 +102,6 @@
#DownloadExitList 0
#
#
-######## Rsync directory ########
-#
-## Copy recently published descriptors to another directory to provide them
-## via rsync
-#ProvideFilesViaRsync 0
-#
-## Relative path to the directory that shall be provided via rsync
-#RsyncDirectory rsync/
-#
-#
######## Torperf downloader ########
#
## Download and merge Torperf .data and .extradata files
diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
index 15cb8f1..10d2baa 100644
--- a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
@@ -180,19 +180,16 @@ public class SanitizedBridgesWriter {
// Finish writing sanitized bridge descriptors to disk
this.finishWriting();
- // Copy sanitized bridge descriptors from the last 3 days to rsync
+ // Copy sanitized bridge descriptors from the last 3 days to the rsync
// directory.
- if (config.getProvideFilesViaRsync()) {
- RsyncDataProvider rdp = new RsyncDataProvider(
- new File(config.getRsyncDirectory()));
- rdp.copyFiles(new File(sanitizedBridgesDirectory, "statuses"),
- "bridge-descriptors/statuses");
- rdp.copyFiles(
- new File(sanitizedBridgesDirectory, "server-descriptor"),
- "bridge-descriptors/server-descriptors");
- rdp.copyFiles(new File(sanitizedBridgesDirectory, "extra-info"),
- "bridge-descriptors/extra-infos");
- }
+ RsyncDataProvider rdp = new RsyncDataProvider();
+ rdp.copyFiles(new File(sanitizedBridgesDirectory, "statuses"),
+ "bridge-descriptors/statuses");
+ rdp.copyFiles(
+ new File(sanitizedBridgesDirectory, "server-descriptor"),
+ "bridge-descriptors/server-descriptors");
+ rdp.copyFiles(new File(sanitizedBridgesDirectory, "extra-info"),
+ "bridge-descriptors/extra-infos");
}
private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
index 63b7c4d..3893495 100644
--- a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
+++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -175,12 +175,9 @@ public class BridgePoolAssignmentsProcessor {
// Copy sanitized bridge pool assignments from the last 3 days to the
// rsync directory.
- if (config.getProvideFilesViaRsync()) {
- RsyncDataProvider rdp = new RsyncDataProvider(
- new File(config.getRsyncDirectory()));
- rdp.copyFiles(sanitizedAssignmentsDirectory,
- "bridge-pool-assignments");
- }
+ RsyncDataProvider rdp = new RsyncDataProvider();
+ rdp.copyFiles(sanitizedAssignmentsDirectory,
+ "bridge-pool-assignments");
logger.info("Finished processing bridge pool assignment file(s).");
}
diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
index 9fb714b..f6f2865 100644
--- a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
+++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
@@ -100,11 +100,8 @@ public class ExitListDownloader {
logger.info(dumpStats.toString());
/* Copy exit lists from the last 3 days to the rsync directory. */
- if (config.getProvideFilesViaRsync()) {
- RsyncDataProvider rdp = new RsyncDataProvider(
- new File(config.getRsyncDirectory()));
- rdp.copyFiles(new File("exitlist"), "exit-lists");
- }
+ RsyncDataProvider rdp = new RsyncDataProvider();
+ rdp.copyFiles(new File("exitlist"), "exit-lists");
}
}
diff --git a/src/org/torproject/ernie/db/main/Configuration.java b/src/org/torproject/ernie/db/main/Configuration.java
index adf22cc..65ab948 100644
--- a/src/org/torproject/ernie/db/main/Configuration.java
+++ b/src/org/torproject/ernie/db/main/Configuration.java
@@ -56,8 +56,6 @@ public class Configuration {
private String torperfOutputDirectory = "torperf/";
private SortedMap<String, String> torperfSources = null;
private List<String> torperfFiles = null;
- private boolean provideFilesViaRsync = false;
- private String rsyncDirectory = "rsync";
public Configuration() {
/* Initialize logger. */
@@ -191,11 +189,6 @@ public class Configuration {
System.exit(1);
}
this.torperfFiles.add(line);
- } else if (line.startsWith("ProvideFilesViaRsync")) {
- this.provideFilesViaRsync = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("RsyncDirectory")) {
- this.rsyncDirectory = line.split(" ")[1];
} else {
logger.severe("Configuration file contains unrecognized "
+ "configuration key in line '" + line + "'! Exiting!");
@@ -349,11 +342,5 @@ public class Configuration {
public List<String> getTorperfFiles() {
return this.torperfFiles;
}
- public boolean getProvideFilesViaRsync() {
- return this.provideFilesViaRsync;
- }
- public String getRsyncDirectory() {
- return this.rsyncDirectory;
- }
}
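The removed option handling also illustrates the Configuration class's one-key-per-line format: boolean flags are written as 0/1 and parsed via Integer.parseInt, and paths are taken as the single token after the key. A minimal sketch of that convention (the key name is a hypothetical placeholder):

    public class ConfigParseSketch {
      public static void main(String[] args) {
        /* Hypothetical config line; real keys follow the same shape. */
        String line = "SomeBooleanOption 1";
        boolean enabled = Integer.parseInt(line.split(" ")[1]) != 0;
        System.out.println("enabled = " + enabled);
      }
    }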
diff --git a/src/org/torproject/ernie/db/main/RsyncDataProvider.java b/src/org/torproject/ernie/db/main/RsyncDataProvider.java
index 78ea1f3..7788584 100644
--- a/src/org/torproject/ernie/db/main/RsyncDataProvider.java
+++ b/src/org/torproject/ernie/db/main/RsyncDataProvider.java
@@ -25,7 +25,7 @@ public class RsyncDataProvider {
private File rsyncDirectory;
- public RsyncDataProvider(File rsyncDirectory) {
+ public RsyncDataProvider() {
/* Initialize logger. */
this.logger = Logger.getLogger(RsyncDataProvider.class.getName());
@@ -35,10 +35,10 @@ public class RsyncDataProvider {
- 3L * 24L * 60L * 60L * 1000L;
/* Create rsync/ directory if it doesn't exist. */
+ this.rsyncDirectory = new File("rsync");
if (!rsyncDirectory.exists()) {
rsyncDirectory.mkdirs();
}
- this.rsyncDirectory = rsyncDirectory;
}
public void copyFiles(File fromDirectory, String toRsyncSubDirectory) {
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
index 8d7e413..a9cb604 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -88,22 +88,19 @@ public class ArchiveWriter {
/* Copy relay descriptors from the last 3 days to the rsync
* directory. */
- if (config.getProvideFilesViaRsync()) {
- RsyncDataProvider rsdp = new RsyncDataProvider(
- new File(config.getRsyncDirectory()));
- rsdp.copyFiles(
- new File(outputDirectory, "consensus"),
- "relay-descriptors/consensuses");
- rsdp.copyFiles(
- new File(outputDirectory, "vote"),
- "relay-descriptors/votes");
- rsdp.copyFiles(
- new File(outputDirectory, "server-descriptor"),
- "relay-descriptors/server-descriptors");
- rsdp.copyFiles(
- new File(outputDirectory, "extra-info"),
- "relay-descriptors/extra-infos");
- }
+ RsyncDataProvider rsdp = new RsyncDataProvider();
+ rsdp.copyFiles(
+ new File(outputDirectory, "consensus"),
+ "relay-descriptors/consensuses");
+ rsdp.copyFiles(
+ new File(outputDirectory, "vote"),
+ "relay-descriptors/votes");
+ rsdp.copyFiles(
+ new File(outputDirectory, "server-descriptor"),
+ "relay-descriptors/server-descriptors");
+ rsdp.copyFiles(
+ new File(outputDirectory, "extra-info"),
+ "relay-descriptors/extra-infos");
}
private boolean store(byte[] typeAnnotation, byte[] data,
diff --git a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
index 07b444e..5f09038 100644
--- a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
+++ b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
@@ -57,11 +57,8 @@ public class TorperfDownloader {
this.writeLastMergedTimestamps();
/* Copy Torperf files from the last 3 days to the rsync directory. */
- if (config.getProvideFilesViaRsync()) {
- RsyncDataProvider rdp = new RsyncDataProvider(
- new File(config.getRsyncDirectory()));
- rdp.copyFiles(torperfOutputDirectory, "torperf");
- }
+ RsyncDataProvider rdp = new RsyncDataProvider();
+ rdp.copyFiles(torperfOutputDirectory, "torperf");
}
private File torperfLastMergedFile =
[metrics-db/master] Make each module copy its own files to rsync/.
by karsten@torproject.org 27 Oct '12
commit 9766f4c4e9282253090c2e682ee55037c8864383
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Oct 26 14:15:16 2012 -0400
Make each module copy its own files to rsync/.
---
.../db/bridgedescs/SanitizedBridgesWriter.java | 15 ++
.../BridgePoolAssignmentsProcessor.java | 10 ++
.../ernie/db/exitlists/ExitListDownloader.java | 12 ++-
src/org/torproject/ernie/db/main/Main.java | 18 +--
.../ernie/db/main/RsyncDataProvider.java | 161 ++++----------------
.../ernie/db/relaydescs/ArchiveWriter.java | 20 +++
.../ernie/db/torperf/TorperfDownloader.java | 8 +
7 files changed, 96 insertions(+), 148 deletions(-)
diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
index e46450f..15cb8f1 100644
--- a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
@@ -28,6 +28,7 @@ import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.torproject.ernie.db.main.Configuration;
+import org.torproject.ernie.db.main.RsyncDataProvider;
/**
* Sanitizes bridge descriptors, i.e., removes all possibly sensitive
@@ -178,6 +179,20 @@ public class SanitizedBridgesWriter {
// Finish writing sanitized bridge descriptors to disk
this.finishWriting();
+
+ // Copy sanitized bridge descriptors from the last 3 days to rsync
+ // directory.
+ if (config.getProvideFilesViaRsync()) {
+ RsyncDataProvider rdp = new RsyncDataProvider(
+ new File(config.getRsyncDirectory()));
+ rdp.copyFiles(new File(sanitizedBridgesDirectory, "statuses"),
+ "bridge-descriptors/statuses");
+ rdp.copyFiles(
+ new File(sanitizedBridgesDirectory, "server-descriptor"),
+ "bridge-descriptors/server-descriptors");
+ rdp.copyFiles(new File(sanitizedBridgesDirectory, "extra-info"),
+ "bridge-descriptors/extra-infos");
+ }
}
private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
index 76fa539..63b7c4d 100644
--- a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
+++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -27,6 +27,7 @@ import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.torproject.ernie.db.main.Configuration;
+import org.torproject.ernie.db.main.RsyncDataProvider;
public class BridgePoolAssignmentsProcessor {
@@ -172,6 +173,15 @@ public class BridgePoolAssignmentsProcessor {
}
}
+ // Copy sanitized bridge pool assignments from the last 3 days to the
+ // rsync directory.
+ if (config.getProvideFilesViaRsync()) {
+ RsyncDataProvider rdp = new RsyncDataProvider(
+ new File(config.getRsyncDirectory()));
+ rdp.copyFiles(sanitizedAssignmentsDirectory,
+ "bridge-pool-assignments");
+ }
+
logger.info("Finished processing bridge pool assignment file(s).");
}
}
diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
index 64f6a3b..9fb714b 100644
--- a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
+++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
@@ -18,8 +18,11 @@ import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
+import org.torproject.ernie.db.main.Configuration;
+import org.torproject.ernie.db.main.RsyncDataProvider;
+
public class ExitListDownloader {
- public ExitListDownloader() {
+ public ExitListDownloader(Configuration config) {
Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
try {
logger.fine("Downloading exit list...");
@@ -95,6 +98,13 @@ public class ExitListDownloader {
dumpStats.append("\n" + f.getName());
}
logger.info(dumpStats.toString());
+
+ /* Copy exit lists from the last 3 days to the rsync directory. */
+ if (config.getProvideFilesViaRsync()) {
+ RsyncDataProvider rdp = new RsyncDataProvider(
+ new File(config.getRsyncDirectory()));
+ rdp.copyFiles(new File("exitlist"), "exit-lists");
+ }
}
}
diff --git a/src/org/torproject/ernie/db/main/Main.java b/src/org/torproject/ernie/db/main/Main.java
index 9109a75..093f002 100644
--- a/src/org/torproject/ernie/db/main/Main.java
+++ b/src/org/torproject/ernie/db/main/Main.java
@@ -51,7 +51,7 @@ public class Main {
// Download exit list and store it to disk
if (config.getDownloadExitList()) {
- new ExitListDownloader();
+ new ExitListDownloader(config);
}
// Process bridge pool assignments
@@ -64,22 +64,6 @@ public class Main {
new TorperfDownloader(config);
}
- // Copy recently published files to a local directory that can then
- // be served via rsync.
- if (config.getProvideFilesViaRsync()) {
- new RsyncDataProvider(
- !config.getWriteDirectoryArchives() ? null :
- new File(config.getDirectoryArchivesOutputDirectory()),
- !config.getWriteSanitizedBridges() ? null :
- new File(config.getSanitizedBridgesWriteDirectory()),
- !config.getProcessBridgePoolAssignments() ? null :
- new File(config.getSanitizedAssignmentsDirectory()),
- config.getDownloadExitList(),
- !config.getProcessTorperfFiles() ? null :
- new File(config.getTorperfOutputDirectory()),
- new File(config.getRsyncDirectory()));
- }
-
// Remove lock file
lf.releaseLock();
diff --git a/src/org/torproject/ernie/db/main/RsyncDataProvider.java b/src/org/torproject/ernie/db/main/RsyncDataProvider.java
index cd4a6f9..78ea1f3 100644
--- a/src/org/torproject/ernie/db/main/RsyncDataProvider.java
+++ b/src/org/torproject/ernie/db/main/RsyncDataProvider.java
@@ -18,29 +18,38 @@ import java.util.logging.Logger;
* then be served via rsync.
*/
public class RsyncDataProvider {
- public RsyncDataProvider(File directoryArchivesOutputDirectory,
- File sanitizedBridgesWriteDirectory,
- File sanitizedAssignmentsDirectory,
- boolean downloadExitList,
- File torperfOutputDirectory, File rsyncDirectory) {
+
+ private Logger logger;
+
+ private long cutOffMillis;
+
+ private File rsyncDirectory;
+
+ public RsyncDataProvider(File rsyncDirectory) {
/* Initialize logger. */
- Logger logger = Logger.getLogger(RsyncDataProvider.class.getName());
+ this.logger = Logger.getLogger(RsyncDataProvider.class.getName());
/* Determine the cut-off time for files in rsync/. */
- long cutOffMillis = System.currentTimeMillis()
+ this.cutOffMillis = System.currentTimeMillis()
- 3L * 24L * 60L * 60L * 1000L;
/* Create rsync/ directory if it doesn't exist. */
if (!rsyncDirectory.exists()) {
rsyncDirectory.mkdirs();
}
+ this.rsyncDirectory = rsyncDirectory;
+ }
- /* Make a list of all files in the rsync/ directory to delete those
+ public void copyFiles(File fromDirectory, String toRsyncSubDirectory) {
+
+ File toDirectory = new File(this.rsyncDirectory, toRsyncSubDirectory);
+
+ /* Make a list of all files in the rsync/ subdirectory to delete those
* that we didn't copy in this run. */
Set<String> fileNamesInRsync = new HashSet<String>();
Stack<File> files = new Stack<File>();
- files.add(rsyncDirectory);
+ files.add(toDirectory);
while (!files.isEmpty()) {
File pop = files.pop();
if (pop.isDirectory()) {
@@ -50,132 +59,24 @@ public class RsyncDataProvider {
}
}
logger.info("Found " + fileNamesInRsync.size() + " files in "
- + rsyncDirectory.getAbsolutePath() + " that we're either "
+ + toDirectory.getAbsolutePath() + " that we're either "
+ "overwriting or deleting in this execution.");
- /* Copy relay descriptors from the last 3 days. */
- if (directoryArchivesOutputDirectory != null) {
- files.add(directoryArchivesOutputDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- if (pop.getAbsolutePath().contains("/consensus/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "relay-descriptors/consensuses/" + fileName));
- } else if (pop.getAbsolutePath().contains("/vote/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "relay-descriptors/votes/" + fileName));
- } else if (pop.getAbsolutePath().contains(
- "/server-descriptor/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "relay-descriptors/server-descriptors/" + fileName));
- } else if (pop.getAbsolutePath().contains("/extra-info/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "relay-descriptors/extra-infos/" + fileName));
- } else {
- continue;
- }
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying relay descriptors, there are still "
- + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Copy sanitized bridge descriptors from the last 3 days. */
- if (sanitizedBridgesWriteDirectory != null) {
- files.add(sanitizedBridgesWriteDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- if (pop.getAbsolutePath().contains("/statuses/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "bridge-descriptors/statuses/" + fileName));
- } else if (pop.getAbsolutePath().contains(
- "/server-descriptors/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "bridge-descriptors/server-descriptors/" + fileName));
- } else if (pop.getAbsolutePath().contains("/extra-infos/")) {
- this.copyFile(pop, new File(rsyncDirectory,
- "bridge-descriptors/extra-infos/" + fileName));
- } else {
- continue;
- }
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying sanitized bridge descriptors, there are "
- + "still " + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Copy sanitized bridge pool assignments from the last 3 days. */
- if (sanitizedAssignmentsDirectory != null) {
- files.add(sanitizedAssignmentsDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- this.copyFile(pop, new File(rsyncDirectory,
- "bridge-pool-assignments/" + fileName));
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying sanitized bridge pool assignments, there "
- + "are still " + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Copy exit lists from the last 3 days. */
- if (downloadExitList) {
- files.add(new File("exitlist"));
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- this.copyFile(pop, new File(rsyncDirectory,
- "exit-lists/" + fileName));
- fileNamesInRsync.remove(pop.getName());
- }
- }
- }
- logger.info("After copying exit lists, there are still "
- + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
-
- /* Copy Torperf files. */
- if (torperfOutputDirectory != null) {
- files.add(torperfOutputDirectory);
- while (!files.isEmpty()) {
- File pop = files.pop();
- if (pop.isDirectory()) {
- files.addAll(Arrays.asList(pop.listFiles()));
- } else if (pop.getName().endsWith(".tpf") &&
- pop.lastModified() >= cutOffMillis) {
- String fileName = pop.getName();
- this.copyFile(pop, new File(rsyncDirectory,
- "torperf/" + fileName));
- fileNamesInRsync.remove(pop.getName());
- }
+ /* Copy files modified in the last 3 days. */
+ files.add(fromDirectory);
+ while (!files.isEmpty()) {
+ File pop = files.pop();
+ if (pop.isDirectory()) {
+ files.addAll(Arrays.asList(pop.listFiles()));
+ } else if (pop.lastModified() >= this.cutOffMillis) {
+ String fileName = pop.getName();
+ this.copyFile(pop, new File(toDirectory, fileName));
+ fileNamesInRsync.remove(fileName);
}
}
- logger.info("After copying Torperf files, there are still "
- + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
/* Delete all files that we didn't (over-)write in this run. */
- files.add(rsyncDirectory);
+ files.add(toDirectory);
while (!files.isEmpty()) {
File pop = files.pop();
if (pop.isDirectory()) {
@@ -187,7 +88,7 @@ public class RsyncDataProvider {
}
logger.info("After deleting files that we didn't overwrite in this "
+ "run, there are " + fileNamesInRsync.size() + " files left in "
- + rsyncDirectory.getAbsolutePath() + ".");
+ + toDirectory.getAbsolutePath() + ".");
}
private void copyFile(File from, File to) {
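The refactoring above is spread over several hunks; assembled, the generalized copyFiles() makes three passes: inventory the file names already present under the rsync subdirectory, copy everything from the source directory modified within the cut-off window, and delete whatever was not refreshed. A consolidated sketch of that flow, assuming copyFile() behaves like an overwrite-copy (its body is not part of this diff; the java.nio.file.Files.copy call stands in for it):

    import java.io.File;
    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.StandardCopyOption;
    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;
    import java.util.Stack;

    public class CopyFilesSketch {

      /* Copy files modified since cutOffMillis from fromDirectory into
       * toDirectory, then delete anything in toDirectory that was not
       * (over-)written in this run. */
      static void copyFiles(File fromDirectory, File toDirectory,
          long cutOffMillis) throws IOException {

        /* Pass 1: remember which file names are already there. */
        Set<String> fileNamesInRsync = new HashSet<String>();
        Stack<File> files = new Stack<File>();
        files.add(toDirectory);
        while (!files.isEmpty()) {
          File pop = files.pop();
          if (pop.isDirectory()) {
            files.addAll(Arrays.asList(pop.listFiles()));
          } else {
            fileNamesInRsync.add(pop.getName());
          }
        }

        /* Pass 2: copy recent files; copied names are kept. */
        files.add(fromDirectory);
        while (!files.isEmpty()) {
          File pop = files.pop();
          if (pop.isDirectory()) {
            files.addAll(Arrays.asList(pop.listFiles()));
          } else if (pop.lastModified() >= cutOffMillis) {
            File to = new File(toDirectory, pop.getName());
            to.getParentFile().mkdirs();
            Files.copy(pop.toPath(), to.toPath(),
                StandardCopyOption.REPLACE_EXISTING);
            fileNamesInRsync.remove(pop.getName());
          }
        }

        /* Pass 3: delete stale files that were not refreshed. */
        files.add(toDirectory);
        while (!files.isEmpty()) {
          File pop = files.pop();
          if (pop.isDirectory()) {
            files.addAll(Arrays.asList(pop.listFiles()));
          } else if (fileNamesInRsync.contains(pop.getName())) {
            pop.delete();
          }
        }
      }
    }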
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
index d53f726..8d7e413 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -25,6 +25,7 @@ import org.torproject.descriptor.DescriptorParser;
import org.torproject.descriptor.DescriptorSourceFactory;
import org.torproject.descriptor.impl.DescriptorParseException;
import org.torproject.ernie.db.main.Configuration;
+import org.torproject.ernie.db.main.RsyncDataProvider;
public class ArchiveWriter {
private Logger logger;
@@ -84,6 +85,25 @@ public class ArchiveWriter {
// Write output to disk that only depends on relay descriptors
this.dumpStats();
+
+ /* Copy relay descriptors from the last 3 days to the rsync
+ * directory. */
+ if (config.getProvideFilesViaRsync()) {
+ RsyncDataProvider rsdp = new RsyncDataProvider(
+ new File(config.getRsyncDirectory()));
+ rsdp.copyFiles(
+ new File(outputDirectory, "consensus"),
+ "relay-descriptors/consensuses");
+ rsdp.copyFiles(
+ new File(outputDirectory, "vote"),
+ "relay-descriptors/votes");
+ rsdp.copyFiles(
+ new File(outputDirectory, "server-descriptor"),
+ "relay-descriptors/server-descriptors");
+ rsdp.copyFiles(
+ new File(outputDirectory, "extra-info"),
+ "relay-descriptors/extra-infos");
+ }
}
private boolean store(byte[] typeAnnotation, byte[] data,
diff --git a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
index a931965..07b444e 100644
--- a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
+++ b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
@@ -21,6 +21,7 @@ import java.util.logging.Level;
import java.util.logging.Logger;
import org.torproject.ernie.db.main.Configuration;
+import org.torproject.ernie.db.main.RsyncDataProvider;
/* Download possibly truncated Torperf .data and .extradata files from
* configured sources, append them to the files we already have, and merge
@@ -54,6 +55,13 @@ public class TorperfDownloader {
this.downloadAndMergeFiles(torperfFilesLine);
}
this.writeLastMergedTimestamps();
+
+ /* Copy Torperf files from the last 3 days to the rsync directory. */
+ if (config.getProvideFilesViaRsync()) {
+ RsyncDataProvider rdp = new RsyncDataProvider(
+ new File(config.getRsyncDirectory()));
+ rdp.copyFiles(torperfOutputDirectory, "torperf");
+ }
}
private File torperfLastMergedFile =
[translation/https_everywhere_completed] Update translations for https_everywhere_completed
by translation@torproject.org 27 Oct '12
commit e7337f276cdd02b8f670de4ad4d11fd3d4017d4a
Author: Translation commit bot <translation(a)torproject.org>
Date: Sat Oct 27 17:45:57 2012 +0000
Update translations for https_everywhere_completed
---
tr/https-everywhere.dtd | 2 +-
tr/ssl-observatory.dtd | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tr/https-everywhere.dtd b/tr/https-everywhere.dtd
index 99b0198..c34bbdf 100644
--- a/tr/https-everywhere.dtd
+++ b/tr/https-everywhere.dtd
@@ -24,7 +24,7 @@
<!ENTITY https-everywhere.prefs.notes "Notlar">
<!ENTITY https-everywhere.prefs.list_caption "Hangi HTTPS yönlendirme kuralları etkin olarak uygulansın?">
<!ENTITY https-everywhere.prefs.enabled "Etkin">
-<!ENTITY https-everywhere.prefs.ruleset_howto "Kendi kural setini nasıl yazabileceğini ">
+<!ENTITY https-everywhere.prefs.ruleset_howto "Kendi kural setini nasıl yazabileceğini">
<!ENTITY https-everywhere.prefs.here_link "öğrenebilirsin.">
<!ENTITY https-everywhere.prefs.toggle "Değiştir">
<!ENTITY https-everywhere.prefs.reset_default "Varsayılana Dön">
diff --git a/tr/ssl-observatory.dtd b/tr/ssl-observatory.dtd
index f424f19..7a972a0 100644
--- a/tr/ssl-observatory.dtd
+++ b/tr/ssl-observatory.dtd
@@ -58,7 +58,7 @@ Mouseover the options for further details:">-->
<!ENTITY ssl-observatory.prefs.explanation2
-"Örneğin, https://www.hack4career.com adresini ziyaret ettiğinde ve eğer ki sayfayı daha önce hiç kimse ziyaret etmemişse sertifika Gözlemci tarafından ileri de bu siteyi ziyaret edecekler için alınır, Daha fazla bilgi için aşağıdan gözlemciyi aktif etmelisiniz:">
+"Örneğin, https://www.hack4career.com adresini ziyaret ettiğinde ve eğer ki sayfayı daha önce hiç kimse ziyaret etmemişse sertifika Gözlemci tarafından ileride bu siteyi ziyaret edecekler için alınır. Daha fazla bilgi için aşağıdan gözlemciyi aktif etmelisiniz:">
<!ENTITY ssl-observatory.prefs.hide "Gelişmiş ayarları gizle">
[translation/https_everywhere] Update translations for https_everywhere
by translation@torproject.org 27 Oct '12
commit 9186abad299fdb8987ab4cab818bed8a8ee3b08e
Author: Translation commit bot <translation(a)torproject.org>
Date: Sat Oct 27 17:45:46 2012 +0000
Update translations for https_everywhere
---
tr/https-everywhere.dtd | 2 +-
tr/ssl-observatory.dtd | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tr/https-everywhere.dtd b/tr/https-everywhere.dtd
index 99b0198..c34bbdf 100644
--- a/tr/https-everywhere.dtd
+++ b/tr/https-everywhere.dtd
@@ -24,7 +24,7 @@
<!ENTITY https-everywhere.prefs.notes "Notlar">
<!ENTITY https-everywhere.prefs.list_caption "Hangi HTTPS yönlendirme kuralları etkin olarak uygulansın?">
<!ENTITY https-everywhere.prefs.enabled "Etkin">
-<!ENTITY https-everywhere.prefs.ruleset_howto "Kendi kural setini nasıl yazabileceğini ">
+<!ENTITY https-everywhere.prefs.ruleset_howto "Kendi kural setini nasıl yazabileceğini">
<!ENTITY https-everywhere.prefs.here_link "öğrenebilirsin.">
<!ENTITY https-everywhere.prefs.toggle "Değiştir">
<!ENTITY https-everywhere.prefs.reset_default "Varsayılana Dön">
diff --git a/tr/ssl-observatory.dtd b/tr/ssl-observatory.dtd
index f424f19..7a972a0 100644
--- a/tr/ssl-observatory.dtd
+++ b/tr/ssl-observatory.dtd
@@ -58,7 +58,7 @@ Mouseover the options for further details:">-->
<!ENTITY ssl-observatory.prefs.explanation2
-"Örneğin, https://www.hack4career.com adresini ziyaret ettiğinde ve eğer ki sayfayı daha önce hiç kimse ziyaret etmemişse sertifika Gözlemci tarafından ileri de bu siteyi ziyaret edecekler için alınır, Daha fazla bilgi için aşağıdan gözlemciyi aktif etmelisiniz:">
+"Örneğin, https://www.hack4career.com adresini ziyaret ettiğinde ve eğer ki sayfayı daha önce hiç kimse ziyaret etmemişse sertifika Gözlemci tarafından ileride bu siteyi ziyaret edecekler için alınır. Daha fazla bilgi için aşağıdan gözlemciyi aktif etmelisiniz:">
<!ENTITY ssl-observatory.prefs.hide "Gelişmiş ayarları gizle">