commit 4694f0501915d00478062d9066c4132e6f7b52aa
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Jan 9 14:26:36 2014 +0100
Download microdesc consensuses and microdescriptors.
Also support reading microdescriptors from disk.
Implements #2785.
---
config.template | 8 +
deps/metrics-lib | 2 +-
.../torproject/ernie/db/main/Configuration.java | 14 +
.../ernie/db/relaydescs/ArchiveReader.java | 145 +++++++-
.../ernie/db/relaydescs/ArchiveWriter.java | 255 +++++++++++---
.../db/relaydescs/RelayDescriptorDownloader.java | 357 +++++++++++++++++---
.../ernie/db/relaydescs/RelayDescriptorParser.java | 105 +++++-
7 files changed, 771 insertions(+), 115 deletions(-)
diff --git a/config.template b/config.template
index 1b84775..cd31802 100644
--- a/config.template
+++ b/config.template
@@ -33,6 +33,10 @@
## Download the current consensus (only if DownloadRelayDescriptors is 1)
#DownloadCurrentConsensus 1
#
+## Download the current microdesc consensus (only if
+## DownloadRelayDescriptors is 1)
+#DownloadCurrentMicrodescConsensus 1
+#
## Download current votes (only if DownloadRelayDescriptors is 1)
#DownloadCurrentVotes 1
#
@@ -44,6 +48,10 @@
## DownloadRelayDescriptors is 1)
#DownloadMissingExtraInfoDescriptors 1
#
+## Download missing microdescriptors (only if
+## DownloadRelayDescriptors is 1)
+#DownloadMissingMicrodescriptors 1
+#
## Download all server descriptors from the directory authorities at most
## once a day (only if DownloadRelayDescriptors is 1)
#DownloadAllServerDescriptors 0
diff --git a/deps/metrics-lib b/deps/metrics-lib
index 3e60ccd..38c48dd 160000
--- a/deps/metrics-lib
+++ b/deps/metrics-lib
@@ -1 +1 @@
-Subproject commit 3e60ccdaaba598cabb7281d45f9a415299b8e3e3
+Subproject commit 38c48ddd0c49978bbfa5e0a987cfd3a890692a5c
diff --git a/src/org/torproject/ernie/db/main/Configuration.java b/src/org/torproject/ernie/db/main/Configuration.java
index 7b5e53d..85d889e 100644
--- a/src/org/torproject/ernie/db/main/Configuration.java
+++ b/src/org/torproject/ernie/db/main/Configuration.java
@@ -53,9 +53,11 @@ public class Configuration {
+ "ED03BB616EB2F60BEC80151114BB25CEF515B226,"
+ "EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97").split(","));
private boolean downloadCurrentConsensus = true;
+ private boolean downloadCurrentMicrodescConsensus = true;
private boolean downloadCurrentVotes = true;
private boolean downloadMissingServerDescriptors = true;
private boolean downloadMissingExtraInfoDescriptors = true;
+ private boolean downloadMissingMicrodescriptors = true;
private boolean downloadAllServerDescriptors = false;
private boolean downloadAllExtraInfoDescriptors = false;
private boolean compressRelayDescriptorDownloads;
@@ -141,6 +143,9 @@ public class Configuration {
} else if (line.startsWith("DownloadCurrentConsensus")) {
this.downloadCurrentConsensus = Integer.parseInt(
line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadCurrentMicrodescConsensus")) {
+ this.downloadCurrentMicrodescConsensus = Integer.parseInt(
+ line.split(" ")[1]) != 0;
} else if (line.startsWith("DownloadCurrentVotes")) {
this.downloadCurrentVotes = Integer.parseInt(
line.split(" ")[1]) != 0;
@@ -151,6 +156,9 @@ public class Configuration {
"DownloadMissingExtraInfoDescriptors")) {
this.downloadMissingExtraInfoDescriptors = Integer.parseInt(
line.split(" ")[1]) != 0;
+ } else if (line.startsWith("DownloadMissingMicrodescriptors")) {
+ this.downloadMissingMicrodescriptors = Integer.parseInt(
+ line.split(" ")[1]) != 0;
} else if (line.startsWith("DownloadAllServerDescriptors")) {
this.downloadAllServerDescriptors = Integer.parseInt(
line.split(" ")[1]) != 0;
@@ -253,6 +261,9 @@ public class Configuration {
public boolean getDownloadCurrentConsensus() {
return this.downloadCurrentConsensus;
}
+ public boolean getDownloadCurrentMicrodescConsensus() {
+ return this.downloadCurrentMicrodescConsensus;
+ }
public boolean getDownloadCurrentVotes() {
return this.downloadCurrentVotes;
}
@@ -262,6 +273,9 @@ public class Configuration {
public boolean getDownloadMissingExtraInfoDescriptors() {
return this.downloadMissingExtraInfoDescriptors;
}
+ public boolean getDownloadMissingMicrodescriptors() {
+ return this.downloadMissingMicrodescriptors;
+ }
public boolean getDownloadAllServerDescriptors() {
return this.downloadAllServerDescriptors;
}
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java b/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java
index fba0a9f..ea54874 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2012 The Tor Project
+/* Copyright 2010--2014 The Tor Project
* See LICENSE for licensing information */
package org.torproject.ernie.db.relaydescs;
@@ -11,14 +11,25 @@ import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
+import java.util.Map;
+import java.util.Set;
import java.util.SortedSet;
import java.util.Stack;
+import java.util.TimeZone;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
/**
@@ -34,6 +45,7 @@ public class ArchiveReader {
throw new IllegalArgumentException();
}
+ rdp.setArchiveReader(this);
int parsedFiles = 0, ignoredFiles = 0;
Logger logger = Logger.getLogger(ArchiveReader.class.getName());
SortedSet<String> archivesImportHistory = new TreeSet<String>();
@@ -59,6 +71,7 @@ public class ArchiveReader {
Stack<File> filesInInputDir = new Stack<File>();
filesInInputDir.add(archivesDirectory);
List<File> problems = new ArrayList<File>();
+ Set<File> filesToRetry = new HashSet<File>();
while (!filesInInputDir.isEmpty()) {
File pop = filesInInputDir.pop();
if (pop.isDirectory()) {
@@ -86,9 +99,6 @@ public class ArchiveReader {
FileInputStream fis = new FileInputStream(pop);
bis = new BufferedInputStream(fis);
}
- if (keepImportHistory) {
- archivesImportHistory.add(pop.getName());
- }
ByteArrayOutputStream baos = new ByteArrayOutputStream();
int len;
byte[] data = new byte[1024];
@@ -97,7 +107,14 @@ public class ArchiveReader {
}
bis.close();
byte[] allData = baos.toByteArray();
- rdp.parse(allData);
+ boolean stored = rdp.parse(allData);
+ if (!stored) {
+ filesToRetry.add(pop);
+ continue;
+ }
+ if (keepImportHistory) {
+ archivesImportHistory.add(pop.getName());
+ }
parsedFiles++;
} catch (IOException e) {
problems.add(pop);
@@ -108,6 +125,109 @@ public class ArchiveReader {
}
}
}
+ for (File pop : filesToRetry) {
+ /* TODO We need to parse microdescriptors ourselves, rather than
+ * leaving that to RelayDescriptorParser, because only we know the
+ * valid-after time(s) of the microdesc consensus(es) containing
+ * this microdescriptor. However, this breaks functional
+ * abstraction pretty badly. */
+ if (rdp != null) {
+ try {
+ BufferedInputStream bis = null;
+ if (pop.getName().endsWith(".bz2")) {
+ FileInputStream fis = new FileInputStream(pop);
+ BZip2CompressorInputStream bcis =
+ new BZip2CompressorInputStream(fis);
+ bis = new BufferedInputStream(bcis);
+ } else {
+ FileInputStream fis = new FileInputStream(pop);
+ bis = new BufferedInputStream(fis);
+ }
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ bis.close();
+ byte[] allData = baos.toByteArray();
+ BufferedReader br = new BufferedReader(new StringReader(
+ new String(allData, "US-ASCII")));
+ String line;
+ do {
+ line = br.readLine();
+ } while (line != null && line.startsWith("@"));
+ br.close();
+ if (line == null) {
+ logger.fine("We were given an empty descriptor for "
+ + "parsing. Ignoring.");
+ continue;
+ }
+ if (!line.equals("onion-key")) {
+ logger.fine("Skipping non-recognized descriptor.");
+ continue;
+ }
+ SimpleDateFormat parseFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String ascii = null;
+ try {
+ ascii = new String(allData, "US-ASCII");
+ } catch (UnsupportedEncodingException e) {
+ /* No way that US-ASCII is not supported. */
+ }
+ int start = -1, end = -1;
+ String startToken = "onion-key\n";
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ end = ascii.indexOf(startToken, start + 1);
+ if (end < 0) {
+ end = ascii.length();
+ if (end <= start) {
+ break;
+ }
+ }
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0, end - start);
+ String digest256Base64 = Base64.encodeBase64String(
+ DigestUtils.sha256(descBytes)).replaceAll("=", "");
+ String digest256Hex = DigestUtils.sha256Hex(descBytes);
+ if (!this.microdescriptorValidAfterTimes.containsKey(
+ digest256Hex)) {
+ logger.fine("Could not store microdescriptor '"
+ + digest256Hex + "', which was not contained in a "
+ + "microdesc consensus.");
+ continue;
+ }
+ for (String validAfterTime :
+ this.microdescriptorValidAfterTimes.get(digest256Hex)) {
+ try {
+ long validAfter =
+ parseFormat.parse(validAfterTime).getTime();
+ rdp.storeMicrodescriptor(descBytes, digest256Hex,
+ digest256Base64, validAfter);
+ } catch (ParseException e) {
+ logger.log(Level.WARNING, "Could not parse "
+ + "valid-after time '" + validAfterTime + "'. Not "
+ + "storing microdescriptor.", e);
+ }
+ }
+ }
+ if (keepImportHistory) {
+ archivesImportHistory.add(pop.getName());
+ }
+ parsedFiles++;
+ } catch (IOException e) {
+ problems.add(pop);
+ if (problems.size() > 3) {
+ break;
+ }
+ }
+ }
+ }
if (problems.isEmpty()) {
logger.fine("Finished importing files in directory "
+ archivesDirectory + "/.");
@@ -142,5 +262,20 @@ public class ArchiveReader {
+ "directory:\nParsed " + parsedFiles + ", ignored "
+ ignoredFiles + " files.");
}
+
+ private Map<String, Set<String>> microdescriptorValidAfterTimes =
+ new HashMap<String, Set<String>>();
+ public void haveParsedMicrodescConsensus(String validAfterTime,
+ SortedSet<String> microdescriptorDigests) {
+ for (String microdescriptor : microdescriptorDigests) {
+ if (!this.microdescriptorValidAfterTimes.containsKey(
+ microdescriptor)) {
+ this.microdescriptorValidAfterTimes.put(microdescriptor,
+ new HashSet<String>());
+ }
+ this.microdescriptorValidAfterTimes.get(microdescriptor).add(
+ validAfterTime);
+ }
+ }
}
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
index 1b4f774..a179f5b 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2012 The Tor Project
+/* Copyright 2010--2014 The Tor Project
* See LICENSE for licensing information */
package org.torproject.ernie.db.relaydescs;
@@ -73,12 +73,16 @@ public class ArchiveWriter extends Thread {
private Logger logger;
private File outputDirectory;
private DescriptorParser descriptorParser;
- private int storedConsensusesCounter = 0, storedVotesCounter = 0,
+ private int storedConsensusesCounter = 0,
+ storedMicrodescConsensusesCounter = 0, storedVotesCounter = 0,
storedCertsCounter = 0, storedServerDescriptorsCounter = 0,
- storedExtraInfoDescriptorsCounter = 0;
+ storedExtraInfoDescriptorsCounter = 0,
+ storedMicrodescriptorsCounter = 0;
private SortedMap<Long, SortedSet<String>> storedConsensuses =
new TreeMap<Long, SortedSet<String>>();
+ private SortedMap<Long, SortedSet<String>> storedMicrodescConsensuses =
+ new TreeMap<Long, SortedSet<String>>();
private SortedMap<Long, Integer> expectedVotes =
new TreeMap<Long, Integer>();
private SortedMap<Long, SortedMap<String, SortedSet<String>>>
@@ -88,11 +92,15 @@ public class ArchiveWriter extends Thread {
new TreeMap<Long, Map<String, String>>();
private SortedMap<Long, Set<String>> storedExtraInfoDescriptors =
new TreeMap<Long, Set<String>>();
+ private SortedMap<Long, Set<String>> storedMicrodescriptors =
+ new TreeMap<Long, Set<String>>();
private File storedServerDescriptorsFile = new File(
"stats/stored-server-descriptors");
private File storedExtraInfoDescriptorsFile = new File(
"stats/stored-extra-info-descriptors");
+ private File storedMicrodescriptorsFile = new File(
+ "stats/stored-microdescriptors");
private void loadDescriptorDigests() {
SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
@@ -155,6 +163,33 @@ public class ArchiveWriter extends Thread {
}
br.close();
}
+ if (this.storedMicrodescriptorsFile.exists()) {
+ BufferedReader br = new BufferedReader(new FileReader(
+ this.storedMicrodescriptorsFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(",");
+ if (parts.length != 2) {
+ this.logger.warning("Could not load microdescriptor digests "
+ + "because of illegal line '" + line + "'. We might not "
+ + "be able to correctly check descriptors for "
+ + "completeness.");
+ break;
+ }
+ long validAfter = dateTimeFormat.parse(parts[0]).getTime();
+ if (validAfter < this.now - 48L * 60L * 60L * 1000L) {
+ continue;
+ }
+ if (!this.storedMicrodescriptors.containsKey(validAfter)) {
+ this.storedMicrodescriptors.put(validAfter,
+ new HashSet<String>());
+ }
+ String microdescriptorDigest = parts[1];
+ this.storedMicrodescriptors.get(validAfter).add(
+ microdescriptorDigest);
+ }
+ br.close();
+ }
} catch (ParseException e) {
this.logger.log(Level.WARNING, "Could not load descriptor "
+ "digests. We might not be able to correctly check "
@@ -198,6 +233,18 @@ public class ArchiveWriter extends Thread {
}
}
bw.close();
+ this.storedMicrodescriptorsFile.getParentFile().mkdirs();
+ bw = new BufferedWriter(new FileWriter(
+ this.storedMicrodescriptorsFile));
+ for (Map.Entry<Long, Set<String>> e :
+ this.storedMicrodescriptors.entrySet()) {
+ String validAfter = dateTimeFormat.format(e.getKey());
+ for (String microdescriptorDigest : e.getValue()) {
+ bw.write(String.format("%s,%s%n", validAfter,
+ microdescriptorDigest));
+ }
+ }
+ bw.close();
} catch (IOException e) {
this.logger.log(Level.WARNING, "Could not save descriptor "
+ "digests. We might not be able to correctly check "
@@ -228,9 +275,11 @@ public class ArchiveWriter extends Thread {
rdd = new RelayDescriptorDownloader(rdp, dirSources,
config.getDownloadVotesByFingerprint(),
config.getDownloadCurrentConsensus(),
+ config.getDownloadCurrentMicrodescConsensus(),
config.getDownloadCurrentVotes(),
config.getDownloadMissingServerDescriptors(),
config.getDownloadMissingExtraInfoDescriptors(),
+ config.getDownloadMissingMicrodescriptors(),
config.getDownloadAllServerDescriptors(),
config.getDownloadAllExtraInfoDescriptors(),
config.getCompressRelayDescriptorDownloads());
@@ -314,15 +363,39 @@ public class ArchiveWriter extends Thread {
if (this.store(CONSENSUS_ANNOTATION, data, outputFiles)) {
this.storedConsensusesCounter++;
}
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
if (this.now - validAfter < 3L * 60L * 60L * 1000L) {
this.storedConsensuses.put(validAfter, serverDescriptorDigests);
this.expectedVotes.put(validAfter, dirSources.size());
}
}
+ private static final byte[] MICRODESCCONSENSUS_ANNOTATION =
+ "@type network-status-microdesc-consensus-3 1.0\n".getBytes();
+ public void storeMicrodescConsensus(byte[] data, long validAfter,
+ SortedSet<String> microdescriptorDigests) {
+ SimpleDateFormat yearMonthDirectoryFormat = new SimpleDateFormat(
+ "yyyy/MM");
+ yearMonthDirectoryFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat dayDirectoryFileFormat = new SimpleDateFormat(
+ "dd/yyyy-MM-dd-HH-mm-ss");
+ dayDirectoryFileFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ File tarballFile = new File(this.outputDirectory
+ + "/microdesc/" + yearMonthDirectoryFormat.format(validAfter)
+ + "/consensus-microdesc/"
+ + dayDirectoryFileFormat.format(validAfter)
+ + "-consensus-microdesc");
+ File rsyncFile = new File("rsync/relay-descriptors/microdescs/"
+ + "consensus-microdesc/" + tarballFile.getName());
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ if (this.store(MICRODESCCONSENSUS_ANNOTATION, data, outputFiles)) {
+ this.storedMicrodescConsensusesCounter++;
+ }
+ if (this.now - validAfter < 3L * 60L * 60L * 1000L) {
+ this.storedMicrodescConsensuses.put(validAfter,
+ microdescriptorDigests);
+ }
+ }
+
private static final byte[] VOTE_ANNOTATION =
"@type network-status-vote-3 1.0\n".getBytes();
public void storeVote(byte[] data, long validAfter,
@@ -340,9 +413,6 @@ public class ArchiveWriter extends Thread {
if (this.store(VOTE_ANNOTATION, data, outputFiles)) {
this.storedVotesCounter++;
}
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
if (this.now - validAfter < 3L * 60L * 60L * 1000L) {
if (!this.storedVotes.containsKey(validAfter)) {
this.storedVotes.put(validAfter,
@@ -384,9 +454,6 @@ public class ArchiveWriter extends Thread {
if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, outputFiles)) {
this.storedServerDescriptorsCounter++;
}
- SimpleDateFormat dateTimeFormat =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
if (this.now - published < 48L * 60L * 60L * 1000L) {
if (!this.storedServerDescriptors.containsKey(published)) {
this.storedServerDescriptors.put(published,
@@ -423,20 +490,57 @@ public class ArchiveWriter extends Thread {
}
}
+ private static final byte[] MICRODESCRIPTOR_ANNOTATION =
+ "@type microdescriptor 1.0\n".getBytes();
+ public void storeMicrodescriptor(byte[] data,
+ String microdescriptorDigest, long validAfter) {
+ /* TODO We could check here whether we already stored the
+ * microdescriptor in the same valid-after month. This can happen,
+ * e.g., when two relays share the same microdescriptor. In that case
+ * this method gets called twice and the second call overwrites the
+ * file written in the first call. However, this method must be
+ * called twice to store the same microdescriptor in two different
+ * valid-after months. */
+ SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
+ descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ File tarballFile = new File(this.outputDirectory + "/microdesc/"
+ + descriptorFormat.format(validAfter) + "micro/"
+ + microdescriptorDigest.substring(0, 1) + "/"
+ + microdescriptorDigest.substring(1, 2) + "/"
+ + microdescriptorDigest);
+ File rsyncFile = new File("rsync/relay-descriptors/microdescs/micro/"
+ + microdescriptorDigest);
+ File[] outputFiles = new File[] { tarballFile, rsyncFile };
+ if (this.store(MICRODESCRIPTOR_ANNOTATION, data, outputFiles)) {
+ this.storedMicrodescriptorsCounter++;
+ }
+ if (this.now - validAfter < 48L * 60L * 60L * 1000L) {
+ if (!this.storedMicrodescriptors.containsKey(validAfter)) {
+ this.storedMicrodescriptors.put(validAfter, new HashSet<String>());
+ }
+ this.storedMicrodescriptors.get(validAfter).add(
+ microdescriptorDigest);
+ }
+ }
+
private StringBuilder intermediateStats = new StringBuilder();
public void intermediateStats(String event) {
intermediateStats.append("While " + event + ", we stored "
+ this.storedConsensusesCounter + " consensus(es), "
- + this.storedVotesCounter + " vote(s), " + this.storedCertsCounter
- + " certificate(s), " + this.storedServerDescriptorsCounter
- + " server descriptor(s), and "
- + this.storedExtraInfoDescriptorsCounter
- + " extra-info descriptor(s) to disk.\n");
+ + this.storedMicrodescConsensusesCounter + " microdesc "
+ + "consensus(es), " + this.storedVotesCounter + " vote(s), "
+ + this.storedCertsCounter + " certificate(s), "
+ + this.storedServerDescriptorsCounter + " server descriptor(s), "
+ + this.storedExtraInfoDescriptorsCounter + " extra-info "
+ + "descriptor(s), and " + this.storedMicrodescriptorsCounter
+ + " microdescriptor(s) to disk.\n");
this.storedConsensusesCounter = 0;
+ this.storedMicrodescConsensusesCounter = 0;
this.storedVotesCounter = 0;
this.storedCertsCounter = 0;
this.storedServerDescriptorsCounter = 0;
this.storedExtraInfoDescriptorsCounter = 0;
+ this.storedMicrodescriptorsCounter = 0;
}
private void checkMissingDescriptors() {
@@ -444,8 +548,7 @@ public class ArchiveWriter extends Thread {
+ "descriptors to disk.\n");
sb.append(intermediateStats.toString());
sb.append("Statistics on the completeness of written relay "
- + "descriptors of the last 3 consensuses (Consensus/Vote, "
- + "valid-after, votes, server descriptors, extra-infos):");
+ + "descriptors:");
SimpleDateFormat dateTimeFormat =
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -460,7 +563,12 @@ public class ArchiveWriter extends Thread {
this.storedExtraInfoDescriptors.values()) {
knownExtraInfoDescriptors.addAll(descriptors);
}
- boolean missingDescriptors = false, missingVotes = false;
+ Set<String> knownMicrodescriptors = new HashSet<String>();
+ for (Set<String> descriptors : this.storedMicrodescriptors.values()) {
+ knownMicrodescriptors.addAll(descriptors);
+ }
+ boolean missingDescriptors = false, missingVotes = false,
+ missingMicrodescConsensus = false;
for (Map.Entry<Long, SortedSet<String>> c :
this.storedConsensuses.entrySet()) {
long validAfterMillis = c.getKey();
@@ -491,14 +599,23 @@ public class ArchiveWriter extends Thread {
}
}
}
- sb.append(String.format("%nV, %s, NA, %d/%d (%.1f%%), "
- + "%d/%d (%.1f%%)", validAfterTime,
- voteFoundServerDescs, voteAllServerDescs,
- 100.0D * (double) voteFoundServerDescs /
- (double) voteAllServerDescs,
- voteFoundExtraInfos, voteAllExtraInfos,
- 100.0D * (double) voteFoundExtraInfos /
- (double) voteAllExtraInfos));
+ sb.append("\nV, " + validAfterTime);
+ if (voteAllServerDescs > 0) {
+ sb.append(String.format(", %d/%d S (%.1f%%)",
+ voteFoundServerDescs, voteAllServerDescs,
+ 100.0D * (double) voteFoundServerDescs /
+ (double) voteAllServerDescs));
+ } else {
+ sb.append(", 0/0 S");
+ }
+ if (voteAllExtraInfos > 0) {
+ sb.append(String.format(", %d/%d E (%.1f%%)",
+ voteFoundExtraInfos, voteAllExtraInfos,
+ 100.0D * (double) voteFoundExtraInfos /
+ (double) voteAllExtraInfos));
+ } else {
+ sb.append(", 0/0 E");
+ }
if (voteFoundServerDescs * 1000 < voteAllServerDescs * 995 ||
voteFoundExtraInfos * 1000 < voteAllExtraInfos * 995) {
missingDescriptors = true;
@@ -506,7 +623,8 @@ public class ArchiveWriter extends Thread {
}
}
int foundServerDescs = 0, allServerDescs = 0, foundExtraInfos = 0,
- allExtraInfos = 0;
+ allExtraInfos = 0, foundMicrodescriptors = 0,
+ allMicrodescriptors = 0;
for (String serverDescriptorDigest : c.getValue()) {
allServerDescs++;
if (knownServerDescriptors.containsKey(
@@ -524,16 +642,50 @@ public class ArchiveWriter extends Thread {
}
}
}
- sb.append(String.format("%nC, %s, %d/%d (%.1f%%), "
- + "%d/%d (%.1f%%), %d/%d (%.1f%%)",
- validAfterTime, foundVotes, allVotes,
- 100.0D * (double) foundVotes / (double) allVotes,
- foundServerDescs, allServerDescs,
- 100.0D * (double) foundServerDescs / (double) allServerDescs,
- foundExtraInfos, allExtraInfos,
- 100.0D * (double) foundExtraInfos / (double) allExtraInfos));
+ sb.append("\nC, " + validAfterTime);
+ if (allVotes > 0) {
+ sb.append(String.format(", %d/%d V (%.1f%%)", foundVotes, allVotes,
+ 100.0D * (double) foundVotes / (double) allVotes));
+ } else {
+ sb.append(", 0/0 V");
+ }
+ if (allServerDescs > 0) {
+ sb.append(String.format(", %d/%d S (%.1f%%)", foundServerDescs,
+ allServerDescs, 100.0D * (double) foundServerDescs /
+ (double) allServerDescs));
+ } else {
+ sb.append(", 0/0 S");
+ }
+ if (allExtraInfos > 0) {
+ sb.append(String.format(", %d/%d E (%.1f%%)", foundExtraInfos,
+ allExtraInfos, 100.0D * (double) foundExtraInfos /
+ (double) allExtraInfos));
+ } else {
+ sb.append(", 0/0 E");
+ }
+ if (this.storedMicrodescConsensuses.containsKey(validAfterMillis)) {
+ for (String microdescriptorDigest :
+ this.storedMicrodescConsensuses.get(validAfterMillis)) {
+ allMicrodescriptors++;
+ if (knownMicrodescriptors.contains(microdescriptorDigest)) {
+ foundMicrodescriptors++;
+ }
+ }
+ sb.append("\nM, " + validAfterTime);
+ if (allMicrodescriptors > 0) {
+ sb.append(String.format(", %d/%d M (%.1f%%)",
+ foundMicrodescriptors, allMicrodescriptors,
+ 100.0D * (double) foundMicrodescriptors /
+ (double) allMicrodescriptors));
+ } else {
+ sb.append(", 0/0 M");
+ }
+ } else {
+ missingMicrodescConsensus = true;
+ }
if (foundServerDescs * 1000 < allServerDescs * 995 ||
- foundExtraInfos * 1000 < allExtraInfos * 995) {
+ foundExtraInfos * 1000 < allExtraInfos * 995 ||
+ foundMicrodescriptors * 1000 < allMicrodescriptors * 995) {
missingDescriptors = true;
}
if (foundVotes < allVotes) {
@@ -544,12 +696,22 @@ public class ArchiveWriter extends Thread {
if (missingDescriptors) {
this.logger.warning("We are missing at least 0.5% of server or "
+ "extra-info descriptors referenced from a consensus or "
- + "vote.");
+ + "vote or at least 0.5% of microdescriptors referenced from a "
+ + "microdesc consensus.");
}
if (missingVotes) {
+ /* TODO Shouldn't warn if we're not trying to archive votes at
+ * all. */
this.logger.warning("We are missing at least one vote that was "
+ "referenced from a consensus.");
}
+ if (missingMicrodescConsensus) {
+ /* TODO Shouldn't warn if we're not trying to archive microdesc
+ * consensuses at all. */
+ this.logger.warning("We are missing at least one microdesc "
+ + "consensus that was published together with a known "
+ + "consensus.");
+ }
}
private void checkStaledescriptors() {
@@ -564,6 +726,14 @@ public class ArchiveWriter extends Thread {
+ dateTimeFormat.format(this.storedConsensuses.lastKey())
+ ", which is more than 5:30 hours in the past.");
}
+ if (!this.storedMicrodescConsensuses.isEmpty() &&
+ this.storedMicrodescConsensuses.lastKey() < tooOldMillis) {
+ this.logger.warning("The last known relay network status "
+ + "microdesc consensus was valid after "
+ + dateTimeFormat.format(
+ this.storedMicrodescConsensuses.lastKey())
+ + ", which is more than 5:30 hours in the past.");
+ }
if (!this.storedVotes.isEmpty() &&
this.storedVotes.lastKey() < tooOldMillis) {
this.logger.warning("The last known relay network status vote "
@@ -585,6 +755,13 @@ public class ArchiveWriter extends Thread {
this.storedExtraInfoDescriptors.lastKey())
+ ", which is more than 5:30 hours in the past.");
}
+ if (!this.storedMicrodescriptors.isEmpty() &&
+ this.storedMicrodescriptors.lastKey() < tooOldMillis) {
+ this.logger.warning("The last known relay microdescriptor was "
+ + "contained in a microdesc consensus that was valid after "
+ + dateTimeFormat.format(this.storedMicrodescriptors.lastKey())
+ + ", which is more than 5:30 hours in the past.");
+ }
}
/* Delete all files from the rsync directory that have not been modified
diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java
index ce2f16a..1c8a375 100644
--- a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java
+++ b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2012 The Tor Project
+/* Copyright 2010--2014 The Tor Project
* See LICENSE for licensing information */
package org.torproject.ernie.db.relaydescs;
@@ -13,9 +13,9 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
+import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@@ -31,6 +31,9 @@ import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.InflaterInputStream;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
+
/**
* Downloads relay descriptors from the directory authorities via HTTP.
* Keeps a list of missing descriptors that gets updated by parse results
@@ -46,9 +49,11 @@ public class RelayDescriptorDownloader {
* want to download. Lines are formatted as:
*
* - "consensus,<validafter>,<parsed>",
+ * - "consensus-microdesc,<validafter>,<parsed>",
* - "vote,<validafter>,<fingerprint>,<parsed>",
- * - "server,<published>,<relayid>,<descid>,<parsed>", or
- * - "extra,<published>,<relayid>,<descid>,<parsed>".
+ * - "server,<published>,<relayid>,<descid>,<parsed>",
+ * - "extra,<published>,<relayid>,<descid>,<parsed>", or
+ * - "micro,<validafter>,<relayid>,<descid>,<parsed>".
*/
private File missingDescriptorsFile;
@@ -61,6 +66,27 @@ public class RelayDescriptorDownloader {
private SortedMap<String, String> missingDescriptors;
/**
+ * Map from base64 microdescriptor digests to keys in missingDescriptors
+ * ("micro,<validafter>,<relayid>,<descid>"). We need this map, because
+ * we can't learn <validafter> or <relayid> from parsing
+ * microdescriptors, but we need to know <validafter> to store
+ * microdescriptors to disk and both <validafter> and <relayid> to
+ * remove microdescriptors from the missing list. There are potentially
+ * many matching keys in missingDescriptors for the same microdescriptor
+ * digest. Also, in rare cases relays share the same microdescriptor
+ * (which is only possible if they share the same onion key), and then
+ * we don't have to download their microdescriptor more than once.
+ */
+ private Map<String, Set<String>> microdescriptorKeys;
+
+ /**
+ * Set of microdescriptor digests that are currently missing. Used for
+ * logging statistics instead of "micro,<validafter>,..." keys which may
+ * contain the same microdescriptor digest multiple times.
+ */
+ private Set<String> missingMicrodescriptors;
+
+ /**
* Text file containing the IP addresses (and Dir ports if not 80) of
* directory authorities and when we last downloaded all server and
* extra-info descriptors from them, so that we can avoid downloading
@@ -99,6 +125,12 @@ public class RelayDescriptorDownloader {
private boolean downloadCurrentConsensus;
/**
+ * Should we try to download the current microdesc consensus if we don't
+ * have it?
+ */
+ private boolean downloadCurrentMicrodescConsensus;
+
+ /**
* Should we try to download current votes if we don't have them?
*/
private boolean downloadCurrentVotes;
@@ -116,6 +148,12 @@ public class RelayDescriptorDownloader {
private boolean downloadMissingExtraInfos;
/**
+ * Should we try to download missing microdescriptors that have been
+ * published within the past 24 hours?
+ */
+ private boolean downloadMissingMicrodescriptors;
+
+ /**
* Should we try to download all server descriptors from the authorities
* once every 24 hours?
*/
@@ -134,11 +172,11 @@ public class RelayDescriptorDownloader {
private boolean downloadCompressed;
/**
- * valid-after time that we expect the current consensus and votes to
- * have, formatted "yyyy-MM-dd HH:mm:ss". We only expect to find
- * consensuses and votes with this valid-after time on the directory
- * authorities. This time is initialized as the beginning of the current
- * hour.
+ * valid-after time that we expect the current consensus,
+ * microdesc consensus, and votes to have, formatted
+ * "yyyy-MM-dd HH:mm:ss". We only expect to find documents with this
+ * valid-after time on the directory authorities. This time is
+ * initialized as the beginning of the current hour.
*/
private String currentValidAfter;
@@ -186,19 +224,25 @@ public class RelayDescriptorDownloader {
* that we requested, and that we successfully downloaded in this
* execution.
*/
- private int oldMissingConsensuses = 0, oldMissingVotes = 0,
+ private int oldMissingConsensuses = 0,
+ oldMissingMicrodescConsensuses = 0, oldMissingVotes = 0,
oldMissingServerDescriptors = 0, oldMissingExtraInfoDescriptors = 0,
- newMissingConsensuses = 0, newMissingVotes = 0,
+ oldMissingMicrodescriptors = 0, newMissingConsensuses = 0,
+ newMissingMicrodescConsensuses = 0, newMissingVotes = 0,
newMissingServerDescriptors = 0, newMissingExtraInfoDescriptors = 0,
- requestedConsensuses = 0, requestedVotes = 0,
+ newMissingMicrodescriptors = 0, requestedConsensuses = 0,
+ requestedMicrodescConsensuses = 0, requestedVotes = 0,
requestedMissingServerDescriptors = 0,
requestedAllServerDescriptors = 0,
requestedMissingExtraInfoDescriptors = 0,
- requestedAllExtraInfoDescriptors = 0, downloadedConsensuses = 0,
- downloadedVotes = 0, downloadedMissingServerDescriptors = 0,
+ requestedAllExtraInfoDescriptors = 0,
+ requestedMissingMicrodescriptors = 0, downloadedConsensuses = 0,
+ downloadedMicrodescConsensuses = 0, downloadedVotes = 0,
+ downloadedMissingServerDescriptors = 0,
downloadedAllServerDescriptors = 0,
downloadedMissingExtraInfoDescriptors = 0,
- downloadedAllExtraInfoDescriptors = 0;
+ downloadedAllExtraInfoDescriptors = 0,
+ downloadedMissingMicrodescriptors = 0;
/**
* Initializes this class, including reading in missing descriptors from
@@ -209,9 +253,11 @@ public class RelayDescriptorDownloader {
public RelayDescriptorDownloader(RelayDescriptorParser rdp,
List<String> authorities, List<String> authorityFingerprints,
boolean downloadCurrentConsensus,
+ boolean downloadCurrentMicrodescConsensus,
boolean downloadCurrentVotes,
boolean downloadMissingServerDescriptors,
boolean downloadMissingExtraInfos,
+ boolean downloadMissingMicrodescriptors,
boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos,
boolean downloadCompressed) {
@@ -221,10 +267,14 @@ public class RelayDescriptorDownloader {
this.authorityFingerprints = new ArrayList<String>(
authorityFingerprints);
this.downloadCurrentConsensus = downloadCurrentConsensus;
+ this.downloadCurrentMicrodescConsensus =
+ downloadCurrentMicrodescConsensus;
this.downloadCurrentVotes = downloadCurrentVotes;
this.downloadMissingServerDescriptors =
downloadMissingServerDescriptors;
this.downloadMissingExtraInfos = downloadMissingExtraInfos;
+ this.downloadMissingMicrodescriptors =
+ downloadMissingMicrodescriptors;
this.downloadAllServerDescriptors = downloadAllServerDescriptors;
this.downloadAllExtraInfos = downloadAllExtraInfos;
this.downloadCompressed = downloadCompressed;
@@ -253,6 +303,8 @@ public class RelayDescriptorDownloader {
* we are interested in and that are likely to be found on the
* directory authorities. */
this.missingDescriptors = new TreeMap<String, String>();
+ this.microdescriptorKeys = new HashMap<String, Set<String>>();
+ this.missingMicrodescriptors = new HashSet<String>();
this.missingDescriptorsFile = new File(
"stats/missing-relay-descriptors");
if (this.missingDescriptorsFile.exists()) {
@@ -266,15 +318,19 @@ public class RelayDescriptorDownloader {
if (line.split(",").length > 2) {
String published = line.split(",")[1];
if (((line.startsWith("consensus,") ||
+ line.startsWith("consensus-microdesc,") ||
line.startsWith("vote,")) &&
this.currentValidAfter.equals(published)) ||
((line.startsWith("server,") ||
- line.startsWith("extra,")) &&
+ line.startsWith("extra,") ||
+ line.startsWith("micro,")) &&
this.descriptorCutOff.compareTo(published) < 0)) {
if (!line.endsWith("NA")) {
/* Not missing. */
} else if (line.startsWith("consensus,")) {
oldMissingConsensuses++;
+ } else if (line.startsWith("consensus-microdesc,")) {
+ oldMissingMicrodescConsensuses++;
} else if (line.startsWith("vote,")) {
oldMissingVotes++;
} else if (line.startsWith("server,")) {
@@ -285,6 +341,23 @@ public class RelayDescriptorDownloader {
int separateAt = line.lastIndexOf(",");
this.missingDescriptors.put(line.substring(0,
separateAt), line.substring(separateAt + 1));
+ if (line.startsWith("micro,")) {
+ String microdescriptorDigest = line.split(",")[3];
+ String microdescriptorKey = line.substring(0,
+ line.lastIndexOf(","));
+ if (!this.microdescriptorKeys.containsKey(
+ microdescriptorDigest)) {
+ this.microdescriptorKeys.put(
+ microdescriptorDigest, new HashSet<String>());
+ }
+ this.microdescriptorKeys.get(microdescriptorDigest).add(
+ microdescriptorKey);
+ if (line.endsWith("NA") && !this.missingMicrodescriptors.
+ contains(microdescriptorDigest)) {
+ this.missingMicrodescriptors.add(microdescriptorDigest);
+ oldMissingMicrodescriptors++;
+ }
+ }
}
} else {
this.logger.fine("Invalid line '" + line + "' in "
@@ -401,6 +474,65 @@ public class RelayDescriptorDownloader {
}
/**
+ * We have parsed a microdesc consensus. Take this microdesc consensus
+ * off the missing list and add the <code>microdescriptors</code> which
+ * are in the format "<validafter>,<relayid>,<descid>" to that
+ * list.
+ */
+ public void haveParsedMicrodescConsensus(String validAfter,
+     Set<String> microdescriptors) {
+
+   /* If this is the microdesc consensus we expect for the current hour,
+    * record the time at which we parsed it. */
+   if (this.currentValidAfter.equals(validAfter)) {
+     this.missingDescriptors.put("consensus-microdesc," + validAfter,
+         this.currentTimestamp);
+   }
+
+   /* Only microdesc consensuses newer than the descriptor cut-off add
+    * microdescriptors to the missing list. */
+   if (this.descriptorCutOff.compareTo(validAfter) >= 0) {
+     return;
+   }
+
+   /* A microdescriptor counts as already downloaded only if some other
+    * key with the same digest and a valid-after time in the same month
+    * carries a parsed timestamp. Each microdescriptor is downloaded at
+    * least once per month so that it never has to be copied from an
+    * earlier month to the current one. */
+   String yearMonth = validAfter.substring(0, "YYYY-MM".length());
+   for (String entry : microdescriptors) {
+     String newKey = "micro," + entry;
+     String digest = entry.split(",")[2];
+     String parsedAt = "NA";
+     Set<String> keysForDigest = this.microdescriptorKeys.get(digest);
+     if (keysForDigest == null) {
+       /* First time we see this digest. */
+       keysForDigest = new HashSet<String>();
+       this.microdescriptorKeys.put(digest, keysForDigest);
+     } else {
+       for (String knownKey : keysForDigest) {
+         /* Field 1 of a "micro,..." key is its valid-after time. */
+         if (knownKey.split(",")[1].startsWith(yearMonth)) {
+           String knownParsedAt = this.missingDescriptors.get(knownKey);
+           if (knownParsedAt != null && !knownParsedAt.equals("NA")) {
+             parsedAt = knownParsedAt;
+             break;
+           }
+         }
+       }
+     }
+     keysForDigest.add(newKey);
+     this.missingDescriptors.put(newKey, parsedAt);
+
+     /* Count each missing digest once, however many keys refer to it. */
+     if (parsedAt.equals("NA")
+         && this.missingMicrodescriptors.add(digest)) {
+       this.newMissingMicrodescriptors++;
+     }
+   }
+ }
+
+ /**
* We have parsed a vote. Take this vote off the missing list and add
* the <code>serverDescriptors</code> which are in the format
* "<published>,<relayid>,<descid>" to that list.
@@ -470,6 +602,23 @@ public class RelayDescriptorDownloader {
}
/**
+ * We have parsed a microdescriptor. Take it off the missing list.
+ */
+ public void haveParsedMicrodescriptor(String descriptorDigest) {
+   /* descriptorDigest is the digest under which microdescriptorKeys
+    * indexes this microdescriptor. One digest may be referenced by
+    * several "micro,<validafter>,<relayid>,<descid>" keys, one per
+    * microdesc consensus that lists it. */
+   if (this.microdescriptorKeys.containsKey(descriptorDigest)) {
+     for (String microdescriptorKey :
+         this.microdescriptorKeys.get(descriptorDigest)) {
+       /* Field 0 of the key is the literal "micro" prefix; the
+        * valid-after time is field 1. */
+       String validAfter = microdescriptorKey.split(",")[1];
+       if (this.descriptorCutOff.compareTo(validAfter) < 0) {
+         this.missingDescriptors.put(microdescriptorKey,
+             this.currentTimestamp);
+       }
+     }
+     /* The digest is no longer missing, whichever key led us to it. */
+     this.missingMicrodescriptors.remove(descriptorDigest);
+   }
+ }
+
+ /**
* Downloads missing descriptors that we think might still be available
* on the directory authorities as well as all server and extra-info
* descriptors once per day.
@@ -483,6 +632,12 @@ public class RelayDescriptorDownloader {
this.missingDescriptors.put(consensusKey, "NA");
this.newMissingConsensuses++;
}
+ String microdescConsensusKey = "consensus-microdesc,"
+ + this.currentValidAfter;
+ if (!this.missingDescriptors.containsKey(microdescConsensusKey)) {
+ this.missingDescriptors.put(microdescConsensusKey, "NA");
+ this.newMissingMicrodescConsensuses++;
+ }
for (String authority : authorityFingerprints) {
String voteKey = "vote," + this.currentValidAfter + "," + authority;
if (!this.missingDescriptors.containsKey(voteKey)) {
@@ -516,6 +671,19 @@ public class RelayDescriptorDownloader {
}
}
+ /* Then try to download the microdesc consensus. */
+ if (downloadCurrentMicrodescConsensus) {
+ if (this.missingDescriptors.containsKey(
+ microdescConsensusKey) &&
+ this.missingDescriptors.get(microdescConsensusKey).
+ equals("NA")) {
+ this.requestedMicrodescConsensuses++;
+ this.downloadedMicrodescConsensuses +=
+ this.downloadResourceFromAuthority(authority,
+ "/tor/status-vote/current/consensus-microdesc");
+ }
+ }
+
/* Next, try to download current votes that we're missing. */
if (downloadCurrentVotes) {
String voteKeyPrefix = "vote," + this.currentValidAfter;
@@ -538,10 +706,9 @@ public class RelayDescriptorDownloader {
/* Download either all server and extra-info descriptors or only
* those that we're missing. Start with server descriptors, then
- * request extra-info descriptors. */
- List<String> types = new ArrayList<String>(Arrays.asList(
- "server,extra".split(",")));
- for (String type : types) {
+ * request extra-info descriptors. Finally, request missing
+ * microdescriptors. */
+ for (String type : new String[] { "server", "extra", "micro" }) {
/* Download all server or extra-info descriptors from this
* authority if we haven't done so for 24 hours and if we're
@@ -557,21 +724,24 @@ public class RelayDescriptorDownloader {
this.requestedAllServerDescriptors++;
this.downloadedAllServerDescriptors +=
downloadedAllDescriptors;
- } else {
+ } else if (type.equals("extra")) {
this.requestedAllExtraInfoDescriptors++;
this.downloadedAllExtraInfoDescriptors +=
downloadedAllDescriptors;
}
- /* Download missing server or extra-info descriptors if we're
- * configured to do so. */
+ /* Download missing server descriptors, extra-info descriptors,
+ * and microdescriptors if we're configured to do so. */
} else if ((type.equals("server") &&
this.downloadMissingServerDescriptors) ||
- (type.equals("extra") && this.downloadMissingExtraInfos)) {
+ (type.equals("extra") && this.downloadMissingExtraInfos) ||
+ (type.equals("micro") &&
+ this.downloadMissingMicrodescriptors)) {
/* Go through the list of missing descriptors of this type
* and combine the descriptor identifiers to a URL of up to
- * 96 descriptors that we can download at once. */
+ * 96 server or extra-info descriptors or 92 microdescriptors
+ * that we can download at once. */
SortedSet<String> descriptorIdentifiers =
new TreeSet<String>();
for (Map.Entry<String, String> e :
@@ -587,8 +757,12 @@ public class RelayDescriptorDownloader {
StringBuilder combinedResource = null;
int descriptorsInCombinedResource = 0,
requestedDescriptors = 0, downloadedDescriptors = 0;
+ int maxDescriptorsInCombinedResource =
+ type.equals("micro") ? 92 : 96;
+ String separator = type.equals("micro") ? "-" : "+";
for (String descriptorIdentifier : descriptorIdentifiers) {
- if (descriptorsInCombinedResource >= 96) {
+ if (descriptorsInCombinedResource >=
+ maxDescriptorsInCombinedResource) {
requestedDescriptors += descriptorsInCombinedResource;
downloadedDescriptors +=
this.downloadResourceFromAuthority(authority,
@@ -600,7 +774,7 @@ public class RelayDescriptorDownloader {
combinedResource = new StringBuilder("/tor/" + type
+ "/d/" + descriptorIdentifier);
} else {
- combinedResource.append("+" + descriptorIdentifier);
+ combinedResource.append(separator + descriptorIdentifier);
}
descriptorsInCombinedResource++;
}
@@ -615,11 +789,16 @@ public class RelayDescriptorDownloader {
requestedDescriptors;
this.downloadedMissingServerDescriptors +=
downloadedDescriptors;
- } else {
+ } else if (type.equals("extra")) {
this.requestedMissingExtraInfoDescriptors +=
requestedDescriptors;
this.downloadedMissingExtraInfoDescriptors +=
downloadedDescriptors;
+ } else if (type.equals("micro")) {
+ this.requestedMissingMicrodescriptors +=
+ requestedDescriptors;
+ this.downloadedMissingMicrodescriptors +=
+ downloadedDescriptors;
}
}
}
@@ -680,7 +859,8 @@ public class RelayDescriptorDownloader {
receivedDescriptors = 1;
} else if (resource.startsWith("/tor/server/") ||
resource.startsWith("/tor/extra/")) {
- if (resource.equals("/tor/server/all")) {
+ if (resource.equals("/tor/server/all") ||
+ resource.equals("/tor/extra/all")) {
this.lastDownloadedAllDescriptors.put(authority,
this.currentTimestamp);
}
@@ -715,6 +895,60 @@ public class RelayDescriptorDownloader {
this.rdp.parse(descBytes);
receivedDescriptors++;
}
+ } else if (resource.startsWith("/tor/micro/")) {
+ /* TODO We need to parse microdescriptors ourselves, rather than
+ * leaving that to RelayDescriptorParser, because only we know the
+ * valid-after time(s) of the microdesc consensus(es) containing this
+ * microdescriptor. However, this breaks functional abstraction
+ * pretty badly. */
+ SimpleDateFormat parseFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String ascii = null;
+ try {
+ ascii = new String(allData, "US-ASCII");
+ } catch (UnsupportedEncodingException e) {
+ /* No way that US-ASCII is not supported. */
+ }
+ int start = -1, end = -1;
+ String startToken = "onion-key\n";
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ end = ascii.indexOf(startToken, start + 1);
+ if (end < 0) {
+ end = ascii.length();
+ if (end <= start) {
+ break;
+ }
+ }
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0, end - start);
+ String digest256Base64 = Base64.encodeBase64String(
+ DigestUtils.sha256(descBytes)).replaceAll("=", "");
+ if (!this.microdescriptorKeys.containsKey(digest256Base64)) {
+ continue;
+ }
+ String digest256Hex = DigestUtils.sha256Hex(descBytes);
+ for (String microdescriptorKey :
+ this.microdescriptorKeys.get(digest256Base64)) {
+ String validAfterTime = microdescriptorKey.split(",")[1];
+ try {
+ long validAfter =
+ parseFormat.parse(validAfterTime).getTime();
+ this.rdp.storeMicrodescriptor(descBytes, digest256Hex,
+ digest256Base64, validAfter);
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Could not parse "
+ + "valid-after time '" + validAfterTime + "' in "
+ + "microdescriptor key. Not storing microdescriptor.",
+ e);
+ }
+ }
+ receivedDescriptors++;
+ }
}
}
return receivedDescriptors;
@@ -727,8 +961,9 @@ public class RelayDescriptorDownloader {
public void writeFile() {
/* Write missing descriptors file to disk. */
- int missingConsensuses = 0, missingVotes = 0,
- missingServerDescriptors = 0, missingExtraInfoDescriptors = 0;
+ int missingConsensuses = 0, missingMicrodescConsensuses = 0,
+ missingVotes = 0, missingServerDescriptors = 0,
+ missingExtraInfoDescriptors = 0;
try {
this.logger.fine("Writing file "
+ this.missingDescriptorsFile.getAbsolutePath() + "...");
@@ -742,12 +977,15 @@ public class RelayDescriptorDownloader {
/* Not missing. */
} else if (key.startsWith("consensus,")) {
missingConsensuses++;
+ } else if (key.startsWith("consensus-microdesc,")) {
+ missingMicrodescConsensuses++;
} else if (key.startsWith("vote,")) {
missingVotes++;
} else if (key.startsWith("server,")) {
missingServerDescriptors++;
} else if (key.startsWith("extra,")) {
missingExtraInfoDescriptors++;
+ } else if (key.startsWith("micro,")) {
}
bw.write(key + "," + value + "\n");
}
@@ -758,6 +996,7 @@ public class RelayDescriptorDownloader {
this.logger.log(Level.WARNING, "Failed writing "
+ this.missingDescriptorsFile.getAbsolutePath() + "!", e);
}
+ int missingMicrodescriptors = this.missingMicrodescriptors.size();
/* Write text file containing the directory authorities and when we
* last downloaded all server and extra-info descriptors from them to
@@ -790,25 +1029,33 @@ public class RelayDescriptorDownloader {
+ "directory authorities.");
this.logger.info("At the beginning of this execution, we were "
+ "missing " + oldMissingConsensuses + " consensus(es), "
+ + oldMissingMicrodescConsensuses + " microdesc consensus(es), "
+ oldMissingVotes + " vote(s), " + oldMissingServerDescriptors
- + " server descriptor(s), and " + oldMissingExtraInfoDescriptors
- + " extra-info descriptor(s).");
+ + " server descriptor(s), " + oldMissingExtraInfoDescriptors
+ + " extra-info descriptor(s), and " + oldMissingMicrodescriptors
+ + " microdescriptor(s).");
this.logger.info("During this execution, we added "
+ this.newMissingConsensuses + " consensus(es), "
- + this.newMissingVotes + " vote(s), "
- + this.newMissingServerDescriptors + " server descriptor(s), and "
- + this.newMissingExtraInfoDescriptors + " extra-info "
- + "descriptor(s) to the missing list, some of which we also "
+ + this.newMissingMicrodescConsensuses
+ + " microdesc consensus(es), " + this.newMissingVotes
+ + " vote(s), " + this.newMissingServerDescriptors
+ + " server descriptor(s), " + this.newMissingExtraInfoDescriptors
+ + " extra-info descriptor(s), and "
+ + this.newMissingMicrodescriptors + " microdescriptor(s) to the "
+ + "missing list, some of which we also "
+ "requested and removed from the list again.");
this.logger.info("We requested " + this.requestedConsensuses
- + " consensus(es), " + this.requestedVotes + " vote(s), "
- + this.requestedMissingServerDescriptors + " missing server "
- + "descriptor(s), " + this.requestedAllServerDescriptors
+ + " consensus(es), " + this.requestedMicrodescConsensuses
+ + " microdesc consensus(es), " + this.requestedVotes
+ + " vote(s), " + this.requestedMissingServerDescriptors
+ + " missing server descriptor(s), "
+ + this.requestedAllServerDescriptors
+ " times all server descriptors, "
+ this.requestedMissingExtraInfoDescriptors + " missing "
- + "extra-info descriptor(s), and "
+ + "extra-info descriptor(s), "
+ this.requestedAllExtraInfoDescriptors + " times all extra-info "
- + "descriptors from the directory authorities.");
+ + "descriptors, and " + this.requestedMissingMicrodescriptors
+ + " missing microdescriptor(s) from the directory authorities.");
StringBuilder sb = new StringBuilder();
for (String authority : this.authorities) {
sb.append(" " + authority + "="
@@ -818,20 +1065,26 @@ public class RelayDescriptorDownloader {
+ "authorities:" + sb.toString());
this.logger.info("We successfully downloaded "
+ this.downloadedConsensuses + " consensus(es), "
- + this.downloadedVotes + " vote(s), "
- + this.downloadedMissingServerDescriptors + " missing server "
- + "descriptor(s), " + this.downloadedAllServerDescriptors
+ + this.downloadedMicrodescConsensuses
+ + " microdesc consensus(es), " + this.downloadedVotes
+ + " vote(s), " + this.downloadedMissingServerDescriptors
+ + " missing server descriptor(s), "
+ + this.downloadedAllServerDescriptors
+ " server descriptor(s) when downloading all descriptors, "
+ this.downloadedMissingExtraInfoDescriptors + " missing "
- + "extra-info descriptor(s) and "
+ + "extra-info descriptor(s), "
+ this.downloadedAllExtraInfoDescriptors + " extra-info "
- + "descriptor(s) when downloading all descriptors.");
+ + "descriptor(s) when downloading all descriptors, and "
+ + this.downloadedMissingMicrodescriptors
+ + " missing microdescriptor(s).");
this.logger.info("At the end of this execution, we are missing "
- + missingConsensuses + " consensus(es), " + missingVotes
- + " vote(s), " + missingServerDescriptors + " server "
- + "descriptor(s), and " + missingExtraInfoDescriptors
- + " extra-info descriptor(s), some of which we may try in the next "
- + "execution.");
+ + missingConsensuses + " consensus(es), "
+ + missingMicrodescConsensuses + " microdesc consensus(es), "
+ + missingVotes + " vote(s), " + missingServerDescriptors
+ + " server descriptor(s), " + missingExtraInfoDescriptors
+ + " extra-info descriptor(s), and " + missingMicrodescriptors
+ + " microdescriptor(s), some of which we may try in the next "
+ + "execution.");
}
}
diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java
index 107ba73..2873909 100644
--- a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java
+++ b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2012 The Tor Project
+/* Copyright 2010--2014 The Tor Project
* See LICENSE for licensing information */
package org.torproject.ernie.db.relaydescs;
@@ -31,6 +31,8 @@ public class RelayDescriptorParser {
*/
private ArchiveWriter aw;
+ private ArchiveReader ar;
+
/**
* Missing descriptor downloader that uses the parse results to learn
* which descriptors we are missing and want to download.
@@ -62,7 +64,12 @@ public class RelayDescriptorParser {
this.rdd = rdd;
}
- public void parse(byte[] data) {
+ /**
+  * Sets the archive reader that <code>parse</code> informs, via
+  * <code>haveParsedMicrodescConsensus</code>, about the
+  * microdescriptor digests contained in each parsed microdesc
+  * consensus.
+  */
+ public void setArchiveReader(ArchiveReader ar) {
+ this.ar = ar;
+ }
+
+ public boolean parse(byte[] data) {
+ boolean stored = false;
try {
/* Convert descriptor to ASCII for parsing. This means we'll lose
* the non-ASCII chars, but we don't care about them for parsing
@@ -76,21 +83,27 @@ public class RelayDescriptorParser {
if (line == null) {
this.logger.fine("We were given an empty descriptor for "
+ "parsing. Ignoring.");
- return;
+ return false;
}
SimpleDateFormat parseFormat =
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- if (line.equals("network-status-version 3")) {
- boolean isConsensus = true;
+ if (line.startsWith("network-status-version 3")) {
+ String statusType = "consensus";
+ if (line.equals("network-status-version 3 microdesc")) {
+ statusType = "consensus-microdesc";
+ }
String validAfterTime = null, fingerprint = null,
dirSource = null;
long validAfter = -1L, dirKeyPublished = -1L;
SortedSet<String> dirSources = new TreeSet<String>();
SortedSet<String> serverDescriptors = new TreeSet<String>();
SortedSet<String> serverDescriptorDigests = new TreeSet<String>();
+ SortedSet<String> microdescriptorKeys = new TreeSet<String>();
+ SortedSet<String> microdescriptorDigests = new TreeSet<String>();
StringBuilder certificateStringBuilder = null;
String certificateString = null;
+ String lastRelayIdentity = null;
while ((line = br.readLine()) != null) {
if (certificateStringBuilder != null) {
if (line.startsWith("r ")) {
@@ -101,7 +114,7 @@ public class RelayDescriptorParser {
}
}
if (line.equals("vote-status vote")) {
- isConsensus = false;
+ statusType = "vote";
} else if (line.startsWith("valid-after ")) {
validAfterTime = line.substring("valid-after ".length());
validAfter = parseFormat.parse(validAfterTime).getTime();
@@ -121,23 +134,43 @@ public class RelayDescriptorParser {
getTime();
} else if (line.startsWith("r ")) {
String[] parts = line.split(" ");
- if (parts.length < 9) {
+ if (parts.length == 8) {
+ lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64(
+ parts[2] + "=")).toLowerCase();
+ } else if (parts.length == 9) {
+ lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64(
+ parts[2] + "=")).toLowerCase();
+ String serverDesc = Hex.encodeHexString(Base64.decodeBase64(
+ parts[3] + "=")).toLowerCase();
+ String publishedTime = parts[4] + " " + parts[5];
+ serverDescriptors.add(publishedTime + ","
+ + lastRelayIdentity + "," + serverDesc);
+ serverDescriptorDigests.add(serverDesc);
+ } else {
this.logger.log(Level.WARNING, "Could not parse r line '"
+ line + "' in descriptor. Skipping.");
break;
}
- String publishedTime = parts[4] + " " + parts[5];
- String relayIdentity = Hex.encodeHexString(
- Base64.decodeBase64(parts[2] + "=")).
- toLowerCase();
- String serverDesc = Hex.encodeHexString(Base64.decodeBase64(
- parts[3] + "=")).toLowerCase();
- serverDescriptors.add(publishedTime + "," + relayIdentity
- + "," + serverDesc);
- serverDescriptorDigests.add(serverDesc);
+ } else if (line.startsWith("m ")) {
+ String[] parts = line.split(" ");
+ if (parts.length == 2 && parts[1].length() == 43) {
+ String digest256Base64 = parts[1];
+ microdescriptorKeys.add(validAfterTime + ","
+ + lastRelayIdentity + "," + digest256Base64);
+ String digest256Hex = Hex.encodeHexString(
+ Base64.decodeBase64(digest256Base64 + "=")).
+ toLowerCase();
+ microdescriptorDigests.add(digest256Hex);
+ } else if (parts.length != 3 ||
+ !parts[2].startsWith("sha256=") ||
+ parts[2].length() != 50) {
+ this.logger.log(Level.WARNING, "Could not parse m line '"
+ + line + "' in descriptor. Skipping.");
+ break;
+ }
}
}
- if (isConsensus) {
+ if (statusType.equals("consensus")) {
if (this.rdd != null) {
this.rdd.haveParsedConsensus(validAfterTime, dirSources,
serverDescriptors);
@@ -145,6 +178,21 @@ public class RelayDescriptorParser {
if (this.aw != null) {
this.aw.storeConsensus(data, validAfter, dirSources,
serverDescriptorDigests);
+ stored = true;
+ }
+ } else if (statusType.equals("consensus-microdesc")) {
+ if (this.rdd != null) {
+ this.rdd.haveParsedMicrodescConsensus(validAfterTime,
+ microdescriptorKeys);
+ }
+ if (this.ar != null) {
+ this.ar.haveParsedMicrodescConsensus(validAfterTime,
+ microdescriptorDigests);
+ }
+ if (this.aw != null) {
+ this.aw.storeMicrodescConsensus(data, validAfter,
+ microdescriptorDigests);
+ stored = true;
}
} else {
if (this.aw != null || this.rdd != null) {
@@ -161,6 +209,7 @@ public class RelayDescriptorParser {
if (this.aw != null) {
this.aw.storeVote(data, validAfter, dirSource, digest,
serverDescriptorDigests);
+ stored = true;
}
if (this.rdd != null) {
this.rdd.haveParsedVote(validAfterTime, fingerprint,
@@ -171,6 +220,7 @@ public class RelayDescriptorParser {
if (this.aw != null) {
this.aw.storeCertificate(certificateString.getBytes(),
dirSource, dirKeyPublished);
+ stored = true;
}
}
}
@@ -209,6 +259,7 @@ public class RelayDescriptorParser {
if (this.aw != null && digest != null) {
this.aw.storeServerDescriptor(data, digest, published,
extraInfoDigest);
+ stored = true;
}
if (this.rdd != null && digest != null) {
this.rdd.haveParsedServerDescriptor(publishedTime,
@@ -238,19 +289,26 @@ public class RelayDescriptorParser {
}
}
int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 || sig >= 0 || sig > start) {
+ if (start >= 0 && sig >= 0 && sig > start) {
byte[] forDigest = new byte[sig - start];
System.arraycopy(data, start, forDigest, 0, sig - start);
digest = DigestUtils.shaHex(forDigest);
}
if (this.aw != null && digest != null) {
this.aw.storeExtraInfoDescriptor(data, digest, published);
+ stored = true;
}
if (this.rdd != null && digest != null) {
this.rdd.haveParsedExtraInfoDescriptor(publishedTime,
relayIdentifier.toLowerCase(), digest);
}
+ } else if (line.equals("onion-key")) {
+ /* Cannot store microdescriptors without knowing valid-after
+ * time(s) of microdesc consensuses containing them, because we
+ * don't know which month directories to put them in. Have to use
+ * storeMicrodescriptor below. */
}
+ br.close();
} catch (IOException e) {
this.logger.log(Level.WARNING, "Could not parse descriptor. "
+ "Skipping.", e);
@@ -258,6 +316,17 @@ public class RelayDescriptorParser {
this.logger.log(Level.WARNING, "Could not parse descriptor. "
+ "Skipping.", e);
}
+ return stored;
+ }
+
+ /**
+  * Handles a microdescriptor whose containing microdesc consensus is
+  * known to the caller: passes it to the archive writer, if one is
+  * set, together with the valid-after time of that consensus, and
+  * then tells the descriptor downloader, if one is set, that the
+  * microdescriptor with the given base64 digest has been parsed.
+  */
+ public void storeMicrodescriptor(byte[] data, String digest256Hex,
+     String digest256Base64, long validAfter) {
+   /* Archive first, then update the downloader's missing list. */
+   if (this.aw != null) {
+     this.aw.storeMicrodescriptor(data, digest256Hex, validAfter);
+   }
+   if (this.rdd == null) {
+     return;
+   }
+   this.rdd.haveParsedMicrodescriptor(digest256Base64);
}
}