commit 4694f0501915d00478062d9066c4132e6f7b52aa Author: Karsten Loesing <karsten.loesing@gmx.net> Date: Thu Jan 9 14:26:36 2014 +0100
Download microdesc consensuses and microdescriptors.
Also support reading microdescriptors from disk.
Implements #2785. --- config.template | 8 + deps/metrics-lib | 2 +- .../torproject/ernie/db/main/Configuration.java | 14 + .../ernie/db/relaydescs/ArchiveReader.java | 145 +++++++- .../ernie/db/relaydescs/ArchiveWriter.java | 255 +++++++++++--- .../db/relaydescs/RelayDescriptorDownloader.java | 357 +++++++++++++++++--- .../ernie/db/relaydescs/RelayDescriptorParser.java | 105 +++++- 7 files changed, 771 insertions(+), 115 deletions(-)
diff --git a/config.template b/config.template index 1b84775..cd31802 100644 --- a/config.template +++ b/config.template @@ -33,6 +33,10 @@ ## Download the current consensus (only if DownloadRelayDescriptors is 1) #DownloadCurrentConsensus 1 # +## Download the current microdesc consensus (only if +## DownloadRelayDescriptors is 1) +#DownloadCurrentMicrodescConsensus 1 +# ## Download current votes (only if DownloadRelayDescriptors is 1) #DownloadCurrentVotes 1 # @@ -44,6 +48,10 @@ ## DownloadRelayDescriptors is 1) #DownloadMissingExtraInfoDescriptors 1 # +## Download missing microdescriptors (only if +## DownloadRelayDescriptors is 1) +#DownloadMissingMicrodescriptors 1 +# ## Download all server descriptors from the directory authorities at most ## once a day (only if DownloadRelayDescriptors is 1) #DownloadAllServerDescriptors 0 diff --git a/deps/metrics-lib b/deps/metrics-lib index 3e60ccd..38c48dd 160000 --- a/deps/metrics-lib +++ b/deps/metrics-lib @@ -1 +1 @@ -Subproject commit 3e60ccdaaba598cabb7281d45f9a415299b8e3e3 +Subproject commit 38c48ddd0c49978bbfa5e0a987cfd3a890692a5c diff --git a/src/org/torproject/ernie/db/main/Configuration.java b/src/org/torproject/ernie/db/main/Configuration.java index 7b5e53d..85d889e 100644 --- a/src/org/torproject/ernie/db/main/Configuration.java +++ b/src/org/torproject/ernie/db/main/Configuration.java @@ -53,9 +53,11 @@ public class Configuration { + "ED03BB616EB2F60BEC80151114BB25CEF515B226," + "EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97").split(",")); private boolean downloadCurrentConsensus = true; + private boolean downloadCurrentMicrodescConsensus = true; private boolean downloadCurrentVotes = true; private boolean downloadMissingServerDescriptors = true; private boolean downloadMissingExtraInfoDescriptors = true; + private boolean downloadMissingMicrodescriptors = true; private boolean downloadAllServerDescriptors = false; private boolean downloadAllExtraInfoDescriptors = false; private boolean 
compressRelayDescriptorDownloads; @@ -141,6 +143,9 @@ public class Configuration { } else if (line.startsWith("DownloadCurrentConsensus")) { this.downloadCurrentConsensus = Integer.parseInt( line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadCurrentMicrodescConsensus")) { + this.downloadCurrentMicrodescConsensus = Integer.parseInt( + line.split(" ")[1]) != 0; } else if (line.startsWith("DownloadCurrentVotes")) { this.downloadCurrentVotes = Integer.parseInt( line.split(" ")[1]) != 0; @@ -151,6 +156,9 @@ public class Configuration { "DownloadMissingExtraInfoDescriptors")) { this.downloadMissingExtraInfoDescriptors = Integer.parseInt( line.split(" ")[1]) != 0; + } else if (line.startsWith("DownloadMissingMicrodescriptors")) { + this.downloadMissingMicrodescriptors = Integer.parseInt( + line.split(" ")[1]) != 0; } else if (line.startsWith("DownloadAllServerDescriptors")) { this.downloadAllServerDescriptors = Integer.parseInt( line.split(" ")[1]) != 0; @@ -253,6 +261,9 @@ public class Configuration { public boolean getDownloadCurrentConsensus() { return this.downloadCurrentConsensus; } + public boolean getDownloadCurrentMicrodescConsensus() { + return this.downloadCurrentMicrodescConsensus; + } public boolean getDownloadCurrentVotes() { return this.downloadCurrentVotes; } @@ -262,6 +273,9 @@ public class Configuration { public boolean getDownloadMissingExtraInfoDescriptors() { return this.downloadMissingExtraInfoDescriptors; } + public boolean getDownloadMissingMicrodescriptors() { + return this.downloadMissingMicrodescriptors; + } public boolean getDownloadAllServerDescriptors() { return this.downloadAllServerDescriptors; } diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java b/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java index fba0a9f..ea54874 100644 --- a/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java +++ b/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java @@ -1,4 +1,4 @@ -/* Copyright 2010--2012 The Tor 
Project +/* Copyright 2010--2014 The Tor Project * See LICENSE for licensing information */ package org.torproject.ernie.db.relaydescs;
@@ -11,14 +11,25 @@ import java.io.FileInputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; +import java.io.StringReader; +import java.io.UnsupportedEncodingException; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.SortedSet; import java.util.Stack; +import java.util.TimeZone; import java.util.TreeSet; import java.util.logging.Level; import java.util.logging.Logger;
+import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
/** @@ -34,6 +45,7 @@ public class ArchiveReader { throw new IllegalArgumentException(); }
+ rdp.setArchiveReader(this); int parsedFiles = 0, ignoredFiles = 0; Logger logger = Logger.getLogger(ArchiveReader.class.getName()); SortedSet<String> archivesImportHistory = new TreeSet<String>(); @@ -59,6 +71,7 @@ public class ArchiveReader { Stack<File> filesInInputDir = new Stack<File>(); filesInInputDir.add(archivesDirectory); List<File> problems = new ArrayList<File>(); + Set<File> filesToRetry = new HashSet<File>(); while (!filesInInputDir.isEmpty()) { File pop = filesInInputDir.pop(); if (pop.isDirectory()) { @@ -86,9 +99,6 @@ public class ArchiveReader { FileInputStream fis = new FileInputStream(pop); bis = new BufferedInputStream(fis); } - if (keepImportHistory) { - archivesImportHistory.add(pop.getName()); - } ByteArrayOutputStream baos = new ByteArrayOutputStream(); int len; byte[] data = new byte[1024]; @@ -97,7 +107,14 @@ public class ArchiveReader { } bis.close(); byte[] allData = baos.toByteArray(); - rdp.parse(allData); + boolean stored = rdp.parse(allData); + if (!stored) { + filesToRetry.add(pop); + continue; + } + if (keepImportHistory) { + archivesImportHistory.add(pop.getName()); + } parsedFiles++; } catch (IOException e) { problems.add(pop); @@ -108,6 +125,109 @@ public class ArchiveReader { } } } + for (File pop : filesToRetry) { + /* TODO We need to parse microdescriptors ourselves, rather than + * RelayDescriptorParser, because only we know the valid-after + * time(s) of microdesc consensus(es) containing this + * microdescriptor. However, this breaks functional abstraction + * pretty badly. 
*/ + if (rdp != null) { + try { + BufferedInputStream bis = null; + if (pop.getName().endsWith(".bz2")) { + FileInputStream fis = new FileInputStream(pop); + BZip2CompressorInputStream bcis = + new BZip2CompressorInputStream(fis); + bis = new BufferedInputStream(bcis); + } else { + FileInputStream fis = new FileInputStream(pop); + bis = new BufferedInputStream(fis); + } + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int len; + byte[] data = new byte[1024]; + while ((len = bis.read(data, 0, 1024)) >= 0) { + baos.write(data, 0, len); + } + bis.close(); + byte[] allData = baos.toByteArray(); + BufferedReader br = new BufferedReader(new StringReader( + new String(allData, "US-ASCII"))); + String line; + do { + line = br.readLine(); + } while (line != null && line.startsWith("@")); + br.close(); + if (line == null) { + logger.fine("We were given an empty descriptor for " + + "parsing. Ignoring."); + continue; + } + if (!line.equals("onion-key")) { + logger.fine("Skipping non-recognized descriptor."); + continue; + } + SimpleDateFormat parseFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + parseFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String ascii = null; + try { + ascii = new String(allData, "US-ASCII"); + } catch (UnsupportedEncodingException e) { + /* No way that US-ASCII is not supported. 
*/ + } + int start = -1, end = -1; + String startToken = "onion-key\n"; + while (end < ascii.length()) { + start = ascii.indexOf(startToken, end); + if (start < 0) { + break; + } + end = ascii.indexOf(startToken, start + 1); + if (end < 0) { + end = ascii.length(); + if (end <= start) { + break; + } + } + byte[] descBytes = new byte[end - start]; + System.arraycopy(allData, start, descBytes, 0, end - start); + String digest256Base64 = Base64.encodeBase64String( + DigestUtils.sha256(descBytes)).replaceAll("=", ""); + String digest256Hex = DigestUtils.sha256Hex(descBytes); + if (!this.microdescriptorValidAfterTimes.containsKey( + digest256Hex)) { + logger.fine("Could not store microdescriptor '" + + digest256Hex + "', which was not contained in a " + + "microdesc consensus."); + continue; + } + for (String validAfterTime : + this.microdescriptorValidAfterTimes.get(digest256Hex)) { + try { + long validAfter = + parseFormat.parse(validAfterTime).getTime(); + rdp.storeMicrodescriptor(descBytes, digest256Hex, + digest256Base64, validAfter); + } catch (ParseException e) { + logger.log(Level.WARNING, "Could not parse " + + "valid-after time '" + validAfterTime + "'. 
Not " + + "storing microdescriptor.", e); + } + } + } + if (keepImportHistory) { + archivesImportHistory.add(pop.getName()); + } + parsedFiles++; + } catch (IOException e) { + problems.add(pop); + if (problems.size() > 3) { + break; + } + } + } + } if (problems.isEmpty()) { logger.fine("Finished importing files in directory " + archivesDirectory + "/."); @@ -142,5 +262,20 @@ public class ArchiveReader { + "directory:\nParsed " + parsedFiles + ", ignored " + ignoredFiles + " files."); } + + private Map<String, Set<String>> microdescriptorValidAfterTimes = + new HashMap<String, Set<String>>(); + public void haveParsedMicrodescConsensus(String validAfterTime, + SortedSet<String> microdescriptorDigests) { + for (String microdescriptor : microdescriptorDigests) { + if (!this.microdescriptorValidAfterTimes.containsKey( + microdescriptor)) { + this.microdescriptorValidAfterTimes.put(microdescriptor, + new HashSet<String>()); + } + this.microdescriptorValidAfterTimes.get(microdescriptor).add( + validAfterTime); + } + } }
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java index 1b4f774..a179f5b 100644 --- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java +++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java @@ -1,4 +1,4 @@ -/* Copyright 2010--2012 The Tor Project +/* Copyright 2010--2014 The Tor Project * See LICENSE for licensing information */ package org.torproject.ernie.db.relaydescs;
@@ -73,12 +73,16 @@ public class ArchiveWriter extends Thread { private Logger logger; private File outputDirectory; private DescriptorParser descriptorParser; - private int storedConsensusesCounter = 0, storedVotesCounter = 0, + private int storedConsensusesCounter = 0, + storedMicrodescConsensusesCounter = 0, storedVotesCounter = 0, storedCertsCounter = 0, storedServerDescriptorsCounter = 0, - storedExtraInfoDescriptorsCounter = 0; + storedExtraInfoDescriptorsCounter = 0, + storedMicrodescriptorsCounter = 0;
private SortedMap<Long, SortedSet<String>> storedConsensuses = new TreeMap<Long, SortedSet<String>>(); + private SortedMap<Long, SortedSet<String>> storedMicrodescConsensuses = + new TreeMap<Long, SortedSet<String>>(); private SortedMap<Long, Integer> expectedVotes = new TreeMap<Long, Integer>(); private SortedMap<Long, SortedMap<String, SortedSet<String>>> @@ -88,11 +92,15 @@ public class ArchiveWriter extends Thread { new TreeMap<Long, Map<String, String>>(); private SortedMap<Long, Set<String>> storedExtraInfoDescriptors = new TreeMap<Long, Set<String>>(); + private SortedMap<Long, Set<String>> storedMicrodescriptors = + new TreeMap<Long, Set<String>>();
private File storedServerDescriptorsFile = new File( "stats/stored-server-descriptors"); private File storedExtraInfoDescriptorsFile = new File( "stats/stored-extra-info-descriptors"); + private File storedMicrodescriptorsFile = new File( + "stats/stored-microdescriptors");
private void loadDescriptorDigests() { SimpleDateFormat dateTimeFormat = new SimpleDateFormat( @@ -155,6 +163,33 @@ public class ArchiveWriter extends Thread { } br.close(); } + if (this.storedMicrodescriptorsFile.exists()) { + BufferedReader br = new BufferedReader(new FileReader( + this.storedMicrodescriptorsFile)); + String line; + while ((line = br.readLine()) != null) { + String[] parts = line.split(","); + if (parts.length != 2) { + this.logger.warning("Could not load microdescriptor digests " + + "because of illegal line '" + line + "'. We might not " + + "be able to correctly check descriptors for " + + "completeness."); + break; + } + long validAfter = dateTimeFormat.parse(parts[0]).getTime(); + if (validAfter < this.now - 48L * 60L * 60L * 1000L) { + continue; + } + if (!this.storedMicrodescriptors.containsKey(validAfter)) { + this.storedMicrodescriptors.put(validAfter, + new HashSet<String>()); + } + String microdescriptorDigest = parts[1]; + this.storedMicrodescriptors.get(validAfter).add( + microdescriptorDigest); + } + br.close(); + } } catch (ParseException e) { this.logger.log(Level.WARNING, "Could not load descriptor " + "digests. We might not be able to correctly check " @@ -198,6 +233,18 @@ public class ArchiveWriter extends Thread { } } bw.close(); + this.storedMicrodescriptorsFile.getParentFile().mkdirs(); + bw = new BufferedWriter(new FileWriter( + this.storedMicrodescriptorsFile)); + for (Map.Entry<Long, Set<String>> e : + this.storedMicrodescriptors.entrySet()) { + String validAfter = dateTimeFormat.format(e.getKey()); + for (String microdescriptorDigest : e.getValue()) { + bw.write(String.format("%s,%s%n", validAfter, + microdescriptorDigest)); + } + } + bw.close(); } catch (IOException e) { this.logger.log(Level.WARNING, "Could not save descriptor " + "digests. 
We might not be able to correctly check " @@ -228,9 +275,11 @@ public class ArchiveWriter extends Thread { rdd = new RelayDescriptorDownloader(rdp, dirSources, config.getDownloadVotesByFingerprint(), config.getDownloadCurrentConsensus(), + config.getDownloadCurrentMicrodescConsensus(), config.getDownloadCurrentVotes(), config.getDownloadMissingServerDescriptors(), config.getDownloadMissingExtraInfoDescriptors(), + config.getDownloadMissingMicrodescriptors(), config.getDownloadAllServerDescriptors(), config.getDownloadAllExtraInfoDescriptors(), config.getCompressRelayDescriptorDownloads()); @@ -314,15 +363,39 @@ public class ArchiveWriter extends Thread { if (this.store(CONSENSUS_ANNOTATION, data, outputFiles)) { this.storedConsensusesCounter++; } - SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); if (this.now - validAfter < 3L * 60L * 60L * 1000L) { this.storedConsensuses.put(validAfter, serverDescriptorDigests); this.expectedVotes.put(validAfter, dirSources.size()); } }
+ private static final byte[] MICRODESCCONSENSUS_ANNOTATION = + "@type network-status-microdesc-consensus-3 1.0\n".getBytes(); + public void storeMicrodescConsensus(byte[] data, long validAfter, + SortedSet<String> microdescriptorDigests) { + SimpleDateFormat yearMonthDirectoryFormat = new SimpleDateFormat( + "yyyy/MM"); + yearMonthDirectoryFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + SimpleDateFormat dayDirectoryFileFormat = new SimpleDateFormat( + "dd/yyyy-MM-dd-HH-mm-ss"); + dayDirectoryFileFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + + "/microdesc/" + yearMonthDirectoryFormat.format(validAfter) + + "/consensus-microdesc/" + + dayDirectoryFileFormat.format(validAfter) + + "-consensus-microdesc"); + File rsyncFile = new File("rsync/relay-descriptors/microdescs/" + + "consensus-microdesc/" + tarballFile.getName()); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + if (this.store(MICRODESCCONSENSUS_ANNOTATION, data, outputFiles)) { + this.storedMicrodescConsensusesCounter++; + } + if (this.now - validAfter < 3L * 60L * 60L * 1000L) { + this.storedMicrodescConsensuses.put(validAfter, + microdescriptorDigests); + } + } + private static final byte[] VOTE_ANNOTATION = "@type network-status-vote-3 1.0\n".getBytes(); public void storeVote(byte[] data, long validAfter, @@ -340,9 +413,6 @@ public class ArchiveWriter extends Thread { if (this.store(VOTE_ANNOTATION, data, outputFiles)) { this.storedVotesCounter++; } - SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); if (this.now - validAfter < 3L * 60L * 60L * 1000L) { if (!this.storedVotes.containsKey(validAfter)) { this.storedVotes.put(validAfter, @@ -384,9 +454,6 @@ public class ArchiveWriter extends Thread { if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, outputFiles)) { this.storedServerDescriptorsCounter++; } - SimpleDateFormat dateTimeFormat = - new 
SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); if (this.now - published < 48L * 60L * 60L * 1000L) { if (!this.storedServerDescriptors.containsKey(published)) { this.storedServerDescriptors.put(published, @@ -423,20 +490,57 @@ public class ArchiveWriter extends Thread { } }
+ private static final byte[] MICRODESCRIPTOR_ANNOTATION = + "@type microdescriptor 1.0\n".getBytes(); + public void storeMicrodescriptor(byte[] data, + String microdescriptorDigest, long validAfter) { + /* TODO We could check here whether we already stored the + * microdescriptor in the same valid-after month. This can happen, + * e.g., when two relays share the same microdescriptor. In that case + * this method gets called twice and the second call overwrites the + * file written in the first call. However, this method must be + * called twice to store the same microdescriptor in two different + * valid-after months. */ + SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/"); + descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + "/microdesc/" + + descriptorFormat.format(validAfter) + "micro/" + + microdescriptorDigest.substring(0, 1) + "/" + + microdescriptorDigest.substring(1, 2) + "/" + + microdescriptorDigest); + File rsyncFile = new File("rsync/relay-descriptors/microdescs/micro/" + + microdescriptorDigest); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + if (this.store(MICRODESCRIPTOR_ANNOTATION, data, outputFiles)) { + this.storedMicrodescriptorsCounter++; + } + if (this.now - validAfter < 48L * 60L * 60L * 1000L) { + if (!this.storedMicrodescriptors.containsKey(validAfter)) { + this.storedMicrodescriptors.put(validAfter, new HashSet<String>()); + } + this.storedMicrodescriptors.get(validAfter).add( + microdescriptorDigest); + } + } + private StringBuilder intermediateStats = new StringBuilder(); public void intermediateStats(String event) { intermediateStats.append("While " + event + ", we stored " + this.storedConsensusesCounter + " consensus(es), " - + this.storedVotesCounter + " vote(s), " + this.storedCertsCounter - + " certificate(s), " + this.storedServerDescriptorsCounter - + " server descriptor(s), and " - + this.storedExtraInfoDescriptorsCounter - + " 
extra-info descriptor(s) to disk.\n"); + + this.storedMicrodescConsensusesCounter + " microdesc " + + "consensus(es), " + this.storedVotesCounter + " vote(s), " + + this.storedCertsCounter + " certificate(s), " + + this.storedServerDescriptorsCounter + " server descriptor(s), " + + this.storedExtraInfoDescriptorsCounter + " extra-info " + + "descriptor(s), and " + this.storedMicrodescriptorsCounter + + " microdescriptor(s) to disk.\n"); this.storedConsensusesCounter = 0; + this.storedMicrodescConsensusesCounter = 0; this.storedVotesCounter = 0; this.storedCertsCounter = 0; this.storedServerDescriptorsCounter = 0; this.storedExtraInfoDescriptorsCounter = 0; + this.storedMicrodescriptorsCounter = 0; }
private void checkMissingDescriptors() { @@ -444,8 +548,7 @@ public class ArchiveWriter extends Thread { + "descriptors to disk.\n"); sb.append(intermediateStats.toString()); sb.append("Statistics on the completeness of written relay " - + "descriptors of the last 3 consensuses (Consensus/Vote, " - + "valid-after, votes, server descriptors, extra-infos):"); + + "descriptors:"); SimpleDateFormat dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); @@ -460,7 +563,12 @@ public class ArchiveWriter extends Thread { this.storedExtraInfoDescriptors.values()) { knownExtraInfoDescriptors.addAll(descriptors); } - boolean missingDescriptors = false, missingVotes = false; + Set<String> knownMicrodescriptors = new HashSet<String>(); + for (Set<String> descriptors : this.storedMicrodescriptors.values()) { + knownMicrodescriptors.addAll(descriptors); + } + boolean missingDescriptors = false, missingVotes = false, + missingMicrodescConsensus = false; for (Map.Entry<Long, SortedSet<String>> c : this.storedConsensuses.entrySet()) { long validAfterMillis = c.getKey(); @@ -491,14 +599,23 @@ public class ArchiveWriter extends Thread { } } } - sb.append(String.format("%nV, %s, NA, %d/%d (%.1f%%), " - + "%d/%d (%.1f%%)", validAfterTime, - voteFoundServerDescs, voteAllServerDescs, - 100.0D * (double) voteFoundServerDescs / - (double) voteAllServerDescs, - voteFoundExtraInfos, voteAllExtraInfos, - 100.0D * (double) voteFoundExtraInfos / - (double) voteAllExtraInfos)); + sb.append("\nV, " + validAfterTime); + if (voteAllServerDescs > 0) { + sb.append(String.format(", %d/%d S (%.1f%%)", + voteFoundServerDescs, voteAllServerDescs, + 100.0D * (double) voteFoundServerDescs / + (double) voteAllServerDescs)); + } else { + sb.append(", 0/0 S"); + } + if (voteAllExtraInfos > 0) { + sb.append(String.format(", %d/%d E (%.1f%%)", + voteFoundExtraInfos, voteAllExtraInfos, + 100.0D * (double) voteFoundExtraInfos / + (double) 
voteAllExtraInfos)); + } else { + sb.append(", 0/0 E"); + } if (voteFoundServerDescs * 1000 < voteAllServerDescs * 995 || voteFoundExtraInfos * 1000 < voteAllExtraInfos * 995) { missingDescriptors = true; @@ -506,7 +623,8 @@ public class ArchiveWriter extends Thread { } } int foundServerDescs = 0, allServerDescs = 0, foundExtraInfos = 0, - allExtraInfos = 0; + allExtraInfos = 0, foundMicrodescriptors = 0, + allMicrodescriptors = 0; for (String serverDescriptorDigest : c.getValue()) { allServerDescs++; if (knownServerDescriptors.containsKey( @@ -524,16 +642,50 @@ public class ArchiveWriter extends Thread { } } } - sb.append(String.format("%nC, %s, %d/%d (%.1f%%), " - + "%d/%d (%.1f%%), %d/%d (%.1f%%)", - validAfterTime, foundVotes, allVotes, - 100.0D * (double) foundVotes / (double) allVotes, - foundServerDescs, allServerDescs, - 100.0D * (double) foundServerDescs / (double) allServerDescs, - foundExtraInfos, allExtraInfos, - 100.0D * (double) foundExtraInfos / (double) allExtraInfos)); + sb.append("\nC, " + validAfterTime); + if (allVotes > 0) { + sb.append(String.format(", %d/%d V (%.1f%%)", foundVotes, allVotes, + 100.0D * (double) foundVotes / (double) allVotes)); + } else { + sb.append(", 0/0 V"); + } + if (allServerDescs > 0) { + sb.append(String.format(", %d/%d S (%.1f%%)", foundServerDescs, + allServerDescs, 100.0D * (double) foundServerDescs / + (double) allServerDescs)); + } else { + sb.append(", 0/0 S"); + } + if (allExtraInfos > 0) { + sb.append(String.format(", %d/%d E (%.1f%%)", foundExtraInfos, + allExtraInfos, 100.0D * (double) foundExtraInfos / + (double) allExtraInfos)); + } else { + sb.append(", 0/0 E"); + } + if (this.storedMicrodescConsensuses.containsKey(validAfterMillis)) { + for (String microdescriptorDigest : + this.storedMicrodescConsensuses.get(validAfterMillis)) { + allMicrodescriptors++; + if (knownMicrodescriptors.contains(microdescriptorDigest)) { + foundMicrodescriptors++; + } + } + sb.append("\nM, " + validAfterTime); + if 
(allMicrodescriptors > 0) { + sb.append(String.format(", %d/%d M (%.1f%%)", + foundMicrodescriptors, allMicrodescriptors, + 100.0D * (double) foundMicrodescriptors / + (double) allMicrodescriptors)); + } else { + sb.append(", 0/0 M"); + } + } else { + missingMicrodescConsensus = true; + } if (foundServerDescs * 1000 < allServerDescs * 995 || - foundExtraInfos * 1000 < allExtraInfos * 995) { + foundExtraInfos * 1000 < allExtraInfos * 995 || + foundMicrodescriptors * 1000 < allMicrodescriptors * 995) { missingDescriptors = true; } if (foundVotes < allVotes) { @@ -544,12 +696,22 @@ public class ArchiveWriter extends Thread { if (missingDescriptors) { this.logger.warning("We are missing at least 0.5% of server or " + "extra-info descriptors referenced from a consensus or " - + "vote."); + + "vote or at least 0.5% of microdescriptors referenced from a " + + "microdesc consensus."); } if (missingVotes) { + /* TODO Shouldn't warn if we're not trying to archive votes at + * all. */ this.logger.warning("We are missing at least one vote that was " + "referenced from a consensus."); } + if (missingMicrodescConsensus) { + /* TODO Shouldn't warn if we're not trying to archive microdesc + * consensuses at all. */ + this.logger.warning("We are missing at least one microdesc " + + "consensus that was published together with a known " + + "consensus."); + } }
private void checkStaledescriptors() { @@ -564,6 +726,14 @@ public class ArchiveWriter extends Thread { + dateTimeFormat.format(this.storedConsensuses.lastKey()) + ", which is more than 5:30 hours in the past."); } + if (!this.storedMicrodescConsensuses.isEmpty() && + this.storedMicrodescConsensuses.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay network status " + + "microdesc consensus was valid after " + + dateTimeFormat.format( + this.storedMicrodescConsensuses.lastKey()) + + ", which is more than 5:30 hours in the past."); + } if (!this.storedVotes.isEmpty() && this.storedVotes.lastKey() < tooOldMillis) { this.logger.warning("The last known relay network status vote " @@ -585,6 +755,13 @@ public class ArchiveWriter extends Thread { this.storedExtraInfoDescriptors.lastKey()) + ", which is more than 5:30 hours in the past."); } + if (!this.storedMicrodescriptors.isEmpty() && + this.storedMicrodescriptors.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay microdescriptor was " + + "contained in a microdesc consensus that was valid after " + + dateTimeFormat.format(this.storedMicrodescriptors.lastKey()) + + ", which is more than 5:30 hours in the past."); + } }
/* Delete all files from the rsync directory that have not been modified diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java index ce2f16a..1c8a375 100644 --- a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java +++ b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java @@ -1,4 +1,4 @@ -/* Copyright 2010--2012 The Tor Project +/* Copyright 2010--2014 The Tor Project * See LICENSE for licensing information */ package org.torproject.ernie.db.relaydescs;
@@ -13,9 +13,9 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.URL; +import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -31,6 +31,9 @@ import java.util.logging.Level; import java.util.logging.Logger; import java.util.zip.InflaterInputStream;
+import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.digest.DigestUtils; + /** * Downloads relay descriptors from the directory authorities via HTTP. * Keeps a list of missing descriptors that gets updated by parse results @@ -46,9 +49,11 @@ public class RelayDescriptorDownloader { * want to download. Lines are formatted as: * * - "consensus,<validafter>,<parsed>", + * - "consensus-microdesc,<validafter>,<parsed>", * - "vote,<validafter>,<fingerprint>,<parsed>", - * - "server,<published>,<relayid>,<descid>,<parsed>", or - * - "extra,<published>,<relayid>,<descid>,<parsed>". + * - "server,<published>,<relayid>,<descid>,<parsed>", + * - "extra,<published>,<relayid>,<descid>,<parsed>", or + * - "micro,<validafter>,<relayid>,<descid>,<parsed>". */ private File missingDescriptorsFile;
@@ -61,6 +66,27 @@ public class RelayDescriptorDownloader { private SortedMap<String, String> missingDescriptors;
/** + * Map from base64 microdescriptor digests to keys in missingDescriptors + * ("micro,<validafter>,<relayid>,<descid>"). We need this map, because + * we can't learn <validafter> or <relayid> from parsing + * microdescriptors, but we need to know <validafter> to store + * microdescriptors to disk and both <validafter> and <relayid> to + * remove microdescriptors from the missing list. There are potentially + * many matching keys in missingDescriptors for the same microdescriptor + * digest. Also, in rare cases relays share the same microdescriptor + * (which is only possible if they share the same onion key), and then + * we don't have to download their microdescriptor more than once. + */ + private Map<String, Set<String>> microdescriptorKeys; + + /** + * Set of microdescriptor digests that are currently missing. Used for + * logging statistics instead of "micro,<validafter>,..." keys which may + * contain the same microdescriptor digest multiple times. + */ + private Set<String> missingMicrodescriptors; + + /** * Text file containing the IP addresses (and Dir ports if not 80) of * directory authorities and when we last downloaded all server and * extra-info descriptors from them, so that we can avoid downloading @@ -99,6 +125,12 @@ public class RelayDescriptorDownloader { private boolean downloadCurrentConsensus;
/** + * Should we try to download the current microdesc consensus if we don't + * have it? + */ + private boolean downloadCurrentMicrodescConsensus; + + /** * Should we try to download current votes if we don't have them? */ private boolean downloadCurrentVotes; @@ -116,6 +148,12 @@ public class RelayDescriptorDownloader { private boolean downloadMissingExtraInfos;
/** + * Should we try to download missing microdescriptors that have been + * published within the past 24 hours? + */ + private boolean downloadMissingMicrodescriptors; + + /** * Should we try to download all server descriptors from the authorities * once every 24 hours? */ @@ -134,11 +172,11 @@ public class RelayDescriptorDownloader { private boolean downloadCompressed;
/** - * valid-after time that we expect the current consensus and votes to - * have, formatted "yyyy-MM-dd HH:mm:ss". We only expect to find - * consensuses and votes with this valid-after time on the directory - * authorities. This time is initialized as the beginning of the current - * hour. + * valid-after time that we expect the current consensus, + * microdescriptor consensus, and votes to have, formatted + * "yyyy-MM-dd HH:mm:ss". We only expect to find documents with this + * valid-after time on the directory authorities. This time is + * initialized as the beginning of the current hour. */ private String currentValidAfter;
@@ -186,19 +224,25 @@ public class RelayDescriptorDownloader { * that we requested, and that we successfully downloaded in this * execution. */ - private int oldMissingConsensuses = 0, oldMissingVotes = 0, + private int oldMissingConsensuses = 0, + oldMissingMicrodescConsensuses = 0, oldMissingVotes = 0, oldMissingServerDescriptors = 0, oldMissingExtraInfoDescriptors = 0, - newMissingConsensuses = 0, newMissingVotes = 0, + oldMissingMicrodescriptors = 0, newMissingConsensuses = 0, + newMissingMicrodescConsensuses = 0, newMissingVotes = 0, newMissingServerDescriptors = 0, newMissingExtraInfoDescriptors = 0, - requestedConsensuses = 0, requestedVotes = 0, + newMissingMicrodescriptors = 0, requestedConsensuses = 0, + requestedMicrodescConsensuses = 0, requestedVotes = 0, requestedMissingServerDescriptors = 0, requestedAllServerDescriptors = 0, requestedMissingExtraInfoDescriptors = 0, - requestedAllExtraInfoDescriptors = 0, downloadedConsensuses = 0, - downloadedVotes = 0, downloadedMissingServerDescriptors = 0, + requestedAllExtraInfoDescriptors = 0, + requestedMissingMicrodescriptors = 0, downloadedConsensuses = 0, + downloadedMicrodescConsensuses = 0, downloadedVotes = 0, + downloadedMissingServerDescriptors = 0, downloadedAllServerDescriptors = 0, downloadedMissingExtraInfoDescriptors = 0, - downloadedAllExtraInfoDescriptors = 0; + downloadedAllExtraInfoDescriptors = 0, + downloadedMissingMicrodescriptors = 0;
/** * Initializes this class, including reading in missing descriptors from @@ -209,9 +253,11 @@ public class RelayDescriptorDownloader { public RelayDescriptorDownloader(RelayDescriptorParser rdp, List<String> authorities, List<String> authorityFingerprints, boolean downloadCurrentConsensus, + boolean downloadCurrentMicrodescConsensus, boolean downloadCurrentVotes, boolean downloadMissingServerDescriptors, boolean downloadMissingExtraInfos, + boolean downloadMissingMicrodescriptors, boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos, boolean downloadCompressed) {
@@ -221,10 +267,14 @@ public class RelayDescriptorDownloader { this.authorityFingerprints = new ArrayList<String>( authorityFingerprints); this.downloadCurrentConsensus = downloadCurrentConsensus; + this.downloadCurrentMicrodescConsensus = + downloadCurrentMicrodescConsensus; this.downloadCurrentVotes = downloadCurrentVotes; this.downloadMissingServerDescriptors = downloadMissingServerDescriptors; this.downloadMissingExtraInfos = downloadMissingExtraInfos; + this.downloadMissingMicrodescriptors = + downloadMissingMicrodescriptors; this.downloadAllServerDescriptors = downloadAllServerDescriptors; this.downloadAllExtraInfos = downloadAllExtraInfos; this.downloadCompressed = downloadCompressed; @@ -253,6 +303,8 @@ public class RelayDescriptorDownloader { * we are interested in and that are likely to be found on the * directory authorities. */ this.missingDescriptors = new TreeMap<String, String>(); + this.microdescriptorKeys = new HashMap<String, Set<String>>(); + this.missingMicrodescriptors = new HashSet<String>(); this.missingDescriptorsFile = new File( "stats/missing-relay-descriptors"); if (this.missingDescriptorsFile.exists()) { @@ -266,15 +318,19 @@ public class RelayDescriptorDownloader { if (line.split(",").length > 2) { String published = line.split(",")[1]; if (((line.startsWith("consensus,") || + line.startsWith("consensus-microdesc,") || line.startsWith("vote,")) && this.currentValidAfter.equals(published)) || ((line.startsWith("server,") || - line.startsWith("extra,")) && + line.startsWith("extra,") || + line.startsWith("micro,")) && this.descriptorCutOff.compareTo(published) < 0)) { if (!line.endsWith("NA")) { /* Not missing. 
*/ } else if (line.startsWith("consensus,")) { oldMissingConsensuses++; + } else if (line.startsWith("consensus-microdesc,")) { + oldMissingMicrodescConsensuses++; } else if (line.startsWith("vote,")) { oldMissingVotes++; } else if (line.startsWith("server,")) { @@ -285,6 +341,23 @@ public class RelayDescriptorDownloader { int separateAt = line.lastIndexOf(","); this.missingDescriptors.put(line.substring(0, separateAt), line.substring(separateAt + 1)); + if (line.startsWith("micro,")) { + String microdescriptorDigest = line.split(",")[3]; + String microdescriptorKey = line.substring(0, + line.lastIndexOf(",")); + if (!this.microdescriptorKeys.containsKey( + microdescriptorDigest)) { + this.microdescriptorKeys.put( + microdescriptorDigest, new HashSet<String>()); + } + this.microdescriptorKeys.get(microdescriptorDigest).add( + microdescriptorKey); + if (line.endsWith("NA") && !this.missingMicrodescriptors. + contains(microdescriptorDigest)) { + this.missingMicrodescriptors.add(microdescriptorDigest); + oldMissingMicrodescriptors++; + } + } } } else { this.logger.fine("Invalid line '" + line + "' in " @@ -401,6 +474,65 @@ public class RelayDescriptorDownloader { }
/** + * We have parsed a microdesc consensus. Take this microdesc consensus + * off the missing list and add the <code>microdescriptors</code> which + * are in the format "<validafter>,<relayid>,<descid>" to that + * list. + */ + public void haveParsedMicrodescConsensus(String validAfter, + Set<String> microdescriptors) { + + /* Mark microdesc consensus as parsed. */ + if (this.currentValidAfter.equals(validAfter)) { + String microdescConsensusKey = "consensus-microdesc," + validAfter; + this.missingDescriptors.put(microdescConsensusKey, + this.currentTimestamp); + } + + /* Add microdescriptors to missing list. Exclude those that we already + * downloaded this month. (We download each microdescriptor at least + * once per month to keep the storage logic sane; otherwise we'd have + * to copy microdescriptors from the earlier month to the current + * month, and that gets messy.) */ + if (this.descriptorCutOff.compareTo(validAfter) < 0) { + String validAfterYearMonth = validAfter.substring(0, + "YYYY-MM".length()); + for (String microdescriptor : microdescriptors) { + String microdescriptorKey = "micro," + microdescriptor; + String parsed = "NA"; + String microdescriptorDigest = microdescriptor.split(",")[2]; + if (this.microdescriptorKeys.containsKey(microdescriptorDigest)) { + for (String otherMicrodescriptorKey : + this.microdescriptorKeys.get(microdescriptorDigest)) { + String otherValidAfter = + otherMicrodescriptorKey.split(",")[1]; + if (!otherValidAfter.startsWith(validAfterYearMonth)) { + continue; + } + String otherParsed = this.missingDescriptors.get( + otherMicrodescriptorKey); + if (otherParsed != null && !otherParsed.equals("NA")) { + parsed = otherParsed; + break; + } + } + } else { + this.microdescriptorKeys.put( + microdescriptorDigest, new HashSet<String>()); + } + this.microdescriptorKeys.get(microdescriptorDigest).add( + microdescriptorKey); + this.missingDescriptors.put(microdescriptorKey, parsed); + if (parsed.equals("NA") && + 
!this.missingMicrodescriptors.contains(microdescriptorDigest)) { + this.missingMicrodescriptors.add(microdescriptorDigest); + this.newMissingMicrodescriptors++; + } + } + } + } + + /** * We have parsed a vote. Take this vote off the missing list and add * the <code>serverDescriptors</code> which are in the format * "<published>,<relayid>,<descid>" to that list. @@ -470,6 +602,23 @@ public class RelayDescriptorDownloader { }
/** + * We have parsed a microdescriptor. Take it off the missing list. + */ + public void haveParsedMicrodescriptor(String descriptorDigest) { + if (this.microdescriptorKeys.containsKey(descriptorDigest)) { + for (String microdescriptorKey : + this.microdescriptorKeys.get(descriptorDigest)) { + String validAfter = microdescriptorKey.split(",")[1]; + if (this.descriptorCutOff.compareTo(validAfter) < 0) { + this.missingDescriptors.put(microdescriptorKey, + this.currentTimestamp); + } + } + this.missingMicrodescriptors.remove(descriptorDigest); + } + } + + /** * Downloads missing descriptors that we think might still be available * on the directory authorities as well as all server and extra-info * descriptors once per day. @@ -483,6 +632,12 @@ public class RelayDescriptorDownloader { this.missingDescriptors.put(consensusKey, "NA"); this.newMissingConsensuses++; } + String microdescConsensusKey = "consensus-microdesc," + + this.currentValidAfter; + if (!this.missingDescriptors.containsKey(microdescConsensusKey)) { + this.missingDescriptors.put(microdescConsensusKey, "NA"); + this.newMissingMicrodescConsensuses++; + } for (String authority : authorityFingerprints) { String voteKey = "vote," + this.currentValidAfter + "," + authority; if (!this.missingDescriptors.containsKey(voteKey)) { @@ -516,6 +671,19 @@ public class RelayDescriptorDownloader { } }
+ /* Then try to download the microdesc consensus. */ + if (downloadCurrentMicrodescConsensus) { + if (this.missingDescriptors.containsKey( + microdescConsensusKey) && + this.missingDescriptors.get(microdescConsensusKey). + equals("NA")) { + this.requestedMicrodescConsensuses++; + this.downloadedMicrodescConsensuses += + this.downloadResourceFromAuthority(authority, + "/tor/status-vote/current/consensus-microdesc"); + } + } + /* Next, try to download current votes that we're missing. */ if (downloadCurrentVotes) { String voteKeyPrefix = "vote," + this.currentValidAfter; @@ -538,10 +706,9 @@ public class RelayDescriptorDownloader {
/* Download either all server and extra-info descriptors or only * those that we're missing. Start with server descriptors, then - * request extra-info descriptors. */ - List<String> types = new ArrayList<String>(Arrays.asList( - "server,extra".split(","))); - for (String type : types) { + * request extra-info descriptors. Finally, request missing + * microdescriptors. */ + for (String type : new String[] { "server", "extra", "micro" }) {
/* Download all server or extra-info descriptors from this * authority if we haven't done so for 24 hours and if we're @@ -557,21 +724,24 @@ public class RelayDescriptorDownloader { this.requestedAllServerDescriptors++; this.downloadedAllServerDescriptors += downloadedAllDescriptors; - } else { + } else if (type.equals("extra")) { this.requestedAllExtraInfoDescriptors++; this.downloadedAllExtraInfoDescriptors += downloadedAllDescriptors; }
- /* Download missing server or extra-info descriptors if we're - * configured to do so. */ + /* Download missing server descriptors, extra-info descriptors, + * and microdescriptors if we're configured to do so. */ } else if ((type.equals("server") && this.downloadMissingServerDescriptors) || - (type.equals("extra") && this.downloadMissingExtraInfos)) { + (type.equals("extra") && this.downloadMissingExtraInfos) || + (type.equals("micro") && + this.downloadMissingMicrodescriptors)) {
/* Go through the list of missing descriptors of this type * and combine the descriptor identifiers to a URL of up to - * 96 descriptors that we can download at once. */ + * 96 server or extra-info descriptors or 92 microdescriptors + * that we can download at once. */ SortedSet<String> descriptorIdentifiers = new TreeSet<String>(); for (Map.Entry<String, String> e : @@ -587,8 +757,12 @@ public class RelayDescriptorDownloader { StringBuilder combinedResource = null; int descriptorsInCombinedResource = 0, requestedDescriptors = 0, downloadedDescriptors = 0; + int maxDescriptorsInCombinedResource = + type.equals("micro") ? 92 : 96; + String separator = type.equals("micro") ? "-" : "+"; for (String descriptorIdentifier : descriptorIdentifiers) { - if (descriptorsInCombinedResource >= 96) { + if (descriptorsInCombinedResource >= + maxDescriptorsInCombinedResource) { requestedDescriptors += descriptorsInCombinedResource; downloadedDescriptors += this.downloadResourceFromAuthority(authority, @@ -600,7 +774,7 @@ public class RelayDescriptorDownloader { combinedResource = new StringBuilder("/tor/" + type + "/d/" + descriptorIdentifier); } else { - combinedResource.append("+" + descriptorIdentifier); + combinedResource.append(separator + descriptorIdentifier); } descriptorsInCombinedResource++; } @@ -615,11 +789,16 @@ public class RelayDescriptorDownloader { requestedDescriptors; this.downloadedMissingServerDescriptors += downloadedDescriptors; - } else { + } else if (type.equals("extra")) { this.requestedMissingExtraInfoDescriptors += requestedDescriptors; this.downloadedMissingExtraInfoDescriptors += downloadedDescriptors; + } else if (type.equals("micro")) { + this.requestedMissingMicrodescriptors += + requestedDescriptors; + this.downloadedMissingMicrodescriptors += + downloadedDescriptors; } } } @@ -680,7 +859,8 @@ public class RelayDescriptorDownloader { receivedDescriptors = 1; } else if (resource.startsWith("/tor/server/") || resource.startsWith("/tor/extra/")) { - 
if (resource.equals("/tor/server/all")) { + if (resource.equals("/tor/server/all") || + resource.equals("/tor/extra/all")) { this.lastDownloadedAllDescriptors.put(authority, this.currentTimestamp); } @@ -715,6 +895,60 @@ public class RelayDescriptorDownloader { this.rdp.parse(descBytes); receivedDescriptors++; } + } else if (resource.startsWith("/tor/micro/")) { + /* TODO We need to parse microdescriptors ourselves, rather than + * RelayDescriptorParser, because only we know the valid-after + * time(s) of microdesc consensus(es) containing this + * microdescriptor. However, this breaks functional abstraction + * pretty badly. */ + SimpleDateFormat parseFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + parseFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String ascii = null; + try { + ascii = new String(allData, "US-ASCII"); + } catch (UnsupportedEncodingException e) { + /* No way that US-ASCII is not supported. */ + } + int start = -1, end = -1; + String startToken = "onion-key\n"; + while (end < ascii.length()) { + start = ascii.indexOf(startToken, end); + if (start < 0) { + break; + } + end = ascii.indexOf(startToken, start + 1); + if (end < 0) { + end = ascii.length(); + if (end <= start) { + break; + } + } + byte[] descBytes = new byte[end - start]; + System.arraycopy(allData, start, descBytes, 0, end - start); + String digest256Base64 = Base64.encodeBase64String( + DigestUtils.sha256(descBytes)).replaceAll("=", ""); + if (!this.microdescriptorKeys.containsKey(digest256Base64)) { + continue; + } + String digest256Hex = DigestUtils.sha256Hex(descBytes); + for (String microdescriptorKey : + this.microdescriptorKeys.get(digest256Base64)) { + String validAfterTime = microdescriptorKey.split(",")[1]; + try { + long validAfter = + parseFormat.parse(validAfterTime).getTime(); + this.rdp.storeMicrodescriptor(descBytes, digest256Hex, + digest256Base64, validAfter); + } catch (ParseException e) { + this.logger.log(Level.WARNING, "Could not parse " + + 
"valid-after time '" + validAfterTime + "' in " + + "microdescriptor key. Not storing microdescriptor.", + e); + } + } + receivedDescriptors++; + } } } return receivedDescriptors; @@ -727,8 +961,9 @@ public class RelayDescriptorDownloader { public void writeFile() {
/* Write missing descriptors file to disk. */ - int missingConsensuses = 0, missingVotes = 0, - missingServerDescriptors = 0, missingExtraInfoDescriptors = 0; + int missingConsensuses = 0, missingMicrodescConsensuses = 0, + missingVotes = 0, missingServerDescriptors = 0, + missingExtraInfoDescriptors = 0; try { this.logger.fine("Writing file " + this.missingDescriptorsFile.getAbsolutePath() + "..."); @@ -742,12 +977,15 @@ public class RelayDescriptorDownloader { /* Not missing. */ } else if (key.startsWith("consensus,")) { missingConsensuses++; + } else if (key.startsWith("consensus-microdesc,")) { + missingMicrodescConsensuses++; } else if (key.startsWith("vote,")) { missingVotes++; } else if (key.startsWith("server,")) { missingServerDescriptors++; } else if (key.startsWith("extra,")) { missingExtraInfoDescriptors++; + } else if (key.startsWith("micro,")) { } bw.write(key + "," + value + "\n"); } @@ -758,6 +996,7 @@ public class RelayDescriptorDownloader { this.logger.log(Level.WARNING, "Failed writing " + this.missingDescriptorsFile.getAbsolutePath() + "!", e); } + int missingMicrodescriptors = this.missingMicrodescriptors.size();
/* Write text file containing the directory authorities and when we * last downloaded all server and extra-info descriptors from them to @@ -790,25 +1029,33 @@ public class RelayDescriptorDownloader { + "directory authorities."); this.logger.info("At the beginning of this execution, we were " + "missing " + oldMissingConsensuses + " consensus(es), " + + oldMissingMicrodescConsensuses + " microdesc consensus(es), " + oldMissingVotes + " vote(s), " + oldMissingServerDescriptors - + " server descriptor(s), and " + oldMissingExtraInfoDescriptors - + " extra-info descriptor(s)."); + + " server descriptor(s), " + oldMissingExtraInfoDescriptors + + " extra-info descriptor(s), and " + oldMissingMicrodescriptors + + " microdescriptor(s)."); this.logger.info("During this execution, we added " + this.newMissingConsensuses + " consensus(es), " - + this.newMissingVotes + " vote(s), " - + this.newMissingServerDescriptors + " server descriptor(s), and " - + this.newMissingExtraInfoDescriptors + " extra-info " - + "descriptor(s) to the missing list, some of which we also " + + this.newMissingMicrodescConsensuses + + " microdesc consensus(es), " + this.newMissingVotes + + " vote(s), " + this.newMissingServerDescriptors + + " server descriptor(s), " + this.newMissingExtraInfoDescriptors + + " extra-info descriptor(s), and " + + this.newMissingMicrodescriptors + " microdescriptor(s) to the " + + "missing list, some of which we also " + "requested and removed from the list again."); this.logger.info("We requested " + this.requestedConsensuses - + " consensus(es), " + this.requestedVotes + " vote(s), " - + this.requestedMissingServerDescriptors + " missing server " - + "descriptor(s), " + this.requestedAllServerDescriptors + + " consensus(es), " + this.requestedMicrodescConsensuses + + " microdesc consensus(es), " + this.requestedVotes + + " vote(s), " + this.requestedMissingServerDescriptors + + " missing server descriptor(s), " + + this.requestedAllServerDescriptors + " times all 
server descriptors, " + this.requestedMissingExtraInfoDescriptors + " missing " - + "extra-info descriptor(s), and " + + "extra-info descriptor(s), " + this.requestedAllExtraInfoDescriptors + " times all extra-info " - + "descriptors from the directory authorities."); + + "descriptors, and " + this.requestedMissingMicrodescriptors + + " missing microdescriptor(s) from the directory authorities."); StringBuilder sb = new StringBuilder(); for (String authority : this.authorities) { sb.append(" " + authority + "=" @@ -818,20 +1065,26 @@ public class RelayDescriptorDownloader { + "authorities:" + sb.toString()); this.logger.info("We successfully downloaded " + this.downloadedConsensuses + " consensus(es), " - + this.downloadedVotes + " vote(s), " - + this.downloadedMissingServerDescriptors + " missing server " - + "descriptor(s), " + this.downloadedAllServerDescriptors + + this.downloadedMicrodescConsensuses + + " microdesc consensus(es), " + this.downloadedVotes + + " vote(s), " + this.downloadedMissingServerDescriptors + + " missing server descriptor(s), " + + this.downloadedAllServerDescriptors + " server descriptor(s) when downloading all descriptors, " + this.downloadedMissingExtraInfoDescriptors + " missing " - + "extra-info descriptor(s) and " + + "extra-info descriptor(s), " + this.downloadedAllExtraInfoDescriptors + " extra-info " - + "descriptor(s) when downloading all descriptors."); + + "descriptor(s) when downloading all descriptors, and " + + this.downloadedMissingMicrodescriptors + + " missing microdescriptor(s)."); this.logger.info("At the end of this execution, we are missing " - + missingConsensuses + " consensus(es), " + missingVotes - + " vote(s), " + missingServerDescriptors + " server " - + "descriptor(s), and " + missingExtraInfoDescriptors - + " extra-info descriptor(s), some of which we may try in the next " - + "execution."); + + missingConsensuses + " consensus(es), " + + missingMicrodescConsensuses + " microdesc consensus(es), " + + 
missingVotes + " vote(s), " + missingServerDescriptors + + " server descriptor(s), " + missingExtraInfoDescriptors + + " extra-info descriptor(s), and " + missingMicrodescriptors + + " microdescriptor(s), some of which we may try in the next " + + "execution."); } }
diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java index 107ba73..2873909 100644 --- a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java +++ b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java @@ -1,4 +1,4 @@ -/* Copyright 2010--2012 The Tor Project +/* Copyright 2010--2014 The Tor Project * See LICENSE for licensing information */ package org.torproject.ernie.db.relaydescs;
@@ -31,6 +31,8 @@ public class RelayDescriptorParser { */ private ArchiveWriter aw;
+ private ArchiveReader ar; + /** * Missing descriptor downloader that uses the parse results to learn * which descriptors we are missing and want to download. @@ -62,7 +64,12 @@ public class RelayDescriptorParser { this.rdd = rdd; }
- public void parse(byte[] data) { + public void setArchiveReader(ArchiveReader ar) { + this.ar = ar; + } + + public boolean parse(byte[] data) { + boolean stored = false; try { /* Convert descriptor to ASCII for parsing. This means we'll lose * the non-ASCII chars, but we don't care about them for parsing @@ -76,21 +83,27 @@ public class RelayDescriptorParser { if (line == null) { this.logger.fine("We were given an empty descriptor for " + "parsing. Ignoring."); - return; + return false; } SimpleDateFormat parseFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); parseFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - if (line.equals("network-status-version 3")) { - boolean isConsensus = true; + if (line.startsWith("network-status-version 3")) { + String statusType = "consensus"; + if (line.equals("network-status-version 3 microdesc")) { + statusType = "consensus-microdesc"; + } String validAfterTime = null, fingerprint = null, dirSource = null; long validAfter = -1L, dirKeyPublished = -1L; SortedSet<String> dirSources = new TreeSet<String>(); SortedSet<String> serverDescriptors = new TreeSet<String>(); SortedSet<String> serverDescriptorDigests = new TreeSet<String>(); + SortedSet<String> microdescriptorKeys = new TreeSet<String>(); + SortedSet<String> microdescriptorDigests = new TreeSet<String>(); StringBuilder certificateStringBuilder = null; String certificateString = null; + String lastRelayIdentity = null; while ((line = br.readLine()) != null) { if (certificateStringBuilder != null) { if (line.startsWith("r ")) { @@ -101,7 +114,7 @@ public class RelayDescriptorParser { } } if (line.equals("vote-status vote")) { - isConsensus = false; + statusType = "vote"; } else if (line.startsWith("valid-after ")) { validAfterTime = line.substring("valid-after ".length()); validAfter = parseFormat.parse(validAfterTime).getTime(); @@ -121,23 +134,43 @@ public class RelayDescriptorParser { getTime(); } else if (line.startsWith("r ")) { String[] parts = line.split(" "); - 
if (parts.length < 9) { + if (parts.length == 8) { + lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64( + parts[2] + "=")).toLowerCase(); + } else if (parts.length == 9) { + lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64( + parts[2] + "=")).toLowerCase(); + String serverDesc = Hex.encodeHexString(Base64.decodeBase64( + parts[3] + "=")).toLowerCase(); + String publishedTime = parts[4] + " " + parts[5]; + serverDescriptors.add(publishedTime + "," + + lastRelayIdentity + "," + serverDesc); + serverDescriptorDigests.add(serverDesc); + } else { this.logger.log(Level.WARNING, "Could not parse r line '" + line + "' in descriptor. Skipping."); break; } - String publishedTime = parts[4] + " " + parts[5]; - String relayIdentity = Hex.encodeHexString( - Base64.decodeBase64(parts[2] + "=")). - toLowerCase(); - String serverDesc = Hex.encodeHexString(Base64.decodeBase64( - parts[3] + "=")).toLowerCase(); - serverDescriptors.add(publishedTime + "," + relayIdentity - + "," + serverDesc); - serverDescriptorDigests.add(serverDesc); + } else if (line.startsWith("m ")) { + String[] parts = line.split(" "); + if (parts.length == 2 && parts[1].length() == 43) { + String digest256Base64 = parts[1]; + microdescriptorKeys.add(validAfterTime + "," + + lastRelayIdentity + "," + digest256Base64); + String digest256Hex = Hex.encodeHexString( + Base64.decodeBase64(digest256Base64 + "=")). + toLowerCase(); + microdescriptorDigests.add(digest256Hex); + } else if (parts.length != 3 || + !parts[2].startsWith("sha256=") || + parts[2].length() != 50) { + this.logger.log(Level.WARNING, "Could not parse m line '" + + line + "' in descriptor. 
Skipping."); + break; + } } } - if (isConsensus) { + if (statusType.equals("consensus")) { if (this.rdd != null) { this.rdd.haveParsedConsensus(validAfterTime, dirSources, serverDescriptors); @@ -145,6 +178,21 @@ public class RelayDescriptorParser { if (this.aw != null) { this.aw.storeConsensus(data, validAfter, dirSources, serverDescriptorDigests); + stored = true; + } + } else if (statusType.equals("consensus-microdesc")) { + if (this.rdd != null) { + this.rdd.haveParsedMicrodescConsensus(validAfterTime, + microdescriptorKeys); + } + if (this.ar != null) { + this.ar.haveParsedMicrodescConsensus(validAfterTime, + microdescriptorDigests); + } + if (this.aw != null) { + this.aw.storeMicrodescConsensus(data, validAfter, + microdescriptorDigests); + stored = true; } } else { if (this.aw != null || this.rdd != null) { @@ -161,6 +209,7 @@ public class RelayDescriptorParser { if (this.aw != null) { this.aw.storeVote(data, validAfter, dirSource, digest, serverDescriptorDigests); + stored = true; } if (this.rdd != null) { this.rdd.haveParsedVote(validAfterTime, fingerprint, @@ -171,6 +220,7 @@ public class RelayDescriptorParser { if (this.aw != null) { this.aw.storeCertificate(certificateString.getBytes(), dirSource, dirKeyPublished); + stored = true; } } } @@ -209,6 +259,7 @@ public class RelayDescriptorParser { if (this.aw != null && digest != null) { this.aw.storeServerDescriptor(data, digest, published, extraInfoDigest); + stored = true; } if (this.rdd != null && digest != null) { this.rdd.haveParsedServerDescriptor(publishedTime, @@ -238,19 +289,26 @@ public class RelayDescriptorParser { } } int sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 || sig >= 0 || sig > start) { + if (start >= 0 && sig >= 0 && sig > start) { byte[] forDigest = new byte[sig - start]; System.arraycopy(data, start, forDigest, 0, sig - start); digest = DigestUtils.shaHex(forDigest); } if (this.aw != null && digest != null) { this.aw.storeExtraInfoDescriptor(data, digest, 
published); + stored = true; } if (this.rdd != null && digest != null) { this.rdd.haveParsedExtraInfoDescriptor(publishedTime, relayIdentifier.toLowerCase(), digest); } + } else if (line.equals("onion-key")) { + /* Cannot store microdescriptors without knowing valid-after + * time(s) of microdesc consensuses containing them, because we + * don't know which month directories to put them in. Have to use + * storeMicrodescriptor below. */ } + br.close(); } catch (IOException e) { this.logger.log(Level.WARNING, "Could not parse descriptor. " + "Skipping.", e); @@ -258,6 +316,17 @@ public class RelayDescriptorParser { this.logger.log(Level.WARNING, "Could not parse descriptor. " + "Skipping.", e); } + return stored; + } + + public void storeMicrodescriptor(byte[] data, String digest256Hex, + String digest256Base64, long validAfter) { + if (this.aw != null) { + this.aw.storeMicrodescriptor(data, digest256Hex, validAfter); + } + if (this.rdd != null) { + this.rdd.haveParsedMicrodescriptor(digest256Base64); + } } }