[tor-commits] [metrics-db/master] Download microdesc consensuses and microdescriptors.

karsten at torproject.org karsten at torproject.org
Wed Jan 22 08:14:00 UTC 2014


commit 4694f0501915d00478062d9066c4132e6f7b52aa
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Jan 9 14:26:36 2014 +0100

    Download microdesc consensuses and microdescriptors.
    
    Also support reading microdescriptors from disk.
    
    Implements #2785.
---
 config.template                                    |    8 +
 deps/metrics-lib                                   |    2 +-
 .../torproject/ernie/db/main/Configuration.java    |   14 +
 .../ernie/db/relaydescs/ArchiveReader.java         |  145 +++++++-
 .../ernie/db/relaydescs/ArchiveWriter.java         |  255 +++++++++++---
 .../db/relaydescs/RelayDescriptorDownloader.java   |  357 +++++++++++++++++---
 .../ernie/db/relaydescs/RelayDescriptorParser.java |  105 +++++-
 7 files changed, 771 insertions(+), 115 deletions(-)

diff --git a/config.template b/config.template
index 1b84775..cd31802 100644
--- a/config.template
+++ b/config.template
@@ -33,6 +33,10 @@
 ## Download the current consensus (only if DownloadRelayDescriptors is 1)
 #DownloadCurrentConsensus 1
 #
+## Download the current microdesc consensus (only if
+## DownloadRelayDescriptors is 1)
+#DownloadCurrentMicrodescConsensus 1
+#
 ## Download current votes (only if DownloadRelayDescriptors is 1)
 #DownloadCurrentVotes 1
 #
@@ -44,6 +48,10 @@
 ## DownloadRelayDescriptors is 1)
 #DownloadMissingExtraInfoDescriptors 1
 #
+## Download missing microdescriptors (only if
+## DownloadRelayDescriptors is 1)
+#DownloadMissingMicrodescriptors 1
+#
 ## Download all server descriptors from the directory authorities at most
 ## once a day (only if DownloadRelayDescriptors is 1)
 #DownloadAllServerDescriptors 0
diff --git a/deps/metrics-lib b/deps/metrics-lib
index 3e60ccd..38c48dd 160000
--- a/deps/metrics-lib
+++ b/deps/metrics-lib
@@ -1 +1 @@
-Subproject commit 3e60ccdaaba598cabb7281d45f9a415299b8e3e3
+Subproject commit 38c48ddd0c49978bbfa5e0a987cfd3a890692a5c
diff --git a/src/org/torproject/ernie/db/main/Configuration.java b/src/org/torproject/ernie/db/main/Configuration.java
index 7b5e53d..85d889e 100644
--- a/src/org/torproject/ernie/db/main/Configuration.java
+++ b/src/org/torproject/ernie/db/main/Configuration.java
@@ -53,9 +53,11 @@ public class Configuration {
       + "ED03BB616EB2F60BEC80151114BB25CEF515B226,"
       + "EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97").split(","));
   private boolean downloadCurrentConsensus = true;
+  private boolean downloadCurrentMicrodescConsensus = true;
   private boolean downloadCurrentVotes = true;
   private boolean downloadMissingServerDescriptors = true;
   private boolean downloadMissingExtraInfoDescriptors = true;
+  private boolean downloadMissingMicrodescriptors = true;
   private boolean downloadAllServerDescriptors = false;
   private boolean downloadAllExtraInfoDescriptors = false;
   private boolean compressRelayDescriptorDownloads;
@@ -141,6 +143,9 @@ public class Configuration {
         } else if (line.startsWith("DownloadCurrentConsensus")) {
           this.downloadCurrentConsensus = Integer.parseInt(
               line.split(" ")[1]) != 0;
+        } else if (line.startsWith("DownloadCurrentMicrodescConsensus")) {
+          this.downloadCurrentMicrodescConsensus = Integer.parseInt(
+              line.split(" ")[1]) != 0;
         } else if (line.startsWith("DownloadCurrentVotes")) {
           this.downloadCurrentVotes = Integer.parseInt(
               line.split(" ")[1]) != 0;
@@ -151,6 +156,9 @@ public class Configuration {
             "DownloadMissingExtraInfoDescriptors")) {
           this.downloadMissingExtraInfoDescriptors = Integer.parseInt(
               line.split(" ")[1]) != 0;
+        } else if (line.startsWith("DownloadMissingMicrodescriptors")) {
+          this.downloadMissingMicrodescriptors = Integer.parseInt(
+              line.split(" ")[1]) != 0;
         } else if (line.startsWith("DownloadAllServerDescriptors")) {
           this.downloadAllServerDescriptors = Integer.parseInt(
               line.split(" ")[1]) != 0;
@@ -253,6 +261,9 @@ public class Configuration {
   public boolean getDownloadCurrentConsensus() {
     return this.downloadCurrentConsensus;
   }
+  public boolean getDownloadCurrentMicrodescConsensus() {
+    return this.downloadCurrentMicrodescConsensus;
+  }
   public boolean getDownloadCurrentVotes() {
     return this.downloadCurrentVotes;
   }
@@ -262,6 +273,9 @@ public class Configuration {
   public boolean getDownloadMissingExtraInfoDescriptors() {
     return this.downloadMissingExtraInfoDescriptors;
   }
+  public boolean getDownloadMissingMicrodescriptors() {
+    return this.downloadMissingMicrodescriptors;
+  }
   public boolean getDownloadAllServerDescriptors() {
     return this.downloadAllServerDescriptors;
   }
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java b/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java
index fba0a9f..ea54874 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveReader.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2012 The Tor Project
+/* Copyright 2010--2014 The Tor Project
  * See LICENSE for licensing information */
 package org.torproject.ernie.db.relaydescs;
 
@@ -11,14 +11,25 @@ import java.io.FileInputStream;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
 import java.util.SortedSet;
 import java.util.Stack;
+import java.util.TimeZone;
 import java.util.TreeSet;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
 
 /**
@@ -34,6 +45,7 @@ public class ArchiveReader {
       throw new IllegalArgumentException();
     }
 
+    rdp.setArchiveReader(this);
     int parsedFiles = 0, ignoredFiles = 0;
     Logger logger = Logger.getLogger(ArchiveReader.class.getName());
     SortedSet<String> archivesImportHistory = new TreeSet<String>();
@@ -59,6 +71,7 @@ public class ArchiveReader {
       Stack<File> filesInInputDir = new Stack<File>();
       filesInInputDir.add(archivesDirectory);
       List<File> problems = new ArrayList<File>();
+      Set<File> filesToRetry = new HashSet<File>();
       while (!filesInInputDir.isEmpty()) {
         File pop = filesInInputDir.pop();
         if (pop.isDirectory()) {
@@ -86,9 +99,6 @@ public class ArchiveReader {
                 FileInputStream fis = new FileInputStream(pop);
                 bis = new BufferedInputStream(fis);
               }
-              if (keepImportHistory) {
-                archivesImportHistory.add(pop.getName());
-              }
               ByteArrayOutputStream baos = new ByteArrayOutputStream();
               int len;
               byte[] data = new byte[1024];
@@ -97,7 +107,14 @@ public class ArchiveReader {
               }
               bis.close();
               byte[] allData = baos.toByteArray();
-              rdp.parse(allData);
+              boolean stored = rdp.parse(allData);
+              if (!stored) {
+                filesToRetry.add(pop);
+                continue;
+              }
+              if (keepImportHistory) {
+                archivesImportHistory.add(pop.getName());
+              }
               parsedFiles++;
             } catch (IOException e) {
               problems.add(pop);
@@ -108,6 +125,109 @@ public class ArchiveReader {
           }
         }
       }
+      for (File pop : filesToRetry) {
+        /* TODO We need to parse microdescriptors ourselves, rather than
+         * RelayDescriptorParser, because only we know the valid-after
+         * time(s) of microdesc consensus(es) containing this
+         * microdescriptor.  However, this breaks functional abstraction
+         * pretty badly. */
+        if (rdp != null) {
+          try {
+            BufferedInputStream bis = null;
+            if (pop.getName().endsWith(".bz2")) {
+              FileInputStream fis = new FileInputStream(pop);
+              BZip2CompressorInputStream bcis =
+                  new BZip2CompressorInputStream(fis);
+              bis = new BufferedInputStream(bcis);
+            } else {
+              FileInputStream fis = new FileInputStream(pop);
+              bis = new BufferedInputStream(fis);
+            }
+            ByteArrayOutputStream baos = new ByteArrayOutputStream();
+            int len;
+            byte[] data = new byte[1024];
+            while ((len = bis.read(data, 0, 1024)) >= 0) {
+              baos.write(data, 0, len);
+            }
+            bis.close();
+            byte[] allData = baos.toByteArray();
+            BufferedReader br = new BufferedReader(new StringReader(
+                new String(allData, "US-ASCII")));
+            String line;
+            do {
+              line = br.readLine();
+            } while (line != null && line.startsWith("@"));
+            br.close();
+            if (line == null) {
+              logger.fine("We were given an empty descriptor for "
+                  + "parsing. Ignoring.");
+              continue;
+            }
+            if (!line.equals("onion-key")) {
+              logger.fine("Skipping non-recognized descriptor.");
+              continue;
+            }
+            SimpleDateFormat parseFormat =
+                new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+            parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+            String ascii = null;
+            try {
+              ascii = new String(allData, "US-ASCII");
+            } catch (UnsupportedEncodingException e) {
+              /* No way that US-ASCII is not supported. */
+            }
+            int start = -1, end = -1;
+            String startToken = "onion-key\n";
+            while (end < ascii.length()) {
+              start = ascii.indexOf(startToken, end);
+              if (start < 0) {
+                break;
+              }
+              end = ascii.indexOf(startToken, start + 1);
+              if (end < 0) {
+                end = ascii.length();
+                if (end <= start) {
+                  break;
+                }
+              }
+              byte[] descBytes = new byte[end - start];
+              System.arraycopy(allData, start, descBytes, 0, end - start);
+              String digest256Base64 = Base64.encodeBase64String(
+                  DigestUtils.sha256(descBytes)).replaceAll("=", "");
+              String digest256Hex = DigestUtils.sha256Hex(descBytes);
+              if (!this.microdescriptorValidAfterTimes.containsKey(
+                  digest256Hex)) {
+                logger.fine("Could not store microdescriptor '"
+                  + digest256Hex + "', which was not contained in a "
+                  + "microdesc consensus.");
+                continue;
+              }
+              for (String validAfterTime :
+                  this.microdescriptorValidAfterTimes.get(digest256Hex)) {
+                try {
+                  long validAfter =
+                      parseFormat.parse(validAfterTime).getTime();
+                  rdp.storeMicrodescriptor(descBytes, digest256Hex,
+                      digest256Base64, validAfter);
+                } catch (ParseException e) {
+                  logger.log(Level.WARNING, "Could not parse "
+                      + "valid-after time '" + validAfterTime + "'. Not "
+                      + "storing microdescriptor.", e);
+                }
+              }
+            }
+            if (keepImportHistory) {
+              archivesImportHistory.add(pop.getName());
+            }
+            parsedFiles++;
+          } catch (IOException e) {
+            problems.add(pop);
+            if (problems.size() > 3) {
+              break;
+            }
+          }
+        }
+      }
       if (problems.isEmpty()) {
         logger.fine("Finished importing files in directory "
             + archivesDirectory + "/.");
@@ -142,5 +262,20 @@ public class ArchiveReader {
         + "directory:\nParsed " + parsedFiles + ", ignored "
         + ignoredFiles + " files.");
   }
+
+  private Map<String, Set<String>> microdescriptorValidAfterTimes =
+      new HashMap<String, Set<String>>();
+  public void haveParsedMicrodescConsensus(String validAfterTime,
+      SortedSet<String> microdescriptorDigests) {
+    for (String microdescriptor : microdescriptorDigests) {
+      if (!this.microdescriptorValidAfterTimes.containsKey(
+          microdescriptor)) {
+        this.microdescriptorValidAfterTimes.put(microdescriptor,
+            new HashSet<String>());
+      }
+      this.microdescriptorValidAfterTimes.get(microdescriptor).add(
+          validAfterTime);
+    }
+  }
 }
 
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
index 1b4f774..a179f5b 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2012 The Tor Project
+/* Copyright 2010--2014 The Tor Project
  * See LICENSE for licensing information */
 package org.torproject.ernie.db.relaydescs;
 
@@ -73,12 +73,16 @@ public class ArchiveWriter extends Thread {
   private Logger logger;
   private File outputDirectory;
   private DescriptorParser descriptorParser;
-  private int storedConsensusesCounter = 0, storedVotesCounter = 0,
+  private int storedConsensusesCounter = 0,
+      storedMicrodescConsensusesCounter = 0, storedVotesCounter = 0,
       storedCertsCounter = 0, storedServerDescriptorsCounter = 0,
-      storedExtraInfoDescriptorsCounter = 0;
+      storedExtraInfoDescriptorsCounter = 0,
+      storedMicrodescriptorsCounter = 0;
 
   private SortedMap<Long, SortedSet<String>> storedConsensuses =
       new TreeMap<Long, SortedSet<String>>();
+  private SortedMap<Long, SortedSet<String>> storedMicrodescConsensuses =
+      new TreeMap<Long, SortedSet<String>>();
   private SortedMap<Long, Integer> expectedVotes =
       new TreeMap<Long, Integer>();
   private SortedMap<Long, SortedMap<String, SortedSet<String>>>
@@ -88,11 +92,15 @@ public class ArchiveWriter extends Thread {
       new TreeMap<Long, Map<String, String>>();
   private SortedMap<Long, Set<String>> storedExtraInfoDescriptors =
       new TreeMap<Long, Set<String>>();
+  private SortedMap<Long, Set<String>> storedMicrodescriptors =
+      new TreeMap<Long, Set<String>>();
 
   private File storedServerDescriptorsFile = new File(
       "stats/stored-server-descriptors");
   private File storedExtraInfoDescriptorsFile = new File(
       "stats/stored-extra-info-descriptors");
+  private File storedMicrodescriptorsFile = new File(
+      "stats/stored-microdescriptors");
 
   private void loadDescriptorDigests() {
     SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
@@ -155,6 +163,33 @@ public class ArchiveWriter extends Thread {
         }
         br.close();
       }
+      if (this.storedMicrodescriptorsFile.exists()) {
+        BufferedReader br = new BufferedReader(new FileReader(
+            this.storedMicrodescriptorsFile));
+        String line;
+        while ((line = br.readLine()) != null) {
+          String[] parts = line.split(",");
+          if (parts.length != 2) {
+            this.logger.warning("Could not load microdescriptor digests "
+                + "because of illegal line '" + line + "'.  We might not "
+                + "be able to correctly check descriptors for "
+                + "completeness.");
+            break;
+          }
+          long validAfter = dateTimeFormat.parse(parts[0]).getTime();
+          if (validAfter < this.now - 48L * 60L * 60L * 1000L) {
+            continue;
+          }
+          if (!this.storedMicrodescriptors.containsKey(validAfter)) {
+            this.storedMicrodescriptors.put(validAfter,
+                new HashSet<String>());
+          }
+          String microdescriptorDigest = parts[1];
+          this.storedMicrodescriptors.get(validAfter).add(
+              microdescriptorDigest);
+        }
+        br.close();
+      }
     } catch (ParseException e) {
       this.logger.log(Level.WARNING, "Could not load descriptor "
           + "digests.  We might not be able to correctly check "
@@ -198,6 +233,18 @@ public class ArchiveWriter extends Thread {
         }
       }
       bw.close();
+      this.storedMicrodescriptorsFile.getParentFile().mkdirs();
+      bw = new BufferedWriter(new FileWriter(
+          this.storedMicrodescriptorsFile));
+      for (Map.Entry<Long, Set<String>> e :
+          this.storedMicrodescriptors.entrySet()) {
+        String validAfter = dateTimeFormat.format(e.getKey());
+        for (String microdescriptorDigest : e.getValue()) {
+          bw.write(String.format("%s,%s%n", validAfter,
+              microdescriptorDigest));
+        }
+      }
+      bw.close();
     } catch (IOException e) {
       this.logger.log(Level.WARNING, "Could not save descriptor "
           + "digests.  We might not be able to correctly check "
@@ -228,9 +275,11 @@ public class ArchiveWriter extends Thread {
       rdd = new RelayDescriptorDownloader(rdp, dirSources,
           config.getDownloadVotesByFingerprint(),
           config.getDownloadCurrentConsensus(),
+          config.getDownloadCurrentMicrodescConsensus(),
           config.getDownloadCurrentVotes(),
           config.getDownloadMissingServerDescriptors(),
           config.getDownloadMissingExtraInfoDescriptors(),
+          config.getDownloadMissingMicrodescriptors(),
           config.getDownloadAllServerDescriptors(),
           config.getDownloadAllExtraInfoDescriptors(),
           config.getCompressRelayDescriptorDownloads());
@@ -314,15 +363,39 @@ public class ArchiveWriter extends Thread {
     if (this.store(CONSENSUS_ANNOTATION, data, outputFiles)) {
       this.storedConsensusesCounter++;
     }
-    SimpleDateFormat dateTimeFormat =
-        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
     if (this.now - validAfter < 3L * 60L * 60L * 1000L) {
       this.storedConsensuses.put(validAfter, serverDescriptorDigests);
       this.expectedVotes.put(validAfter, dirSources.size());
     }
   }
 
+  private static final byte[] MICRODESCCONSENSUS_ANNOTATION =
+      "@type network-status-microdesc-consensus-3 1.0\n".getBytes();
+  public void storeMicrodescConsensus(byte[] data, long validAfter,
+      SortedSet<String> microdescriptorDigests) {
+    SimpleDateFormat yearMonthDirectoryFormat = new SimpleDateFormat(
+        "yyyy/MM");
+    yearMonthDirectoryFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    SimpleDateFormat dayDirectoryFileFormat = new SimpleDateFormat(
+        "dd/yyyy-MM-dd-HH-mm-ss");
+    dayDirectoryFileFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    File tarballFile = new File(this.outputDirectory
+        + "/microdesc/" + yearMonthDirectoryFormat.format(validAfter)
+        + "/consensus-microdesc/"
+        + dayDirectoryFileFormat.format(validAfter)
+        + "-consensus-microdesc");
+    File rsyncFile = new File("rsync/relay-descriptors/microdescs/"
+        + "consensus-microdesc/" + tarballFile.getName());
+    File[] outputFiles = new File[] { tarballFile, rsyncFile };
+    if (this.store(MICRODESCCONSENSUS_ANNOTATION, data, outputFiles)) {
+      this.storedMicrodescConsensusesCounter++;
+    }
+    if (this.now - validAfter < 3L * 60L * 60L * 1000L) {
+      this.storedMicrodescConsensuses.put(validAfter,
+          microdescriptorDigests);
+    }
+  }
+
   private static final byte[] VOTE_ANNOTATION =
       "@type network-status-vote-3 1.0\n".getBytes();
   public void storeVote(byte[] data, long validAfter,
@@ -340,9 +413,6 @@ public class ArchiveWriter extends Thread {
     if (this.store(VOTE_ANNOTATION, data, outputFiles)) {
       this.storedVotesCounter++;
     }
-    SimpleDateFormat dateTimeFormat =
-        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
     if (this.now - validAfter < 3L * 60L * 60L * 1000L) {
       if (!this.storedVotes.containsKey(validAfter)) {
         this.storedVotes.put(validAfter,
@@ -384,9 +454,6 @@ public class ArchiveWriter extends Thread {
     if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, outputFiles)) {
       this.storedServerDescriptorsCounter++;
     }
-    SimpleDateFormat dateTimeFormat =
-        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
     if (this.now - published < 48L * 60L * 60L * 1000L) {
       if (!this.storedServerDescriptors.containsKey(published)) {
         this.storedServerDescriptors.put(published,
@@ -423,20 +490,57 @@ public class ArchiveWriter extends Thread {
     }
   }
 
+  private static final byte[] MICRODESCRIPTOR_ANNOTATION =
+      "@type microdescriptor 1.0\n".getBytes();
+  public void storeMicrodescriptor(byte[] data,
+      String microdescriptorDigest, long validAfter) {
+    /* TODO We could check here whether we already stored the
+     * microdescriptor in the same valid-after month.  This can happen,
+     * e.g., when two relays share the same microdescriptor.  In that case
+     * this method gets called twice and the second call overwrites the
+     * file written in the first call.  However, this method must be
+     * called twice to store the same microdescriptor in two different
+     * valid-after months. */
+    SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
+    descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    File tarballFile = new File(this.outputDirectory + "/microdesc/"
+        + descriptorFormat.format(validAfter) + "micro/"
+        + microdescriptorDigest.substring(0, 1) + "/"
+        + microdescriptorDigest.substring(1, 2) + "/"
+        + microdescriptorDigest);
+    File rsyncFile = new File("rsync/relay-descriptors/microdescs/micro/"
+        + microdescriptorDigest);
+    File[] outputFiles = new File[] { tarballFile, rsyncFile };
+    if (this.store(MICRODESCRIPTOR_ANNOTATION, data, outputFiles)) {
+      this.storedMicrodescriptorsCounter++;
+    }
+    if (this.now - validAfter < 48L * 60L * 60L * 1000L) {
+      if (!this.storedMicrodescriptors.containsKey(validAfter)) {
+        this.storedMicrodescriptors.put(validAfter, new HashSet<String>());
+      }
+      this.storedMicrodescriptors.get(validAfter).add(
+          microdescriptorDigest);
+    }
+  }
+
   private StringBuilder intermediateStats = new StringBuilder();
   public void intermediateStats(String event) {
     intermediateStats.append("While " + event + ", we stored "
         + this.storedConsensusesCounter + " consensus(es), "
-        + this.storedVotesCounter + " vote(s), " + this.storedCertsCounter
-        + " certificate(s), " + this.storedServerDescriptorsCounter
-        + " server descriptor(s), and "
-        + this.storedExtraInfoDescriptorsCounter
-        + " extra-info descriptor(s) to disk.\n");
+        + this.storedMicrodescConsensusesCounter + " microdesc "
+        + "consensus(es), " + this.storedVotesCounter + " vote(s), "
+        + this.storedCertsCounter + " certificate(s), "
+        + this.storedServerDescriptorsCounter + " server descriptor(s), "
+        + this.storedExtraInfoDescriptorsCounter + " extra-info "
+        + "descriptor(s), and " + this.storedMicrodescriptorsCounter
+        + " microdescriptor(s) to disk.\n");
     this.storedConsensusesCounter = 0;
+    this.storedMicrodescConsensusesCounter = 0;
     this.storedVotesCounter = 0;
     this.storedCertsCounter = 0;
     this.storedServerDescriptorsCounter = 0;
     this.storedExtraInfoDescriptorsCounter = 0;
+    this.storedMicrodescriptorsCounter = 0;
   }
 
   private void checkMissingDescriptors() {
@@ -444,8 +548,7 @@ public class ArchiveWriter extends Thread {
         + "descriptors to disk.\n");
     sb.append(intermediateStats.toString());
     sb.append("Statistics on the completeness of written relay "
-        + "descriptors of the last 3 consensuses (Consensus/Vote, "
-        + "valid-after, votes, server descriptors, extra-infos):");
+        + "descriptors:");
     SimpleDateFormat dateTimeFormat =
         new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -460,7 +563,12 @@ public class ArchiveWriter extends Thread {
         this.storedExtraInfoDescriptors.values()) {
       knownExtraInfoDescriptors.addAll(descriptors);
     }
-    boolean missingDescriptors = false, missingVotes = false;
+    Set<String> knownMicrodescriptors = new HashSet<String>();
+    for (Set<String> descriptors : this.storedMicrodescriptors.values()) {
+      knownMicrodescriptors.addAll(descriptors);
+    }
+    boolean missingDescriptors = false, missingVotes = false,
+        missingMicrodescConsensus = false;
     for (Map.Entry<Long, SortedSet<String>> c :
         this.storedConsensuses.entrySet()) {
       long validAfterMillis = c.getKey();
@@ -491,14 +599,23 @@ public class ArchiveWriter extends Thread {
               }
             }
           }
-          sb.append(String.format("%nV, %s, NA, %d/%d (%.1f%%), "
-              + "%d/%d (%.1f%%)", validAfterTime,
-              voteFoundServerDescs, voteAllServerDescs,
-              100.0D * (double) voteFoundServerDescs /
-                (double) voteAllServerDescs,
-              voteFoundExtraInfos, voteAllExtraInfos,
-              100.0D * (double) voteFoundExtraInfos /
-                (double) voteAllExtraInfos));
+          sb.append("\nV, " + validAfterTime);
+          if (voteAllServerDescs > 0) {
+            sb.append(String.format(", %d/%d S (%.1f%%)",
+                voteFoundServerDescs, voteAllServerDescs,
+                100.0D * (double) voteFoundServerDescs /
+                  (double) voteAllServerDescs));
+          } else {
+            sb.append(", 0/0 S");
+          }
+          if (voteAllExtraInfos > 0) {
+            sb.append(String.format(", %d/%d E (%.1f%%)",
+                voteFoundExtraInfos, voteAllExtraInfos,
+                100.0D * (double) voteFoundExtraInfos /
+                  (double) voteAllExtraInfos));
+          } else {
+            sb.append(", 0/0 E");
+          }
           if (voteFoundServerDescs * 1000 < voteAllServerDescs * 995 ||
               voteFoundExtraInfos * 1000 < voteAllExtraInfos * 995) {
             missingDescriptors = true;
@@ -506,7 +623,8 @@ public class ArchiveWriter extends Thread {
         }
       }
       int foundServerDescs = 0, allServerDescs = 0, foundExtraInfos = 0,
-          allExtraInfos = 0;
+          allExtraInfos = 0, foundMicrodescriptors = 0,
+          allMicrodescriptors = 0;
       for (String serverDescriptorDigest : c.getValue()) {
         allServerDescs++;
         if (knownServerDescriptors.containsKey(
@@ -524,16 +642,50 @@ public class ArchiveWriter extends Thread {
           }
         }
       }
-      sb.append(String.format("%nC, %s, %d/%d (%.1f%%), "
-          + "%d/%d (%.1f%%), %d/%d (%.1f%%)",
-          validAfterTime, foundVotes, allVotes,
-          100.0D * (double) foundVotes / (double) allVotes,
-          foundServerDescs, allServerDescs,
-          100.0D * (double) foundServerDescs / (double) allServerDescs,
-          foundExtraInfos, allExtraInfos,
-          100.0D * (double) foundExtraInfos / (double) allExtraInfos));
+      sb.append("\nC, " + validAfterTime);
+      if (allVotes > 0) {
+        sb.append(String.format(", %d/%d V (%.1f%%)", foundVotes, allVotes,
+            100.0D * (double) foundVotes / (double) allVotes));
+      } else {
+        sb.append(", 0/0 V");
+      }
+      if (allServerDescs > 0) {
+        sb.append(String.format(", %d/%d S (%.1f%%)", foundServerDescs,
+            allServerDescs, 100.0D * (double) foundServerDescs /
+            (double) allServerDescs));
+      } else {
+        sb.append(", 0/0 S");
+      }
+      if (allExtraInfos > 0) {
+        sb.append(String.format(", %d/%d E (%.1f%%)", foundExtraInfos,
+            allExtraInfos, 100.0D * (double) foundExtraInfos /
+            (double) allExtraInfos));
+      } else {
+        sb.append(", 0/0 E");
+      }
+      if (this.storedMicrodescConsensuses.containsKey(validAfterMillis)) {
+        for (String microdescriptorDigest :
+            this.storedMicrodescConsensuses.get(validAfterMillis)) {
+          allMicrodescriptors++;
+          if (knownMicrodescriptors.contains(microdescriptorDigest)) {
+            foundMicrodescriptors++;
+          }
+        }
+        sb.append("\nM, " + validAfterTime);
+        if (allMicrodescriptors > 0) {
+          sb.append(String.format(", %d/%d M (%.1f%%)",
+              foundMicrodescriptors, allMicrodescriptors,
+              100.0D * (double) foundMicrodescriptors /
+              (double) allMicrodescriptors));
+        } else {
+          sb.append(", 0/0 M");
+        }
+      } else {
+        missingMicrodescConsensus = true;
+      }
       if (foundServerDescs * 1000 < allServerDescs * 995 ||
-          foundExtraInfos * 1000 < allExtraInfos * 995) {
+          foundExtraInfos * 1000 < allExtraInfos * 995 ||
+          foundMicrodescriptors * 1000 < allMicrodescriptors * 995) {
         missingDescriptors = true;
       }
       if (foundVotes < allVotes) {
@@ -544,12 +696,22 @@ public class ArchiveWriter extends Thread {
     if (missingDescriptors) {
       this.logger.warning("We are missing at least 0.5% of server or "
           + "extra-info descriptors referenced from a consensus or "
-          + "vote.");
+          + "vote or at least 0.5% of microdescriptors referenced from a "
+          + "microdesc consensus.");
     }
     if (missingVotes) {
+      /* TODO Shouldn't warn if we're not trying to archive votes at
+       * all. */
       this.logger.warning("We are missing at least one vote that was "
           + "referenced from a consensus.");
     }
+    if (missingMicrodescConsensus) {
+      /* TODO Shouldn't warn if we're not trying to archive microdesc
+       * consensuses at all. */
+      this.logger.warning("We are missing at least one microdesc "
+          + "consensus that was published together with a known "
+          + "consensus.");
+    }
   }
 
   private void checkStaledescriptors() {
@@ -564,6 +726,14 @@ public class ArchiveWriter extends Thread {
           + dateTimeFormat.format(this.storedConsensuses.lastKey())
           + ", which is more than 5:30 hours in the past.");
     }
+    if (!this.storedMicrodescConsensuses.isEmpty() &&
+        this.storedMicrodescConsensuses.lastKey() < tooOldMillis) {
+      this.logger.warning("The last known relay network status "
+          + "microdesc consensus was valid after "
+          + dateTimeFormat.format(
+          this.storedMicrodescConsensuses.lastKey())
+          + ", which is more than 5:30 hours in the past.");
+    }
     if (!this.storedVotes.isEmpty() &&
         this.storedVotes.lastKey() < tooOldMillis) {
       this.logger.warning("The last known relay network status vote "
@@ -585,6 +755,13 @@ public class ArchiveWriter extends Thread {
           this.storedExtraInfoDescriptors.lastKey())
           + ", which is more than 5:30 hours in the past.");
     }
+    if (!this.storedMicrodescriptors.isEmpty() &&
+        this.storedMicrodescriptors.lastKey() < tooOldMillis) {
+      this.logger.warning("The last known relay microdescriptor was "
+          + "contained in a microdesc consensus that was valid after "
+          + dateTimeFormat.format(this.storedMicrodescriptors.lastKey())
+          + ", which is more than 5:30 hours in the past.");
+    }
   }
 
   /* Delete all files from the rsync directory that have not been modified
diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java
index ce2f16a..1c8a375 100644
--- a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java
+++ b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorDownloader.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2012 The Tor Project
+/* Copyright 2010--2014 The Tor Project
  * See LICENSE for licensing information */
 package org.torproject.ernie.db.relaydescs;
 
@@ -13,9 +13,9 @@ import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.net.HttpURLConnection;
 import java.net.URL;
+import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -31,6 +31,9 @@ import java.util.logging.Level;
 import java.util.logging.Logger;
 import java.util.zip.InflaterInputStream;
 
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
+
 /**
  * Downloads relay descriptors from the directory authorities via HTTP.
  * Keeps a list of missing descriptors that gets updated by parse results
@@ -46,9 +49,11 @@ public class RelayDescriptorDownloader {
    * want to download. Lines are formatted as:
    *
    * - "consensus,<validafter>,<parsed>",
+   * - "consensus-microdesc,<validafter>,<parsed>",
    * - "vote,<validafter>,<fingerprint>,<parsed>",
-   * - "server,<published>,<relayid>,<descid>,<parsed>", or
-   * - "extra,<published>,<relayid>,<descid>,<parsed>".
+   * - "server,<published>,<relayid>,<descid>,<parsed>",
+   * - "extra,<published>,<relayid>,<descid>,<parsed>", or
+   * - "micro,<validafter>,<relayid>,<descid>,<parsed>".
    */
   private File missingDescriptorsFile;
 
@@ -61,6 +66,27 @@ public class RelayDescriptorDownloader {
   private SortedMap<String, String> missingDescriptors;
 
   /**
+   * Map from base64 microdescriptor digests to keys in missingDescriptors
+   * ("micro,<validafter>,<relayid>,<descid>"). We need this map, because
+   * we can't learn <validafter> or <relayid> from parsing
+   * microdescriptors, but we need to know <validafter> to store
+   * microdescriptors to disk and both <validafter> and <relayid> to
+   * remove microdescriptors from the missing list. There are potentially
+   * many matching keys in missingDescriptors for the same microdescriptor
+   * digest. Also, in rare cases relays share the same microdescriptor
+   * (which is only possible if they share the same onion key), and then
+   * we don't have to download their microdescriptor more than once.
+   */
+  private Map<String, Set<String>> microdescriptorKeys;
+
+  /**
+   * Set of microdescriptor digests that are currently missing. Used for
+   * logging statistics instead of "micro,<validafter>,..." keys which may
+   * contain the same microdescriptor digest multiple times.
+   */
+  private Set<String> missingMicrodescriptors;
+
+  /**
    * Text file containing the IP addresses (and Dir ports if not 80) of
    * directory authorities and when we last downloaded all server and
    * extra-info descriptors from them, so that we can avoid downloading
@@ -99,6 +125,12 @@ public class RelayDescriptorDownloader {
   private boolean downloadCurrentConsensus;
 
   /**
+   * Should we try to download the current microdesc consensus if we don't
+   * have it?
+   */
+  private boolean downloadCurrentMicrodescConsensus;
+
+  /**
    * Should we try to download current votes if we don't have them?
    */
   private boolean downloadCurrentVotes;
@@ -116,6 +148,12 @@ public class RelayDescriptorDownloader {
   private boolean downloadMissingExtraInfos;
 
   /**
+   * Should we try to download missing microdescriptors that have been
+   * published within the past 24 hours?
+   */
+  private boolean downloadMissingMicrodescriptors;
+
+  /**
    * Should we try to download all server descriptors from the authorities
    * once every 24 hours?
    */
@@ -134,11 +172,11 @@ public class RelayDescriptorDownloader {
   private boolean downloadCompressed;
 
   /**
-   * valid-after time that we expect the current consensus and votes to
-   * have, formatted "yyyy-MM-dd HH:mm:ss". We only expect to find
-   * consensuses and votes with this valid-after time on the directory
-   * authorities. This time is initialized as the beginning of the current
-   * hour.
+   * valid-after time that we expect the current consensus,
+   * microdesc consensus, and votes to have, formatted
+   * "yyyy-MM-dd HH:mm:ss". We only expect to find documents with this
+   * valid-after time on the directory authorities. This time is
+   * initialized as the beginning of the current hour.
    */
   private String currentValidAfter;
 
@@ -186,19 +224,25 @@ public class RelayDescriptorDownloader {
    * that we requested, and that we successfully downloaded in this
    * execution.
    */
-  private int oldMissingConsensuses = 0, oldMissingVotes = 0,
+  private int oldMissingConsensuses = 0,
+      oldMissingMicrodescConsensuses = 0, oldMissingVotes = 0,
       oldMissingServerDescriptors = 0, oldMissingExtraInfoDescriptors = 0,
-      newMissingConsensuses = 0, newMissingVotes = 0,
+      oldMissingMicrodescriptors = 0, newMissingConsensuses = 0,
+      newMissingMicrodescConsensuses = 0, newMissingVotes = 0,
       newMissingServerDescriptors = 0, newMissingExtraInfoDescriptors = 0,
-      requestedConsensuses = 0, requestedVotes = 0,
+      newMissingMicrodescriptors = 0, requestedConsensuses = 0,
+      requestedMicrodescConsensuses = 0, requestedVotes = 0,
       requestedMissingServerDescriptors = 0,
       requestedAllServerDescriptors = 0,
       requestedMissingExtraInfoDescriptors = 0,
-      requestedAllExtraInfoDescriptors = 0, downloadedConsensuses = 0,
-      downloadedVotes = 0, downloadedMissingServerDescriptors = 0,
+      requestedAllExtraInfoDescriptors = 0,
+      requestedMissingMicrodescriptors = 0, downloadedConsensuses = 0,
+      downloadedMicrodescConsensuses = 0, downloadedVotes = 0,
+      downloadedMissingServerDescriptors = 0,
       downloadedAllServerDescriptors = 0,
       downloadedMissingExtraInfoDescriptors = 0,
-      downloadedAllExtraInfoDescriptors = 0;
+      downloadedAllExtraInfoDescriptors = 0,
+      downloadedMissingMicrodescriptors = 0;
 
   /**
    * Initializes this class, including reading in missing descriptors from
@@ -209,9 +253,11 @@ public class RelayDescriptorDownloader {
   public RelayDescriptorDownloader(RelayDescriptorParser rdp,
       List<String> authorities, List<String> authorityFingerprints,
       boolean downloadCurrentConsensus,
+      boolean downloadCurrentMicrodescConsensus,
       boolean downloadCurrentVotes,
       boolean downloadMissingServerDescriptors,
       boolean downloadMissingExtraInfos,
+      boolean downloadMissingMicrodescriptors,
       boolean downloadAllServerDescriptors, boolean downloadAllExtraInfos,
       boolean downloadCompressed) {
 
@@ -221,10 +267,14 @@ public class RelayDescriptorDownloader {
     this.authorityFingerprints = new ArrayList<String>(
         authorityFingerprints);
     this.downloadCurrentConsensus = downloadCurrentConsensus;
+    this.downloadCurrentMicrodescConsensus =
+        downloadCurrentMicrodescConsensus;
     this.downloadCurrentVotes = downloadCurrentVotes;
     this.downloadMissingServerDescriptors =
         downloadMissingServerDescriptors;
     this.downloadMissingExtraInfos = downloadMissingExtraInfos;
+    this.downloadMissingMicrodescriptors =
+        downloadMissingMicrodescriptors;
     this.downloadAllServerDescriptors = downloadAllServerDescriptors;
     this.downloadAllExtraInfos = downloadAllExtraInfos;
     this.downloadCompressed = downloadCompressed;
@@ -253,6 +303,8 @@ public class RelayDescriptorDownloader {
      * we are interested in and that are likely to be found on the
      * directory authorities. */
     this.missingDescriptors = new TreeMap<String, String>();
+    this.microdescriptorKeys = new HashMap<String, Set<String>>();
+    this.missingMicrodescriptors = new HashSet<String>();
     this.missingDescriptorsFile = new File(
         "stats/missing-relay-descriptors");
     if (this.missingDescriptorsFile.exists()) {
@@ -266,15 +318,19 @@ public class RelayDescriptorDownloader {
           if (line.split(",").length > 2) {
             String published = line.split(",")[1];
             if (((line.startsWith("consensus,") ||
+                line.startsWith("consensus-microdesc,") ||
                 line.startsWith("vote,")) &&
                 this.currentValidAfter.equals(published)) ||
                 ((line.startsWith("server,") ||
-                line.startsWith("extra,")) &&
+                line.startsWith("extra,") ||
+                line.startsWith("micro,")) &&
                 this.descriptorCutOff.compareTo(published) < 0)) {
               if (!line.endsWith("NA")) {
                 /* Not missing. */
               } else if (line.startsWith("consensus,")) {
                 oldMissingConsensuses++;
+              } else if (line.startsWith("consensus-microdesc,")) {
+                oldMissingMicrodescConsensuses++;
               } else if (line.startsWith("vote,")) {
                 oldMissingVotes++;
               } else if (line.startsWith("server,")) {
@@ -285,6 +341,23 @@ public class RelayDescriptorDownloader {
               int separateAt = line.lastIndexOf(",");
               this.missingDescriptors.put(line.substring(0,
                   separateAt), line.substring(separateAt + 1));
+              if (line.startsWith("micro,")) {
+                String microdescriptorDigest = line.split(",")[3];
+                String microdescriptorKey = line.substring(0,
+                    line.lastIndexOf(","));
+                if (!this.microdescriptorKeys.containsKey(
+                    microdescriptorDigest)) {
+                  this.microdescriptorKeys.put(
+                      microdescriptorDigest, new HashSet<String>());
+                }
+                this.microdescriptorKeys.get(microdescriptorDigest).add(
+                    microdescriptorKey);
+                if (line.endsWith("NA") && !this.missingMicrodescriptors.
+                    contains(microdescriptorDigest)) {
+                  this.missingMicrodescriptors.add(microdescriptorDigest);
+                  oldMissingMicrodescriptors++;
+                }
+              }
             }
           } else {
             this.logger.fine("Invalid line '" + line + "' in "
@@ -401,6 +474,65 @@ public class RelayDescriptorDownloader {
   }
 
   /**
+   * We have parsed a microdesc consensus. Take this microdesc consensus
+   * off the missing list and add the <code>microdescriptors</code> which
+   * are in the format "<validafter>,<relayid>,<descid>" to that
+   * list.
+   */
+  public void haveParsedMicrodescConsensus(String validAfter,
+      Set<String> microdescriptors) {
+
+    /* Mark the microdesc consensus as parsed if it is the current one. */
+    if (this.currentValidAfter.equals(validAfter)) {
+      String microdescConsensusKey = "consensus-microdesc," + validAfter;
+      this.missingDescriptors.put(microdescConsensusKey,
+          this.currentTimestamp);
+    }
+
+    /* Add microdescriptors to the missing list, reusing the parsed
+     * timestamp of an entry with the same digest from the same month if
+     * there is one. (We download each microdescriptor at least once per
+     * month to keep the storage logic sane; otherwise we'd have to copy
+     * microdescriptors from an earlier month, and that gets messy.) */
+    if (this.descriptorCutOff.compareTo(validAfter) < 0) {
+      String validAfterYearMonth = validAfter.substring(0,
+          "YYYY-MM".length());
+      for (String microdescriptor : microdescriptors) {
+        String microdescriptorKey = "micro," + microdescriptor;
+        String parsed = "NA";
+        String microdescriptorDigest = microdescriptor.split(",")[2];
+        if (this.microdescriptorKeys.containsKey(microdescriptorDigest)) {
+          for (String otherMicrodescriptorKey :
+              this.microdescriptorKeys.get(microdescriptorDigest)) {
+            String otherValidAfter =
+                otherMicrodescriptorKey.split(",")[1];
+            if (!otherValidAfter.startsWith(validAfterYearMonth)) {
+              continue;
+            }
+            String otherParsed = this.missingDescriptors.get(
+                otherMicrodescriptorKey);
+            if (otherParsed != null && !otherParsed.equals("NA")) {
+              parsed = otherParsed;
+              break;
+            }
+          }
+        } else {
+          this.microdescriptorKeys.put(
+              microdescriptorDigest, new HashSet<String>());
+        }
+        this.microdescriptorKeys.get(microdescriptorDigest).add(
+            microdescriptorKey);
+        this.missingDescriptors.put(microdescriptorKey, parsed);
+        if (parsed.equals("NA") &&
+            !this.missingMicrodescriptors.contains(microdescriptorDigest)) {
+          this.missingMicrodescriptors.add(microdescriptorDigest);
+          this.newMissingMicrodescriptors++;
+        }
+      }
+    }
+  }
+
+  /**
    * We have parsed a vote. Take this vote off the missing list and add
    * the <code>serverDescriptors</code> which are in the format
    * "<published>,<relayid>,<descid>" to that list.
@@ -470,6 +602,23 @@ public class RelayDescriptorDownloader {
   }
 
   /**
+   * We have parsed a microdescriptor. Take it off the missing list.
+   */
+  public void haveParsedMicrodescriptor(String descriptorDigest) {
+    if (this.microdescriptorKeys.containsKey(descriptorDigest)) {
+      for (String key : this.microdescriptorKeys.get(descriptorDigest)) {
+        /* Keys are "micro,<validafter>,<relayid>,<descid>", so the
+         * valid-after time is the second field, not the first. */
+        String validAfter = key.split(",")[1];
+        if (this.descriptorCutOff.compareTo(validAfter) < 0) {
+          this.missingDescriptors.put(key, this.currentTimestamp);
+        }
+      }
+      this.missingMicrodescriptors.remove(descriptorDigest);
+    }
+  }
+
+  /**
    * Downloads missing descriptors that we think might still be available
    * on the directory authorities as well as all server and extra-info
    * descriptors once per day.
@@ -483,6 +632,12 @@ public class RelayDescriptorDownloader {
       this.missingDescriptors.put(consensusKey, "NA");
       this.newMissingConsensuses++;
     }
+    String microdescConsensusKey = "consensus-microdesc,"
+        + this.currentValidAfter;
+    if (!this.missingDescriptors.containsKey(microdescConsensusKey)) {
+      this.missingDescriptors.put(microdescConsensusKey, "NA");
+      this.newMissingMicrodescConsensuses++;
+    }
     for (String authority : authorityFingerprints) {
       String voteKey = "vote," + this.currentValidAfter + "," + authority;
       if (!this.missingDescriptors.containsKey(voteKey)) {
@@ -516,6 +671,19 @@ public class RelayDescriptorDownloader {
           }
         }
 
+        /* Then try to download the microdesc consensus. */
+        if (downloadCurrentMicrodescConsensus) {
+          if (this.missingDescriptors.containsKey(
+              microdescConsensusKey) &&
+              this.missingDescriptors.get(microdescConsensusKey).
+              equals("NA")) {
+            this.requestedMicrodescConsensuses++;
+            this.downloadedMicrodescConsensuses +=
+                this.downloadResourceFromAuthority(authority,
+                "/tor/status-vote/current/consensus-microdesc");
+          }
+        }
+
         /* Next, try to download current votes that we're missing. */
         if (downloadCurrentVotes) {
           String voteKeyPrefix = "vote," + this.currentValidAfter;
@@ -538,10 +706,9 @@ public class RelayDescriptorDownloader {
 
         /* Download either all server and extra-info descriptors or only
          * those that we're missing. Start with server descriptors, then
-         * request extra-info descriptors. */
-        List<String> types = new ArrayList<String>(Arrays.asList(
-            "server,extra".split(",")));
-        for (String type : types) {
+         * request extra-info descriptors. Finally, request missing
+         * microdescriptors. */
+        for (String type : new String[] { "server", "extra", "micro" }) {
 
           /* Download all server or extra-info descriptors from this
            * authority if we haven't done so for 24 hours and if we're
@@ -557,21 +724,24 @@ public class RelayDescriptorDownloader {
               this.requestedAllServerDescriptors++;
               this.downloadedAllServerDescriptors +=
                   downloadedAllDescriptors;
-            } else {
+            } else if (type.equals("extra")) {
               this.requestedAllExtraInfoDescriptors++;
               this.downloadedAllExtraInfoDescriptors +=
                   downloadedAllDescriptors;
             }
 
-          /* Download missing server or extra-info descriptors if we're
-           * configured to do so. */
+          /* Download missing server descriptors, extra-info descriptors,
+           * and microdescriptors if we're configured to do so. */
           } else if ((type.equals("server") &&
               this.downloadMissingServerDescriptors) ||
-              (type.equals("extra") && this.downloadMissingExtraInfos)) {
+              (type.equals("extra") && this.downloadMissingExtraInfos) ||
+              (type.equals("micro") &&
+              this.downloadMissingMicrodescriptors)) {
 
             /* Go through the list of missing descriptors of this type
              * and combine the descriptor identifiers to a URL of up to
-             * 96 descriptors that we can download at once. */
+             * 96 server or extra-info descriptors or 92 microdescriptors
+             * that we can download at once. */
             SortedSet<String> descriptorIdentifiers =
                 new TreeSet<String>();
             for (Map.Entry<String, String> e :
@@ -587,8 +757,12 @@ public class RelayDescriptorDownloader {
             StringBuilder combinedResource = null;
             int descriptorsInCombinedResource = 0,
                 requestedDescriptors = 0, downloadedDescriptors = 0;
+            int maxDescriptorsInCombinedResource =
+                type.equals("micro") ? 92 : 96;
+            String separator = type.equals("micro") ? "-" : "+";
             for (String descriptorIdentifier : descriptorIdentifiers) {
-              if (descriptorsInCombinedResource >= 96) {
+              if (descriptorsInCombinedResource >=
+                  maxDescriptorsInCombinedResource) {
                 requestedDescriptors += descriptorsInCombinedResource;
                 downloadedDescriptors +=
                     this.downloadResourceFromAuthority(authority,
@@ -600,7 +774,7 @@ public class RelayDescriptorDownloader {
                 combinedResource = new StringBuilder("/tor/" + type
                     + "/d/" + descriptorIdentifier);
               } else {
-                combinedResource.append("+" + descriptorIdentifier);
+                combinedResource.append(separator + descriptorIdentifier);
               }
               descriptorsInCombinedResource++;
             }
@@ -615,11 +789,16 @@ public class RelayDescriptorDownloader {
                   requestedDescriptors;
               this.downloadedMissingServerDescriptors +=
                   downloadedDescriptors;
-            } else {
+            } else if (type.equals("extra")) {
               this.requestedMissingExtraInfoDescriptors +=
                   requestedDescriptors;
               this.downloadedMissingExtraInfoDescriptors +=
                   downloadedDescriptors;
+            } else if (type.equals("micro")) {
+              this.requestedMissingMicrodescriptors +=
+                  requestedDescriptors;
+              this.downloadedMissingMicrodescriptors +=
+                  downloadedDescriptors;
             }
           }
         }
@@ -680,7 +859,8 @@ public class RelayDescriptorDownloader {
         receivedDescriptors = 1;
       } else if (resource.startsWith("/tor/server/") ||
           resource.startsWith("/tor/extra/")) {
-        if (resource.equals("/tor/server/all")) {
+        if (resource.equals("/tor/server/all") ||
+            resource.equals("/tor/extra/all")) {
           this.lastDownloadedAllDescriptors.put(authority,
               this.currentTimestamp);
         }
@@ -715,6 +895,60 @@ public class RelayDescriptorDownloader {
           this.rdp.parse(descBytes);
           receivedDescriptors++;
         }
+      } else if (resource.startsWith("/tor/micro/")) {
+        /* TODO We need to parse microdescriptors ourselves, rather than
+         * RelayDescriptorParser, because only we know the valid-after
+         * time(s) of microdesc consensus(es) containing this
+         * microdescriptor.  However, this breaks functional abstraction
+         * pretty badly. */
+        SimpleDateFormat parseFormat =
+            new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+        parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+        String ascii = null;
+        try {
+          ascii = new String(allData, "US-ASCII");
+        } catch (UnsupportedEncodingException e) {
+          /* No way that US-ASCII is not supported. */
+        }
+        int start = -1, end = -1;
+        String startToken = "onion-key\n";
+        while (end < ascii.length()) {
+          start = ascii.indexOf(startToken, end);
+          if (start < 0) {
+            break;
+          }
+          end = ascii.indexOf(startToken, start + 1);
+          if (end < 0) {
+            end = ascii.length();
+            if (end <= start) {
+              break;
+            }
+          }
+          byte[] descBytes = new byte[end - start];
+          System.arraycopy(allData, start, descBytes, 0, end - start);
+          String digest256Base64 = Base64.encodeBase64String(
+              DigestUtils.sha256(descBytes)).replaceAll("=", "");
+          if (!this.microdescriptorKeys.containsKey(digest256Base64)) {
+            continue;
+          }
+          String digest256Hex = DigestUtils.sha256Hex(descBytes);
+          for (String microdescriptorKey :
+              this.microdescriptorKeys.get(digest256Base64)) {
+            String validAfterTime = microdescriptorKey.split(",")[1];
+            try {
+              long validAfter =
+                  parseFormat.parse(validAfterTime).getTime();
+              this.rdp.storeMicrodescriptor(descBytes, digest256Hex,
+                  digest256Base64, validAfter);
+            } catch (ParseException e) {
+              this.logger.log(Level.WARNING, "Could not parse "
+                  + "valid-after time '" + validAfterTime + "' in "
+                  + "microdescriptor key. Not storing microdescriptor.",
+                  e);
+            }
+          }
+          receivedDescriptors++;
+        }
       }
     }
     return receivedDescriptors;
@@ -727,8 +961,9 @@ public class RelayDescriptorDownloader {
   public void writeFile() {
 
     /* Write missing descriptors file to disk. */
-    int missingConsensuses = 0, missingVotes = 0,
-        missingServerDescriptors = 0, missingExtraInfoDescriptors = 0;
+    int missingConsensuses = 0, missingMicrodescConsensuses = 0,
+        missingVotes = 0, missingServerDescriptors = 0,
+        missingExtraInfoDescriptors = 0;
     try {
       this.logger.fine("Writing file "
           + this.missingDescriptorsFile.getAbsolutePath() + "...");
@@ -742,12 +977,15 @@ public class RelayDescriptorDownloader {
           /* Not missing. */
         } else if (key.startsWith("consensus,")) {
           missingConsensuses++;
+        } else if (key.startsWith("consensus-microdesc,")) {
+          missingMicrodescConsensuses++;
         } else if (key.startsWith("vote,")) {
           missingVotes++;
         } else if (key.startsWith("server,")) {
           missingServerDescriptors++;
         } else if (key.startsWith("extra,")) {
           missingExtraInfoDescriptors++;
+        } else if (key.startsWith("micro,")) {
         }
         bw.write(key + "," + value + "\n");
       }
@@ -758,6 +996,7 @@ public class RelayDescriptorDownloader {
       this.logger.log(Level.WARNING, "Failed writing "
           + this.missingDescriptorsFile.getAbsolutePath() + "!", e);
     }
+    int missingMicrodescriptors = this.missingMicrodescriptors.size();
 
     /* Write text file containing the directory authorities and when we
      * last downloaded all server and extra-info descriptors from them to
@@ -790,25 +1029,33 @@ public class RelayDescriptorDownloader {
         + "directory authorities.");
     this.logger.info("At the beginning of this execution, we were "
         + "missing " + oldMissingConsensuses + " consensus(es), "
+        + oldMissingMicrodescConsensuses + " microdesc consensus(es), "
         + oldMissingVotes + " vote(s), " + oldMissingServerDescriptors
-        + " server descriptor(s), and " + oldMissingExtraInfoDescriptors
-        + " extra-info descriptor(s).");
+        + " server descriptor(s), " + oldMissingExtraInfoDescriptors
+        + " extra-info descriptor(s), and " + oldMissingMicrodescriptors
+        + " microdescriptor(s).");
     this.logger.info("During this execution, we added "
         + this.newMissingConsensuses + " consensus(es), "
-        + this.newMissingVotes + " vote(s), "
-        + this.newMissingServerDescriptors + " server descriptor(s), and "
-        + this.newMissingExtraInfoDescriptors + " extra-info "
-        + "descriptor(s) to the missing list, some of which we also "
+        + this.newMissingMicrodescConsensuses
+        + " microdesc consensus(es), " + this.newMissingVotes
+        + " vote(s), " + this.newMissingServerDescriptors
+        + " server descriptor(s), " + this.newMissingExtraInfoDescriptors
+        + " extra-info descriptor(s), and "
+        + this.newMissingMicrodescriptors + " microdescriptor(s) to the "
+        + "missing list, some of which we also "
         + "requested and removed from the list again.");
     this.logger.info("We requested " + this.requestedConsensuses
-        + " consensus(es), " + this.requestedVotes + " vote(s), "
-        + this.requestedMissingServerDescriptors + " missing server "
-        + "descriptor(s), " + this.requestedAllServerDescriptors
+        + " consensus(es), " + this.requestedMicrodescConsensuses
+        + " microdesc consensus(es), " + this.requestedVotes
+        + " vote(s), " + this.requestedMissingServerDescriptors
+        + " missing server descriptor(s), "
+        + this.requestedAllServerDescriptors
         + " times all server descriptors, "
         + this.requestedMissingExtraInfoDescriptors + " missing "
-        + "extra-info descriptor(s), and "
+        + "extra-info descriptor(s), "
         + this.requestedAllExtraInfoDescriptors + " times all extra-info "
-        + "descriptors from the directory authorities.");
+        + "descriptors, and " + this.requestedMissingMicrodescriptors
+        + " missing microdescriptor(s) from the directory authorities.");
     StringBuilder sb = new StringBuilder();
     for (String authority : this.authorities) {
       sb.append(" " + authority + "="
@@ -818,20 +1065,26 @@ public class RelayDescriptorDownloader {
         + "authorities:" + sb.toString());
     this.logger.info("We successfully downloaded "
         + this.downloadedConsensuses + " consensus(es), "
-        + this.downloadedVotes + " vote(s), "
-        + this.downloadedMissingServerDescriptors + " missing server "
-        + "descriptor(s), " + this.downloadedAllServerDescriptors
+        + this.downloadedMicrodescConsensuses
+        + " microdesc consensus(es), " + this.downloadedVotes
+        + " vote(s), " + this.downloadedMissingServerDescriptors
+        + " missing server descriptor(s), "
+        + this.downloadedAllServerDescriptors
         + " server descriptor(s) when downloading all descriptors, "
         + this.downloadedMissingExtraInfoDescriptors + " missing "
-        + "extra-info descriptor(s) and "
+        + "extra-info descriptor(s), "
         + this.downloadedAllExtraInfoDescriptors + " extra-info "
-        + "descriptor(s) when downloading all descriptors.");
+        + "descriptor(s) when downloading all descriptors, and "
+        + this.downloadedMissingMicrodescriptors
+        + " missing microdescriptor(s).");
     this.logger.info("At the end of this execution, we are missing "
-      + missingConsensuses + " consensus(es), " + missingVotes
-      + " vote(s), " + missingServerDescriptors + " server "
-      + "descriptor(s), and " + missingExtraInfoDescriptors
-      + " extra-info descriptor(s), some of which we may try in the next "
-      + "execution.");
+        + missingConsensuses + " consensus(es), "
+        + missingMicrodescConsensuses + " microdesc consensus(es), "
+        + missingVotes + " vote(s), " + missingServerDescriptors
+        + " server descriptor(s), " + missingExtraInfoDescriptors
+        + " extra-info descriptor(s), and " + missingMicrodescriptors
+        + " microdescriptor(s), some of which we may try in the next "
+        + "execution.");
   }
 }
 
diff --git a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java
index 107ba73..2873909 100644
--- a/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java
+++ b/src/org/torproject/ernie/db/relaydescs/RelayDescriptorParser.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2012 The Tor Project
+/* Copyright 2010--2014 The Tor Project
  * See LICENSE for licensing information */
 package org.torproject.ernie.db.relaydescs;
 
@@ -31,6 +31,8 @@ public class RelayDescriptorParser {
    */
   private ArchiveWriter aw;
 
+  private ArchiveReader ar;
+
   /**
    * Missing descriptor downloader that uses the parse results to learn
    * which descriptors we are missing and want to download.
@@ -62,7 +64,12 @@ public class RelayDescriptorParser {
     this.rdd = rdd;
   }
 
-  public void parse(byte[] data) {
+  public void setArchiveReader(ArchiveReader ar) {
+    this.ar = ar;
+  }
+
+  public boolean parse(byte[] data) {
+    boolean stored = false;
     try {
       /* Convert descriptor to ASCII for parsing. This means we'll lose
        * the non-ASCII chars, but we don't care about them for parsing
@@ -76,21 +83,27 @@ public class RelayDescriptorParser {
       if (line == null) {
         this.logger.fine("We were given an empty descriptor for "
             + "parsing. Ignoring.");
-        return;
+        return false;
       }
       SimpleDateFormat parseFormat =
           new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
       parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-      if (line.equals("network-status-version 3")) {
-        boolean isConsensus = true;
+      if (line.startsWith("network-status-version 3")) {
+        String statusType = "consensus";
+        if (line.equals("network-status-version 3 microdesc")) {
+          statusType = "consensus-microdesc";
+        }
         String validAfterTime = null, fingerprint = null,
             dirSource = null;
         long validAfter = -1L, dirKeyPublished = -1L;
         SortedSet<String> dirSources = new TreeSet<String>();
         SortedSet<String> serverDescriptors = new TreeSet<String>();
         SortedSet<String> serverDescriptorDigests = new TreeSet<String>();
+        SortedSet<String> microdescriptorKeys = new TreeSet<String>();
+        SortedSet<String> microdescriptorDigests = new TreeSet<String>();
         StringBuilder certificateStringBuilder = null;
         String certificateString = null;
+        String lastRelayIdentity = null;
         while ((line = br.readLine()) != null) {
           if (certificateStringBuilder != null) {
             if (line.startsWith("r ")) {
@@ -101,7 +114,7 @@ public class RelayDescriptorParser {
             }
           }
           if (line.equals("vote-status vote")) {
-            isConsensus = false;
+            statusType = "vote";
           } else if (line.startsWith("valid-after ")) {
             validAfterTime = line.substring("valid-after ".length());
             validAfter = parseFormat.parse(validAfterTime).getTime();
@@ -121,23 +134,43 @@ public class RelayDescriptorParser {
                 getTime();
           } else if (line.startsWith("r ")) {
             String[] parts = line.split(" ");
-            if (parts.length < 9) {
+            if (parts.length == 8) {
+              lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64(
+                  parts[2] + "=")).toLowerCase();
+            } else if (parts.length == 9) {
+              lastRelayIdentity = Hex.encodeHexString(Base64.decodeBase64(
+                  parts[2] + "=")).toLowerCase();
+              String serverDesc = Hex.encodeHexString(Base64.decodeBase64(
+                  parts[3] + "=")).toLowerCase();
+              String publishedTime = parts[4] + " " + parts[5];
+              serverDescriptors.add(publishedTime + ","
+                  + lastRelayIdentity + "," + serverDesc);
+              serverDescriptorDigests.add(serverDesc);
+            } else {
               this.logger.log(Level.WARNING, "Could not parse r line '"
                   + line + "' in descriptor. Skipping.");
               break;
             }
-            String publishedTime = parts[4] + " " + parts[5];
-            String relayIdentity = Hex.encodeHexString(
-                Base64.decodeBase64(parts[2] + "=")).
-                toLowerCase();
-            String serverDesc = Hex.encodeHexString(Base64.decodeBase64(
-                parts[3] + "=")).toLowerCase();
-            serverDescriptors.add(publishedTime + "," + relayIdentity
-                + "," + serverDesc);
-            serverDescriptorDigests.add(serverDesc);
+          } else if (line.startsWith("m ")) {
+            String[] parts = line.split(" ");
+            if (parts.length == 2 && parts[1].length() == 43) {
+              String digest256Base64 = parts[1];
+              microdescriptorKeys.add(validAfterTime + ","
+                  + lastRelayIdentity + "," + digest256Base64);
+              String digest256Hex = Hex.encodeHexString(
+                  Base64.decodeBase64(digest256Base64 + "=")).
+                  toLowerCase();
+              microdescriptorDigests.add(digest256Hex);
+            } else if (parts.length != 3 ||
+                !parts[2].startsWith("sha256=") ||
+                parts[2].length() != 50) {
+              this.logger.log(Level.WARNING, "Could not parse m line '"
+                  + line + "' in descriptor. Skipping.");
+              break;
+            }
           }
         }
-        if (isConsensus) {
+        if (statusType.equals("consensus")) {
           if (this.rdd != null) {
             this.rdd.haveParsedConsensus(validAfterTime, dirSources,
                 serverDescriptors);
@@ -145,6 +178,21 @@ public class RelayDescriptorParser {
           if (this.aw != null) {
             this.aw.storeConsensus(data, validAfter, dirSources,
                 serverDescriptorDigests);
+            stored = true;
+          }
+        } else if (statusType.equals("consensus-microdesc")) {
+          if (this.rdd != null) {
+            this.rdd.haveParsedMicrodescConsensus(validAfterTime,
+                microdescriptorKeys);
+          }
+          if (this.ar != null) {
+            this.ar.haveParsedMicrodescConsensus(validAfterTime,
+                microdescriptorDigests);
+          }
+          if (this.aw != null) {
+            this.aw.storeMicrodescConsensus(data, validAfter,
+                microdescriptorDigests);
+            stored = true;
           }
         } else {
           if (this.aw != null || this.rdd != null) {
@@ -161,6 +209,7 @@ public class RelayDescriptorParser {
               if (this.aw != null) {
                 this.aw.storeVote(data, validAfter, dirSource, digest,
                     serverDescriptorDigests);
+                stored = true;
               }
               if (this.rdd != null) {
                 this.rdd.haveParsedVote(validAfterTime, fingerprint,
@@ -171,6 +220,7 @@ public class RelayDescriptorParser {
               if (this.aw != null) {
                 this.aw.storeCertificate(certificateString.getBytes(),
                     dirSource, dirKeyPublished);
+                stored = true;
               }
             }
           }
@@ -209,6 +259,7 @@ public class RelayDescriptorParser {
         if (this.aw != null && digest != null) {
           this.aw.storeServerDescriptor(data, digest, published,
               extraInfoDigest);
+          stored = true;
         }
         if (this.rdd != null && digest != null) {
           this.rdd.haveParsedServerDescriptor(publishedTime,
@@ -238,19 +289,26 @@ public class RelayDescriptorParser {
           }
         }
         int sig = ascii.indexOf(sigToken) + sigToken.length();
-        if (start >= 0 || sig >= 0 || sig > start) {
+        if (start >= 0 && sig >= 0 && sig > start) {
           byte[] forDigest = new byte[sig - start];
           System.arraycopy(data, start, forDigest, 0, sig - start);
           digest = DigestUtils.shaHex(forDigest);
         }
         if (this.aw != null && digest != null) {
           this.aw.storeExtraInfoDescriptor(data, digest, published);
+          stored = true;
         }
         if (this.rdd != null && digest != null) {
           this.rdd.haveParsedExtraInfoDescriptor(publishedTime,
               relayIdentifier.toLowerCase(), digest);
         }
+      } else if (line.equals("onion-key")) {
+        /* Cannot store microdescriptors without knowing valid-after
+         * time(s) of microdesc consensuses containing them, because we
+         * don't know which month directories to put them in.  Have to use
+         * storeMicrodescriptor below. */
       }
+      br.close();
     } catch (IOException e) {
       this.logger.log(Level.WARNING, "Could not parse descriptor. "
           + "Skipping.", e);
@@ -258,6 +316,17 @@ public class RelayDescriptorParser {
       this.logger.log(Level.WARNING, "Could not parse descriptor. "
           + "Skipping.", e);
     }
+    return stored;
+  }
+
+  public void storeMicrodescriptor(byte[] data, String digest256Hex,
+      String digest256Base64, long validAfter) {
+    if (this.aw != null) {
+      this.aw.storeMicrodescriptor(data, digest256Hex, validAfter);
+    }
+    if (this.rdd != null) {
+      this.rdd.haveParsedMicrodescriptor(digest256Base64);
+    }
   }
 }
 



More information about the tor-commits mailing list