[or-cvs] [ernie/master] Maintain a list of descriptors we want to download.

karsten at torproject.org karsten at torproject.org
Wed Feb 24 16:49:38 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Wed, 24 Feb 2010 17:47:37 +0100
Subject: Maintain a list of descriptors we want to download.
Commit: 7503c5db66ab6d372c4e19ce55740e40f6f701a4

---
 config                               |    3 +
 src/ArchiveWriter.java               |  487 +++++++++++++++++++++++++++++-----
 src/CachedRelayDescriptorReader.java |   63 +++---
 src/Configuration.java               |   14 +
 src/Main.java                        |   15 +-
 src/RelayDescriptorDownloader.java   |  137 ++++++-----
 src/RelayDescriptorParser.java       |   24 ++-
 7 files changed, 569 insertions(+), 174 deletions(-)

diff --git a/config b/config
index 37fea66..61c24c7 100644
--- a/config
+++ b/config
@@ -39,6 +39,9 @@
 ## Write directory archives to disk
 #WriteDirectoryArchives 1
 
+## V3 directory authority fingerprints
+#V3DirectoryAuthorities 14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4,E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58,D586D18309DED4CD6D57C18FDB97EFA96D330566,585769C78764D58426B8B52B6651A5A71137189A,27B6B5996C426270A5C95488AA5BCEB6BCC86956,80550987E1D626E3EBA5E5E75A458DE0626D088C,ED03BB616EB2F60BEC80151114BB25CEF515B226
+
 ## Read cached-* files from a local Tor client
 #ImportCachedRelayDescriptors 1
 
diff --git a/src/ArchiveWriter.java b/src/ArchiveWriter.java
index 2ebe423..73c8030 100644
--- a/src/ArchiveWriter.java
+++ b/src/ArchiveWriter.java
@@ -1,88 +1,449 @@
 import java.io.*;
 import java.text.*;
 import java.util.*;
-import org.apache.commons.compress.archivers.tar.*;
+import java.util.logging.*;
+import org.apache.commons.codec.digest.*;
+import org.apache.commons.codec.binary.*;
 
 public class ArchiveWriter {
-  public ArchiveWriter() {
-  }
-  public void storeConsensus(BufferedReader br, String validAfterTime)
-      throws IOException, ParseException {
+  private String statsDir;
+  private SortedSet<String> v3DirectoryAuthorities;
+  private File archiveWriterParseHistory;
+  private SortedSet<String> missingDescriptors;
+  private String lastParsedConsensus;
+  private boolean initialized = false;
+  private boolean archiveWriterParseHistoryModified = false;
+  private Logger logger;
+  private String parseTime;
+  public ArchiveWriter(String statsDir,
+      SortedSet<String> v3DirectoryAuthorities) {
+    this.statsDir = statsDir;
+    this.v3DirectoryAuthorities = v3DirectoryAuthorities;
+    this.archiveWriterParseHistory = new File(statsDir
+        + "/archive-writer-parse-history");
+    this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
     SimpleDateFormat parseFormat =
         new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    long validAfter = parseFormat.parse(validAfterTime).getTime();
-    SimpleDateFormat printFormat =
+    this.parseTime = parseFormat.format(new Date());
+    this.missingDescriptors = new TreeSet<String>();
+    SimpleDateFormat consensusVoteFormat =
         new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
-    printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    File consensusFile = new File("directory-archive/consensus/"
-        + printFormat.format(new Date(validAfter)) + "-consensus");
-    consensusFile.getParentFile().mkdirs();
-    if (!consensusFile.exists()) {
-      BufferedWriter bw = new BufferedWriter(new FileWriter(
-          consensusFile));
-      String line = null;
-      while ((line = br.readLine()) != null) {
-        bw.write(line + "\n");
-        if (line.startsWith("r ")) {
-          // TODO compile list of server descriptors that we might want to
-          // learn about
+    consensusVoteFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    SimpleDateFormat descriptorFormat =
+        new SimpleDateFormat("yyyy/MM/");
+    descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    if (this.archiveWriterParseHistory.exists()) {
+      this.logger.info("Reading file " + statsDir
+          + "/archive-writer-parse-history...");
+      try {
+        BufferedReader br = new BufferedReader(new FileReader(
+            this.archiveWriterParseHistory));
+        String line = null;
+        SimpleDateFormat publishedFormat =
+            new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+        publishedFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+        long now = System.currentTimeMillis();
+        while ((line = br.readLine()) != null) {
+          // only add to download list if descriptors are still available
+          // on directories
+          long published = publishedFormat.parse(line.split(",")[2]).
+              getTime();
+          if (line.startsWith("consensus") &&
+              published + 55L * 60L * 1000L > now &&
+              !new File("directory-archive/consensus/"
+                + consensusVoteFormat.format(new Date(published))
+                + "-consensus").exists()) {
+            this.logger.fine("Initializing missing list with "
+                + "consensus: valid-after=" + line.split(",")[2]
+                + ", filename=directory-archive/consensus/"
+                + consensusVoteFormat.format(new Date(published))
+                + "-consensus");
+            this.missingDescriptors.add(line);
+          } else if (line.startsWith("vote") &&
+              published + 55L * 60L * 1000L > now &&
+              // TODO is vote even available for 55 minutes after its
+              // publication?
+              !new File("directory-archive/vote/"
+                + consensusVoteFormat.format(new Date(published))
+                + "-vote-" + line.split(",")[1]).exists()) {
+            this.logger.fine("Initializing missing list with vote: "
+                + "fingerprint=" + line.split(",")[1]
+                + ", valid-after="
+                + consensusVoteFormat.format(new Date(published))
+                + ", filename=directory-archive/vote/"
+                + consensusVoteFormat.format(new Date(published))
+                + "-vote-" + line.split(",")[1]);
+            this.missingDescriptors.add(line);
+          } else if ((line.startsWith("server") ||
+              line.startsWith("extra")) &&
+              published + 24L * 60L * 60L * 1000L > now) {
+              // TODO are 24 hours okay?
+            boolean isServerDesc = line.startsWith("server");
+            String digest = line.split(",")[1].toLowerCase();
+            if (!new File("directory-archive/"
+                + (isServerDesc ? "server-descriptor" : "extra-info")
+                + "/" + descriptorFormat.format(new Date(published))
+                + digest.substring(0, 1) + "/" + digest.substring(1, 2)
+                + "/" + digest).exists()) {
+              this.logger.fine("Initializing missing list with "
+                  + (isServerDesc ? "server" : "extra-info")
+                  + " descriptor: digest=" + digest
+                  + ", filename=directory-archive/server-descriptor/"
+                  + descriptorFormat.format(new Date(published))
+                  + line.split(",")[1].substring(0, 1) + "/"
+                  + line.split(",")[1].substring(1, 2) + "/"
+                  + line.split(",")[1]);
+              this.missingDescriptors.add(line);
+            }
+          }
         }
+        br.close();
+        this.logger.info("Finished reading file " + statsDir
+            + "/archive-writer-parse-history");
+      } catch (ParseException e) {
+        this.logger.log(Level.WARNING, "Failed reading file " + statsDir
+            + "/archive-writer-parse-history! This means that we might "
+            + "forget to dowload descriptors we are missing.", e);
+      } catch (IOException e) {
+        this.logger.log(Level.WARNING, "Failed reading file " + statsDir
+            + "/archive-writer-parse-history! This means that we might "
+            + "forget to dowload descriptors we are missing.", e);
+      }
+    }
+    // add current consensus and votes to list
+    SimpleDateFormat consensusFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH");
+    consensusFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    String nowConsensusFormat = consensusFormat.format(new Date())
+        + ":00:00";
+    long nowConsensus = (System.currentTimeMillis() / (60L * 60L * 1000L))
+        * (60L * 60L * 1000L);
+    for (String authority : this.v3DirectoryAuthorities) {
+      if (!new File("directory-archive/vote/"
+            + consensusVoteFormat.format(new Date(nowConsensus))
+            + "-vote-" + authority).exists()) {
+        if (!this.missingDescriptors.contains("vote," + authority + ","
+            + nowConsensusFormat)) {
+          this.logger.fine("Adding vote to missing list: fingerprint="
+              + authority + ", valid-after="
+              + consensusVoteFormat.format(new Date(nowConsensus))
+              + ", filename=directory-archive/vote/"
+              + consensusVoteFormat.format(new Date(nowConsensus))
+              + "-vote-" + authority);
+          this.missingDescriptors.add("vote," + authority + ","
+              + nowConsensusFormat);
+          this.archiveWriterParseHistoryModified = true;
+        }
+      }
+    }
+    if (!new File("directory-archive/consensus/"
+        + consensusVoteFormat.format(new Date(nowConsensus))
+        + "-consensus").exists()) {
+      if (!this.missingDescriptors.contains("consensus,NA,"
+          + nowConsensusFormat)) {
+        this.logger.fine("Adding consensus to missing list: valid-after="
+            + nowConsensusFormat
+            + ", filename=directory-archive/consensus/"
+            + consensusVoteFormat.format(new Date(nowConsensus))
+            + "-consensus");
+        this.missingDescriptors.add("consensus,NA,"
+            + nowConsensusFormat);
+        this.archiveWriterParseHistoryModified = true;
       }
-      bw.close();
     }
   }
-  public void storeVote(BufferedReader br, String validAfterTime,
-      String authorityFingerprint) throws IOException {
-    // TODO implement me
-  }
-  public void storeServerDescriptor(BufferedReader br, String digest,
-      String publishedTime, String extraInfoDigest) throws IOException,
+  public void store(BufferedReader br) throws IOException,
       ParseException {
+    String line = br.readLine();
+    if (line == null) {
+      this.logger.warning("Someone gave us an empty file for storing!");
+      return;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(line + "\n");
     SimpleDateFormat parseFormat =
         new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    long published = parseFormat.parse(publishedTime).getTime();
-    SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
-    printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    File descriptorFile = new File("directory-archive/server-descriptor/"
-        + printFormat.format(new Date(published))
-        + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
-        + digest);
-    descriptorFile.getParentFile().mkdirs();
-    if (!descriptorFile.exists()) {
-      BufferedWriter bw = new BufferedWriter(new FileWriter(
-          descriptorFile));
-      String line = null;
+    if (line.equals("network-status-version 3")) {
+      // TODO when parsing the current consensus, check the fresh-until
+      // time to see when we switch from hourly to half-hourly
+      // consensuses; in that case, add next half-hourly consensus to
+      // missing list and warn!
+      boolean isConsensus = true;
+      String validAfterTime = null;
+      long validAfter = -1L;
+      long now = System.currentTimeMillis();
+      String fingerprint = null;
+      SimpleDateFormat descriptorFormat =
+          new SimpleDateFormat("yyyy/MM/");
+      descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
       while ((line = br.readLine()) != null) {
-        bw.write(line + "\n");
+        sb.append(line + "\n");
+        if (line.equals("vote-status vote")) {
+          isConsensus = false;
+        } else if (line.startsWith("valid-after ")) {
+          validAfterTime = line.substring("valid-after ".length());
+          validAfter = parseFormat.parse(validAfterTime).getTime();
+        } else if (line.startsWith("dir-source ") &&
+            !this.v3DirectoryAuthorities.contains(
+            line.split(" ")[2]) && validAfter + 55L * 60L * 1000L <
+            System.currentTimeMillis()) {
+          this.logger.warning("Unknown v3 directory authority fingerprint "
+              + "in consensus line '" + line + "'. You should update your "
+              + "V3DirectoryAuthorities config option!");
+          fingerprint = line.split(" ")[2];
+          long nowConsensus = (now / (60L * 60L * 1000L))
+              * (60L * 60L * 1000L);
+          SimpleDateFormat consensusVoteFormat =
+              new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+          consensusVoteFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+          if (!new File("directory-archive/vote/"
+                + consensusVoteFormat.format(new Date(nowConsensus))
+                + "-vote-" + fingerprint).exists()) {
+            if (!this.missingDescriptors.contains("vote," + fingerprint
+                + "," + parseFormat.format(new Date(nowConsensus)))) {
+              this.logger.fine("Adding vote to missing list: fingerprint="
+                  + fingerprint + ", valid-after="
+                  + parseFormat.format(new Date(nowConsensus))
+                  + ", filename=directory-archive/vote/"
+                  + consensusVoteFormat.format(new Date(nowConsensus))
+                  + "-vote-" + fingerprint);
+              this.missingDescriptors.add("vote," + fingerprint + ","
+                  + parseFormat.format(new Date(nowConsensus)));
+              this.archiveWriterParseHistoryModified = true;
+            }
+          }
+        } else if (line.startsWith("fingerprint ")) {
+          fingerprint = line.split(" ")[1];
+        } else if (line.startsWith("r ")) {
+          String publishedTime = line.split(" ")[4] + " "
+              + line.split(" ")[5];
+          long published = parseFormat.parse(publishedTime).getTime();
+          String digest = Hex.encodeHexString(Base64.decodeBase64(
+              line.split(" ")[3] + "=")).toLowerCase();
+          // TODO are 24 hours okay?
+          if (published + 24L * 60L * 60L * 1000L > now &&
+              !new File("directory-archive/server-descriptor/"
+              + descriptorFormat.format(new Date(published))
+              + digest.substring(0, 1) + "/" + digest.substring(1, 2)
+              + "/" + digest).exists()) {
+            if (!this.missingDescriptors.contains("server," + digest + ","
+                + publishedTime)) {
+              this.logger.fine("Adding server descriptor to missing list: "
+                  + "digest=" + digest
+                  + ", filename=directory-archive/server-descriptor/"
+                  + descriptorFormat.format(new Date(published))
+                  + digest.substring(0, 1) + "/" + digest.substring(1, 2)
+                  + "/" + digest);
+              this.missingDescriptors.add("server," + digest + ","
+                  + publishedTime);
+              this.archiveWriterParseHistoryModified = true;
+            }
+          }
+        }
+      }
+      SimpleDateFormat printFormat =
+          new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+      printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+      if (isConsensus) {
+        File consensusFile = new File("directory-archive/consensus/"
+            + printFormat.format(new Date(validAfter)) + "-consensus");
+        if (!consensusFile.exists()) {
+          this.logger.fine("Storing consensus: valid-after="
+              + validAfterTime + ", filename=directory-archive/consensus/"
+              + printFormat.format(new Date(validAfter)) + "-consensus");
+          consensusFile.getParentFile().mkdirs();
+          BufferedReader br2 = new BufferedReader(new StringReader(
+              sb.toString()));
+          BufferedWriter bw = new BufferedWriter(new FileWriter(
+              consensusFile));
+          while ((line = br2.readLine()) != null) {
+              bw.write(line + "\n");
+          }
+          bw.close();
+          br2.close();
+          this.logger.fine("Removing consensus from missing list: "
+              + "valid-after=" + validAfterTime
+              + ", filename=directory-archive/consensus/"
+              + printFormat.format(new Date(validAfter)) + "-consensus");
+          this.missingDescriptors.remove("consensus,NA,"
+              + validAfterTime);
+          this.archiveWriterParseHistoryModified = true;
+        } else {
+          this.logger.info("Not storing consensus, because we already "
+              + "have it: valid-after=" + validAfterTime
+              + ", filename=directory-archive/consensus/"
+              + printFormat.format(new Date(validAfter)) + "-consensus");
+        }
+      } else {
+        File voteFile = new File("directory-archive/vote/"
+            + printFormat.format(new Date(validAfter)) + "-vote-"
+            + fingerprint);
+        if (!voteFile.exists()) {
+          this.logger.fine("Storing vote: fingerprint=" + fingerprint
+              + ", valid-after="
+              + printFormat.format(new Date(validAfter))
+              + ", filename=directory-archive/vote/"
+              + printFormat.format(new Date(validAfter)) + "-vote-"
+              + fingerprint);
+          voteFile.getParentFile().mkdirs();
+          BufferedReader br2 = new BufferedReader(new StringReader(
+              sb.toString()));
+          BufferedWriter bw = new BufferedWriter(new FileWriter(
+              voteFile));
+          while ((line = br2.readLine()) != null) {
+              bw.write(line + "\n");
+          }
+          bw.close();
+          br2.close();
+          this.logger.fine("Removing vote from missing list: "
+              + "fingerprint=" + fingerprint + ", valid-after="
+              + printFormat.format(new Date(validAfter))
+              + ", filename=directory-archive/vote/"
+              + printFormat.format(new Date(validAfter)) + "-vote-"
+              + fingerprint);
+          this.missingDescriptors.remove("vote," + fingerprint + ","
+              + validAfterTime);
+          this.archiveWriterParseHistoryModified = true;
+        } else {
+          this.logger.info("Not storing vote, because we already have "
+              + "it: fingerprint=" + fingerprint + ", valid-after="
+              + printFormat.format(new Date(validAfter))
+              + ", filename=directory-archive/vote/"
+              + printFormat.format(new Date(validAfter)) + "-vote-"
+              + fingerprint);
+        }
+      }
+    } else if (line.startsWith("router ") ||
+        line.startsWith("extra-info ")) {
+      boolean isServerDescriptor = line.startsWith("router ");
+      String publishedTime = null;
+      long published = -1L;
+      String digest = null;
+      while ((line = br.readLine()) != null) {
+        sb.append(line + "\n");
+        if (line.startsWith("published ")) {
+          publishedTime = line.substring("published ".length());
+          published = parseFormat.parse(publishedTime).getTime();
+        } else if (line.startsWith("opt extra-info-digest ") ||
+            line.startsWith("extra-info-digest ")) {
+          String extraInfoDigest = line.startsWith("opt ") ?
+              line.split(" ")[2].toLowerCase() :
+              line.split(" ")[1].toLowerCase();
+          SimpleDateFormat descriptorFormat =
+              new SimpleDateFormat("yyyy/MM/");
+          descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+          if (!new File("directory-archive/extra-info/"
+              + descriptorFormat.format(new Date(published))
+              + extraInfoDigest.substring(0, 1) + "/"
+              + extraInfoDigest.substring(1, 2) + "/"
+              + extraInfoDigest).exists()) {
+            if (!this.missingDescriptors.contains("extra,"
+                + extraInfoDigest + "," + publishedTime)) {
+              this.logger.fine("Adding extra-info descriptor to missing "
+                  + "list: digest=" + extraInfoDigest
+                  + ", filename=directory-archive/extra-info/"
+                  + descriptorFormat.format(new Date(published))
+                  + extraInfoDigest.substring(0, 1) + "/"
+                  + extraInfoDigest.substring(1, 2) + "/"
+                  + extraInfoDigest);
+              this.missingDescriptors.add("extra," + extraInfoDigest + ","
+                  + publishedTime);
+              this.archiveWriterParseHistoryModified = true;
+            }
+          }
+        } else if (line.equals("router-signature")) {
+          digest = DigestUtils.shaHex(sb.toString()).toLowerCase();
+        }
+      }
+      SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
+      printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+      File descriptorFile = new File("directory-archive/"
+          + (isServerDescriptor ? "server-descriptor" : "extra-info") + "/"
+          + printFormat.format(new Date(published))
+          + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
+          + digest);
+      if (!descriptorFile.exists()) {
+        this.logger.fine("Storing " + (isServerDescriptor ?
+            "server descriptor" : "extra-info descriptor")
+            + ": digest=" + digest + ", filename=directory-archive/"
+            + (isServerDescriptor ? "server-descriptor" : "extra-info")
+            + "/" + printFormat.format(new Date(published))
+            + digest.substring(0, 1) + "/" + digest.substring(1, 2)
+            + "/" + digest);
+        descriptorFile.getParentFile().mkdirs();
+        BufferedReader br2 = new BufferedReader(new StringReader(
+            sb.toString()));
+        BufferedWriter bw = new BufferedWriter(new FileWriter(
+            descriptorFile));
+        while ((line = br2.readLine()) != null) {
+          bw.write(line + "\n");
+        }
+        bw.close();
+        br2.close();
+        this.logger.fine("Removing " + (isServerDescriptor ?
+            "server descriptor" : "extra-info descriptor")
+            + " from missing list: digest=" + digest
+            + ", filename=directory-archive/"
+            + (isServerDescriptor ? "server-descriptor" : "extra-info")
+            + "/" + printFormat.format(new Date(published))
+            + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
+            + digest);
+        if (isServerDescriptor) {
+          this.missingDescriptors.remove("server," + digest + ","
+              + publishedTime);
+        } else {
+          this.missingDescriptors.remove("extra," + digest + ","
+              + publishedTime);
+        }
+        this.archiveWriterParseHistoryModified = true;
+      } else {
+        this.logger.info("Not storing " + (isServerDescriptor ?
+            "server descriptor" : "extra-info descriptor")
+            + ", because we already have it: digest=" + digest
+            + ", filename=directory-archive/"
+            + (isServerDescriptor ? "server-descriptor" : "extra-info")
+            + "/" + printFormat.format(new Date(published))
+            + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
+            + digest);
       }
-      bw.close();
     }
-    // TODO if extraInfoDigest != null, add digest to extra-info
-    // descriptors we want to download
   }
-  public void storeExtraInfo(BufferedReader br, String digest,
-      String publishedTime) throws IOException, ParseException {
-    SimpleDateFormat parseFormat =
-        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-    parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    long published = parseFormat.parse(publishedTime).getTime();
-    SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
-    printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    File descriptorFile = new File("directory-archive/extra-info/"
-        + printFormat.format(new Date(published))
-        + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
-        + digest);
-    descriptorFile.getParentFile().mkdirs();
-    if (!descriptorFile.exists()) {
-      BufferedWriter bw = new BufferedWriter(new FileWriter(
-          descriptorFile));
-      String line = null, extraInfoDigest = null;
-      while ((line = br.readLine()) != null) {
-        bw.write(line + "\n");
+  public Set<String> getMissingDescriptorUrls() {
+    Set<String> urls = new HashSet<String>();
+    for (String line : this.missingDescriptors) {
+      if (line.startsWith("consensus,")) {
+        urls.add("/tor/status-vote/current/consensus");
+      } else if (line.startsWith("vote,")) {
+        urls.add("/tor/status-vote/current/" + line.split(",")[1]);
+      } else if (line.startsWith("server,")) {
+        urls.add("/tor/server/d/" + line.split(",")[1]);
+      } else if (line.startsWith("extra,")) {
+        urls.add("/tor/extra/d/" + line.split(",")[1]);
+      }
+    }
+    return urls;
+  }
+  public void writeFile() {
+    if (this.archiveWriterParseHistoryModified) {
+      try {
+        this.logger.info("Writing file " + this.statsDir
+            + "/archive-writer-parse-history...");
+        new File(this.statsDir).mkdirs();
+        BufferedWriter bw = new BufferedWriter(new FileWriter(
+            this.archiveWriterParseHistory));
+        bw.write("type,source,published\n");
+        for (String line : this.missingDescriptors) {
+          bw.write(line + "\n");
+        }
+        bw.close();
+        this.logger.info("Finished writing file " + this.statsDir
+            + "/archive-writer-parse-history.");
+      } catch (IOException e) {
+        this.logger.log(Level.WARNING, "Failed writing " + this.statsDir
+            + "/archive-writer-parse-history!", e);
       }
-      bw.close();
     }
   }
 }
diff --git a/src/CachedRelayDescriptorReader.java b/src/CachedRelayDescriptorReader.java
index 8a7ed5f..d865b5c 100644
--- a/src/CachedRelayDescriptorReader.java
+++ b/src/CachedRelayDescriptorReader.java
@@ -1,5 +1,6 @@
 import java.io.*;
 import java.text.*;
+import java.util.logging.*;
 import org.apache.commons.codec.digest.*;
 
 /**
@@ -9,21 +10,33 @@ import org.apache.commons.codec.digest.*;
 public class CachedRelayDescriptorReader {
   public CachedRelayDescriptorReader(RelayDescriptorParser rdp,
       ArchiveWriter aw) {
+    Logger logger = Logger.getLogger(
+        CachedRelayDescriptorReader.class.getName());
     File cachedDescDir = new File("cacheddesc");
     if (cachedDescDir.exists()) {
+      logger.info("Reading cacheddesc/ directory.");
       try {
-        rdp.initialize();
+        rdp.initialize(); // TODO get rid of this non-sense
       } catch (IOException e) {
         return;
       }
       for (File f : cachedDescDir.listFiles()) {
         try {
-          if (f.getName().equals("cached-consensus") ||
-              f.getName().startsWith("cached-descriptors") ||
+          if (f.getName().equals("cached-consensus")) {
+            BufferedReader br = new BufferedReader(new FileReader(f));
+            if (aw != null) {
+              aw.store(br);
+            }
+            br.close();
+            br = new BufferedReader(new FileReader(f));
+            if (rdp != null) {
+              rdp.parse(br);
+            }
+            br.close();
+          } else if (f.getName().startsWith("cached-descriptors") ||
               f.getName().startsWith("cached-extrainfo")) {
             BufferedReader br = new BufferedReader(new FileReader(f));
-            String line = null, validAfterTime = null, publishedTime = null,
-                extraInfoDigest = null, digest = null;
+            String line = null;
             StringBuilder sb = new StringBuilder();
             while ((line = br.readLine()) != null || sb != null) {
               if (line == null && sb.length() < 1) {
@@ -34,19 +47,14 @@ public class CachedRelayDescriptorReader {
                 if (sb.length() > 0) {
                   BufferedReader storeBr = new BufferedReader(
                       new StringReader(sb.toString()));
-                  if (f.getName().equals("cached-consensus")) {
-                    aw.storeConsensus(storeBr, validAfterTime);
-                    validAfterTime = null;
-                  } else if (f.getName().startsWith("cached-descriptors")) {
-                    aw.storeServerDescriptor(storeBr, digest,
-                        publishedTime, extraInfoDigest);
-                    digest = null;
-                    publishedTime = null;
-                    extraInfoDigest = null;
-                  } else if (f.getName().startsWith("cached-extrainfo")) {
-                    aw.storeExtraInfo(storeBr, digest, publishedTime);
-                    digest = null;
-                    publishedTime = null;
+                  if (aw != null) {
+                    aw.store(storeBr);
+                  }
+                  storeBr.close();
+                  storeBr = new BufferedReader(
+                      new StringReader(sb.toString()));
+                  if (rdp != null) {
+                    rdp.parse(storeBr);
                   }
                   storeBr.close();
                 }
@@ -57,28 +65,19 @@ public class CachedRelayDescriptorReader {
                   sb = new StringBuilder();
                 }
               }
-              if (line.startsWith("valid-after ")) {
-                validAfterTime = line.substring("valid-after ".length());
-              } else if (line.startsWith("published ")) {
-                publishedTime = line.substring("published ".length());
-              } else if (line.startsWith("router-signature")) {
-                digest = DigestUtils.shaHex(sb.toString()
-                    + "router-signature\n").toUpperCase();
-              } else if (line.startsWith("opt extra-info-digest ")) {
-                extraInfoDigest = line.split(" ")[2];
-              } else if (line.startsWith("extra-info-digest ")) {
-                extraInfoDigest = line.split(" ")[1];
-              }
               if (!line.startsWith("@")) {
                 sb.append(line + "\n");
               }
             }
             br.close();
+            logger.info("Finished reading cacheddesc/ directory.");
           }
         } catch (IOException e) {
-          // TODO handle
+          logger.log(Level.WARNING, "Failed reading cacheddesc/ "
+              + "directory.", e);
         } catch (ParseException e) {
-          // TODO handle
+          logger.log(Level.WARNING, "Failed reading cacheddesc/ "
+              + "directory.", e);
         }
       }
     }
diff --git a/src/Configuration.java b/src/Configuration.java
index 9eb8f3c..0eeb524 100644
--- a/src/Configuration.java
+++ b/src/Configuration.java
@@ -18,6 +18,14 @@ public class Configuration {
       + "9695DFC35FFEB861329B9F1AB04C46397020CE31").split(",")));
   private boolean writeBridgeStats = true;
   private boolean writeDirectoryArchives = true;
+  private SortedSet<String> v3DirectoryAuthorities = new TreeSet<String>(
+      Arrays.asList(("14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4,"
+      + "E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58,"
+      + "D586D18309DED4CD6D57C18FDB97EFA96D330566,"
+      + "585769C78764D58426B8B52B6651A5A71137189A,"
+      + "27B6B5996C426270A5C95488AA5BCEB6BCC86956,"
+      + "80550987E1D626E3EBA5E5E75A458DE0626D088C,"
+      + "ED03BB616EB2F60BEC80151114BB25CEF515B226").split(",")));
   private boolean importCachedRelayDescriptors = true;
   private boolean importDirectoryArchives = true;
   private boolean importSanitizedBridges = true;
@@ -67,6 +75,9 @@ public class Configuration {
         } else if (line.startsWith("WriteDirectoryArchives")) {
           this.writeDirectoryArchives = Integer.parseInt(
               line.split(" ")[1]) != 0;
+        } else if (line.startsWith("V3DirectoryAuthorities")) {
+          this.v3DirectoryAuthorities = new TreeSet<String>(
+              Arrays.asList(line.split(" ")[1].split(",")));
         } else if (line.startsWith("ImportCachedRelayDescriptors")) {
           this.importCachedRelayDescriptors = Integer.parseInt(
               line.split(" ")[1]) != 0;
@@ -155,6 +166,9 @@ public class Configuration {
   public boolean getWriteDirectoryArchives() {
     return this.writeDirectoryArchives;
   }
+  public SortedSet<String> getV3DirectoryAuthorities() {
+    return this.v3DirectoryAuthorities;
+  }
   public boolean getImportCachedRelayDescriptors() {
     return this.importCachedRelayDescriptors;
   }
diff --git a/src/Main.java b/src/Main.java
index c52313c..8d8b2a1 100644
--- a/src/Main.java
+++ b/src/Main.java
@@ -47,7 +47,8 @@ public class Main {
 
     // Prepare writing relay descriptor archive to disk
     ArchiveWriter aw = config.getWriteDirectoryArchives() ?
-        new ArchiveWriter() : null;
+        new ArchiveWriter(statsDirectory,
+        config.getV3DirectoryAuthorities()) : null;
     // TODO handle case aw==NULL below
 
     // import and/or download relay and bridge descriptors
@@ -58,14 +59,9 @@ public class Main {
       new ArchiveReader(rdp, "archives");
     }
     if (config.getDownloadRelayDescriptors()) {
-      // TODO make this smarter by letting rdd ask rdp which descriptors
-      // are still missing and only download those
-      // TODO move iteration over dirauths from main() to class code
-      for (String directoryAuthority : 
-          config.getDownloadFromDirectoryAuthorities()) {
-        new RelayDescriptorDownloader(rdp, aw, directoryAuthority,
-            directories);
-      }
+      new RelayDescriptorDownloader(rdp, aw,
+          config.getDownloadFromDirectoryAuthorities(),
+          directories);
     }
     if (config.getImportSanitizedBridges()) {
       new SanitizedBridgesReader(bdp, "bridges", countries);
@@ -86,6 +82,7 @@ public class Main {
       dsfh.writeFile();
     }
     rdp.writeFile();
+    aw.writeFile();
 
     // Import and process torperf stats
     if (config.getImportWriteTorperfStats()) {
diff --git a/src/RelayDescriptorDownloader.java b/src/RelayDescriptorDownloader.java
index 7f953be..b0f270d 100644
--- a/src/RelayDescriptorDownloader.java
+++ b/src/RelayDescriptorDownloader.java
@@ -1,5 +1,6 @@
 import java.io.*;
 import java.net.*;
+import java.text.*;
 import java.util.*;
 import java.util.logging.*;
 
@@ -9,74 +10,86 @@ import java.util.logging.*;
  */
 public class RelayDescriptorDownloader {
   public RelayDescriptorDownloader(RelayDescriptorParser rdp,
-      ArchiveWriter aw, String authority, SortedSet<String> directories) {
-    Logger logger =
-        Logger.getLogger(RelayDescriptorDownloader.class.getName());
-    // TODO use aw
-    try {
-      rdp.initialize();
-    } catch (IOException e) {
-      return;
+      ArchiveWriter aw, List<String> authorities,
+      SortedSet<String> directories) {
+    Logger logger = Logger.getLogger(
+        RelayDescriptorDownloader.class.getName());
+    List<String> remainingAuthorities =
+        new ArrayList<String>(authorities);
+    if (rdp != null) {
+      try {
+        rdp.initialize(); // TODO get rid of this non-sense
+      } catch (IOException e) {
+        return;
+      }
     }
-    try {
-      logger.info("Downloading current consensus from " + authority
-          + "...");
-      URL u = new URL("http://" + authority
-          + "/tor/status-vote/current/consensus");
-      HttpURLConnection huc = (HttpURLConnection) u.openConnection();
-      huc.setRequestMethod("GET");
-      huc.connect();
-      int response = huc.getResponseCode();
-      if (response == 200) {
-        BufferedInputStream in = new BufferedInputStream(
-            huc.getInputStream());
-        StringBuilder sb = new StringBuilder();
-        int len;
-        byte[] data = new byte[1024];
-        while ((len = in.read(data, 0, 1024)) >= 0) {
-          sb.append(new String(data, 0, len));
-        }
-        in.close();
-        String consensus = sb.toString();
-        rdp.parse(new BufferedReader(new StringReader(consensus)));
+    Set<String> urls = new HashSet<String>();
+    Set<String> downloaded = new HashSet<String>();
+    if (rdp != null) {
+      urls.addAll(rdp.getMissingDescriptorUrls());
+    }
+    do {
+      if (aw != null) {
+        urls.addAll(aw.getMissingDescriptorUrls());
       }
-      logger.info("Finished downloading current consensus from "
-          + authority + ".");
-      logger.info("Downloading extra-info descriptors from " + authority
-          + "...");
-      Stack<String> extraInfos = new Stack<String>();
-      for (String fingerprint : directories) {
-        u = new URL("http://" + authority + "/tor/extra/fp/"
-            + fingerprint);
-        huc = (HttpURLConnection) u.openConnection();
-        huc.setRequestMethod("GET");
-        huc.connect();
-        response = huc.getResponseCode();
-        if (response == 200) {
-          BufferedInputStream in = new BufferedInputStream(
-              huc.getInputStream());
-          StringBuilder sb = new StringBuilder();
-          int len;
-          byte[] data = new byte[1024];
-          while ((len = in.read(data, 0, 1024)) >= 0) {
-            sb.append(new String(data, 0, len));
+      urls.removeAll(downloaded);
+      SortedSet<String> sortedUrls = new TreeSet<String>(urls);
+      while (!remainingAuthorities.isEmpty() && !sortedUrls.isEmpty()) {
+        String authority = remainingAuthorities.get(0);
+        String url = sortedUrls.first();
+        try {
+          URL u = new URL("http://" + authority + url);
+          HttpURLConnection huc =
+              (HttpURLConnection) u.openConnection();
+          huc.setRequestMethod("GET");
+          huc.connect();
+          int response = huc.getResponseCode();
+          logger.fine("Downloading http://" + authority + url + " -> "
+              + response);
+          if (response == 200) {
+            BufferedInputStream in = new BufferedInputStream(
+                huc.getInputStream());
+            StringBuilder sb = new StringBuilder();
+            int len;
+            byte[] data = new byte[1024];
+            while ((len = in.read(data, 0, 1024)) >= 0) {
+              sb.append(new String(data, 0, len));
+            }
+            in.close();
+            String result = sb.toString();
+            if (rdp != null) {
+              BufferedReader br = new BufferedReader(new StringReader(
+                  result));
+              rdp.parse(br);
+              br.close();
+            }
+            if (aw != null) {
+              BufferedReader br = new BufferedReader(new StringReader(
+                  result));
+              try {
+                aw.store(br);
+              } catch (Exception e) {
+                e.printStackTrace();
+                //TODO find better way to handle this
+              }
+              br.close();
+            }
           }
-          in.close();
-          String extraInfo = sb.toString();
-          if (extraInfo.length() > 0) {
-            BufferedReader br = new BufferedReader(
-                new StringReader(extraInfo));
-            rdp.parse(br);
+          sortedUrls.remove(url);
+        } catch (IOException e) {
+          remainingAuthorities.remove(authority);
+          if (!remainingAuthorities.isEmpty()) {
+            logger.log(Level.INFO, "Failed downloading from "
+                + authority + "!", e);
+          } else {
+            logger.log(Level.WARNING, "Failed downloading from "
+                + authority + "! We have no authorities left to download "
+                + "from!", e);
           }
         }
       }
-      logger.info("Finished downloading extra-info descriptors from "
-          + authority + ".");
-    } catch (IOException e) {
-      logger.log(Level.WARNING, "Failed downloading either current "
-          + "consensus or extra-info descriptors from " + authority
-          + "!", e);
-    }
+      downloaded.addAll(urls);
+    } while (!urls.isEmpty());
   }
 }
 
diff --git a/src/RelayDescriptorParser.java b/src/RelayDescriptorParser.java
index e8b29fe..2139f48 100644
--- a/src/RelayDescriptorParser.java
+++ b/src/RelayDescriptorParser.java
@@ -66,6 +66,10 @@ public class RelayDescriptorParser {
   }
   public void parse(BufferedReader br) throws IOException {
     String line = br.readLine();
+    if (line == null) {
+      this.logger.warning("Parsing empty file?");
+      return;
+    }
     if (line.equals("network-status-version 3")) {
       int exit = 0, fast = 0, guard = 0, running = 0, stable = 0;
       String validAfter = null;
@@ -162,15 +166,19 @@ public class RelayDescriptorParser {
     format.setTimeZone(TimeZone.getTimeZone("UTC"));
     long now = System.currentTimeMillis();
     for (String directory : this.directories) {
-      try {
-        long statsEnd = format.parse(this.lastParsedExtraInfos.get(
-            directory)).getTime();
-        if (statsEnd + 36L * 60L * 60L * 1000L < now) {
-          urls.add("/tor/extra/fp/" + directory);
+      if (!this.lastParsedExtraInfos.containsKey(directory)) {
+        urls.add("/tor/extra/fp/" + directory);
+      } else {
+        try {
+          long statsEnd = format.parse(this.lastParsedExtraInfos.get(
+              directory)).getTime();
+          if (statsEnd + 36L * 60L * 60L * 1000L < now) {
+            urls.add("/tor/extra/fp/" + directory);
+          }
+        } catch (ParseException e) {
+          this.logger.log(Level.WARNING, "Failed parsing timestamp in "
+              + this.statsDir + "/relay-descriptor-parse-history!", e);
         }
-      } catch (ParseException e) {
-        this.logger.log(Level.WARNING, "Failed parsing timestamp in "
-            + this.statsDir + "/relay-descriptor-parse-history!", e);
       }
     }
     return urls;
-- 
1.6.5



More information about the tor-commits mailing list