[or-cvs] [ernie/master] Remember last parsed relay descriptors to avoid unnecessary downloads.

karsten at torproject.org karsten at torproject.org
Mon Feb 22 18:41:15 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Mon, 22 Feb 2010 19:40:20 +0100
Subject: Remember last parsed relay descriptors to avoid unnecessary downloads.
Commit: 50a7795fe9af74109bbb452c6c5dcabaf0430fea

---
 src/Main.java                  |    4 +-
 src/RelayDescriptorParser.java |  120 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 119 insertions(+), 5 deletions(-)

diff --git a/src/Main.java b/src/Main.java
index 53221f4..c325904 100644
--- a/src/Main.java
+++ b/src/Main.java
@@ -40,8 +40,8 @@ public class Main {
 
     // Prepare parsers
     // TODO handle cases bsfh==NULL, csfh==NULL, dsfh==NULL
-    RelayDescriptorParser rdp = new RelayDescriptorParser(csfh, bsfh,
-        dsfh, countries, directories);
+    RelayDescriptorParser rdp = new RelayDescriptorParser(statsDirectory,
+        csfh, bsfh, dsfh, countries, directories);
     BridgeDescriptorParser bdp = new BridgeDescriptorParser(csfh, bsfh,
         countries);
 
diff --git a/src/RelayDescriptorParser.java b/src/RelayDescriptorParser.java
index 2858402..18a2c40 100644
--- a/src/RelayDescriptorParser.java
+++ b/src/RelayDescriptorParser.java
@@ -1,5 +1,7 @@
 import java.io.*;
+import java.text.*;
 import java.util.*;
+import java.util.logging.*;
 import org.apache.commons.codec.digest.*;
 import org.apache.commons.codec.binary.*;
 
@@ -8,24 +10,56 @@ import org.apache.commons.codec.binary.*;
  * relevant contents to the stats file handlers.
  */
 public class RelayDescriptorParser {
+  private String statsDir;
+  private File relayDescriptorParseHistory;
+  private SortedMap<String, String> lastParsedExtraInfos;
+  private String lastParsedConsensus;
+  private boolean initialized = false;
+  private boolean relayDescriptorParseHistoryModified = false;
   private DirreqStatsFileHandler dsfh;
   private ConsensusStatsFileHandler csfh;
   private BridgeStatsFileHandler bsfh;
   private SortedSet<String> countries;
   private SortedSet<String> directories;
-  public RelayDescriptorParser(ConsensusStatsFileHandler csfh,
-      BridgeStatsFileHandler bsfh, DirreqStatsFileHandler dsfh,
-      SortedSet<String> countries, SortedSet<String> directories) {
+  private Logger logger;
+  public RelayDescriptorParser(String statsDir,
+      ConsensusStatsFileHandler csfh, BridgeStatsFileHandler bsfh,
+      DirreqStatsFileHandler dsfh, SortedSet<String> countries,
+      SortedSet<String> directories) {
+    this.statsDir = statsDir;
+    this.relayDescriptorParseHistory = new File(statsDir
+        + "/relay-descriptor-parse-history");
     this.csfh = csfh;
     this.bsfh = bsfh;
     this.dsfh = dsfh;
     this.countries = countries;
     this.directories = directories;
+    this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());
   }
   public void initialize() throws IOException {
     this.csfh.initialize();
     this.bsfh.initialize();
     this.dsfh.initialize();
+    this.lastParsedConsensus = null;
+    this.lastParsedExtraInfos = new TreeMap<String, String>();
+    if (this.relayDescriptorParseHistory.exists()) {
+      this.logger.info("Reading file " + statsDir
+          + "/relay-descriptor-parse-history...");
+      BufferedReader br = new BufferedReader(new FileReader(
+          this.relayDescriptorParseHistory));
+      String line = null;
+      while ((line = br.readLine()) != null) {
+        if (line.startsWith("consensus")) {
+          this.lastParsedConsensus = line.split(" ")[2];
+        } else if (line.startsWith("extrainfo")) {
+          this.lastParsedExtraInfos.put(line.split(" ")[1],
+              line.split(" ")[2]);
+        }
+      }
+      br.close();
+      this.logger.info("Finished reading file " + statsDir
+          + "/relay-descriptor-parse-history");
+    }
   }
   public void parse(BufferedReader br) throws IOException {
     String line = br.readLine();
@@ -35,6 +69,16 @@ public class RelayDescriptorParser {
       while ((line = br.readLine()) != null) {
         if (line.startsWith("valid-after ")) {
           validAfter = line.substring("valid-after ".length());
+        } else if (line.startsWith("vote-status ")) {
+          if (line.equals("vote-status vote")) {
+            return;
+          } else {
+            if (this.lastParsedConsensus == null ||
+                validAfter.compareTo(this.lastParsedConsensus) > 0) {
+              this.lastParsedConsensus = validAfter;
+              relayDescriptorParseHistoryModified = true;
+            }
+          }
         } else if (line.startsWith("r ")) {
           String hashedRelay = DigestUtils.shaHex(Base64.decodeBase64(
               line.split(" ")[2] + "=")).toUpperCase();
@@ -61,6 +105,11 @@ public class RelayDescriptorParser {
       while ((line = br.readLine()) != null) {
         if (line.startsWith("dirreq-stats-end ")) {
           date = line.split(" ")[1];
+          if (this.lastParsedExtraInfos.get(dir) == null ||
+              date.compareTo(this.lastParsedExtraInfos.get(dir)) > 0) {
+            this.lastParsedExtraInfos.put(dir, date);
+            relayDescriptorParseHistoryModified = true;
+          }
           // trusted had very strange dirreq-v3-shares here...
           skip = dir.equals("8522EB98C91496E80EC238E732594D1509158E77")
               && (date.equals("2009-09-10") || date.equals("2009-09-11"));
@@ -85,5 +134,70 @@ public class RelayDescriptorParser {
       }
     }
   }
+  /**
+   * Returns the URLs of current descriptors that we are missing,
+   * including the current consensus and a few extra-info descriptors.
+   */
+  public Set<String> getMissingDescriptorUrls() {
+    Set<String> urls = new HashSet<String>();
+    // We might be missing the current consensus for either consensus
+    // stats or bridge stats; we remember ourselves which consensus we
+    // parsed before (most likely from parsing cached-consensus) and can
+    // decide whether we want a more current one
+    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH");
+    format.setTimeZone(TimeZone.getTimeZone("UTC"));
+    String currentConsensus = format.format(new Date())
+        + ":00:00";
+    if (currentConsensus.equals(this.lastParsedConsensus)) {
+      urls.add("/tor/status-vote/current/consensus");
+    }
+    // We might be missing extra-info descriptors for dirreq stats for
+    // the directories we care about; we are happy with previous dirreq
+    // stats until they are more than 36 hours old (24 hours for the
+    // next stats period to end plus 12 hours for publishing a new
+    // descriptor)
+    format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    format.setTimeZone(TimeZone.getTimeZone("UTC"));
+    long now = System.currentTimeMillis();
+    for (String directory : this.directories) {
+      try {
+        long statsEnd = format.parse(this.lastParsedExtraInfos.get(
+            directory)).getTime();
+        if (statsEnd + 36L * 60L * 60L * 1000L < now) {
+          urls.add("/tor/extra/fp/" + directory);
+        }
+      } catch (ParseException e) {
+        this.logger.log(Level.WARNING, "Failed parsing timestamp in "
+            + this.statsDir + "/relay-descriptor-parse-history!", e);
+      }
+    }
+    return urls;
+  }
+  public void writeFile() {
+    if (relayDescriptorParseHistoryModified) {
+      try {
+        this.logger.info("Writing file " + this.statsDir
+            + "/relay-descriptor-parse-history...");
+        new File(this.statsDir).mkdirs();
+        BufferedWriter bw = new BufferedWriter(new FileWriter(
+            this.relayDescriptorParseHistory));
+        bw.write("type,source,published\n");
+        if (this.lastParsedConsensus != null) {
+          bw.write("consensus,NA," + this.lastParsedConsensus + "\n");
+        }
+        for (Map.Entry<String, String> e :
+            this.lastParsedExtraInfos.entrySet()) {
+          bw.write("extrainfo," + e.getKey() + "," + e.getValue()
+              + "\n");
+        }
+        bw.close();
+        this.logger.info("Finished writing file " + this.statsDir
+            + "/relay-descriptor-parse-history.");
+      } catch (IOException e) {
+        this.logger.log(Level.WARNING, "Failed writing " + this.statsDir
+            + "/relay-descriptor-parse-history!", e);
+      }
+    }
+  }
 }
 
-- 
1.6.5



More information about the tor-commits mailing list