[tor-commits] [metrics-db/master] Recognize when data sources become stale.

karsten at torproject.org karsten at torproject.org
Sat Oct 27 20:07:46 UTC 2012


commit f7b58361aa6df5fd2afe40c38f9dd111e2820f89
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Sat Oct 27 13:52:42 2012 -0400

    Recognize when data sources become stale.
    
    Implements #3850.
---
 .../db/bridgedescs/SanitizedBridgesWriter.java     |   58 +++++++++++
 .../BridgePoolAssignmentsProcessor.java            |   16 +++
 .../ernie/db/exitlists/ExitListDownloader.java     |  100 +++++++++++++++-----
 .../ernie/db/relaydescs/ArchiveWriter.java         |   46 +++++++++
 4 files changed, 194 insertions(+), 26 deletions(-)

diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
index 87593bd..7de9961 100644
--- a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
@@ -193,6 +193,8 @@ public class SanitizedBridgesWriter extends Thread {
     // Finish writing sanitized bridge descriptors to disk
     this.finishWriting();
 
+    this.checkStaleDescriptors();
+
     this.cleanUpRsyncDirectory();
   }
 
@@ -368,6 +370,8 @@ public class SanitizedBridgesWriter extends Thread {
     return this.secretsForHashingIPAddresses.get(month);
   }
 
+  private String maxNetworkStatusPublishedTime = "1970-01-01 00:00:00";
+
   /**
    * Sanitizes a network status and writes it to disk.
    */
@@ -380,6 +384,10 @@ public class SanitizedBridgesWriter extends Thread {
       return;
     }
 
+    if (publicationTime.compareTo(maxNetworkStatusPublishedTime) > 0) {
+      maxNetworkStatusPublishedTime = publicationTime;
+    }
+
     if (this.bridgeSanitizingCutOffTimestamp.
         compareTo(publicationTime) > 0) {
       this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING
@@ -543,6 +551,8 @@ public class SanitizedBridgesWriter extends Thread {
     }
   }
 
+  private String maxServerDescriptorPublishedTime = "1970-01-01 00:00:00";
+
   /**
    * Sanitizes a bridge server descriptor and writes it to disk.
    */
@@ -590,6 +600,9 @@ public class SanitizedBridgesWriter extends Thread {
          * sanitizing interval. */
         } else if (line.startsWith("published ")) {
           published = line.substring("published ".length());
+          if (published.compareTo(maxServerDescriptorPublishedTime) > 0) {
+            maxServerDescriptorPublishedTime = published;
+          }
           if (this.bridgeSanitizingCutOffTimestamp.
               compareTo(published) > 0) {
             this.logger.log(!this.haveWarnedAboutInterval
@@ -799,6 +812,9 @@ public class SanitizedBridgesWriter extends Thread {
     }
   }
 
+  private String maxExtraInfoDescriptorPublishedTime =
+      "1970-01-01 00:00:00";
+
   /**
    * Sanitizes an extra-info descriptor and writes it to disk.
    */
@@ -827,6 +843,10 @@ public class SanitizedBridgesWriter extends Thread {
         } else if (line.startsWith("published ")) {
           scrubbed.append(line + "\n");
           published = line.substring("published ".length());
+          if (published.compareTo(maxExtraInfoDescriptorPublishedTime)
+              > 0) {
+            maxExtraInfoDescriptorPublishedTime = published;
+          }
 
         /* Remove everything from transport lines except the transport
          * name. */
@@ -968,6 +988,44 @@ public class SanitizedBridgesWriter extends Thread {
     }
   }
 
+  private void checkStaleDescriptors() { // warn if newest data > 5:30 h old
+    SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss"); // parses the max*PublishedTime strings
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    long tooOldMillis = System.currentTimeMillis() - 330L * 60L * 1000L;
+    try { // one try for all three checks: a parse error skips the rest
+      long maxNetworkStatusPublishedMillis =
+          dateTimeFormat.parse(maxNetworkStatusPublishedTime).getTime();
+      if (maxNetworkStatusPublishedMillis > 0L && // 0 = initial 1970 value
+          maxNetworkStatusPublishedMillis < tooOldMillis) {
+        this.logger.warning("The last known bridge network status was "
+            + "published " + maxNetworkStatusPublishedTime + ", which is "
+            + "more than 5:30 hours in the past.");
+      }
+      long maxServerDescriptorPublishedMillis =
+          dateTimeFormat.parse(maxServerDescriptorPublishedTime).
+          getTime();
+      if (maxServerDescriptorPublishedMillis > 0L && // skip initial value
+          maxServerDescriptorPublishedMillis < tooOldMillis) {
+        this.logger.warning("The last known bridge server descriptor was "
+            + "published " + maxServerDescriptorPublishedTime + ", which "
+            + "is more than 5:30 hours in the past.");
+      }
+      long maxExtraInfoDescriptorPublishedMillis =
+          dateTimeFormat.parse(maxExtraInfoDescriptorPublishedTime).
+          getTime();
+      if (maxExtraInfoDescriptorPublishedMillis > 0L && // skip initial value
+          maxExtraInfoDescriptorPublishedMillis < tooOldMillis) {
+        this.logger.warning("The last known bridge extra-info descriptor "
+            + "was published " + maxExtraInfoDescriptorPublishedTime
+            + ", which is more than 5:30 hours in the past.");
+      }
+    } catch (ParseException e) { // logged only; nothing is rethrown
+      this.logger.log(Level.WARNING, "Unable to parse timestamp for "
+          + "stale check.", e);
+    }
+  }
+
   /* Delete all files from the rsync directory that have not been modified
    * in the last three days. */
   public void cleanUpRsyncDirectory() {
diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
index 0ac6f90..43d3427 100644
--- a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
+++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -77,6 +77,7 @@ public class BridgePoolAssignmentsProcessor extends Thread {
     SimpleDateFormat filenameFormat =
         new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
     filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    long maxBridgePoolAssignmentTime = 0L;
     for (File assignmentFile : assignmentFiles) {
       logger.info("Processing bridge pool assignment file '"
           + assignmentFile.getAbsolutePath() + "'...");
@@ -120,6 +121,9 @@ public class BridgePoolAssignmentsProcessor extends Thread {
                 long bridgePoolAssignmentTime = assignmentFormat.parse(
                     bridgePoolAssignmentLine.substring(
                     "bridge-pool-assignment ".length())).getTime();
+                maxBridgePoolAssignmentTime = Math.max(
+                    maxBridgePoolAssignmentTime,
+                    bridgePoolAssignmentTime);
                 File tarballFile = new File(
                     sanitizedAssignmentsDirectory, filenameFormat.format(
                     bridgePoolAssignmentTime));
@@ -192,6 +196,18 @@ public class BridgePoolAssignmentsProcessor extends Thread {
       }
     }
 
+    if (maxBridgePoolAssignmentTime > 0L &&
+        maxBridgePoolAssignmentTime + 330L * 60L * 1000L
+        < System.currentTimeMillis()) {
+      SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+          "yyyy-MM-dd HH:mm:ss");
+      dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+      logger.warning("The last known bridge pool assignment list was "
+          + "published at "
+          + dateTimeFormat.format(maxBridgePoolAssignmentTime)
+          + ", which is more than 5:30 hours in the past.");
+    }
+
     this.cleanUpRsyncDirectory();
 
     logger.info("Finished processing bridge pool assignment file(s).");
diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
index 26e944c..9b1f40b 100644
--- a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
+++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
@@ -12,6 +12,7 @@ import java.net.URL;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.Date;
+import java.util.List;
 import java.util.SortedSet;
 import java.util.Stack;
 import java.util.TimeZone;
@@ -19,6 +20,12 @@ import java.util.TreeSet;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorParser;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExitList;
+import org.torproject.descriptor.ExitListEntry;
+import org.torproject.descriptor.impl.DescriptorParseException;
 import org.torproject.ernie.db.main.Configuration;
 
 public class ExitListDownloader extends Thread {
@@ -35,8 +42,19 @@ public class ExitListDownloader extends Thread {
     }
 
     Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
+
+    SimpleDateFormat dateTimeFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+    Date downloadedDate = new Date();
+    String downloadedExitList = null;
     try {
       logger.fine("Downloading exit list...");
+      StringBuilder sb = new StringBuilder();
+      sb.append("@type tordnsel 1.0\n");
+      sb.append("Downloaded " + dateTimeFormat.format(downloadedDate)
+          + "\n");
       String exitAddressesUrl =
           "http://exitlist.torproject.org/exit-addresses";
       URL u = new URL(exitAddressesUrl);
@@ -51,42 +69,72 @@ public class ExitListDownloader extends Thread {
       }
       BufferedInputStream in = new BufferedInputStream(
           huc.getInputStream());
-      SimpleDateFormat printFormat =
-          new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
-      printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-      Date downloadedDate = new Date();
-      File tarballFile = new File("exitlist/" + printFormat.format(
-          downloadedDate));
-      tarballFile.getParentFile().mkdirs();
-      File rsyncFile = new File("rsync/exit-lists/"
-          + tarballFile.getName());
-      rsyncFile.getParentFile().mkdirs();
-      SimpleDateFormat dateTimeFormat =
-          new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-      BufferedWriter bwT = new BufferedWriter(new FileWriter(
-          tarballFile));
-      BufferedWriter bwR = new BufferedWriter(new FileWriter(
-          rsyncFile));
-      bwT.write("@type tordnsel 1.0\n");
-      bwT.write("Downloaded " + dateTimeFormat.format(downloadedDate)
-          + "\n");
-      bwR.write("@type tordnsel 1.0\n");
-      bwR.write("Downloaded " + dateTimeFormat.format(downloadedDate)
-          + "\n");
       int len;
       byte[] data = new byte[1024];
       while ((len = in.read(data, 0, 1024)) >= 0) {
-        bwT.write(new String(data, 0, len));
-        bwR.write(new String(data, 0, len));
+        sb.append(new String(data, 0, len));
       }   
       in.close();
-      bwT.close();
-      bwR.close();
+      downloadedExitList = sb.toString();
       logger.fine("Finished downloading exit list.");
     } catch (IOException e) {
       logger.log(Level.WARNING, "Failed downloading exit list", e);
       return;
     }
+    if (downloadedExitList == null) {
+      logger.warning("Failed downloading exit list");
+      return;
+    }
+
+    SimpleDateFormat tarballFormat =
+        new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
+    tarballFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    File tarballFile = new File("exitlist/" + tarballFormat.format(
+        downloadedDate));
+
+    long maxScanMillis = 0L;
+    try {
+      DescriptorParser descriptorParser =
+          DescriptorSourceFactory.createDescriptorParser();
+      List<Descriptor> parsedDescriptors =
+          descriptorParser.parseDescriptors(downloadedExitList.getBytes(),
+          tarballFile.getName());
+      if (parsedDescriptors.size() != 1 ||
+          !(parsedDescriptors.get(0) instanceof ExitList)) {
+        logger.warning("Could not parse downloaded exit list");
+        return;
+      }
+      ExitList parsedExitList = (ExitList) parsedDescriptors.get(0);
+      for (ExitListEntry entry : parsedExitList.getExitListEntries()) {
+        maxScanMillis = Math.max(maxScanMillis, entry.getScanMillis());
+      }
+    } catch (DescriptorParseException e) {
+      logger.log(Level.WARNING, "Could not parse downloaded exit list",
+          e);
+    }
+    if (maxScanMillis > 0L &&
+        maxScanMillis + 330L * 60L * 1000L < System.currentTimeMillis()) {
+      logger.warning("The last reported scan in the downloaded exit list "
+          + "took place at " + dateTimeFormat.format(maxScanMillis)
+          + ", which is more than 5:30 hours in the past.");
+  }
+
+    /* Write to disk. */
+    File rsyncFile = new File("rsync/exit-lists/"
+        + tarballFile.getName());
+    File[] outputFiles = new File[] { tarballFile, rsyncFile };
+    for (File outputFile : outputFiles) {
+      try {
+        outputFile.getParentFile().mkdirs();
+        BufferedWriter bw = new BufferedWriter(new FileWriter(
+            outputFile));
+        bw.write(downloadedExitList);
+        bw.close();
+      } catch (IOException e) {
+        logger.log(Level.WARNING, "Could not write downloaded exit list "
+            + "to " + outputFile.getAbsolutePath(), e);
+      }
+    }
 
     /* Write stats. */
     StringBuilder dumpStats = new StringBuilder("Finished downloading "
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
index f95bbf7..9a07ada 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -94,6 +94,8 @@ public class ArchiveWriter extends Thread {
     // Write output to disk that only depends on relay descriptors
     this.dumpStats();
 
+    this.checkStaledescriptors();
+
     this.cleanUpRsyncDirectory();
   }
 
@@ -128,9 +130,12 @@ public class ArchiveWriter extends Thread {
     return false;
   }
 
+  private long maxConsensusValidAfter = 0L;
   private static final byte[] CONSENSUS_ANNOTATION =
       "@type network-status-consensus-3 1.0\n".getBytes();
   public void storeConsensus(byte[] data, long validAfter) {
+    this.maxConsensusValidAfter = Math.max(this.maxConsensusValidAfter,
+        validAfter);
     SimpleDateFormat printFormat = new SimpleDateFormat(
         "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
     printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -144,10 +149,12 @@ public class ArchiveWriter extends Thread {
     }
   }
 
+  private long maxVoteValidAfter = 0L;
   private static final byte[] VOTE_ANNOTATION =
       "@type network-status-vote-3 1.0\n".getBytes();
   public void storeVote(byte[] data, long validAfter,
       String fingerprint, String digest) {
+    this.maxVoteValidAfter = Math.max(this.maxVoteValidAfter, validAfter);
     SimpleDateFormat printFormat = new SimpleDateFormat(
         "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
     printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -177,10 +184,13 @@ public class ArchiveWriter extends Thread {
     }
   }
 
+  private long maxServerDescriptorPublished = 0L;
   private static final byte[] SERVER_DESCRIPTOR_ANNOTATION =
       "@type server-descriptor 1.0\n".getBytes();
   public void storeServerDescriptor(byte[] data, String digest,
       long published) {
+    this.maxServerDescriptorPublished = Math.max(
+        this.maxServerDescriptorPublished, published);
     SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
     printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
     File tarballFile = new File(this.outputDirectory
@@ -195,10 +205,13 @@ public class ArchiveWriter extends Thread {
     }
   }
 
+  private long maxExtraInfoDescriptorPublished = 0L;
   private static final byte[] EXTRA_INFO_ANNOTATION =
       "@type extra-info 1.0\n".getBytes();
   public void storeExtraInfoDescriptor(byte[] data,
       String extraInfoDigest, long published) {
+    this.maxExtraInfoDescriptorPublished = Math.max(
+        this.maxExtraInfoDescriptorPublished, published);
     SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
     descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
     File tarballFile = new File(this.outputDirectory + "/extra-info/"
@@ -403,6 +416,39 @@ public class ArchiveWriter extends Thread {
     }
   }
 
+  private void checkStaledescriptors() { // warn if newest data > 5:30 h old
+    SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss"); // only used to format the log messages
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    long tooOldMillis = System.currentTimeMillis() - 330L * 60L * 1000L;
+    if (maxConsensusValidAfter > 0L && // 0L is the field's initial value
+        maxConsensusValidAfter < tooOldMillis) {
+      this.logger.warning("The last known relay network status "
+          + "consensus was valid after "
+          + dateTimeFormat.format(maxConsensusValidAfter)
+          + ", which is more than 5:30 hours in the past.");
+    }
+    if (maxVoteValidAfter > 0L && maxVoteValidAfter < tooOldMillis) {
+      this.logger.warning("The last known relay network status vote "
+          + "was valid after " + dateTimeFormat.format(maxVoteValidAfter)
+          + ", which is more than 5:30 hours in the past.");
+    }
+    if (maxServerDescriptorPublished > 0L && // 0L is the initial value
+        maxServerDescriptorPublished < tooOldMillis) {
+      this.logger.warning("The last known relay server descriptor was "
+          + "published at "
+          + dateTimeFormat.format(maxServerDescriptorPublished)
+          + ", which is more than 5:30 hours in the past.");
+    }
+    if (maxExtraInfoDescriptorPublished > 0L && // 0L is the initial value
+        maxExtraInfoDescriptorPublished < tooOldMillis) {
+      this.logger.warning("The last known relay extra-info descriptor "
+          + "was published at "
+          + dateTimeFormat.format(maxExtraInfoDescriptorPublished)
+          + ", which is more than 5:30 hours in the past.");
+    }
+  }
+
   /* Delete all files from the rsync directory that have not been modified
    * in the last three days. */
   public void cleanUpRsyncDirectory() {



More information about the tor-commits mailing list