commit f7b58361aa6df5fd2afe40c38f9dd111e2820f89 Author: Karsten Loesing karsten.loesing@gmx.net Date: Sat Oct 27 13:52:42 2012 -0400
Recognize when data sources become stale.
Implements #3850. --- .../db/bridgedescs/SanitizedBridgesWriter.java | 58 +++++++++++ .../BridgePoolAssignmentsProcessor.java | 16 +++ .../ernie/db/exitlists/ExitListDownloader.java | 100 +++++++++++++++----- .../ernie/db/relaydescs/ArchiveWriter.java | 46 +++++++++ 4 files changed, 194 insertions(+), 26 deletions(-)
diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java index 87593bd..7de9961 100644 --- a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java +++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java @@ -193,6 +193,8 @@ public class SanitizedBridgesWriter extends Thread { // Finish writing sanitized bridge descriptors to disk this.finishWriting();
+ this.checkStaleDescriptors(); + this.cleanUpRsyncDirectory(); }
@@ -368,6 +370,8 @@ public class SanitizedBridgesWriter extends Thread { return this.secretsForHashingIPAddresses.get(month); }
+ private String maxNetworkStatusPublishedTime = "1970-01-01 00:00:00"; + /** * Sanitizes a network status and writes it to disk. */ @@ -380,6 +384,10 @@ public class SanitizedBridgesWriter extends Thread { return; }
+ if (publicationTime.compareTo(maxNetworkStatusPublishedTime) > 0) { + maxNetworkStatusPublishedTime = publicationTime; + } + if (this.bridgeSanitizingCutOffTimestamp. compareTo(publicationTime) > 0) { this.logger.log(!this.haveWarnedAboutInterval ? Level.WARNING @@ -543,6 +551,8 @@ public class SanitizedBridgesWriter extends Thread { } }
+ private String maxServerDescriptorPublishedTime = "1970-01-01 00:00:00"; + /** * Sanitizes a bridge server descriptor and writes it to disk. */ @@ -590,6 +600,9 @@ public class SanitizedBridgesWriter extends Thread { * sanitizing interval. */ } else if (line.startsWith("published ")) { published = line.substring("published ".length()); + if (published.compareTo(maxServerDescriptorPublishedTime) > 0) { + maxServerDescriptorPublishedTime = published; + } if (this.bridgeSanitizingCutOffTimestamp. compareTo(published) > 0) { this.logger.log(!this.haveWarnedAboutInterval @@ -799,6 +812,9 @@ public class SanitizedBridgesWriter extends Thread { } }
+ private String maxExtraInfoDescriptorPublishedTime = + "1970-01-01 00:00:00"; + /** * Sanitizes an extra-info descriptor and writes it to disk. */ @@ -827,6 +843,10 @@ public class SanitizedBridgesWriter extends Thread { } else if (line.startsWith("published ")) { scrubbed.append(line + "\n"); published = line.substring("published ".length()); + if (published.compareTo(maxExtraInfoDescriptorPublishedTime) + > 0) { + maxExtraInfoDescriptorPublishedTime = published; + }
/* Remove everything from transport lines except the transport * name. */ @@ -968,6 +988,44 @@ public class SanitizedBridgesWriter extends Thread { } }
+  /**
+   * Logs a warning for each type of bridge descriptor whose most recent
+   * publication time seen in this run is more than 5:30 hours (330
+   * minutes) in the past.  Types for which no descriptor was seen keep
+   * their initial timestamp "1970-01-01 00:00:00" and are skipped.
+   */
+  private void checkStaleDescriptors() {
+    SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    long tooOldMillis = System.currentTimeMillis() - 330L * 60L * 1000L;
+    this.warnIfPublishedTooLongAgo(dateTimeFormat, tooOldMillis,
+        "bridge network status", this.maxNetworkStatusPublishedTime);
+    this.warnIfPublishedTooLongAgo(dateTimeFormat, tooOldMillis,
+        "bridge server descriptor",
+        this.maxServerDescriptorPublishedTime);
+    this.warnIfPublishedTooLongAgo(dateTimeFormat, tooOldMillis,
+        "bridge extra-info descriptor",
+        this.maxExtraInfoDescriptorPublishedTime);
+  }
+
+  /**
+   * Parses a single publication timestamp and warns if it lies before
+   * the given cut-off.  Each timestamp is parsed in its own try block:
+   * at least the server and extra-info descriptor timestamps are taken
+   * verbatim from "published" lines, and a malformed one must not
+   * suppress the checks for the other descriptor types.
+   */
+  private void warnIfPublishedTooLongAgo(SimpleDateFormat dateTimeFormat,
+      long tooOldMillis, String descriptorType, String publishedTime) {
+    try {
+      long publishedMillis = dateTimeFormat.parse(publishedTime).getTime();
+      /* The initial timestamp 1970-01-01 00:00:00 parses to 0 in UTC,
+       * which means that no descriptor of this type was seen. */
+      if (publishedMillis > 0L && publishedMillis < tooOldMillis) {
+        this.logger.warning("The last known " + descriptorType
+            + " was published " + publishedTime + ", which is more than "
+            + "5:30 hours in the past.");
+      }
+    } catch (ParseException e) {
+      this.logger.log(Level.WARNING, "Unable to parse timestamp for "
+          + "stale check.", e);
+    }
+  }
+
   /* Delete all files from the rsync directory that have not been modified
    * in the last three days.
*/ public void cleanUpRsyncDirectory() { diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java index 0ac6f90..43d3427 100644 --- a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java +++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java @@ -77,6 +77,7 @@ public class BridgePoolAssignmentsProcessor extends Thread { SimpleDateFormat filenameFormat = new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); filenameFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + long maxBridgePoolAssignmentTime = 0L; for (File assignmentFile : assignmentFiles) { logger.info("Processing bridge pool assignment file '" + assignmentFile.getAbsolutePath() + "'..."); @@ -120,6 +121,9 @@ public class BridgePoolAssignmentsProcessor extends Thread { long bridgePoolAssignmentTime = assignmentFormat.parse( bridgePoolAssignmentLine.substring( "bridge-pool-assignment ".length())).getTime(); + maxBridgePoolAssignmentTime = Math.max( + maxBridgePoolAssignmentTime, + bridgePoolAssignmentTime); File tarballFile = new File( sanitizedAssignmentsDirectory, filenameFormat.format( bridgePoolAssignmentTime)); @@ -192,6 +196,18 @@ public class BridgePoolAssignmentsProcessor extends Thread { } }
+ if (maxBridgePoolAssignmentTime > 0L && + maxBridgePoolAssignmentTime + 330L * 60L * 1000L + < System.currentTimeMillis()) { + SimpleDateFormat dateTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + logger.warning("The last known bridge pool assignment list was " + + "published at " + + dateTimeFormat.format(maxBridgePoolAssignmentTime) + + ", which is more than 5:30 hours in the past."); + } + this.cleanUpRsyncDirectory();
logger.info("Finished processing bridge pool assignment file(s)."); diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java index 26e944c..9b1f40b 100644 --- a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java +++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java @@ -12,6 +12,7 @@ import java.net.URL; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; +import java.util.List; import java.util.SortedSet; import java.util.Stack; import java.util.TimeZone; @@ -19,6 +20,12 @@ import java.util.TreeSet; import java.util.logging.Level; import java.util.logging.Logger;
+import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorParser; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.ExitList; +import org.torproject.descriptor.ExitListEntry; +import org.torproject.descriptor.impl.DescriptorParseException; import org.torproject.ernie.db.main.Configuration;
public class ExitListDownloader extends Thread { @@ -35,8 +42,19 @@ public class ExitListDownloader extends Thread { }
Logger logger = Logger.getLogger(ExitListDownloader.class.getName()); + + SimpleDateFormat dateTimeFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + + Date downloadedDate = new Date(); + String downloadedExitList = null; try { logger.fine("Downloading exit list..."); + StringBuilder sb = new StringBuilder(); + sb.append("@type tordnsel 1.0\n"); + sb.append("Downloaded " + dateTimeFormat.format(downloadedDate) + + "\n"); String exitAddressesUrl = "http://exitlist.torproject.org/exit-addresses"; URL u = new URL(exitAddressesUrl); @@ -51,42 +69,72 @@ public class ExitListDownloader extends Thread { } BufferedInputStream in = new BufferedInputStream( huc.getInputStream()); - SimpleDateFormat printFormat = - new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - Date downloadedDate = new Date(); - File tarballFile = new File("exitlist/" + printFormat.format( - downloadedDate)); - tarballFile.getParentFile().mkdirs(); - File rsyncFile = new File("rsync/exit-lists/" - + tarballFile.getName()); - rsyncFile.getParentFile().mkdirs(); - SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - BufferedWriter bwT = new BufferedWriter(new FileWriter( - tarballFile)); - BufferedWriter bwR = new BufferedWriter(new FileWriter( - rsyncFile)); - bwT.write("@type tordnsel 1.0\n"); - bwT.write("Downloaded " + dateTimeFormat.format(downloadedDate) - + "\n"); - bwR.write("@type tordnsel 1.0\n"); - bwR.write("Downloaded " + dateTimeFormat.format(downloadedDate) - + "\n"); int len; byte[] data = new byte[1024]; while ((len = in.read(data, 0, 1024)) >= 0) { - bwT.write(new String(data, 0, len)); - bwR.write(new String(data, 0, len)); + sb.append(new String(data, 0, len)); } in.close(); - bwT.close(); - bwR.close(); + downloadedExitList = sb.toString(); logger.fine("Finished downloading exit list."); } catch (IOException e) { 
logger.log(Level.WARNING, "Failed downloading exit list", e); return; } + if (downloadedExitList == null) { + logger.warning("Failed downloading exit list"); + return; + } + + SimpleDateFormat tarballFormat = + new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); + tarballFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File("exitlist/" + tarballFormat.format( + downloadedDate)); + + long maxScanMillis = 0L; + try { + DescriptorParser descriptorParser = + DescriptorSourceFactory.createDescriptorParser(); + List<Descriptor> parsedDescriptors = + descriptorParser.parseDescriptors(downloadedExitList.getBytes(), + tarballFile.getName()); + if (parsedDescriptors.size() != 1 || + !(parsedDescriptors.get(0) instanceof ExitList)) { + logger.warning("Could not parse downloaded exit list"); + return; + } + ExitList parsedExitList = (ExitList) parsedDescriptors.get(0); + for (ExitListEntry entry : parsedExitList.getExitListEntries()) { + maxScanMillis = Math.max(maxScanMillis, entry.getScanMillis()); + } + } catch (DescriptorParseException e) { + logger.log(Level.WARNING, "Could not parse downloaded exit list", + e); + } + if (maxScanMillis > 0L && + maxScanMillis + 330L * 60L * 1000L < System.currentTimeMillis()) { + logger.warning("The last reported scan in the downloaded exit list " + + "took place at " + dateTimeFormat.format(maxScanMillis) + + ", which is more than 5:30 hours in the past."); + } + + /* Write to disk. */ + File rsyncFile = new File("rsync/exit-lists/" + + tarballFile.getName()); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + for (File outputFile : outputFiles) { + try { + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + outputFile)); + bw.write(downloadedExitList); + bw.close(); + } catch (IOException e) { + logger.log(Level.WARNING, "Could not write downloaded exit list " + + "to " + outputFile.getAbsolutePath(), e); + } + }
/* Write stats. */ StringBuilder dumpStats = new StringBuilder("Finished downloading " diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java index f95bbf7..9a07ada 100644 --- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java +++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java @@ -94,6 +94,8 @@ public class ArchiveWriter extends Thread { // Write output to disk that only depends on relay descriptors this.dumpStats();
+ this.checkStaledescriptors(); + this.cleanUpRsyncDirectory(); }
@@ -128,9 +130,12 @@ public class ArchiveWriter extends Thread { return false; }
+ private long maxConsensusValidAfter = 0L; private static final byte[] CONSENSUS_ANNOTATION = "@type network-status-consensus-3 1.0\n".getBytes(); public void storeConsensus(byte[] data, long validAfter) { + this.maxConsensusValidAfter = Math.max(this.maxConsensusValidAfter, + validAfter); SimpleDateFormat printFormat = new SimpleDateFormat( "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); @@ -144,10 +149,12 @@ public class ArchiveWriter extends Thread { } }
+ private long maxVoteValidAfter = 0L; private static final byte[] VOTE_ANNOTATION = "@type network-status-vote-3 1.0\n".getBytes(); public void storeVote(byte[] data, long validAfter, String fingerprint, String digest) { + this.maxVoteValidAfter = Math.max(this.maxVoteValidAfter, validAfter); SimpleDateFormat printFormat = new SimpleDateFormat( "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); @@ -177,10 +184,13 @@ public class ArchiveWriter extends Thread { } }
+ private long maxServerDescriptorPublished = 0L; private static final byte[] SERVER_DESCRIPTOR_ANNOTATION = "@type server-descriptor 1.0\n".getBytes(); public void storeServerDescriptor(byte[] data, String digest, long published) { + this.maxServerDescriptorPublished = Math.max( + this.maxServerDescriptorPublished, published); SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/"); printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); File tarballFile = new File(this.outputDirectory @@ -195,10 +205,13 @@ public class ArchiveWriter extends Thread { } }
+ private long maxExtraInfoDescriptorPublished = 0L; private static final byte[] EXTRA_INFO_ANNOTATION = "@type extra-info 1.0\n".getBytes(); public void storeExtraInfoDescriptor(byte[] data, String extraInfoDigest, long published) { + this.maxExtraInfoDescriptorPublished = Math.max( + this.maxExtraInfoDescriptorPublished, published); SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/"); descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); File tarballFile = new File(this.outputDirectory + "/extra-info/" @@ -403,6 +416,39 @@ public class ArchiveWriter extends Thread { } }
+  /* Logs a warning for each type of relay descriptor whose most recent
+   * valid-after or publication time stored in this run is more than 5:30
+   * hours (330 minutes) in the past.  Types for which nothing was stored
+   * still have their initial value 0 and are skipped. */
+  private void checkStaledescriptors() {
+    SimpleDateFormat utcFormat = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss");
+    utcFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    long staleBeforeMillis = System.currentTimeMillis()
+        - 330L * 60L * 1000L;
+    /* Newest timestamps and the matching message parts, in the order in
+     * which warnings are logged. */
+    long[] newestMillis = new long[] {
+        this.maxConsensusValidAfter,
+        this.maxVoteValidAfter,
+        this.maxServerDescriptorPublished,
+        this.maxExtraInfoDescriptorPublished };
+    String[] descriptions = new String[] {
+        "relay network status consensus was valid after ",
+        "relay network status vote was valid after ",
+        "relay server descriptor was published at ",
+        "relay extra-info descriptor was published at " };
+    for (int i = 0; i < newestMillis.length; i++) {
+      long newest = newestMillis[i];
+      if (newest <= 0L || newest >= staleBeforeMillis) {
+        continue;
+      }
+      this.logger.warning("The last known " + descriptions[i]
+          + utcFormat.format(newest)
+          + ", which is more than 5:30 hours in the past.");
+    }
+  }
+
   /* Delete all files from the rsync directory that have not been modified
    * in the last three days. */
   public void cleanUpRsyncDirectory() {