commit 6d228544da5116cd4be713a593147b894d1b097e Author: Karsten Loesing karsten.loesing@gmx.net Date: Wed May 18 08:42:39 2016 +0200
Reorder attributes and methods in relaydescs classes.
Attributes go first, followed by constructors, high-level methods, and then lower-level methods. --- .../collector/relaydescs/ArchiveReader.java | 7 +- .../collector/relaydescs/ArchiveWriter.java | 982 ++++++++++----------- .../collector/relaydescs/ReferenceChecker.java | 59 +- 3 files changed, 524 insertions(+), 524 deletions(-)
diff --git a/src/org/torproject/collector/relaydescs/ArchiveReader.java b/src/org/torproject/collector/relaydescs/ArchiveReader.java index 96409f4..72f8231 100644 --- a/src/org/torproject/collector/relaydescs/ArchiveReader.java +++ b/src/org/torproject/collector/relaydescs/ArchiveReader.java @@ -38,6 +38,10 @@ import java.util.logging.Logger; * them to the relay descriptor parser. */ public class ArchiveReader { + + private Map<String, Set<String>> microdescriptorValidAfterTimes = + new HashMap<String, Set<String>>(); + public ArchiveReader(RelayDescriptorParser rdp, File archivesDirectory, File statsDirectory, boolean keepImportHistory) {
@@ -266,9 +270,6 @@ public class ArchiveReader { + ignoredFiles + " files."); }
- private Map<String, Set<String>> microdescriptorValidAfterTimes = - new HashMap<String, Set<String>>(); - public void haveParsedMicrodescConsensus(String validAfterTime, SortedSet<String> microdescriptorDigests) { for (String microdescriptor : microdescriptorDigests) { diff --git a/src/org/torproject/collector/relaydescs/ArchiveWriter.java b/src/org/torproject/collector/relaydescs/ArchiveWriter.java index ee87b12..cf603d1 100644 --- a/src/org/torproject/collector/relaydescs/ArchiveWriter.java +++ b/src/org/torproject/collector/relaydescs/ArchiveWriter.java @@ -36,41 +36,8 @@ import java.util.logging.Logger;
public class ArchiveWriter extends Thread {
- public static void main(String[] args) { - - Logger logger = Logger.getLogger(ArchiveWriter.class.getName()); - logger.info("Starting relay-descriptors module of CollecTor."); - - // Initialize configuration - Configuration config = new Configuration(); - - // Use lock file to avoid overlapping runs - LockFile lf = new LockFile("relay-descriptors"); - if (!lf.acquireLock()) { - logger.severe("Warning: CollecTor is already running or has not exited " - + "cleanly! Exiting!"); - System.exit(1); - } - - // Import/download relay descriptors from the various sources - new ArchiveWriter(config).run(); - - new ReferenceChecker(new File("recent/relay-descriptors"), - new File("stats/references"), - new File("stats/references-history")).check(); - - // Remove lock file - lf.releaseLock(); - - logger.info("Terminating relay-descriptors module of CollecTor."); - } - private Configuration config;
- public ArchiveWriter(Configuration config) { - this.config = config; - } - private long now = System.currentTimeMillis(); private Logger logger; private File outputDirectory; @@ -107,6 +74,131 @@ public class ArchiveWriter extends Thread { private File storedMicrodescriptorsFile = new File( "stats/stored-microdescriptors");
+ private static final byte[] CONSENSUS_ANNOTATION = + "@type network-status-consensus-3 1.0\n".getBytes(); + + private static final byte[] MICRODESCCONSENSUS_ANNOTATION = + "@type network-status-microdesc-consensus-3 1.0\n".getBytes(); + + private static final byte[] VOTE_ANNOTATION = + "@type network-status-vote-3 1.0\n".getBytes(); + + private static final byte[] CERTIFICATE_ANNOTATION = + "@type dir-key-certificate-3 1.0\n".getBytes(); + + private static final byte[] SERVER_DESCRIPTOR_ANNOTATION = + "@type server-descriptor 1.0\n".getBytes(); + + private static final byte[] EXTRA_INFO_ANNOTATION = + "@type extra-info 1.0\n".getBytes(); + + private static final byte[] MICRODESCRIPTOR_ANNOTATION = + "@type microdescriptor 1.0\n".getBytes(); + + private StringBuilder intermediateStats = new StringBuilder(); + + public static void main(String[] args) { + + Logger logger = Logger.getLogger(ArchiveWriter.class.getName()); + logger.info("Starting relay-descriptors module of CollecTor."); + + // Initialize configuration + Configuration config = new Configuration(); + + // Use lock file to avoid overlapping runs + LockFile lf = new LockFile("relay-descriptors"); + if (!lf.acquireLock()) { + logger.severe("Warning: CollecTor is already running or has not exited " + + "cleanly! 
Exiting!"); + System.exit(1); + } + + // Import/download relay descriptors from the various sources + new ArchiveWriter(config).run(); + + new ReferenceChecker(new File("recent/relay-descriptors"), + new File("stats/references"), + new File("stats/references-history")).check(); + + // Remove lock file + lf.releaseLock(); + + logger.info("Terminating relay-descriptors module of CollecTor."); + } + + public ArchiveWriter(Configuration config) { + this.config = config; + } + + public void run() { + + File outputDirectory = + new File(config.getDirectoryArchivesOutputDirectory()); + File statsDirectory = new File("stats"); + + this.logger = Logger.getLogger(ArchiveWriter.class.getName()); + this.outputDirectory = outputDirectory; + SimpleDateFormat rsyncCatFormat = new SimpleDateFormat( + "yyyy-MM-dd-HH-mm-ss"); + rsyncCatFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + this.rsyncCatString = rsyncCatFormat.format( + System.currentTimeMillis()); + this.descriptorParser = + DescriptorSourceFactory.createDescriptorParser(); + + this.loadDescriptorDigests(); + + // Prepare relay descriptor parser + RelayDescriptorParser rdp = new RelayDescriptorParser(this); + + RelayDescriptorDownloader rdd = null; + if (config.getDownloadRelayDescriptors()) { + List<String> dirSources = + config.getDownloadFromDirectoryAuthorities(); + rdd = new RelayDescriptorDownloader(rdp, dirSources, + config.getDownloadVotesByFingerprint(), + config.getDownloadCurrentConsensus(), + config.getDownloadCurrentMicrodescConsensus(), + config.getDownloadCurrentVotes(), + config.getDownloadMissingServerDescriptors(), + config.getDownloadMissingExtraInfoDescriptors(), + config.getDownloadMissingMicrodescriptors(), + config.getDownloadAllServerDescriptors(), + config.getDownloadAllExtraInfoDescriptors(), + config.getCompressRelayDescriptorDownloads()); + rdp.setRelayDescriptorDownloader(rdd); + } + if (config.getImportCachedRelayDescriptors()) { + new CachedRelayDescriptorReader(rdp, + 
config.getCachedRelayDescriptorDirectory(), statsDirectory); + this.intermediateStats("importing relay descriptors from local " + + "Tor data directories"); + } + if (config.getImportDirectoryArchives()) { + new ArchiveReader(rdp, + new File(config.getDirectoryArchivesDirectory()), + statsDirectory, + config.getKeepDirectoryArchiveImportHistory()); + this.intermediateStats("importing relay descriptors from local " + + "directory"); + } + if (rdd != null) { + rdd.downloadDescriptors(); + rdd.writeFile(); + rdd = null; + this.intermediateStats("downloading relay descriptors from the " + + "directory authorities"); + } + + this.checkMissingDescriptors(); + + this.checkStaledescriptors(); + + this.cleanUpRsyncDirectory(); + + this.saveDescriptorDigests(); + } + private void loadDescriptorDigests() { SimpleDateFormat dateTimeFormat = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss"); @@ -206,382 +298,23 @@ public class ArchiveWriter extends Thread { } }
- private void saveDescriptorDigests() { - SimpleDateFormat dateTimeFormat = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - try { - this.storedServerDescriptorsFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.storedServerDescriptorsFile)); - for (Map.Entry<Long, Map<String, String>> e : - this.storedServerDescriptors.entrySet()) { - String published = dateTimeFormat.format(e.getKey()); - for (Map.Entry<String, String> f : e.getValue().entrySet()) { - String serverDescriptorDigest = f.getKey(); - String extraInfoDescriptorDigest = f.getValue() == null ? "NA" - : f.getValue(); - bw.write(String.format("%s,%s,%s%n", published, - serverDescriptorDigest, extraInfoDescriptorDigest)); - } - } - bw.close(); - this.storedExtraInfoDescriptorsFile.getParentFile().mkdirs(); - bw = new BufferedWriter(new FileWriter( - this.storedExtraInfoDescriptorsFile)); - for (Map.Entry<Long, Set<String>> e : - this.storedExtraInfoDescriptors.entrySet()) { - String published = dateTimeFormat.format(e.getKey()); - for (String extraInfoDescriptorDigest : e.getValue()) { - bw.write(String.format("%s,%s%n", published, - extraInfoDescriptorDigest)); - } - } - bw.close(); - this.storedMicrodescriptorsFile.getParentFile().mkdirs(); - bw = new BufferedWriter(new FileWriter( - this.storedMicrodescriptorsFile)); - for (Map.Entry<Long, Set<String>> e : - this.storedMicrodescriptors.entrySet()) { - String validAfter = dateTimeFormat.format(e.getKey()); - for (String microdescriptorDigest : e.getValue()) { - bw.write(String.format("%s,%s%n", validAfter, - microdescriptorDigest)); - } - } - bw.close(); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not save descriptor " - + "digests. 
We might not be able to correctly check " - + "descriptors for completeness in the next run.", e); - } - } - - public void run() { - - File outputDirectory = - new File(config.getDirectoryArchivesOutputDirectory()); - File statsDirectory = new File("stats"); - - this.logger = Logger.getLogger(ArchiveWriter.class.getName()); - this.outputDirectory = outputDirectory; - SimpleDateFormat rsyncCatFormat = new SimpleDateFormat( - "yyyy-MM-dd-HH-mm-ss"); - rsyncCatFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - this.rsyncCatString = rsyncCatFormat.format( - System.currentTimeMillis()); - this.descriptorParser = - DescriptorSourceFactory.createDescriptorParser(); - - this.loadDescriptorDigests(); - - // Prepare relay descriptor parser - RelayDescriptorParser rdp = new RelayDescriptorParser(this); - - RelayDescriptorDownloader rdd = null; - if (config.getDownloadRelayDescriptors()) { - List<String> dirSources = - config.getDownloadFromDirectoryAuthorities(); - rdd = new RelayDescriptorDownloader(rdp, dirSources, - config.getDownloadVotesByFingerprint(), - config.getDownloadCurrentConsensus(), - config.getDownloadCurrentMicrodescConsensus(), - config.getDownloadCurrentVotes(), - config.getDownloadMissingServerDescriptors(), - config.getDownloadMissingExtraInfoDescriptors(), - config.getDownloadMissingMicrodescriptors(), - config.getDownloadAllServerDescriptors(), - config.getDownloadAllExtraInfoDescriptors(), - config.getCompressRelayDescriptorDownloads()); - rdp.setRelayDescriptorDownloader(rdd); - } - if (config.getImportCachedRelayDescriptors()) { - new CachedRelayDescriptorReader(rdp, - config.getCachedRelayDescriptorDirectory(), statsDirectory); - this.intermediateStats("importing relay descriptors from local " - + "Tor data directories"); - } - if (config.getImportDirectoryArchives()) { - new ArchiveReader(rdp, - new File(config.getDirectoryArchivesDirectory()), - statsDirectory, - config.getKeepDirectoryArchiveImportHistory()); - this.intermediateStats("importing 
relay descriptors from local " - + "directory"); - } - if (rdd != null) { - rdd.downloadDescriptors(); - rdd.writeFile(); - rdd = null; - this.intermediateStats("downloading relay descriptors from the " - + "directory authorities"); - } - - this.checkMissingDescriptors(); - - this.checkStaledescriptors(); - - this.cleanUpRsyncDirectory(); - - this.saveDescriptorDigests(); - } - - private boolean store(byte[] typeAnnotation, byte[] data, - File[] outputFiles, boolean[] append) { - try { - this.logger.finer("Storing " + outputFiles[0]); - if (this.descriptorParser.parseDescriptors(data, - outputFiles[0].getName()).size() != 1) { - this.logger.info("Relay descriptor file " + outputFiles[0] - + " doesn't contain exactly one descriptor. Not storing."); - return false; - } - for (int i = 0; i < outputFiles.length; i++) { - File outputFile = outputFiles[i]; - boolean appendToFile = append == null ? false : append[i]; - outputFile.getParentFile().mkdirs(); - BufferedOutputStream bos = new BufferedOutputStream( - new FileOutputStream(outputFile, appendToFile)); - if (data.length > 0 && data[0] != '@') { - bos.write(typeAnnotation, 0, typeAnnotation.length); - } - bos.write(data, 0, data.length); - bos.close(); - } - return true; - } catch (DescriptorParseException e) { - this.logger.log(Level.WARNING, "Could not parse relay descriptor " - + outputFiles[0] + " before storing it to disk. 
Skipping.", e); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Could not store relay descriptor " - + outputFiles[0], e); - } - return false; - } - - private static final byte[] CONSENSUS_ANNOTATION = - "@type network-status-consensus-3 1.0\n".getBytes(); - - public void storeConsensus(byte[] data, long validAfter, - SortedSet<String> dirSources, - SortedSet<String> serverDescriptorDigests) { - SimpleDateFormat printFormat = new SimpleDateFormat( - "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory + "/consensus/" - + printFormat.format(new Date(validAfter)) + "-consensus"); - boolean tarballFileExistedBefore = tarballFile.exists(); - File rsyncFile = new File("recent/relay-descriptors/consensuses/" - + tarballFile.getName()); - File[] outputFiles = new File[] { tarballFile, rsyncFile }; - if (this.store(CONSENSUS_ANNOTATION, data, outputFiles, null)) { - this.storedConsensusesCounter++; - } - if (!tarballFileExistedBefore - && this.now - validAfter < 3L * 60L * 60L * 1000L) { - this.storedConsensuses.put(validAfter, serverDescriptorDigests); - this.expectedVotes.put(validAfter, dirSources.size()); - } - } - - private static final byte[] MICRODESCCONSENSUS_ANNOTATION = - "@type network-status-microdesc-consensus-3 1.0\n".getBytes(); - - public void storeMicrodescConsensus(byte[] data, long validAfter, - SortedSet<String> microdescriptorDigests) { - SimpleDateFormat yearMonthDirectoryFormat = new SimpleDateFormat( - "yyyy/MM"); - yearMonthDirectoryFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - SimpleDateFormat dayDirectoryFileFormat = new SimpleDateFormat( - "dd/yyyy-MM-dd-HH-mm-ss"); - dayDirectoryFileFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory - + "/microdesc/" + yearMonthDirectoryFormat.format(validAfter) - + "/consensus-microdesc/" - + dayDirectoryFileFormat.format(validAfter) - + 
"-consensus-microdesc"); - boolean tarballFileExistedBefore = tarballFile.exists(); - File rsyncFile = new File("recent/relay-descriptors/microdescs/" - + "consensus-microdesc/" + tarballFile.getName()); - File[] outputFiles = new File[] { tarballFile, rsyncFile }; - if (this.store(MICRODESCCONSENSUS_ANNOTATION, data, outputFiles, - null)) { - this.storedMicrodescConsensusesCounter++; - } - if (!tarballFileExistedBefore - && this.now - validAfter < 3L * 60L * 60L * 1000L) { - this.storedMicrodescConsensuses.put(validAfter, - microdescriptorDigests); - } - } - - private static final byte[] VOTE_ANNOTATION = - "@type network-status-vote-3 1.0\n".getBytes(); - - public void storeVote(byte[] data, long validAfter, - String fingerprint, String digest, - SortedSet<String> serverDescriptorDigests) { - SimpleDateFormat printFormat = new SimpleDateFormat( - "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory + "/vote/" - + printFormat.format(new Date(validAfter)) + "-vote-" - + fingerprint + "-" + digest); - boolean tarballFileExistedBefore = tarballFile.exists(); - File rsyncFile = new File("recent/relay-descriptors/votes/" - + tarballFile.getName()); - File[] outputFiles = new File[] { tarballFile, rsyncFile }; - if (this.store(VOTE_ANNOTATION, data, outputFiles, null)) { - this.storedVotesCounter++; - } - if (!tarballFileExistedBefore - && this.now - validAfter < 3L * 60L * 60L * 1000L) { - if (!this.storedVotes.containsKey(validAfter)) { - this.storedVotes.put(validAfter, - new TreeMap<String, SortedSet<String>>()); - } - this.storedVotes.get(validAfter).put(fingerprint, - serverDescriptorDigests); - } - } - - private static final byte[] CERTIFICATE_ANNOTATION = - "@type dir-key-certificate-3 1.0\n".getBytes(); - - public void storeCertificate(byte[] data, String fingerprint, - long published) { - SimpleDateFormat printFormat = new SimpleDateFormat( - "yyyy-MM-dd-HH-mm-ss"); - 
printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory + "/certs/" - + fingerprint + "-" + printFormat.format(new Date(published))); - File[] outputFiles = new File[] { tarballFile }; - if (this.store(CERTIFICATE_ANNOTATION, data, outputFiles, null)) { - this.storedCertsCounter++; - } - } - - private static final byte[] SERVER_DESCRIPTOR_ANNOTATION = - "@type server-descriptor 1.0\n".getBytes(); - - public void storeServerDescriptor(byte[] data, String digest, - long published, String extraInfoDigest) { - SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/"); - printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory - + "/server-descriptor/" + printFormat.format(new Date(published)) - + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/" - + digest); - boolean tarballFileExistedBefore = tarballFile.exists(); - File rsyncCatFile = new File("recent/relay-descriptors/" - + "server-descriptors/" + this.rsyncCatString - + "-server-descriptors.tmp"); - File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; - boolean[] append = new boolean[] { false, true }; - if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, outputFiles, - append)) { - this.storedServerDescriptorsCounter++; - } - if (!tarballFileExistedBefore - && this.now - published < 48L * 60L * 60L * 1000L) { - if (!this.storedServerDescriptors.containsKey(published)) { - this.storedServerDescriptors.put(published, - new HashMap<String, String>()); - } - this.storedServerDescriptors.get(published).put(digest, - extraInfoDigest); - } - } - - private static final byte[] EXTRA_INFO_ANNOTATION = - "@type extra-info 1.0\n".getBytes(); - - public void storeExtraInfoDescriptor(byte[] data, - String extraInfoDigest, long published) { - SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/"); - descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new 
File(this.outputDirectory + "/extra-info/" - + descriptorFormat.format(new Date(published)) - + extraInfoDigest.substring(0, 1) + "/" - + extraInfoDigest.substring(1, 2) + "/" - + extraInfoDigest); - boolean tarballFileExistedBefore = tarballFile.exists(); - File rsyncCatFile = new File("recent/relay-descriptors/" - + "extra-infos/" + this.rsyncCatString + "-extra-infos.tmp"); - File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; - boolean[] append = new boolean[] { false, true }; - if (this.store(EXTRA_INFO_ANNOTATION, data, outputFiles, append)) { - this.storedExtraInfoDescriptorsCounter++; - } - if (!tarballFileExistedBefore - && this.now - published < 48L * 60L * 60L * 1000L) { - if (!this.storedExtraInfoDescriptors.containsKey(published)) { - this.storedExtraInfoDescriptors.put(published, - new HashSet<String>()); - } - this.storedExtraInfoDescriptors.get(published).add(extraInfoDigest); - } - } - - private static final byte[] MICRODESCRIPTOR_ANNOTATION = - "@type microdescriptor 1.0\n".getBytes(); - - public void storeMicrodescriptor(byte[] data, - String microdescriptorDigest, long validAfter) { - /* TODO We could check here whether we already stored the - * microdescriptor in the same valid-after month. This can happen, - * e.g., when two relays share the same microdescriptor. In that case - * this method gets called twice and the second call overwrites the - * file written in the first call. However, this method must be - * called twice to store the same microdescriptor in two different - * valid-after months. 
*/ - SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/"); - descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - File tarballFile = new File(this.outputDirectory + "/microdesc/" - + descriptorFormat.format(validAfter) + "micro/" - + microdescriptorDigest.substring(0, 1) + "/" - + microdescriptorDigest.substring(1, 2) + "/" - + microdescriptorDigest); - boolean tarballFileExistedBefore = tarballFile.exists(); - File rsyncCatFile = new File("recent/relay-descriptors/" - + "microdescs/micro/" + this.rsyncCatString - + "-micro.tmp"); - File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; - boolean[] append = new boolean[] { false, true }; - if (this.store(MICRODESCRIPTOR_ANNOTATION, data, outputFiles, - append)) { - this.storedMicrodescriptorsCounter++; - } - if (!tarballFileExistedBefore - && this.now - validAfter < 40L * 24L * 60L * 60L * 1000L) { - if (!this.storedMicrodescriptors.containsKey(validAfter)) { - this.storedMicrodescriptors.put(validAfter, - new HashSet<String>()); - } - this.storedMicrodescriptors.get(validAfter).add( - microdescriptorDigest); - } - } - - private StringBuilder intermediateStats = new StringBuilder(); - - public void intermediateStats(String event) { - intermediateStats.append("While " + event + ", we stored " - + this.storedConsensusesCounter + " consensus(es), " - + this.storedMicrodescConsensusesCounter + " microdesc " - + "consensus(es), " + this.storedVotesCounter + " vote(s), " - + this.storedCertsCounter + " certificate(s), " - + this.storedServerDescriptorsCounter + " server descriptor(s), " - + this.storedExtraInfoDescriptorsCounter + " extra-info " - + "descriptor(s), and " + this.storedMicrodescriptorsCounter - + " microdescriptor(s) to disk.\n"); - this.storedConsensusesCounter = 0; - this.storedMicrodescConsensusesCounter = 0; - this.storedVotesCounter = 0; - this.storedCertsCounter = 0; - this.storedServerDescriptorsCounter = 0; - this.storedExtraInfoDescriptorsCounter = 0; - 
this.storedMicrodescriptorsCounter = 0; + public void intermediateStats(String event) { + intermediateStats.append("While " + event + ", we stored " + + this.storedConsensusesCounter + " consensus(es), " + + this.storedMicrodescConsensusesCounter + " microdesc " + + "consensus(es), " + this.storedVotesCounter + " vote(s), " + + this.storedCertsCounter + " certificate(s), " + + this.storedServerDescriptorsCounter + " server descriptor(s), " + + this.storedExtraInfoDescriptorsCounter + " extra-info " + + "descriptor(s), and " + this.storedMicrodescriptorsCounter + + " microdescriptor(s) to disk.\n"); + this.storedConsensusesCounter = 0; + this.storedMicrodescConsensusesCounter = 0; + this.storedVotesCounter = 0; + this.storedCertsCounter = 0; + this.storedServerDescriptorsCounter = 0; + this.storedExtraInfoDescriptorsCounter = 0; + this.storedMicrodescriptorsCounter = 0; }
private void checkMissingDescriptors() { @@ -743,103 +476,370 @@ public class ArchiveWriter extends Thread { missingVotes = true; } } - this.logger.info(sb.toString()); - if (missingDescriptors) { - this.logger.fine("We are missing at least 0.5% of server or " - + "extra-info descriptors referenced from a consensus or " - + "vote or at least 0.5% of microdescriptors referenced from a " - + "microdesc consensus."); + this.logger.info(sb.toString()); + if (missingDescriptors) { + this.logger.fine("We are missing at least 0.5% of server or " + + "extra-info descriptors referenced from a consensus or " + + "vote or at least 0.5% of microdescriptors referenced from a " + + "microdesc consensus."); + } + if (missingVotes) { + /* TODO Shouldn't warn if we're not trying to archive votes at + * all. */ + this.logger.fine("We are missing at least one vote that was " + + "referenced from a consensus."); + } + if (missingMicrodescConsensus) { + /* TODO Shouldn't warn if we're not trying to archive microdesc + * consensuses at all. 
*/ + this.logger.fine("We are missing at least one microdesc " + + "consensus that was published together with a known " + + "consensus."); + } + } + + private void checkStaledescriptors() { + SimpleDateFormat dateTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + long tooOldMillis = this.now - 330L * 60L * 1000L; + if (!this.storedConsensuses.isEmpty() + && this.storedConsensuses.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay network status " + + "consensus was valid after " + + dateTimeFormat.format(this.storedConsensuses.lastKey()) + + ", which is more than 5:30 hours in the past."); + } + if (!this.storedMicrodescConsensuses.isEmpty() + && this.storedMicrodescConsensuses.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay network status " + + "microdesc consensus was valid after " + + dateTimeFormat.format( + this.storedMicrodescConsensuses.lastKey()) + + ", which is more than 5:30 hours in the past."); + } + if (!this.storedVotes.isEmpty() + && this.storedVotes.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay network status vote " + + "was valid after " + dateTimeFormat.format( + this.storedVotes.lastKey()) + ", which is more than 5:30 hours " + + "in the past."); + } + if (!this.storedServerDescriptors.isEmpty() + && this.storedServerDescriptors.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay server descriptor was " + + "published at " + + dateTimeFormat.format(this.storedServerDescriptors.lastKey()) + + ", which is more than 5:30 hours in the past."); + } + if (!this.storedExtraInfoDescriptors.isEmpty() + && this.storedExtraInfoDescriptors.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay extra-info descriptor " + + "was published at " + dateTimeFormat.format( + this.storedExtraInfoDescriptors.lastKey()) + + ", which is more than 5:30 hours in the past."); + } + if 
(!this.storedMicrodescriptors.isEmpty() + && this.storedMicrodescriptors.lastKey() < tooOldMillis) { + this.logger.warning("The last known relay microdescriptor was " + + "contained in a microdesc consensus that was valid after " + + dateTimeFormat.format(this.storedMicrodescriptors.lastKey()) + + ", which is more than 5:30 hours in the past."); + } + } + + /* Delete all files from the rsync directory that have not been modified + * in the last three days (except for microdescriptors which are kept + * for up to thirty days), and remove the .tmp extension from newly + * written files. */ + public void cleanUpRsyncDirectory() { + long cutOffMillis = System.currentTimeMillis() + - 3L * 24L * 60L * 60L * 1000L; + long cutOffMicroMillis = cutOffMillis - 27L * 24L * 60L * 60L * 1000L; + Stack<File> allFiles = new Stack<File>(); + allFiles.add(new File("recent/relay-descriptors")); + while (!allFiles.isEmpty()) { + File file = allFiles.pop(); + if (file.isDirectory()) { + allFiles.addAll(Arrays.asList(file.listFiles())); + } else if (file.getName().endsWith("-micro")) { + if (file.lastModified() < cutOffMicroMillis) { + file.delete(); + } + } else if (file.lastModified() < cutOffMillis) { + file.delete(); + } else if (file.getName().endsWith(".tmp")) { + file.renameTo(new File(file.getParentFile(), + file.getName().substring(0, + file.getName().lastIndexOf(".tmp")))); + } + } + } + + private void saveDescriptorDigests() { + SimpleDateFormat dateTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + try { + this.storedServerDescriptorsFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + this.storedServerDescriptorsFile)); + for (Map.Entry<Long, Map<String, String>> e : + this.storedServerDescriptors.entrySet()) { + String published = dateTimeFormat.format(e.getKey()); + for (Map.Entry<String, String> f : e.getValue().entrySet()) { + String serverDescriptorDigest = 
f.getKey(); + String extraInfoDescriptorDigest = f.getValue() == null ? "NA" + : f.getValue(); + bw.write(String.format("%s,%s,%s%n", published, + serverDescriptorDigest, extraInfoDescriptorDigest)); + } + } + bw.close(); + this.storedExtraInfoDescriptorsFile.getParentFile().mkdirs(); + bw = new BufferedWriter(new FileWriter( + this.storedExtraInfoDescriptorsFile)); + for (Map.Entry<Long, Set<String>> e : + this.storedExtraInfoDescriptors.entrySet()) { + String published = dateTimeFormat.format(e.getKey()); + for (String extraInfoDescriptorDigest : e.getValue()) { + bw.write(String.format("%s,%s%n", published, + extraInfoDescriptorDigest)); + } + } + bw.close(); + this.storedMicrodescriptorsFile.getParentFile().mkdirs(); + bw = new BufferedWriter(new FileWriter( + this.storedMicrodescriptorsFile)); + for (Map.Entry<Long, Set<String>> e : + this.storedMicrodescriptors.entrySet()) { + String validAfter = dateTimeFormat.format(e.getKey()); + for (String microdescriptorDigest : e.getValue()) { + bw.write(String.format("%s,%s%n", validAfter, + microdescriptorDigest)); + } + } + bw.close(); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not save descriptor " + + "digests. 
We might not be able to correctly check " + + "descriptors for completeness in the next run.", e); + } + } + + public void storeConsensus(byte[] data, long validAfter, + SortedSet<String> dirSources, + SortedSet<String> serverDescriptorDigests) { + SimpleDateFormat printFormat = new SimpleDateFormat( + "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + "/consensus/" + + printFormat.format(new Date(validAfter)) + "-consensus"); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncFile = new File("recent/relay-descriptors/consensuses/" + + tarballFile.getName()); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + if (this.store(CONSENSUS_ANNOTATION, data, outputFiles, null)) { + this.storedConsensusesCounter++; + } + if (!tarballFileExistedBefore + && this.now - validAfter < 3L * 60L * 60L * 1000L) { + this.storedConsensuses.put(validAfter, serverDescriptorDigests); + this.expectedVotes.put(validAfter, dirSources.size()); + } + } + + public void storeMicrodescConsensus(byte[] data, long validAfter, + SortedSet<String> microdescriptorDigests) { + SimpleDateFormat yearMonthDirectoryFormat = new SimpleDateFormat( + "yyyy/MM"); + yearMonthDirectoryFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + SimpleDateFormat dayDirectoryFileFormat = new SimpleDateFormat( + "dd/yyyy-MM-dd-HH-mm-ss"); + dayDirectoryFileFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + + "/microdesc/" + yearMonthDirectoryFormat.format(validAfter) + + "/consensus-microdesc/" + + dayDirectoryFileFormat.format(validAfter) + + "-consensus-microdesc"); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncFile = new File("recent/relay-descriptors/microdescs/" + + "consensus-microdesc/" + tarballFile.getName()); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + if 
(this.store(MICRODESCCONSENSUS_ANNOTATION, data, outputFiles, + null)) { + this.storedMicrodescConsensusesCounter++; + } + if (!tarballFileExistedBefore + && this.now - validAfter < 3L * 60L * 60L * 1000L) { + this.storedMicrodescConsensuses.put(validAfter, + microdescriptorDigests); } - if (missingVotes) { - /* TODO Shouldn't warn if we're not trying to archive votes at - * all. */ - this.logger.fine("We are missing at least one vote that was " - + "referenced from a consensus."); + } + + public void storeVote(byte[] data, long validAfter, + String fingerprint, String digest, + SortedSet<String> serverDescriptorDigests) { + SimpleDateFormat printFormat = new SimpleDateFormat( + "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + "/vote/" + + printFormat.format(new Date(validAfter)) + "-vote-" + + fingerprint + "-" + digest); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncFile = new File("recent/relay-descriptors/votes/" + + tarballFile.getName()); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + if (this.store(VOTE_ANNOTATION, data, outputFiles, null)) { + this.storedVotesCounter++; } - if (missingMicrodescConsensus) { - /* TODO Shouldn't warn if we're not trying to archive microdesc - * consensuses at all. */ - this.logger.fine("We are missing at least one microdesc " - + "consensus that was published together with a known " - + "consensus."); + if (!tarballFileExistedBefore + && this.now - validAfter < 3L * 60L * 60L * 1000L) { + if (!this.storedVotes.containsKey(validAfter)) { + this.storedVotes.put(validAfter, + new TreeMap<String, SortedSet<String>>()); + } + this.storedVotes.get(validAfter).put(fingerprint, + serverDescriptorDigests); } }
- private void checkStaledescriptors() { - SimpleDateFormat dateTimeFormat = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - long tooOldMillis = this.now - 330L * 60L * 1000L; - if (!this.storedConsensuses.isEmpty() - && this.storedConsensuses.lastKey() < tooOldMillis) { - this.logger.warning("The last known relay network status " - + "consensus was valid after " - + dateTimeFormat.format(this.storedConsensuses.lastKey()) - + ", which is more than 5:30 hours in the past."); + public void storeCertificate(byte[] data, String fingerprint, + long published) { + SimpleDateFormat printFormat = new SimpleDateFormat( + "yyyy-MM-dd-HH-mm-ss"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + "/certs/" + + fingerprint + "-" + printFormat.format(new Date(published))); + File[] outputFiles = new File[] { tarballFile }; + if (this.store(CERTIFICATE_ANNOTATION, data, outputFiles, null)) { + this.storedCertsCounter++; } - if (!this.storedMicrodescConsensuses.isEmpty() - && this.storedMicrodescConsensuses.lastKey() < tooOldMillis) { - this.logger.warning("The last known relay network status " - + "microdesc consensus was valid after " - + dateTimeFormat.format( - this.storedMicrodescConsensuses.lastKey()) - + ", which is more than 5:30 hours in the past."); + } + + public void storeServerDescriptor(byte[] data, String digest, + long published, String extraInfoDigest) { + SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/"); + printFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + + "/server-descriptor/" + printFormat.format(new Date(published)) + + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/" + + digest); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncCatFile = new File("recent/relay-descriptors/" + + "server-descriptors/" + this.rsyncCatString + + 
"-server-descriptors.tmp"); + File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; + boolean[] append = new boolean[] { false, true }; + if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, outputFiles, + append)) { + this.storedServerDescriptorsCounter++; } - if (!this.storedVotes.isEmpty() - && this.storedVotes.lastKey() < tooOldMillis) { - this.logger.warning("The last known relay network status vote " - + "was valid after " + dateTimeFormat.format( - this.storedVotes.lastKey()) + ", which is more than 5:30 hours " - + "in the past."); + if (!tarballFileExistedBefore + && this.now - published < 48L * 60L * 60L * 1000L) { + if (!this.storedServerDescriptors.containsKey(published)) { + this.storedServerDescriptors.put(published, + new HashMap<String, String>()); + } + this.storedServerDescriptors.get(published).put(digest, + extraInfoDigest); } - if (!this.storedServerDescriptors.isEmpty() - && this.storedServerDescriptors.lastKey() < tooOldMillis) { - this.logger.warning("The last known relay server descriptor was " - + "published at " - + dateTimeFormat.format(this.storedServerDescriptors.lastKey()) - + ", which is more than 5:30 hours in the past."); + } + + public void storeExtraInfoDescriptor(byte[] data, + String extraInfoDigest, long published) { + SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/"); + descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + "/extra-info/" + + descriptorFormat.format(new Date(published)) + + extraInfoDigest.substring(0, 1) + "/" + + extraInfoDigest.substring(1, 2) + "/" + + extraInfoDigest); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncCatFile = new File("recent/relay-descriptors/" + + "extra-infos/" + this.rsyncCatString + "-extra-infos.tmp"); + File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; + boolean[] append = new boolean[] { false, true }; + if (this.store(EXTRA_INFO_ANNOTATION, data, outputFiles, 
append)) { + this.storedExtraInfoDescriptorsCounter++; } - if (!this.storedExtraInfoDescriptors.isEmpty() - && this.storedExtraInfoDescriptors.lastKey() < tooOldMillis) { - this.logger.warning("The last known relay extra-info descriptor " - + "was published at " + dateTimeFormat.format( - this.storedExtraInfoDescriptors.lastKey()) - + ", which is more than 5:30 hours in the past."); + if (!tarballFileExistedBefore + && this.now - published < 48L * 60L * 60L * 1000L) { + if (!this.storedExtraInfoDescriptors.containsKey(published)) { + this.storedExtraInfoDescriptors.put(published, + new HashSet<String>()); + } + this.storedExtraInfoDescriptors.get(published).add(extraInfoDigest); } - if (!this.storedMicrodescriptors.isEmpty() - && this.storedMicrodescriptors.lastKey() < tooOldMillis) { - this.logger.warning("The last known relay microdescriptor was " - + "contained in a microdesc consensus that was valid after " - + dateTimeFormat.format(this.storedMicrodescriptors.lastKey()) - + ", which is more than 5:30 hours in the past."); + } + + public void storeMicrodescriptor(byte[] data, + String microdescriptorDigest, long validAfter) { + /* TODO We could check here whether we already stored the + * microdescriptor in the same valid-after month. This can happen, + * e.g., when two relays share the same microdescriptor. In that case + * this method gets called twice and the second call overwrites the + * file written in the first call. However, this method must be + * called twice to store the same microdescriptor in two different + * valid-after months. 
*/ + SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/"); + descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + File tarballFile = new File(this.outputDirectory + "/microdesc/" + + descriptorFormat.format(validAfter) + "micro/" + + microdescriptorDigest.substring(0, 1) + "/" + + microdescriptorDigest.substring(1, 2) + "/" + + microdescriptorDigest); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncCatFile = new File("recent/relay-descriptors/" + + "microdescs/micro/" + this.rsyncCatString + + "-micro.tmp"); + File[] outputFiles = new File[] { tarballFile, rsyncCatFile }; + boolean[] append = new boolean[] { false, true }; + if (this.store(MICRODESCRIPTOR_ANNOTATION, data, outputFiles, + append)) { + this.storedMicrodescriptorsCounter++; + } + if (!tarballFileExistedBefore + && this.now - validAfter < 40L * 24L * 60L * 60L * 1000L) { + if (!this.storedMicrodescriptors.containsKey(validAfter)) { + this.storedMicrodescriptors.put(validAfter, + new HashSet<String>()); + } + this.storedMicrodescriptors.get(validAfter).add( + microdescriptorDigest); } }
- /* Delete all files from the rsync directory that have not been modified - * in the last three days (except for microdescriptors which are kept - * for up to thirty days), and remove the .tmp extension from newly - * written files. */ - public void cleanUpRsyncDirectory() { - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - long cutOffMicroMillis = cutOffMillis - 27L * 24L * 60L * 60L * 1000L; - Stack<File> allFiles = new Stack<File>(); - allFiles.add(new File("recent/relay-descriptors")); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - allFiles.addAll(Arrays.asList(file.listFiles())); - } else if (file.getName().endsWith("-micro")) { - if (file.lastModified() < cutOffMicroMillis) { - file.delete(); + private boolean store(byte[] typeAnnotation, byte[] data, + File[] outputFiles, boolean[] append) { + try { + this.logger.finer("Storing " + outputFiles[0]); + if (this.descriptorParser.parseDescriptors(data, + outputFiles[0].getName()).size() != 1) { + this.logger.info("Relay descriptor file " + outputFiles[0] + + " doesn't contain exactly one descriptor. Not storing."); + return false; + } + for (int i = 0; i < outputFiles.length; i++) { + File outputFile = outputFiles[i]; + boolean appendToFile = append == null ? 
false : append[i]; + outputFile.getParentFile().mkdirs(); + BufferedOutputStream bos = new BufferedOutputStream( + new FileOutputStream(outputFile, appendToFile)); + if (data.length > 0 && data[0] != '@') { + bos.write(typeAnnotation, 0, typeAnnotation.length); } - } else if (file.lastModified() < cutOffMillis) { - file.delete(); - } else if (file.getName().endsWith(".tmp")) { - file.renameTo(new File(file.getParentFile(), - file.getName().substring(0, - file.getName().lastIndexOf(".tmp")))); + bos.write(data, 0, data.length); + bos.close(); } + return true; + } catch (DescriptorParseException e) { + this.logger.log(Level.WARNING, "Could not parse relay descriptor " + + outputFiles[0] + " before storing it to disk. Skipping.", e); + } catch (IOException e) { + this.logger.log(Level.WARNING, "Could not store relay descriptor " + + outputFiles[0], e); } + return false; } } diff --git a/src/org/torproject/collector/relaydescs/ReferenceChecker.java b/src/org/torproject/collector/relaydescs/ReferenceChecker.java index 485fcea..9f0f183 100644 --- a/src/org/torproject/collector/relaydescs/ReferenceChecker.java +++ b/src/org/torproject/collector/relaydescs/ReferenceChecker.java @@ -44,6 +44,29 @@ public class ReferenceChecker {
private File historyFile;
+ private long currentTimeMillis; + + private SortedSet<Reference> references = new TreeSet<Reference>(); + + private static DateFormat dateTimeFormat; + + static { + dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", + Locale.US); + dateTimeFormat.setLenient(false); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + private static final long ONE_HOUR = 60L * 60L * 1000L; + + private static final long THREE_HOURS = 3L * ONE_HOUR; + + private static final long SIX_HOURS = 6L * ONE_HOUR; + + private static final long ONE_DAY = 24L * ONE_HOUR; + + private static final long THIRTY_DAYS = 30L * ONE_DAY; + public ReferenceChecker(File descriptorsDir, File referencesFile, File historyFile) { this.descriptorsDir = descriptorsDir; @@ -60,8 +83,6 @@ public class ReferenceChecker { this.writeReferencesFile(); }
- private long currentTimeMillis; - private void getCurrentTimeMillis() { this.currentTimeMillis = System.currentTimeMillis(); } @@ -109,14 +130,6 @@ public class ReferenceChecker { } }
- private SortedSet<Reference> references = new TreeSet<Reference>(); - - private void addReference(String referencing, String referenced, - double weight, long expiresAfterMillis) { - this.references.add(new Reference(referencing.toUpperCase(), - referenced.toUpperCase(), weight, expiresAfterMillis)); - } - private void readReferencesFile() { if (!this.referencesFile.exists()) { return; @@ -170,25 +183,6 @@ public class ReferenceChecker { } }
- private static DateFormat dateTimeFormat; - - static { - dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", - Locale.US); - dateTimeFormat.setLenient(false); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - } - - private static final long ONE_HOUR = 60L * 60L * 1000L; - - private static final long THREE_HOURS = 3L * ONE_HOUR; - - private static final long SIX_HOURS = 6L * ONE_HOUR; - - private static final long ONE_DAY = 24L * ONE_HOUR; - - private static final long THIRTY_DAYS = 30L * ONE_DAY; - private void readRelayNetworkStatusConsensusUnflavored( RelayNetworkStatusConsensus consensus) { String validAfter = dateTimeFormat.format( @@ -214,7 +208,6 @@ public class ReferenceChecker { } }
- private void readRelayNetworkStatusConsensusMicrodesc( RelayNetworkStatusConsensus consensus) { String validAfter = dateTimeFormat.format( @@ -267,6 +260,12 @@ public class ReferenceChecker { 0.0, this.currentTimeMillis + THIRTY_DAYS); }
+ private void addReference(String referencing, String referenced, + double weight, long expiresAfterMillis) { + this.references.add(new Reference(referencing.toUpperCase(), + referenced.toUpperCase(), weight, expiresAfterMillis)); + } + private void dropStaleReferences() { SortedSet<Reference> recentReferences = new TreeSet<Reference>(); for (Reference reference : this.references) {