
commit 66ddc4d7d996ad2877aea44ea03982f14f069545 Author: Karsten Loesing <karsten.loesing@gmx.net> Date: Wed Nov 25 16:09:14 2020 +0100 Delete files in out/ that are older than 7 weeks. Fixes #21219. --- CHANGELOG.md | 7 +++ .../bridgedb/BridgedbMetricsProcessor.java | 38 ++++---------- .../bridgedescs/SanitizedBridgesWriter.java | 37 +++++-------- .../BridgePoolAssignmentsProcessor.java | 33 ++++-------- .../collector/exitlists/ExitListDownloader.java | 27 ++++------ .../collector/onionperf/OnionPerfDownloader.java | 37 ++++++------- .../collector/persist/PersistenceUtils.java | 50 +++++++++++++++--- .../collector/relaydescs/ArchiveWriter.java | 61 +++++----------------- .../snowflake/SnowflakeStatsDownloader.java | 33 +++++------- .../metrics/collector/sync/SyncPersistence.java | 7 +-- .../collector/webstats/SanitizeWeblogs.java | 12 +++-- .../collector/persist/PersistUtilsTest.java | 32 ++++++++++++ 12 files changed, 182 insertions(+), 192 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff2e9e7..e292f9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# Changes in version 1.??.? - 2020-1?-?? + + * Medium changes + - Clean up descriptors written to the `out/` directory by deleting + files that are older than seven weeks. + + # Changes in version 1.16.1 - 2020-08-16 * Medium changes diff --git a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java index 0073ee3..d05aa9c 100644 --- a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java +++ b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java @@ -12,6 +12,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException; import org.torproject.metrics.collector.conf.Key; import org.torproject.metrics.collector.cron.CollecTorMain; import org.torproject.metrics.collector.persist.BridgedbMetricsPersistence; +import org.torproject.metrics.collector.persist.PersistenceUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -23,9 +24,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.time.Instant; import java.time.temporal.ChronoUnit; -import java.util.Arrays; import java.util.SortedSet; -import java.util.Stack; import java.util.TreeSet; public class BridgedbMetricsProcessor extends CollecTorMain { @@ -127,10 +126,10 @@ public class BridgedbMetricsProcessor extends CollecTorMain { descriptor.getClass(), descriptor.getDescriptorFile()); } } - logger.info("Cleaning up directory {} containing recent files.", - this.recentPathName); + logger.info("Cleaning up directories {} and {}.", + this.recentPathName, this.outputPathName); this.writeProcessedFiles(this.parsedBridgedbMetricsFile, processedFiles); - this.cleanUpRsyncDirectory(); + this.cleanUpDirectories(); logger.info("Finished processing BridgeDB statistics file(s)."); } @@ -175,28 +174,13 @@ public class BridgedbMetricsProcessor extends CollecTorMain { } /** - * Delete all files from the rsync directory that have not been modified in - * the last three days. + * Delete all files from the rsync (out) directory that have not been modified + * in the last three days (seven weeks). */ - public void cleanUpRsyncDirectory() { - Instant cutOff = Instant.now().minus(3L, ChronoUnit.DAYS); - Stack<File> allFiles = new Stack<>(); - allFiles.add(new File(this.recentPathName)); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - File[] filesInDirectory = file.listFiles(); - if (null != filesInDirectory) { - allFiles.addAll(Arrays.asList(filesInDirectory)); - } - } else if (Instant.ofEpochMilli(file.lastModified()).isBefore(cutOff)) { - try { - Files.deleteIfExists(file.toPath()); - } catch (IOException e) { - logger.warn("Unable to delete file {} that is apparently older than " - + "three days.", file, e); - } - } - } + private void cleanUpDirectories() { + PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName), + Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli()); + PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName), + Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli()); } } diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java index b8e7f2d..62288ad 100644 --- a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java +++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java @@ -13,6 +13,7 @@ import org.torproject.metrics.collector.conf.Configuration; import org.torproject.metrics.collector.conf.ConfigurationException; import org.torproject.metrics.collector.conf.Key; import org.torproject.metrics.collector.cron.CollecTorMain; +import org.torproject.metrics.collector.persist.PersistenceUtils; import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.binary.Base64; @@ -34,15 +35,15 @@ import java.security.GeneralSecurityException; import java.security.SecureRandom; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.time.Instant; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; +import java.time.temporal.ChronoUnit; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.SortedMap; -import java.util.Stack; import java.util.TreeMap; /** @@ -228,7 +229,7 @@ public class SanitizedBridgesWriter extends CollecTorMain { this.checkStaleDescriptors(); - this.cleanUpRsyncDirectory(); + this.cleanUpDirectories(); } private String scrubOrAddress(String orAddress, byte[] fingerprintBytes, @@ -1388,27 +1389,15 @@ public class SanitizedBridgesWriter extends CollecTorMain { } } - /** Delete all files from the rsync directory that have not been modified - * in the last three days, and remove the .tmp extension from newly - * written files. */ - public void cleanUpRsyncDirectory() throws ConfigurationException { - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - Stack<File> allFiles = new Stack<>(); - allFiles.add(new File(config.getPath(Key.RecentPath).toFile(), - BRIDGE_DESCRIPTORS)); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - allFiles.addAll(Arrays.asList(file.listFiles())); - } else if (file.lastModified() < cutOffMillis) { - file.delete(); - } else if (file.getName().endsWith(".tmp")) { - file.renameTo(new File(file.getParentFile(), - file.getName().substring(0, - file.getName().lastIndexOf(".tmp")))); - } - } + /** + * Delete all files from the rsync (out) directory that have not been modified + * in the last three days (seven weeks), and remove the .tmp extension from + * newly written files. */ + private void cleanUpDirectories() { + PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName), + Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli()); + PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName), + Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli()); } } diff --git a/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java index ffae262..9961d4c 100644 --- a/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java +++ b/src/main/java/org/torproject/metrics/collector/bridgepools/BridgePoolAssignmentsProcessor.java @@ -8,6 +8,7 @@ import org.torproject.metrics.collector.conf.Configuration; import org.torproject.metrics.collector.conf.ConfigurationException; import org.torproject.metrics.collector.conf.Key; import org.torproject.metrics.collector.cron.CollecTorMain; +import org.torproject.metrics.collector.persist.PersistenceUtils; import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.binary.Hex; @@ -24,7 +25,6 @@ import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; -import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.time.DateTimeException; @@ -178,7 +178,7 @@ public class BridgePoolAssignmentsProcessor extends CollecTorMain { } this.writeProcessedFiles(this.parsedBridgePoolAssignmentsFile, processedFiles); - this.cleanUpRsyncDirectory(); + this.cleanUpDirectories(); logger.info("Finished processing bridge pool assignment file(s)."); } @@ -363,29 +363,14 @@ public class BridgePoolAssignmentsProcessor extends CollecTorMain { } /** - * Delete all files from the rsync directory that have not been modified in - * the last three days. + * Delete all files from the rsync (out) directory that have not been modified + * in the last three days (seven weeks). */ - public void cleanUpRsyncDirectory() { - Instant cutOff = Instant.now().minus(3L, ChronoUnit.DAYS); - Stack<File> allFiles = new Stack<>(); - allFiles.add(new File(this.recentPathName)); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - File[] filesInDirectory = file.listFiles(); - if (null != filesInDirectory) { - allFiles.addAll(Arrays.asList(filesInDirectory)); - } - } else if (Instant.ofEpochMilli(file.lastModified()).isBefore(cutOff)) { - try { - Files.deleteIfExists(file.toPath()); - } catch (IOException e) { - logger.warn("Unable to delete file {} that is apparently older than " - + "three days.", file, e); - } - } - } + public void cleanUpDirectories() { + PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName), + Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli()); + PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName), + Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli()); } } diff --git a/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java b/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java index c6b45da..6b9b791 100644 --- a/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java +++ b/src/main/java/org/torproject/metrics/collector/exitlists/ExitListDownloader.java @@ -13,6 +13,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException; import org.torproject.metrics.collector.conf.Key; import org.torproject.metrics.collector.cron.CollecTorMain; import org.torproject.metrics.collector.downloader.Downloader; +import org.torproject.metrics.collector.persist.PersistenceUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,6 +25,8 @@ import java.io.IOException; import java.net.URL; import java.nio.file.Paths; import java.text.SimpleDateFormat; +import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.Arrays; import java.util.Date; import java.util.SortedSet; @@ -168,24 +171,16 @@ public class ExitListDownloader extends CollecTorMain { } logger.info(dumpStats.toString()); - this.cleanUpRsyncDirectory(); + this.cleanUpDirectories(); } - /** Delete all files from the rsync directory that have not been modified - * in the last three days. */ - public void cleanUpRsyncDirectory() { - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - Stack<File> allFiles = new Stack<>(); - allFiles.add(new File(recentPathName)); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - allFiles.addAll(Arrays.asList(file.listFiles())); - } else if (file.lastModified() < cutOffMillis) { - file.delete(); - } - } + /** Delete all files from the rsync (out) directory that have not been + * modified in the last three days (seven weeks). */ + private void cleanUpDirectories() { + PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName), + Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli()); + PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName), + Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli()); } } diff --git a/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java b/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java index d22ac0b..f90bdfe 100644 --- a/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java +++ b/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java @@ -12,6 +12,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException; import org.torproject.metrics.collector.conf.Key; import org.torproject.metrics.collector.cron.CollecTorMain; import org.torproject.metrics.collector.downloader.Downloader; +import org.torproject.metrics.collector.persist.PersistenceUtils; import org.apache.commons.compress.utils.IOUtils; import org.slf4j.Logger; @@ -31,13 +32,13 @@ import java.nio.file.StandardCopyOption; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.SortedSet; -import java.util.Stack; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -103,7 +104,7 @@ public class OnionPerfDownloader extends CollecTorMain { this.downloadFromOnionPerfHost(baseUrl); } this.writeDownloadedOnionPerfFiles(); - this.cleanUpRsyncDirectory(); + this.cleanUpDirectories(); } private void readDownloadedOnionPerfFiles() { @@ -441,21 +442,21 @@ public class OnionPerfDownloader extends CollecTorMain { } } - /** Delete all files from the rsync directory that have not been modified - * in the last three days. */ - public void cleanUpRsyncDirectory() throws ConfigurationException { - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - Stack<File> allFiles = new Stack<>(); - allFiles.add(new File(config.getPath(Key.RecentPath).toFile(), TORPERF)); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - allFiles.addAll(Arrays.asList(file.listFiles())); - } else if (file.lastModified() < cutOffMillis) { - file.delete(); - } - } + /** Delete all files from the rsync (out) directories that have not been + * modified in the last three days (seven weeks). */ + private void cleanUpDirectories() { + PersistenceUtils.cleanDirectory( + new File(this.recentDirectory, TORPERF).toPath(), + Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli()); + PersistenceUtils.cleanDirectory( + new File(this.recentDirectory, ONIONPERF).toPath(), + Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli()); + PersistenceUtils.cleanDirectory( + new File(this.archiveDirectory, TORPERF).toPath(), + Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli()); + PersistenceUtils.cleanDirectory( + new File(this.archiveDirectory, ONIONPERF).toPath(), + Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli()); } } diff --git a/src/main/java/org/torproject/metrics/collector/persist/PersistenceUtils.java b/src/main/java/org/torproject/metrics/collector/persist/PersistenceUtils.java index c958aec..e787c39 100644 --- a/src/main/java/org/torproject/metrics/collector/persist/PersistenceUtils.java +++ b/src/main/java/org/torproject/metrics/collector/persist/PersistenceUtils.java @@ -18,6 +18,7 @@ import java.nio.file.StandardCopyOption; import java.nio.file.StandardOpenOption; import java.nio.file.attribute.BasicFileAttributes; import java.text.SimpleDateFormat; +import java.time.Instant; import java.util.Date; public class PersistenceUtils { @@ -80,33 +81,70 @@ public class PersistenceUtils { } /** Move temporary files to their final location. */ - public static void cleanDirectory(Path pathToClean) throws IOException { + public static void cleanDirectory(Path pathToClean) { PersistenceUtils.cleanDirectory(pathToClean, -1L); } /** Clean up the given directory by deleting files that are older than the * given cut-off timestamp, and by moving temporary files to their final * location. */ - public static void cleanDirectory(Path pathToClean, long cutOffMillis) - throws IOException { + public static void cleanDirectory(Path pathToClean, long cutOffMillis) { + PersistenceUtils.cleanDirectory(pathToClean, cutOffMillis, null); + } + + /** Clean up the given directory, excluding the given subdirectory, by + * deleting files that are older than the given cut-off timestamp, and by + * moving temporary files to their final location. */ + public static void cleanDirectory(Path pathToClean, long cutOffMillis, + Path pathToExclude) { + if (!Files.exists(pathToClean)) { + return; + } + logger.info("Cleaning up directory {} with cut-off time {}.", + pathToClean, Instant.ofEpochMilli(cutOffMillis)); SimpleFileVisitor<Path> sfv = new SimpleFileVisitor<Path>() { + @Override + public FileVisitResult preVisitDirectory(Path dir, + BasicFileAttributes attrs) { + if (null == pathToExclude || !pathToExclude.equals(dir)) { + return FileVisitResult.CONTINUE; + } else { + return FileVisitResult.SKIP_SUBTREE; + } + } + @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { String tempName = file.toString(); if (cutOffMillis >= 0L && attrs.lastModifiedTime().toMillis() < cutOffMillis) { - file.toFile().delete(); + Files.delete(file); } else if (tempName.endsWith(TEMPFIX)) { Path outputPath = Paths .get(tempName.substring(0, tempName.length() - TEMPFIX.length())); Files.deleteIfExists(outputPath); - file.toFile().renameTo(outputPath.toFile()); + Files.move(file, outputPath); + } + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) + throws IOException { + if (!Files.list(dir).findFirst().isPresent()) { + Files.delete(dir); } return FileVisitResult.CONTINUE; } }; - Files.walkFileTree(pathToClean, sfv); + try { + Files.walkFileTree(pathToClean, sfv); + } catch (IOException e) { + logger.warn("Caught I/O exception while cleaning up directory {} with " + + "cut-off time {}. Continuing.", + pathToClean, Instant.ofEpochMilli(cutOffMillis), e); + } } /** Return all date-time parts as array. */ diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java b/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java index 8addd5e..28472f8 100644 --- a/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java +++ b/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java @@ -18,6 +18,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException; import org.torproject.metrics.collector.conf.Key; import org.torproject.metrics.collector.conf.SourceType; import org.torproject.metrics.collector.cron.CollecTorMain; +import org.torproject.metrics.collector.persist.PersistenceUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,7 +39,7 @@ import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; -import java.util.Arrays; +import java.time.temporal.ChronoUnit; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -46,7 +47,6 @@ import java.util.Map; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; -import java.util.Stack; import java.util.TreeMap; import java.util.TreeSet; @@ -197,7 +197,7 @@ public class ArchiveWriter extends CollecTorMain { this.checkStaledescriptors(); - this.cleanUpRsyncDirectory(); + this.cleanUpDirectories(); this.saveDescriptorDigests(); @@ -549,51 +549,16 @@ public class ArchiveWriter extends CollecTorMain { } } - /** Delete all files from the rsync directory that have not been modified - * in the last three days (except for microdescriptors which are kept - * for up to thirty days), and remove the .tmp extension from newly - * written files. */ - public void cleanUpRsyncDirectory() { - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - long cutOffMicroMillis = cutOffMillis - 27L * 24L * 60L * 60L * 1000L; - Stack<File> allFiles = new Stack<>(); - allFiles.add(new File(recentPathName, RELAY_DESCRIPTORS)); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - File[] containedFiles = file.listFiles(); - if (null == containedFiles) { - logger.warn("Unable to list files contained in directory {}.", file); - } else { - allFiles.addAll(Arrays.asList(containedFiles)); - } - } else if (file.getName().endsWith("-micro")) { - if (file.lastModified() < cutOffMicroMillis) { - if (!file.delete()) { - logger.warn("Unable to delete outdated descriptor file {}.", file); - } - } - } else if (file.lastModified() < cutOffMillis) { - if (!file.delete()) { - logger.warn("Unable to delete outdated descriptor file {}.", file); - } - } else if (file.getName().endsWith(".tmp")) { - File destinationFile = new File(file.getParentFile(), - file.getName().substring(0, file.getName().lastIndexOf(".tmp"))); - if (destinationFile.exists()) { - logger.warn("Attempting to rename descriptor file {} to existing " - + "file {}.", file, destinationFile); - } else { - logger.info("Renaming descriptor file {} to non-existing file {}.", - file, destinationFile); - } - if (!file.renameTo(destinationFile)) { - logger.warn("Unable to rename descriptor file {} to {}.", file, - destinationFile); - } - } - } + /** Delete all files from the rsync (out) directory that have not been + * modified in the last three days (seven weeks), and remove the .tmp + * extension from newly written files. */ + public void cleanUpDirectories() { + PersistenceUtils.cleanDirectory( + Paths.get(recentPathName, RELAY_DESCRIPTORS), + Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli()); + PersistenceUtils.cleanDirectory(Paths.get(outputDirectory), + Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli(), + Paths.get(this.outputDirectory, "certs")); } private void saveDescriptorDigests() { diff --git a/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java b/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java index cbca74a..93388d5 100644 --- a/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java +++ b/src/main/java/org/torproject/metrics/collector/snowflake/SnowflakeStatsDownloader.java @@ -13,6 +13,7 @@ import org.torproject.metrics.collector.conf.ConfigurationException; import org.torproject.metrics.collector.conf.Key; import org.torproject.metrics.collector.cron.CollecTorMain; import org.torproject.metrics.collector.downloader.Downloader; +import org.torproject.metrics.collector.persist.PersistenceUtils; import org.torproject.metrics.collector.persist.SnowflakeStatsPersistence; import org.slf4j.Logger; @@ -25,10 +26,10 @@ import java.io.OutputStream; import java.net.URL; import java.nio.file.Path; import java.nio.file.Paths; +import java.time.Instant; import java.time.LocalDateTime; -import java.util.Arrays; +import java.time.temporal.ChronoUnit; import java.util.SortedSet; -import java.util.Stack; import java.util.TreeSet; public class SnowflakeStatsDownloader extends CollecTorMain { @@ -38,6 +39,8 @@ public class SnowflakeStatsDownloader extends CollecTorMain { private String recentPathName; + private String outputPathName; + /** Instantiate the snowflake-stats module using the given configuration. */ public SnowflakeStatsDownloader(Configuration config) { super(config); @@ -81,7 +84,7 @@ public class SnowflakeStatsDownloader extends CollecTorMain { DescriptorParser descriptorParser = DescriptorSourceFactory.createDescriptorParser(); SortedSet<LocalDateTime> snowflakeStatsEnds = new TreeSet<>(); - String outputPathName = config.getPath(Key.OutputPath).toString(); + this.outputPathName = config.getPath(Key.OutputPath).toString(); for (Descriptor descriptor : descriptorParser.parseDescriptors( downloadedBytes, null, null)) { if (descriptor instanceof SnowflakeStats) { @@ -119,7 +122,7 @@ public class SnowflakeStatsDownloader extends CollecTorMain { } this.writeProcessedFiles(parsedSnowflakeStatsFile, processedFiles); - this.cleanUpRsyncDirectory(); + this.cleanUpDirectories(); } /** @@ -150,21 +153,13 @@ public class SnowflakeStatsDownloader extends CollecTorMain { } } - /** Delete all files from the rsync directory that have not been modified - * in the last three days. */ - public void cleanUpRsyncDirectory() { - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - Stack<File> allFiles = new Stack<>(); - allFiles.add(new File(recentPathName)); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - allFiles.addAll(Arrays.asList(file.listFiles())); - } else if (file.lastModified() < cutOffMillis) { - file.delete(); - } - } + /** Delete all files from the rsync (out) directory that have not been + * modified in the last three days (seven weeks). */ + private void cleanUpDirectories() { + PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName), + Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli()); + PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName), + Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli()); } } diff --git a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java index adffb93..af48b1f 100644 --- a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java +++ b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java @@ -42,7 +42,6 @@ import org.torproject.metrics.collector.persist.WebServerAccessLogPersistence; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.nio.file.Path; /** Provides persistence for descriptors based on the descriptor type. */ @@ -69,11 +68,7 @@ public class SyncPersistence { * Cleans the directory in {@code RecentPath} after storing descriptors. */ public void cleanDirectory() { - try { - PersistenceUtils.cleanDirectory(recentPath); - } catch (IOException ioe) { - logger.error("Cleaning of {} failed.", recentPath.toString(), ioe); - } + PersistenceUtils.cleanDirectory(recentPath); } /** diff --git a/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java index 670f686..e4f427e 100644 --- a/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java +++ b/src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java @@ -31,8 +31,10 @@ import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.time.Instant; import java.time.LocalDate; import java.time.format.DateTimeFormatter; +import java.time.temporal.ChronoUnit; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -106,10 +108,12 @@ public class SanitizeWeblogs extends CollecTorMain { = this.findCleanWrite(this.config.getPath(Key.WebstatsLocalOrigins), previouslyProcessedWebstats); this.writeProcessedWebstats(newlyProcessedWebstats); - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - PersistenceUtils.cleanDirectory(this.config.getPath(Key.RecentPath), - cutOffMillis); + PersistenceUtils.cleanDirectory( + Paths.get(this.recentDirectory.toString(), WEBSTATS), + Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli()); + PersistenceUtils.cleanDirectory( + Paths.get(this.outputDirectory.toString(), WEBSTATS), + Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli()); } } catch (Exception e) { logger.error("Cannot sanitize web-logs: {}", e.getMessage(), e); diff --git a/src/test/java/org/torproject/metrics/collector/persist/PersistUtilsTest.java b/src/test/java/org/torproject/metrics/collector/persist/PersistUtilsTest.java index a33b94f..054d0e6 100644 --- a/src/test/java/org/torproject/metrics/collector/persist/PersistUtilsTest.java +++ b/src/test/java/org/torproject/metrics/collector/persist/PersistUtilsTest.java @@ -16,6 +16,9 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; +import java.nio.file.attribute.FileTime; +import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.List; public class PersistUtilsTest { @@ -107,4 +110,33 @@ public class PersistUtilsTest { assertEquals("File contained: " + text, theText2, text.get(3)); } + @Test() + public void testCleanDirectory() throws Exception { + /* + * out/ + * a/ # empty after deleting x + * x # too old file, delete + * b/ # keep together with recent file y + * y.tmp # recent enough, rename to y + * c/ # exclude (empty) subdirectory + */ + Instant now = Instant.now(); + Path out = tmpf.newFolder().toPath(); + Path dirA = Files.createDirectory(out.resolve("a")); + Path fileX = Files.createFile(dirA.resolve("x")); + Files.setLastModifiedTime(fileX, + FileTime.from(now.minus(9L, ChronoUnit.DAYS))); + Path dirB = Files.createDirectory(out.resolve("b")); + Path fileYTmp = Files.createFile(dirB.resolve("y.tmp")); + Files.setLastModifiedTime(fileYTmp, FileTime.from(now)); + Path dirC = Files.createDirectory(out.resolve("c")); + PersistenceUtils.cleanDirectory(out, + now.minus(3L, ChronoUnit.DAYS).toEpochMilli(), dirC); + assertFalse(Files.exists(dirA)); + assertFalse(Files.exists(fileX)); + assertTrue(Files.exists(dirB)); + assertFalse(Files.exists(fileYTmp)); + assertTrue(Files.exists(dirB.resolve("y"))); + assertTrue(Files.exists(dirC)); + } }