commit f2abf679c890f3f050c839cf32910900ae73ee76 Author: Karsten Loesing karsten.loesing@gmx.net Date: Wed Sep 18 15:57:52 2019 +0200
Add new BridgedbMetrics module.
Implements part of #19332. --- CHANGELOG.md | 2 + build.xml | 2 +- .../org/torproject/metrics/collector/Main.java | 3 + .../bridgedb/BridgedbMetricsProcessor.java | 190 +++++++++++++++++++++ .../metrics/collector/conf/Annotation.java | 1 + .../metrics/collector/conf/Configuration.java | 1 + .../org/torproject/metrics/collector/conf/Key.java | 8 +- .../persist/BridgedbMetricsPersistence.java | 37 ++++ .../collector/persist/DescriptorPersistence.java | 1 + .../metrics/collector/sync/SyncPersistence.java | 5 + src/main/resources/collector.properties | 17 ++ .../metrics/collector/conf/ConfigurationTest.java | 2 +- .../metrics/collector/cron/CollecTorMainTest.java | 1 + 13 files changed, 267 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e72f02..ca4d7b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@
* Medium changes - Require Mockito 1.10.19 as dependency for running tests. + - Archive BridgeDB statistics. + - Update to metrics-lib 2.8.0.
# Changes in version 1.11.1 - 2019-09-19 diff --git a/build.xml b/build.xml index 6b097a3..892566b 100644 --- a/build.xml +++ b/build.xml @@ -12,7 +12,7 @@ <property name="release.version" value="1.11.1-dev" /> <property name="project-main-class" value="org.torproject.metrics.collector.Main" /> <property name="name" value="collector"/> - <property name="metricslibversion" value="2.7.0" /> + <property name="metricslibversion" value="2.8.0" /> <property name="jarincludes" value="collector.properties logback.xml" />
<patternset id="runtime" > diff --git a/src/main/java/org/torproject/metrics/collector/Main.java b/src/main/java/org/torproject/metrics/collector/Main.java index 3150ffc..3438bda 100644 --- a/src/main/java/org/torproject/metrics/collector/Main.java +++ b/src/main/java/org/torproject/metrics/collector/Main.java @@ -3,6 +3,7 @@
package org.torproject.metrics.collector;
+import org.torproject.metrics.collector.bridgedb.BridgedbMetricsProcessor; import org.torproject.metrics.collector.bridgedescs.SanitizedBridgesWriter; import org.torproject.metrics.collector.bridgepools.BridgePoolAssignmentsProcessor; import org.torproject.metrics.collector.conf.Configuration; @@ -59,6 +60,8 @@ public class Main { collecTorMains.put(Key.WebstatsActivated, SanitizeWeblogs.class); collecTorMains.put(Key.SnowflakeStatsActivated, SnowflakeStatsDownloader.class); + collecTorMains.put(Key.BridgedbMetricsActivated, + BridgedbMetricsProcessor.class); }
private static Configuration conf = new Configuration(); diff --git a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java new file mode 100644 index 0000000..7ae4502 --- /dev/null +++ b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java @@ -0,0 +1,190 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.bridgedb;
+
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.UnparseableDescriptor;
+import org.torproject.metrics.collector.conf.Configuration;
+import org.torproject.metrics.collector.conf.ConfigurationException;
+import org.torproject.metrics.collector.conf.Key;
+import org.torproject.metrics.collector.cron.CollecTorMain;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.Instant;
+import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoUnit;
+import java.util.ArrayDeque;
+import java.util.Arrays;
+import java.util.Deque;
+
+/**
+ * Module that copies BridgeDB statistics from a local input directory to the
+ * directories for tarballs and for recently processed files.
+ */
+public class BridgedbMetricsProcessor extends CollecTorMain {
+
+  /** Class logger. */
+  private static final Logger logger = LoggerFactory.getLogger(
+      BridgedbMetricsProcessor.class);
+
+  /** Output path format, relative to the output and recent directories. */
+  private static final DateTimeFormatter FILENAME_FORMAT =
+      DateTimeFormatter.ofPattern("uuuu/MM/dd/uuuu-MM-dd-HH-mm-ss");
+
+  /** Directory for reading BridgeDB statistics files. */
+  private File inputDirectory;
+
+  /**
+   * Directory for writing BridgeDB statistics files to be archived in tarballs.
+   */
+  private String outputPathName;
+
+  /** Directory for writing recently processed BridgeDB statistics files. */
+  private String recentPathName;
+
+  /**
+   * Initialize this class with the given configuration.
+   */
+  public BridgedbMetricsProcessor(Configuration config) {
+    super(config);
+  }
+
+  /**
+   * Return the module identifier.
+   *
+   * @return Module identifier.
+   */
+  @Override
+  public String module() {
+    return "BridgedbMetrics";
+  }
+
+  /**
+   * Return the synchronization marker.
+   *
+   * @return Synchronization marker.
+   */
+  @Override
+  protected String syncMarker() {
+    return "BridgedbMetrics";
+  }
+
+  /**
+   * Start processing files, which includes reading BridgeDB statistics files
+   * from disk, writing each descriptor to the tarball and recent directories,
+   * and cleaning up the recent directory.
+   *
+   * @throws ConfigurationException Thrown if configuration values cannot be
+   *     obtained.
+   */
+  @Override
+  protected void startProcessing() throws ConfigurationException {
+    logger.info("Starting BridgeDB statistics module of CollecTor.");
+    this.initializeConfiguration();
+    logger.info("Reading BridgeDB statistics files in {}.",
+        this.inputDirectory);
+    for (Descriptor descriptor
+        : DescriptorSourceFactory.createDescriptorReader()
+        .readDescriptors(this.inputDirectory)) {
+      if (descriptor instanceof BridgedbMetrics) {
+        BridgedbMetrics bridgedbMetrics = (BridgedbMetrics) descriptor;
+        // Both copies share the same relative path below their directories.
+        String relativePath = bridgedbMetrics.bridgedbMetricsEnd()
+            .format(FILENAME_FORMAT);
+        this.writeDescriptor(bridgedbMetrics.getRawDescriptorBytes(),
+            Paths.get(this.outputPathName, relativePath),
+            Paths.get(this.recentPathName, relativePath));
+      } else if (descriptor instanceof UnparseableDescriptor) {
+        logger.warn("Skipping unparseable descriptor in file {}.",
+            descriptor.getDescriptorFile(),
+            ((UnparseableDescriptor) descriptor).getDescriptorParseException());
+      } else {
+        logger.warn("Skipping unexpected descriptor of type {} in file {}.",
+            descriptor.getClass(), descriptor.getDescriptorFile());
+      }
+    }
+    logger.info("Cleaning up directory {} containing recent files.",
+        this.recentPathName);
+    this.cleanUpRsyncDirectory();
+    logger.info("Finished processing BridgeDB statistics file(s).");
+  }
+
+  /**
+   * Initialize configuration by obtaining current configuration values and
+   * storing them in instance attributes.
+   */
+  private void initializeConfiguration() throws ConfigurationException {
+    this.outputPathName = Paths.get(config.getPath(Key.OutputPath).toString(),
+        "bridgedb-metrics").toString();
+    this.recentPathName = Paths.get(config.getPath(Key.RecentPath).toString(),
+        "bridgedb-metrics").toString();
+    this.inputDirectory =
+        config.getPath(Key.BridgedbMetricsLocalOrigins).toFile();
+  }
+
+  /**
+   * Write the given raw descriptor bytes to the given files, and stop at the
+   * first file that already exists.
+   *
+   * @param rawDescriptorBytes Raw descriptor bytes to write.
+   * @param outputPaths One or more paths to write to.
+   */
+  private void writeDescriptor(byte[] rawDescriptorBytes,
+      Path ... outputPaths) {
+    for (Path outputPath : outputPaths) {
+      try {
+        File outputFile = outputPath.toFile();
+        if (outputFile.exists()) {
+          // An existing file marks this descriptor as processed; writing later
+          // paths would re-create recent files that were already cleaned up.
+          return;
+        }
+        if (!outputFile.getParentFile().exists()
+            && !outputFile.getParentFile().mkdirs()) {
+          logger.warn("Could not create parent directories of {}.", outputFile);
+          return;
+        }
+        Files.write(outputPath, rawDescriptorBytes);
+      } catch (IOException e) {
+        logger.warn("Unable to write descriptor to file {}.", outputPath, e);
+      }
+    }
+  }
+
+  /**
+   * Delete all files from the rsync directory that have not been modified in
+   * the last three days.
+   */
+  public void cleanUpRsyncDirectory() {
+    Instant cutOff = Instant.now().minus(3L, ChronoUnit.DAYS);
+    Deque<File> allFiles = new ArrayDeque<>();
+    allFiles.add(new File(this.recentPathName));
+    while (!allFiles.isEmpty()) {
+      File file = allFiles.pop();
+      if (file.isDirectory()) {
+        // listFiles() returns null if the directory cannot be read.
+        File[] filesInDirectory = file.listFiles();
+        if (null != filesInDirectory) {
+          allFiles.addAll(Arrays.asList(filesInDirectory));
+        }
+      } else if (Instant.ofEpochMilli(file.lastModified()).isBefore(cutOff)) {
+        try {
+          Files.deleteIfExists(file.toPath());
+        } catch (IOException e) {
+          logger.warn("Unable to delete file {} that is apparently older than "
+              + "three days.", file, e);
+        }
+      }
+    }
+  }
+}
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java index 7d2bbe9..ff5119e 100644 --- a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java +++ b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java @@ -7,6 +7,7 @@ package org.torproject.metrics.collector.conf; public enum Annotation {
BandwidthFile("@type bandwidth-file 1.0\n"), + BridgedbMetrics("@type bridgedb-metrics 1.0\n"), BridgeExtraInfo("@type bridge-extra-info 1.3\n"), BridgePoolAssignment("@type bridge-pool-assignment 1.0\n"), BridgeServer("@type bridge-server-descriptor 1.2\n"), diff --git a/src/main/java/org/torproject/metrics/collector/conf/Configuration.java b/src/main/java/org/torproject/metrics/collector/conf/Configuration.java index 59229e3..56be34c 100644 --- a/src/main/java/org/torproject/metrics/collector/conf/Configuration.java +++ b/src/main/java/org/torproject/metrics/collector/conf/Configuration.java @@ -89,6 +89,7 @@ public class Configuration extends Observable implements Cloneable { if (!(this.getBool(Key.RelaydescsActivated) || this.getBool(Key.BridgedescsActivated) || this.getBool(Key.BridgePoolAssignmentsActivated) + || this.getBool(Key.BridgedbMetricsActivated) || this.getBool(Key.ExitlistsActivated) || this.getBool(Key.UpdateindexActivated) || this.getBool(Key.OnionPerfActivated) diff --git a/src/main/java/org/torproject/metrics/collector/conf/Key.java b/src/main/java/org/torproject/metrics/collector/conf/Key.java index dfef673..390feed 100644 --- a/src/main/java/org/torproject/metrics/collector/conf/Key.java +++ b/src/main/java/org/torproject/metrics/collector/conf/Key.java @@ -78,7 +78,13 @@ public enum Key { SnowflakeStatsPeriodMinutes(Integer.class), SnowflakeStatsUrl(URL.class), SnowflakeStatsSources(SourceType[].class), - SnowflakeStatsSyncOrigins(URL[].class); + SnowflakeStatsSyncOrigins(URL[].class), + BridgedbMetricsActivated(Boolean.class), + BridgedbMetricsOffsetMinutes(Integer.class), + BridgedbMetricsPeriodMinutes(Integer.class), + BridgedbMetricsSources(SourceType[].class), + BridgedbMetricsLocalOrigins(Path.class), + BridgedbMetricsSyncOrigins(URL[].class);
private Class clazz; private static Set<String> keys; diff --git a/src/main/java/org/torproject/metrics/collector/persist/BridgedbMetricsPersistence.java b/src/main/java/org/torproject/metrics/collector/persist/BridgedbMetricsPersistence.java new file mode 100644 index 0000000..a72ffe2 --- /dev/null +++ b/src/main/java/org/torproject/metrics/collector/persist/BridgedbMetricsPersistence.java @@ -0,0 +1,37 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.persist;
+
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.metrics.collector.conf.Annotation;
+
+import java.nio.file.Paths;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+
+/**
+ * Persistence helper that derives the recent and storage paths of a BridgeDB
+ * metrics descriptor from the value returned by bridgedbMetricsEnd().
+ */
+public class BridgedbMetricsPersistence
+    extends DescriptorPersistence<BridgedbMetrics> {
+
+  /** Name of the directory below which both paths are located. */
+  private static final String BRIDGEDB_METRICS = "bridgedb-metrics";
+
+  /** Pass descriptor and type annotation on and compute both paths. */
+  public BridgedbMetricsPersistence(BridgedbMetrics desc) {
+    super(desc, Annotation.BridgedbMetrics.bytes());
+    DateTimeFormatter dayFormat = DateTimeFormatter.ofPattern("uuuu/MM/dd")
+        .withZone(ZoneOffset.UTC);
+    DateTimeFormatter nameFormat = DateTimeFormatter
+        .ofPattern("uuuu-MM-dd-HH-mm-ss").withZone(ZoneOffset.UTC);
+    String[] ymd = this.desc.bridgedbMetricsEnd().format(dayFormat).split("/");
+    String name = this.desc.bridgedbMetricsEnd().format(nameFormat)
+        + "-bridgedb-metrics";
+    this.storagePath = Paths.get(BRIDGEDB_METRICS, ymd[0], ymd[1], ymd[2],
+        name).toString();
+    this.recentPath = Paths.get(BRIDGEDB_METRICS, name).toString();
+  }
+}
diff --git a/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java b/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java index 3e7a06b..20cd570 100644 ---
a/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java +++ b/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java @@ -20,6 +20,7 @@ public abstract class DescriptorPersistence<T extends Descriptor> { protected static final String BRIDGEDESCS = "bridge-descriptors"; protected static final String BRIDGEPOOLASSIGNMENTS = "bridge-pool-assignments"; + protected static final String BRIDGEDBSTATS = "bridgedb-stats"; protected static final String DASH = "-"; protected static final String DOT = "."; protected static final String MICRODESC = "microdesc"; diff --git a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java index cfc3dbe..f6678fe 100644 --- a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java +++ b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java @@ -8,6 +8,7 @@ import org.torproject.descriptor.BridgeExtraInfoDescriptor; import org.torproject.descriptor.BridgeNetworkStatus; import org.torproject.descriptor.BridgePoolAssignment; import org.torproject.descriptor.BridgeServerDescriptor; +import org.torproject.descriptor.BridgedbMetrics; import org.torproject.descriptor.Descriptor; import org.torproject.descriptor.ExitList; import org.torproject.descriptor.RelayExtraInfoDescriptor; @@ -24,6 +25,7 @@ import org.torproject.metrics.collector.persist.BandwidthFilePersistence; import org.torproject.metrics.collector.persist.BridgeExtraInfoPersistence; import org.torproject.metrics.collector.persist.BridgePoolAssignmentPersistence; import org.torproject.metrics.collector.persist.BridgeServerDescriptorPersistence; +import org.torproject.metrics.collector.persist.BridgedbMetricsPersistence; import org.torproject.metrics.collector.persist.ConsensusPersistence; import org.torproject.metrics.collector.persist.DescriptorPersistence; import 
org.torproject.metrics.collector.persist.ExitlistPersistence; @@ -154,6 +156,9 @@ public class SyncPersistence { case "SnowflakeStats": descPersist = new SnowflakeStatsPersistence((SnowflakeStats) desc); break; + case "BridgedbMetrics": + descPersist = new BridgedbMetricsPersistence((BridgedbMetrics) desc); + break; default: log.trace("Invalid descriptor type {} for sync-merge.", clazz.getName()); diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties index b180a3e..e7cadf7 100644 --- a/src/main/resources/collector.properties +++ b/src/main/resources/collector.properties @@ -59,6 +59,12 @@ SnowflakeStatsActivated = false SnowflakeStatsPeriodMinutes = 480 # offset in minutes since the epoch and SnowflakeStatsOffsetMinutes = 100 +# the following defines, if this module is activated +BridgedbMetricsActivated = false +# period in minutes +BridgedbMetricsPeriodMinutes = 480 +# offset in minutes since the epoch and +BridgedbMetricsOffsetMinutes = 340
########################################## ## All below can be changed at runtime. @@ -216,3 +222,14 @@ SnowflakeStatsSyncOrigins = https://collector.torproject.org ## Where to download snowflake statistics from. SnowflakeStatsUrl = https://snowflake-broker.torproject.net/metrics # +######## BridgeDB statistics ######## +# +## Define descriptor sources +# possible values: Local, Sync +BridgedbMetricsSources = Local +## Relative path to directory to import BridgeDB metrics from. +BridgedbMetricsLocalOrigins = in/bridgedb-stats +## Retrieve files from the following instances. +## List of URLs separated by comma. +BridgedbMetricsSyncOrigins = https://collector.torproject.org +# \ No newline at end of file diff --git a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java index 7845909..7e9ea28 100644 --- a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java +++ b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java @@ -39,7 +39,7 @@ public class ConfigurationTest { public void testKeyCount() { assertEquals("The number of properties keys in enum Key changed." + "\n This test class should be adapted.", - 65, Key.values().length); + 71, Key.values().length); }
@Test() diff --git a/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java b/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java index 99f1f48..cc124a4 100644 --- a/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java +++ b/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java @@ -70,6 +70,7 @@ public class CollecTorMainTest { case "Relay": case "Bridge": case "BridgePoolAssignments": + case "BridgedbMetrics": case "Exitlist": case "OnionPerf": case "Webstats":