[tor-commits] [collector/master] Add new BridgedbMetrics module.

karsten at torproject.org karsten at torproject.org
Fri Oct 18 09:10:12 UTC 2019


commit f2abf679c890f3f050c839cf32910900ae73ee76
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Sep 18 15:57:52 2019 +0200

    Add new BridgedbMetrics module.
    
    Implements part of #19332.
---
 CHANGELOG.md                                       |   2 +
 build.xml                                          |   2 +-
 .../org/torproject/metrics/collector/Main.java     |   3 +
 .../bridgedb/BridgedbMetricsProcessor.java         | 190 +++++++++++++++++++++
 .../metrics/collector/conf/Annotation.java         |   1 +
 .../metrics/collector/conf/Configuration.java      |   1 +
 .../org/torproject/metrics/collector/conf/Key.java |   8 +-
 .../persist/BridgedbMetricsPersistence.java        |  37 ++++
 .../collector/persist/DescriptorPersistence.java   |   1 +
 .../metrics/collector/sync/SyncPersistence.java    |   5 +
 src/main/resources/collector.properties            |  17 ++
 .../metrics/collector/conf/ConfigurationTest.java  |   2 +-
 .../metrics/collector/cron/CollecTorMainTest.java  |   1 +
 13 files changed, 267 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6e72f02..ca4d7b6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
  * Medium changes
    - Require Mockito 1.10.19 as dependency for running tests.
+   - Archive BridgeDB statistics.
+   - Update to metrics-lib 2.8.0.
 
 
 # Changes in version 1.11.1 - 2019-09-19
diff --git a/build.xml b/build.xml
index 6b097a3..892566b 100644
--- a/build.xml
+++ b/build.xml
@@ -12,7 +12,7 @@
   <property name="release.version" value="1.11.1-dev" />
   <property name="project-main-class" value="org.torproject.metrics.collector.Main" />
   <property name="name" value="collector"/>
-  <property name="metricslibversion" value="2.7.0" />
+  <property name="metricslibversion" value="2.8.0" />
   <property name="jarincludes" value="collector.properties logback.xml" />
 
   <patternset id="runtime" >
diff --git a/src/main/java/org/torproject/metrics/collector/Main.java b/src/main/java/org/torproject/metrics/collector/Main.java
index 3150ffc..3438bda 100644
--- a/src/main/java/org/torproject/metrics/collector/Main.java
+++ b/src/main/java/org/torproject/metrics/collector/Main.java
@@ -3,6 +3,7 @@
 
 package org.torproject.metrics.collector;
 
+import org.torproject.metrics.collector.bridgedb.BridgedbMetricsProcessor;
 import org.torproject.metrics.collector.bridgedescs.SanitizedBridgesWriter;
 import org.torproject.metrics.collector.bridgepools.BridgePoolAssignmentsProcessor;
 import org.torproject.metrics.collector.conf.Configuration;
@@ -59,6 +60,8 @@ public class Main {
     collecTorMains.put(Key.WebstatsActivated, SanitizeWeblogs.class);
     collecTorMains.put(Key.SnowflakeStatsActivated,
         SnowflakeStatsDownloader.class);
+    collecTorMains.put(Key.BridgedbMetricsActivated,
+        BridgedbMetricsProcessor.class);
   }
 
   private static Configuration conf = new Configuration();
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
new file mode 100644
index 0000000..7ae4502
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
@@ -0,0 +1,190 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.bridgedb;
+
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.UnparseableDescriptor;
+import org.torproject.metrics.collector.conf.Configuration;
+import org.torproject.metrics.collector.conf.ConfigurationException;
+import org.torproject.metrics.collector.conf.Key;
+import org.torproject.metrics.collector.cron.CollecTorMain;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.Instant;
+import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoUnit;
+import java.util.Arrays;
+import java.util.Stack;
+
+public class BridgedbMetricsProcessor extends CollecTorMain {
+
+  /**
+   * Class logger.
+   */
+  private static final Logger logger = LoggerFactory.getLogger(
+      BridgedbMetricsProcessor.class);
+
+  /**
+   * Directory for reading BridgeDB statistics files.
+   */
+  private File inputDirectory;
+
+  /**
+   * Directory for writing BridgeDB statistics files to be archived in tarballs.
+   */
+  private String outputPathName;
+
+  /**
+   * Directory for writing recently processed BridgeDB statistics files.
+   */
+  private String recentPathName;
+
+  /**
+   * Relative path format. NOTE(review): sync paths add "-bridgedb-metrics".
+   */
+  private DateTimeFormatter filenameFormat = DateTimeFormatter.ofPattern(
+      "uuuu/MM/dd/uuuu-MM-dd-HH-mm-ss");
+
+  /**
+   * Initialize this class with the given configuration.
+   */
+  public BridgedbMetricsProcessor(Configuration config) {
+    super(config);
+  }
+
+  /**
+   * Return the module identifier.
+   *
+   * @return Module identifier.
+   */
+  @Override
+  public String module() {
+    return "BridgedbMetrics";
+  }
+
+  /**
+   * Return the synchronization marker.
+   *
+   * @return Synchronization marker.
+   */
+  @Override
+  protected String syncMarker() {
+    return "BridgedbMetrics";
+  }
+
+  /**
+   * Read descriptors from the input directory, write each BridgeDB metrics
+   * descriptor to the output and recent directories under a path derived from
+   * its end time, skip anything else, and clean up the recent directory.
+   *
+   * @throws ConfigurationException Thrown if configuration values cannot be
+   *     obtained.
+   */
+  @Override
+  protected void startProcessing() throws ConfigurationException {
+    logger.info("Starting BridgeDB statistics module of CollecTor.");
+    this.initializeConfiguration();
+    logger.info("Reading BridgeDB statistics files in {}.",
+        this.inputDirectory);
+    for (Descriptor descriptor
+        : DescriptorSourceFactory.createDescriptorReader()
+        .readDescriptors(this.inputDirectory)) {
+      if (descriptor instanceof BridgedbMetrics) {
+        BridgedbMetrics bridgedbMetrics = (BridgedbMetrics) descriptor;
+        Path tarballPath = Paths.get(this.outputPathName,
+            bridgedbMetrics.bridgedbMetricsEnd().format(this.filenameFormat));
+        Path rsyncPath = Paths.get(this.recentPathName,
+            bridgedbMetrics.bridgedbMetricsEnd().format(this.filenameFormat));
+        this.writeDescriptor(bridgedbMetrics.getRawDescriptorBytes(),
+            tarballPath, rsyncPath);
+      } else if (descriptor instanceof UnparseableDescriptor) {
+        logger.warn("Skipping unparseable descriptor in file {}.",
+            descriptor.getDescriptorFile(),
+            ((UnparseableDescriptor) descriptor).getDescriptorParseException());
+      } else {
+        logger.warn("Skipping unexpected descriptor of type {} in file {}.",
+            descriptor.getClass(), descriptor.getDescriptorFile());
+      }
+    }
+    logger.info("Cleaning up directory {} containing recent files.",
+        this.recentPathName);
+    this.cleanUpRsyncDirectory();
+    logger.info("Finished processing BridgeDB statistics file(s).");
+  }
+
+  /**
+   * Initialize configuration by obtaining current configuration values and
+   * storing them in instance attributes.
+   */
+  private void initializeConfiguration() throws ConfigurationException {
+    this.outputPathName = Paths.get(config.getPath(Key.OutputPath).toString(),
+        "bridgedb-metrics").toString();
+    this.recentPathName = Paths.get(config.getPath(Key.RecentPath).toString(),
+        "bridgedb-metrics").toString();
+    this.inputDirectory =
+        config.getPath(Key.BridgedbMetricsLocalOrigins).toFile();
+  }
+
+  /**
+   * Write the given raw descriptor bytes to each given file that does not
+   * exist yet, and stop early if parent directories cannot be created.
+   *
+   * @param rawDescriptorBytes Raw descriptor bytes to write.
+   * @param outputPaths One or more paths to write to.
+   */
+  private void writeDescriptor(byte[] rawDescriptorBytes,
+      Path ... outputPaths) {
+    for (Path outputPath : outputPaths) {
+      try {
+        File outputFile = outputPath.toFile();
+        if (outputFile.exists()) {
+          continue;
+        }
+        if (!outputFile.getParentFile().exists()
+            && !outputFile.getParentFile().mkdirs()) {
+          logger.warn("Could not create parent directories of {}.", outputFile);
+          return;
+        }
+        Files.write(outputPath, rawDescriptorBytes);
+      } catch (IOException e) {
+        logger.warn("Unable to write descriptor to file {}.", outputPath, e);
+      }
+    }
+  }
+
+  /**
+   * Delete all files, not directories, below the recent directory that have
+   * not been modified in the last three days.
+   */
+  public void cleanUpRsyncDirectory() {
+    Instant cutOff = Instant.now().minus(3L, ChronoUnit.DAYS);
+    Stack<File> allFiles = new Stack<>();
+    allFiles.add(new File(this.recentPathName));
+    while (!allFiles.isEmpty()) {
+      File file = allFiles.pop();
+      if (file.isDirectory()) {
+        File[] filesInDirectory = file.listFiles();
+        if (null != filesInDirectory) {
+          allFiles.addAll(Arrays.asList(filesInDirectory));
+        }
+      } else if (Instant.ofEpochMilli(file.lastModified()).isBefore(cutOff)) {
+        try {
+          Files.deleteIfExists(file.toPath());
+        } catch (IOException e) {
+          logger.warn("Unable to delete file {} that is apparently older than "
+              + "three days.", file, e);
+        }
+      }
+    }
+  }
+}
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
index 7d2bbe9..ff5119e 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
@@ -7,6 +7,7 @@ package org.torproject.metrics.collector.conf;
 public enum Annotation {
 
   BandwidthFile("@type bandwidth-file 1.0\n"),
+  BridgedbMetrics("@type bridgedb-metrics 1.0\n"),
   BridgeExtraInfo("@type bridge-extra-info 1.3\n"),
   BridgePoolAssignment("@type bridge-pool-assignment 1.0\n"),
   BridgeServer("@type bridge-server-descriptor 1.2\n"),
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Configuration.java b/src/main/java/org/torproject/metrics/collector/conf/Configuration.java
index 59229e3..56be34c 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Configuration.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Configuration.java
@@ -89,6 +89,7 @@ public class Configuration extends Observable implements Cloneable {
     if (!(this.getBool(Key.RelaydescsActivated)
         || this.getBool(Key.BridgedescsActivated)
         || this.getBool(Key.BridgePoolAssignmentsActivated)
+        || this.getBool(Key.BridgedbMetricsActivated)
         || this.getBool(Key.ExitlistsActivated)
         || this.getBool(Key.UpdateindexActivated)
         || this.getBool(Key.OnionPerfActivated)
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Key.java b/src/main/java/org/torproject/metrics/collector/conf/Key.java
index dfef673..390feed 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Key.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Key.java
@@ -78,7 +78,13 @@ public enum Key {
   SnowflakeStatsPeriodMinutes(Integer.class),
   SnowflakeStatsUrl(URL.class),
   SnowflakeStatsSources(SourceType[].class),
-  SnowflakeStatsSyncOrigins(URL[].class);
+  SnowflakeStatsSyncOrigins(URL[].class),
+  BridgedbMetricsActivated(Boolean.class),
+  BridgedbMetricsOffsetMinutes(Integer.class),
+  BridgedbMetricsPeriodMinutes(Integer.class),
+  BridgedbMetricsSources(SourceType[].class),
+  BridgedbMetricsLocalOrigins(Path.class),
+  BridgedbMetricsSyncOrigins(URL[].class);
 
   private Class clazz;
   private static Set<String> keys;
diff --git a/src/main/java/org/torproject/metrics/collector/persist/BridgedbMetricsPersistence.java b/src/main/java/org/torproject/metrics/collector/persist/BridgedbMetricsPersistence.java
new file mode 100644
index 0000000..a72ffe2
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/persist/BridgedbMetricsPersistence.java
@@ -0,0 +1,37 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.persist;
+
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.metrics.collector.conf.Annotation;
+
+import java.nio.file.Paths;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+
+public class BridgedbMetricsPersistence
+    extends DescriptorPersistence<BridgedbMetrics> {
+
+  private static final String BRIDGEDB_STATS = "bridgedb-metrics";
+
+  /** Wrap the descriptor with its type annotation and compute both paths. */
+  public BridgedbMetricsPersistence(BridgedbMetrics desc) {
+    super(desc, Annotation.BridgedbMetrics.bytes());
+    calculatePaths();
+  }
+
+  /** Set recentPath (flat) and storagePath (year/month/day directories). */
+  private void calculatePaths() {
+    String fileName = DateTimeFormatter.ofPattern("uuuu-MM-dd-HH-mm-ss")
+        .withZone(ZoneOffset.UTC).format(this.desc.bridgedbMetricsEnd())
+        + "-bridgedb-metrics";
+    String[] dateParts = DateTimeFormatter.ofPattern("uuuu/MM/dd")
+        .withZone(ZoneOffset.UTC).format(this.desc.bridgedbMetricsEnd())
+        .split("/");
+    this.recentPath = Paths.get(BRIDGEDB_STATS, fileName).toString();
+    this.storagePath = Paths.get(BRIDGEDB_STATS, dateParts[0], dateParts[1],
+        dateParts[2], fileName).toString();
+  }
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java b/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java
index 3e7a06b..20cd570 100644
--- a/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java
+++ b/src/main/java/org/torproject/metrics/collector/persist/DescriptorPersistence.java
@@ -20,6 +20,7 @@ public abstract class DescriptorPersistence<T extends Descriptor> {
   protected static final String BRIDGEDESCS = "bridge-descriptors";
   protected static final String BRIDGEPOOLASSIGNMENTS
       = "bridge-pool-assignments";
+  protected static final String BRIDGEDBSTATS = "bridgedb-stats";
   protected static final String DASH = "-";
   protected static final String DOT = ".";
   protected static final String MICRODESC = "microdesc";
diff --git a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
index cfc3dbe..f6678fe 100644
--- a/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
+++ b/src/main/java/org/torproject/metrics/collector/sync/SyncPersistence.java
@@ -8,6 +8,7 @@ import org.torproject.descriptor.BridgeExtraInfoDescriptor;
 import org.torproject.descriptor.BridgeNetworkStatus;
 import org.torproject.descriptor.BridgePoolAssignment;
 import org.torproject.descriptor.BridgeServerDescriptor;
+import org.torproject.descriptor.BridgedbMetrics;
 import org.torproject.descriptor.Descriptor;
 import org.torproject.descriptor.ExitList;
 import org.torproject.descriptor.RelayExtraInfoDescriptor;
@@ -24,6 +25,7 @@ import org.torproject.metrics.collector.persist.BandwidthFilePersistence;
 import org.torproject.metrics.collector.persist.BridgeExtraInfoPersistence;
 import org.torproject.metrics.collector.persist.BridgePoolAssignmentPersistence;
 import org.torproject.metrics.collector.persist.BridgeServerDescriptorPersistence;
+import org.torproject.metrics.collector.persist.BridgedbMetricsPersistence;
 import org.torproject.metrics.collector.persist.ConsensusPersistence;
 import org.torproject.metrics.collector.persist.DescriptorPersistence;
 import org.torproject.metrics.collector.persist.ExitlistPersistence;
@@ -154,6 +156,9 @@ public class SyncPersistence {
         case "SnowflakeStats":
           descPersist = new SnowflakeStatsPersistence((SnowflakeStats) desc);
           break;
+        case "BridgedbMetrics":
+          descPersist = new BridgedbMetricsPersistence((BridgedbMetrics) desc);
+          break;
         default:
           log.trace("Invalid descriptor type {} for sync-merge.",
               clazz.getName());
diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties
index b180a3e..e7cadf7 100644
--- a/src/main/resources/collector.properties
+++ b/src/main/resources/collector.properties
@@ -59,6 +59,12 @@ SnowflakeStatsActivated = false
 SnowflakeStatsPeriodMinutes = 480
 # offset in minutes since the epoch and
 SnowflakeStatsOffsetMinutes = 100
+# the following defines whether this module is activated
+BridgedbMetricsActivated = false
+# period in minutes
+BridgedbMetricsPeriodMinutes = 480
+# offset in minutes since the epoch
+BridgedbMetricsOffsetMinutes = 340
 
 ##########################################
 ## All below can be changed at runtime.
@@ -216,3 +222,14 @@ SnowflakeStatsSyncOrigins = https://collector.torproject.org
 ## Where to download snowflake statistics from.
 SnowflakeStatsUrl = https://snowflake-broker.torproject.net/metrics
 #
+######## BridgeDB statistics ########
+#
+## Define descriptor sources
+#  possible values: Local, Sync
+BridgedbMetricsSources = Local
+## Relative path to directory to import BridgeDB metrics from.
+BridgedbMetricsLocalOrigins = in/bridgedb-stats
+##  Retrieve files from the following instances.
+##  List of URLs separated by comma.
+BridgedbMetricsSyncOrigins = https://collector.torproject.org
+#
\ No newline at end of file
diff --git a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
index 7845909..7e9ea28 100644
--- a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
+++ b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
@@ -39,7 +39,7 @@ public class ConfigurationTest {
   public void testKeyCount() {
     assertEquals("The number of properties keys in enum Key changed."
         + "\n This test class should be adapted.",
-        65, Key.values().length);
+        71, Key.values().length);
   }
 
   @Test()
diff --git a/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java b/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java
index 99f1f48..cc124a4 100644
--- a/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java
+++ b/src/test/java/org/torproject/metrics/collector/cron/CollecTorMainTest.java
@@ -70,6 +70,7 @@ public class CollecTorMainTest {
         case "Relay":
         case "Bridge":
         case "BridgePoolAssignments":
+        case "BridgedbMetrics":
         case "Exitlist":
         case "OnionPerf":
         case "Webstats":





More information about the tor-commits mailing list