[tor-commits] [metrics-lib/master] Add new BridgedbMetrics descriptor type.

karsten at torproject.org karsten at torproject.org
Fri Oct 18 07:39:54 UTC 2019


commit 09d7311df6c78e354fc99be34e8226772d9193b7
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Sep 18 10:22:48 2019 +0200

    Add new BridgedbMetrics descriptor type.
    
    Also extend DescriptorReader#readDescriptors to support .gz-compressed
    files, which is necessary to process files rsync'ed from BridgeDB and
    may be useful for other purposes, too.
    
    Implements part of #19332.
---
 CHANGELOG.md                                       |   8 ++
 .../org/torproject/descriptor/BridgedbMetrics.java |  70 +++++++++++
 .../descriptor/impl/BridgedbMetricsImpl.java       | 136 +++++++++++++++++++++
 .../descriptor/impl/DescriptorParserImpl.java      |   5 +
 .../descriptor/impl/DescriptorReaderImpl.java      |  19 ++-
 .../java/org/torproject/descriptor/impl/Key.java   |   3 +
 .../torproject/descriptor/impl/ParseHelper.java    |  14 +++
 .../descriptor/impl/SnowflakeStatsImpl.java        |  22 +---
 .../descriptor/impl/BridgedbMetricsImplTest.java   | 118 ++++++++++++++++++
 9 files changed, 373 insertions(+), 22 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 457533e..23b1ca9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+# Changes in version 2.?.? - 2019-??-??
+
+ * Medium changes
+   - Extend DescriptorReader#readDescriptors to support .gz-compressed
+     files.
+   - Add new BridgedbMetrics descriptor type.
+
+
 # Changes in version 2.7.0 - 2019-09-06
 
  * Medium changes
diff --git a/src/main/java/org/torproject/descriptor/BridgedbMetrics.java b/src/main/java/org/torproject/descriptor/BridgedbMetrics.java
new file mode 100644
index 0000000..68d9d4f
--- /dev/null
+++ b/src/main/java/org/torproject/descriptor/BridgedbMetrics.java
@@ -0,0 +1,70 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.descriptor;
+
+import java.time.Duration;
+import java.time.LocalDateTime;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Contains aggregated information about requests to the BridgeDB service.
+ *
+ * @since 2.8.0
+ */
+public interface BridgedbMetrics extends Descriptor {
+
+  /**
+   * Return the end of the included measurement interval.
+   *
+   * @return End of the included measurement interval.
+   * @since 2.8.0
+   */
+  LocalDateTime bridgedbMetricsEnd();
+
+  /**
+   * Return the length of the included measurement interval.
+   *
+   * @return Length of the included measurement interval.
+   * @since 2.8.0
+   */
+  Duration bridgedbMetricsIntervalLength();
+
+  /**
+   * Return the BridgeDB metrics format version.
+   *
+   * @return BridgeDB metrics format version.
+   * @since 2.8.0
+   */
+  String bridgedbMetricsVersion();
+
+  /**
+   * Return approximate request numbers to the BridgeDB service in the
+   * measurement interval broken down by distribution mechanism, obfuscation
+   * protocol, and country code.
+   *
+   * <p>Keys are formatted as {@code DIST.PROTO.CC/EMAIL.[success|fail].none}
+   * where:</p>
+   * <ul>
+   * <li>{@code DIST} is BridgeDB's distribution mechanism, for example,
+   * {@code http}, {@code email}, or {@code moat};</li>
+   * <li>{@code PROTO} is the obfuscation protocol, for example, {@code obfs2},
+   * {@code obfs3}, {@code obfs4}, {@code scramblesuit}, or {@code fte};</li>
+   * <li>{@code CC/EMAIL} is either a two-letter country code or an email
+   * provider;</li>
+   * <li>the second-to-last field is either {@code success} or {@code fail}
+   * depending on whether the BridgeDB request succeeded; and</li>
+   * <li>the last field is reserved for an anomaly score to be added in the
+   * future.</li>
+   * </ul>
+   *
+   * <p>Values are approximate request numbers, rounded up to the next multiple
+   * of 10.</p>
+   *
+   * @return Map of approximate request numbers.
+   * @since 2.8.0
+   */
+  Optional<Map<String, Long>> bridgedbMetricCounts();
+}
+
diff --git a/src/main/java/org/torproject/descriptor/impl/BridgedbMetricsImpl.java b/src/main/java/org/torproject/descriptor/impl/BridgedbMetricsImpl.java
new file mode 100644
index 0000000..f683067
--- /dev/null
+++ b/src/main/java/org/torproject/descriptor/impl/BridgedbMetricsImpl.java
@@ -0,0 +1,136 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.descriptor.impl;
+
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.descriptor.DescriptorParseException;
+
+import java.io.File;
+import java.time.Duration;
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Scanner;
+import java.util.Set;
+
+public class BridgedbMetricsImpl extends DescriptorImpl
+    implements BridgedbMetrics {
+
+  private static final Set<Key> exactlyOnce = EnumSet.of(
+      Key.BRIDGEDB_METRICS_END, Key.BRIDGEDB_METRICS_VERSION);
+
+  BridgedbMetricsImpl(byte[] rawDescriptorBytes, int[] offsetAndLength,
+      File descriptorFile) throws DescriptorParseException {
+    super(rawDescriptorBytes, offsetAndLength, descriptorFile, false);
+    this.parseDescriptorBytes();
+    this.checkExactlyOnceKeys(exactlyOnce);
+    this.checkFirstKey(Key.BRIDGEDB_METRICS_END);
+    this.clearParsedKeys();
+  }
+
+  BridgedbMetricsImpl(byte[] rawDescriptorBytes, File descriptorFile)
+      throws DescriptorParseException {
+    this(rawDescriptorBytes, new int[] { 0, rawDescriptorBytes.length },
+        descriptorFile);
+  }
+
+  private void parseDescriptorBytes() throws DescriptorParseException {
+    Scanner scanner = this.newScanner().useDelimiter(NL);
+    while (scanner.hasNext()) {
+      String line = scanner.next();
+      if (line.startsWith("@")) {
+        continue;
+      }
+      String[] parts = line.split("[ \t]+");
+      Key key = Key.get(parts[0]);
+      switch (key) {
+        case BRIDGEDB_METRICS_END:
+          this.parseBridgedbMetricsEnd(line, parts);
+          break;
+        case BRIDGEDB_METRICS_VERSION:
+          this.parseBridgedbMetricsVersion(line, parts);
+          break;
+        case BRIDGEDB_METRIC_COUNT:
+          this.parseBridgedbMetricCount(line, parts);
+          break;
+        case INVALID:
+        default:
+          ParseHelper.parseKeyword(line, parts[0]);
+          if (this.unrecognizedLines == null) {
+            this.unrecognizedLines = new ArrayList<>();
+          }
+          this.unrecognizedLines.add(line);
+      }
+    }
+  }
+
+  private void parseBridgedbMetricsEnd(String line, String[] parts)
+      throws DescriptorParseException {
+    if (parts.length < 5 || parts[3].length() < 2 || !parts[3].startsWith("(")
+        || !parts[4].equals("s)")) {
+      throw new DescriptorParseException("Illegal line '" + line + "'.");
+    }
+    this.bridgedbMetricsEnd = ParseHelper.parseLocalDateTime(line, parts,
+        1, 2);
+    this.bridgedbMetricsIntervalLength = ParseHelper.parseDuration(line,
+        parts[3].substring(1));
+  }
+
+  private void parseBridgedbMetricsVersion(String line, String[] parts)
+      throws DescriptorParseException {
+    if (parts.length < 2) {
+      throw new DescriptorParseException("Illegal line '" + line + "'.");
+    }
+    this.bridgedbMetricsVersion = parts[1];
+  }
+
+  private void parseBridgedbMetricCount(String line, String[] parts)
+      throws DescriptorParseException {
+    if (parts.length < 3) {
+      throw new DescriptorParseException("Illegal line '" + line + "'.");
+    }
+    if (null == this.bridgedbMetricCounts) {
+      this.bridgedbMetricCounts = new LinkedHashMap<>();
+    }
+    String key = parts[1];
+    if (this.bridgedbMetricCounts.containsKey(key)) {
+      throw new DescriptorParseException("Duplicate key '" + key + "' in line '"
+          + line + "'.");
+    }
+    long value = ParseHelper.parseLong(line, parts, 2);
+    this.bridgedbMetricCounts.put(key, value);
+  }
+
+  private LocalDateTime bridgedbMetricsEnd;
+
+  @Override
+  public LocalDateTime bridgedbMetricsEnd() {
+    return this.bridgedbMetricsEnd;
+  }
+
+  private Duration bridgedbMetricsIntervalLength;
+
+  @Override
+  public Duration bridgedbMetricsIntervalLength() {
+    return this.bridgedbMetricsIntervalLength;
+  }
+
+  private String bridgedbMetricsVersion;
+
+  @Override
+  public String bridgedbMetricsVersion() {
+    return this.bridgedbMetricsVersion;
+  }
+
+  private Map<String, Long> bridgedbMetricCounts;
+
+  @Override
+  public Optional<Map<String, Long>> bridgedbMetricCounts() {
+    return Optional.ofNullable(this.bridgedbMetricCounts);
+  }
+}
+
diff --git a/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java b/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java
index 9b620cb..25494f4 100644
--- a/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java
+++ b/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java
@@ -135,6 +135,11 @@ public class DescriptorParserImpl implements DescriptorParser {
         || firstLines.contains(NL + Key.SNOWFLAKE_STATS_END.keyword + SP)) {
       return this.parseOneOrMoreDescriptors(rawDescriptorBytes, sourceFile,
           Key.SNOWFLAKE_STATS_END, SnowflakeStatsImpl.class);
+    } else if (firstLines.startsWith("@type bridgedb-metrics 1.")
+        || firstLines.startsWith(Key.BRIDGEDB_METRICS_END.keyword + SP)
+        || firstLines.contains(NL + Key.BRIDGEDB_METRICS_END.keyword + SP)) {
+      return this.parseOneOrMoreDescriptors(rawDescriptorBytes, sourceFile,
+          Key.BRIDGEDB_METRICS_END, BridgedbMetricsImpl.class);
     } else if (fileName.contains(LogDescriptorImpl.MARKER)) {
       return LogDescriptorImpl.parse(rawDescriptorBytes, sourceFile, fileName);
     } else if (firstLines.startsWith("@type bandwidth-file 1.")
diff --git a/src/main/java/org/torproject/descriptor/impl/DescriptorReaderImpl.java b/src/main/java/org/torproject/descriptor/impl/DescriptorReaderImpl.java
index 90ef2c6..207baca 100644
--- a/src/main/java/org/torproject/descriptor/impl/DescriptorReaderImpl.java
+++ b/src/main/java/org/torproject/descriptor/impl/DescriptorReaderImpl.java
@@ -10,7 +10,9 @@ import org.torproject.descriptor.DescriptorReader;
 import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
 import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
 import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
+import org.apache.commons.compress.utils.IOUtils;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -21,6 +23,7 @@ import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.util.ArrayList;
@@ -325,10 +328,18 @@ public class DescriptorReaderImpl implements DescriptorReader {
     }
 
     private void readDescriptorFile(File file) throws IOException {
-      byte[] rawDescriptorBytes = Files.readAllBytes(file.toPath());
-      for (Descriptor descriptor : this.descriptorParser.parseDescriptors(
-          rawDescriptorBytes, file, file.getName())) {
-        this.descriptorQueue.add(descriptor);
+      try (FileInputStream fis = new FileInputStream(file)) {
+        InputStream is = fis;
+        if (file.getName().endsWith(".gz")) {
+          is = new GzipCompressorInputStream(fis);
+        }
+        byte[] rawDescriptorBytes = IOUtils.toByteArray(is);
+        if (rawDescriptorBytes.length > 0) {
+          for (Descriptor descriptor : this.descriptorParser.parseDescriptors(
+              rawDescriptorBytes, file, file.getName())) {
+            this.descriptorQueue.add(descriptor);
+          }
+        }
       }
     }
   }
diff --git a/src/main/java/org/torproject/descriptor/impl/Key.java b/src/main/java/org/torproject/descriptor/impl/Key.java
index cf6ed09..ac12992 100644
--- a/src/main/java/org/torproject/descriptor/impl/Key.java
+++ b/src/main/java/org/torproject/descriptor/impl/Key.java
@@ -18,6 +18,9 @@ public enum Key {
   ALLOW_SINGLE_HOP_EXITS("allow-single-hop-exits"),
   BANDWIDTH("bandwidth"),
   BANDWIDTH_WEIGHTS("bandwidth-weights"),
+  BRIDGEDB_METRICS_END("bridgedb-metrics-end"),
+  BRIDGEDB_METRICS_VERSION("bridgedb-metrics-version"),
+  BRIDGEDB_METRIC_COUNT("bridgedb-metric-count"),
   BRIDGE_IPS("bridge-ips"),
   BRIDGE_IP_TRANSPORTS("bridge-ip-transports"),
   BRIDGE_IP_VERSIONS("bridge-ip-versions"),
diff --git a/src/main/java/org/torproject/descriptor/impl/ParseHelper.java b/src/main/java/org/torproject/descriptor/impl/ParseHelper.java
index ba45ff6..53f011c 100644
--- a/src/main/java/org/torproject/descriptor/impl/ParseHelper.java
+++ b/src/main/java/org/torproject/descriptor/impl/ParseHelper.java
@@ -115,6 +115,20 @@ public class ParseHelper {
     return Duration.ofSeconds(parsedSeconds);
   }
 
+  protected static Long parseLong(String line, String[] parts, int index)
+      throws DescriptorParseException {
+    if (index >= parts.length) {
+      throw new DescriptorParseException(String.format(
+          "Line '%s' does not contain a long value at index %d.", line, index));
+    }
+    try {
+      return Long.parseLong(parts[index]);
+    } catch (NumberFormatException e) {
+      throw new DescriptorParseException(String.format(
+          "Unable to parse long value '%s' in line '%s'.", parts[index], line));
+    }
+  }
+
   protected static String parseExitPattern(String line, String exitPattern)
       throws DescriptorParseException {
     if (!exitPattern.contains(":")) {
diff --git a/src/main/java/org/torproject/descriptor/impl/SnowflakeStatsImpl.java b/src/main/java/org/torproject/descriptor/impl/SnowflakeStatsImpl.java
index 2f46bbe..9922756 100644
--- a/src/main/java/org/torproject/descriptor/impl/SnowflakeStatsImpl.java
+++ b/src/main/java/org/torproject/descriptor/impl/SnowflakeStatsImpl.java
@@ -101,36 +101,22 @@ public class SnowflakeStatsImpl extends DescriptorImpl
 
   private void parseSnowflakeIpsTotal(String line, String[] parts)
       throws DescriptorParseException {
-    this.snowflakeIpsTotal = parseLong(line, parts, 1);
+    this.snowflakeIpsTotal = ParseHelper.parseLong(line, parts, 1);
   }
 
   private void parseSnowflakeIdleCount(String line, String[] parts)
       throws DescriptorParseException {
-    this.snowflakeIdleCount = parseLong(line, parts, 1);
+    this.snowflakeIdleCount = ParseHelper.parseLong(line, parts, 1);
   }
 
   private void parseClientDeniedCount(String line, String[] parts)
       throws DescriptorParseException {
-    this.clientDeniedCount = parseLong(line, parts, 1);
+    this.clientDeniedCount = ParseHelper.parseLong(line, parts, 1);
   }
 
   private void parseClientSnowflakeMatchCount(String line, String[] parts)
       throws DescriptorParseException {
-    this.clientSnowflakeMatchCount = parseLong(line, parts, 1);
-  }
-
-  private static Long parseLong(String line, String[] parts, int index)
-      throws DescriptorParseException {
-    if (index >= parts.length) {
-      throw new DescriptorParseException(String.format(
-          "Line '%s' does not contain a long value at index %d.", line, index));
-    }
-    try {
-      return Long.parseLong(parts[index]);
-    } catch (NumberFormatException e) {
-      throw new DescriptorParseException(String.format(
-          "Unable to parse long value '%s' in line '%s'.", parts[index], line));
-    }
+    this.clientSnowflakeMatchCount = ParseHelper.parseLong(line, parts, 1);
   }
 
   private LocalDateTime snowflakeStatsEnd;
diff --git a/src/test/java/org/torproject/descriptor/impl/BridgedbMetricsImplTest.java b/src/test/java/org/torproject/descriptor/impl/BridgedbMetricsImplTest.java
new file mode 100644
index 0000000..8c0e2d5
--- /dev/null
+++ b/src/test/java/org/torproject/descriptor/impl/BridgedbMetricsImplTest.java
@@ -0,0 +1,118 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.descriptor.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.descriptor.DescriptorParseException;
+
+import org.hamcrest.Matchers;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import java.time.Duration;
+import java.time.LocalDateTime;
+
+public class BridgedbMetricsImplTest {
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  /**
+   * Example taken from BridgeDB metrics from 2019-09-17.
+   */
+  private static final String[] exampleBridgedbMetricsLog = new String[] {
+      "bridgedb-metrics-end 2019-09-17 00:33:44 (86400 s)",
+      "bridgedb-metrics-version 1",
+      "bridgedb-metric-count https.obfs3.ru.success.none 10",
+      "bridgedb-metric-count https.obfs3.sk.success.none 10",
+      "bridgedb-metric-count https.fte.de.fail.none 10" };
+
+  @Test
+  public void testExampleMetricsLog() throws DescriptorParseException {
+    BridgedbMetrics bridgedbMetrics = new BridgedbMetricsImpl(
+        new TestDescriptorBuilder(exampleBridgedbMetricsLog).build(), null);
+    assertEquals(LocalDateTime.of(2019, 9, 17, 0, 33, 44),
+        bridgedbMetrics.bridgedbMetricsEnd());
+    assertEquals(Duration.ofDays(1L),
+        bridgedbMetrics.bridgedbMetricsIntervalLength());
+    assertEquals("1", bridgedbMetrics.bridgedbMetricsVersion());
+    assertTrue(bridgedbMetrics.bridgedbMetricCounts().isPresent());
+    assertEquals(3, bridgedbMetrics.bridgedbMetricCounts().get().size());
+    assertEquals((Long) 10L, bridgedbMetrics.bridgedbMetricCounts().get()
+        .get("https.obfs3.ru.success.none"));
+    assertEquals((Long) 10L, bridgedbMetrics.bridgedbMetricCounts().get()
+        .get("https.obfs3.sk.success.none"));
+    assertEquals((Long) 10L, bridgedbMetrics.bridgedbMetricCounts().get()
+        .get("https.fte.de.fail.none"));
+  }
+
+  @Test
+  public void testMinimalBridgedbMetrics() throws DescriptorParseException {
+    BridgedbMetrics bridgedbMetrics = new BridgedbMetricsImpl(
+        new TestDescriptorBuilder(exampleBridgedbMetricsLog[0],
+            exampleBridgedbMetricsLog[1]).build(), null);
+    assertEquals(LocalDateTime.of(2019, 9, 17, 0, 33, 44),
+        bridgedbMetrics.bridgedbMetricsEnd());
+    assertEquals(Duration.ofDays(1L),
+        bridgedbMetrics.bridgedbMetricsIntervalLength());
+    assertEquals("1", bridgedbMetrics.bridgedbMetricsVersion());
+    assertFalse(bridgedbMetrics.bridgedbMetricCounts().isPresent());
+  }
+
+  @Test
+  public void testEmptyLine() throws DescriptorParseException {
+    this.thrown.expect(DescriptorParseException.class);
+    this.thrown.expectMessage(Matchers.containsString(
+        "Blank lines are not allowed."));
+    new BridgedbMetricsImpl(new TestDescriptorBuilder(exampleBridgedbMetricsLog)
+        .appendLines("")
+        .build(), null);
+  }
+
+  @Test
+  public void testDuplicateLine() throws DescriptorParseException {
+    this.thrown.expect(DescriptorParseException.class);
+    this.thrown.expectMessage(Matchers.containsString(
+        "must be contained exactly once."));
+    new BridgedbMetricsImpl(new TestDescriptorBuilder(
+        exampleBridgedbMetricsLog[0], exampleBridgedbMetricsLog[1],
+        exampleBridgedbMetricsLog[1]).build(), null);
+  }
+
+  @Test
+  public void testDuplicateKey() throws DescriptorParseException {
+    this.thrown.expect(DescriptorParseException.class);
+    this.thrown.expectMessage(Matchers.containsString("Duplicate key"));
+    new BridgedbMetricsImpl(new TestDescriptorBuilder(
+        exampleBridgedbMetricsLog[0], exampleBridgedbMetricsLog[1],
+        exampleBridgedbMetricsLog[2], exampleBridgedbMetricsLog[2])
+        .build(), null);
+  }
+
+  @Test
+  public void testNoValue() throws DescriptorParseException {
+    this.thrown.expect(DescriptorParseException.class);
+    this.thrown.expectMessage(Matchers.containsString(
+        "Unable to parse long value '10-ish' in line"));
+    new BridgedbMetricsImpl(new TestDescriptorBuilder(
+        exampleBridgedbMetricsLog[0], exampleBridgedbMetricsLog[1],
+        exampleBridgedbMetricsLog[2] + "-ish").build(), null);
+  }
+
+  @Test
+  public void testNonPositiveIntervalLength() throws DescriptorParseException {
+    this.thrown.expect(DescriptorParseException.class);
+    this.thrown.expectMessage(Matchers.containsString(
+        "Duration must be positive"));
+    new BridgedbMetricsImpl(new TestDescriptorBuilder(
+        "bridgedb-metrics-end 2019-09-17 00:33:44 (0 s)",
+        exampleBridgedbMetricsLog[1]).build(), null);
+  }
+}
+





More information about the tor-commits mailing list