commit 09d7311df6c78e354fc99be34e8226772d9193b7
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Sep 18 10:22:48 2019 +0200
Add new BridgedbMetrics descriptor type.
Also extend DescriptorReader#readDescriptors to support .gz-compressed
files, which will be necessary to process files rsync'ed from BridgeDB
and may be useful for other purposes, too.
Implements part of #19332.
---
CHANGELOG.md | 8 ++
.../org/torproject/descriptor/BridgedbMetrics.java | 70 +++++++++++
.../descriptor/impl/BridgedbMetricsImpl.java | 136 +++++++++++++++++++++
.../descriptor/impl/DescriptorParserImpl.java | 5 +
.../descriptor/impl/DescriptorReaderImpl.java | 19 ++-
.../java/org/torproject/descriptor/impl/Key.java | 3 +
.../torproject/descriptor/impl/ParseHelper.java | 14 +++
.../descriptor/impl/SnowflakeStatsImpl.java | 22 +---
.../descriptor/impl/BridgedbMetricsImplTest.java | 118 ++++++++++++++++++
9 files changed, 373 insertions(+), 22 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 457533e..23b1ca9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+# Changes in version 2.?.? - 2019-??-??
+
+ * Medium changes
+ - Extend DescriptorReader#readDescriptors to support .gz-compressed
+ files.
+ - Add new BridgedbMetrics descriptor type.
+
+
# Changes in version 2.7.0 - 2019-09-06
* Medium changes
diff --git a/src/main/java/org/torproject/descriptor/BridgedbMetrics.java b/src/main/java/org/torproject/descriptor/BridgedbMetrics.java
new file mode 100644
index 0000000..68d9d4f
--- /dev/null
+++ b/src/main/java/org/torproject/descriptor/BridgedbMetrics.java
@@ -0,0 +1,70 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.descriptor;
+
+import java.time.Duration;
+import java.time.LocalDateTime;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Contains aggregated information about requests to the BridgeDB service.
+ *
+ * <p>The values are taken from the {@code bridgedb-metrics-end},
+ * {@code bridgedb-metrics-version}, and {@code bridgedb-metric-count} lines
+ * of a BridgeDB metrics descriptor.</p>
+ *
+ * @since 2.8.0
+ */
+public interface BridgedbMetrics extends Descriptor {
+
+ /**
+ * Return the end of the included measurement interval, as given by the date
+ * and time in the {@code bridgedb-metrics-end} line.
+ *
+ * @return End of the included measurement interval.
+ * @since 2.8.0
+ */
+ LocalDateTime bridgedbMetricsEnd();
+
+ /**
+ * Return the length of the included measurement interval, as given by the
+ * number of seconds in parentheses at the end of the
+ * {@code bridgedb-metrics-end} line, for example, {@code (86400 s)}.
+ *
+ * @return Length of the included measurement interval.
+ * @since 2.8.0
+ */
+ Duration bridgedbMetricsIntervalLength();
+
+ /**
+ * Return the BridgeDB metrics format version, which is the first argument
+ * of the {@code bridgedb-metrics-version} line, for example, {@code 1}.
+ *
+ * @return BridgeDB metrics format version.
+ * @since 2.8.0
+ */
+ String bridgedbMetricsVersion();
+
+ /**
+ * Return approximate request numbers to the BridgeDB service in the
+ * measurement interval broken down by distribution mechanism, obfuscation
+ * protocol, and country code.
+ *
+ * <p>Keys are formatted as {@code DIST.PROTO.CC/EMAIL.[success|fail].none}
+ * where:</p>
+ * <ul>
+ * <li>{@code DIST} is BridgeDB's distribution mechanism, for example,
+ * {@code http}, {@code email}, or {@code moat};</li>
+ * <li>{@code PROTO} is the obfuscation protocol, for example, {@code obfs2},
+ * {@code obfs3}, {@code obfs4}, {@code scramblesuit}, or {@code fte};</li>
+ * <li>{@code CC/EMAIL} is either a two-letter country code or an email
+ * provider;</li>
+ * <li>the second-to-last field is either {@code success} or {@code fail}
+ * depending on if the BridgeDB request succeeded or not; and</li>
+ * <li>the last field is reserved for an anomaly score to be added in the
+ * future.</li>
+ * </ul>
+ *
+ * <p>Values are approximate request numbers, rounded up to the next multiple
+ * of 10.</p>
+ *
+ * <p>Each key occurs at most once, with one entry per
+ * {@code bridgedb-metric-count} line. The returned {@code Optional} is empty
+ * if the descriptor does not contain any such line.</p>
+ *
+ * @return Map of approximate request numbers, or an empty {@code Optional}
+ * if the descriptor contains no request numbers.
+ * @since 2.8.0
+ */
+ Optional<Map<String, Long>> bridgedbMetricCounts();
+}
+
diff --git a/src/main/java/org/torproject/descriptor/impl/BridgedbMetricsImpl.java b/src/main/java/org/torproject/descriptor/impl/BridgedbMetricsImpl.java
new file mode 100644
index 0000000..f683067
--- /dev/null
+++ b/src/main/java/org/torproject/descriptor/impl/BridgedbMetricsImpl.java
@@ -0,0 +1,136 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.descriptor.impl;
+
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.descriptor.DescriptorParseException;
+
+import java.io.File;
+import java.time.Duration;
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Scanner;
+import java.util.Set;
+
+/**
+ * Implementation of {@link BridgedbMetrics} that parses a BridgeDB metrics
+ * descriptor from its raw bytes.
+ *
+ * <p>Recognized lines are {@code bridgedb-metrics-end},
+ * {@code bridgedb-metrics-version}, and {@code bridgedb-metric-count}. Lines
+ * starting with {@code @} are skipped, and lines with any other keyword are
+ * kept as unrecognized lines. After parsing, the inherited key checks are
+ * run for the end and version keys (exactly once) and for the first key
+ * ({@code bridgedb-metrics-end}).</p>
+ */
+public class BridgedbMetricsImpl extends DescriptorImpl
+    implements BridgedbMetrics {
+
+  /** Keys handed to the inherited exactly-once check. */
+  private static final Set<Key> exactlyOnce = EnumSet.of(
+      Key.BRIDGEDB_METRICS_END, Key.BRIDGEDB_METRICS_VERSION);
+
+  /**
+   * Parse the descriptor found in the given section of the raw bytes.
+   *
+   * @param rawDescriptorBytes Raw bytes containing the descriptor.
+   * @param offsetAndLength Offset and length of the descriptor within the raw
+   *     bytes.
+   * @param descriptorFile File the descriptor was read from, may be
+   *     {@code null}.
+   * @throws DescriptorParseException If a line is malformed or one of the key
+   *     checks fails.
+   */
+  BridgedbMetricsImpl(byte[] rawDescriptorBytes, int[] offsetAndLength,
+      File descriptorFile) throws DescriptorParseException {
+    super(rawDescriptorBytes, offsetAndLength, descriptorFile, false);
+    this.parseDescriptorBytes();
+    this.checkExactlyOnceKeys(exactlyOnce);
+    this.checkFirstKey(Key.BRIDGEDB_METRICS_END);
+    this.clearParsedKeys();
+  }
+
+  /**
+   * Parse the descriptor that spans all of the given raw bytes.
+   *
+   * @param rawDescriptorBytes Raw bytes of exactly one descriptor.
+   * @param descriptorFile File the descriptor was read from, may be
+   *     {@code null}.
+   * @throws DescriptorParseException If a line is malformed or one of the key
+   *     checks fails.
+   */
+  BridgedbMetricsImpl(byte[] rawDescriptorBytes, File descriptorFile)
+      throws DescriptorParseException {
+    this(rawDescriptorBytes, new int[] { 0, rawDescriptorBytes.length },
+        descriptorFile);
+  }
+
+  /**
+   * Go through the descriptor line by line and dispatch each line to the
+   * parse method for its keyword.
+   */
+  private void parseDescriptorBytes() throws DescriptorParseException {
+    Scanner scanner = this.newScanner().useDelimiter(NL);
+    while (scanner.hasNext()) {
+      String line = scanner.next();
+      if (line.startsWith("@")) {
+        /* Skip annotation lines. */
+        continue;
+      }
+      String[] parts = line.split("[ \t]+");
+      Key key = Key.get(parts[0]);
+      switch (key) {
+        case BRIDGEDB_METRICS_END:
+          this.parseBridgedbMetricsEnd(line, parts);
+          break;
+        case BRIDGEDB_METRICS_VERSION:
+          this.parseBridgedbMetricsVersion(line, parts);
+          break;
+        case BRIDGEDB_METRIC_COUNT:
+          this.parseBridgedbMetricCount(line, parts);
+          break;
+        case INVALID:
+        default:
+          /* Validate the keyword before storing the line as unrecognized. */
+          ParseHelper.parseKeyword(line, parts[0]);
+          if (this.unrecognizedLines == null) {
+            this.unrecognizedLines = new ArrayList<>();
+          }
+          this.unrecognizedLines.add(line);
+      }
+    }
+  }
+
+  /**
+   * Parse a line like
+   * {@code bridgedb-metrics-end 2019-09-17 00:33:44 (86400 s)}.
+   */
+  private void parseBridgedbMetricsEnd(String line, String[] parts)
+      throws DescriptorParseException {
+    /* The interval length is split into "(86400" and "s)". */
+    if (parts.length < 5 || parts[3].length() < 2 || !parts[3].startsWith("(")
+        || !parts[4].equals("s)")) {
+      throw new DescriptorParseException("Illegal line '" + line + "'.");
+    }
+    this.bridgedbMetricsEnd = ParseHelper.parseLocalDateTime(line, parts,
+        1, 2);
+    this.bridgedbMetricsIntervalLength = ParseHelper.parseDuration(line,
+        parts[3].substring(1));
+  }
+
+  /** Parse a line like {@code bridgedb-metrics-version 1}. */
+  private void parseBridgedbMetricsVersion(String line, String[] parts)
+      throws DescriptorParseException {
+    if (parts.length < 2) {
+      throw new DescriptorParseException("Illegal line '" + line + "'.");
+    }
+    this.bridgedbMetricsVersion = parts[1];
+  }
+
+  /**
+   * Parse a line like
+   * {@code bridgedb-metric-count https.obfs3.ru.success.none 10} and reject
+   * keys that have been seen before in this descriptor.
+   */
+  private void parseBridgedbMetricCount(String line, String[] parts)
+      throws DescriptorParseException {
+    if (parts.length < 3) {
+      throw new DescriptorParseException("Illegal line '" + line + "'.");
+    }
+    if (null == this.bridgedbMetricCounts) {
+      this.bridgedbMetricCounts = new LinkedHashMap<>();
+    }
+    String key = parts[1];
+    if (this.bridgedbMetricCounts.containsKey(key)) {
+      throw new DescriptorParseException("Duplicate key '" + key + "' in line '"
+          + line + "'.");
+    }
+    long value = ParseHelper.parseLong(line, parts, 2);
+    this.bridgedbMetricCounts.put(key, value);
+  }
+
+  private LocalDateTime bridgedbMetricsEnd;
+
+  @Override
+  public LocalDateTime bridgedbMetricsEnd() {
+    return this.bridgedbMetricsEnd;
+  }
+
+  private Duration bridgedbMetricsIntervalLength;
+
+  @Override
+  public Duration bridgedbMetricsIntervalLength() {
+    return this.bridgedbMetricsIntervalLength;
+  }
+
+  private String bridgedbMetricsVersion;
+
+  @Override
+  public String bridgedbMetricsVersion() {
+    return this.bridgedbMetricsVersion;
+  }
+
+  /* Remains null if the descriptor has no bridgedb-metric-count line. */
+  private Map<String, Long> bridgedbMetricCounts;
+
+  @Override
+  public Optional<Map<String, Long>> bridgedbMetricCounts() {
+    if (null == this.bridgedbMetricCounts) {
+      return Optional.empty();
+    }
+    /* Hand out a copy, in the same order, so that callers cannot modify the
+     * parsed content of this descriptor. */
+    Map<String, Long> copy = new LinkedHashMap<>(this.bridgedbMetricCounts);
+    return Optional.of(copy);
+  }
+}
+
diff --git a/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java b/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java
index 9b620cb..25494f4 100644
--- a/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java
+++ b/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java
@@ -135,6 +135,11 @@ public class DescriptorParserImpl implements DescriptorParser {
|| firstLines.contains(NL + Key.SNOWFLAKE_STATS_END.keyword + SP)) {
return this.parseOneOrMoreDescriptors(rawDescriptorBytes, sourceFile,
Key.SNOWFLAKE_STATS_END, SnowflakeStatsImpl.class);
+ } else if (firstLines.startsWith("@type bridgedb-metrics 1.")
+ || firstLines.startsWith(Key.BRIDGEDB_METRICS_END.keyword + SP)
+ || firstLines.contains(NL + Key.BRIDGEDB_METRICS_END.keyword + SP)) {
+ return this.parseOneOrMoreDescriptors(rawDescriptorBytes, sourceFile,
+ Key.BRIDGEDB_METRICS_END, BridgedbMetricsImpl.class);
} else if (fileName.contains(LogDescriptorImpl.MARKER)) {
return LogDescriptorImpl.parse(rawDescriptorBytes, sourceFile, fileName);
} else if (firstLines.startsWith("@type bandwidth-file 1.")
diff --git a/src/main/java/org/torproject/descriptor/impl/DescriptorReaderImpl.java b/src/main/java/org/torproject/descriptor/impl/DescriptorReaderImpl.java
index 90ef2c6..207baca 100644
--- a/src/main/java/org/torproject/descriptor/impl/DescriptorReaderImpl.java
+++ b/src/main/java/org/torproject/descriptor/impl/DescriptorReaderImpl.java
@@ -10,7 +10,9 @@ import org.torproject.descriptor.DescriptorReader;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
+import org.apache.commons.compress.utils.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -21,6 +23,7 @@ import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
@@ -325,10 +328,18 @@ public class DescriptorReaderImpl implements DescriptorReader {
}
+  /**
+   * Read the given file, decompressing it first if its name ends with
+   * {@code .gz}, parse its content, and add all contained descriptors to the
+   * descriptor queue. Files without any content are skipped.
+   *
+   * @param file Descriptor file to read.
+   * @throws IOException If the file cannot be read or decompressed.
+   */
private void readDescriptorFile(File file) throws IOException {
- byte[] rawDescriptorBytes = Files.readAllBytes(file.toPath());
- for (Descriptor descriptor : this.descriptorParser.parseDescriptors(
- rawDescriptorBytes, file, file.getName())) {
- this.descriptorQueue.add(descriptor);
+    /* Declare the decompressing stream as a resource of its own, so that it is
+     * closed together with the underlying file stream rather than being left
+     * for garbage collection. For uncompressed files both resources are the
+     * same stream, which is then closed twice; that is harmless.
+     * NOTE(review): the single-argument GzipCompressorInputStream constructor
+     * stops after the first member of a concatenated .gz file; confirm that
+     * files from BridgeDB never consist of more than one member. */
+    try (FileInputStream fis = new FileInputStream(file);
+        InputStream is = file.getName().endsWith(".gz")
+            ? new GzipCompressorInputStream(fis) : fis) {
+      byte[] rawDescriptorBytes = IOUtils.toByteArray(is);
+      if (rawDescriptorBytes.length > 0) {
+        for (Descriptor descriptor : this.descriptorParser.parseDescriptors(
+            rawDescriptorBytes, file, file.getName())) {
+          this.descriptorQueue.add(descriptor);
+        }
+      }
+    }
}
}
diff --git a/src/main/java/org/torproject/descriptor/impl/Key.java b/src/main/java/org/torproject/descriptor/impl/Key.java
index cf6ed09..ac12992 100644
--- a/src/main/java/org/torproject/descriptor/impl/Key.java
+++ b/src/main/java/org/torproject/descriptor/impl/Key.java
@@ -18,6 +18,9 @@ public enum Key {
ALLOW_SINGLE_HOP_EXITS("allow-single-hop-exits"),
BANDWIDTH("bandwidth"),
BANDWIDTH_WEIGHTS("bandwidth-weights"),
+ BRIDGEDB_METRICS_END("bridgedb-metrics-end"),
+ BRIDGEDB_METRICS_VERSION("bridgedb-metrics-version"),
+ BRIDGEDB_METRIC_COUNT("bridgedb-metric-count"),
BRIDGE_IPS("bridge-ips"),
BRIDGE_IP_TRANSPORTS("bridge-ip-transports"),
BRIDGE_IP_VERSIONS("bridge-ip-versions"),
diff --git a/src/main/java/org/torproject/descriptor/impl/ParseHelper.java b/src/main/java/org/torproject/descriptor/impl/ParseHelper.java
index ba45ff6..53f011c 100644
--- a/src/main/java/org/torproject/descriptor/impl/ParseHelper.java
+++ b/src/main/java/org/torproject/descriptor/impl/ParseHelper.java
@@ -115,6 +115,20 @@ public class ParseHelper {
return Duration.ofSeconds(parsedSeconds);
}
+  /**
+   * Parse the element at the given index of the given line parts as long
+   * value.
+   *
+   * @param line Complete line, only used in exception messages.
+   * @param parts Line split into its parts.
+   * @param index Index of the part to parse.
+   * @return Parsed long value.
+   * @throws DescriptorParseException If there is no part at the given index
+   *     or if that part is not a valid long value.
+   */
+  protected static Long parseLong(String line, String[] parts, int index)
+      throws DescriptorParseException {
+    if (parts.length <= index) {
+      throw new DescriptorParseException(String.format(
+          "Line '%s' does not contain a long value at index %d.", line, index));
+    }
+    final String candidate = parts[index];
+    try {
+      return Long.valueOf(candidate);
+    } catch (NumberFormatException e) {
+      throw new DescriptorParseException(String.format(
+          "Unable to parse long value '%s' in line '%s'.", candidate, line));
+    }
+  }
+
protected static String parseExitPattern(String line, String exitPattern)
throws DescriptorParseException {
if (!exitPattern.contains(":")) {
diff --git a/src/main/java/org/torproject/descriptor/impl/SnowflakeStatsImpl.java b/src/main/java/org/torproject/descriptor/impl/SnowflakeStatsImpl.java
index 2f46bbe..9922756 100644
--- a/src/main/java/org/torproject/descriptor/impl/SnowflakeStatsImpl.java
+++ b/src/main/java/org/torproject/descriptor/impl/SnowflakeStatsImpl.java
@@ -101,36 +101,22 @@ public class SnowflakeStatsImpl extends DescriptorImpl
+  /* Parse the part at index 1 as long value using the shared ParseHelper
+   * method and store it in snowflakeIpsTotal; a missing or unparseable value
+   * results in a DescriptorParseException. */
private void parseSnowflakeIpsTotal(String line, String[] parts)
throws DescriptorParseException {
- this.snowflakeIpsTotal = parseLong(line, parts, 1);
+ this.snowflakeIpsTotal = ParseHelper.parseLong(line, parts, 1);
}
+  /* Parse the part at index 1 as long value using the shared ParseHelper
+   * method and store it in snowflakeIdleCount; a missing or unparseable value
+   * results in a DescriptorParseException. */
private void parseSnowflakeIdleCount(String line, String[] parts)
throws DescriptorParseException {
- this.snowflakeIdleCount = parseLong(line, parts, 1);
+ this.snowflakeIdleCount = ParseHelper.parseLong(line, parts, 1);
}
+  /* Parse the part at index 1 as long value using the shared ParseHelper
+   * method and store it in clientDeniedCount; a missing or unparseable value
+   * results in a DescriptorParseException. */
private void parseClientDeniedCount(String line, String[] parts)
throws DescriptorParseException {
- this.clientDeniedCount = parseLong(line, parts, 1);
+ this.clientDeniedCount = ParseHelper.parseLong(line, parts, 1);
}
+  /* Parse the part at index 1 as long value using the shared ParseHelper
+   * method and store it in clientSnowflakeMatchCount; a missing or
+   * unparseable value results in a DescriptorParseException. */
private void parseClientSnowflakeMatchCount(String line, String[] parts)
throws DescriptorParseException {
- this.clientSnowflakeMatchCount = parseLong(line, parts, 1);
- }
-
- private static Long parseLong(String line, String[] parts, int index)
- throws DescriptorParseException {
- if (index >= parts.length) {
- throw new DescriptorParseException(String.format(
- "Line '%s' does not contain a long value at index %d.", line, index));
- }
- try {
- return Long.parseLong(parts[index]);
- } catch (NumberFormatException e) {
- throw new DescriptorParseException(String.format(
- "Unable to parse long value '%s' in line '%s'.", parts[index], line));
- }
+ this.clientSnowflakeMatchCount = ParseHelper.parseLong(line, parts, 1);
}
private LocalDateTime snowflakeStatsEnd;
diff --git a/src/test/java/org/torproject/descriptor/impl/BridgedbMetricsImplTest.java b/src/test/java/org/torproject/descriptor/impl/BridgedbMetricsImplTest.java
new file mode 100644
index 0000000..8c0e2d5
--- /dev/null
+++ b/src/test/java/org/torproject/descriptor/impl/BridgedbMetricsImplTest.java
@@ -0,0 +1,118 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.descriptor.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.descriptor.DescriptorParseException;
+
+import org.hamcrest.Matchers;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import java.time.Duration;
+import java.time.LocalDateTime;
+
+/**
+ * Tests parsing of BridgeDB metrics descriptors, both well-formed ones and
+ * ones that are expected to be rejected.
+ */
+public class BridgedbMetricsImplTest {
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  /**
+   * Example taken from BridgeDB metrics from 2019-09-17: the end line, the
+   * version line, and three count lines.
+   */
+  private static final String[] exampleBridgedbMetricsLog = new String[] {
+      "bridgedb-metrics-end 2019-09-17 00:33:44 (86400 s)",
+      "bridgedb-metrics-version 1",
+      "bridgedb-metric-count https.obfs3.ru.success.none 10",
+      "bridgedb-metric-count https.obfs3.sk.success.none 10",
+      "bridgedb-metric-count https.fte.de.fail.none 10" };
+
+  /** Build a descriptor from the given lines and parse it. */
+  private static BridgedbMetrics parse(String... lines)
+      throws DescriptorParseException {
+    byte[] rawDescriptorBytes = new TestDescriptorBuilder(lines).build();
+    return new BridgedbMetricsImpl(rawDescriptorBytes, null);
+  }
+
+  /** Assert the values of the first two example lines. */
+  private static void assertExampleHeader(BridgedbMetrics parsed) {
+    assertEquals(LocalDateTime.of(2019, 9, 17, 0, 33, 44),
+        parsed.bridgedbMetricsEnd());
+    assertEquals(Duration.ofSeconds(86400L),
+        parsed.bridgedbMetricsIntervalLength());
+    assertEquals("1", parsed.bridgedbMetricsVersion());
+  }
+
+  /** The complete example is parsed into header values and three counts. */
+  @Test
+  public void testExampleMetricsLog() throws DescriptorParseException {
+    BridgedbMetrics parsed = parse(exampleBridgedbMetricsLog);
+    assertExampleHeader(parsed);
+    assertTrue(parsed.bridgedbMetricCounts().isPresent());
+    assertEquals(3, parsed.bridgedbMetricCounts().get().size());
+    String[] expectedKeys = new String[] {
+        "https.obfs3.ru.success.none",
+        "https.obfs3.sk.success.none",
+        "https.fte.de.fail.none" };
+    for (String expectedKey : expectedKeys) {
+      assertEquals(Long.valueOf(10L),
+          parsed.bridgedbMetricCounts().get().get(expectedKey));
+    }
+  }
+
+  /** Without count lines there are header values but no counts. */
+  @Test
+  public void testMinimalBridgedbMetrics() throws DescriptorParseException {
+    BridgedbMetrics parsed = parse(exampleBridgedbMetricsLog[0],
+        exampleBridgedbMetricsLog[1]);
+    assertExampleHeader(parsed);
+    assertFalse(parsed.bridgedbMetricCounts().isPresent());
+  }
+
+  /** An appended blank line is rejected. */
+  @Test
+  public void testEmptyLine() throws DescriptorParseException {
+    this.thrown.expect(DescriptorParseException.class);
+    this.thrown.expectMessage(Matchers.containsString(
+        "Blank lines are not allowed."));
+    TestDescriptorBuilder builder
+        = new TestDescriptorBuilder(exampleBridgedbMetricsLog);
+    new BridgedbMetricsImpl(builder.appendLines("").build(), null);
+  }
+
+  /** A repeated version line is rejected. */
+  @Test
+  public void testDuplicateLine() throws DescriptorParseException {
+    this.thrown.expect(DescriptorParseException.class);
+    this.thrown.expectMessage(Matchers.containsString(
+        "must be contained exactly once."));
+    parse(exampleBridgedbMetricsLog[0], exampleBridgedbMetricsLog[1],
+        exampleBridgedbMetricsLog[1]);
+  }
+
+  /** A repeated count key is rejected. */
+  @Test
+  public void testDuplicateKey() throws DescriptorParseException {
+    this.thrown.expect(DescriptorParseException.class);
+    this.thrown.expectMessage(Matchers.containsString("Duplicate key"));
+    parse(exampleBridgedbMetricsLog[0], exampleBridgedbMetricsLog[1],
+        exampleBridgedbMetricsLog[2], exampleBridgedbMetricsLog[2]);
+  }
+
+  /** A count value that is not a valid long value is rejected. */
+  @Test
+  public void testNoValue() throws DescriptorParseException {
+    this.thrown.expect(DescriptorParseException.class);
+    this.thrown.expectMessage(Matchers.containsString(
+        "Unable to parse long value '10-ish' in line"));
+    parse(exampleBridgedbMetricsLog[0], exampleBridgedbMetricsLog[1],
+        exampleBridgedbMetricsLog[2] + "-ish");
+  }
+
+  /** An interval length of zero seconds is rejected. */
+  @Test
+  public void testNonPositiveIntervalLength() throws DescriptorParseException {
+    this.thrown.expect(DescriptorParseException.class);
+    this.thrown.expectMessage(Matchers.containsString(
+        "Duration must be positive"));
+    parse("bridgedb-metrics-end 2019-09-17 00:33:44 (0 s)",
+        exampleBridgedbMetricsLog[1]);
+  }
+}
+