commit 09d7311df6c78e354fc99be34e8226772d9193b7 Author: Karsten Loesing karsten.loesing@gmx.net Date: Wed Sep 18 10:22:48 2019 +0200
Add new BridgedbMetrics descriptor type.
Also extend DescriptorReader#readDescriptors to support .gz-compressed files, which will be necessary to process files rsync'ed from BridgeDB. This may be useful for other purposes, too.
Implements part of #19332. --- CHANGELOG.md | 8 ++ .../org/torproject/descriptor/BridgedbMetrics.java | 70 +++++++++++ .../descriptor/impl/BridgedbMetricsImpl.java | 136 +++++++++++++++++++++ .../descriptor/impl/DescriptorParserImpl.java | 5 + .../descriptor/impl/DescriptorReaderImpl.java | 19 ++- .../java/org/torproject/descriptor/impl/Key.java | 3 + .../torproject/descriptor/impl/ParseHelper.java | 14 +++ .../descriptor/impl/SnowflakeStatsImpl.java | 22 +--- .../descriptor/impl/BridgedbMetricsImplTest.java | 118 ++++++++++++++++++ 9 files changed, 373 insertions(+), 22 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md index 457533e..23b1ca9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# Changes in version 2.?.? - 2019-??-?? + + * Medium changes + - Extend DescriptorReader#readDescriptors to support .gz-compressed + files. + - Add new BridgedbMetrics descriptor type. + + # Changes in version 2.7.0 - 2019-09-06
* Medium changes diff --git a/src/main/java/org/torproject/descriptor/BridgedbMetrics.java b/src/main/java/org/torproject/descriptor/BridgedbMetrics.java new file mode 100644 index 0000000..68d9d4f --- /dev/null +++ b/src/main/java/org/torproject/descriptor/BridgedbMetrics.java @@ -0,0 +1,70 @@ +/* Copyright 2019 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.descriptor; + +import java.time.Duration; +import java.time.LocalDateTime; +import java.util.Map; +import java.util.Optional; + +/** + * Contains aggregated information about requests to the BridgeDB service. + * + * @since 2.8.0 + */ +public interface BridgedbMetrics extends Descriptor { + + /** + * Return the end of the included measurement interval. + * + * @return End of the included measurement interval. + * @since 2.8.0 + */ + LocalDateTime bridgedbMetricsEnd(); + + /** + * Return the length of the included measurement interval. + * + * @return Length of the included measurement interval. + * @since 2.8.0 + */ + Duration bridgedbMetricsIntervalLength(); + + /** + * Return the BridgeDB metrics format version. + * + * @return BridgeDB metrics format version. + * @since 2.8.0 + */ + String bridgedbMetricsVersion(); + + /** + * Return approximate request numbers to the BridgeDB service in the + * measurement interval broken down by distribution mechanism, obfuscation + * protocol, and country code. 
+ * + * <p>Keys are formatted as {@code DIST.PROTO.CC/EMAIL.[success|fail].none} + * where:</p> + * <ul> + * <li>{@code DIST} is BridgeDB's distribution mechanism, for example, + * {@code http}, {@code email}, or {@code moat};</li> + * <li>{@code PROTO} is the obfuscation protocol, for example, {@code obfs2}, + * {@code obfs3}, {@code obfs4}, {@code scramblesuit}, or {@code fte};</li> + * <li>{@code CC/EMAIL} is either a two-letter country code or an email + * provider;</li> + * <li>the second-to-last field is either {@code success} or {@code fail} + * depending on if the BridgeDB request succeeded or not; and</li> + * <li>the last field is reserved for an anomaly score to be added in the + * future.</li> + * </ul> + * + * <p>Values are approximate request numbers, rounded up to the next multiple + * of 10.</p> + * + * @return Map of approximate request numbers. + * @since 2.8.0 + */ + Optional<Map<String, Long>> bridgedbMetricCounts(); +} + diff --git a/src/main/java/org/torproject/descriptor/impl/BridgedbMetricsImpl.java b/src/main/java/org/torproject/descriptor/impl/BridgedbMetricsImpl.java new file mode 100644 index 0000000..f683067 --- /dev/null +++ b/src/main/java/org/torproject/descriptor/impl/BridgedbMetricsImpl.java @@ -0,0 +1,136 @@ +/* Copyright 2019 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.descriptor.impl; + +import org.torproject.descriptor.BridgedbMetrics; +import org.torproject.descriptor.DescriptorParseException; + +import java.io.File; +import java.time.Duration; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Optional; +import java.util.Scanner; +import java.util.Set; + +public class BridgedbMetricsImpl extends DescriptorImpl + implements BridgedbMetrics { + + private static final Set<Key> exactlyOnce = EnumSet.of( + Key.BRIDGEDB_METRICS_END, Key.BRIDGEDB_METRICS_VERSION); + + 
BridgedbMetricsImpl(byte[] rawDescriptorBytes, int[] offsetAndLength, + File descriptorFile) throws DescriptorParseException { + super(rawDescriptorBytes, offsetAndLength, descriptorFile, false); + this.parseDescriptorBytes(); + this.checkExactlyOnceKeys(exactlyOnce); + this.checkFirstKey(Key.BRIDGEDB_METRICS_END); + this.clearParsedKeys(); + } + + BridgedbMetricsImpl(byte[] rawDescriptorBytes, File descriptorFile) + throws DescriptorParseException { + this(rawDescriptorBytes, new int[] { 0, rawDescriptorBytes.length }, + descriptorFile); + } + + private void parseDescriptorBytes() throws DescriptorParseException { + Scanner scanner = this.newScanner().useDelimiter(NL); + while (scanner.hasNext()) { + String line = scanner.next(); + if (line.startsWith("@")) { + continue; + } + String[] parts = line.split("[ \t]+"); + Key key = Key.get(parts[0]); + switch (key) { + case BRIDGEDB_METRICS_END: + this.parseBridgedbMetricsEnd(line, parts); + break; + case BRIDGEDB_METRICS_VERSION: + this.parseBridgedbMetricsVersion(line, parts); + break; + case BRIDGEDB_METRIC_COUNT: + this.parseBridgedbMetricCount(line, parts); + break; + case INVALID: + default: + ParseHelper.parseKeyword(line, parts[0]); + if (this.unrecognizedLines == null) { + this.unrecognizedLines = new ArrayList<>(); + } + this.unrecognizedLines.add(line); + } + } + } + + private void parseBridgedbMetricsEnd(String line, String[] parts) + throws DescriptorParseException { + if (parts.length < 5 || parts[3].length() < 2 || !parts[3].startsWith("(") + || !parts[4].equals("s)")) { + throw new DescriptorParseException("Illegal line '" + line + "'."); + } + this.bridgedbMetricsEnd = ParseHelper.parseLocalDateTime(line, parts, + 1, 2); + this.bridgedbMetricsIntervalLength = ParseHelper.parseDuration(line, + parts[3].substring(1)); + } + + private void parseBridgedbMetricsVersion(String line, String[] parts) + throws DescriptorParseException { + if (parts.length < 2) { + throw new DescriptorParseException("Illegal 
line '" + line + "'."); + } + this.bridgedbMetricsVersion = parts[1]; + } + + private void parseBridgedbMetricCount(String line, String[] parts) + throws DescriptorParseException { + if (parts.length < 3) { + throw new DescriptorParseException("Illegal line '" + line + "'."); + } + if (null == this.bridgedbMetricCounts) { + this.bridgedbMetricCounts = new LinkedHashMap<>(); + } + String key = parts[1]; + if (this.bridgedbMetricCounts.containsKey(key)) { + throw new DescriptorParseException("Duplicate key '" + key + "' in line '" + + line + "'."); + } + long value = ParseHelper.parseLong(line, parts, 2); + this.bridgedbMetricCounts.put(key, value); + } + + private LocalDateTime bridgedbMetricsEnd; + + @Override + public LocalDateTime bridgedbMetricsEnd() { + return this.bridgedbMetricsEnd; + } + + private Duration bridgedbMetricsIntervalLength; + + @Override + public Duration bridgedbMetricsIntervalLength() { + return this.bridgedbMetricsIntervalLength; + } + + private String bridgedbMetricsVersion; + + @Override + public String bridgedbMetricsVersion() { + return this.bridgedbMetricsVersion; + } + + private Map<String, Long> bridgedbMetricCounts; + + @Override + public Optional<Map<String, Long>> bridgedbMetricCounts() { + return Optional.ofNullable(this.bridgedbMetricCounts); + } +} + diff --git a/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java b/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java index 9b620cb..25494f4 100644 --- a/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java +++ b/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java @@ -135,6 +135,11 @@ public class DescriptorParserImpl implements DescriptorParser { || firstLines.contains(NL + Key.SNOWFLAKE_STATS_END.keyword + SP)) { return this.parseOneOrMoreDescriptors(rawDescriptorBytes, sourceFile, Key.SNOWFLAKE_STATS_END, SnowflakeStatsImpl.class); + } else if (firstLines.startsWith("@type bridgedb-metrics 1.") + || 
firstLines.startsWith(Key.BRIDGEDB_METRICS_END.keyword + SP) + || firstLines.contains(NL + Key.BRIDGEDB_METRICS_END.keyword + SP)) { + return this.parseOneOrMoreDescriptors(rawDescriptorBytes, sourceFile, + Key.BRIDGEDB_METRICS_END, BridgedbMetricsImpl.class); } else if (fileName.contains(LogDescriptorImpl.MARKER)) { return LogDescriptorImpl.parse(rawDescriptorBytes, sourceFile, fileName); } else if (firstLines.startsWith("@type bandwidth-file 1.") diff --git a/src/main/java/org/torproject/descriptor/impl/DescriptorReaderImpl.java b/src/main/java/org/torproject/descriptor/impl/DescriptorReaderImpl.java index 90ef2c6..207baca 100644 --- a/src/main/java/org/torproject/descriptor/impl/DescriptorReaderImpl.java +++ b/src/main/java/org/torproject/descriptor/impl/DescriptorReaderImpl.java @@ -10,7 +10,9 @@ import org.torproject.descriptor.DescriptorReader; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; +import org.apache.commons.compress.utils.IOUtils;
import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -21,6 +23,7 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.ArrayList; @@ -325,10 +328,18 @@ public class DescriptorReaderImpl implements DescriptorReader { }
private void readDescriptorFile(File file) throws IOException { - byte[] rawDescriptorBytes = Files.readAllBytes(file.toPath()); - for (Descriptor descriptor : this.descriptorParser.parseDescriptors( - rawDescriptorBytes, file, file.getName())) { - this.descriptorQueue.add(descriptor); + try (FileInputStream fis = new FileInputStream(file)) { + InputStream is = fis; + if (file.getName().endsWith(".gz")) { + is = new GzipCompressorInputStream(fis); + } + byte[] rawDescriptorBytes = IOUtils.toByteArray(is); + if (rawDescriptorBytes.length > 0) { + for (Descriptor descriptor : this.descriptorParser.parseDescriptors( + rawDescriptorBytes, file, file.getName())) { + this.descriptorQueue.add(descriptor); + } + } } } } diff --git a/src/main/java/org/torproject/descriptor/impl/Key.java b/src/main/java/org/torproject/descriptor/impl/Key.java index cf6ed09..ac12992 100644 --- a/src/main/java/org/torproject/descriptor/impl/Key.java +++ b/src/main/java/org/torproject/descriptor/impl/Key.java @@ -18,6 +18,9 @@ public enum Key { ALLOW_SINGLE_HOP_EXITS("allow-single-hop-exits"), BANDWIDTH("bandwidth"), BANDWIDTH_WEIGHTS("bandwidth-weights"), + BRIDGEDB_METRICS_END("bridgedb-metrics-end"), + BRIDGEDB_METRICS_VERSION("bridgedb-metrics-version"), + BRIDGEDB_METRIC_COUNT("bridgedb-metric-count"), BRIDGE_IPS("bridge-ips"), BRIDGE_IP_TRANSPORTS("bridge-ip-transports"), BRIDGE_IP_VERSIONS("bridge-ip-versions"), diff --git a/src/main/java/org/torproject/descriptor/impl/ParseHelper.java b/src/main/java/org/torproject/descriptor/impl/ParseHelper.java index ba45ff6..53f011c 100644 --- a/src/main/java/org/torproject/descriptor/impl/ParseHelper.java +++ b/src/main/java/org/torproject/descriptor/impl/ParseHelper.java @@ -115,6 +115,20 @@ public class ParseHelper { return Duration.ofSeconds(parsedSeconds); }
+ protected static Long parseLong(String line, String[] parts, int index) + throws DescriptorParseException { + if (index >= parts.length) { + throw new DescriptorParseException(String.format( + "Line '%s' does not contain a long value at index %d.", line, index)); + } + try { + return Long.parseLong(parts[index]); + } catch (NumberFormatException e) { + throw new DescriptorParseException(String.format( + "Unable to parse long value '%s' in line '%s'.", parts[index], line)); + } + } + protected static String parseExitPattern(String line, String exitPattern) throws DescriptorParseException { if (!exitPattern.contains(":")) { diff --git a/src/main/java/org/torproject/descriptor/impl/SnowflakeStatsImpl.java b/src/main/java/org/torproject/descriptor/impl/SnowflakeStatsImpl.java index 2f46bbe..9922756 100644 --- a/src/main/java/org/torproject/descriptor/impl/SnowflakeStatsImpl.java +++ b/src/main/java/org/torproject/descriptor/impl/SnowflakeStatsImpl.java @@ -101,36 +101,22 @@ public class SnowflakeStatsImpl extends DescriptorImpl
private void parseSnowflakeIpsTotal(String line, String[] parts) throws DescriptorParseException { - this.snowflakeIpsTotal = parseLong(line, parts, 1); + this.snowflakeIpsTotal = ParseHelper.parseLong(line, parts, 1); }
private void parseSnowflakeIdleCount(String line, String[] parts) throws DescriptorParseException { - this.snowflakeIdleCount = parseLong(line, parts, 1); + this.snowflakeIdleCount = ParseHelper.parseLong(line, parts, 1); }
private void parseClientDeniedCount(String line, String[] parts) throws DescriptorParseException { - this.clientDeniedCount = parseLong(line, parts, 1); + this.clientDeniedCount = ParseHelper.parseLong(line, parts, 1); }
private void parseClientSnowflakeMatchCount(String line, String[] parts) throws DescriptorParseException { - this.clientSnowflakeMatchCount = parseLong(line, parts, 1); - } - - private static Long parseLong(String line, String[] parts, int index) - throws DescriptorParseException { - if (index >= parts.length) { - throw new DescriptorParseException(String.format( - "Line '%s' does not contain a long value at index %d.", line, index)); - } - try { - return Long.parseLong(parts[index]); - } catch (NumberFormatException e) { - throw new DescriptorParseException(String.format( - "Unable to parse long value '%s' in line '%s'.", parts[index], line)); - } + this.clientSnowflakeMatchCount = ParseHelper.parseLong(line, parts, 1); }
private LocalDateTime snowflakeStatsEnd; diff --git a/src/test/java/org/torproject/descriptor/impl/BridgedbMetricsImplTest.java b/src/test/java/org/torproject/descriptor/impl/BridgedbMetricsImplTest.java new file mode 100644 index 0000000..8c0e2d5 --- /dev/null +++ b/src/test/java/org/torproject/descriptor/impl/BridgedbMetricsImplTest.java @@ -0,0 +1,118 @@ +/* Copyright 2019 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.descriptor.impl; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.torproject.descriptor.BridgedbMetrics; +import org.torproject.descriptor.DescriptorParseException; + +import org.hamcrest.Matchers; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.time.Duration; +import java.time.LocalDateTime; + +public class BridgedbMetricsImplTest { + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + /** + * Example taken from BridgeDB metrics from 2019-09-17. 
+ */ + private static final String[] exampleBridgedbMetricsLog = new String[] { + "bridgedb-metrics-end 2019-09-17 00:33:44 (86400 s)", + "bridgedb-metrics-version 1", + "bridgedb-metric-count https.obfs3.ru.success.none 10", + "bridgedb-metric-count https.obfs3.sk.success.none 10", + "bridgedb-metric-count https.fte.de.fail.none 10" }; + + @Test + public void testExampleMetricsLog() throws DescriptorParseException { + BridgedbMetrics bridgedbMetrics = new BridgedbMetricsImpl( + new TestDescriptorBuilder(exampleBridgedbMetricsLog).build(), null); + assertEquals(LocalDateTime.of(2019, 9, 17, 0, 33, 44), + bridgedbMetrics.bridgedbMetricsEnd()); + assertEquals(Duration.ofDays(1L), + bridgedbMetrics.bridgedbMetricsIntervalLength()); + assertEquals("1", bridgedbMetrics.bridgedbMetricsVersion()); + assertTrue(bridgedbMetrics.bridgedbMetricCounts().isPresent()); + assertEquals(3, bridgedbMetrics.bridgedbMetricCounts().get().size()); + assertEquals((Long) 10L, bridgedbMetrics.bridgedbMetricCounts().get() + .get("https.obfs3.ru.success.none")); + assertEquals((Long) 10L, bridgedbMetrics.bridgedbMetricCounts().get() + .get("https.obfs3.sk.success.none")); + assertEquals((Long) 10L, bridgedbMetrics.bridgedbMetricCounts().get() + .get("https.fte.de.fail.none")); + } + + @Test + public void testMinimalBridgedbMetrics() throws DescriptorParseException { + BridgedbMetrics bridgedbMetrics = new BridgedbMetricsImpl( + new TestDescriptorBuilder(exampleBridgedbMetricsLog[0], + exampleBridgedbMetricsLog[1]).build(), null); + assertEquals(LocalDateTime.of(2019, 9, 17, 0, 33, 44), + bridgedbMetrics.bridgedbMetricsEnd()); + assertEquals(Duration.ofDays(1L), + bridgedbMetrics.bridgedbMetricsIntervalLength()); + assertEquals("1", bridgedbMetrics.bridgedbMetricsVersion()); + assertFalse(bridgedbMetrics.bridgedbMetricCounts().isPresent()); + } + + @Test + public void testEmptyLine() throws DescriptorParseException { + this.thrown.expect(DescriptorParseException.class); + 
this.thrown.expectMessage(Matchers.containsString( + "Blank lines are not allowed.")); + new BridgedbMetricsImpl(new TestDescriptorBuilder(exampleBridgedbMetricsLog) + .appendLines("") + .build(), null); + } + + @Test + public void testDuplicateLine() throws DescriptorParseException { + this.thrown.expect(DescriptorParseException.class); + this.thrown.expectMessage(Matchers.containsString( + "must be contained exactly once.")); + new BridgedbMetricsImpl(new TestDescriptorBuilder( + exampleBridgedbMetricsLog[0], exampleBridgedbMetricsLog[1], + exampleBridgedbMetricsLog[1]).build(), null); + } + + @Test + public void testDuplicateKey() throws DescriptorParseException { + this.thrown.expect(DescriptorParseException.class); + this.thrown.expectMessage(Matchers.containsString("Duplicate key")); + new BridgedbMetricsImpl(new TestDescriptorBuilder( + exampleBridgedbMetricsLog[0], exampleBridgedbMetricsLog[1], + exampleBridgedbMetricsLog[2], exampleBridgedbMetricsLog[2]) + .build(), null); + } + + @Test + public void testNoValue() throws DescriptorParseException { + this.thrown.expect(DescriptorParseException.class); + this.thrown.expectMessage(Matchers.containsString( + "Unable to parse long value '10-ish' in line")); + new BridgedbMetricsImpl(new TestDescriptorBuilder( + exampleBridgedbMetricsLog[0], exampleBridgedbMetricsLog[1], + exampleBridgedbMetricsLog[2] + "-ish").build(), null); + } + + @Test + public void testNonPositiveIntervalLength() throws DescriptorParseException { + this.thrown.expect(DescriptorParseException.class); + this.thrown.expectMessage(Matchers.containsString( + "Duration must be positive")); + new BridgedbMetricsImpl(new TestDescriptorBuilder( + "bridgedb-metrics-end 2019-09-17 00:33:44 (0 s)", + exampleBridgedbMetricsLog[1]).build(), null); + } +} +