commit dcbac68b48ae31b1bfbabab7a9c32f5577b78571 Author: Karsten Loesing karsten.loesing@gmx.net Date: Thu May 2 22:02:23 2019 +0200
Archive bandwidth files in relaydescs module.
Also update to metrics-lib 2.6.1.
Implements #30218. --- CHANGELOG.md | 7 ++++ build.xml | 4 +-- .../metrics/collector/conf/Annotation.java | 1 + .../collector/relaydescs/ArchiveWriter.java | 42 +++++++++++++++++++++- .../relaydescs/RelayDescriptorDownloader.java | 24 ++++++++++--- .../relaydescs/RelayDescriptorParser.java | 42 ++++++++++++++++++++++ src/main/resources/create-tarballs.sh | 7 ++++ 7 files changed, 120 insertions(+), 7 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md index 0307748..0e592ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# Changes in version 1.9.0 - 2019-05-?? + + * Medium changes + - Archive bandwidth files in relaydescs module. + - Update to metrics-lib 2.6.1. + + # Changes in version 1.8.0 - 2018-10-11
* Medium changes diff --git a/build.xml b/build.xml index 5cea51a..5874013 100644 --- a/build.xml +++ b/build.xml @@ -11,7 +11,7 @@ <property name="release.version" value="1.8.0-dev" /> <property name="project-main-class" value="org.torproject.metrics.collector.Main" /> <property name="name" value="collector"/> - <property name="metricslibversion" value="2.4.0" /> + <property name="metricslibversion" value="2.6.1" /> <property name="jarincludes" value="collector.properties logback.xml" />
<patternset id="runtime" > @@ -21,7 +21,7 @@ <include name="jackson-core-2.8.6.jar"/> <include name="jackson-databind-2.8.6.jar"/> <include name="xz-1.6.jar"/> - <include name="metrics-lib-${metricslibversion}.jar"/> + <include name="metrics-lib-${metricslibversion}-thin.jar"/> <include name="logback-core-1.1.9.jar" /> <include name="logback-classic-1.1.9.jar" /> <include name="slf4j-api-1.7.22.jar" /> diff --git a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java index f90516b..2e47df0 100644 --- a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java +++ b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java @@ -6,6 +6,7 @@ package org.torproject.metrics.collector.conf; /** This enum contains all currently valid descriptor annotations. */ public enum Annotation {
+ BandwidthFile("@type bandwidth-file 1.0\n"), BridgeExtraInfo("@type bridge-extra-info 1.3\n"), BridgeServer("@type bridge-server-descriptor 1.2\n"), Cert("@type dir-key-certificate-3 1.0\n"), diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java b/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java index 966b649..e1279ee 100644 --- a/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java +++ b/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java @@ -3,6 +3,7 @@
package org.torproject.metrics.collector.relaydescs;
+import org.torproject.descriptor.BandwidthFile; import org.torproject.descriptor.Descriptor; import org.torproject.descriptor.DescriptorParser; import org.torproject.descriptor.DescriptorSourceFactory; @@ -33,6 +34,10 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; import java.util.Arrays; import java.util.Date; import java.util.HashMap; @@ -44,6 +49,7 @@ import java.util.SortedSet; import java.util.Stack; import java.util.TimeZone; import java.util.TreeMap; +import java.util.TreeSet;
public class ArchiveWriter extends CollecTorMain {
@@ -51,12 +57,15 @@ public class ArchiveWriter extends CollecTorMain { ArchiveWriter.class);
private long now = System.currentTimeMillis(); + private LocalDateTime nowLocalDateTime + = LocalDateTime.ofInstant(Instant.ofEpochMilli(this.now), ZoneOffset.UTC); private String outputDirectory; private String rsyncCatString; private DescriptorParser descriptorParser; private int storedConsensusesCounter = 0; private int storedMicrodescConsensusesCounter = 0; private int storedVotesCounter = 0; + private int storedBandwidthsCounter = 0; private int storedCertsCounter = 0; private int storedServerDescriptorsCounter = 0; private int storedExtraInfoDescriptorsCounter = 0; @@ -74,6 +83,8 @@ public class ArchiveWriter extends CollecTorMain { private SortedMap<Long, Set<String>> storedExtraInfoDescriptors = new TreeMap<>(); private SortedMap<Long, Set<String>> storedMicrodescriptors = new TreeMap<>(); + private SortedMap<LocalDateTime, Set<String>> storedBandwidths + = new TreeMap<>();
private File storedServerDescriptorsFile; private File storedExtraInfoDescriptorsFile; @@ -103,6 +114,8 @@ public class ArchiveWriter extends CollecTorMain { RelayServerDescriptor.class); this.mapPathDescriptors.put("recent/relay-descriptors/extra-infos", RelayExtraInfoDescriptor.class); + this.mapPathDescriptors.put("recent/relay-descriptors/bandwidths", + BandwidthFile.class); }
@Override @@ -203,6 +216,7 @@ public class ArchiveWriter extends CollecTorMain { this.storedConsensuses.clear(); this.storedMicrodescConsensuses.clear(); this.storedVotes.clear(); + this.storedBandwidths.clear(); this.storedServerDescriptors.clear(); this.storedExtraInfoDescriptors.clear(); this.storedMicrodescriptors.clear(); @@ -299,7 +313,8 @@ public class ArchiveWriter extends CollecTorMain { .append(this.storedConsensusesCounter).append(" consensus(es), ") .append(this.storedMicrodescConsensusesCounter).append(" microdesc ") .append("consensus(es), ").append(this.storedVotesCounter) - .append(" vote(s), ").append(this.storedCertsCounter) + .append(" vote(s), ").append(this.storedBandwidthsCounter) + .append(" bandwidth file(s), ").append(this.storedCertsCounter) .append(" certificate(s), ").append(this.storedServerDescriptorsCounter) .append(" server descriptor(s), ") .append(this.storedExtraInfoDescriptorsCounter).append(" extra-info ") @@ -309,6 +324,7 @@ public class ArchiveWriter extends CollecTorMain { this.storedConsensusesCounter = 0; this.storedMicrodescConsensusesCounter = 0; this.storedVotesCounter = 0; + this.storedBandwidthsCounter = 0; this.storedCertsCounter = 0; this.storedServerDescriptorsCounter = 0; this.storedExtraInfoDescriptorsCounter = 0; @@ -727,6 +743,30 @@ public class ArchiveWriter extends CollecTorMain { } }
+ /** Stores a bandwidth file to disk. */ + void storeBandwidthFile(byte[] data, LocalDateTime fileCreatedOrTimestamp, + String bandwidthFileDigest) { + DateTimeFormatter printFormat = DateTimeFormatter + .ofPattern("uuuu/MM/dd/uuuu-MM-dd-HH-mm-ss").withZone(ZoneOffset.UTC); + File tarballFile = Paths.get(this.outputDirectory, "bandwidth", + fileCreatedOrTimestamp.format(printFormat) + "-bandwidth-" + + bandwidthFileDigest).toFile(); + boolean tarballFileExistedBefore = tarballFile.exists(); + File rsyncFile = Paths.get(recentPathName, RELAY_DESCRIPTORS, "bandwidths", + tarballFile.getName()).toFile(); + File[] outputFiles = new File[] { tarballFile, rsyncFile }; + if (this.store(Annotation.BandwidthFile.bytes(), data, outputFiles, null)) { + this.storedBandwidthsCounter++; + } + if (!tarballFileExistedBefore + && this.nowLocalDateTime.isAfter(fileCreatedOrTimestamp.plusDays(3L))) { + this.storedBandwidths.putIfAbsent(fileCreatedOrTimestamp, + new TreeSet<>()); + this.storedBandwidths.get(fileCreatedOrTimestamp) + .add(bandwidthFileDigest); + } + } + /** Stores a key certificate to disk. */ public void storeCertificate(byte[] data, String fingerprint, long published) { diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java index 4764a4b..5a241f4 100644 --- a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java +++ b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java @@ -258,6 +258,8 @@ public class RelayDescriptorDownloader {
private int requestedVotes = 0;
+ private int requestedBandwidthFiles = 0; + private int requestedMissingServerDescriptors = 0;
private int requestedAllServerDescriptors = 0; @@ -274,6 +276,8 @@ public class RelayDescriptorDownloader {
private int downloadedVotes = 0;
+ private int downloadedBandwidthFiles = 0; + private int downloadedMissingServerDescriptors = 0;
private int downloadedAllServerDescriptors = 0; @@ -729,6 +733,14 @@ public class RelayDescriptorDownloader { } }
+ /* Now try to download the bandwidth file, regardless of whether this + * authority might provide one or when we last downloaded a bandwidth + * file from it. */ + this.requestedBandwidthFiles++; + this.downloadedBandwidthFiles += + this.downloadResourceFromAuthority(authority, + "/tor/status-vote/next/bandwidth"); + /* Download either all server and extra-info descriptors or only * those that we're missing. Start with server descriptors, then * request extra-info descriptors. Finally, request missing @@ -886,7 +898,7 @@ public class RelayDescriptorDownloader { allData == null ? 0 : allData.length); int receivedDescriptors = 0; if (allData != null) { - if (resource.startsWith("/tor/status-vote/current/")) { + if (resource.startsWith("/tor/status-vote/")) { this.rdp.parse(allData); receivedDescriptors = 1; } else if (resource.startsWith("/tor/server/") @@ -1067,11 +1079,13 @@ public class RelayDescriptorDownloader { this.newMissingServerDescriptors, this.newMissingExtraInfoDescriptors, this.newMissingMicrodescriptors); logger.info("We requested {} consensus(es), {} microdesc consensus(es), " - + "{} vote(s), {} missing server descriptor(s), {} times all server " + + "{} vote(s), {} bandwidth file(s), {} missing server descriptor(s), " + + "{} times all server " + "descriptors, {} missing extra-info descriptor(s), {} times all " + "extra-info descriptors, and {} missing microdescriptor(s) from the " + "directory authorities.", this.requestedConsensuses, this.requestedMicrodescConsensuses, this.requestedVotes, + this.requestedBandwidthFiles, this.requestedMissingServerDescriptors, this.requestedAllServerDescriptors, this.requestedMissingExtraInfoDescriptors, @@ -1085,12 +1099,14 @@ public class RelayDescriptorDownloader { logger.info("We sent these numbers of requests to the directory " + "authorities:{}", sb.toString()); logger.info("We successfully downloaded {} consensus(es), {} microdesc " - + "consensus(es), {} vote(s), {} missing server descriptor(s), {} " + + 
"consensus(es), {} vote(s), {} bandwidth file(s), " + + "{} missing server descriptor(s), {} " + "server descriptor(s) when downloading all descriptors, {} missing " + "extra-info descriptor(s), {} extra-info descriptor(s) when " + "downloading all descriptors, and {} missing microdescriptor(s).", this.downloadedConsensuses, this.downloadedMicrodescConsensuses, - this.downloadedVotes, this.downloadedMissingServerDescriptors, + this.downloadedVotes, this.downloadedBandwidthFiles, + this.downloadedMissingServerDescriptors, this.downloadedAllServerDescriptors, this.downloadedMissingExtraInfoDescriptors, this.downloadedAllExtraInfoDescriptors, diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorParser.java b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorParser.java index 5224a61..113ac77 100644 --- a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorParser.java +++ b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorParser.java @@ -14,6 +14,10 @@ import java.io.IOException; import java.io.StringReader; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeParseException; import java.util.SortedSet; import java.util.TimeZone; import java.util.TreeSet; @@ -318,6 +322,44 @@ public class RelayDescriptorParser { * time(s) of microdesc consensuses containing them, because we * don't know which month directories to put them in. Have to use * storeMicrodescriptor below. */ + } else if (line.matches("[0-9]{10}")) { + /* The following code is a much more lenient version of the parser in + * metrics-lib that we need for storing a bandwidth file even if + * metrics-lib has trouble verifying its format. 
As in metrics-lib, + * identifying bandwidth files by a 10-digit timestamp in the first line + * breaks with files generated before 2002 or after 2286 and when the + * next descriptor identifier starts with just a timestamp in the first + * line rather than a document type identifier. */ + String timestampLine = line; + LocalDateTime fileCreatedOrTimestamp = null; + try { + while ((line = br.readLine()) != null) { + if (line.startsWith("file_created=")) { + fileCreatedOrTimestamp = LocalDateTime.parse( + line.substring("file_created=".length())); + break; + } else if (line.startsWith("bw=") || line.contains(" bw=") + || "====".equals(line) || "=====".equals(line)) { + break; + } + } + } catch (IOException | DateTimeParseException e) { + /* Fall back to using timestamp in first line. */ + } + if (null == fileCreatedOrTimestamp) { + try { + fileCreatedOrTimestamp = LocalDateTime.ofInstant( + Instant.ofEpochSecond(Long.parseLong(timestampLine)), + ZoneOffset.UTC); + } catch (NumberFormatException | DateTimeParseException e) { + logger.warn("Could not parse timestamp or file_created time from " + + "bandwidth file. 
Storing with timestamp 2000-01-01 00:00:00"); + fileCreatedOrTimestamp = LocalDateTime.of(2000, 1, 1, 0, 0, 0); + } + } + this.aw.storeBandwidthFile(data, fileCreatedOrTimestamp, + DigestUtils.sha256Hex(data).toUpperCase()); + stored = true; } br.close(); } catch (IOException | ParseException e) { diff --git a/src/main/resources/create-tarballs.sh b/src/main/resources/create-tarballs.sh index d247c52..7e4668a 100755 --- a/src/main/resources/create-tarballs.sh +++ b/src/main/resources/create-tarballs.sh @@ -47,6 +47,8 @@ TARBALLS=( consensuses-$YEARTWO-$MONTHTWO votes-$YEARONE-$MONTHONE votes-$YEARTWO-$MONTHTWO + bandwidths-$YEARONE-$MONTHONE + bandwidths-$YEARTWO-$MONTHTWO server-descriptors-$YEARONE-$MONTHONE server-descriptors-$YEARTWO-$MONTHTWO extra-infos-$YEARONE-$MONTHONE @@ -72,6 +74,8 @@ DIRECTORIES=( $OUTDIR/relay-descriptors/consensus/$YEARTWO/$MONTHTWO $OUTDIR/relay-descriptors/vote/$YEARONE/$MONTHONE/ $OUTDIR/relay-descriptors/vote/$YEARTWO/$MONTHTWO/ + $OUTDIR/relay-descriptors/bandwidth/$YEARONE/$MONTHONE/ + $OUTDIR/relay-descriptors/bandwidth/$YEARTWO/$MONTHTWO/ $OUTDIR/relay-descriptors/server-descriptor/$YEARONE/$MONTHONE/ $OUTDIR/relay-descriptors/server-descriptor/$YEARTWO/$MONTHTWO/ $OUTDIR/relay-descriptors/extra-info/$YEARONE/$MONTHONE/ @@ -156,6 +160,9 @@ ln -f -s -t $ARCHIVEDIR/relay-descriptors/tor/ $TARBALLTARGETDIR/tor-20??-??.tar mkdir -p $ARCHIVEDIR/relay-descriptors/votes/ ln -f -s -t $ARCHIVEDIR/relay-descriptors/votes/ $TARBALLTARGETDIR/votes-20??-??.tar.xz
+mkdir -p $ARCHIVEDIR/relay-descriptors/bandwidths/ +ln -f -s -t $ARCHIVEDIR/relay-descriptors/bandwidths/ $TARBALLTARGETDIR/bandwidths-20??-??.tar.xz + mkdir -p $ARCHIVEDIR/torperf/ ln -f -s -t $ARCHIVEDIR/torperf/ $TARBALLTARGETDIR/torperf-20??-??.tar.xz
tor-commits@lists.torproject.org