[tor-commits] [collector/master] Archive bandwidth files in relaydescs module.

karsten at torproject.org karsten at torproject.org
Mon May 13 13:41:54 UTC 2019


commit dcbac68b48ae31b1bfbabab7a9c32f5577b78571
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu May 2 22:02:23 2019 +0200

    Archive bandwidth files in relaydescs module.
    
    Also update to metrics-lib 2.6.1.
    
    Implements #30218.
---
 CHANGELOG.md                                       |  7 ++++
 build.xml                                          |  4 +--
 .../metrics/collector/conf/Annotation.java         |  1 +
 .../collector/relaydescs/ArchiveWriter.java        | 42 +++++++++++++++++++++-
 .../relaydescs/RelayDescriptorDownloader.java      | 24 ++++++++++---
 .../relaydescs/RelayDescriptorParser.java          | 42 ++++++++++++++++++++++
 src/main/resources/create-tarballs.sh              |  7 ++++
 7 files changed, 120 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0307748..0e592ae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+# Changes in version 1.9.0 - 2019-05-??
+
+ * Medium changes
+   - Archive bandwidth files in relaydescs module.
+   - Update to metrics-lib 2.6.1.
+
+
 # Changes in version 1.8.0 - 2018-10-11
 
  * Medium changes
diff --git a/build.xml b/build.xml
index 5cea51a..5874013 100644
--- a/build.xml
+++ b/build.xml
@@ -11,7 +11,7 @@
   <property name="release.version" value="1.8.0-dev" />
   <property name="project-main-class" value="org.torproject.metrics.collector.Main" />
   <property name="name" value="collector"/>
-  <property name="metricslibversion" value="2.4.0" />
+  <property name="metricslibversion" value="2.6.1" />
   <property name="jarincludes" value="collector.properties logback.xml" />
 
   <patternset id="runtime" >
@@ -21,7 +21,7 @@
       <include name="jackson-core-2.8.6.jar"/>
       <include name="jackson-databind-2.8.6.jar"/>
       <include name="xz-1.6.jar"/>
-      <include name="metrics-lib-${metricslibversion}.jar"/>
+      <include name="metrics-lib-${metricslibversion}-thin.jar"/>
       <include name="logback-core-1.1.9.jar" />
       <include name="logback-classic-1.1.9.jar" />
       <include name="slf4j-api-1.7.22.jar" />
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
index f90516b..2e47df0 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Annotation.java
@@ -6,6 +6,7 @@ package org.torproject.metrics.collector.conf;
 /** This enum contains all currently valid descriptor annotations. */
 public enum Annotation {
 
+  BandwidthFile("@type bandwidth-file 1.0\n"),
   BridgeExtraInfo("@type bridge-extra-info 1.3\n"),
   BridgeServer("@type bridge-server-descriptor 1.2\n"),
   Cert("@type dir-key-certificate-3 1.0\n"),
diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java b/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java
index 966b649..e1279ee 100644
--- a/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/relaydescs/ArchiveWriter.java
@@ -3,6 +3,7 @@
 
 package org.torproject.metrics.collector.relaydescs;
 
+import org.torproject.descriptor.BandwidthFile;
 import org.torproject.descriptor.Descriptor;
 import org.torproject.descriptor.DescriptorParser;
 import org.torproject.descriptor.DescriptorSourceFactory;
@@ -33,6 +34,10 @@ import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
 import java.util.Arrays;
 import java.util.Date;
 import java.util.HashMap;
@@ -44,6 +49,7 @@ import java.util.SortedSet;
 import java.util.Stack;
 import java.util.TimeZone;
 import java.util.TreeMap;
+import java.util.TreeSet;
 
 public class ArchiveWriter extends CollecTorMain {
 
@@ -51,12 +57,15 @@ public class ArchiveWriter extends CollecTorMain {
       ArchiveWriter.class);
 
   private long now = System.currentTimeMillis();
+  private LocalDateTime nowLocalDateTime
+      = LocalDateTime.ofInstant(Instant.ofEpochMilli(this.now), ZoneOffset.UTC);
   private String outputDirectory;
   private String rsyncCatString;
   private DescriptorParser descriptorParser;
   private int storedConsensusesCounter = 0;
   private int storedMicrodescConsensusesCounter = 0;
   private int storedVotesCounter = 0;
+  private int storedBandwidthsCounter = 0;
   private int storedCertsCounter = 0;
   private int storedServerDescriptorsCounter = 0;
   private int storedExtraInfoDescriptorsCounter = 0;
@@ -74,6 +83,8 @@ public class ArchiveWriter extends CollecTorMain {
   private SortedMap<Long, Set<String>> storedExtraInfoDescriptors =
       new TreeMap<>();
   private SortedMap<Long, Set<String>> storedMicrodescriptors = new TreeMap<>();
+  private SortedMap<LocalDateTime, Set<String>> storedBandwidths
+      = new TreeMap<>();
 
   private File storedServerDescriptorsFile;
   private File storedExtraInfoDescriptorsFile;
@@ -103,6 +114,8 @@ public class ArchiveWriter extends CollecTorMain {
         RelayServerDescriptor.class);
     this.mapPathDescriptors.put("recent/relay-descriptors/extra-infos",
         RelayExtraInfoDescriptor.class);
+    this.mapPathDescriptors.put("recent/relay-descriptors/bandwidths",
+        BandwidthFile.class);
   }
 
   @Override
@@ -203,6 +216,7 @@ public class ArchiveWriter extends CollecTorMain {
     this.storedConsensuses.clear();
     this.storedMicrodescConsensuses.clear();
     this.storedVotes.clear();
+    this.storedBandwidths.clear();
     this.storedServerDescriptors.clear();
     this.storedExtraInfoDescriptors.clear();
     this.storedMicrodescriptors.clear();
@@ -299,7 +313,8 @@ public class ArchiveWriter extends CollecTorMain {
         .append(this.storedConsensusesCounter).append(" consensus(es), ")
         .append(this.storedMicrodescConsensusesCounter).append(" microdesc ")
         .append("consensus(es), ").append(this.storedVotesCounter)
-        .append(" vote(s), ").append(this.storedCertsCounter)
+        .append(" vote(s), ").append(this.storedBandwidthsCounter)
+        .append(" bandwidth file(s), ").append(this.storedCertsCounter)
         .append(" certificate(s), ").append(this.storedServerDescriptorsCounter)
         .append(" server descriptor(s), ")
         .append(this.storedExtraInfoDescriptorsCounter).append(" extra-info ")
@@ -309,6 +324,7 @@ public class ArchiveWriter extends CollecTorMain {
     this.storedConsensusesCounter = 0;
     this.storedMicrodescConsensusesCounter = 0;
     this.storedVotesCounter = 0;
+    this.storedBandwidthsCounter = 0;
     this.storedCertsCounter = 0;
     this.storedServerDescriptorsCounter = 0;
     this.storedExtraInfoDescriptorsCounter = 0;
@@ -727,6 +743,30 @@ public class ArchiveWriter extends CollecTorMain {
     }
   }
 
+  /** Stores a bandwidth file in the output and recent directories. */
+  void storeBandwidthFile(byte[] data, LocalDateTime fileCreatedOrTimestamp,
+      String bandwidthFileDigest) {
+    DateTimeFormatter printFormat = DateTimeFormatter
+        .ofPattern("uuuu/MM/dd/uuuu-MM-dd-HH-mm-ss").withZone(ZoneOffset.UTC);
+    File tarballFile = Paths.get(this.outputDirectory, "bandwidth",
+        fileCreatedOrTimestamp.format(printFormat) + "-bandwidth-"
+        + bandwidthFileDigest).toFile();
+    boolean tarballFileExistedBefore = tarballFile.exists();
+    File rsyncFile = Paths.get(recentPathName, RELAY_DESCRIPTORS, "bandwidths",
+        tarballFile.getName()).toFile();
+    File[] outputFiles = new File[] { tarballFile, rsyncFile };
+    /* Bandwidth files have their own counter, separate from votes. */
+    if (this.store(Annotation.BandwidthFile.bytes(), data, outputFiles, null)) {
+      this.storedBandwidthsCounter++;
+    }
+    /* Remember newly written files created more than three days ago. */
+    if (!tarballFileExistedBefore
+        && this.nowLocalDateTime.isAfter(fileCreatedOrTimestamp.plusDays(3L))) {
+      this.storedBandwidths.computeIfAbsent(fileCreatedOrTimestamp,
+          key -> new TreeSet<>()).add(bandwidthFileDigest);
+    }
+  }
+
   /** Stores a key certificate to disk. */
   public void storeCertificate(byte[] data, String fingerprint,
       long published) {
diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java
index 4764a4b..5a241f4 100644
--- a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorDownloader.java
@@ -258,6 +258,8 @@ public class RelayDescriptorDownloader {
 
   private int requestedVotes = 0;
 
+  private int requestedBandwidthFiles = 0;
+
   private int requestedMissingServerDescriptors = 0;
 
   private int requestedAllServerDescriptors = 0;
@@ -274,6 +276,8 @@ public class RelayDescriptorDownloader {
 
   private int downloadedVotes = 0;
 
+  private int downloadedBandwidthFiles = 0;
+
   private int downloadedMissingServerDescriptors = 0;
 
   private int downloadedAllServerDescriptors = 0;
@@ -729,6 +733,14 @@ public class RelayDescriptorDownloader {
           }
         }
 
+        /* Now try to download the bandwidth file, regardless of whether this
+         * authority might provide one or when we last downloaded a bandwidth
+         * file from it. */
+        this.requestedBandwidthFiles++;
+        this.downloadedBandwidthFiles +=
+            this.downloadResourceFromAuthority(authority,
+            "/tor/status-vote/next/bandwidth");
+
         /* Download either all server and extra-info descriptors or only
          * those that we're missing. Start with server descriptors, then
          * request extra-info descriptors. Finally, request missing
@@ -886,7 +898,7 @@ public class RelayDescriptorDownloader {
         allData == null ? 0 : allData.length);
     int receivedDescriptors = 0;
     if (allData != null) {
-      if (resource.startsWith("/tor/status-vote/current/")) {
+      if (resource.startsWith("/tor/status-vote/")) {
         this.rdp.parse(allData);
         receivedDescriptors = 1;
       } else if (resource.startsWith("/tor/server/")
@@ -1067,11 +1079,13 @@ public class RelayDescriptorDownloader {
         this.newMissingServerDescriptors, this.newMissingExtraInfoDescriptors,
         this.newMissingMicrodescriptors);
     logger.info("We requested {} consensus(es), {} microdesc consensus(es), "
-        + "{} vote(s), {} missing server descriptor(s), {} times all server "
+        + "{} vote(s), {} bandwidth file(s), {} missing server descriptor(s), "
+        + "{} times all server "
         + "descriptors, {} missing extra-info descriptor(s), {} times all "
         + "extra-info descriptors, and {} missing microdescriptor(s) from the "
         + "directory authorities.", this.requestedConsensuses,
         this.requestedMicrodescConsensuses, this.requestedVotes,
+        this.requestedBandwidthFiles,
         this.requestedMissingServerDescriptors,
         this.requestedAllServerDescriptors,
         this.requestedMissingExtraInfoDescriptors,
@@ -1085,12 +1099,14 @@ public class RelayDescriptorDownloader {
     logger.info("We sent these numbers of requests to the directory "
         + "authorities:{}", sb.toString());
     logger.info("We successfully downloaded {} consensus(es), {} microdesc "
-        + "consensus(es), {} vote(s), {} missing server descriptor(s), {} "
+        + "consensus(es), {} vote(s), {} bandwidth file(s), "
+        + "{} missing server descriptor(s), {} "
         + "server descriptor(s) when downloading all descriptors, {} missing "
         + "extra-info descriptor(s), {} extra-info descriptor(s) when "
         + "downloading all descriptors, and {} missing microdescriptor(s).",
         this.downloadedConsensuses, this.downloadedMicrodescConsensuses,
-        this.downloadedVotes, this.downloadedMissingServerDescriptors,
+        this.downloadedVotes, this.downloadedBandwidthFiles,
+        this.downloadedMissingServerDescriptors,
         this.downloadedAllServerDescriptors,
         this.downloadedMissingExtraInfoDescriptors,
         this.downloadedAllExtraInfoDescriptors,
diff --git a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorParser.java b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorParser.java
index 5224a61..113ac77 100644
--- a/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorParser.java
+++ b/src/main/java/org/torproject/metrics/collector/relaydescs/RelayDescriptorParser.java
@@ -14,6 +14,10 @@ import java.io.IOException;
 import java.io.StringReader;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeParseException;
 import java.util.SortedSet;
 import java.util.TimeZone;
 import java.util.TreeSet;
@@ -318,6 +322,44 @@ public class RelayDescriptorParser {
          * time(s) of microdesc consensuses containing them, because we
          * don't know which month directories to put them in.  Have to use
          * storeMicrodescriptor below. */
+      } else if (line.matches("[0-9]{10}")) {
+        /* The following code is a much more lenient version of the parser in
+         * metrics-lib that we need for storing a bandwidth file even if
+         * metrics-lib has trouble verifying its format. As in metrics-lib,
+         * identifying bandwidth files by a 10-digit timestamp in the first line
+         * breaks with files generated before 2002 or after 2286 and when the
+         * next descriptor identifier starts with just a timestamp in the first
+         * line rather than a document type identifier. */
+        String timestampLine = line;
+        LocalDateTime fileCreatedOrTimestamp = null;
+        try {
+          while ((line = br.readLine()) != null) {
+            if (line.startsWith("file_created=")) {
+              fileCreatedOrTimestamp = LocalDateTime.parse(
+                  line.substring("file_created=".length()));
+              break;
+            } else if (line.startsWith("bw=") || line.contains(" bw=")
+                || "====".equals(line) || "=====".equals(line)) {
+              break;
+            }
+          }
+        } catch (IOException | DateTimeParseException e) {
+          /* Fall back to using timestamp in first line. */
+        }
+        if (null == fileCreatedOrTimestamp) {
+          try {
+            fileCreatedOrTimestamp = LocalDateTime.ofInstant(
+                Instant.ofEpochSecond(Long.parseLong(timestampLine)),
+                ZoneOffset.UTC);
+          } catch (NumberFormatException | DateTimeParseException e) {
+            logger.warn("Could not parse timestamp or file_created time from "
+                + "bandwidth file. Storing with timestamp 2000-01-01 00:00:00");
+            fileCreatedOrTimestamp = LocalDateTime.of(2000, 1, 1, 0, 0, 0);
+          }
+        }
+        this.aw.storeBandwidthFile(data, fileCreatedOrTimestamp,
+            DigestUtils.sha256Hex(data).toUpperCase());
+        stored = true;
       }
       br.close();
     } catch (IOException | ParseException e) {
diff --git a/src/main/resources/create-tarballs.sh b/src/main/resources/create-tarballs.sh
index d247c52..7e4668a 100755
--- a/src/main/resources/create-tarballs.sh
+++ b/src/main/resources/create-tarballs.sh
@@ -47,6 +47,8 @@ TARBALLS=(
   consensuses-$YEARTWO-$MONTHTWO
   votes-$YEARONE-$MONTHONE
   votes-$YEARTWO-$MONTHTWO
+  bandwidths-$YEARONE-$MONTHONE
+  bandwidths-$YEARTWO-$MONTHTWO
   server-descriptors-$YEARONE-$MONTHONE
   server-descriptors-$YEARTWO-$MONTHTWO
   extra-infos-$YEARONE-$MONTHONE
@@ -72,6 +74,8 @@ DIRECTORIES=(
   $OUTDIR/relay-descriptors/consensus/$YEARTWO/$MONTHTWO
   $OUTDIR/relay-descriptors/vote/$YEARONE/$MONTHONE/
   $OUTDIR/relay-descriptors/vote/$YEARTWO/$MONTHTWO/
+  $OUTDIR/relay-descriptors/bandwidth/$YEARONE/$MONTHONE/
+  $OUTDIR/relay-descriptors/bandwidth/$YEARTWO/$MONTHTWO/
   $OUTDIR/relay-descriptors/server-descriptor/$YEARONE/$MONTHONE/
   $OUTDIR/relay-descriptors/server-descriptor/$YEARTWO/$MONTHTWO/
   $OUTDIR/relay-descriptors/extra-info/$YEARONE/$MONTHONE/
@@ -156,6 +160,9 @@ ln -f -s -t $ARCHIVEDIR/relay-descriptors/tor/ $TARBALLTARGETDIR/tor-20??-??.tar
 mkdir -p $ARCHIVEDIR/relay-descriptors/votes/
 ln -f -s -t $ARCHIVEDIR/relay-descriptors/votes/ $TARBALLTARGETDIR/votes-20??-??.tar.xz
 
+mkdir -p $ARCHIVEDIR/relay-descriptors/bandwidths/
+ln -f -s -t $ARCHIVEDIR/relay-descriptors/bandwidths/ $TARBALLTARGETDIR/bandwidths-20??-??.tar.xz
+
 mkdir -p $ARCHIVEDIR/torperf/
 ln -f -s -t $ARCHIVEDIR/torperf/ $TARBALLTARGETDIR/torperf-20??-??.tar.xz
 



More information about the tor-commits mailing list