[tor-commits] [collector/release] Archive OnionPerf analysis .json files.

karsten at torproject.org karsten at torproject.org
Thu Apr 30 16:48:28 UTC 2020


commit 0f5536ed68c79be50a9b1e326356008f7ffaefff
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Apr 30 17:29:35 2020 +0200

    Archive OnionPerf analysis .json files.
    
    Implements #34072.
---
 CHANGELOG.md                                       |   5 +
 build.xml                                          |   2 +-
 .../collector/onionperf/OnionPerfDownloader.java   | 218 +++++++++++++++++----
 src/main/resources/collector.properties            |   2 +-
 src/main/resources/create-tarballs.sh              |   7 +
 5 files changed, 195 insertions(+), 39 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9bbedc4..a55a0fc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changes in version 1.1?.? - 2020-0?-??
 
+ * Medium changes
+   - Update to metrics-lib 2.12.1.
+   - Download OnionPerf analysis .json files in addition to .tpf
+     files.
+
  * Minor changes
    - Simplify logging configuration.
    - Set default locale `US` and default time zone `UTC` at the
diff --git a/build.xml b/build.xml
index 748351e..a9988f5 100644
--- a/build.xml
+++ b/build.xml
@@ -12,7 +12,7 @@
   <property name="release.version" value="1.14.1-dev" />
   <property name="project-main-class" value="org.torproject.metrics.collector.Main" />
   <property name="name" value="collector"/>
-  <property name="metricslibversion" value="2.10.0" />
+  <property name="metricslibversion" value="2.12.1" />
   <property name="jarincludes" value="collector.properties logback.xml" />
 
   <patternset id="runtime" >
diff --git a/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java b/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java
index b651620..d22ac0b 100644
--- a/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java
+++ b/src/main/java/org/torproject/metrics/collector/onionperf/OnionPerfDownloader.java
@@ -13,6 +13,7 @@ import org.torproject.metrics.collector.conf.Key;
 import org.torproject.metrics.collector.cron.CollecTorMain;
 import org.torproject.metrics.collector.downloader.Downloader;
 
+import org.apache.commons.compress.utils.IOUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -32,14 +33,16 @@ import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.SortedSet;
 import java.util.Stack;
 import java.util.TreeSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-/** Download download .tpf files from OnionPerf hosts. */
+/** Download OnionPerf files from OnionPerf hosts. */
 public class OnionPerfDownloader extends CollecTorMain {
 
   private static final Logger logger = LoggerFactory.getLogger(
@@ -47,6 +50,8 @@ public class OnionPerfDownloader extends CollecTorMain {
 
   private static final String TORPERF = "torperf";
 
+  private static final String ONIONPERF = "onionperf";
+
   /** Instantiate the OnionPerf module using the given configuration. */
   public OnionPerfDownloader(Configuration config) {
     super(config);
@@ -54,21 +59,25 @@ public class OnionPerfDownloader extends CollecTorMain {
   }
 
   /** File containing the download history, which is necessary, because
-   * OnionPerf does not delete older .tpf files, but which enables us to do
-   * so. */
+   * OnionPerf does not delete older files, but which enables us to do so. */
   private File onionPerfDownloadedFile;
 
-  /** Full URLs of .tpf files downloaded in the current or in past
-   * executions. */
-  private SortedSet<String> downloadedTpfFiles = new TreeSet<>();
+  /** Full URLs of files downloaded in the current or in past executions. */
+  private SortedSet<String> downloadedFiles = new TreeSet<>();
 
   /** Base URLs of configured OnionPerf hosts. */
   private URL[] onionPerfHosts = null;
 
-  /** Directory for storing archived .tpf files. */
+  /** Relative URLs of available .tpf files by base URL. */
+  private Map<URL, List<String>> tpfFileUrls = new HashMap<>();
+
+  /** Relative URLs of available OnionPerf analysis files by base URL. */
+  private Map<URL, List<String>> onionPerfAnalysisFileUrls = new HashMap<>();
+
+  /** Directory for storing archived files. */
   private File archiveDirectory = null;
 
-  /** Directory for storing recent .tpf files. */
+  /** Directory for storing recent files. */
   private File recentDirectory = null;
 
   @Override
@@ -87,19 +96,17 @@ public class OnionPerfDownloader extends CollecTorMain {
         new File(config.getPath(Key.StatsPath).toFile(),
         "onionperf-downloaded");
     this.onionPerfHosts = config.getUrlArray(Key.OnionPerfHosts);
-    this.readDownloadedOnionPerfTpfFiles();
-    this.archiveDirectory = new File(config.getPath(Key.OutputPath).toFile(),
-        TORPERF);
-    this.recentDirectory = new File(config.getPath(Key.RecentPath).toFile(),
-        TORPERF);
+    this.readDownloadedOnionPerfFiles();
+    this.archiveDirectory = config.getPath(Key.OutputPath).toFile();
+    this.recentDirectory = config.getPath(Key.RecentPath).toFile();
     for (URL baseUrl : this.onionPerfHosts) {
       this.downloadFromOnionPerfHost(baseUrl);
     }
-    this.writeDownloadedOnionPerfTpfFiles();
+    this.writeDownloadedOnionPerfFiles();
     this.cleanUpRsyncDirectory();
   }
 
-  private void readDownloadedOnionPerfTpfFiles() {
+  private void readDownloadedOnionPerfFiles() {
     if (!this.onionPerfDownloadedFile.exists()) {
       return;
     }
@@ -107,47 +114,69 @@ public class OnionPerfDownloader extends CollecTorMain {
           this.onionPerfDownloadedFile))) {
       String line;
       while ((line = br.readLine()) != null) {
-        this.downloadedTpfFiles.add(line);
+        this.downloadedFiles.add(line);
       }
     } catch (IOException e) {
       logger.info("Unable to read download history file '{}'. Ignoring "
-          + "download history and downloading all available .tpf files.",
+          + "download history and downloading all available files.",
           this.onionPerfDownloadedFile.getAbsolutePath());
-      this.downloadedTpfFiles.clear();
+      this.downloadedFiles.clear();
     }
   }
 
   private void downloadFromOnionPerfHost(URL baseUrl) {
     logger.info("Downloading from OnionPerf host {}", baseUrl);
-    List<String> tpfFileNames =
-        this.downloadOnionPerfDirectoryListing(baseUrl);
+    this.downloadOnionPerfDirectoryListing(baseUrl);
     String source = baseUrl.getHost().split("\\.")[0];
-    for (String tpfFileName : tpfFileNames) {
-      this.downloadAndParseOnionPerfTpfFile(baseUrl, source, tpfFileName);
+    if (this.tpfFileUrls.containsKey(baseUrl)) {
+      for (String tpfFileName : this.tpfFileUrls.get(baseUrl)) {
+        this.downloadAndParseOnionPerfTpfFile(baseUrl, source, tpfFileName);
+      }
+    }
+    if (this.onionPerfAnalysisFileUrls.containsKey(baseUrl)) {
+      for (String onionPerfAnalysisFileName
+          : this.onionPerfAnalysisFileUrls.get(baseUrl)) {
+        this.downloadAndParseOnionPerfAnalysisFile(baseUrl, source,
+            onionPerfAnalysisFileName);
+      }
     }
   }
 
-  /** Pattern for links contained in directory listings. */
+  /** Patterns for links contained in directory listings. */
   private static final Pattern TPF_FILE_URL_PATTERN =
       Pattern.compile(".*<a href=\"([^\"]+\\.tpf)\">.*");
 
-  private List<String> downloadOnionPerfDirectoryListing(URL baseUrl) {
-    List<String> tpfFileUrls = new ArrayList<>();
+  private static final Pattern ONIONPERF_ANALYSIS_FILE_URL_PATTERN =
+      Pattern.compile(
+      ".*<a href=\"([0-9-]{10}\\.onionperf\\.analysis\\.json\\.xz)\">.*");
+
+  private void downloadOnionPerfDirectoryListing(URL baseUrl) {
     try (BufferedReader br = new BufferedReader(new InputStreamReader(
         baseUrl.openStream()))) {
       String line;
       while ((line = br.readLine()) != null) {
-        Matcher matcher = TPF_FILE_URL_PATTERN.matcher(line);
-        if (matcher.matches() && !matcher.group(1).startsWith("/")) {
-          tpfFileUrls.add(matcher.group(1));
+        Matcher tpfFileMatcher = TPF_FILE_URL_PATTERN.matcher(line);
+        if (tpfFileMatcher.matches()
+            && !tpfFileMatcher.group(1).startsWith("/")) {
+          this.tpfFileUrls.putIfAbsent(baseUrl, new ArrayList<>());
+          this.tpfFileUrls.get(baseUrl).add(tpfFileMatcher.group(1));
+        }
+        Matcher onionPerfAnalysisFileMatcher
+            = ONIONPERF_ANALYSIS_FILE_URL_PATTERN.matcher(line);
+        if (onionPerfAnalysisFileMatcher.matches()
+            && !onionPerfAnalysisFileMatcher.group(1).startsWith("/")) {
+          this.onionPerfAnalysisFileUrls.putIfAbsent(baseUrl,
+              new ArrayList<>());
+          this.onionPerfAnalysisFileUrls.get(baseUrl)
+              .add(onionPerfAnalysisFileMatcher.group(1));
         }
       }
     } catch (IOException e) {
       logger.warn("Unable to download directory listing from '{}'.  Skipping "
           + "this OnionPerf host.", baseUrl);
-      tpfFileUrls.clear();
+      this.tpfFileUrls.remove(baseUrl);
+      this.onionPerfAnalysisFileUrls.remove(baseUrl);
     }
-    return tpfFileUrls;
   }
 
   private static final DateFormat DATE_FORMAT;
@@ -169,7 +198,7 @@ public class OnionPerfDownloader extends CollecTorMain {
     }
 
     /* Skip if we successfully downloaded this file before. */
-    if (this.downloadedTpfFiles.contains(tpfFileUrl.toString())) {
+    if (this.downloadedFiles.contains(tpfFileUrl.toString())) {
       return;
     }
 
@@ -197,7 +226,8 @@ public class OnionPerfDownloader extends CollecTorMain {
     }
 
     /* Download file contents to temporary file. */
-    File tempFile = new File(this.recentDirectory, "." + tpfFileName);
+    File tempFile = new File(this.recentDirectory,
+        TORPERF + "/." + tpfFileName);
     byte[] downloadedBytes;
     try {
       downloadedBytes = Downloader.downloadFromHttpServer(
@@ -263,7 +293,7 @@ public class OnionPerfDownloader extends CollecTorMain {
 
     /* Copy/move files in place. */
     File archiveFile = new File(this.archiveDirectory,
-         date.replaceAll("-", "/") + "/" + tpfFileName);
+         TORPERF + "/" + date.replaceAll("-", "/") + "/" + tpfFileName);
     archiveFile.getParentFile().mkdirs();
     try {
       Files.copy(tempFile.toPath(), archiveFile.toPath(),
@@ -274,18 +304,132 @@ public class OnionPerfDownloader extends CollecTorMain {
       tempFile.delete();
       return;
     }
-    File recentFile = new File(this.recentDirectory, tpfFileName);
+    File recentFile = new File(this.recentDirectory,
+        TORPERF + "/" + tpfFileName);
+    tempFile.renameTo(recentFile);
+
+    /* Add to download history to avoid downloading it again. */
+    this.downloadedFiles.add(baseUrl + tpfFileName);
+  }
+
+
+  private void downloadAndParseOnionPerfAnalysisFile(URL baseUrl, String source,
+      String onionPerfAnalysisFileName) {
+    URL onionPerfAnalysisFileUrl;
+    try {
+      onionPerfAnalysisFileUrl = new URL(baseUrl, onionPerfAnalysisFileName);
+    } catch (MalformedURLException e1) {
+      logger.warn("Unable to put together base URL '{}' and file path '{}' to "
+          + "a URL. Skipping.", baseUrl, onionPerfAnalysisFileName);
+      return;
+    }
+
+    /* Skip if we successfully downloaded this file before. */
+    if (this.downloadedFiles.contains(onionPerfAnalysisFileUrl.toString())) {
+      return;
+    }
+
+    /* Parse date from file name: yyyy-MM-dd.onionperf.analysis.json.xz */
+    String date;
+    try {
+      date = onionPerfAnalysisFileName.substring(0, 10);
+      DATE_FORMAT.parse(date);
+    } catch (NumberFormatException | ParseException e) {
+      logger.warn("Invalid file name '{}{}'. Skipping.", baseUrl,
+          onionPerfAnalysisFileName, e);
+      return;
+    }
+
+    /* Download file contents to temporary file. */
+    File tempFile = new File(this.recentDirectory,
+        ONIONPERF + "/." + onionPerfAnalysisFileName);
+    byte[] downloadedBytes;
+    try {
+      downloadedBytes = Downloader.downloadFromHttpServer(
+          new URL(baseUrl + onionPerfAnalysisFileName));
+    } catch (IOException e) {
+      logger.warn("Unable to download '{}{}'. Skipping.", baseUrl,
+          onionPerfAnalysisFileName, e);
+      return;
+    }
+    if (null == downloadedBytes) {
+      logger.warn("Unable to download '{}{}'. Skipping.", baseUrl,
+          onionPerfAnalysisFileName);
+      return;
+    }
+    tempFile.getParentFile().mkdirs();
+    try {
+      Files.write(tempFile.toPath(), downloadedBytes);
+    } catch (IOException e) {
+      logger.warn("Unable to write previously downloaded '{}{}' to temporary "
+          + "file '{}'. Skipping.", baseUrl, onionPerfAnalysisFileName,
+          tempFile, e);
+      return;
+    }
+
+    /* Validate contained descriptors. */
+    DescriptorParser descriptorParser =
+        DescriptorSourceFactory.createDescriptorParser();
+    byte[] rawDescriptorBytes;
+    try {
+      rawDescriptorBytes = IOUtils.toByteArray(
+          Files.newInputStream(tempFile.toPath()));
+    } catch (IOException e) {
+      logger.warn("OnionPerf file '{}{}' could not be read. Skipping.", baseUrl,
+          onionPerfAnalysisFileName, e);
+      tempFile.delete();
+      return;
+    }
+    Iterable<Descriptor> descriptors = descriptorParser.parseDescriptors(
+        rawDescriptorBytes, null, onionPerfAnalysisFileName);
+    String message = null;
+    for (Descriptor descriptor : descriptors) {
+      if (!(descriptor instanceof TorperfResult)) {
+        message = "File contains descriptors other than an OnionPerf analysis "
+            + "document: " + descriptor.getClass();
+        break;
+      }
+      TorperfResult torperf = (TorperfResult) descriptor;
+      if (!source.equals(torperf.getSource())) {
+        message = "File contains transfer from another source: "
+            + torperf.getSource();
+        break;
+      }
+    }
+    if (null != message) {
+      logger.warn("OnionPerf file '{}{}' was found to be invalid: {}. "
+          + "Skipping.", baseUrl, onionPerfAnalysisFileName, message);
+      tempFile.delete();
+      return;
+    }
+
+    /* Copy/move files in place. */
+    File archiveFile = new File(this.archiveDirectory,
+        ONIONPERF + "/" + date.replaceAll("-", "/") + "/" + date + "." + source
+        + ".onionperf.analysis.json.xz");
+    archiveFile.getParentFile().mkdirs();
+    try {
+      Files.copy(tempFile.toPath(), archiveFile.toPath(),
+          StandardCopyOption.REPLACE_EXISTING);
+    } catch (IOException e) {
+      logger.warn("Unable to copy OnionPerf file {} to {}. Skipping.",
+          tempFile, archiveFile, e);
+      tempFile.delete();
+      return;
+    }
+    File recentFile = new File(this.recentDirectory,
+        ONIONPERF + "/" + date + "." + source + ".onionperf.analysis.json.xz");
     tempFile.renameTo(recentFile);
 
     /* Add to download history to avoid downloading it again. */
-    this.downloadedTpfFiles.add(baseUrl + tpfFileName);
+    this.downloadedFiles.add(baseUrl + onionPerfAnalysisFileName);
   }
 
-  private void writeDownloadedOnionPerfTpfFiles() {
+  private void writeDownloadedOnionPerfFiles() {
     this.onionPerfDownloadedFile.getParentFile().mkdirs();
     try (BufferedWriter bw = new BufferedWriter(new FileWriter(
           this.onionPerfDownloadedFile))) {
-      for (String line : this.downloadedTpfFiles) {
+      for (String line : this.downloadedFiles) {
         bw.write(line);
         bw.newLine();
       }
diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties
index 61baed5..2347021 100644
--- a/src/main/resources/collector.properties
+++ b/src/main/resources/collector.properties
@@ -175,7 +175,7 @@ ExitlistUrl = https://check.torproject.org/exit-addresses
 ######## OnionPerf downloader ########
 #
 ## Define descriptor sources
-#  possible values: Remote,Sync
+#  possible values: Remote,Sync (.tpf files only!)
 OnionPerfSources = Remote
 #  Retrieve files from the following CollecTor instances.
 #  List of URLs separated by comma.
diff --git a/src/main/resources/create-tarballs.sh b/src/main/resources/create-tarballs.sh
index 07952c7..fcac2f3 100755
--- a/src/main/resources/create-tarballs.sh
+++ b/src/main/resources/create-tarballs.sh
@@ -40,6 +40,8 @@ TARBALLS=(
   exit-list-$YEARTWO-$MONTHTWO
   torperf-$YEARONE-$MONTHONE
   torperf-$YEARTWO-$MONTHTWO
+  onionperf-$YEARONE-$MONTHONE
+  onionperf-$YEARTWO-$MONTHTWO
   certs
   microdescs-$YEARONE-$MONTHONE
   microdescs-$YEARTWO-$MONTHTWO
@@ -73,6 +75,8 @@ DIRECTORIES=(
   $OUTDIR/exit-lists/$YEARTWO/$MONTHTWO/
   $OUTDIR/torperf/$YEARONE/$MONTHONE/
   $OUTDIR/torperf/$YEARTWO/$MONTHTWO/
+  $OUTDIR/onionperf/$YEARONE/$MONTHONE/
+  $OUTDIR/onionperf/$YEARTWO/$MONTHTWO/
   $OUTDIR/relay-descriptors/certs/
   $OUTDIR/relay-descriptors/microdesc/$YEARONE/$MONTHONE
   $OUTDIR/relay-descriptors/microdesc/$YEARTWO/$MONTHTWO
@@ -178,6 +182,9 @@ ln -f -s -t $ARCHIVEDIR/relay-descriptors/bandwidths/ $TARBALLTARGETDIR/bandwidt
 mkdir -p $ARCHIVEDIR/torperf/
 ln -f -s -t $ARCHIVEDIR/torperf/ $TARBALLTARGETDIR/torperf-20??-??.tar.xz
 
+mkdir -p $ARCHIVEDIR/onionperf/
+ln -f -s -t $ARCHIVEDIR/onionperf/ $TARBALLTARGETDIR/onionperf-20??-??.tar.xz
+
 mkdir -p $ARCHIVEDIR/webstats/
 ln -f -s -t $ARCHIVEDIR/webstats/ $TARBALLTARGETDIR/webstats-20??-??.tar
 





More information about the tor-commits mailing list