[tor-commits] [collector/master] Update most of the bridgedescs module to NIO.

karsten at torproject.org karsten at torproject.org
Tue Dec 1 09:42:36 UTC 2020


commit c0ee1a6cf76f6f6b1677edccc1dc7e4055de50e9
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Sun Nov 29 00:05:47 2020 +0100

    Update most of the bridgedescs module to NIO.
    
    Replace all File references with their Path equivalents, and use Files
    methods wherever feasible.
    
    Part of #20542.
---
 .../bridgedescs/SanitizedBridgesWriter.java        | 288 +++++++++------------
 1 file changed, 127 insertions(+), 161 deletions(-)

diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
index 8db7db5..34156c2 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -26,16 +26,15 @@ import org.slf4j.LoggerFactory;
 
 import java.io.BufferedInputStream;
 import java.io.BufferedReader;
-import java.io.BufferedWriter;
 import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.StringReader;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
 import java.security.GeneralSecurityException;
 import java.security.SecureRandom;
 import java.text.ParseException;
@@ -45,7 +44,6 @@ import java.time.LocalDateTime;
 import java.time.format.DateTimeFormatter;
 import java.time.temporal.ChronoUnit;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -89,12 +87,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
 
   private String rsyncCatString;
 
-  private File bridgeDirectoriesDirectory;
-
-  /**
-   * Output directory for writing sanitized bridge descriptors.
-   */
-  private File sanitizedBridgesDirectory;
+  private Path inputDirectory;
 
   private boolean replaceIpAddressesWithHashes;
 
@@ -106,13 +99,15 @@ public class SanitizedBridgesWriter extends CollecTorMain {
 
   private boolean haveWarnedAboutInterval;
 
-  private File bridgeIpSecretsFile;
+  private Path bridgeIpSecretsFile;
 
   private SecureRandom secureRandom;
 
-  private String outputPathName;
+  private Path outputDirectory;
+
+  private Path recentDirectory;
 
-  private String recentPathName;
+  private Path statsDirectory;
 
   @Override
   public String module() {
@@ -127,25 +122,12 @@ public class SanitizedBridgesWriter extends CollecTorMain {
   @Override
   protected void startProcessing() throws ConfigurationException {
 
-    outputPathName = Paths.get(config.getPath(Key.OutputPath).toString(),
-        BRIDGE_DESCRIPTORS).toString();
-    recentPathName = Paths.get(config.getPath(Key.RecentPath).toString(),
-        BRIDGE_DESCRIPTORS).toString();
-    File bridgeDirectoriesDirectory =
-        config.getPath(Key.BridgeLocalOrigins).toFile();
-    File sanitizedBridgesDirectory = new File(outputPathName);
-    File statsDirectory = config.getPath(Key.StatsPath).toFile();
-
-    if (bridgeDirectoriesDirectory == null
-        || sanitizedBridgesDirectory == null || statsDirectory == null) {
-      throw new ConfigurationException("BridgeSnapshotsDirectory, "
-          + "SanitizedBridgesWriteDirectory, StatsPath should be set. "
-          + "Please, edit the 'collector.properties' file.");
-    }
-
-    /* Memorize argument values. */
-    this.bridgeDirectoriesDirectory = bridgeDirectoriesDirectory;
-    this.sanitizedBridgesDirectory = sanitizedBridgesDirectory;
+    this.outputDirectory = config.getPath(Key.OutputPath)
+        .resolve(BRIDGE_DESCRIPTORS);
+    this.recentDirectory = config.getPath(Key.RecentPath)
+        .resolve(BRIDGE_DESCRIPTORS);
+    this.inputDirectory = config.getPath(Key.BridgeLocalOrigins);
+    this.statsDirectory = config.getPath(Key.StatsPath);
     this.replaceIpAddressesWithHashes =
         config.getBool(Key.ReplaceIpAddressesWithHashes);
     SimpleDateFormat rsyncCatFormat = new SimpleDateFormat(
@@ -168,13 +150,10 @@ public class SanitizedBridgesWriter extends CollecTorMain {
     /* Read hex-encoded secrets for replacing IP addresses with hashes
      * from disk. */
     this.secretsForHashingIpAddresses = new TreeMap<>();
-    this.bridgeIpSecretsFile = new File(statsDirectory,
-        "bridge-ip-secrets");
-    if (this.bridgeIpSecretsFile.exists()) {
-      try (BufferedReader br = new BufferedReader(new FileReader(
-          this.bridgeIpSecretsFile))) {
-        String line;
-        while ((line = br.readLine()) != null) {
+    this.bridgeIpSecretsFile = statsDirectory.resolve("bridge-ip-secrets");
+    if (Files.exists(this.bridgeIpSecretsFile)) {
+      try {
+        for (String line : Files.readAllLines(this.bridgeIpSecretsFile)) {
           String[] parts = line.split(",");
           if ((line.length() != ("yyyy-MM,".length() + 31 * 2)
               && line.length() != ("yyyy-MM,".length() + 50 * 2)
@@ -229,7 +208,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
         this.bridgeSanitizingCutOffTimestamp);
 
     // Import bridge descriptors
-    this.readBridgeSnapshots(this.bridgeDirectoriesDirectory, statsDirectory);
+    this.readBridgeSnapshots(this.inputDirectory, this.statsDirectory);
 
     // Finish writing sanitized bridge descriptors to disk
     this.finishWriting();
@@ -239,8 +218,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
     this.cleanUpDirectories();
   }
 
-  private void readBridgeSnapshots(File bridgeDirectoriesDir,
-      File statsDirectory) {
+  private void readBridgeSnapshots(Path bridgeDirectoriesDir,
+      Path statsDirectory) {
 
     if (bridgeDirectoriesDir == null
         || statsDirectory == null) {
@@ -248,21 +227,16 @@ public class SanitizedBridgesWriter extends CollecTorMain {
     }
 
     SortedSet<String> parsed = new TreeSet<>();
-    File pbdFile = new File(statsDirectory, "parsed-bridge-directories");
+    Path pbdFile = statsDirectory.resolve("parsed-bridge-directories");
     boolean modified = false;
-    if (bridgeDirectoriesDir.exists()) {
-      if (pbdFile.exists()) {
-        logger.debug("Reading file {}...", pbdFile.getAbsolutePath());
+    if (Files.exists(bridgeDirectoriesDir)) {
+      if (Files.exists(pbdFile)) {
+        logger.debug("Reading file {}...", pbdFile);
         try {
-          BufferedReader br = new BufferedReader(new FileReader(pbdFile));
-          String line;
-          while ((line = br.readLine()) != null) {
-            parsed.add(line);
-          }
-          br.close();
-          logger.debug("Finished reading file {}.", pbdFile.getAbsolutePath());
+          parsed.addAll(Files.readAllLines(pbdFile));
+          logger.debug("Finished reading file {}.", pbdFile);
         } catch (IOException e) {
-          logger.warn("Failed reading file {}!", pbdFile.getAbsolutePath(), e);
+          logger.warn("Failed reading file {}!", pbdFile, e);
           return;
         }
       }
@@ -275,28 +249,31 @@ public class SanitizedBridgesWriter extends CollecTorMain {
       int skippedServerDescriptors = 0;
       int parsedExtraInfoDescriptors = 0;
       int skippedExtraInfoDescriptors = 0;
-      Stack<File> filesInInputDir = new Stack<>();
+      Stack<Path> filesInInputDir = new Stack<>();
       filesInInputDir.add(bridgeDirectoriesDir);
       while (!filesInInputDir.isEmpty()) {
-        File pop = filesInInputDir.pop();
-        if (pop.isDirectory()) {
-          Collections.addAll(filesInInputDir, pop.listFiles());
-        } else if (!parsed.contains(pop.getName())) {
+        Path pop = filesInInputDir.pop();
+        String fn = pop.getFileName().toString();
+        if (Files.isDirectory(pop)) {
           try {
-            FileInputStream in = new FileInputStream(pop);
+            Files.list(pop).forEachOrdered(filesInInputDir::add);
+          } catch (IOException e) {
+            e.printStackTrace();
+          }
+        } else if (!parsed.contains(pop.getFileName().toString())) {
+          try (InputStream in = Files.newInputStream(pop)) {
             if (in.available() > 0) {
               TarArchiveInputStream tais;
-              if (pop.getName().endsWith(".tar.gz")) {
+              if (fn.endsWith(".tar.gz")) {
                 GzipCompressorInputStream gcis =
                     new GzipCompressorInputStream(in);
                 tais = new TarArchiveInputStream(gcis);
-              } else if (pop.getName().endsWith(".tar")) {
+              } else if (fn.endsWith(".tar")) {
                 tais = new TarArchiveInputStream(in);
               } else {
                 continue;
               }
               BufferedInputStream bis = new BufferedInputStream(tais);
-              String fn = pop.getName();
               String[] fnParts = fn.split("-");
               if (fnParts.length != 5) {
                 logger.warn("Invalid bridge descriptor tarball file name: {}. "
@@ -416,16 +393,14 @@ public class SanitizedBridgesWriter extends CollecTorMain {
               }
               bis.close();
             }
-            in.close();
 
             /* Let's give some memory back, or we'll run out of it. */
             System.gc();
 
-            parsed.add(pop.getName());
+            parsed.add(fn);
             modified = true;
           } catch (IOException e) {
-            logger.warn("Could not parse bridge snapshot {}!", pop.getName(),
-                e);
+            logger.warn("Could not parse bridge snapshot {}!", pop, e);
           }
         }
       }
@@ -436,15 +411,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
           parsedStatuses, parsedServerDescriptors, skippedServerDescriptors,
           parsedExtraInfoDescriptors, skippedExtraInfoDescriptors);
       if (!parsed.isEmpty() && modified) {
-        logger.debug("Writing file {}...", pbdFile.getAbsolutePath());
-        pbdFile.getParentFile().mkdirs();
-        try (BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile))) {
-          for (String f : parsed) {
-            bw.append(f).append("\n");
-          }
-          logger.debug("Finished writing file {}.", pbdFile.getAbsolutePath());
+        logger.debug("Writing file {}...", pbdFile);
+        try {
+          Files.createDirectories(pbdFile.getParent());
+          Files.write(pbdFile, parsed);
+          logger.debug("Finished writing file {}.", pbdFile);
         } catch (IOException e) {
-          logger.warn("Failed writing file {}!", pbdFile.getAbsolutePath(), e);
+          logger.warn("Failed writing file {}!", pbdFile, e);
         }
       }
     }
@@ -627,15 +600,16 @@ public class SanitizedBridgesWriter extends CollecTorMain {
       } else {
         /* Append secret to file on disk immediately before using it, or
          * we might end with inconsistently sanitized bridges. */
+        byte[] newBytes = (month + "," + Hex.encodeHexString(secret) + "\n")
+            .getBytes();
         try {
-          if (!this.bridgeIpSecretsFile.exists()) {
-            this.bridgeIpSecretsFile.getParentFile().mkdirs();
+          if (Files.exists(this.bridgeIpSecretsFile)) {
+            Files.write(this.bridgeIpSecretsFile, newBytes,
+                StandardOpenOption.APPEND);
+          } else {
+            Files.createDirectories(this.bridgeIpSecretsFile.getParent());
+            Files.write(this.bridgeIpSecretsFile, newBytes);
           }
-          BufferedWriter bw = new BufferedWriter(new FileWriter(
-              this.bridgeIpSecretsFile,
-              this.bridgeIpSecretsFile.exists()));
-          bw.write(month + "," + Hex.encodeHexString(secret) + "\n");
-          bw.close();
         } catch (IOException e) {
           logger.warn("Could not store new secret "
               + "to disk! Not calculating any IP address or TCP port "
@@ -850,24 +824,24 @@ public class SanitizedBridgesWriter extends CollecTorMain {
       String stime = publicationTime.substring(11, 13)
           + publicationTime.substring(14, 16)
           + publicationTime.substring(17, 19);
-      File tarballFile = new File(
-          this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear
-          + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth
-          + sday + "-" + stime + "-" + authorityFingerprint);
-      File rsyncFile = new File(recentPathName, "statuses/"
-          + tarballFile.getName());
-      File[] outputFiles = new File[] { tarballFile, rsyncFile };
-      for (File outputFile : outputFiles) {
-        outputFile.getParentFile().mkdirs();
-        BufferedWriter bw = new BufferedWriter(new FileWriter(
-            outputFile));
-        bw.write(Annotation.Status.toString());
-        bw.write("published " + publicationTime + "\n");
-        bw.write(header.toString());
+      String fileName = syear + smonth + sday + "-" + stime + "-"
+          + authorityFingerprint;
+      Path tarballFile = this.outputDirectory.resolve(
+          Paths.get(syear, smonth, "statuses", sday, fileName));
+      Path rsyncFile = this.recentDirectory.resolve(
+          Paths.get("statuses", fileName));
+      Path[] outputFiles = new Path[] { tarballFile, rsyncFile };
+      for (Path outputFile : outputFiles) {
+        Files.createDirectories(outputFile.getParent());
+        StringBuilder sanitizedStatus = new StringBuilder();
+        sanitizedStatus.append(Annotation.Status.toString());
+        sanitizedStatus.append("published ").append(publicationTime)
+            .append("\n");
+        sanitizedStatus.append(header.toString());
         for (String scrubbed : scrubbedLines.values()) {
-          bw.write(scrubbed);
+          sanitizedStatus.append(scrubbed);
         }
-        bw.close();
+        Files.write(outputFile, sanitizedStatus.toString().getBytes());
       }
     } catch (IOException e) {
       logger.warn("Could not write sanitized bridge "
@@ -1234,33 +1208,30 @@ public class SanitizedBridgesWriter extends CollecTorMain {
     /* Determine filename of sanitized server descriptor. */
     String dyear = published.substring(0, 4);
     String dmonth = published.substring(5, 7);
-    File tarballFile = new File(
-        this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
-        + dyear + "/" + dmonth + "/server-descriptors/"
-        + "/" + descriptorDigest.charAt(0) + "/"
-        + descriptorDigest.charAt(1) + "/"
-        + descriptorDigest);
     try {
-      File rsyncCatFile = new File(config.getPath(Key.RecentPath).toFile(),
-          "bridge-descriptors/server-descriptors/" + this.rsyncCatString
-          + "-server-descriptors.tmp");
-      File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
+      Path tarballFile = this.outputDirectory.resolve(
+          Paths.get(dyear, dmonth, "server-descriptors",
+          descriptorDigest.substring(0, 1), descriptorDigest.substring(1, 2),
+          descriptorDigest));
+      Path rsyncCatFile = this.recentDirectory.resolve(
+          Paths.get("bridge-descriptors", "server-descriptors",
+          this.rsyncCatString + "-server-descriptors.tmp"));
+      Path[] outputFiles = new Path[] { tarballFile, rsyncCatFile };
       boolean[] append = new boolean[] { false, true };
       for (int i = 0; i < outputFiles.length; i++) {
-        File outputFile = outputFiles[i];
-        boolean appendToFile = append[i];
-        if (outputFile.exists() && !appendToFile) {
+        Path outputFile = outputFiles[i];
+        StandardOpenOption openOption = append[i] ? StandardOpenOption.APPEND
+            : StandardOpenOption.CREATE_NEW;
+        if (Files.exists(outputFile)
+            && openOption != StandardOpenOption.APPEND) {
           /* We already stored this descriptor to disk before, so let's
            * not store it yet another time. */
           break;
         }
-        outputFile.getParentFile().mkdirs();
-        BufferedWriter bw = new BufferedWriter(new FileWriter(
-            outputFile, appendToFile));
-        bw.write(scrubbed.toString());
-        bw.close();
+        Files.createDirectories(outputFile.getParent());
+        Files.write(outputFile, scrubbed.toString().getBytes(), openOption);
       }
-    } catch (ConfigurationException | IOException e) {
+    } catch (IOException e) {
       logger.warn("Could not write sanitized server descriptor to disk.", e);
     }
   }
@@ -1323,14 +1294,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
   public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) {
 
     /* Parse descriptor to generate a sanitized version. */
-    String scrubbedDesc = null;
     String published = null;
     String masterKeyEd25519FromIdentityEd25519 = null;
-    try {
-      BufferedReader br = new BufferedReader(new StringReader(new String(
-          data, StandardCharsets.US_ASCII)));
+    DescriptorBuilder scrubbed = new DescriptorBuilder();
+    try (BufferedReader br = new BufferedReader(new StringReader(new String(
+          data, StandardCharsets.US_ASCII)))) {
+      scrubbed.append(Annotation.BridgeExtraInfo.toString());
       String line;
-      DescriptorBuilder scrubbed = null;
       String hashedBridgeIdentity;
       String masterKeyEd25519 = null;
       while ((line = br.readLine()) != null) {
@@ -1346,7 +1316,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
           }
           hashedBridgeIdentity = DigestUtils.sha1Hex(Hex.decodeHex(
               parts[2].toCharArray())).toLowerCase();
-          scrubbed = new DescriptorBuilder("extra-info ").append(parts[1])
+          scrubbed.append("extra-info ").append(parts[1])
             .space().append(hashedBridgeIdentity.toUpperCase()).newLine();
 
         /* Parse the publication time to determine the file name. */
@@ -1432,7 +1402,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
         /* When we reach the signature, we're done. Write the sanitized
          * descriptor to disk below. */
         } else if (line.startsWith("router-signature")) {
-          scrubbedDesc = scrubbed.toString();
           break;
 
         /* Skip the ed25519 signature; we'll include a SHA256 digest of
@@ -1491,44 +1460,43 @@ public class SanitizedBridgesWriter extends CollecTorMain {
         return;
       }
     }
+    if (descriptorDigestSha256Base64 != null) {
+      scrubbed.append("router-digest-sha256 ")
+          .append(descriptorDigestSha256Base64).newLine();
+    }
+    scrubbed.append("router-digest ").append(descriptorDigest.toUpperCase())
+        .newLine();
+
+    /* Determine filename of sanitized extra-info descriptor. */
     String dyear = published.substring(0, 4);
     String dmonth = published.substring(5, 7);
-    File tarballFile = new File(
-        this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
-        + dyear + "/" + dmonth + "/extra-infos/"
-        + descriptorDigest.charAt(0) + "/"
-        + descriptorDigest.charAt(1) + "/"
-        + descriptorDigest);
+
     try {
-      File rsyncCatFile = new File(config.getPath(Key.RecentPath).toFile(),
-          "bridge-descriptors/extra-infos/" + this.rsyncCatString
-          + "-extra-infos.tmp");
-      File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
+      Path tarballFile = this.outputDirectory.resolve(
+          Paths.get(dyear, dmonth, "extra-infos",
+          descriptorDigest.substring(0, 1), descriptorDigest.substring(1, 2),
+          descriptorDigest));
+      Path rsyncCatFile = this.recentDirectory.resolve(
+          Paths.get("bridge-descriptors", "extra-infos",
+          this.rsyncCatString + "-extra-infos.tmp"));
+      Path[] outputFiles = new Path[] { tarballFile, rsyncCatFile };
       boolean[] append = new boolean[] { false, true };
       for (int i = 0; i < outputFiles.length; i++) {
-        File outputFile = outputFiles[i];
-        boolean appendToFile = append[i];
-        if (outputFile.exists() && !appendToFile) {
+        Path outputFile = outputFiles[i];
+        StandardOpenOption openOption = append[i] ? StandardOpenOption.APPEND
+            : StandardOpenOption.CREATE_NEW;
+        if (Files.exists(outputFile)
+            && openOption != StandardOpenOption.APPEND) {
           /* We already stored this descriptor to disk before, so let's
            * not store it yet another time. */
           break;
         }
-        outputFile.getParentFile().mkdirs();
-        BufferedWriter bw = new BufferedWriter(new FileWriter(
-            outputFile, appendToFile));
-        bw.write(Annotation.BridgeExtraInfo.toString());
-        bw.write(scrubbedDesc);
-        if (descriptorDigestSha256Base64 != null) {
-          bw.write("router-digest-sha256 " + descriptorDigestSha256Base64
-              + "\n");
-        }
-        bw.write("router-digest " + descriptorDigest.toUpperCase()
-            + "\n");
-        bw.close();
+        Files.createDirectories(outputFile.getParent());
+        Files.write(outputFile, scrubbed.toString().getBytes(), openOption);
       }
-    } catch (Exception e) {
-      logger.warn("Could not write sanitized "
-          + "extra-info descriptor to disk.", e);
+    } catch (IOException e) {
+      logger.warn("Could not write sanitized extra-info descriptor to disk.",
+          e);
     }
   }
 
@@ -1547,20 +1515,18 @@ public class SanitizedBridgesWriter extends CollecTorMain {
       try {
         int kept = 0;
         int deleted = 0;
-        BufferedWriter bw = new BufferedWriter(new FileWriter(
-            this.bridgeIpSecretsFile));
+        List<String> lines = new ArrayList<>();
         for (Map.Entry<String, byte[]> e :
             this.secretsForHashingIpAddresses.entrySet()) {
           if (e.getKey().compareTo(
               this.bridgeSanitizingCutOffTimestamp) < 0) {
             deleted++;
           } else {
-            bw.write(e.getKey() + "," + Hex.encodeHexString(e.getValue())
-                + "\n");
+            lines.add(e.getKey() + "," + Hex.encodeHexString(e.getValue()));
             kept++;
           }
         }
-        bw.close();
+        Files.write(this.bridgeIpSecretsFile, lines);
         logger.info("Deleted {} secrets that we don't "
             + "need anymore and kept {}.", deleted, kept);
       } catch (IOException e) {
@@ -1612,9 +1578,9 @@ public class SanitizedBridgesWriter extends CollecTorMain {
    * in the last three days (seven weeks), and remove the .tmp extension from
    * newly written files. */
   private void cleanUpDirectories() {
-    PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName),
+    PersistenceUtils.cleanDirectory(this.recentDirectory,
         Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
-    PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName),
+    PersistenceUtils.cleanDirectory(this.outputDirectory,
         Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
   }
 }





More information about the tor-commits mailing list