[tor-commits] [collector/master] Move lower-level sanitizing code to its own class.

karsten at torproject.org karsten at torproject.org
Tue Dec 1 09:42:36 UTC 2020


commit a2fdbf3c6f67e5ddb735773e1ab456ee4f464555
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Nov 30 21:59:17 2020 +0100

    Move lower-level sanitizing code to its own class.
    
    Part of #20542.
---
 .../bridgedescs/SanitizedBridgesWriter.java        | 404 ++-------------------
 .../bridgedescs/SensitivePartsSanitizer.java       | 378 +++++++++++++++++++
 .../bridgedescs/SanitizedBridgesWriterTest.java    |   2 +
 3 files changed, 410 insertions(+), 374 deletions(-)

diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
index 34156c2..843aa40 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -3,8 +3,6 @@
 
 package org.torproject.metrics.collector.bridgedescs;
 
-import static java.time.ZoneOffset.UTC;
-
 import org.torproject.descriptor.BridgeExtraInfoDescriptor;
 import org.torproject.descriptor.BridgeNetworkStatus;
 import org.torproject.descriptor.BridgeServerDescriptor;
@@ -35,18 +33,12 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.StandardOpenOption;
-import java.security.GeneralSecurityException;
-import java.security.SecureRandom;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.time.Instant;
-import java.time.LocalDateTime;
-import java.time.format.DateTimeFormatter;
 import java.time.temporal.ChronoUnit;
-import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.SortedMap;
@@ -89,26 +81,14 @@ public class SanitizedBridgesWriter extends CollecTorMain {
 
   private Path inputDirectory;
 
-  private boolean replaceIpAddressesWithHashes;
-
-  private boolean persistenceProblemWithSecrets;
-
-  private SortedMap<String, byte[]> secretsForHashingIpAddresses;
-
-  private String bridgeSanitizingCutOffTimestamp;
-
-  private boolean haveWarnedAboutInterval;
-
-  private Path bridgeIpSecretsFile;
-
-  private SecureRandom secureRandom;
-
   private Path outputDirectory;
 
   private Path recentDirectory;
 
   private Path statsDirectory;
 
+  private SensitivePartsSanitizer sensitivePartsSanitizer;
+
   @Override
   public String module() {
     return "bridgedescs";
@@ -128,90 +108,30 @@ public class SanitizedBridgesWriter extends CollecTorMain {
         .resolve(BRIDGE_DESCRIPTORS);
     this.inputDirectory = config.getPath(Key.BridgeLocalOrigins);
     this.statsDirectory = config.getPath(Key.StatsPath);
-    this.replaceIpAddressesWithHashes =
+    boolean replaceIpAddressesWithHashes =
         config.getBool(Key.ReplaceIpAddressesWithHashes);
     SimpleDateFormat rsyncCatFormat = new SimpleDateFormat(
         "yyyy-MM-dd-HH-mm-ss");
     this.rsyncCatString = rsyncCatFormat.format(
         System.currentTimeMillis());
 
-    /* Initialize secure random number generator if we need it. */
-    if (this.replaceIpAddressesWithHashes) {
-      try {
-        this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN");
-      } catch (GeneralSecurityException e) {
-        logger.warn("Could not initialize secure "
-            + "random number generator! Not calculating any IP address "
-            + "hashes in this execution!", e);
-        this.persistenceProblemWithSecrets = true;
-      }
-    }
-
-    /* Read hex-encoded secrets for replacing IP addresses with hashes
-     * from disk. */
-    this.secretsForHashingIpAddresses = new TreeMap<>();
-    this.bridgeIpSecretsFile = statsDirectory.resolve("bridge-ip-secrets");
-    if (Files.exists(this.bridgeIpSecretsFile)) {
-      try {
-        for (String line : Files.readAllLines(this.bridgeIpSecretsFile)) {
-          String[] parts = line.split(",");
-          if ((line.length() != ("yyyy-MM,".length() + 31 * 2)
-              && line.length() != ("yyyy-MM,".length() + 50 * 2)
-              && line.length() != ("yyyy-MM,".length() + 83 * 2))
-              || parts.length != 2) {
-            logger.warn("Invalid line in bridge-ip-secrets file "
-                + "starting with '{}'! "
-                + "Not calculating any IP address hashes in this "
-                + "execution!", line.substring(0, 7));
-            this.persistenceProblemWithSecrets = true;
-            break;
-          }
-          String month = parts[0];
-          byte[] secret = Hex.decodeHex(parts[1].toCharArray());
-          this.secretsForHashingIpAddresses.put(month, secret);
-        }
-        if (!this.persistenceProblemWithSecrets) {
-          logger.debug("Read {} secrets for hashing bridge IP addresses.",
-              this.secretsForHashingIpAddresses.size());
-        }
-      } catch (DecoderException e) {
-        logger.warn("Failed to decode hex string in {}! Not calculating any IP "
-            + "address hashes in this execution!", this.bridgeIpSecretsFile, e);
-        this.persistenceProblemWithSecrets = true;
-      } catch (IOException e) {
-        logger.warn("Failed to read {}! Not calculating any IP "
-            + "address hashes in this execution!", this.bridgeIpSecretsFile, e);
-        this.persistenceProblemWithSecrets = true;
-      }
-    }
-
-    long limitBridgeSanitizingIntervalDays
-        = config.getInt(Key.BridgeDescriptorMappingsLimit);
-
-    /* If we're configured to keep secrets only for a limited time, define
-     * the cut-off day and time. */
-    LocalDateTime bridgeSanitizingCutOffDateTime
-        = LocalDateTime.of(1999, 12, 31, 23, 59, 59);
-    if (limitBridgeSanitizingIntervalDays >= 0L) {
-      LocalDateTime configuredBridgeSanitizingCutOffDateTime
-          = LocalDateTime.now(UTC).minusDays(limitBridgeSanitizingIntervalDays);
-      if (configuredBridgeSanitizingCutOffDateTime.isAfter(
-          bridgeSanitizingCutOffDateTime)) {
-        bridgeSanitizingCutOffDateTime
-            = configuredBridgeSanitizingCutOffDateTime;
-      }
+    Path bridgeIpSecretsFile = statsDirectory.resolve("bridge-ip-secrets");
+    if (replaceIpAddressesWithHashes) {
+      long limitBridgeSanitizingIntervalDays
+          = config.getInt(Key.BridgeDescriptorMappingsLimit);
+      this.sensitivePartsSanitizer = new SensitivePartsSanitizer(
+          bridgeIpSecretsFile, limitBridgeSanitizingIntervalDays);
+    } else {
+      this.sensitivePartsSanitizer = new SensitivePartsSanitizer();
     }
-    this.bridgeSanitizingCutOffTimestamp = bridgeSanitizingCutOffDateTime
-        .format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
-
-    logger.info("Using cut-off datetime '{}' for secrets.",
-        this.bridgeSanitizingCutOffTimestamp);
 
     // Import bridge descriptors
     this.readBridgeSnapshots(this.inputDirectory, this.statsDirectory);
 
     // Finish writing sanitized bridge descriptors to disk
-    this.finishWriting();
+    if (replaceIpAddressesWithHashes) {
+      this.sensitivePartsSanitizer.finishWriting();
+    }
 
     this.checkStaleDescriptors();
 
@@ -423,206 +343,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
     }
   }
 
-  private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
-      String published) throws IOException {
-    if (!orAddress.contains(":")) {
-      /* Malformed or-address or a line. */
-      return null;
-    }
-    String addressPart = orAddress.substring(0,
-        orAddress.lastIndexOf(":"));
-    String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1);
-    String scrubbedAddressPart;
-    if (addressPart.startsWith("[")) {
-      scrubbedAddressPart = this.scrubIpv6Address(addressPart,
-          fingerprintBytes, published);
-    } else {
-      scrubbedAddressPart = this.scrubIpv4Address(addressPart,
-          fingerprintBytes, published);
-    }
-    String scrubbedPort = this.scrubTcpPort(portPart, fingerprintBytes,
-        published);
-    return (scrubbedAddressPart == null ? null :
-          scrubbedAddressPart + ":" + scrubbedPort);
-  }
-
-  private String scrubIpv4Address(String address, byte[] fingerprintBytes,
-      String published) throws IOException {
-    if (this.replaceIpAddressesWithHashes) {
-      if (this.persistenceProblemWithSecrets) {
-        /* There's a persistence problem, so we shouldn't scrub more IP
-         * addresses in this execution. */
-        return null;
-      }
-      byte[] hashInput = new byte[4 + 20 + 31];
-      String[] ipParts = address.split("\\.");
-      for (int i = 0; i < 4; i++) {
-        hashInput[i] = (byte) Integer.parseInt(ipParts[i]);
-      }
-      System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
-      String month = published.substring(0, "yyyy-MM".length());
-      byte[] secret = this.getSecretForMonth(month);
-      System.arraycopy(secret, 0, hashInput, 24, 31);
-      byte[] hashOutput = DigestUtils.sha256(hashInput);
-      return "10."
-          + (((int) hashOutput[0] + 256) % 256) + "."
-          + (((int) hashOutput[1] + 256) % 256) + "."
-          + (((int) hashOutput[2] + 256) % 256);
-    } else {
-      return "127.0.0.1";
-    }
-  }
-
-  private String scrubIpv6Address(String address, byte[] fingerprintBytes,
-      String published) throws IOException {
-    StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::");
-    if (this.replaceIpAddressesWithHashes) {
-      if (this.persistenceProblemWithSecrets) {
-        /* There's a persistence problem, so we shouldn't scrub more IP
-         * addresses in this execution. */
-        return null;
-      }
-      String[] doubleColonSeparatedParts = address.substring(1,
-          address.length() - 1).split("::", -1);
-      if (doubleColonSeparatedParts.length > 2) {
-        /* Invalid IPv6 address. */
-        return null;
-      }
-      List<String> hexParts = new ArrayList<>();
-      for (String doubleColonSeparatedPart : doubleColonSeparatedParts) {
-        StringBuilder hexPart = new StringBuilder();
-        String[] parts = doubleColonSeparatedPart.split(":", -1);
-        if (parts.length < 1 || parts.length > 8) {
-          /* Invalid IPv6 address. */
-          return null;
-        }
-        for (String part : parts) {
-          if (part.contains(".")) {
-            String[] ipParts = part.split("\\.");
-            byte[] ipv4Bytes = new byte[4];
-            if (ipParts.length != 4) {
-              /* Invalid IPv4 part in IPv6 address. */
-              return null;
-            }
-            for (int m = 0; m < 4; m++) {
-              ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]);
-            }
-            hexPart.append(Hex.encodeHexString(ipv4Bytes));
-          } else if (part.length() > 4) {
-            /* Invalid IPv6 address. */
-            return null;
-          } else {
-            for (int k = part.length(); k < 4; k++) {
-              hexPart.append("0");
-            }
-            hexPart.append(part);
-          }
-        }
-        hexParts.add(hexPart.toString());
-      }
-      StringBuilder hex = new StringBuilder();
-      hex.append(hexParts.get(0));
-      if (hexParts.size() == 2) {
-        for (int i = 32 - hexParts.get(0).length()
-            - hexParts.get(1).length(); i > 0; i--) {
-          hex.append("0");
-        }
-        hex.append(hexParts.get(1));
-      }
-      byte[] ipBytes;
-      try {
-        ipBytes = Hex.decodeHex(hex.toString().toCharArray());
-      } catch (DecoderException e) {
-        /* TODO Invalid IPv6 address. */
-        return null;
-      }
-      if (ipBytes.length != 16) {
-        /* TODO Invalid IPv6 address. */
-        return null;
-      }
-      byte[] hashInput = new byte[16 + 20 + 19];
-      System.arraycopy(ipBytes, 0, hashInput, 0, 16);
-      System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20);
-      String month = published.substring(0, "yyyy-MM".length());
-      byte[] secret = this.getSecretForMonth(month);
-      System.arraycopy(secret, 31, hashInput, 36, 19);
-      String hashOutput = DigestUtils.sha256Hex(hashInput);
-      sb.append(hashOutput, hashOutput.length() - 6, hashOutput.length() - 4);
-      sb.append(":");
-      sb.append(hashOutput.substring(hashOutput.length() - 4));
-    }
-    sb.append("]");
-    return sb.toString();
-  }
-
-  private String scrubTcpPort(String portString, byte[] fingerprintBytes,
-      String published) throws IOException {
-    if (portString.equals("0")) {
-      return "0";
-    } else if (this.replaceIpAddressesWithHashes) {
-      if (this.persistenceProblemWithSecrets) {
-        /* There's a persistence problem, so we shouldn't scrub more TCP
-         * ports in this execution. */
-        return null;
-      }
-      byte[] hashInput = new byte[2 + 20 + 33];
-      int portNumber = Integer.parseInt(portString);
-      hashInput[0] = (byte) (portNumber >> 8);
-      hashInput[1] = (byte) portNumber;
-      System.arraycopy(fingerprintBytes, 0, hashInput, 2, 20);
-      String month = published.substring(0, "yyyy-MM".length());
-      byte[] secret = this.getSecretForMonth(month);
-      System.arraycopy(secret, 50, hashInput, 22, 33);
-      byte[] hashOutput = DigestUtils.sha256(hashInput);
-      int hashedPort = ((((hashOutput[0] & 0b1111_1111) << 8)
-          | (hashOutput[1] & 0b1111_1111)) >> 2) | 0b1100_0000_0000_0000;
-      return String.valueOf(hashedPort);
-    } else {
-      return "1";
-    }
-  }
-
-  private byte[] getSecretForMonth(String month) throws IOException {
-    if (!this.secretsForHashingIpAddresses.containsKey(month)
-        || this.secretsForHashingIpAddresses.get(month).length < 83) {
-      byte[] secret = new byte[83];
-      this.secureRandom.nextBytes(secret);
-      if (this.secretsForHashingIpAddresses.containsKey(month)) {
-        System.arraycopy(this.secretsForHashingIpAddresses.get(month), 0,
-            secret, 0,
-            this.secretsForHashingIpAddresses.get(month).length);
-      }
-      if (month.compareTo(
-          this.bridgeSanitizingCutOffTimestamp) < 0) {
-        logger.warn("Generated a secret that we won't make "
-            + "persistent, because it's outside our bridge descriptor "
-            + "sanitizing interval.");
-      } else {
-        /* Append secret to file on disk immediately before using it, or
-         * we might end with inconsistently sanitized bridges. */
-        byte[] newBytes = (month + "," + Hex.encodeHexString(secret) + "\n")
-            .getBytes();
-        try {
-          if (Files.exists(this.bridgeIpSecretsFile)) {
-            Files.write(this.bridgeIpSecretsFile, newBytes,
-                StandardOpenOption.APPEND);
-          } else {
-            Files.createDirectories(this.bridgeIpSecretsFile.getParent());
-            Files.write(this.bridgeIpSecretsFile, newBytes);
-          }
-        } catch (IOException e) {
-          logger.warn("Could not store new secret "
-              + "to disk! Not calculating any IP address or TCP port "
-              + "hashes in this execution!", e);
-          this.persistenceProblemWithSecrets = true;
-          throw new IOException(e);
-        }
-      }
-      this.secretsForHashingIpAddresses.put(month, secret);
-    }
-    return this.secretsForHashingIpAddresses.get(month);
-  }
-
   private String maxNetworkStatusPublishedTime = "1970-01-01 00:00:00";
 
   /**
@@ -631,7 +351,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
   public void sanitizeAndStoreNetworkStatus(byte[] data,
       String publicationTime, String authorityFingerprint) {
 
-    if (this.persistenceProblemWithSecrets) {
+    if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) {
       /* There's a persistence problem, so we shouldn't scrub more IP
        * addresses in this execution. */
       return;
@@ -641,19 +361,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
       maxNetworkStatusPublishedTime = publicationTime;
     }
 
-    if (this.bridgeSanitizingCutOffTimestamp
-        .compareTo(publicationTime) > 0) {
-      String text = "Sanitizing and storing network status with "
-          + "publication time outside our descriptor sanitizing "
-          + "interval.";
-      if (this.haveWarnedAboutInterval) {
-        logger.debug(text);
-      } else {
-        logger.warn(text);
-        this.haveWarnedAboutInterval = true;
-      }
-    }
-
     /* Parse the given network status line by line. */
     DescriptorBuilder header = new DescriptorBuilder();
     boolean includesFingerprintLine = false;
@@ -738,14 +445,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
           String hashedDescriptorIdentifier = Base64.encodeBase64String(
               DigestUtils.sha1(Base64.decodeBase64(descriptorIdentifier
               + "=="))).substring(0, 27);
-          String scrubbedAddress = scrubIpv4Address(address,
-              fingerprintBytes,
-              descPublicationTime);
+          String scrubbedAddress = this.sensitivePartsSanitizer
+              .scrubIpv4Address(address, fingerprintBytes, descPublicationTime);
           String nickname = parts[1];
-          String scrubbedOrPort = this.scrubTcpPort(orPort,
-              fingerprintBytes, descPublicationTime);
-          String scrubbedDirPort = this.scrubTcpPort(dirPort,
-              fingerprintBytes, descPublicationTime);
+          String scrubbedOrPort = this.sensitivePartsSanitizer.scrubTcpPort(
+              orPort, fingerprintBytes, descPublicationTime);
+          String scrubbedDirPort = this.sensitivePartsSanitizer.scrubTcpPort(
+              dirPort, fingerprintBytes, descPublicationTime);
           scrubbed.append("r ").append(nickname).space()
               .append(hashedBridgeIdentityBase64).space()
               .append(hashedDescriptorIdentifier).space()
@@ -757,8 +463,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
         /* Sanitize any addresses in a lines using the fingerprint and
          * descriptor publication time from the previous r line. */
         } else if (line.startsWith("a ")) {
-          String scrubbedOrAddress = scrubOrAddress(
-              line.substring("a ".length()), fingerprintBytes,
+          String scrubbedOrAddress = this.sensitivePartsSanitizer
+              .scrubOrAddress(line.substring("a ".length()), fingerprintBytes,
               descPublicationTime);
           if (scrubbedOrAddress != null) {
             scrubbed.append("a ").append(scrubbedOrAddress).newLine();
@@ -856,7 +562,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
    */
   public void sanitizeAndStoreServerDescriptor(byte[] data) {
 
-    if (this.persistenceProblemWithSecrets) {
+    if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) {
       /* There's a persistence problem, so we shouldn't scrub more IP
        * addresses in this execution. */
       return;
@@ -922,18 +628,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
           if (published.compareTo(maxServerDescriptorPublishedTime) > 0) {
             maxServerDescriptorPublishedTime = published;
           }
-          if (this.bridgeSanitizingCutOffTimestamp
-              .compareTo(published) > 0) {
-            String text = "Sanitizing and storing "
-                + "server descriptor with publication time outside our "
-                + "descriptor sanitizing interval.";
-            if (this.haveWarnedAboutInterval) {
-              logger.debug(text);
-            } else {
-              logger.warn(text);
-              this.haveWarnedAboutInterval = true;
-            }
-          }
           scrubbed.append(line).newLine();
 
         /* Parse the fingerprint to determine the hashed bridge
@@ -1127,8 +821,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
       return;
     }
     try {
-      String scrubbedAddressString = scrubIpv4Address(address, fingerprintBytes,
-          published);
+      String scrubbedAddressString = this.sensitivePartsSanitizer
+          .scrubIpv4Address(address, fingerprintBytes, published);
       if (null == scrubbedAddressString) {
         logger.warn("Invalid IP address in \"router\" line in bridge server "
             + "descriptor. Skipping descriptor.");
@@ -1137,8 +831,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
       scrubbedAddress.append(scrubbedAddressString);
       for (Map.Entry<StringBuilder, String> e
           : scrubbedIpAddressesAndTcpPorts.entrySet()) {
-        String scrubbedOrAddress = scrubOrAddress(e.getValue(),
-            fingerprintBytes, published);
+        String scrubbedOrAddress = this.sensitivePartsSanitizer
+            .scrubOrAddress(e.getValue(), fingerprintBytes, published);
         if (null == scrubbedOrAddress) {
           logger.warn("Invalid IP address or TCP port in \"or-address\" line "
               + "in bridge server descriptor. Skipping descriptor.");
@@ -1147,8 +841,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
         e.getKey().append(scrubbedOrAddress);
       }
       for (Map.Entry<StringBuilder, String> e : scrubbedTcpPorts.entrySet()) {
-        String scrubbedTcpPort = scrubTcpPort(e.getValue(), fingerprintBytes,
-            published);
+        String scrubbedTcpPort = this.sensitivePartsSanitizer
+            .scrubTcpPort(e.getValue(), fingerprintBytes, published);
         if (null == scrubbedTcpPort) {
           logger.warn("Invalid TCP port in \"router\" line in bridge server "
               + "descriptor. Skipping descriptor.");
@@ -1159,7 +853,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
     } catch (IOException exception) {
       /* There's a persistence problem, so we shouldn't scrub more IP addresses
        * or TCP ports in this execution. */
-      this.persistenceProblemWithSecrets = true;
       return;
     }
 
@@ -1500,43 +1193,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
     }
   }
 
-  /**
-   * Rewrite all network statuses that might contain references to server
-   * descriptors we added or updated in this execution. This applies to
-   * all statuses that have been published up to 24 hours after any added
-   * or updated server descriptor.
-   */
-  public void finishWriting() {
-
-    /* Delete secrets that we don't need anymore. */
-    if (!this.secretsForHashingIpAddresses.isEmpty()
-        && this.secretsForHashingIpAddresses.firstKey().compareTo(
-        this.bridgeSanitizingCutOffTimestamp) < 0) {
-      try {
-        int kept = 0;
-        int deleted = 0;
-        List<String> lines = new ArrayList<>();
-        for (Map.Entry<String, byte[]> e :
-            this.secretsForHashingIpAddresses.entrySet()) {
-          if (e.getKey().compareTo(
-              this.bridgeSanitizingCutOffTimestamp) < 0) {
-            deleted++;
-          } else {
-            lines.add(e.getKey() + "," + Hex.encodeHexString(e.getValue()));
-            kept++;
-          }
-        }
-        Files.write(this.bridgeIpSecretsFile, lines);
-        logger.info("Deleted {} secrets that we don't "
-            + "need anymore and kept {}.", deleted, kept);
-      } catch (IOException e) {
-        logger.warn("Could not store reduced set of "
-            + "secrets to disk! This is a bad sign, better check what's "
-            + "going on!", e);
-      }
-    }
-  }
-
   private void checkStaleDescriptors() {
     SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
         "yyyy-MM-dd HH:mm:ss");
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SensitivePartsSanitizer.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SensitivePartsSanitizer.java
new file mode 100644
index 0000000..71931dd
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SensitivePartsSanitizer.java
@@ -0,0 +1,378 @@
+/* Copyright 2010--2020 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.bridgedescs;
+
+import static java.time.ZoneOffset.UTC;
+
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.security.GeneralSecurityException;
+import java.security.SecureRandom;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+public class SensitivePartsSanitizer {
+
+  private static final Logger logger = LoggerFactory.getLogger(
+      SensitivePartsSanitizer.class);
+
+  private boolean replaceIpAddressesWithHashes = false;
+
+  private Path bridgeIpSecretsFile;
+
+  private boolean persistenceProblemWithSecrets;
+
+  private final SortedMap<String, byte[]> secretsForHashingIpAddresses
+      = new TreeMap<>();
+
+  private String bridgeSanitizingCutOffTimestamp;
+
+  private SecureRandom secureRandom;
+
+  private boolean haveWarnedAboutInterval;
+
+  SensitivePartsSanitizer() {
+    /* Nothing to do, if we're not using secrets for computing hashes. */
+  }
+
+  SensitivePartsSanitizer(Path bridgeIpSecretsFile,
+      long limitBridgeSanitizingIntervalDays) {
+    this.bridgeIpSecretsFile = bridgeIpSecretsFile;
+    this.readBridgeIpSecretsFile();
+    this.determineCutOffTimestamp(limitBridgeSanitizingIntervalDays);
+    this.replaceIpAddressesWithHashes = true;
+    this.initializeSecureRandom();
+  }
+
+  /* Read hex-encoded secrets for replacing IP addresses with hashes
+   * from disk. */
+  private void readBridgeIpSecretsFile() {
+    if (Files.exists(this.bridgeIpSecretsFile)) {
+      try {
+        for (String line : Files.readAllLines(bridgeIpSecretsFile)) {
+          String[] parts = line.split(",");
+          if ((line.length() != ("yyyy-MM,".length() + 31 * 2)
+              && line.length() != ("yyyy-MM,".length() + 50 * 2)
+              && line.length() != ("yyyy-MM,".length() + 83 * 2))
+              || parts.length != 2) {
+            logger.warn("Invalid line in bridge-ip-secrets file "
+                + "starting with '{}'! "
+                + "Not calculating any IP address hashes in this "
+                + "execution!", line.substring(0, 7));
+            this.persistenceProblemWithSecrets = true;
+            break;
+          }
+          String month = parts[0];
+          byte[] secret = Hex.decodeHex(parts[1].toCharArray());
+          this.secretsForHashingIpAddresses.put(month, secret);
+        }
+        if (!this.persistenceProblemWithSecrets) {
+          logger.debug("Read {} secrets for hashing bridge IP addresses.",
+              this.secretsForHashingIpAddresses.size());
+        }
+      } catch (DecoderException e) {
+        logger.warn("Failed to decode hex string in {}! Not calculating any IP "
+            + "address hashes in this execution!", bridgeIpSecretsFile, e);
+        this.persistenceProblemWithSecrets = true;
+      } catch (IOException e) {
+        logger.warn("Failed to read {}! Not calculating any IP "
+            + "address hashes in this execution!", bridgeIpSecretsFile, e);
+        this.persistenceProblemWithSecrets = true;
+      }
+    }
+  }
+
+  boolean hasPersistenceProblemWithSecrets() {
+    return this.persistenceProblemWithSecrets;
+  }
+
+  private void determineCutOffTimestamp(
+      long limitBridgeSanitizingIntervalDays) {
+
+    /* If we're configured to keep secrets only for a limited time, define
+     * the cut-off day and time. */
+    LocalDateTime bridgeSanitizingCutOffDateTime
+        = LocalDateTime.of(1999, 12, 31, 23, 59, 59);
+    if (limitBridgeSanitizingIntervalDays >= 0L) {
+      LocalDateTime configuredBridgeSanitizingCutOffDateTime
+          = LocalDateTime.now(UTC).minusDays(limitBridgeSanitizingIntervalDays);
+      if (configuredBridgeSanitizingCutOffDateTime.isAfter(
+          bridgeSanitizingCutOffDateTime)) {
+        bridgeSanitizingCutOffDateTime
+            = configuredBridgeSanitizingCutOffDateTime;
+      }
+    }
+    this.bridgeSanitizingCutOffTimestamp = bridgeSanitizingCutOffDateTime
+        .format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
+
+    logger.info("Using cut-off datetime '{}' for secrets.",
+        this.bridgeSanitizingCutOffTimestamp);
+  }
+
+  private void initializeSecureRandom() {
+    /* Initialize secure random number generator. */
+    try {
+      this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN");
+    } catch (GeneralSecurityException e) {
+      logger.warn("Could not initialize secure "
+          + "random number generator! Not calculating any IP address "
+          + "hashes in this execution!", e);
+      this.persistenceProblemWithSecrets = true;
+    }
+  }
+
+  String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
+      String published) throws IOException {
+    if (!orAddress.contains(":")) {
+      /* Malformed or-address or a line. */
+      return null;
+    }
+    String addressPart = orAddress.substring(0,
+        orAddress.lastIndexOf(":"));
+    String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1);
+    String scrubbedAddressPart;
+    if (addressPart.startsWith("[")) {
+      scrubbedAddressPart = this.scrubIpv6Address(addressPart,
+          fingerprintBytes, published);
+    } else {
+      scrubbedAddressPart = this.scrubIpv4Address(addressPart,
+          fingerprintBytes, published);
+    }
+    String scrubbedPort = this.scrubTcpPort(portPart, fingerprintBytes,
+        published);
+    return (scrubbedAddressPart == null ? null :
+        scrubbedAddressPart + ":" + scrubbedPort);
+  }
+
+  String scrubIpv4Address(String address, byte[] fingerprintBytes,
+      String published) throws IOException {
+    if (this.replaceIpAddressesWithHashes) {
+      if (this.persistenceProblemWithSecrets) {
+        /* There's a persistence problem, so we shouldn't scrub more IP
+         * addresses in this execution. */
+        return null;
+      }
+      byte[] hashInput = new byte[4 + 20 + 31];
+      String[] ipParts = address.split("\\.");
+      for (int i = 0; i < 4; i++) {
+        hashInput[i] = (byte) Integer.parseInt(ipParts[i]);
+      }
+      System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
+      byte[] secret = this.getSecretForMonth(published);
+      System.arraycopy(secret, 0, hashInput, 24, 31);
+      byte[] hashOutput = DigestUtils.sha256(hashInput);
+      return "10."
+          + (((int) hashOutput[0] + 256) % 256) + "."
+          + (((int) hashOutput[1] + 256) % 256) + "."
+          + (((int) hashOutput[2] + 256) % 256);
+    } else {
+      return "127.0.0.1";
+    }
+  }
+
+  /**
+   * Replaces a bracketed IPv6 address with its sanitized form.
+   *
+   * <p>If hashing is disabled, the result is always
+   * {@code [fd9f:2e19:3bcf::]}. Otherwise the address is expanded to its 16
+   * bytes and hashed using SHA-256 together with the first 20 fingerprint
+   * bytes and bytes 31 to 49 of the monthly secret; the last six hex
+   * characters of the digest are appended in the form {@code xx:yyyy}.</p>
+   *
+   * @param address IPv6 address including the surrounding square brackets
+   * @param fingerprintBytes bridge fingerprint; the first 20 bytes are used
+   * @param published publication time, used to select the monthly secret
+   * @return the sanitized address in square brackets, or {@code null} if
+   *     the address is malformed or secrets could not be persisted earlier
+   *     in this execution
+   * @throws IOException if a newly generated secret cannot be stored
+   */
+  private String scrubIpv6Address(String address, byte[] fingerprintBytes,
+      String published) throws IOException {
+    if (!this.replaceIpAddressesWithHashes) {
+      return "[fd9f:2e19:3bcf::]";
+    }
+    if (this.persistenceProblemWithSecrets) {
+      /* There's a persistence problem, so we shouldn't scrub more IP
+       * addresses in this execution. */
+      return null;
+    }
+    /* Strip the brackets and split around the (at most one) "::". */
+    String[] doubleColonSeparatedParts = address.substring(1,
+        address.length() - 1).split("::", -1);
+    if (doubleColonSeparatedParts.length > 2) {
+      /* Invalid IPv6 address. */
+      return null;
+    }
+    /* Expand the groups on either side of "::" to fixed-width hex. */
+    List<String> hexParts = new ArrayList<>();
+    for (String doubleColonSeparatedPart : doubleColonSeparatedParts) {
+      StringBuilder hexPart = new StringBuilder();
+      String[] parts = doubleColonSeparatedPart.split(":", -1);
+      if (parts.length < 1 || parts.length > 8) {
+        /* Invalid IPv6 address. */
+        return null;
+      }
+      for (String part : parts) {
+        if (part.contains(".")) {
+          /* Embedded IPv4 address in dotted-quad notation. */
+          String[] ipParts = part.split("\\.");
+          byte[] ipv4Bytes = new byte[4];
+          if (ipParts.length != 4) {
+            /* Invalid IPv4 part in IPv6 address. */
+            return null;
+          }
+          try {
+            for (int m = 0; m < 4; m++) {
+              ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]);
+            }
+          } catch (NumberFormatException e) {
+            /* Invalid IPv4 part in IPv6 address; handle it like all other
+             * malformed input rather than failing with an unchecked
+             * exception. */
+            return null;
+          }
+          hexPart.append(Hex.encodeHexString(ipv4Bytes));
+        } else if (part.length() > 4) {
+          /* Invalid IPv6 address. */
+          return null;
+        } else {
+          /* Left-pad the group to four hex characters. */
+          for (int k = part.length(); k < 4; k++) {
+            hexPart.append("0");
+          }
+          hexPart.append(part);
+        }
+      }
+      hexParts.add(hexPart.toString());
+    }
+    StringBuilder hex = new StringBuilder();
+    hex.append(hexParts.get(0));
+    if (hexParts.size() == 2) {
+      /* Fill the gap denoted by "::" with zeros up to 32 hex characters. */
+      for (int i = 32 - hexParts.get(0).length()
+          - hexParts.get(1).length(); i > 0; i--) {
+        hex.append("0");
+      }
+      hex.append(hexParts.get(1));
+    }
+    byte[] ipBytes;
+    try {
+      ipBytes = Hex.decodeHex(hex.toString().toCharArray());
+    } catch (DecoderException e) {
+      /* TODO Invalid IPv6 address. */
+      return null;
+    }
+    if (ipBytes.length != 16) {
+      /* TODO Invalid IPv6 address. */
+      return null;
+    }
+    /* Hash input layout: 16 address bytes, 20 fingerprint bytes, 19 secret
+     * bytes taken from offset 31 of the monthly secret. */
+    byte[] hashInput = new byte[16 + 20 + 19];
+    System.arraycopy(ipBytes, 0, hashInput, 0, 16);
+    System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20);
+    byte[] secret = this.getSecretForMonth(published);
+    System.arraycopy(secret, 31, hashInput, 36, 19);
+    String hashOutput = DigestUtils.sha256Hex(hashInput);
+    StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::");
+    sb.append(hashOutput, hashOutput.length() - 6, hashOutput.length() - 4);
+    sb.append(":");
+    sb.append(hashOutput.substring(hashOutput.length() - 4));
+    sb.append("]");
+    return sb.toString();
+  }
+
+  /**
+   * Replaces a TCP port with its sanitized form.
+   *
+   * <p>Port {@code 0} is always kept as is. If hashing is disabled, any
+   * other port becomes {@code 1}. Otherwise the two port bytes, the first
+   * 20 fingerprint bytes and bytes 50 to 82 of the monthly secret are
+   * hashed using SHA-256, and the first 14 digest bits are mapped into the
+   * port range 49152 to 65535.</p>
+   *
+   * @param portString TCP port as decimal string
+   * @param fingerprintBytes bridge fingerprint; the first 20 bytes are used
+   * @param published publication time, used to select the monthly secret
+   * @return the sanitized port, or {@code null} if secrets could not be
+   *     persisted earlier in this execution
+   * @throws IOException if a newly generated secret cannot be stored
+   */
+  String scrubTcpPort(String portString, byte[] fingerprintBytes,
+      String published) throws IOException {
+    if (portString.equals("0")) {
+      return "0";
+    }
+    if (!this.replaceIpAddressesWithHashes) {
+      return "1";
+    }
+    if (this.persistenceProblemWithSecrets) {
+      /* Storing secrets failed before, so no further TCP ports are hashed
+       * in this execution. */
+      return null;
+    }
+    /* Hash input layout: 2 port bytes, 20 fingerprint bytes, 33 secret
+     * bytes taken from offset 50 of the monthly secret. */
+    byte[] toBeHashed = new byte[2 + 20 + 33];
+    int port = Integer.parseInt(portString);
+    toBeHashed[0] = (byte) (port >> 8);
+    toBeHashed[1] = (byte) port;
+    System.arraycopy(fingerprintBytes, 0, toBeHashed, 2, 20);
+    byte[] monthlySecret = this.getSecretForMonth(published);
+    System.arraycopy(monthlySecret, 50, toBeHashed, 22, 33);
+    byte[] digest = DigestUtils.sha256(toBeHashed);
+    int highByte = digest[0] & 0b1111_1111;
+    int lowByte = digest[1] & 0b1111_1111;
+    /* Keep the upper 14 of the first 16 digest bits and force the two
+     * most significant port bits to 1. */
+    int fourteenBits = ((highByte << 8) | lowByte) >> 2;
+    return String.valueOf(fourteenBits | 0b1100_0000_0000_0000);
+  }
+
+  /**
+   * Returns the 83-byte secret used for hashing addresses and ports of
+   * descriptors published in the given month, generating it if necessary.
+   *
+   * <p>A secret is generated if none is known for the month or if the known
+   * one is shorter than 83 bytes; in the latter case the known bytes are
+   * kept and only the remainder is random. A new secret is appended to the
+   * secrets file before it is used, unless the month sorts before the
+   * sanitizing cut-off timestamp, in which case it is only held in
+   * memory.</p>
+   *
+   * @param published publication time starting with {@code yyyy-MM}
+   * @return the secret for the month of {@code published}
+   * @throws IOException if a new secret cannot be written to disk; the
+   *     persistence-problem flag is set before throwing
+   */
+  private byte[] getSecretForMonth(String published) throws IOException {
+    if (this.bridgeSanitizingCutOffTimestamp
+        .compareTo(published) > 0) {
+      String text = "Sanitizing and storing bridge descriptor with publication "
+          + "time outside our descriptor sanitizing interval.";
+      /* Warn only once per execution; repeat at debug level afterwards. */
+      if (this.haveWarnedAboutInterval) {
+        logger.debug(text);
+      } else {
+        logger.warn(text);
+        this.haveWarnedAboutInterval = true;
+      }
+    }
+    String month = published.substring(0, "yyyy-MM".length());
+    byte[] knownSecret = this.secretsForHashingIpAddresses.get(month);
+    if (knownSecret != null && knownSecret.length >= 83) {
+      return knownSecret;
+    }
+    byte[] secret = new byte[83];
+    this.secureRandom.nextBytes(secret);
+    if (knownSecret != null) {
+      /* Keep the bytes of a shorter known secret and only extend it. */
+      System.arraycopy(knownSecret, 0, secret, 0, knownSecret.length);
+    }
+    if (month.compareTo(
+        this.bridgeSanitizingCutOffTimestamp) < 0) {
+      logger.warn("Generated a secret that we won't make "
+          + "persistent, because it's outside our bridge descriptor "
+          + "sanitizing interval.");
+    } else {
+      /* Append secret to file on disk immediately before using it, or
+       * we might end with inconsistently sanitized bridges. The charset is
+       * given explicitly so that the bytes do not depend on the platform
+       * default. */
+      byte[] newBytes = (month + "," + Hex.encodeHexString(secret) + "\n")
+          .getBytes(java.nio.charset.StandardCharsets.UTF_8);
+      try {
+        if (Files.exists(this.bridgeIpSecretsFile)) {
+          Files.write(this.bridgeIpSecretsFile, newBytes,
+              StandardOpenOption.APPEND);
+        } else {
+          Files.createDirectories(this.bridgeIpSecretsFile.getParent());
+          Files.write(this.bridgeIpSecretsFile, newBytes);
+        }
+      } catch (IOException e) {
+        logger.warn("Could not store new secret "
+            + "to disk! Not calculating any IP address or TCP port "
+            + "hashes in this execution!", e);
+        this.persistenceProblemWithSecrets = true;
+        throw new IOException(e);
+      }
+    }
+    this.secretsForHashingIpAddresses.put(month, secret);
+    return secret;
+  }
+
+  /**
+   * Rewrites the secrets file without secrets that are no longer needed.
+   *
+   * <p>Does nothing unless the earliest known month sorts before the
+   * sanitizing cut-off timestamp. Otherwise the secrets file is replaced
+   * with one line per secret whose month does not sort before the cut-off.
+   * A failure to write is logged and not propagated.</p>
+   */
+  void finishWriting() {
+    if (this.secretsForHashingIpAddresses.isEmpty()
+        || this.secretsForHashingIpAddresses.firstKey().compareTo(
+        this.bridgeSanitizingCutOffTimestamp) >= 0) {
+      /* No secret is old enough to be deleted. */
+      return;
+    }
+    int deleted = 0;
+    List<String> keptLines = new ArrayList<>();
+    for (Map.Entry<String, byte[]> monthAndSecret :
+        this.secretsForHashingIpAddresses.entrySet()) {
+      String month = monthAndSecret.getKey();
+      if (month.compareTo(this.bridgeSanitizingCutOffTimestamp) >= 0) {
+        keptLines.add(month + ","
+            + Hex.encodeHexString(monthAndSecret.getValue()));
+      } else {
+        deleted++;
+      }
+    }
+    try {
+      Files.write(bridgeIpSecretsFile, keptLines);
+      logger.info("Deleted {} secrets that we don't "
+          + "need anymore and kept {}.", deleted, keptLines.size());
+    } catch (IOException e) {
+      logger.warn("Could not store reduced set of "
+          + "secrets to disk! This is a bad sign, better check what's "
+          + "going on!", e);
+    }
+  }
+}
+
diff --git a/src/test/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriterTest.java b/src/test/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriterTest.java
index 67e9738..d8c7119 100644
--- a/src/test/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriterTest.java
+++ b/src/test/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriterTest.java
@@ -821,6 +821,8 @@ public class SanitizedBridgesWriterTest {
 
   @Test
   public void testBridgeIpSecretsIsDirectory() throws Exception {
+    this.configuration.setProperty(Key.ReplaceIpAddressesWithHashes.name(),
+        "true");
     Files.createDirectory(Paths.get(statsDirectory, "bridge-ip-secrets"));
     this.runTest();
     assertTrue("Sanitized server descriptors without secrets.",





More information about the tor-commits mailing list