[tor-commits] [collector/master] Separate parsing and sanitizing steps for bridge descriptors.

karsten at torproject.org karsten at torproject.org
Tue Feb 20 16:33:13 UTC 2018


commit d5aba97f9b6c4ee74735b183552b8435e5e0661b
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Fri Oct 27 19:26:57 2017 +0200

    Separate parsing and sanitizing steps for bridge descriptors.
    
    First step towards implementing #20549.
---
 .../SanitizedBridgeDescriptorBuilder.java          |  54 +++++
 .../bridgedescs/SanitizedBridgesWriter.java        | 240 +++++++++------------
 2 files changed, 161 insertions(+), 133 deletions(-)

diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
new file mode 100644
index 0000000..174a5ae
--- /dev/null
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
@@ -0,0 +1,54 @@
+package org.torproject.collector.bridgedescs;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Builder for sanitized bridge descriptors.
+ *
+ * <p>This builder class can be used while parsing and sanitizing an original
+ * bridge descriptor. It accepts already sanitized {@code String}s and
+ * {@code StringBuilder}s as placeholders for parts that can only be sanitized
+ * after finishing the parsing step.</p> */
+class SanitizedBridgeDescriptorBuilder {
+
+  private List<StringBuilder> descriptorParts;
+
+  private StringBuilder lastDescriptorPart;
+
+  SanitizedBridgeDescriptorBuilder() {
+    this.descriptorParts = new ArrayList<>();
+    this.lastDescriptorPart = new StringBuilder();
+    this.descriptorParts.add(this.lastDescriptorPart);
+  }
+
+  SanitizedBridgeDescriptorBuilder append(String sanitizedString) {
+    this.lastDescriptorPart.append(sanitizedString);
+    return this;
+  }
+
+  SanitizedBridgeDescriptorBuilder append(StringBuilder placeholder) {
+    this.descriptorParts.add(placeholder);
+    this.lastDescriptorPart = new StringBuilder();
+    this.descriptorParts.add(this.lastDescriptorPart);
+    return this;
+  }
+
+  SanitizedBridgeDescriptorBuilder space() {
+    this.lastDescriptorPart.append(' ');
+    return this;
+  }
+
+  SanitizedBridgeDescriptorBuilder newLine() {
+    this.lastDescriptorPart.append('\n');
+    return this;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder fullDescriptor = new StringBuilder();
+    for (StringBuilder descriptorPart : this.descriptorParts) {
+      fullDescriptor.append(descriptorPart);
+    }
+    return fullDescriptor.toString();
+  }
+}
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index e257245..1ef1d60 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -34,6 +34,7 @@ import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.SortedMap;
@@ -671,23 +672,20 @@ public class SanitizedBridgesWriter extends CollecTorMain {
     }
 
     /* Parse descriptor to generate a sanitized version. */
-    String scrubbedDesc = null;
+    String address = null;
     String published = null;
+    byte[] fingerprintBytes = null;
+    StringBuilder scrubbedAddress = null;
+    Map<StringBuilder, String> scrubbedTcpPorts = new HashMap<>();
+    Map<StringBuilder, String> scrubbedIpAddressesAndTcpPorts = new HashMap<>();
     String masterKeyEd25519FromIdentityEd25519 = null;
-    try {
-      BufferedReader br = new BufferedReader(new StringReader(
-          new String(data, "US-ASCII")));
-      StringBuilder scrubbed = new StringBuilder();
-      String line = null;
-      byte[] fingerprintBytes = null;
-      String hashedBridgeIdentity = null;
-      String address = null;
-      String routerLine = null;
-      String scrubbedRouterLine = null;
-      String scrubbedAddress = null;
+    SanitizedBridgeDescriptorBuilder scrubbed =
+        new SanitizedBridgeDescriptorBuilder();
+    try (BufferedReader br = new BufferedReader(new StringReader(
+        new String(data, "US-ASCII")))) {
+      scrubbed.append(Annotation.BridgeServer.toString());
+      String line;
       String masterKeyEd25519 = null;
-      List<String> orAddresses = null;
-      List<String> scrubbedOrAddresses = null;
       boolean skipCrypto = false;
       while ((line = br.readLine()) != null) {
 
@@ -706,15 +704,26 @@ public class SanitizedBridgesWriter extends CollecTorMain {
             return;
           }
           address = parts[2];
-          routerLine = line;
-
-        /* Store or-address parts in a list and sanitize them when we have
-         * read the fingerprint. */
+          scrubbedAddress = new StringBuilder();
+          StringBuilder scrubbedOrPort = new StringBuilder();
+          scrubbedTcpPorts.put(scrubbedOrPort, parts[3]);
+          StringBuilder scrubbedDirPort = new StringBuilder();
+          scrubbedTcpPorts.put(scrubbedDirPort, parts[4]);
+          StringBuilder scrubbedSocksPort = new StringBuilder();
+          scrubbedTcpPorts.put(scrubbedSocksPort, parts[5]);
+          scrubbed.append("router ").append(parts[1]).space()
+              .append(scrubbedAddress).space()
+              .append(scrubbedOrPort).space()
+              .append(scrubbedDirPort).space()
+              .append(scrubbedSocksPort).newLine();
+
+        /* Store or-address and sanitize it when we have read the fingerprint
+         * and descriptor publication time. */
         } else if (line.startsWith("or-address ")) {
-          if (orAddresses == null) {
-            orAddresses = new ArrayList<>();
-          }
-          orAddresses.add(line.substring("or-address ".length()));
+          String orAddress = line.substring("or-address ".length());
+          StringBuilder scrubbedOrAddress = new StringBuilder();
+          scrubbedIpAddressesAndTcpPorts.put(scrubbedOrAddress, orAddress);
+          scrubbed.append("or-address ").append(scrubbedOrAddress).newLine();
 
         /* Parse the publication time to see if we're still inside the
          * sanitizing interval. */
@@ -735,21 +744,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
               this.haveWarnedAboutInterval = true;
             }
           }
-          if (null != fingerprintBytes) {
-            /* We have read both published and fingerprint lines that we need to
-             * scrub the bridge's address which we might need to scrub reject
-             * lines. */
-            try {
-              scrubbedAddress = scrubIpv4Address(address, fingerprintBytes,
-                  published);
-            } catch (IOException e) {
-              /* There's a persistence problem, so we shouldn't scrub more
-               * IP addresses in this execution. */
-              this.persistenceProblemWithSecrets = true;
-              return;
-            }
-          }
-          scrubbed.append(line + "\n");
+          scrubbed.append(line).newLine();
 
         /* Parse the fingerprint to determine the hashed bridge
          * identity. */
@@ -759,91 +754,23 @@ public class SanitizedBridgesWriter extends CollecTorMain {
               ? "opt fingerprint".length() : "fingerprint".length())
               .replaceAll(" ", "").toLowerCase();
           fingerprintBytes = Hex.decodeHex(fingerprint.toCharArray());
-          hashedBridgeIdentity = DigestUtils.sha1Hex(fingerprintBytes)
+          String hashedBridgeIdentity = DigestUtils.sha1Hex(fingerprintBytes)
               .toLowerCase();
-          if (null != published) {
-            /* We have read both published and fingerprint lines that we need to
-             * scrub the bridge's address which we might need to scrub reject
-             * lines. */
-            try {
-              scrubbedAddress = scrubIpv4Address(address, fingerprintBytes,
-                  published);
-            } catch (IOException e) {
-              /* There's a persistence problem, so we shouldn't scrub more
-               * IP addresses in this execution. */
-              this.persistenceProblemWithSecrets = true;
-              return;
-            }
-          }
-          scrubbed.append((line.startsWith("opt ") ? "opt " : "")
-              + "fingerprint");
+          scrubbed.append(line.startsWith("opt ") ? "opt " : "")
+              .append("fingerprint");
           for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++) {
-            scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i,
+            scrubbed.space().append(hashedBridgeIdentity.substring(4 * i,
                 4 * (i + 1)).toUpperCase());
           }
-          scrubbed.append("\n");
+          scrubbed.newLine();
 
         /* Replace the contact line (if present) with a generic one. */
         } else if (line.startsWith("contact ")) {
-          scrubbed.append("contact somebody\n");
+          scrubbed.append("contact somebody").newLine();
 
         /* When we reach the signature, we're done. Write the sanitized
          * descriptor to disk below. */
         } else if (line.startsWith("router-signature")) {
-
-          /* Write the scrubbed "router" line now based on the "router",
-           * "fingerprint", and "published" lines that we read before. Also
-           * scrub any "or-address" lines. */
-          if (null == routerLine || null == fingerprintBytes
-              || null == published) {
-            logger.warn("Missing either of the following lines that are "
-                + "required to sanitize this server bridge descriptor: "
-                + "\"router\", \"fingerprint\", \"published\". Skipping "
-                + "descriptor.");
-            return;
-          }
-          try {
-            if (orAddresses != null) {
-              scrubbedOrAddresses = new ArrayList<>();
-              for (String orAddress : orAddresses) {
-                String scrubbedOrAddress = scrubOrAddress(orAddress,
-                    fingerprintBytes, published);
-                if (scrubbedOrAddress != null) {
-                  scrubbedOrAddresses.add(scrubbedOrAddress);
-                } else {
-                  logger.warn("Invalid address in line "
-                      + "'or-address " + orAddress + "' in bridge server "
-                      + "descriptor.  Skipping line!");
-                }
-              }
-            }
-            String[] routerLineParts = routerLine.split(" ");
-            String nickname = routerLineParts[1];
-            String scrubbedOrPort = this.scrubTcpPort(routerLineParts[3],
-                fingerprintBytes, published);
-            String scrubbedDirPort = this.scrubTcpPort(routerLineParts[4],
-                fingerprintBytes, published);
-            String scrubbedSocksPort = this.scrubTcpPort(
-                routerLineParts[5], fingerprintBytes, published);
-            scrubbedRouterLine = String.format("router %s %s %s %s %s%n",
-                nickname, scrubbedAddress, scrubbedOrPort,
-                scrubbedDirPort, scrubbedSocksPort);
-          } catch (IOException e) {
-            /* There's a persistence problem, so we shouldn't scrub more
-             * IP addresses in this execution. */
-            this.persistenceProblemWithSecrets = true;
-            return;
-          }
-
-          /* Put together the scrubbed descriptor from "router" to the newline
-           * before the original "router-signature" line. */
-          scrubbedDesc = scrubbedRouterLine;
-          if (scrubbedOrAddresses != null) {
-            for (String scrubbedOrAddress : scrubbedOrAddresses) {
-              scrubbedDesc += "or-address " + scrubbedOrAddress + "\n";
-            }
-          }
-          scrubbedDesc += scrubbed.toString();
           break;
 
         /* Replace extra-info digest with the hashed digest of the
@@ -860,7 +787,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
                 + "expected: '" + line + "'.  Skipping descriptor.");
             return;
           }
-          scrubbed.append("extra-info-digest " + DigestUtils.sha1Hex(
+          scrubbed.append("extra-info-digest ").append(DigestUtils.sha1Hex(
               Hex.decodeHex(parts[1].toCharArray())).toUpperCase());
           if (parts.length > 2) {
             if (!Base64.isBase64(parts[2])) {
@@ -868,21 +795,21 @@ public class SanitizedBridgesWriter extends CollecTorMain {
                   + line + "'.  Skipping descriptor.");
               return;
             }
-            scrubbed.append(" " + Base64.encodeBase64String(
+            scrubbed.space().append(Base64.encodeBase64String(
                 DigestUtils.sha256(Base64.decodeBase64(parts[2])))
                 .replaceAll("=", ""));
           }
-          scrubbed.append("\n");
+          scrubbed.newLine();
 
         /* Possibly sanitize reject lines if they contain the bridge's own
          * IP address. */
         } else if (line.startsWith("reject ")) {
           if (address != null && line.startsWith("reject " + address)) {
-            scrubbed.append("reject " + scrubbedAddress
-                + line.substring("reject ".length() + address.length())
-                + "\n");
+            scrubbed.append("reject ").append(scrubbedAddress)
+                .append(line.substring("reject ".length() + address.length()))
+                .newLine();
           } else {
-            scrubbed.append(line + "\n");
+            scrubbed.append(line).newLine();
           }
 
         /* Extract master-key-ed25519 from identity-ed25519. */
@@ -907,8 +834,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
               DigestUtils.sha256(Base64.decodeBase64(
               masterKeyEd25519FromIdentityEd25519 + "=")))
               .replaceAll("=", "");
-          scrubbed.append("master-key-ed25519 " + sha256MasterKeyEd25519
-              + "\n");
+          scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519)
+              .newLine();
           if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
               masterKeyEd25519FromIdentityEd25519)) {
             logger.warn("Mismatch between identity-ed25519 and "
@@ -948,7 +875,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
             || line.startsWith("ipv6-policy ")
             || line.equals("tunnelled-dir-server")
             || line.startsWith("bridge-distribution-request ")) {
-          scrubbed.append(line + "\n");
+          scrubbed.append(line).newLine();
 
         /* Replace node fingerprints in the family line with their hashes
          * and leave nicknames unchanged. */
@@ -956,13 +883,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
           StringBuilder familyLine = new StringBuilder("family");
           for (String s : line.substring(7).split(" ")) {
             if (s.startsWith("$")) {
-              familyLine.append(" $" + DigestUtils.sha1Hex(Hex.decodeHex(
+              familyLine.append(" $").append(DigestUtils.sha1Hex(Hex.decodeHex(
                   s.substring(1).toCharArray())).toUpperCase());
             } else {
-              familyLine.append(" " + s);
+              familyLine.append(" ").append(s);
             }
           }
-          scrubbed.append(familyLine.toString() + "\n");
+          scrubbed.append(familyLine.toString()).newLine();
 
         /* Skip the purpose line that the bridge authority adds to its
          * cached-descriptors file. */
@@ -1003,7 +930,53 @@ public class SanitizedBridgesWriter extends CollecTorMain {
       return;
     }
 
-    /* Determine filename of sanitized server descriptor. */
+    /* Sanitize the parts that we couldn't sanitize earlier. */
+    if (null == address || null == fingerprintBytes
+        || null == published) {
+      logger.warn("Missing either of the following lines that are "
+          + "required to sanitize this server bridge descriptor: "
+          + "\"router\", \"fingerprint\", \"published\". Skipping "
+          + "descriptor.");
+      return;
+    }
+    try {
+      String scrubbedAddressString = scrubIpv4Address(address, fingerprintBytes,
+          published);
+      if (null == scrubbedAddressString) {
+        logger.warn("Invalid IP address in \"router\" line in bridge server "
+            + "descriptor. Skipping descriptor.");
+        return;
+      }
+      scrubbedAddress.append(scrubbedAddressString);
+      for (Map.Entry<StringBuilder, String> e
+          : scrubbedIpAddressesAndTcpPorts.entrySet()) {
+        String scrubbedOrAddress = scrubOrAddress(e.getValue(),
+            fingerprintBytes, published);
+        if (null == scrubbedOrAddress) {
+          logger.warn("Invalid IP address or TCP port in \"or-address\" line "
+              + "in bridge server descriptor. Skipping descriptor.");
+          return;
+        }
+        e.getKey().append(scrubbedOrAddress);
+      }
+      for (Map.Entry<StringBuilder, String> e : scrubbedTcpPorts.entrySet()) {
+        String scrubbedTcpPort = scrubTcpPort(e.getValue(), fingerprintBytes,
+            published);
+        if (null == scrubbedTcpPort) {
+          logger.warn("Invalid TCP port in \"router\" line in bridge server "
+              + "descriptor. Skipping descriptor.");
+          return;
+        }
+        e.getKey().append(scrubbedTcpPort);
+      }
+    } catch (IOException exception) {
+      /* There's a persistence problem, so we shouldn't scrub more IP addresses
+       * or TCP ports in this execution. */
+      this.persistenceProblemWithSecrets = true;
+      return;
+    }
+
+    /* Determine digest(s) of sanitized server descriptor. */
     String descriptorDigest = null;
     try {
       String ascii = new String(data, "US-ASCII");
@@ -1048,6 +1021,14 @@ public class SanitizedBridgesWriter extends CollecTorMain {
         return;
       }
     }
+    if (null != descriptorDigestSha256Base64) {
+      scrubbed.append("router-digest-sha256 ")
+          .append(descriptorDigestSha256Base64).newLine();
+    }
+    scrubbed.append("router-digest ").append(descriptorDigest.toUpperCase())
+        .newLine();
+
+    /* Determine filename of sanitized server descriptor. */
     String dyear = published.substring(0, 4);
     String dmonth = published.substring(5, 7);
     File tarballFile = new File(
@@ -1073,14 +1054,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
         outputFile.getParentFile().mkdirs();
         BufferedWriter bw = new BufferedWriter(new FileWriter(
             outputFile, appendToFile));
-        bw.write(Annotation.BridgeServer.toString());
-        bw.write(scrubbedDesc);
-        if (descriptorDigestSha256Base64 != null) {
-          bw.write("router-digest-sha256 " + descriptorDigestSha256Base64
-              + "\n");
-        }
-        bw.write("router-digest " + descriptorDigest.toUpperCase()
-            + "\n");
+        bw.write(scrubbed.toString());
         bw.close();
       }
     } catch (ConfigurationException | IOException e) {





More information about the tor-commits mailing list