[tor-commits] [collector/master] Sanitize TCP ports in bridge descriptors.

karsten at torproject.org karsten at torproject.org
Mon Sep 19 12:25:25 UTC 2016


commit ecb053899eb965c2778cf05479c26549d67f7956
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Fri Jun 10 13:28:42 2016 +0200

    Sanitize TCP ports in bridge descriptors.
    
    Implements #19317.
---
 CHANGELOG.md                                       |  2 +
 .../bridgedescs/SanitizedBridgesWriter.java        | 81 +++++++++++++++++-----
 src/main/webapp/index.html                         | 46 ++++++++++--
 3 files changed, 107 insertions(+), 22 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 24206ee..e17abad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,8 @@
    - Add support for Bifroest's bridge descriptor tarballs.
    - Use a shutdown hook that gives currently running modules up to 10
      minutes to finish properly, rather than killing them immediately.
+   - Replace TCP ports with hashes in @type bridge-network-status 1.1
+     and @type bridge-server-descriptor 1.2.
 
  * Minor changes
    - Remove quotes around base URL in index.json.
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index b787f78..b61cd30 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -138,7 +138,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
         while ((line = br.readLine()) != null) {
           String[] parts = line.split(",");
           if ((line.length() != ("yyyy-MM,".length() + 31 * 2)
-              && line.length() != ("yyyy-MM,".length() + 50 * 2))
+              && line.length() != ("yyyy-MM,".length() + 50 * 2)
+              && line.length() != ("yyyy-MM,".length() + 83 * 2))
               || parts.length != 2) {
             logger.warn("Invalid line in bridge-ip-secrets file "
                 + "starting with '" + line.substring(0, 7) + "'! "
@@ -218,8 +219,10 @@ public class SanitizedBridgesWriter extends CollecTorMain {
       scrubbedAddressPart = this.scrubIpv4Address(addressPart,
           fingerprintBytes, published);
     }
+    String scrubbedPort = this.scrubTcpPort(portPart, fingerprintBytes,
+        published);
     return (scrubbedAddressPart == null ? null :
-          scrubbedAddressPart + ":" + portPart);
+          scrubbedAddressPart + ":" + scrubbedPort);
   }
 
   private String scrubIpv4Address(String address, byte[] fingerprintBytes,
@@ -334,14 +337,42 @@ public class SanitizedBridgesWriter extends CollecTorMain {
     return sb.toString();
   }
 
+  private String scrubTcpPort(String portString, byte[] fingerprintBytes,
+      String published) throws IOException {
+    if (portString.equals("0")) {
+      return "0";
+    } else if (this.replaceIpAddressesWithHashes) {
+      if (this.persistenceProblemWithSecrets) {
+        /* There's a persistence problem, so we shouldn't scrub more TCP
+         * ports in this execution. */
+        return null;
+      }
+      byte[] hashInput = new byte[2 + 20 + 33];
+      int portNumber = Integer.parseInt(portString);
+      hashInput[0] = (byte) (portNumber >> 8);
+      hashInput[1] = (byte) portNumber;
+      System.arraycopy(fingerprintBytes, 0, hashInput, 2, 20);
+      String month = published.substring(0, "yyyy-MM".length());
+      byte[] secret = this.getSecretForMonth(month);
+      System.arraycopy(secret, 50, hashInput, 22, 33);
+      byte[] hashOutput = DigestUtils.sha256(hashInput);
+      int hashedPort = ((((hashOutput[0] & 0xFF) << 8)
+          | (hashOutput[1] & 0xFF)) >> 2) | 0xC000;
+      return String.valueOf(hashedPort);
+    } else {
+      return "1";
+    }
+  }
+
   private byte[] getSecretForMonth(String month) throws IOException {
     if (!this.secretsForHashingIpAddresses.containsKey(month)
-        || this.secretsForHashingIpAddresses.get(month).length == 31) {
-      byte[] secret = new byte[50];
+        || this.secretsForHashingIpAddresses.get(month).length < 83) {
+      byte[] secret = new byte[83];
       this.secureRandom.nextBytes(secret);
       if (this.secretsForHashingIpAddresses.containsKey(month)) {
         System.arraycopy(this.secretsForHashingIpAddresses.get(month), 0,
-            secret, 0, 31);
+            secret, 0,
+            this.secretsForHashingIpAddresses.get(month).length);
       }
       if (month.compareTo(
           this.bridgeSanitizingCutOffTimestamp) < 0) {
@@ -362,8 +393,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
           bw.close();
         } catch (IOException e) {
           logger.warn("Could not store new secret "
-              + "to disk! Not calculating any IP address hashes in "
-              + "this execution!", e);
+              + "to disk! Not calculating any IP address or TCP port "
+              + "hashes in this execution!", e);
           this.persistenceProblemWithSecrets = true;
           throw new IOException(e);
         }
@@ -471,11 +502,15 @@ public class SanitizedBridgesWriter extends CollecTorMain {
               fingerprintBytes,
               descPublicationTime);
           String nickname = parts[1];
+          String scrubbedOrPort = this.scrubTcpPort(orPort,
+              fingerprintBytes, descPublicationTime);
+          String scrubbedDirPort = this.scrubTcpPort(dirPort,
+              fingerprintBytes, descPublicationTime);
           scrubbed.append("r " + nickname + " "
               + hashedBridgeIdentityBase64 + " "
               + hashedDescriptorIdentifier + " " + descPublicationTime
-              + " " + scrubbedAddress + " " + orPort + " " + dirPort
-              + "\n");
+              + " " + scrubbedAddress + " " + scrubbedOrPort + " "
+              + scrubbedDirPort + "\n");
 
         /* Sanitize any addresses in a lines using the fingerprint and
          * descriptor publication time from the previous r line. */
@@ -555,7 +590,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
         outputFile.getParentFile().mkdirs();
         BufferedWriter bw = new BufferedWriter(new FileWriter(
             outputFile));
-        bw.write("@type bridge-network-status 1.0\n");
+        bw.write("@type bridge-network-status 1.1\n");
         bw.write("published " + publicationTime + "\n");
         bw.write(header.toString());
         for (String scrubbed : scrubbedLines.values()) {
@@ -595,6 +630,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
       String hashedBridgeIdentity = null;
       String address = null;
       String routerLine = null;
+      String scrubbedRouterLine = null;
       String scrubbedAddress = null;
       String masterKeyEd25519 = null;
       List<String> orAddresses = null;
@@ -611,7 +647,12 @@ public class SanitizedBridgesWriter extends CollecTorMain {
          * the bridge identity fingerprint for replacing the IP address in
          * the scrubbed version.  */
         } else if (line.startsWith("router ")) {
-          address = line.split(" ")[2];
+          String[] parts = line.split(" ");
+          if (parts.length != 6) {
+            logger.warn("Invalid router line: '" + line + "'.  Skipping.");
+            return;
+          }
+          address = parts[2];
           routerLine = line;
 
         /* Store or-address parts in a list and sanitize them when we have
@@ -671,6 +712,17 @@ public class SanitizedBridgesWriter extends CollecTorMain {
                 }
               }
             }
+            String[] routerLineParts = routerLine.split(" ");
+            String nickname = routerLineParts[1];
+            String scrubbedOrPort = this.scrubTcpPort(routerLineParts[3],
+                fingerprintBytes, published);
+            String scrubbedDirPort = this.scrubTcpPort(routerLineParts[4],
+                fingerprintBytes, published);
+            String scrubbedSocksPort = this.scrubTcpPort(
+                routerLineParts[5], fingerprintBytes, published);
+            scrubbedRouterLine = String.format("router %s %s %s %s %s%n",
+                nickname, scrubbedAddress, scrubbedOrPort,
+                scrubbedDirPort, scrubbedSocksPort);
           } catch (IOException e) {
             /* There's a persistence problem, so we shouldn't scrub more
              * IP addresses in this execution. */
@@ -692,10 +744,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
         /* When we reach the signature, we're done. Write the sanitized
          * descriptor to disk below. */
         } else if (line.startsWith("router-signature")) {
-          String[] routerLineParts = routerLine.split(" ");
-          scrubbedDesc = "router " + routerLineParts[1] + " "
-              + scrubbedAddress + " " + routerLineParts[3] + " "
-              + routerLineParts[4] + " " + routerLineParts[5] + "\n";
+          scrubbedDesc = scrubbedRouterLine;
           if (scrubbedOrAddresses != null) {
             for (String scrubbedOrAddress : scrubbedOrAddresses) {
               scrubbedDesc = scrubbedDesc += "or-address "
@@ -915,7 +964,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
         outputFile.getParentFile().mkdirs();
         BufferedWriter bw = new BufferedWriter(new FileWriter(
             outputFile, appendToFile));
-        bw.write("@type bridge-server-descriptor 1.1\n");
+        bw.write("@type bridge-server-descriptor 1.2\n");
         bw.write(scrubbedDesc);
         if (descriptorDigestSha256Base64 != null) {
           bw.write("router-digest-sha256 " + descriptorDigestSha256Base64
diff --git a/src/main/webapp/index.html b/src/main/webapp/index.html
index ccdeea2..bbc70f2 100644
--- a/src/main/webapp/index.html
+++ b/src/main/webapp/index.html
@@ -166,14 +166,14 @@
 </tr>
 <tr>
   <td>Bridge Network Statuses</td>
-  <td><tt>@type bridge-network-status 1.0</tt></td>
+  <td><tt>@type bridge-network-status 1.1</tt></td>
   <td><a href="recent/bridge-descriptors/statuses/" class="btn btn-primary btn-xs">recent</a></td>
   <td><a href="archive/bridge-descriptors/" class="btn btn-primary btn-xs">archive</a></td>
   <td><a href="#type-bridge-network-status" class="btn btn-default btn-xs">format</a></td>
 </tr>
 <tr>
   <td>Bridge Server Descriptors</td>
-  <td><tt>@type bridge-server-descriptor 1.1</tt></td>
+  <td><tt>@type bridge-server-descriptor 1.2</tt></td>
   <td><a href="recent/bridge-descriptors/server-descriptors/" class="btn btn-primary btn-xs">recent</a></td>
   <td><a href="archive/bridge-descriptors/" class="btn btn-primary btn-xs">archive</a></td>
   <td><a href="#type-bridge-server-descriptor" class="btn btn-default btn-xs">format</a></td>
@@ -447,6 +447,28 @@ with <tt>xx:xxxx</tt> being the hex-formatted 3 byte output of a similar
 hash function as described for IPv4 addresses.
 The only differences are that the input <tt>IP address</tt> is 16 bytes
 long and the <tt>secret</tt> is only 19 bytes long.</li></ul>
+<li><b>Replace TCP port with TCP port hash:</b> It may be less obvious
+that TCP ports need to be sanitized, but an unusual TCP port used by a
+high-value bridge might still stand out and provide yet another way to
+locate and block the bridge.
+Therefore, each non-zero TCP port is replaced with a number in the range
+from 49152 to 65535 that is the result of
+<tt>H(port | bridge identity | secret)[:2] / 2^2 + 2^15 + 2^14</tt>
+written as decimal number.
+The input <tt>port</tt> is the 2-byte long binary representation of the
+TCP port.
+The <tt>bridge identity</tt> is the 20-byte long binary representation of
+the bridge's long-term identity fingerprint.
+The <tt>secret</tt> is a 33-byte long secure random string that changes
+once per month for all descriptors and statuses published in that month.
+<tt>H()</tt> is SHA-256.
+The <tt>[:2]</tt> operator means that we pick the 2 most significant bytes
+of the result.
+The subsequent integer division and additions make sure that sanitized
+ports are in the range from 49152 to 65535, which is reserved for private
+services.
+All operations assume inputs to be in network byte order.
+TCP ports that are 0 in the original descriptor are left unchanged.</li>
 <li><b>Replace contact information:</b> If there is contact information in
 a descriptor, the contact line is changed to
 <tt>somebody</tt>.</li>
@@ -468,7 +490,7 @@ descriptor including all signatures.</li>
 </ol>
 
 <h3 id="type-bridge-network-status" class="hover">Bridge Network Statuses
-<small><tt>@type bridge-network-status 1.0</tt></small>
+<small><tt>@type bridge-network-status 1.1</tt></small>
 <a href="recent/bridge-descriptors/statuses/" class="btn btn-primary btn-xs">recent</a>
 <a href="archive/bridge-descriptors/" class="btn btn-primary btn-xs">archive</a>
 <a href="#type-bridge-network-status" class="hover">#</a>
@@ -480,10 +502,19 @@ statuses, but with only a <tt>published</tt> line in the header and
 without any lines in the footer.
 The bridge descriptor archive tarballs contain all bridge
 descriptors of a given month, not just network statuses.
+The format has changed over time to accommodate changes to the sanitizing
+process, with earlier versions being:
 </p>
 
+<ul>
+<li><font color="#666"><tt>@type bridge-network-status 1.0</tt> was the
+first version.</font></li>
+<li><tt>@type bridge-network-status 1.1</tt> introduced sanitized TCP
+ports.</li>
+</ul>
+
 <h3 id="type-bridge-server-descriptor" class="hover">Bridge Server descriptors
-<small><tt>@type bridge-server-descriptor 1.1</tt></small>
+<small><tt>@type bridge-server-descriptor 1.2</tt></small>
 <a href="recent/bridge-descriptors/server-descriptors/" class="btn btn-primary btn-xs">recent</a>
 <a href="archive/bridge-descriptors/" class="btn btn-primary btn-xs">archive</a>
 <a href="#type-bridge-server-descriptor" class="hover">#</a>
@@ -511,9 +542,12 @@ number of sanitized bridge extra-info descriptors was raised.
 As a result, there may be sanitized bridge server descriptors with version
 <tt>@type bridge-server-descriptor 1.0</tt> with and without those
 lines.</font></li>
-<li><tt>@type bridge-server-descriptor 1.1</tt> added
+<li><font color="#666"><tt>@type bridge-server-descriptor 1.1</tt> added
 <tt>master-key-ed25519</tt> lines and <tt>router-digest-sha256</tt> to
-server descriptors published by bridges using an Ed25519 master key.</li>
+server descriptors published by bridges using an Ed25519 master
+key.</font></li>
+<li><tt>@type bridge-server-descriptor 1.2</tt> introduced sanitized TCP
+ports.</li>
 </ul>
 
 <h3 id="type-bridge-extra-info" class="hover">Bridge Extra-info Descriptors





More information about the tor-commits mailing list