[or-cvs] [metrics-db/master] Sanitize IP addresses by replacing them with hashes.

karsten at torproject.org karsten at torproject.org
Thu Feb 3 16:14:23 UTC 2011


commit 34ef92e1fb37b1d4fd2779d1232734323f10bbe7
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Feb 3 16:53:14 2011 +0100

    Sanitize IP addresses by replacing them with hashes.
    
    See #2435.  This patch still uses a hard-coded secret.
---
 config.template                                    |    5 ++
 src/org/torproject/ernie/db/Configuration.java     |    7 ++
 src/org/torproject/ernie/db/Main.java              |    2 +-
 .../ernie/db/SanitizedBridgesWriter.java           |   76 +++++++++++++++-----
 .../ernie/test/SanitizedBridgesWriterTest.java     |    5 +-
 5 files changed, 74 insertions(+), 21 deletions(-)

diff --git a/config.template b/config.template
index aff609c..a1d35d4 100644
--- a/config.template
+++ b/config.template
@@ -93,6 +93,11 @@
 ## Write sanitized bridges to disk
 #WriteSanitizedBridges 0
 #
+## Replace IP addresses in sanitized bridge descriptors with 10.x.y.z,
+## where x.y.z are the first three bytes of SHA-256(IP address | bridge
+## identity | secret), so that we can learn about IP address changes.
+#ReplaceIPAddressesWithHashes 0
+#
 ## Relative path to directory to write sanitized bridges to
 #SanitizedBridgesWriteDirectory sanitized-bridges/
 #
diff --git a/src/org/torproject/ernie/db/Configuration.java b/src/org/torproject/ernie/db/Configuration.java
index 413e03b..48e5abe 100644
--- a/src/org/torproject/ernie/db/Configuration.java
+++ b/src/org/torproject/ernie/db/Configuration.java
@@ -30,6 +30,7 @@ public class Configuration {
   private boolean writeRelayDescriptorsRawFiles = false;
   private String relayDescriptorRawFilesDirectory = "pg-import/";
   private boolean writeSanitizedBridges = false;
+  private boolean replaceIPAddressesWithHashes = false;
   private String sanitizedBridgesWriteDirectory = "sanitized-bridges/";
   private boolean importSanitizedBridges = false;
   private String sanitizedBridgesDirectory = "bridges/";
@@ -113,6 +114,9 @@ public class Configuration {
         } else if (line.startsWith("WriteSanitizedBridges")) {
           this.writeSanitizedBridges = Integer.parseInt(
               line.split(" ")[1]) != 0;
+        } else if (line.startsWith("ReplaceIPAddressesWithHashes")) {
+          this.replaceIPAddressesWithHashes = Integer.parseInt(
+              line.split(" ")[1]) != 0;
         } else if (line.startsWith("SanitizedBridgesWriteDirectory")) {
           this.sanitizedBridgesWriteDirectory = line.split(" ")[1];
         } else if (line.startsWith("ImportSanitizedBridges")) {
@@ -290,6 +294,9 @@ public class Configuration {
   public boolean getWriteSanitizedBridges() {
     return this.writeSanitizedBridges;
   }
+  public boolean getReplaceIPAddressesWithHashes() {
+    return this.replaceIPAddressesWithHashes;
+  }
   public String getSanitizedBridgesWriteDirectory() {
     return this.sanitizedBridgesWriteDirectory;
   }
diff --git a/src/org/torproject/ernie/db/Main.java b/src/org/torproject/ernie/db/Main.java
index e3d04b1..a159642 100644
--- a/src/org/torproject/ernie/db/Main.java
+++ b/src/org/torproject/ernie/db/Main.java
@@ -138,7 +138,7 @@ public class Main {
     SanitizedBridgesWriter sbw = config.getWriteSanitizedBridges() ?
         new SanitizedBridgesWriter(
         new File(config.getSanitizedBridgesWriteDirectory()),
-        statsDirectory) : null;
+        statsDirectory, config.getReplaceIPAddressesWithHashes()) : null;
 
     // Prepare bridge descriptor parser
     BridgeDescriptorParser bdp = config.getWriteConsensusStats() ||
diff --git a/src/org/torproject/ernie/db/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/SanitizedBridgesWriter.java
index dc5d0dc..243b0b6 100644
--- a/src/org/torproject/ernie/db/SanitizedBridgesWriter.java
+++ b/src/org/torproject/ernie/db/SanitizedBridgesWriter.java
@@ -165,12 +165,16 @@ public class SanitizedBridgesWriter {
 
   private File statsDirectory;
 
+  private boolean replaceIPAddressesWithHashes;
+
+  private byte[] secretForHashingIPAddresses;
+
   /**
    * Initializes this class, including reading in the known descriptor
    * mapping.
    */
   public SanitizedBridgesWriter(File sanitizedBridgesDirectory,
-      File statsDirectory) {
+      File statsDirectory, boolean replaceIPAddressesWithHashes) {
 
     if (sanitizedBridgesDirectory == null || statsDirectory == null) {
       throw new IllegalArgumentException();
@@ -179,6 +183,7 @@ public class SanitizedBridgesWriter {
     /* Memorize argument values. */
     this.sanitizedBridgesDirectory = sanitizedBridgesDirectory;
     this.statsDirectory = statsDirectory;
+    this.replaceIPAddressesWithHashes = replaceIPAddressesWithHashes;
 
     /* Initialize logger. */
     this.logger = Logger.getLogger(
@@ -189,6 +194,11 @@ public class SanitizedBridgesWriter {
         DescriptorMapping>();
     this.descriptorPublicationTimes = new TreeSet<String>();
 
+    /* Define "secret" to be used for replacing IP addresses with hashes.
+     * TODO implement generating secrets and storing them to disk. */
+    this.secretForHashingIPAddresses =
+        "secret for hashing IP addresses".getBytes();
+
     /* Read known descriptor mappings from disk. */
     this.bridgeDescriptorMappingsFile = new File(
         "stats/bridge-descriptor-mappings");
@@ -224,6 +234,27 @@ public class SanitizedBridgesWriter {
     }
   }
 
+  private String scrubAddress(String address, byte[] fingerprintBytes) {
+    if (this.replaceIPAddressesWithHashes) {
+      byte[] hashInput = new byte[4 + 20 + 31];
+      String[] ipParts = address.split("\\.");
+      for (int i = 0; i < 4; i++) {
+        hashInput[i] = (byte) Integer.parseInt(ipParts[i]);
+      }
+      System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
+      System.arraycopy(this.secretForHashingIPAddresses, 0,
+          hashInput, 24, 31);
+      byte[] hashOutput = DigestUtils.sha256(hashInput);
+      String hashedAddress = "10."
+          + (((int) hashOutput[0] + 256) % 256) + "."
+          + (((int) hashOutput[1] + 256) % 256) + "."
+          + (((int) hashOutput[2] + 256) % 256);
+      return hashedAddress;
+    } else {
+      return "127.0.0.1";
+    }
+  }
+
   /**
    * Sanitizes a network status and writes it to disk. Processes every r
    * line separately and looks up whether the descriptor mapping contains
@@ -247,6 +278,7 @@ public class SanitizedBridgesWriter {
           String[] parts = line.split(" ");
           String bridgeIdentity = parts[2];
           String descPublicationTime = parts[4] + " " + parts[5];
+          String address = parts[6];
           String orPort = parts[7];
           String dirPort = parts[8];
 
@@ -273,10 +305,12 @@ public class SanitizedBridgesWriter {
           String sdi = Base64.encodeBase64String(Hex.decodeHex(
                 mapping.serverDescriptorIdentifier.toCharArray())).
                 substring(0, 27);
+          String scrubbedAddress = scrubAddress(address,
+              Base64.decodeBase64(bridgeIdentity + "=="));
           scrubbed.append("r Unnamed "
               + hashedBridgeIdentityBase64 + " " + sdi + " "
-              + descPublicationTime + " 127.0.0.1 " + orPort + " "
-              + dirPort + "\n");
+              + descPublicationTime + " " + scrubbedAddress + " "
+              + orPort + " " + dirPort + "\n");
 
         /* Nothing special about s, w, and p lines; just copy them. */
         } else if (line.startsWith("s ") || line.equals("s") ||
@@ -355,7 +389,7 @@ public class SanitizedBridgesWriter {
           new String(data, "US-ASCII")));
       StringBuilder scrubbed = new StringBuilder();
       String line = null, hashedBridgeIdentity = null, address = null,
-          published = null;
+          published = null, routerLine = null, scrubbedAddress = null;
       boolean skipCrypto = false;
       while ((line = br.readLine()) != null) {
 
@@ -379,14 +413,12 @@ public class SanitizedBridgesWriter {
         if (skipCrypto && !line.startsWith("-----END ")) {
           continue;
 
-        /* Parse the original IP address for looking it up in the GeoIP
-         * database and replace it with 127.0.0.1 in the scrubbed
-         * version. */
+        /* Store the router line for later processing, because we may need
+         * the bridge identity fingerprint for replacing the IP address in
+         * the scrubbed version.  */
         } else if (line.startsWith("router ")) {
           address = line.split(" ")[2];
-          scrubbed = new StringBuilder("router Unnamed 127.0.0.1 "
-              + line.split(" ")[3] + " " + line.split(" ")[4] + " "
-              + line.split(" ")[5] + "\n");
+          routerLine = line;
 
         /* Parse the publication time and add it to the list of descriptor
          * publication times to re-write network statuses at the end of
@@ -402,8 +434,11 @@ public class SanitizedBridgesWriter {
           String fingerprint = line.substring(line.startsWith("opt ") ?
               "opt fingerprint".length() : "fingerprint".length()).
               replaceAll(" ", "").toLowerCase();
-          hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex(
-              fingerprint.toCharArray())).toLowerCase();
+          byte[] fingerprintBytes = Hex.decodeHex(
+              fingerprint.toCharArray());
+          hashedBridgeIdentity = DigestUtils.shaHex(fingerprintBytes).
+              toLowerCase();
+          scrubbedAddress = scrubAddress(address, fingerprintBytes);
           scrubbed.append("opt fingerprint");
           for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++)
             scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i,
@@ -417,7 +452,11 @@ public class SanitizedBridgesWriter {
         /* When we reach the signature, we're done. Write the sanitized
          * descriptor to disk below. */
         } else if (line.startsWith("router-signature")) {
-          scrubbedDesc = scrubbed.toString();
+          String[] routerLineParts = routerLine.split(" ");
+          scrubbedDesc = "router Unnamed " + scrubbedAddress + " "
+              + routerLineParts[3] + " " + routerLineParts[4] + " "
+              + routerLineParts[5] + "\n";
+          scrubbedDesc += scrubbed.toString();
           break;
 
         /* Replace extra-info digest with the one we know from our
@@ -432,7 +471,7 @@ public class SanitizedBridgesWriter {
          * IP address. */
         } else if (line.startsWith("reject ")) {
           if (address != null && line.startsWith("reject " + address)) {
-            scrubbed.append("reject 127.0.0.1"
+            scrubbed.append("reject " + scrubbedAddress
                 + line.substring("reject ".length() + address.length())
                 + "\n");
           } else {
@@ -675,6 +714,7 @@ public class SanitizedBridgesWriter {
               toLowerCase();
           String descPublished = line.split(" ")[4] + " "
               + line.split(" ")[5];
+          String address = line.split(" ")[6];
           String mappingKey = (hashedBridgeIdentity + ","
               + descPublished).toLowerCase();
           DescriptorMapping mapping = null;
@@ -693,7 +733,7 @@ public class SanitizedBridgesWriter {
           String dirPort = line.split(" ")[8];
           sb.append("r Unnamed "
               + hashedBridgeIdentityBase64 + " " + sdi + " "
-              + descPublished + " 127.0.0.1 " + orPort + " "
+              + descPublished + " " + address + " " + orPort + " "
               + dirPort + "\n");
         } else {
           sb.append(line + "\n");
@@ -761,9 +801,9 @@ public class SanitizedBridgesWriter {
           }
         }
         if (line2.startsWith("router ")) {
-          sb.append("router Unnamed 127.0.0.1 " + line2.split(" ")[3]
-              + " " + line2.split(" ")[4] + " " + line2.split(" ")[5]
-              + "\n");
+          sb.append("router Unnamed " + line2.split(" ")[2] + " "
+              + line2.split(" ")[3] + " " + line2.split(" ")[4] + " "
+              + line2.split(" ")[5] + "\n");
         } else if (line2.startsWith("published ")) {
           published = line2.substring("published ".length());
           sb.append(line2 + "\n");
diff --git a/src/org/torproject/ernie/test/SanitizedBridgesWriterTest.java b/src/org/torproject/ernie/test/SanitizedBridgesWriterTest.java
index 0a3b0d7..d256122 100644
--- a/src/org/torproject/ernie/test/SanitizedBridgesWriterTest.java
+++ b/src/org/torproject/ernie/test/SanitizedBridgesWriterTest.java
@@ -27,12 +27,13 @@ public class SanitizedBridgesWriterTest {
 
   @Test(expected = IllegalArgumentException.class)
   public void testSanitizedBridgesDirectoryNull() {
-    new SanitizedBridgesWriter(null, this.tempStatsDirectory);
+    new SanitizedBridgesWriter(null, this.tempStatsDirectory, false);
   }
 
   @Test(expected = IllegalArgumentException.class)
   public void testStatsDirectoryNull() {
-    new SanitizedBridgesWriter(this.tempSanitizedBridgesDirectory, null);
+    new SanitizedBridgesWriter(this.tempSanitizedBridgesDirectory, null,
+        false);
   }
 }
 



More information about the tor-commits mailing list