
commit a2fdbf3c6f67e5ddb735773e1ab456ee4f464555 Author: Karsten Loesing <karsten.loesing@gmx.net> Date: Mon Nov 30 21:59:17 2020 +0100 Move lower-level sanitizing code to its own class. Part of #20542. --- .../bridgedescs/SanitizedBridgesWriter.java | 404 ++------------------- .../bridgedescs/SensitivePartsSanitizer.java | 378 +++++++++++++++++++ .../bridgedescs/SanitizedBridgesWriterTest.java | 2 + 3 files changed, 410 insertions(+), 374 deletions(-) diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java index 34156c2..843aa40 100644 --- a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java +++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java @@ -3,8 +3,6 @@ package org.torproject.metrics.collector.bridgedescs; -import static java.time.ZoneOffset.UTC; - import org.torproject.descriptor.BridgeExtraInfoDescriptor; import org.torproject.descriptor.BridgeNetworkStatus; import org.torproject.descriptor.BridgeServerDescriptor; @@ -35,18 +33,12 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; -import java.security.GeneralSecurityException; -import java.security.SecureRandom; import java.text.ParseException; import java.text.SimpleDateFormat; import java.time.Instant; -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; import java.time.temporal.ChronoUnit; -import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.SortedMap; @@ -89,26 +81,14 @@ public class SanitizedBridgesWriter extends CollecTorMain { private Path inputDirectory; - private boolean replaceIpAddressesWithHashes; - - private boolean persistenceProblemWithSecrets; - - private SortedMap<String, byte[]> secretsForHashingIpAddresses; - - private String bridgeSanitizingCutOffTimestamp; - - private boolean haveWarnedAboutInterval; - - private Path bridgeIpSecretsFile; - - private SecureRandom secureRandom; - private Path outputDirectory; private Path recentDirectory; private Path statsDirectory; + private SensitivePartsSanitizer sensitivePartsSanitizer; + @Override public String module() { return "bridgedescs"; @@ -128,90 +108,30 @@ public class SanitizedBridgesWriter extends CollecTorMain { .resolve(BRIDGE_DESCRIPTORS); this.inputDirectory = config.getPath(Key.BridgeLocalOrigins); this.statsDirectory = config.getPath(Key.StatsPath); - this.replaceIpAddressesWithHashes = + boolean replaceIpAddressesWithHashes = config.getBool(Key.ReplaceIpAddressesWithHashes); SimpleDateFormat rsyncCatFormat = new SimpleDateFormat( "yyyy-MM-dd-HH-mm-ss"); this.rsyncCatString = rsyncCatFormat.format( System.currentTimeMillis()); - /* Initialize secure random number generator if we need it. */ - if (this.replaceIpAddressesWithHashes) { - try { - this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN"); - } catch (GeneralSecurityException e) { - logger.warn("Could not initialize secure " - + "random number generator! Not calculating any IP address " - + "hashes in this execution!", e); - this.persistenceProblemWithSecrets = true; - } - } - - /* Read hex-encoded secrets for replacing IP addresses with hashes - * from disk. */ - this.secretsForHashingIpAddresses = new TreeMap<>(); - this.bridgeIpSecretsFile = statsDirectory.resolve("bridge-ip-secrets"); - if (Files.exists(this.bridgeIpSecretsFile)) { - try { - for (String line : Files.readAllLines(this.bridgeIpSecretsFile)) { - String[] parts = line.split(","); - if ((line.length() != ("yyyy-MM,".length() + 31 * 2) - && line.length() != ("yyyy-MM,".length() + 50 * 2) - && line.length() != ("yyyy-MM,".length() + 83 * 2)) - || parts.length != 2) { - logger.warn("Invalid line in bridge-ip-secrets file " - + "starting with '{}'! " - + "Not calculating any IP address hashes in this " - + "execution!", line.substring(0, 7)); - this.persistenceProblemWithSecrets = true; - break; - } - String month = parts[0]; - byte[] secret = Hex.decodeHex(parts[1].toCharArray()); - this.secretsForHashingIpAddresses.put(month, secret); - } - if (!this.persistenceProblemWithSecrets) { - logger.debug("Read {} secrets for hashing bridge IP addresses.", - this.secretsForHashingIpAddresses.size()); - } - } catch (DecoderException e) { - logger.warn("Failed to decode hex string in {}! Not calculating any IP " - + "address hashes in this execution!", this.bridgeIpSecretsFile, e); - this.persistenceProblemWithSecrets = true; - } catch (IOException e) { - logger.warn("Failed to read {}! Not calculating any IP " - + "address hashes in this execution!", this.bridgeIpSecretsFile, e); - this.persistenceProblemWithSecrets = true; - } - } - - long limitBridgeSanitizingIntervalDays - = config.getInt(Key.BridgeDescriptorMappingsLimit); - - /* If we're configured to keep secrets only for a limited time, define - * the cut-off day and time. */ - LocalDateTime bridgeSanitizingCutOffDateTime - = LocalDateTime.of(1999, 12, 31, 23, 59, 59); - if (limitBridgeSanitizingIntervalDays >= 0L) { - LocalDateTime configuredBridgeSanitizingCutOffDateTime - = LocalDateTime.now(UTC).minusDays(limitBridgeSanitizingIntervalDays); - if (configuredBridgeSanitizingCutOffDateTime.isAfter( - bridgeSanitizingCutOffDateTime)) { - bridgeSanitizingCutOffDateTime - = configuredBridgeSanitizingCutOffDateTime; - } + Path bridgeIpSecretsFile = statsDirectory.resolve("bridge-ip-secrets"); + if (replaceIpAddressesWithHashes) { + long limitBridgeSanitizingIntervalDays + = config.getInt(Key.BridgeDescriptorMappingsLimit); + this.sensitivePartsSanitizer = new SensitivePartsSanitizer( + bridgeIpSecretsFile, limitBridgeSanitizingIntervalDays); + } else { + this.sensitivePartsSanitizer = new SensitivePartsSanitizer(); } - this.bridgeSanitizingCutOffTimestamp = bridgeSanitizingCutOffDateTime - .format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); - - logger.info("Using cut-off datetime '{}' for secrets.", - this.bridgeSanitizingCutOffTimestamp); // Import bridge descriptors this.readBridgeSnapshots(this.inputDirectory, this.statsDirectory); // Finish writing sanitized bridge descriptors to disk - this.finishWriting(); + if (replaceIpAddressesWithHashes) { + this.sensitivePartsSanitizer.finishWriting(); + } this.checkStaleDescriptors(); @@ -423,206 +343,6 @@ public class SanitizedBridgesWriter extends CollecTorMain { } } - private String scrubOrAddress(String orAddress, byte[] fingerprintBytes, - String published) throws IOException { - if (!orAddress.contains(":")) { - /* Malformed or-address or a line. */ - return null; - } - String addressPart = orAddress.substring(0, - orAddress.lastIndexOf(":")); - String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1); - String scrubbedAddressPart; - if (addressPart.startsWith("[")) { - scrubbedAddressPart = this.scrubIpv6Address(addressPart, - fingerprintBytes, published); - } else { - scrubbedAddressPart = this.scrubIpv4Address(addressPart, - fingerprintBytes, published); - } - String scrubbedPort = this.scrubTcpPort(portPart, fingerprintBytes, - published); - return (scrubbedAddressPart == null ? null : - scrubbedAddressPart + ":" + scrubbedPort); - } - - private String scrubIpv4Address(String address, byte[] fingerprintBytes, - String published) throws IOException { - if (this.replaceIpAddressesWithHashes) { - if (this.persistenceProblemWithSecrets) { - /* There's a persistence problem, so we shouldn't scrub more IP - * addresses in this execution. */ - return null; - } - byte[] hashInput = new byte[4 + 20 + 31]; - String[] ipParts = address.split("\\."); - for (int i = 0; i < 4; i++) { - hashInput[i] = (byte) Integer.parseInt(ipParts[i]); - } - System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20); - String month = published.substring(0, "yyyy-MM".length()); - byte[] secret = this.getSecretForMonth(month); - System.arraycopy(secret, 0, hashInput, 24, 31); - byte[] hashOutput = DigestUtils.sha256(hashInput); - return "10." - + (((int) hashOutput[0] + 256) % 256) + "." - + (((int) hashOutput[1] + 256) % 256) + "." - + (((int) hashOutput[2] + 256) % 256); - } else { - return "127.0.0.1"; - } - } - - private String scrubIpv6Address(String address, byte[] fingerprintBytes, - String published) throws IOException { - StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::"); - if (this.replaceIpAddressesWithHashes) { - if (this.persistenceProblemWithSecrets) { - /* There's a persistence problem, so we shouldn't scrub more IP - * addresses in this execution. */ - return null; - } - String[] doubleColonSeparatedParts = address.substring(1, - address.length() - 1).split("::", -1); - if (doubleColonSeparatedParts.length > 2) { - /* Invalid IPv6 address. */ - return null; - } - List<String> hexParts = new ArrayList<>(); - for (String doubleColonSeparatedPart : doubleColonSeparatedParts) { - StringBuilder hexPart = new StringBuilder(); - String[] parts = doubleColonSeparatedPart.split(":", -1); - if (parts.length < 1 || parts.length > 8) { - /* Invalid IPv6 address. */ - return null; - } - for (String part : parts) { - if (part.contains(".")) { - String[] ipParts = part.split("\\."); - byte[] ipv4Bytes = new byte[4]; - if (ipParts.length != 4) { - /* Invalid IPv4 part in IPv6 address. */ - return null; - } - for (int m = 0; m < 4; m++) { - ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]); - } - hexPart.append(Hex.encodeHexString(ipv4Bytes)); - } else if (part.length() > 4) { - /* Invalid IPv6 address. */ - return null; - } else { - for (int k = part.length(); k < 4; k++) { - hexPart.append("0"); - } - hexPart.append(part); - } - } - hexParts.add(hexPart.toString()); - } - StringBuilder hex = new StringBuilder(); - hex.append(hexParts.get(0)); - if (hexParts.size() == 2) { - for (int i = 32 - hexParts.get(0).length() - - hexParts.get(1).length(); i > 0; i--) { - hex.append("0"); - } - hex.append(hexParts.get(1)); - } - byte[] ipBytes; - try { - ipBytes = Hex.decodeHex(hex.toString().toCharArray()); - } catch (DecoderException e) { - /* TODO Invalid IPv6 address. */ - return null; - } - if (ipBytes.length != 16) { - /* TODO Invalid IPv6 address. */ - return null; - } - byte[] hashInput = new byte[16 + 20 + 19]; - System.arraycopy(ipBytes, 0, hashInput, 0, 16); - System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20); - String month = published.substring(0, "yyyy-MM".length()); - byte[] secret = this.getSecretForMonth(month); - System.arraycopy(secret, 31, hashInput, 36, 19); - String hashOutput = DigestUtils.sha256Hex(hashInput); - sb.append(hashOutput, hashOutput.length() - 6, hashOutput.length() - 4); - sb.append(":"); - sb.append(hashOutput.substring(hashOutput.length() - 4)); - } - sb.append("]"); - return sb.toString(); - } - - private String scrubTcpPort(String portString, byte[] fingerprintBytes, - String published) throws IOException { - if (portString.equals("0")) { - return "0"; - } else if (this.replaceIpAddressesWithHashes) { - if (this.persistenceProblemWithSecrets) { - /* There's a persistence problem, so we shouldn't scrub more TCP - * ports in this execution. */ - return null; - } - byte[] hashInput = new byte[2 + 20 + 33]; - int portNumber = Integer.parseInt(portString); - hashInput[0] = (byte) (portNumber >> 8); - hashInput[1] = (byte) portNumber; - System.arraycopy(fingerprintBytes, 0, hashInput, 2, 20); - String month = published.substring(0, "yyyy-MM".length()); - byte[] secret = this.getSecretForMonth(month); - System.arraycopy(secret, 50, hashInput, 22, 33); - byte[] hashOutput = DigestUtils.sha256(hashInput); - int hashedPort = ((((hashOutput[0] & 0b1111_1111) << 8) - | (hashOutput[1] & 0b1111_1111)) >> 2) | 0b1100_0000_0000_0000; - return String.valueOf(hashedPort); - } else { - return "1"; - } - } - - private byte[] getSecretForMonth(String month) throws IOException { - if (!this.secretsForHashingIpAddresses.containsKey(month) - || this.secretsForHashingIpAddresses.get(month).length < 83) { - byte[] secret = new byte[83]; - this.secureRandom.nextBytes(secret); - if (this.secretsForHashingIpAddresses.containsKey(month)) { - System.arraycopy(this.secretsForHashingIpAddresses.get(month), 0, - secret, 0, - this.secretsForHashingIpAddresses.get(month).length); - } - if (month.compareTo( - this.bridgeSanitizingCutOffTimestamp) < 0) { - logger.warn("Generated a secret that we won't make " - + "persistent, because it's outside our bridge descriptor " - + "sanitizing interval."); - } else { - /* Append secret to file on disk immediately before using it, or - * we might end with inconsistently sanitized bridges. */ - byte[] newBytes = (month + "," + Hex.encodeHexString(secret) + "\n") - .getBytes(); - try { - if (Files.exists(this.bridgeIpSecretsFile)) { - Files.write(this.bridgeIpSecretsFile, newBytes, - StandardOpenOption.APPEND); - } else { - Files.createDirectories(this.bridgeIpSecretsFile.getParent()); - Files.write(this.bridgeIpSecretsFile, newBytes); - } - } catch (IOException e) { - logger.warn("Could not store new secret " - + "to disk! Not calculating any IP address or TCP port " - + "hashes in this execution!", e); - this.persistenceProblemWithSecrets = true; - throw new IOException(e); - } - } - this.secretsForHashingIpAddresses.put(month, secret); - } - return this.secretsForHashingIpAddresses.get(month); - } - private String maxNetworkStatusPublishedTime = "1970-01-01 00:00:00"; /** @@ -631,7 +351,7 @@ public class SanitizedBridgesWriter extends CollecTorMain { public void sanitizeAndStoreNetworkStatus(byte[] data, String publicationTime, String authorityFingerprint) { - if (this.persistenceProblemWithSecrets) { + if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) { /* There's a persistence problem, so we shouldn't scrub more IP * addresses in this execution. */ return; @@ -641,19 +361,6 @@ public class SanitizedBridgesWriter extends CollecTorMain { maxNetworkStatusPublishedTime = publicationTime; } - if (this.bridgeSanitizingCutOffTimestamp - .compareTo(publicationTime) > 0) { - String text = "Sanitizing and storing network status with " - + "publication time outside our descriptor sanitizing " - + "interval."; - if (this.haveWarnedAboutInterval) { - logger.debug(text); - } else { - logger.warn(text); - this.haveWarnedAboutInterval = true; - } - } - /* Parse the given network status line by line. */ DescriptorBuilder header = new DescriptorBuilder(); boolean includesFingerprintLine = false; @@ -738,14 +445,13 @@ public class SanitizedBridgesWriter extends CollecTorMain { String hashedDescriptorIdentifier = Base64.encodeBase64String( DigestUtils.sha1(Base64.decodeBase64(descriptorIdentifier + "=="))).substring(0, 27); - String scrubbedAddress = scrubIpv4Address(address, - fingerprintBytes, - descPublicationTime); + String scrubbedAddress = this.sensitivePartsSanitizer + .scrubIpv4Address(address, fingerprintBytes, descPublicationTime); String nickname = parts[1]; - String scrubbedOrPort = this.scrubTcpPort(orPort, - fingerprintBytes, descPublicationTime); - String scrubbedDirPort = this.scrubTcpPort(dirPort, - fingerprintBytes, descPublicationTime); + String scrubbedOrPort = this.sensitivePartsSanitizer.scrubTcpPort( + orPort, fingerprintBytes, descPublicationTime); + String scrubbedDirPort = this.sensitivePartsSanitizer.scrubTcpPort( + dirPort, fingerprintBytes, descPublicationTime); scrubbed.append("r ").append(nickname).space() .append(hashedBridgeIdentityBase64).space() .append(hashedDescriptorIdentifier).space() @@ -757,8 +463,8 @@ public class SanitizedBridgesWriter extends CollecTorMain { /* Sanitize any addresses in a lines using the fingerprint and * descriptor publication time from the previous r line. */ } else if (line.startsWith("a ")) { - String scrubbedOrAddress = scrubOrAddress( - line.substring("a ".length()), fingerprintBytes, + String scrubbedOrAddress = this.sensitivePartsSanitizer + .scrubOrAddress(line.substring("a ".length()), fingerprintBytes, descPublicationTime); if (scrubbedOrAddress != null) { scrubbed.append("a ").append(scrubbedOrAddress).newLine(); @@ -856,7 +562,7 @@ public class SanitizedBridgesWriter extends CollecTorMain { */ public void sanitizeAndStoreServerDescriptor(byte[] data) { - if (this.persistenceProblemWithSecrets) { + if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) { /* There's a persistence problem, so we shouldn't scrub more IP * addresses in this execution. */ return; @@ -922,18 +628,6 @@ public class SanitizedBridgesWriter extends CollecTorMain { if (published.compareTo(maxServerDescriptorPublishedTime) > 0) { maxServerDescriptorPublishedTime = published; } - if (this.bridgeSanitizingCutOffTimestamp - .compareTo(published) > 0) { - String text = "Sanitizing and storing " - + "server descriptor with publication time outside our " - + "descriptor sanitizing interval."; - if (this.haveWarnedAboutInterval) { - logger.debug(text); - } else { - logger.warn(text); - this.haveWarnedAboutInterval = true; - } - } scrubbed.append(line).newLine(); /* Parse the fingerprint to determine the hashed bridge @@ -1127,8 +821,8 @@ public class SanitizedBridgesWriter extends CollecTorMain { return; } try { - String scrubbedAddressString = scrubIpv4Address(address, fingerprintBytes, - published); + String scrubbedAddressString = this.sensitivePartsSanitizer + .scrubIpv4Address(address, fingerprintBytes, published); if (null == scrubbedAddressString) { logger.warn("Invalid IP address in \"router\" line in bridge server " + "descriptor. Skipping descriptor."); @@ -1137,8 +831,8 @@ public class SanitizedBridgesWriter extends CollecTorMain { scrubbedAddress.append(scrubbedAddressString); for (Map.Entry<StringBuilder, String> e : scrubbedIpAddressesAndTcpPorts.entrySet()) { - String scrubbedOrAddress = scrubOrAddress(e.getValue(), - fingerprintBytes, published); + String scrubbedOrAddress = this.sensitivePartsSanitizer + .scrubOrAddress(e.getValue(), fingerprintBytes, published); if (null == scrubbedOrAddress) { logger.warn("Invalid IP address or TCP port in \"or-address\" line " + "in bridge server descriptor. Skipping descriptor."); @@ -1147,8 +841,8 @@ public class SanitizedBridgesWriter extends CollecTorMain { e.getKey().append(scrubbedOrAddress); } for (Map.Entry<StringBuilder, String> e : scrubbedTcpPorts.entrySet()) { - String scrubbedTcpPort = scrubTcpPort(e.getValue(), fingerprintBytes, - published); + String scrubbedTcpPort = this.sensitivePartsSanitizer + .scrubTcpPort(e.getValue(), fingerprintBytes, published); if (null == scrubbedTcpPort) { logger.warn("Invalid TCP port in \"router\" line in bridge server " + "descriptor. Skipping descriptor."); @@ -1159,7 +853,6 @@ public class SanitizedBridgesWriter extends CollecTorMain { } catch (IOException exception) { /* There's a persistence problem, so we shouldn't scrub more IP addresses * or TCP ports in this execution. */ - this.persistenceProblemWithSecrets = true; return; } @@ -1500,43 +1193,6 @@ public class SanitizedBridgesWriter extends CollecTorMain { } } - /** - * Rewrite all network statuses that might contain references to server - * descriptors we added or updated in this execution. This applies to - * all statuses that have been published up to 24 hours after any added - * or updated server descriptor. - */ - public void finishWriting() { - - /* Delete secrets that we don't need anymore. */ - if (!this.secretsForHashingIpAddresses.isEmpty() - && this.secretsForHashingIpAddresses.firstKey().compareTo( - this.bridgeSanitizingCutOffTimestamp) < 0) { - try { - int kept = 0; - int deleted = 0; - List<String> lines = new ArrayList<>(); - for (Map.Entry<String, byte[]> e : - this.secretsForHashingIpAddresses.entrySet()) { - if (e.getKey().compareTo( - this.bridgeSanitizingCutOffTimestamp) < 0) { - deleted++; - } else { - lines.add(e.getKey() + "," + Hex.encodeHexString(e.getValue())); - kept++; - } - } - Files.write(this.bridgeIpSecretsFile, lines); - logger.info("Deleted {} secrets that we don't " - + "need anymore and kept {}.", deleted, kept); - } catch (IOException e) { - logger.warn("Could not store reduced set of " - + "secrets to disk! This is a bad sign, better check what's " - + "going on!", e); - } - } - } - private void checkStaleDescriptors() { SimpleDateFormat dateTimeFormat = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss"); diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SensitivePartsSanitizer.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SensitivePartsSanitizer.java new file mode 100644 index 0000000..71931dd --- /dev/null +++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SensitivePartsSanitizer.java @@ -0,0 +1,378 @@ +/* Copyright 2010--2020 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.collector.bridgedescs; + +import static java.time.ZoneOffset.UTC; + +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.security.GeneralSecurityException; +import java.security.SecureRandom; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +public class SensitivePartsSanitizer { + + private static final Logger logger = LoggerFactory.getLogger( + SensitivePartsSanitizer.class); + + private boolean replaceIpAddressesWithHashes = false; + + private Path bridgeIpSecretsFile; + + private boolean persistenceProblemWithSecrets; + + private final SortedMap<String, byte[]> secretsForHashingIpAddresses + = new TreeMap<>(); + + private String bridgeSanitizingCutOffTimestamp; + + private SecureRandom secureRandom; + + private boolean haveWarnedAboutInterval; + + SensitivePartsSanitizer() { + /* Nothing to do, if we're not using secrets for computing hashes. */ + } + + SensitivePartsSanitizer(Path bridgeIpSecretsFile, + long limitBridgeSanitizingIntervalDays) { + this.bridgeIpSecretsFile = bridgeIpSecretsFile; + this.readBridgeIpSecretsFile(); + this.determineCutOffTimestamp(limitBridgeSanitizingIntervalDays); + this.replaceIpAddressesWithHashes = true; + this.initializeSecureRandom(); + } + + /* Read hex-encoded secrets for replacing IP addresses with hashes + * from disk. */ + private void readBridgeIpSecretsFile() { + if (Files.exists(this.bridgeIpSecretsFile)) { + try { + for (String line : Files.readAllLines(bridgeIpSecretsFile)) { + String[] parts = line.split(","); + if ((line.length() != ("yyyy-MM,".length() + 31 * 2) + && line.length() != ("yyyy-MM,".length() + 50 * 2) + && line.length() != ("yyyy-MM,".length() + 83 * 2)) + || parts.length != 2) { + logger.warn("Invalid line in bridge-ip-secrets file " + + "starting with '{}'! " + + "Not calculating any IP address hashes in this " + + "execution!", line.substring(0, 7)); + this.persistenceProblemWithSecrets = true; + break; + } + String month = parts[0]; + byte[] secret = Hex.decodeHex(parts[1].toCharArray()); + this.secretsForHashingIpAddresses.put(month, secret); + } + if (!this.persistenceProblemWithSecrets) { + logger.debug("Read {} secrets for hashing bridge IP addresses.", + this.secretsForHashingIpAddresses.size()); + } + } catch (DecoderException e) { + logger.warn("Failed to decode hex string in {}! Not calculating any IP " + + "address hashes in this execution!", bridgeIpSecretsFile, e); + this.persistenceProblemWithSecrets = true; + } catch (IOException e) { + logger.warn("Failed to read {}! Not calculating any IP " + + "address hashes in this execution!", bridgeIpSecretsFile, e); + this.persistenceProblemWithSecrets = true; + } + } + } + + boolean hasPersistenceProblemWithSecrets() { + return this.persistenceProblemWithSecrets; + } + + private void determineCutOffTimestamp( + long limitBridgeSanitizingIntervalDays) { + + /* If we're configured to keep secrets only for a limited time, define + * the cut-off day and time. */ + LocalDateTime bridgeSanitizingCutOffDateTime + = LocalDateTime.of(1999, 12, 31, 23, 59, 59); + if (limitBridgeSanitizingIntervalDays >= 0L) { + LocalDateTime configuredBridgeSanitizingCutOffDateTime + = LocalDateTime.now(UTC).minusDays(limitBridgeSanitizingIntervalDays); + if (configuredBridgeSanitizingCutOffDateTime.isAfter( + bridgeSanitizingCutOffDateTime)) { + bridgeSanitizingCutOffDateTime + = configuredBridgeSanitizingCutOffDateTime; + } + } + this.bridgeSanitizingCutOffTimestamp = bridgeSanitizingCutOffDateTime + .format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); + + logger.info("Using cut-off datetime '{}' for secrets.", + this.bridgeSanitizingCutOffTimestamp); + } + + private void initializeSecureRandom() { + /* Initialize secure random number generator. */ + try { + this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN"); + } catch (GeneralSecurityException e) { + logger.warn("Could not initialize secure " + + "random number generator! Not calculating any IP address " + + "hashes in this execution!", e); + this.persistenceProblemWithSecrets = true; + } + } + + String scrubOrAddress(String orAddress, byte[] fingerprintBytes, + String published) throws IOException { + if (!orAddress.contains(":")) { + /* Malformed or-address or a line. */ + return null; + } + String addressPart = orAddress.substring(0, + orAddress.lastIndexOf(":")); + String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1); + String scrubbedAddressPart; + if (addressPart.startsWith("[")) { + scrubbedAddressPart = this.scrubIpv6Address(addressPart, + fingerprintBytes, published); + } else { + scrubbedAddressPart = this.scrubIpv4Address(addressPart, + fingerprintBytes, published); + } + String scrubbedPort = this.scrubTcpPort(portPart, fingerprintBytes, + published); + return (scrubbedAddressPart == null ? null : + scrubbedAddressPart + ":" + scrubbedPort); + } + + String scrubIpv4Address(String address, byte[] fingerprintBytes, + String published) throws IOException { + if (this.replaceIpAddressesWithHashes) { + if (this.persistenceProblemWithSecrets) { + /* There's a persistence problem, so we shouldn't scrub more IP + * addresses in this execution. */ + return null; + } + byte[] hashInput = new byte[4 + 20 + 31]; + String[] ipParts = address.split("\\."); + for (int i = 0; i < 4; i++) { + hashInput[i] = (byte) Integer.parseInt(ipParts[i]); + } + System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20); + byte[] secret = this.getSecretForMonth(published); + System.arraycopy(secret, 0, hashInput, 24, 31); + byte[] hashOutput = DigestUtils.sha256(hashInput); + return "10." + + (((int) hashOutput[0] + 256) % 256) + "." + + (((int) hashOutput[1] + 256) % 256) + "." + + (((int) hashOutput[2] + 256) % 256); + } else { + return "127.0.0.1"; + } + } + + private String scrubIpv6Address(String address, byte[] fingerprintBytes, + String published) throws IOException { + StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::"); + if (this.replaceIpAddressesWithHashes) { + if (this.persistenceProblemWithSecrets) { + /* There's a persistence problem, so we shouldn't scrub more IP + * addresses in this execution. */ + return null; + } + String[] doubleColonSeparatedParts = address.substring(1, + address.length() - 1).split("::", -1); + if (doubleColonSeparatedParts.length > 2) { + /* Invalid IPv6 address. */ + return null; + } + List<String> hexParts = new ArrayList<>(); + for (String doubleColonSeparatedPart : doubleColonSeparatedParts) { + StringBuilder hexPart = new StringBuilder(); + String[] parts = doubleColonSeparatedPart.split(":", -1); + if (parts.length < 1 || parts.length > 8) { + /* Invalid IPv6 address. */ + return null; + } + for (String part : parts) { + if (part.contains(".")) { + String[] ipParts = part.split("\\."); + byte[] ipv4Bytes = new byte[4]; + if (ipParts.length != 4) { + /* Invalid IPv4 part in IPv6 address. */ + return null; + } + for (int m = 0; m < 4; m++) { + ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]); + } + hexPart.append(Hex.encodeHexString(ipv4Bytes)); + } else if (part.length() > 4) { + /* Invalid IPv6 address. */ + return null; + } else { + for (int k = part.length(); k < 4; k++) { + hexPart.append("0"); + } + hexPart.append(part); + } + } + hexParts.add(hexPart.toString()); + } + StringBuilder hex = new StringBuilder(); + hex.append(hexParts.get(0)); + if (hexParts.size() == 2) { + for (int i = 32 - hexParts.get(0).length() + - hexParts.get(1).length(); i > 0; i--) { + hex.append("0"); + } + hex.append(hexParts.get(1)); + } + byte[] ipBytes; + try { + ipBytes = Hex.decodeHex(hex.toString().toCharArray()); + } catch (DecoderException e) { + /* TODO Invalid IPv6 address. */ + return null; + } + if (ipBytes.length != 16) { + /* TODO Invalid IPv6 address. */ + return null; + } + byte[] hashInput = new byte[16 + 20 + 19]; + System.arraycopy(ipBytes, 0, hashInput, 0, 16); + System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20); + byte[] secret = this.getSecretForMonth(published); + System.arraycopy(secret, 31, hashInput, 36, 19); + String hashOutput = DigestUtils.sha256Hex(hashInput); + sb.append(hashOutput, hashOutput.length() - 6, hashOutput.length() - 4); + sb.append(":"); + sb.append(hashOutput.substring(hashOutput.length() - 4)); + } + sb.append("]"); + return sb.toString(); + } + + String scrubTcpPort(String portString, byte[] fingerprintBytes, + String published) throws IOException { + if (portString.equals("0")) { + return "0"; + } else if (this.replaceIpAddressesWithHashes) { + if (this.persistenceProblemWithSecrets) { + /* There's a persistence problem, so we shouldn't scrub more TCP + * ports in this execution. */ + return null; + } + byte[] hashInput = new byte[2 + 20 + 33]; + int portNumber = Integer.parseInt(portString); + hashInput[0] = (byte) (portNumber >> 8); + hashInput[1] = (byte) portNumber; + System.arraycopy(fingerprintBytes, 0, hashInput, 2, 20); + byte[] secret = this.getSecretForMonth(published); + System.arraycopy(secret, 50, hashInput, 22, 33); + byte[] hashOutput = DigestUtils.sha256(hashInput); + int hashedPort = ((((hashOutput[0] & 0b1111_1111) << 8) + | (hashOutput[1] & 0b1111_1111)) >> 2) | 0b1100_0000_0000_0000; + return String.valueOf(hashedPort); + } else { + return "1"; + } + } + + private byte[] getSecretForMonth(String published) throws IOException { + if (this.bridgeSanitizingCutOffTimestamp + .compareTo(published) > 0) { + String text = "Sanitizing and storing bridge descriptor with publication " + + "time outside our descriptor sanitizing interval."; + if (this.haveWarnedAboutInterval) { + logger.debug(text); + } else { + logger.warn(text); + this.haveWarnedAboutInterval = true; + } + } + String month = published.substring(0, "yyyy-MM".length()); + if (!this.secretsForHashingIpAddresses.containsKey(month) + || this.secretsForHashingIpAddresses.get(month).length < 83) { + byte[] secret = new byte[83]; + this.secureRandom.nextBytes(secret); + if (this.secretsForHashingIpAddresses.containsKey(month)) { + System.arraycopy(this.secretsForHashingIpAddresses.get(month), 0, + secret, 0, + this.secretsForHashingIpAddresses.get(month).length); + } + if (month.compareTo( + this.bridgeSanitizingCutOffTimestamp) < 0) { + logger.warn("Generated a secret that we won't make " + + "persistent, because it's outside our bridge descriptor " + + "sanitizing interval."); + } else { + /* Append secret to file on disk immediately before using it, or + * we might end with inconsistently sanitized bridges. */ + byte[] newBytes = (month + "," + Hex.encodeHexString(secret) + "\n") + .getBytes(); + try { + if (Files.exists(this.bridgeIpSecretsFile)) { + Files.write(this.bridgeIpSecretsFile, newBytes, + StandardOpenOption.APPEND); + } else { + Files.createDirectories(this.bridgeIpSecretsFile.getParent()); + Files.write(this.bridgeIpSecretsFile, newBytes); + } + } catch (IOException e) { + logger.warn("Could not store new secret " + + "to disk! Not calculating any IP address or TCP port " + + "hashes in this execution!", e); + this.persistenceProblemWithSecrets = true; + throw new IOException(e); + } + } + this.secretsForHashingIpAddresses.put(month, secret); + } + return this.secretsForHashingIpAddresses.get(month); + } + + void finishWriting() { + + /* Delete secrets that we don't need anymore. */ + if (!this.secretsForHashingIpAddresses.isEmpty() + && this.secretsForHashingIpAddresses.firstKey().compareTo( + this.bridgeSanitizingCutOffTimestamp) < 0) { + try { + int kept = 0; + int deleted = 0; + List<String> lines = new ArrayList<>(); + for (Map.Entry<String, byte[]> e : + this.secretsForHashingIpAddresses.entrySet()) { + if (e.getKey().compareTo( + this.bridgeSanitizingCutOffTimestamp) < 0) { + deleted++; + } else { + lines.add(e.getKey() + "," + Hex.encodeHexString(e.getValue())); + kept++; + } + } + Files.write(bridgeIpSecretsFile, lines); + logger.info("Deleted {} secrets that we don't " + + "need anymore and kept {}.", deleted, kept); + } catch (IOException e) { + logger.warn("Could not store reduced set of " + + "secrets to disk! This is a bad sign, better check what's " + + "going on!", e); + } + } + } +} + diff --git a/src/test/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriterTest.java b/src/test/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriterTest.java index 67e9738..d8c7119 100644 --- a/src/test/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriterTest.java +++ b/src/test/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriterTest.java @@ -821,6 +821,8 @@ public class SanitizedBridgesWriterTest { @Test public void testBridgeIpSecretsIsDirectory() throws Exception { + this.configuration.setProperty(Key.ReplaceIpAddressesWithHashes.name(), + "true"); Files.createDirectory(Paths.get(statsDirectory, "bridge-ip-secrets")); this.runTest(); assertTrue("Sanitized server descriptors without secrets.",