commit 2e8cdf7fe1cd11b6afe599512e4844c4234e257a Author: Karsten Loesing <karsten.loesing@gmx.net> Date: Tue Dec 1 10:35:26 2020 +0100
Move sanitizing code to one class per type.
Part of #20542. --- .../bridgedescs/SanitizedBridgeDescriptor.java | 118 ++++ .../SanitizedBridgeExtraInfoDescriptor.java | 192 +++++ .../bridgedescs/SanitizedBridgeNetworkStatus.java | 230 ++++++ .../SanitizedBridgeServerDescriptor.java | 360 ++++++++++ .../bridgedescs/SanitizedBridgesWriter.java | 771 +-------------------- 5 files changed, 934 insertions(+), 737 deletions(-)
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeDescriptor.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeDescriptor.java new file mode 100644 index 0000000..5ddeefe --- /dev/null +++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeDescriptor.java @@ -0,0 +1,118 @@ +/* Copyright 2010--2020 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.collector.bridgedescs; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.digest.DigestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.charset.StandardCharsets; + +public abstract class SanitizedBridgeDescriptor { + + private static final Logger logger = LoggerFactory.getLogger( + SanitizedBridgeDescriptor.class); + + protected byte[] originalBytes; + + protected SensitivePartsSanitizer sensitivePartsSanitizer; + + protected byte[] sanitizedBytes; + + protected String publishedString; + + SanitizedBridgeDescriptor(byte[] originalBytes, + SensitivePartsSanitizer sensitivePartsSanitizer) { + this.originalBytes = originalBytes; + this.sensitivePartsSanitizer = sensitivePartsSanitizer; + } + + protected String parseMasterKeyEd25519FromIdentityEd25519( + String identityEd25519Base64) { + byte[] identityEd25519 = Base64.decodeBase64(identityEd25519Base64); + if (identityEd25519.length < 40) { + logger.warn("Invalid length of identity-ed25519 (in bytes): {}", + identityEd25519.length); + } else if (identityEd25519[0] != 0x01) { + logger.warn("Unknown version in identity-ed25519: {}", + identityEd25519[0]); + } else if (identityEd25519[1] != 0x04) { + logger.warn("Unknown cert type in identity-ed25519: {}", + identityEd25519[1]); + } else if (identityEd25519[6] != 0x01) { + logger.warn("Unknown certified key type in identity-ed25519: {}", + identityEd25519[1]); + } else if (identityEd25519[39] == 0x00) { + logger.warn("No 
extensions in identity-ed25519 (which " + + "would contain the encoded master-key-ed25519): {}", + identityEd25519[39]); + } else { + int extensionStart = 40; + for (int i = 0; i < (int) identityEd25519[39]; i++) { + if (identityEd25519.length < extensionStart + 4) { + logger.warn("Invalid extension with id {} in identity-ed25519.", i); + break; + } + int extensionLength = identityEd25519[extensionStart]; + extensionLength <<= 8; + extensionLength += identityEd25519[extensionStart + 1]; + int extensionType = identityEd25519[extensionStart + 2]; + if (extensionLength == 32 && extensionType == 4) { + if (identityEd25519.length < extensionStart + 4 + 32) { + logger.warn("Invalid extension with id {} in identity-ed25519.", i); + break; + } + byte[] masterKeyEd25519 = new byte[32]; + System.arraycopy(identityEd25519, extensionStart + 4, + masterKeyEd25519, 0, masterKeyEd25519.length); + String masterKeyEd25519Base64 = Base64.encodeBase64String( + masterKeyEd25519); + return masterKeyEd25519Base64.replaceAll("=", ""); + } + extensionStart += 4 + extensionLength; + } + } + logger.warn("Unable to locate master-key-ed25519 in identity-ed25519."); + return null; + } + + protected String computeDescriptorDigest(byte[] descriptorBytes, + String startToken, String sigToken) { + String descriptorDigest = null; + String ascii = new String(descriptorBytes, StandardCharsets.US_ASCII); + int start = ascii.indexOf(startToken); + int sig = ascii.indexOf(sigToken) + sigToken.length(); + if (start >= 0 && sig >= 0 && sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(descriptorBytes, start, forDigest, 0, sig - start); + descriptorDigest = DigestUtils.sha1Hex(DigestUtils.sha1(forDigest)); + } + if (descriptorDigest == null) { + logger.warn("Could not calculate extra-info descriptor digest."); + } + return descriptorDigest; + } + + protected String computeSha256Base64Digest(byte[] descriptorBytes, + String startToken, String sigToken) { + String 
descriptorDigestSha256Base64 = null; + String ascii = new String(descriptorBytes, StandardCharsets.US_ASCII); + int start = ascii.indexOf(startToken); + int sig = ascii.indexOf(sigToken) + sigToken.length(); + if (start >= 0 && sig >= 0 && sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(descriptorBytes, start, forDigest, 0, sig - start); + descriptorDigestSha256Base64 = Base64.encodeBase64String( + DigestUtils.sha256(DigestUtils.sha256(forDigest))) + .replaceAll("=", ""); + } + if (descriptorDigestSha256Base64 == null) { + logger.warn("Could not calculate extra-info " + + "descriptor SHA256 digest."); + } + return descriptorDigestSha256Base64; + } +} + diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeExtraInfoDescriptor.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeExtraInfoDescriptor.java new file mode 100644 index 0000000..f2ec992 --- /dev/null +++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeExtraInfoDescriptor.java @@ -0,0 +1,192 @@ +/* Copyright 2010--2020 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.collector.bridgedescs; + +import org.torproject.metrics.collector.conf.Annotation; + +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.StringReader; +import java.nio.charset.StandardCharsets; + +public class SanitizedBridgeExtraInfoDescriptor + extends SanitizedBridgeDescriptor { + + private static final Logger logger = LoggerFactory.getLogger( + SanitizedBridgeExtraInfoDescriptor.class); + + private String descriptorDigest; + + SanitizedBridgeExtraInfoDescriptor(byte[] originalBytes, + 
SensitivePartsSanitizer sensitivePartsSanitizer) { + super(originalBytes, sensitivePartsSanitizer); + } + + boolean sanitizeDescriptor() { + + /* Parse descriptor to generate a sanitized version. */ + String masterKeyEd25519FromIdentityEd25519 = null; + DescriptorBuilder scrubbed = new DescriptorBuilder(); + try (BufferedReader br = new BufferedReader(new StringReader(new String( + this.originalBytes, StandardCharsets.US_ASCII)))) { + scrubbed.append(Annotation.BridgeExtraInfo.toString()); + String line; + String hashedBridgeIdentity; + String masterKeyEd25519 = null; + while ((line = br.readLine()) != null) { + + /* Parse bridge identity from extra-info line and replace it with + * its hash in the sanitized descriptor. */ + String[] parts = line.split(" "); + if (line.startsWith("extra-info ")) { + if (parts.length < 3) { + logger.debug("Illegal line in extra-info descriptor: '{}'. " + + "Skipping descriptor.", line); + return false; + } + hashedBridgeIdentity = DigestUtils.sha1Hex(Hex.decodeHex( + parts[2].toCharArray())).toLowerCase(); + scrubbed.append("extra-info ").append(parts[1]) + .space().append(hashedBridgeIdentity.toUpperCase()).newLine(); + + /* Parse the publication time to determine the file name. */ + } else if (line.startsWith("published ")) { + scrubbed.append(line).newLine(); + this.publishedString = line.substring("published ".length()); + + /* Remove everything from transport lines except the transport + * name. */ + } else if (line.startsWith("transport ")) { + if (parts.length < 3) { + logger.debug("Illegal line in extra-info descriptor: '{}'. " + + "Skipping descriptor.", line); + return false; + } + scrubbed.append("transport ").append(parts[1]).newLine(); + + /* Skip transport-info lines entirely. */ + } else if (line.startsWith("transport-info ")) { + + /* Extract master-key-ed25519 from identity-ed25519. 
*/ + } else if (line.equals("identity-ed25519")) { + StringBuilder sb = new StringBuilder(); + while ((line = br.readLine()) != null + && !line.equals("-----END ED25519 CERT-----")) { + if (line.equals("-----BEGIN ED25519 CERT-----")) { + continue; + } + sb.append(line); + } + masterKeyEd25519FromIdentityEd25519 = + this.parseMasterKeyEd25519FromIdentityEd25519( + sb.toString()); + String sha256MasterKeyEd25519 = Base64.encodeBase64String( + DigestUtils.sha256(Base64.decodeBase64( + masterKeyEd25519FromIdentityEd25519 + "="))) + .replaceAll("=", ""); + scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519) + .newLine(); + if (masterKeyEd25519 != null && !masterKeyEd25519.equals( + masterKeyEd25519FromIdentityEd25519)) { + logger.warn("Mismatch between identity-ed25519 and " + + "master-key-ed25519. Skipping."); + return false; + } + + /* Verify that identity-ed25519 and master-key-ed25519 match. */ + } else if (line.startsWith("master-key-ed25519 ")) { + masterKeyEd25519 = line.substring(line.indexOf(" ") + 1); + if (masterKeyEd25519FromIdentityEd25519 != null + && !masterKeyEd25519FromIdentityEd25519.equals( + masterKeyEd25519)) { + logger.warn("Mismatch between identity-ed25519 and " + + "master-key-ed25519. Skipping."); + return false; + } + + /* Write the following lines unmodified to the sanitized + * descriptor. 
*/ + } else if (line.startsWith("write-history ") + || line.startsWith("read-history ") + || line.startsWith("ipv6-write-history ") + || line.startsWith("ipv6-read-history ") + || line.startsWith("geoip-start-time ") + || line.startsWith("geoip-client-origins ") + || line.startsWith("geoip-db-digest ") + || line.startsWith("geoip6-db-digest ") + || line.startsWith("conn-bi-direct ") + || line.startsWith("ipv6-conn-bi-direct ") + || line.startsWith("bridge-") + || line.startsWith("dirreq-") + || line.startsWith("cell-") + || line.startsWith("entry-") + || line.startsWith("exit-") + || line.startsWith("hidserv-") + || line.startsWith("padding-counts ")) { + scrubbed.append(line).newLine(); + + /* When we reach the signature, we're done. Write the sanitized + * descriptor to disk below. */ + } else if (line.startsWith("router-signature")) { + break; + + /* Skip the ed25519 signature; we'll include a SHA256 digest of + * the SHA256 descriptor digest in router-digest-sha256. */ + } else if (line.startsWith("router-sig-ed25519 ")) { + continue; + + /* If we encounter an unrecognized line, stop parsing and print + * out a warning. We might have overlooked sensitive information + * that we need to remove or replace for the sanitized descriptor + * version. */ + } else { + logger.warn("Unrecognized line '{}'. Skipping.", line); + return false; + } + } + } catch (DecoderException | IOException e) { + logger.warn("Could not parse extra-info descriptor.", e); + return false; + } + + /* Determine digest(s) of sanitized extra-info descriptor. 
*/ + this.descriptorDigest = this.computeDescriptorDigest(this.originalBytes, + "extra-info ", "\nrouter-signature\n"); + String descriptorDigestSha256Base64 = null; + if (masterKeyEd25519FromIdentityEd25519 != null) { + descriptorDigestSha256Base64 = this.computeSha256Base64Digest( + this.originalBytes, "extra-info ", "\n-----END SIGNATURE-----\n"); + } + if (null != descriptorDigestSha256Base64) { + scrubbed.append("router-digest-sha256 ") + .append(descriptorDigestSha256Base64).newLine(); + } + if (null != this.descriptorDigest) { + scrubbed.append("router-digest ") + .append(this.descriptorDigest.toUpperCase()).newLine(); + } + this.sanitizedBytes = scrubbed.toBytes(); + return true; + } + + byte[] getSanitizedBytes() { + return this.sanitizedBytes; + } + + public String getPublishedString() { + return this.publishedString; + } + + public String getDescriptorDigest() { + return this.descriptorDigest; + } +} + diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeNetworkStatus.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeNetworkStatus.java new file mode 100644 index 0000000..d94cb0d --- /dev/null +++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeNetworkStatus.java @@ -0,0 +1,230 @@ +/* Copyright 2010--2020 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.collector.bridgedescs; + +import org.torproject.metrics.collector.conf.Annotation; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.StringReader; +import java.nio.charset.StandardCharsets; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.SortedMap; +import java.util.TreeMap; + +public class 
SanitizedBridgeNetworkStatus extends SanitizedBridgeDescriptor { + + private static final Logger logger = LoggerFactory.getLogger( + SanitizedBridgeNetworkStatus.class); + + private final String authorityFingerprint; + + SanitizedBridgeNetworkStatus(byte[] originalBytes, + SensitivePartsSanitizer sensitivePartsSanitizer, String publicationTime, + String authorityFingerprint) { + super(originalBytes, sensitivePartsSanitizer); + this.publishedString = publicationTime; + this.authorityFingerprint = authorityFingerprint; + } + + boolean sanitizeDescriptor() { + + if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) { + /* There's a persistence problem, so we shouldn't scrub more IP + * addresses in this execution. */ + return false; + } + + /* Parse the given network status line by line. */ + boolean includesFingerprintLine = false; + DescriptorBuilder scrubbed = new DescriptorBuilder(); + scrubbed.append(Annotation.Status.toString()); + SortedMap<String, String> scrubbedEntries = new TreeMap<>(); + StringBuilder publishedStringBuilder = new StringBuilder(); + scrubbed.append("published ").append(publishedStringBuilder).newLine(); + DescriptorBuilder header = new DescriptorBuilder(); + scrubbed.append(header); + + try { + BufferedReader br = new BufferedReader(new StringReader(new String( + this.originalBytes, StandardCharsets.US_ASCII))); + String line; + String mostRecentDescPublished = null; + byte[] fingerprintBytes = null; + String descPublicationTime = null; + String hashedBridgeIdentityHex = null; + DescriptorBuilder scrubbedEntry = new DescriptorBuilder(); + while ((line = br.readLine()) != null) { + + /* Use publication time from "published" line instead of the + * file's last-modified time. Don't copy over the line, because + * we're going to write a "published" line below. */ + if (line.startsWith("published ")) { + this.publishedString = line.substring("published ".length()); + + /* Additional header lines don't have to be cleaned up. 
*/ + } else if (line.startsWith("flag-thresholds ")) { + header.append(line).newLine(); + + /* The authority fingerprint in the "fingerprint" line can go in + * unscrubbed. */ + } else if (line.startsWith("fingerprint ")) { + if (!("fingerprint " + authorityFingerprint).equals(line)) { + logger.warn("Mismatch between authority fingerprint expected from " + + "file name ({}) and parsed from "fingerprint" " + + "line ("{}").", authorityFingerprint, line); + return false; + } + header.append(line).newLine(); + includesFingerprintLine = true; + + /* r lines contain sensitive information that needs to be removed + * or replaced. */ + } else if (line.startsWith("r ")) { + + /* Clear buffer from previously scrubbed lines. */ + if (scrubbedEntry.hasContent()) { + scrubbedEntries.put(hashedBridgeIdentityHex, + scrubbedEntry.toString()); + scrubbedEntry = new DescriptorBuilder(); + } + + /* Parse the relevant parts of this r line. */ + String[] parts = line.split(" "); + if (parts.length < 9) { + logger.warn("Illegal line '{}' in bridge network " + + "status. Skipping descriptor.", line); + return false; + } + if (!Base64.isBase64(parts[2])) { + logger.warn("Illegal base64 character in r line '{}'. " + + "Skipping descriptor.", parts[2]); + return false; + } + fingerprintBytes = Base64.decodeBase64(parts[2] + "=="); + descPublicationTime = parts[4] + " " + parts[5]; + String address = parts[6]; + String orPort = parts[7]; + String dirPort = parts[8]; + + /* Determine most recent descriptor publication time. */ + if (descPublicationTime.compareTo(this.publishedString) <= 0 + && (mostRecentDescPublished == null + || descPublicationTime.compareTo( + mostRecentDescPublished) > 0)) { + mostRecentDescPublished = descPublicationTime; + } + + /* Write scrubbed r line to buffer. 
*/ + byte[] hashedBridgeIdentity = DigestUtils.sha1(fingerprintBytes); + String hashedBridgeIdentityBase64 = Base64.encodeBase64String( + hashedBridgeIdentity).substring(0, 27); + hashedBridgeIdentityHex = Hex.encodeHexString( + hashedBridgeIdentity); + String descriptorIdentifier = parts[3]; + String hashedDescriptorIdentifier = Base64.encodeBase64String( + DigestUtils.sha1(Base64.decodeBase64(descriptorIdentifier + + "=="))).substring(0, 27); + String scrubbedAddress = this.sensitivePartsSanitizer + .scrubIpv4Address(address, fingerprintBytes, descPublicationTime); + String nickname = parts[1]; + String scrubbedOrPort = this.sensitivePartsSanitizer.scrubTcpPort( + orPort, fingerprintBytes, descPublicationTime); + String scrubbedDirPort = this.sensitivePartsSanitizer.scrubTcpPort( + dirPort, fingerprintBytes, descPublicationTime); + scrubbedEntry.append("r ").append(nickname).space() + .append(hashedBridgeIdentityBase64).space() + .append(hashedDescriptorIdentifier).space() + .append(descPublicationTime).space() + .append(scrubbedAddress).space() + .append(scrubbedOrPort).space() + .append(scrubbedDirPort).newLine(); + + /* Sanitize any addresses in a lines using the fingerprint and + * descriptor publication time from the previous r line. */ + } else if (line.startsWith("a ")) { + String scrubbedOrAddress = this.sensitivePartsSanitizer + .scrubOrAddress(line.substring("a ".length()), fingerprintBytes, + descPublicationTime); + if (scrubbedOrAddress != null) { + scrubbedEntry.append("a ").append(scrubbedOrAddress).newLine(); + } else { + logger.warn("Invalid address in line '{}' " + + "in bridge network status. Skipping line!", line); + } + + /* Nothing special about s, w, and p lines; just copy them. 
*/ + } else if (line.startsWith("s ") || line.equals("s") + || line.startsWith("w ") || line.equals("w") + || line.startsWith("p ") || line.equals("p")) { + scrubbedEntry.append(line).newLine(); + + /* There should be nothing else but r, a, w, p, and s lines in the + * network status. If there is, we should probably learn before + * writing anything to the sanitized descriptors. */ + } else { + logger.debug("Unknown line '{}' in bridge " + + "network status. Not writing to disk!", line); + return false; + } + } + br.close(); + if (scrubbedEntry.hasContent()) { + scrubbedEntries.put(hashedBridgeIdentityHex, scrubbedEntry.toString()); + } + if (!includesFingerprintLine) { + header.append("fingerprint ").append(authorityFingerprint).newLine(); + } + + /* Check if we can tell from the descriptor publication times + * whether this status is possibly stale. */ + SimpleDateFormat formatter = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + if (null == mostRecentDescPublished) { + logger.warn("The bridge network status published at {}" + + " does not contain a single entry. Please ask the bridge " + + "authority operator to check!", this.publishedString); + } else if (formatter.parse(this.publishedString).getTime() + - formatter.parse(mostRecentDescPublished).getTime() + > 60L * 60L * 1000L) { + logger.warn("The most recent descriptor in the bridge " + + "network status published at {} was published at {} which is " + + "more than 1 hour before the status. This is a sign for " + + "the status being stale. Please check!", + this.publishedString, mostRecentDescPublished); + } + } catch (ParseException e) { + logger.warn("Could not parse timestamp in bridge network status.", e); + return false; + } catch (IOException e) { + logger.warn("Could not parse bridge network status.", e); + return false; + } + + /* Write the sanitized network status to disk. 
*/ + publishedStringBuilder.append(this.publishedString); + for (String scrubbedEntry : scrubbedEntries.values()) { + scrubbed.append(scrubbedEntry); + } + this.sanitizedBytes = scrubbed.toBytes(); + return true; + } + + + byte[] getSanitizedBytes() { + return this.sanitizedBytes; + } + + public String getPublishedString() { + return this.publishedString; + } +} + diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeServerDescriptor.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeServerDescriptor.java new file mode 100644 index 0000000..7f3d4d8 --- /dev/null +++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeServerDescriptor.java @@ -0,0 +1,360 @@ +/* Copyright 2010--2020 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.collector.bridgedescs; + +import org.torproject.metrics.collector.conf.Annotation; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.StringReader; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; + +public class SanitizedBridgeServerDescriptor + extends SanitizedBridgeDescriptor { + + private static final Logger logger = LoggerFactory.getLogger( + SanitizedBridgeServerDescriptor.class); + + private String descriptorDigest; + + SanitizedBridgeServerDescriptor(byte[] originalBytes, + SensitivePartsSanitizer sensitivePartsSanitizer) { + super(originalBytes, sensitivePartsSanitizer); + } + + boolean sanitizeDescriptor() { + + if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) { + /* There's a persistence problem, so we shouldn't scrub more IP + * addresses in this execution. 
*/ + return false; + } + + /* Parse descriptor to generate a sanitized version. */ + String address = null; + byte[] fingerprintBytes = null; + StringBuilder scrubbedAddress = null; + Map<StringBuilder, String> scrubbedTcpPorts = new HashMap<>(); + Map<StringBuilder, String> scrubbedIpAddressesAndTcpPorts = new HashMap<>(); + String masterKeyEd25519FromIdentityEd25519 = null; + DescriptorBuilder scrubbed = new DescriptorBuilder(); + try (BufferedReader br = new BufferedReader(new StringReader( + new String(this.originalBytes, StandardCharsets.US_ASCII)))) { + scrubbed.append(Annotation.BridgeServer.toString()); + String line; + String masterKeyEd25519 = null; + boolean skipCrypto = false; + while ((line = br.readLine()) != null) { + + /* Skip all crypto parts that might be used to derive the bridge's + * identity fingerprint. */ + if (skipCrypto && !line.startsWith("-----END ")) { + continue; + + /* Store the router line for later processing, because we may need + * the bridge identity fingerprint for replacing the IP address in + * the scrubbed version. */ + } else if (line.startsWith("router ")) { + String[] parts = line.split(" "); + if (parts.length != 6) { + logger.warn("Invalid router line: '{}'. Skipping.", line); + return false; + } + address = parts[2]; + scrubbedAddress = new StringBuilder(); + StringBuilder scrubbedOrPort = new StringBuilder(); + scrubbedTcpPorts.put(scrubbedOrPort, parts[3]); + StringBuilder scrubbedDirPort = new StringBuilder(); + scrubbedTcpPorts.put(scrubbedDirPort, parts[4]); + StringBuilder scrubbedSocksPort = new StringBuilder(); + scrubbedTcpPorts.put(scrubbedSocksPort, parts[5]); + scrubbed.append("router ").append(parts[1]).space() + .append(scrubbedAddress).space() + .append(scrubbedOrPort).space() + .append(scrubbedDirPort).space() + .append(scrubbedSocksPort).newLine(); + + /* Store or-address and sanitize it when we have read the fingerprint + * and descriptor publication time. 
*/ + } else if (line.startsWith("or-address ")) { + String orAddress = line.substring("or-address ".length()); + StringBuilder scrubbedOrAddress = new StringBuilder(); + scrubbedIpAddressesAndTcpPorts.put(scrubbedOrAddress, orAddress); + scrubbed.append("or-address ").append(scrubbedOrAddress).newLine(); + + /* Parse the publication time to see if we're still inside the + * sanitizing interval. */ + } else if (line.startsWith("published ")) { + this.publishedString = line.substring("published ".length()); + scrubbed.append(line).newLine(); + + /* Parse the fingerprint to determine the hashed bridge + * identity. */ + } else if (line.startsWith("opt fingerprint ") + || line.startsWith("fingerprint ")) { + String fingerprint = line.substring(line.startsWith("opt ") + ? "opt fingerprint".length() : "fingerprint".length()) + .replaceAll(" ", "").toLowerCase(); + fingerprintBytes = Hex.decodeHex(fingerprint.toCharArray()); + String hashedBridgeIdentity = DigestUtils.sha1Hex(fingerprintBytes) + .toLowerCase(); + scrubbed.append(line.startsWith("opt ") ? "opt " : "") + .append("fingerprint"); + for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++) { + scrubbed.space().append(hashedBridgeIdentity.substring(4 * i, + 4 * (i + 1)).toUpperCase()); + } + scrubbed.newLine(); + + /* Replace the contact line (if present) with a generic one. */ + } else if (line.startsWith("contact ")) { + scrubbed.append("contact somebody").newLine(); + + /* When we reach the signature, we're done. Write the sanitized + * descriptor to disk below. */ + } else if (line.startsWith("router-signature")) { + break; + + /* Replace extra-info digest with the hashed digest of the + * non-scrubbed descriptor. 
*/ + } else if (line.startsWith("opt extra-info-digest ") + || line.startsWith("extra-info-digest ")) { + String[] parts = line.split(" "); + if (line.startsWith("opt ")) { + scrubbed.append("opt "); + parts = line.substring(4).split(" "); + } + if (parts.length > 3) { + logger.warn("extra-info-digest line contains more arguments than" + + "expected: '{}'. Skipping descriptor.", line); + return false; + } + scrubbed.append("extra-info-digest ").append(DigestUtils.sha1Hex( + Hex.decodeHex(parts[1].toCharArray())).toUpperCase()); + if (parts.length > 2) { + if (!Base64.isBase64(parts[2])) { + logger.warn("Illegal base64 character in extra-info-digest line " + + "'{}'. Skipping descriptor.", line); + return false; + } + scrubbed.space().append(Base64.encodeBase64String( + DigestUtils.sha256(Base64.decodeBase64(parts[2]))) + .replaceAll("=", "")); + } + scrubbed.newLine(); + + /* Possibly sanitize reject lines if they contain the bridge's own + * IP address. */ + } else if (line.startsWith("reject ")) { + if (address != null && line.startsWith("reject " + address)) { + scrubbed.append("reject ").append(scrubbedAddress) + .append(line.substring("reject ".length() + address.length())) + .newLine(); + } else { + scrubbed.append(line).newLine(); + } + + /* Extract master-key-ed25519 from identity-ed25519. */ + } else if (line.equals("identity-ed25519")) { + StringBuilder sb = new StringBuilder(); + while ((line = br.readLine()) != null + && !line.equals("-----END ED25519 CERT-----")) { + if (line.equals("-----BEGIN ED25519 CERT-----")) { + continue; + } + sb.append(line); + } + masterKeyEd25519FromIdentityEd25519 + = this.parseMasterKeyEd25519FromIdentityEd25519(sb.toString()); + if (masterKeyEd25519FromIdentityEd25519 == null) { + logger.warn("Could not parse master-key-ed25519 from " + + "identity-ed25519. 
Skipping descriptor."); + return false; + } + String sha256MasterKeyEd25519 = Base64.encodeBase64String( + DigestUtils.sha256(Base64.decodeBase64( + masterKeyEd25519FromIdentityEd25519 + "="))) + .replaceAll("=", ""); + scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519) + .newLine(); + if (masterKeyEd25519 != null && !masterKeyEd25519.equals( + masterKeyEd25519FromIdentityEd25519)) { + logger.warn("Mismatch between identity-ed25519 and " + + "master-key-ed25519. Skipping."); + return false; + } + + /* Verify that identity-ed25519 and master-key-ed25519 match. */ + } else if (line.startsWith("master-key-ed25519 ")) { + masterKeyEd25519 = line.substring(line.indexOf(" ") + 1); + if (masterKeyEd25519FromIdentityEd25519 != null + && !masterKeyEd25519FromIdentityEd25519.equals( + masterKeyEd25519)) { + logger.warn("Mismatch between identity-ed25519 and " + + "master-key-ed25519. Skipping."); + return false; + } + + /* Write the following lines unmodified to the sanitized + * descriptor. */ + } else if (line.startsWith("accept ") + || line.startsWith("platform ") + || line.startsWith("opt protocols ") + || line.startsWith("protocols ") + || line.startsWith("proto ") + || line.startsWith("uptime ") + || line.startsWith("bandwidth ") + || line.startsWith("opt hibernating ") + || line.startsWith("hibernating ") + || line.startsWith("ntor-onion-key ") + || line.equals("opt hidden-service-dir") + || line.equals("hidden-service-dir") + || line.equals("opt caches-extra-info") + || line.equals("caches-extra-info") + || line.equals("opt allow-single-hop-exits") + || line.equals("allow-single-hop-exits") + || line.startsWith("ipv6-policy ") + || line.equals("tunnelled-dir-server") + || line.startsWith("bridge-distribution-request ")) { + scrubbed.append(line).newLine(); + + /* Replace node fingerprints in the family line with their hashes + * and leave nicknames unchanged. 
*/ + } else if (line.startsWith("family ")) { + DescriptorBuilder familyLine = new DescriptorBuilder("family"); + for (String s : line.substring(7).split(" ")) { + if (s.startsWith("$")) { + familyLine.append(" $").append(DigestUtils.sha1Hex(Hex.decodeHex( + s.substring(1).toCharArray())).toUpperCase()); + } else { + familyLine.space().append(s); + } + } + scrubbed.append(familyLine.toString()).newLine(); + + /* Skip the purpose line that the bridge authority adds to its + * cached-descriptors file. */ + } else if (line.startsWith("@purpose ")) { + continue; + + /* Skip all crypto parts that might leak the bridge's identity + * fingerprint. */ + } else if (line.startsWith("-----BEGIN ") + || line.equals("onion-key") || line.equals("signing-key") + || line.equals("onion-key-crosscert") + || line.startsWith("ntor-onion-key-crosscert ")) { + skipCrypto = true; + + /* Stop skipping lines when the crypto parts are over. */ + } else if (line.startsWith("-----END ")) { + skipCrypto = false; + + /* Skip the ed25519 signature; we'll include a SHA256 digest of + * the SHA256 descriptor digest in router-digest-sha256. */ + } else if (line.startsWith("router-sig-ed25519 ")) { + continue; + + /* If we encounter an unrecognized line, stop parsing and print + * out a warning. We might have overlooked sensitive information + * that we need to remove or replace for the sanitized descriptor + * version. */ + } else { + logger.warn("Unrecognized line '{}'. Skipping.", line); + return false; + } + } + } catch (Exception e) { + logger.warn("Could not parse server descriptor.", e); + return false; + } + + /* Sanitize the parts that we couldn't sanitize earlier. */ + if (null == address || null == fingerprintBytes + || null == this.publishedString) { + logger.warn("Missing either of the following lines that are " + + "required to sanitize this server bridge descriptor: " + + ""router", "fingerprint", "published". 
Skipping " + + "descriptor."); + return false; + } + try { + String scrubbedAddressString = this.sensitivePartsSanitizer + .scrubIpv4Address(address, fingerprintBytes, + this.getPublishedString()); + if (null == scrubbedAddressString) { + logger.warn("Invalid IP address in "router" line in bridge server " + + "descriptor. Skipping descriptor."); + return false; + } + scrubbedAddress.append(scrubbedAddressString); + for (Map.Entry<StringBuilder, String> e + : scrubbedIpAddressesAndTcpPorts.entrySet()) { + String scrubbedOrAddress = this.sensitivePartsSanitizer + .scrubOrAddress(e.getValue(), fingerprintBytes, + this.getPublishedString()); + if (null == scrubbedOrAddress) { + logger.warn("Invalid IP address or TCP port in "or-address" line " + + "in bridge server descriptor. Skipping descriptor."); + return false; + } + e.getKey().append(scrubbedOrAddress); + } + for (Map.Entry<StringBuilder, String> e : scrubbedTcpPorts.entrySet()) { + String scrubbedTcpPort = this.sensitivePartsSanitizer + .scrubTcpPort(e.getValue(), fingerprintBytes, + this.getPublishedString()); + if (null == scrubbedTcpPort) { + logger.warn("Invalid TCP port in "router" line in bridge server " + + "descriptor. Skipping descriptor."); + return false; + } + e.getKey().append(scrubbedTcpPort); + } + } catch (IOException exception) { + /* There's a persistence problem, so we shouldn't scrub more IP addresses + * or TCP ports in this execution. */ + return false; + } + + /* Determine digest(s) of sanitized server descriptor. 
*/ + this.descriptorDigest = this.computeDescriptorDigest(this.originalBytes, + "router ", "\nrouter-signature\n"); + String descriptorDigestSha256Base64 = null; + if (masterKeyEd25519FromIdentityEd25519 != null) { + descriptorDigestSha256Base64 = this.computeSha256Base64Digest( + this.originalBytes, "router ", "\n-----END SIGNATURE-----\n"); + } + if (null != descriptorDigestSha256Base64) { + scrubbed.append("router-digest-sha256 ") + .append(descriptorDigestSha256Base64).newLine(); + } + if (null != this.descriptorDigest) { + scrubbed.append("router-digest ") + .append(this.descriptorDigest.toUpperCase()).newLine(); + } + this.sanitizedBytes = scrubbed.toBytes(); + return true; + } + + byte[] getSanitizedBytes() { + return this.sanitizedBytes; + } + + public String getPublishedString() { + return this.publishedString; + } + + public String getDescriptorDigest() { + return this.descriptorDigest; + } +} + diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java index 77ab406..d5009e1 100644 --- a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java +++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java @@ -6,15 +6,12 @@ package org.torproject.metrics.collector.bridgedescs; import org.torproject.descriptor.BridgeExtraInfoDescriptor; import org.torproject.descriptor.BridgeNetworkStatus; import org.torproject.descriptor.BridgeServerDescriptor; -import org.torproject.metrics.collector.conf.Annotation; import org.torproject.metrics.collector.conf.Configuration; import org.torproject.metrics.collector.conf.ConfigurationException; import org.torproject.metrics.collector.conf.Key; import org.torproject.metrics.collector.cron.CollecTorMain; import org.torproject.metrics.collector.persist.PersistenceUtils;
-import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.binary.Base64; import org.apache.commons.codec.binary.Hex; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; @@ -37,14 +34,10 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.time.Instant; import java.time.temporal.ChronoUnit; -import java.util.HashMap; import java.util.HashSet; -import java.util.Map; import java.util.Set; -import java.util.SortedMap; import java.util.SortedSet; import java.util.Stack; -import java.util.TreeMap; import java.util.TreeSet;
/** @@ -347,187 +340,18 @@ public class SanitizedBridgesWriter extends CollecTorMain { public void sanitizeAndStoreNetworkStatus(byte[] data, String publicationTime, String authorityFingerprint) {
- if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) { - /* There's a persistence problem, so we shouldn't scrub more IP - * addresses in this execution. */ + SanitizedBridgeNetworkStatus sanitizedBridgeNetworkStatus + = new SanitizedBridgeNetworkStatus(data, this.sensitivePartsSanitizer, + publicationTime, authorityFingerprint); + if (!sanitizedBridgeNetworkStatus.sanitizeDescriptor()) { + logger.warn("Unable to sanitize bridge network status."); return; } - + byte[] scrubbedBytes = sanitizedBridgeNetworkStatus.getSanitizedBytes(); + publicationTime = sanitizedBridgeNetworkStatus.getPublishedString(); if (publicationTime.compareTo(maxNetworkStatusPublishedTime) > 0) { maxNetworkStatusPublishedTime = publicationTime; } - - /* Parse the given network status line by line. */ - boolean includesFingerprintLine = false; - DescriptorBuilder scrubbed = new DescriptorBuilder(); - scrubbed.append(Annotation.Status.toString()); - SortedMap<String, String> scrubbedEntries = new TreeMap<>(); - StringBuilder publishedStringBuilder = new StringBuilder(); - scrubbed.append("published ").append(publishedStringBuilder).newLine(); - DescriptorBuilder header = new DescriptorBuilder(); - scrubbed.append(header); - - try { - BufferedReader br = new BufferedReader(new StringReader(new String( - data, StandardCharsets.US_ASCII))); - String line; - String mostRecentDescPublished = null; - byte[] fingerprintBytes = null; - String descPublicationTime = null; - String hashedBridgeIdentityHex = null; - DescriptorBuilder scrubbedEntry = new DescriptorBuilder(); - while ((line = br.readLine()) != null) { - - /* Use publication time from "published" line instead of the - * file's last-modified time. Don't copy over the line, because - * we're going to write a "published" line below. */ - if (line.startsWith("published ")) { - publicationTime = line.substring("published ".length()); - - /* Additional header lines don't have to be cleaned up. 
*/ - } else if (line.startsWith("flag-thresholds ")) { - header.append(line).newLine(); - - /* The authority fingerprint in the "fingerprint" line can go in - * unscrubbed. */ - } else if (line.startsWith("fingerprint ")) { - if (!("fingerprint " + authorityFingerprint).equals(line)) { - logger.warn("Mismatch between authority fingerprint expected from " - + "file name ({}) and parsed from "fingerprint" " - + "line ("{}").", authorityFingerprint, line); - return; - } - header.append(line).newLine(); - includesFingerprintLine = true; - - /* r lines contain sensitive information that needs to be removed - * or replaced. */ - } else if (line.startsWith("r ")) { - - /* Clear buffer from previously scrubbed lines. */ - if (scrubbedEntry.hasContent()) { - scrubbedEntries.put(hashedBridgeIdentityHex, - scrubbedEntry.toString()); - scrubbedEntry = new DescriptorBuilder(); - } - - /* Parse the relevant parts of this r line. */ - String[] parts = line.split(" "); - if (parts.length < 9) { - logger.warn("Illegal line '{}' in bridge network " - + "status. Skipping descriptor.", line); - return; - } - if (!Base64.isBase64(parts[2])) { - logger.warn("Illegal base64 character in r line '{}'. " - + "Skipping descriptor.", parts[2]); - return; - } - fingerprintBytes = Base64.decodeBase64(parts[2] + "=="); - descPublicationTime = parts[4] + " " + parts[5]; - String address = parts[6]; - String orPort = parts[7]; - String dirPort = parts[8]; - - /* Determine most recent descriptor publication time. */ - if (descPublicationTime.compareTo(publicationTime) <= 0 - && (mostRecentDescPublished == null - || descPublicationTime.compareTo( - mostRecentDescPublished) > 0)) { - mostRecentDescPublished = descPublicationTime; - } - - /* Write scrubbed r line to buffer. 
*/ - byte[] hashedBridgeIdentity = DigestUtils.sha1(fingerprintBytes); - String hashedBridgeIdentityBase64 = Base64.encodeBase64String( - hashedBridgeIdentity).substring(0, 27); - hashedBridgeIdentityHex = Hex.encodeHexString( - hashedBridgeIdentity); - String descriptorIdentifier = parts[3]; - String hashedDescriptorIdentifier = Base64.encodeBase64String( - DigestUtils.sha1(Base64.decodeBase64(descriptorIdentifier - + "=="))).substring(0, 27); - String scrubbedAddress = this.sensitivePartsSanitizer - .scrubIpv4Address(address, fingerprintBytes, descPublicationTime); - String nickname = parts[1]; - String scrubbedOrPort = this.sensitivePartsSanitizer.scrubTcpPort( - orPort, fingerprintBytes, descPublicationTime); - String scrubbedDirPort = this.sensitivePartsSanitizer.scrubTcpPort( - dirPort, fingerprintBytes, descPublicationTime); - scrubbedEntry.append("r ").append(nickname).space() - .append(hashedBridgeIdentityBase64).space() - .append(hashedDescriptorIdentifier).space() - .append(descPublicationTime).space() - .append(scrubbedAddress).space() - .append(scrubbedOrPort).space() - .append(scrubbedDirPort).newLine(); - - /* Sanitize any addresses in a lines using the fingerprint and - * descriptor publication time from the previous r line. */ - } else if (line.startsWith("a ")) { - String scrubbedOrAddress = this.sensitivePartsSanitizer - .scrubOrAddress(line.substring("a ".length()), fingerprintBytes, - descPublicationTime); - if (scrubbedOrAddress != null) { - scrubbedEntry.append("a ").append(scrubbedOrAddress).newLine(); - } else { - logger.warn("Invalid address in line '{}' " - + "in bridge network status. Skipping line!", line); - } - - /* Nothing special about s, w, and p lines; just copy them. 
*/ - } else if (line.startsWith("s ") || line.equals("s") - || line.startsWith("w ") || line.equals("w") - || line.startsWith("p ") || line.equals("p")) { - scrubbedEntry.append(line).newLine(); - - /* There should be nothing else but r, a, w, p, and s lines in the - * network status. If there is, we should probably learn before - * writing anything to the sanitized descriptors. */ - } else { - logger.debug("Unknown line '{}' in bridge " - + "network status. Not writing to disk!", line); - return; - } - } - br.close(); - if (scrubbedEntry.hasContent()) { - scrubbedEntries.put(hashedBridgeIdentityHex, scrubbedEntry.toString()); - } - if (!includesFingerprintLine) { - header.append("fingerprint ").append(authorityFingerprint).newLine(); - } - - /* Check if we can tell from the descriptor publication times - * whether this status is possibly stale. */ - SimpleDateFormat formatter = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - if (null == mostRecentDescPublished) { - logger.warn("The bridge network status published at {}" - + " does not contain a single entry. Please ask the bridge " - + "authority operator to check!", publicationTime); - } else if (formatter.parse(publicationTime).getTime() - - formatter.parse(mostRecentDescPublished).getTime() - > 60L * 60L * 1000L) { - logger.warn("The most recent descriptor in the bridge " - + "network status published at {} was published at {} which is " - + "more than 1 hour before the status. This is a sign for " - + "the status being stale. Please check!", - publicationTime, mostRecentDescPublished); - } - } catch (ParseException e) { - logger.warn("Could not parse timestamp in bridge network status.", e); - return; - } catch (IOException e) { - logger.warn("Could not parse bridge network status.", e); - return; - } - - /* Write the sanitized network status to disk. 
*/ - publishedStringBuilder.append(publicationTime); - for (String scrubbedEntry : scrubbedEntries.values()) { - scrubbed.append(scrubbedEntry); - } try { String syear = publicationTime.substring(0, 4); String smonth = publicationTime.substring(5, 7); @@ -543,7 +367,7 @@ public class SanitizedBridgesWriter extends CollecTorMain { Paths.get("statuses", fileName)); for (Path outputFile : new Path[] { tarballFile, rsyncFile }) { Files.createDirectories(outputFile.getParent()); - Files.write(outputFile, scrubbed.toBytes()); + Files.write(outputFile, scrubbedBytes); } } catch (IOException e) { logger.warn("Could not write sanitized bridge " @@ -558,341 +382,21 @@ public class SanitizedBridgesWriter extends CollecTorMain { */ public void sanitizeAndStoreServerDescriptor(byte[] data) {
- if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) { - /* There's a persistence problem, so we shouldn't scrub more IP - * addresses in this execution. */ + SanitizedBridgeServerDescriptor sanitizedBridgeServerDescriptor + = new SanitizedBridgeServerDescriptor(data, + this.sensitivePartsSanitizer); + if (!sanitizedBridgeServerDescriptor.sanitizeDescriptor()) { + logger.warn("Unable to sanitize bridge server descriptor."); return; } - - /* Parse descriptor to generate a sanitized version. */ - String address = null; - String published = null; - byte[] fingerprintBytes = null; - StringBuilder scrubbedAddress = null; - Map<StringBuilder, String> scrubbedTcpPorts = new HashMap<>(); - Map<StringBuilder, String> scrubbedIpAddressesAndTcpPorts = new HashMap<>(); - String masterKeyEd25519FromIdentityEd25519 = null; - DescriptorBuilder scrubbed = new DescriptorBuilder(); - try (BufferedReader br = new BufferedReader(new StringReader( - new String(data, StandardCharsets.US_ASCII)))) { - scrubbed.append(Annotation.BridgeServer.toString()); - String line; - String masterKeyEd25519 = null; - boolean skipCrypto = false; - while ((line = br.readLine()) != null) { - - /* Skip all crypto parts that might be used to derive the bridge's - * identity fingerprint. */ - if (skipCrypto && !line.startsWith("-----END ")) { - continue; - - /* Store the router line for later processing, because we may need - * the bridge identity fingerprint for replacing the IP address in - * the scrubbed version. */ - } else if (line.startsWith("router ")) { - String[] parts = line.split(" "); - if (parts.length != 6) { - logger.warn("Invalid router line: '{}'. 
Skipping.", line); - return; - } - address = parts[2]; - scrubbedAddress = new StringBuilder(); - StringBuilder scrubbedOrPort = new StringBuilder(); - scrubbedTcpPorts.put(scrubbedOrPort, parts[3]); - StringBuilder scrubbedDirPort = new StringBuilder(); - scrubbedTcpPorts.put(scrubbedDirPort, parts[4]); - StringBuilder scrubbedSocksPort = new StringBuilder(); - scrubbedTcpPorts.put(scrubbedSocksPort, parts[5]); - scrubbed.append("router ").append(parts[1]).space() - .append(scrubbedAddress).space() - .append(scrubbedOrPort).space() - .append(scrubbedDirPort).space() - .append(scrubbedSocksPort).newLine(); - - /* Store or-address and sanitize it when we have read the fingerprint - * and descriptor publication time. */ - } else if (line.startsWith("or-address ")) { - String orAddress = line.substring("or-address ".length()); - StringBuilder scrubbedOrAddress = new StringBuilder(); - scrubbedIpAddressesAndTcpPorts.put(scrubbedOrAddress, orAddress); - scrubbed.append("or-address ").append(scrubbedOrAddress).newLine(); - - /* Parse the publication time to see if we're still inside the - * sanitizing interval. */ - } else if (line.startsWith("published ")) { - published = line.substring("published ".length()); - if (published.compareTo(maxServerDescriptorPublishedTime) > 0) { - maxServerDescriptorPublishedTime = published; - } - scrubbed.append(line).newLine(); - - /* Parse the fingerprint to determine the hashed bridge - * identity. */ - } else if (line.startsWith("opt fingerprint ") - || line.startsWith("fingerprint ")) { - String fingerprint = line.substring(line.startsWith("opt ") - ? "opt fingerprint".length() : "fingerprint".length()) - .replaceAll(" ", "").toLowerCase(); - fingerprintBytes = Hex.decodeHex(fingerprint.toCharArray()); - String hashedBridgeIdentity = DigestUtils.sha1Hex(fingerprintBytes) - .toLowerCase(); - scrubbed.append(line.startsWith("opt ") ? 
"opt " : "") - .append("fingerprint"); - for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++) { - scrubbed.space().append(hashedBridgeIdentity.substring(4 * i, - 4 * (i + 1)).toUpperCase()); - } - scrubbed.newLine(); - - /* Replace the contact line (if present) with a generic one. */ - } else if (line.startsWith("contact ")) { - scrubbed.append("contact somebody").newLine(); - - /* When we reach the signature, we're done. Write the sanitized - * descriptor to disk below. */ - } else if (line.startsWith("router-signature")) { - break; - - /* Replace extra-info digest with the hashed digest of the - * non-scrubbed descriptor. */ - } else if (line.startsWith("opt extra-info-digest ") - || line.startsWith("extra-info-digest ")) { - String[] parts = line.split(" "); - if (line.startsWith("opt ")) { - scrubbed.append("opt "); - parts = line.substring(4).split(" "); - } - if (parts.length > 3) { - logger.warn("extra-info-digest line contains more arguments than" - + "expected: '{}'. Skipping descriptor.", line); - return; - } - scrubbed.append("extra-info-digest ").append(DigestUtils.sha1Hex( - Hex.decodeHex(parts[1].toCharArray())).toUpperCase()); - if (parts.length > 2) { - if (!Base64.isBase64(parts[2])) { - logger.warn("Illegal base64 character in extra-info-digest line " - + "'{}'. Skipping descriptor.", line); - return; - } - scrubbed.space().append(Base64.encodeBase64String( - DigestUtils.sha256(Base64.decodeBase64(parts[2]))) - .replaceAll("=", "")); - } - scrubbed.newLine(); - - /* Possibly sanitize reject lines if they contain the bridge's own - * IP address. */ - } else if (line.startsWith("reject ")) { - if (address != null && line.startsWith("reject " + address)) { - scrubbed.append("reject ").append(scrubbedAddress) - .append(line.substring("reject ".length() + address.length())) - .newLine(); - } else { - scrubbed.append(line).newLine(); - } - - /* Extract master-key-ed25519 from identity-ed25519. 
*/ - } else if (line.equals("identity-ed25519")) { - StringBuilder sb = new StringBuilder(); - while ((line = br.readLine()) != null - && !line.equals("-----END ED25519 CERT-----")) { - if (line.equals("-----BEGIN ED25519 CERT-----")) { - continue; - } - sb.append(line); - } - masterKeyEd25519FromIdentityEd25519 = - this.parseMasterKeyEd25519FromIdentityEd25519( - sb.toString()); - if (masterKeyEd25519FromIdentityEd25519 == null) { - logger.warn("Could not parse master-key-ed25519 from " - + "identity-ed25519. Skipping descriptor."); - return; - } - String sha256MasterKeyEd25519 = Base64.encodeBase64String( - DigestUtils.sha256(Base64.decodeBase64( - masterKeyEd25519FromIdentityEd25519 + "="))) - .replaceAll("=", ""); - scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519) - .newLine(); - if (masterKeyEd25519 != null && !masterKeyEd25519.equals( - masterKeyEd25519FromIdentityEd25519)) { - logger.warn("Mismatch between identity-ed25519 and " - + "master-key-ed25519. Skipping."); - return; - } - - /* Verify that identity-ed25519 and master-key-ed25519 match. */ - } else if (line.startsWith("master-key-ed25519 ")) { - masterKeyEd25519 = line.substring(line.indexOf(" ") + 1); - if (masterKeyEd25519FromIdentityEd25519 != null - && !masterKeyEd25519FromIdentityEd25519.equals( - masterKeyEd25519)) { - logger.warn("Mismatch between identity-ed25519 and " - + "master-key-ed25519. Skipping."); - return; - } - - /* Write the following lines unmodified to the sanitized - * descriptor. 
*/ - } else if (line.startsWith("accept ") - || line.startsWith("platform ") - || line.startsWith("opt protocols ") - || line.startsWith("protocols ") - || line.startsWith("proto ") - || line.startsWith("uptime ") - || line.startsWith("bandwidth ") - || line.startsWith("opt hibernating ") - || line.startsWith("hibernating ") - || line.startsWith("ntor-onion-key ") - || line.equals("opt hidden-service-dir") - || line.equals("hidden-service-dir") - || line.equals("opt caches-extra-info") - || line.equals("caches-extra-info") - || line.equals("opt allow-single-hop-exits") - || line.equals("allow-single-hop-exits") - || line.startsWith("ipv6-policy ") - || line.equals("tunnelled-dir-server") - || line.startsWith("bridge-distribution-request ")) { - scrubbed.append(line).newLine(); - - /* Replace node fingerprints in the family line with their hashes - * and leave nicknames unchanged. */ - } else if (line.startsWith("family ")) { - DescriptorBuilder familyLine = new DescriptorBuilder("family"); - for (String s : line.substring(7).split(" ")) { - if (s.startsWith("$")) { - familyLine.append(" $").append(DigestUtils.sha1Hex(Hex.decodeHex( - s.substring(1).toCharArray())).toUpperCase()); - } else { - familyLine.space().append(s); - } - } - scrubbed.append(familyLine.toString()).newLine(); - - /* Skip the purpose line that the bridge authority adds to its - * cached-descriptors file. */ - } else if (line.startsWith("@purpose ")) { - continue; - - /* Skip all crypto parts that might leak the bridge's identity - * fingerprint. */ - } else if (line.startsWith("-----BEGIN ") - || line.equals("onion-key") || line.equals("signing-key") - || line.equals("onion-key-crosscert") - || line.startsWith("ntor-onion-key-crosscert ")) { - skipCrypto = true; - - /* Stop skipping lines when the crypto parts are over. 
*/ - } else if (line.startsWith("-----END ")) { - skipCrypto = false; - - /* Skip the ed25519 signature; we'll include a SHA256 digest of - * the SHA256 descriptor digest in router-digest-sha256. */ - } else if (line.startsWith("router-sig-ed25519 ")) { - continue; - - /* If we encounter an unrecognized line, stop parsing and print - * out a warning. We might have overlooked sensitive information - * that we need to remove or replace for the sanitized descriptor - * version. */ - } else { - logger.warn("Unrecognized line '{}'. Skipping.", line); - return; - } - } - } catch (Exception e) { - logger.warn("Could not parse server descriptor.", e); - return; + byte[] scrubbedBytes + = sanitizedBridgeServerDescriptor.getSanitizedBytes(); + String published = sanitizedBridgeServerDescriptor.getPublishedString(); + if (published.compareTo(maxServerDescriptorPublishedTime) > 0) { + maxServerDescriptorPublishedTime = published; } - - /* Sanitize the parts that we couldn't sanitize earlier. */ - if (null == address || null == fingerprintBytes - || null == published) { - logger.warn("Missing either of the following lines that are " - + "required to sanitize this server bridge descriptor: " - + ""router", "fingerprint", "published". Skipping " - + "descriptor."); - return; - } - try { - String scrubbedAddressString = this.sensitivePartsSanitizer - .scrubIpv4Address(address, fingerprintBytes, published); - if (null == scrubbedAddressString) { - logger.warn("Invalid IP address in "router" line in bridge server " - + "descriptor. Skipping descriptor."); - return; - } - scrubbedAddress.append(scrubbedAddressString); - for (Map.Entry<StringBuilder, String> e - : scrubbedIpAddressesAndTcpPorts.entrySet()) { - String scrubbedOrAddress = this.sensitivePartsSanitizer - .scrubOrAddress(e.getValue(), fingerprintBytes, published); - if (null == scrubbedOrAddress) { - logger.warn("Invalid IP address or TCP port in "or-address" line " - + "in bridge server descriptor. 
Skipping descriptor."); - return; - } - e.getKey().append(scrubbedOrAddress); - } - for (Map.Entry<StringBuilder, String> e : scrubbedTcpPorts.entrySet()) { - String scrubbedTcpPort = this.sensitivePartsSanitizer - .scrubTcpPort(e.getValue(), fingerprintBytes, published); - if (null == scrubbedTcpPort) { - logger.warn("Invalid TCP port in "router" line in bridge server " - + "descriptor. Skipping descriptor."); - return; - } - e.getKey().append(scrubbedTcpPort); - } - } catch (IOException exception) { - /* There's a persistence problem, so we shouldn't scrub more IP addresses - * or TCP ports in this execution. */ - return; - } - - /* Determine digest(s) of sanitized server descriptor. */ - String descriptorDigest = null; - String ascii = new String(data, StandardCharsets.US_ASCII); - String startToken = "router "; - String sigToken = "\nrouter-signature\n"; - int start = ascii.indexOf(startToken); - int sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 && sig >= 0 && sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - descriptorDigest = DigestUtils.sha1Hex(DigestUtils.sha1(forDigest)); - } - if (descriptorDigest == null) { - logger.warn("Could not calculate server descriptor digest."); - return; - } - String descriptorDigestSha256Base64 = null; - if (masterKeyEd25519FromIdentityEd25519 != null) { - ascii = new String(data, StandardCharsets.US_ASCII); - startToken = "router "; - sigToken = "\n-----END SIGNATURE-----\n"; - start = ascii.indexOf(startToken); - sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 && sig >= 0 && sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - descriptorDigestSha256Base64 = Base64.encodeBase64String( - DigestUtils.sha256(DigestUtils.sha256(forDigest))) - .replaceAll("=", ""); - } - if (descriptorDigestSha256Base64 == null) { - logger.warn("Could not calculate 
server descriptor SHA256 digest."); - return; - } - } - if (null != descriptorDigestSha256Base64) { - scrubbed.append("router-digest-sha256 ") - .append(descriptorDigestSha256Base64).newLine(); - } - scrubbed.append("router-digest ").append(descriptorDigest.toUpperCase()) - .newLine(); + String descriptorDigest + = sanitizedBridgeServerDescriptor.getDescriptorDigest();
/* Determine filename of sanitized server descriptor. */ String dyear = published.substring(0, 4); @@ -918,62 +422,13 @@ public class SanitizedBridgesWriter extends CollecTorMain { break; } Files.createDirectories(outputFile.getParent()); - Files.write(outputFile, scrubbed.toBytes(), openOption); + Files.write(outputFile, scrubbedBytes, openOption); } } catch (IOException e) { logger.warn("Could not write sanitized server descriptor to disk.", e); } }
- private String parseMasterKeyEd25519FromIdentityEd25519( - String identityEd25519Base64) { - byte[] identityEd25519 = Base64.decodeBase64(identityEd25519Base64); - if (identityEd25519.length < 40) { - logger.warn("Invalid length of identity-ed25519 (in bytes): {}", - identityEd25519.length); - } else if (identityEd25519[0] != 0x01) { - logger.warn("Unknown version in identity-ed25519: {}", - identityEd25519[0]); - } else if (identityEd25519[1] != 0x04) { - logger.warn("Unknown cert type in identity-ed25519: {}", - identityEd25519[1]); - } else if (identityEd25519[6] != 0x01) { - logger.warn("Unknown certified key type in identity-ed25519: {}", - identityEd25519[1]); - } else if (identityEd25519[39] == 0x00) { - logger.warn("No extensions in identity-ed25519 (which " - + "would contain the encoded master-key-ed25519): {}", - identityEd25519[39]); - } else { - int extensionStart = 40; - for (int i = 0; i < (int) identityEd25519[39]; i++) { - if (identityEd25519.length < extensionStart + 4) { - logger.warn("Invalid extension with id {} in identity-ed25519.", i); - break; - } - int extensionLength = identityEd25519[extensionStart]; - extensionLength <<= 8; - extensionLength += identityEd25519[extensionStart + 1]; - int extensionType = identityEd25519[extensionStart + 2]; - if (extensionLength == 32 && extensionType == 4) { - if (identityEd25519.length < extensionStart + 4 + 32) { - logger.warn("Invalid extension with id {} in identity-ed25519.", i); - break; - } - byte[] masterKeyEd25519 = new byte[32]; - System.arraycopy(identityEd25519, extensionStart + 4, - masterKeyEd25519, 0, masterKeyEd25519.length); - String masterKeyEd25519Base64 = Base64.encodeBase64String( - masterKeyEd25519); - return masterKeyEd25519Base64.replaceAll("=", ""); - } - extensionStart += 4 + extensionLength; - } - } - logger.warn("Unable to locate master-key-ed25519 in identity-ed25519."); - return null; - } - private String maxExtraInfoDescriptorPublishedTime = "1970-01-01 00:00:00";
@@ -982,179 +437,21 @@ public class SanitizedBridgesWriter extends CollecTorMain { */ public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) {
- /* Parse descriptor to generate a sanitized version. */ - String published = null; - String masterKeyEd25519FromIdentityEd25519 = null; - DescriptorBuilder scrubbed = new DescriptorBuilder(); - try (BufferedReader br = new BufferedReader(new StringReader(new String( - data, StandardCharsets.US_ASCII)))) { - scrubbed.append(Annotation.BridgeExtraInfo.toString()); - String line; - String hashedBridgeIdentity; - String masterKeyEd25519 = null; - while ((line = br.readLine()) != null) { - - /* Parse bridge identity from extra-info line and replace it with - * its hash in the sanitized descriptor. */ - String[] parts = line.split(" "); - if (line.startsWith("extra-info ")) { - if (parts.length < 3) { - logger.debug("Illegal line in extra-info descriptor: '{}'. " - + "Skipping descriptor.", line); - return; - } - hashedBridgeIdentity = DigestUtils.sha1Hex(Hex.decodeHex( - parts[2].toCharArray())).toLowerCase(); - scrubbed.append("extra-info ").append(parts[1]) - .space().append(hashedBridgeIdentity.toUpperCase()).newLine(); - - /* Parse the publication time to determine the file name. */ - } else if (line.startsWith("published ")) { - scrubbed.append(line).newLine(); - published = line.substring("published ".length()); - if (published.compareTo(maxExtraInfoDescriptorPublishedTime) - > 0) { - maxExtraInfoDescriptorPublishedTime = published; - } - - /* Remove everything from transport lines except the transport - * name. */ - } else if (line.startsWith("transport ")) { - if (parts.length < 3) { - logger.debug("Illegal line in extra-info descriptor: '{}'. " - + "Skipping descriptor.", line); - return; - } - scrubbed.append("transport ").append(parts[1]).newLine(); - - /* Skip transport-info lines entirely. */ - } else if (line.startsWith("transport-info ")) { - - /* Extract master-key-ed25519 from identity-ed25519. 
*/ - } else if (line.equals("identity-ed25519")) { - StringBuilder sb = new StringBuilder(); - while ((line = br.readLine()) != null - && !line.equals("-----END ED25519 CERT-----")) { - if (line.equals("-----BEGIN ED25519 CERT-----")) { - continue; - } - sb.append(line); - } - masterKeyEd25519FromIdentityEd25519 = - this.parseMasterKeyEd25519FromIdentityEd25519( - sb.toString()); - String sha256MasterKeyEd25519 = Base64.encodeBase64String( - DigestUtils.sha256(Base64.decodeBase64( - masterKeyEd25519FromIdentityEd25519 + "="))) - .replaceAll("=", ""); - scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519) - .newLine(); - if (masterKeyEd25519 != null && !masterKeyEd25519.equals( - masterKeyEd25519FromIdentityEd25519)) { - logger.warn("Mismatch between identity-ed25519 and " - + "master-key-ed25519. Skipping."); - return; - } - - /* Verify that identity-ed25519 and master-key-ed25519 match. */ - } else if (line.startsWith("master-key-ed25519 ")) { - masterKeyEd25519 = line.substring(line.indexOf(" ") + 1); - if (masterKeyEd25519FromIdentityEd25519 != null - && !masterKeyEd25519FromIdentityEd25519.equals( - masterKeyEd25519)) { - logger.warn("Mismatch between identity-ed25519 and " - + "master-key-ed25519. Skipping."); - return; - } - - /* Write the following lines unmodified to the sanitized - * descriptor. 
*/ - } else if (line.startsWith("write-history ") - || line.startsWith("read-history ") - || line.startsWith("ipv6-write-history ") - || line.startsWith("ipv6-read-history ") - || line.startsWith("geoip-start-time ") - || line.startsWith("geoip-client-origins ") - || line.startsWith("geoip-db-digest ") - || line.startsWith("geoip6-db-digest ") - || line.startsWith("conn-bi-direct ") - || line.startsWith("ipv6-conn-bi-direct ") - || line.startsWith("bridge-") - || line.startsWith("dirreq-") - || line.startsWith("cell-") - || line.startsWith("entry-") - || line.startsWith("exit-") - || line.startsWith("hidserv-") - || line.startsWith("padding-counts ")) { - scrubbed.append(line).newLine(); - - /* When we reach the signature, we're done. Write the sanitized - * descriptor to disk below. */ - } else if (line.startsWith("router-signature")) { - break; - - /* Skip the ed25519 signature; we'll include a SHA256 digest of - * the SHA256 descriptor digest in router-digest-sha256. */ - } else if (line.startsWith("router-sig-ed25519 ")) { - continue; - - /* If we encounter an unrecognized line, stop parsing and print - * out a warning. We might have overlooked sensitive information - * that we need to remove or replace for the sanitized descriptor - * version. */ - } else { - logger.warn("Unrecognized line '{}'. Skipping.", line); - return; - } - } - br.close(); - } catch (DecoderException | IOException e) { - logger.warn("Could not parse extra-info descriptor.", e); + SanitizedBridgeExtraInfoDescriptor sanitizedBridgeExtraInfoDescriptor + = new SanitizedBridgeExtraInfoDescriptor(data, + this.sensitivePartsSanitizer); + if (!sanitizedBridgeExtraInfoDescriptor.sanitizeDescriptor()) { + logger.warn("Unable to sanitize bridge extra-info descriptor."); return; } - - /* Determine filename of sanitized extra-info descriptor. 
*/ - String descriptorDigest = null; - String ascii = new String(data, StandardCharsets.US_ASCII); - String startToken = "extra-info "; - String sigToken = "\nrouter-signature\n"; - int start = ascii.indexOf(startToken); - int sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 && sig >= 0 && sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - descriptorDigest = DigestUtils.sha1Hex(DigestUtils.sha1(forDigest)); - } - if (descriptorDigest == null) { - logger.warn("Could not calculate extra-info descriptor digest."); - return; - } - String descriptorDigestSha256Base64 = null; - if (masterKeyEd25519FromIdentityEd25519 != null) { - ascii = new String(data, StandardCharsets.US_ASCII); - startToken = "extra-info "; - sigToken = "\n-----END SIGNATURE-----\n"; - start = ascii.indexOf(startToken); - sig = ascii.indexOf(sigToken) + sigToken.length(); - if (start >= 0 && sig >= 0 && sig > start) { - byte[] forDigest = new byte[sig - start]; - System.arraycopy(data, start, forDigest, 0, sig - start); - descriptorDigestSha256Base64 = Base64.encodeBase64String( - DigestUtils.sha256(DigestUtils.sha256(forDigest))) - .replaceAll("=", ""); - } - if (descriptorDigestSha256Base64 == null) { - logger.warn("Could not calculate extra-info " - + "descriptor SHA256 digest."); - return; - } - } - if (descriptorDigestSha256Base64 != null) { - scrubbed.append("router-digest-sha256 ") - .append(descriptorDigestSha256Base64).newLine(); + byte[] scrubbedBytes + = sanitizedBridgeExtraInfoDescriptor.getSanitizedBytes(); + String published = sanitizedBridgeExtraInfoDescriptor.getPublishedString(); + if (published.compareTo(maxExtraInfoDescriptorPublishedTime) > 0) { + maxExtraInfoDescriptorPublishedTime = published; } - scrubbed.append("router-digest ").append(descriptorDigest.toUpperCase()) - .newLine(); + String descriptorDigest + = sanitizedBridgeExtraInfoDescriptor.getDescriptorDigest();
/* Determine filename of sanitized extra-info descriptor. */ String dyear = published.substring(0, 4); @@ -1181,7 +478,7 @@ public class SanitizedBridgesWriter extends CollecTorMain { break; } Files.createDirectories(outputFile.getParent()); - Files.write(outputFile, scrubbed.toBytes(), openOption); + Files.write(outputFile, scrubbedBytes, openOption); } } catch (IOException e) { logger.warn("Could not write sanitized extra-info descriptor to disk.",
tor-commits@lists.torproject.org