tor-commits
Threads by month
- ----- 2025 -----
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
December 2020
- 19 participants
- 1639 discussions

[collector/master] Update most of the bridgedescs module to NIO.
by karsten@torproject.org 01 Dec '20
01 Dec '20
commit c0ee1a6cf76f6f6b1677edccc1dc7e4055de50e9
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Sun Nov 29 00:05:47 2020 +0100
Update most of the bridgedescs module to NIO.
Replace all File references with their Path equivalents, and use Files
methods wherever feasible.
Part of #20542.
---
.../bridgedescs/SanitizedBridgesWriter.java | 288 +++++++++------------
1 file changed, 127 insertions(+), 161 deletions(-)
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
index 8db7db5..34156c2 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -26,16 +26,15 @@ import org.slf4j.LoggerFactory;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
-import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
import java.io.IOException;
+import java.io.InputStream;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
import java.security.GeneralSecurityException;
import java.security.SecureRandom;
import java.text.ParseException;
@@ -45,7 +44,6 @@ import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -89,12 +87,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
private String rsyncCatString;
- private File bridgeDirectoriesDirectory;
-
- /**
- * Output directory for writing sanitized bridge descriptors.
- */
- private File sanitizedBridgesDirectory;
+ private Path inputDirectory;
private boolean replaceIpAddressesWithHashes;
@@ -106,13 +99,15 @@ public class SanitizedBridgesWriter extends CollecTorMain {
private boolean haveWarnedAboutInterval;
- private File bridgeIpSecretsFile;
+ private Path bridgeIpSecretsFile;
private SecureRandom secureRandom;
- private String outputPathName;
+ private Path outputDirectory;
+
+ private Path recentDirectory;
- private String recentPathName;
+ private Path statsDirectory;
@Override
public String module() {
@@ -127,25 +122,12 @@ public class SanitizedBridgesWriter extends CollecTorMain {
@Override
protected void startProcessing() throws ConfigurationException {
- outputPathName = Paths.get(config.getPath(Key.OutputPath).toString(),
- BRIDGE_DESCRIPTORS).toString();
- recentPathName = Paths.get(config.getPath(Key.RecentPath).toString(),
- BRIDGE_DESCRIPTORS).toString();
- File bridgeDirectoriesDirectory =
- config.getPath(Key.BridgeLocalOrigins).toFile();
- File sanitizedBridgesDirectory = new File(outputPathName);
- File statsDirectory = config.getPath(Key.StatsPath).toFile();
-
- if (bridgeDirectoriesDirectory == null
- || sanitizedBridgesDirectory == null || statsDirectory == null) {
- throw new ConfigurationException("BridgeSnapshotsDirectory, "
- + "SanitizedBridgesWriteDirectory, StatsPath should be set. "
- + "Please, edit the 'collector.properties' file.");
- }
-
- /* Memorize argument values. */
- this.bridgeDirectoriesDirectory = bridgeDirectoriesDirectory;
- this.sanitizedBridgesDirectory = sanitizedBridgesDirectory;
+ this.outputDirectory = config.getPath(Key.OutputPath)
+ .resolve(BRIDGE_DESCRIPTORS);
+ this.recentDirectory = config.getPath(Key.RecentPath)
+ .resolve(BRIDGE_DESCRIPTORS);
+ this.inputDirectory = config.getPath(Key.BridgeLocalOrigins);
+ this.statsDirectory = config.getPath(Key.StatsPath);
this.replaceIpAddressesWithHashes =
config.getBool(Key.ReplaceIpAddressesWithHashes);
SimpleDateFormat rsyncCatFormat = new SimpleDateFormat(
@@ -168,13 +150,10 @@ public class SanitizedBridgesWriter extends CollecTorMain {
/* Read hex-encoded secrets for replacing IP addresses with hashes
* from disk. */
this.secretsForHashingIpAddresses = new TreeMap<>();
- this.bridgeIpSecretsFile = new File(statsDirectory,
- "bridge-ip-secrets");
- if (this.bridgeIpSecretsFile.exists()) {
- try (BufferedReader br = new BufferedReader(new FileReader(
- this.bridgeIpSecretsFile))) {
- String line;
- while ((line = br.readLine()) != null) {
+ this.bridgeIpSecretsFile = statsDirectory.resolve("bridge-ip-secrets");
+ if (Files.exists(this.bridgeIpSecretsFile)) {
+ try {
+ for (String line : Files.readAllLines(this.bridgeIpSecretsFile)) {
String[] parts = line.split(",");
if ((line.length() != ("yyyy-MM,".length() + 31 * 2)
&& line.length() != ("yyyy-MM,".length() + 50 * 2)
@@ -229,7 +208,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
this.bridgeSanitizingCutOffTimestamp);
// Import bridge descriptors
- this.readBridgeSnapshots(this.bridgeDirectoriesDirectory, statsDirectory);
+ this.readBridgeSnapshots(this.inputDirectory, this.statsDirectory);
// Finish writing sanitized bridge descriptors to disk
this.finishWriting();
@@ -239,8 +218,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
this.cleanUpDirectories();
}
- private void readBridgeSnapshots(File bridgeDirectoriesDir,
- File statsDirectory) {
+ private void readBridgeSnapshots(Path bridgeDirectoriesDir,
+ Path statsDirectory) {
if (bridgeDirectoriesDir == null
|| statsDirectory == null) {
@@ -248,21 +227,16 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
SortedSet<String> parsed = new TreeSet<>();
- File pbdFile = new File(statsDirectory, "parsed-bridge-directories");
+ Path pbdFile = statsDirectory.resolve("parsed-bridge-directories");
boolean modified = false;
- if (bridgeDirectoriesDir.exists()) {
- if (pbdFile.exists()) {
- logger.debug("Reading file {}...", pbdFile.getAbsolutePath());
+ if (Files.exists(bridgeDirectoriesDir)) {
+ if (Files.exists(pbdFile)) {
+ logger.debug("Reading file {}...", pbdFile);
try {
- BufferedReader br = new BufferedReader(new FileReader(pbdFile));
- String line;
- while ((line = br.readLine()) != null) {
- parsed.add(line);
- }
- br.close();
- logger.debug("Finished reading file {}.", pbdFile.getAbsolutePath());
+ parsed.addAll(Files.readAllLines(pbdFile));
+ logger.debug("Finished reading file {}.", pbdFile);
} catch (IOException e) {
- logger.warn("Failed reading file {}!", pbdFile.getAbsolutePath(), e);
+ logger.warn("Failed reading file {}!", pbdFile, e);
return;
}
}
@@ -275,28 +249,31 @@ public class SanitizedBridgesWriter extends CollecTorMain {
int skippedServerDescriptors = 0;
int parsedExtraInfoDescriptors = 0;
int skippedExtraInfoDescriptors = 0;
- Stack<File> filesInInputDir = new Stack<>();
+ Stack<Path> filesInInputDir = new Stack<>();
filesInInputDir.add(bridgeDirectoriesDir);
while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- Collections.addAll(filesInInputDir, pop.listFiles());
- } else if (!parsed.contains(pop.getName())) {
+ Path pop = filesInInputDir.pop();
+ String fn = pop.getFileName().toString();
+ if (Files.isDirectory(pop)) {
try {
- FileInputStream in = new FileInputStream(pop);
+ Files.list(pop).forEachOrdered(filesInInputDir::add);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ } else if (!parsed.contains(pop.getFileName().toString())) {
+ try (InputStream in = Files.newInputStream(pop)) {
if (in.available() > 0) {
TarArchiveInputStream tais;
- if (pop.getName().endsWith(".tar.gz")) {
+ if (fn.endsWith(".tar.gz")) {
GzipCompressorInputStream gcis =
new GzipCompressorInputStream(in);
tais = new TarArchiveInputStream(gcis);
- } else if (pop.getName().endsWith(".tar")) {
+ } else if (fn.endsWith(".tar")) {
tais = new TarArchiveInputStream(in);
} else {
continue;
}
BufferedInputStream bis = new BufferedInputStream(tais);
- String fn = pop.getName();
String[] fnParts = fn.split("-");
if (fnParts.length != 5) {
logger.warn("Invalid bridge descriptor tarball file name: {}. "
@@ -416,16 +393,14 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
bis.close();
}
- in.close();
/* Let's give some memory back, or we'll run out of it. */
System.gc();
- parsed.add(pop.getName());
+ parsed.add(fn);
modified = true;
} catch (IOException e) {
- logger.warn("Could not parse bridge snapshot {}!", pop.getName(),
- e);
+ logger.warn("Could not parse bridge snapshot {}!", pop, e);
}
}
}
@@ -436,15 +411,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
parsedStatuses, parsedServerDescriptors, skippedServerDescriptors,
parsedExtraInfoDescriptors, skippedExtraInfoDescriptors);
if (!parsed.isEmpty() && modified) {
- logger.debug("Writing file {}...", pbdFile.getAbsolutePath());
- pbdFile.getParentFile().mkdirs();
- try (BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile))) {
- for (String f : parsed) {
- bw.append(f).append("\n");
- }
- logger.debug("Finished writing file {}.", pbdFile.getAbsolutePath());
+ logger.debug("Writing file {}...", pbdFile);
+ try {
+ Files.createDirectories(pbdFile.getParent());
+ Files.write(pbdFile, parsed);
+ logger.debug("Finished writing file {}.", pbdFile);
} catch (IOException e) {
- logger.warn("Failed writing file {}!", pbdFile.getAbsolutePath(), e);
+ logger.warn("Failed writing file {}!", pbdFile, e);
}
}
}
@@ -627,15 +600,16 @@ public class SanitizedBridgesWriter extends CollecTorMain {
} else {
/* Append secret to file on disk immediately before using it, or
* we might end with inconsistently sanitized bridges. */
+ byte[] newBytes = (month + "," + Hex.encodeHexString(secret) + "\n")
+ .getBytes();
try {
- if (!this.bridgeIpSecretsFile.exists()) {
- this.bridgeIpSecretsFile.getParentFile().mkdirs();
+ if (Files.exists(this.bridgeIpSecretsFile)) {
+ Files.write(this.bridgeIpSecretsFile, newBytes,
+ StandardOpenOption.APPEND);
+ } else {
+ Files.createDirectories(this.bridgeIpSecretsFile.getParent());
+ Files.write(this.bridgeIpSecretsFile, newBytes);
}
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.bridgeIpSecretsFile,
- this.bridgeIpSecretsFile.exists()));
- bw.write(month + "," + Hex.encodeHexString(secret) + "\n");
- bw.close();
} catch (IOException e) {
logger.warn("Could not store new secret "
+ "to disk! Not calculating any IP address or TCP port "
@@ -850,24 +824,24 @@ public class SanitizedBridgesWriter extends CollecTorMain {
String stime = publicationTime.substring(11, 13)
+ publicationTime.substring(14, 16)
+ publicationTime.substring(17, 19);
- File tarballFile = new File(
- this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear
- + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth
- + sday + "-" + stime + "-" + authorityFingerprint);
- File rsyncFile = new File(recentPathName, "statuses/"
- + tarballFile.getName());
- File[] outputFiles = new File[] { tarballFile, rsyncFile };
- for (File outputFile : outputFiles) {
- outputFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- outputFile));
- bw.write(Annotation.Status.toString());
- bw.write("published " + publicationTime + "\n");
- bw.write(header.toString());
+ String fileName = syear + smonth + sday + "-" + stime + "-"
+ + authorityFingerprint;
+ Path tarballFile = this.outputDirectory.resolve(
+ Paths.get(syear, smonth, "statuses", sday, fileName));
+ Path rsyncFile = this.recentDirectory.resolve(
+ Paths.get("statuses", fileName));
+ Path[] outputFiles = new Path[] { tarballFile, rsyncFile };
+ for (Path outputFile : outputFiles) {
+ Files.createDirectories(outputFile.getParent());
+ StringBuilder sanitizedStatus = new StringBuilder();
+ sanitizedStatus.append(Annotation.Status.toString());
+ sanitizedStatus.append("published ").append(publicationTime)
+ .append("\n");
+ sanitizedStatus.append(header.toString());
for (String scrubbed : scrubbedLines.values()) {
- bw.write(scrubbed);
+ sanitizedStatus.append(scrubbed);
}
- bw.close();
+ Files.write(outputFile, sanitizedStatus.toString().getBytes());
}
} catch (IOException e) {
logger.warn("Could not write sanitized bridge "
@@ -1234,33 +1208,30 @@ public class SanitizedBridgesWriter extends CollecTorMain {
/* Determine filename of sanitized server descriptor. */
String dyear = published.substring(0, 4);
String dmonth = published.substring(5, 7);
- File tarballFile = new File(
- this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
- + dyear + "/" + dmonth + "/server-descriptors/"
- + "/" + descriptorDigest.charAt(0) + "/"
- + descriptorDigest.charAt(1) + "/"
- + descriptorDigest);
try {
- File rsyncCatFile = new File(config.getPath(Key.RecentPath).toFile(),
- "bridge-descriptors/server-descriptors/" + this.rsyncCatString
- + "-server-descriptors.tmp");
- File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
+ Path tarballFile = this.outputDirectory.resolve(
+ Paths.get(dyear, dmonth, "server-descriptors",
+ descriptorDigest.substring(0, 1), descriptorDigest.substring(1, 2),
+ descriptorDigest));
+ Path rsyncCatFile = this.recentDirectory.resolve(
+ Paths.get("bridge-descriptors", "server-descriptors",
+ this.rsyncCatString + "-server-descriptors.tmp"));
+ Path[] outputFiles = new Path[] { tarballFile, rsyncCatFile };
boolean[] append = new boolean[] { false, true };
for (int i = 0; i < outputFiles.length; i++) {
- File outputFile = outputFiles[i];
- boolean appendToFile = append[i];
- if (outputFile.exists() && !appendToFile) {
+ Path outputFile = outputFiles[i];
+ StandardOpenOption openOption = append[i] ? StandardOpenOption.APPEND
+ : StandardOpenOption.CREATE_NEW;
+ if (Files.exists(outputFile)
+ && openOption != StandardOpenOption.APPEND) {
/* We already stored this descriptor to disk before, so let's
* not store it yet another time. */
break;
}
- outputFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- outputFile, appendToFile));
- bw.write(scrubbed.toString());
- bw.close();
+ Files.createDirectories(outputFile.getParent());
+ Files.write(outputFile, scrubbed.toString().getBytes(), openOption);
}
- } catch (ConfigurationException | IOException e) {
+ } catch (IOException e) {
logger.warn("Could not write sanitized server descriptor to disk.", e);
}
}
@@ -1323,14 +1294,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) {
/* Parse descriptor to generate a sanitized version. */
- String scrubbedDesc = null;
String published = null;
String masterKeyEd25519FromIdentityEd25519 = null;
- try {
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, StandardCharsets.US_ASCII)));
+ DescriptorBuilder scrubbed = new DescriptorBuilder();
+ try (BufferedReader br = new BufferedReader(new StringReader(new String(
+ data, StandardCharsets.US_ASCII)))) {
+ scrubbed.append(Annotation.BridgeExtraInfo.toString());
String line;
- DescriptorBuilder scrubbed = null;
String hashedBridgeIdentity;
String masterKeyEd25519 = null;
while ((line = br.readLine()) != null) {
@@ -1346,7 +1316,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
hashedBridgeIdentity = DigestUtils.sha1Hex(Hex.decodeHex(
parts[2].toCharArray())).toLowerCase();
- scrubbed = new DescriptorBuilder("extra-info ").append(parts[1])
+ scrubbed.append("extra-info ").append(parts[1])
.space().append(hashedBridgeIdentity.toUpperCase()).newLine();
/* Parse the publication time to determine the file name. */
@@ -1432,7 +1402,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
/* When we reach the signature, we're done. Write the sanitized
* descriptor to disk below. */
} else if (line.startsWith("router-signature")) {
- scrubbedDesc = scrubbed.toString();
break;
/* Skip the ed25519 signature; we'll include a SHA256 digest of
@@ -1491,44 +1460,43 @@ public class SanitizedBridgesWriter extends CollecTorMain {
return;
}
}
+ if (descriptorDigestSha256Base64 != null) {
+ scrubbed.append("router-digest-sha256 ")
+ .append(descriptorDigestSha256Base64).newLine();
+ }
+ scrubbed.append("router-digest ").append(descriptorDigest.toUpperCase())
+ .newLine();
+
+ /* Determine filename of sanitized extra-info descriptor. */
String dyear = published.substring(0, 4);
String dmonth = published.substring(5, 7);
- File tarballFile = new File(
- this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
- + dyear + "/" + dmonth + "/extra-infos/"
- + descriptorDigest.charAt(0) + "/"
- + descriptorDigest.charAt(1) + "/"
- + descriptorDigest);
+
try {
- File rsyncCatFile = new File(config.getPath(Key.RecentPath).toFile(),
- "bridge-descriptors/extra-infos/" + this.rsyncCatString
- + "-extra-infos.tmp");
- File[] outputFiles = new File[] { tarballFile, rsyncCatFile };
+ Path tarballFile = this.outputDirectory.resolve(
+ Paths.get(dyear, dmonth, "extra-infos",
+ descriptorDigest.substring(0, 1), descriptorDigest.substring(1, 2),
+ descriptorDigest));
+ Path rsyncCatFile = this.recentDirectory.resolve(
+ Paths.get("bridge-descriptors", "extra-infos",
+ this.rsyncCatString + "-extra-infos.tmp"));
+ Path[] outputFiles = new Path[] { tarballFile, rsyncCatFile };
boolean[] append = new boolean[] { false, true };
for (int i = 0; i < outputFiles.length; i++) {
- File outputFile = outputFiles[i];
- boolean appendToFile = append[i];
- if (outputFile.exists() && !appendToFile) {
+ Path outputFile = outputFiles[i];
+ StandardOpenOption openOption = append[i] ? StandardOpenOption.APPEND
+ : StandardOpenOption.CREATE_NEW;
+ if (Files.exists(outputFile)
+ && openOption != StandardOpenOption.APPEND) {
/* We already stored this descriptor to disk before, so let's
* not store it yet another time. */
break;
}
- outputFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- outputFile, appendToFile));
- bw.write(Annotation.BridgeExtraInfo.toString());
- bw.write(scrubbedDesc);
- if (descriptorDigestSha256Base64 != null) {
- bw.write("router-digest-sha256 " + descriptorDigestSha256Base64
- + "\n");
- }
- bw.write("router-digest " + descriptorDigest.toUpperCase()
- + "\n");
- bw.close();
+ Files.createDirectories(outputFile.getParent());
+ Files.write(outputFile, scrubbed.toString().getBytes(), openOption);
}
- } catch (Exception e) {
- logger.warn("Could not write sanitized "
- + "extra-info descriptor to disk.", e);
+ } catch (IOException e) {
+ logger.warn("Could not write sanitized extra-info descriptor to disk.",
+ e);
}
}
@@ -1547,20 +1515,18 @@ public class SanitizedBridgesWriter extends CollecTorMain {
try {
int kept = 0;
int deleted = 0;
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- this.bridgeIpSecretsFile));
+ List<String> lines = new ArrayList<>();
for (Map.Entry<String, byte[]> e :
this.secretsForHashingIpAddresses.entrySet()) {
if (e.getKey().compareTo(
this.bridgeSanitizingCutOffTimestamp) < 0) {
deleted++;
} else {
- bw.write(e.getKey() + "," + Hex.encodeHexString(e.getValue())
- + "\n");
+ lines.add(e.getKey() + "," + Hex.encodeHexString(e.getValue()));
kept++;
}
}
- bw.close();
+ Files.write(this.bridgeIpSecretsFile, lines);
logger.info("Deleted {} secrets that we don't "
+ "need anymore and kept {}.", deleted, kept);
} catch (IOException e) {
@@ -1612,9 +1578,9 @@ public class SanitizedBridgesWriter extends CollecTorMain {
* in the last three days (seven weeks), and remove the .tmp extension from
* newly written files. */
private void cleanUpDirectories() {
- PersistenceUtils.cleanDirectory(Paths.get(this.recentPathName),
+ PersistenceUtils.cleanDirectory(this.recentDirectory,
Instant.now().minus(3, ChronoUnit.DAYS).toEpochMilli());
- PersistenceUtils.cleanDirectory(Paths.get(this.outputPathName),
+ PersistenceUtils.cleanDirectory(this.outputDirectory,
Instant.now().minus(49, ChronoUnit.DAYS).toEpochMilli());
}
}
1
0

[collector/master] Make some minor optimizations to bridgedescs code.
by karsten@torproject.org 01 Dec '20
01 Dec '20
commit 47a4c7a962de55ee8354c1c8605216965f68d116
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Mon Nov 30 22:45:22 2020 +0100
Make some minor optimizations to bridgedescs code.
Part of #20542.
---
.../collector/bridgedescs/DescriptorBuilder.java | 3 ++
.../bridgedescs/SanitizedBridgesWriter.java | 62 ++++++++++------------
2 files changed, 32 insertions(+), 33 deletions(-)
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/DescriptorBuilder.java
index b4b63e7..946fcdb 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/DescriptorBuilder.java
@@ -96,4 +96,7 @@ class DescriptorBuilder {
return value;
}
+ public byte[] toBytes() {
+ return this.toString().getBytes();
+ }
}
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
index 843aa40..77ab406 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -79,14 +79,10 @@ public class SanitizedBridgesWriter extends CollecTorMain {
private String rsyncCatString;
- private Path inputDirectory;
-
private Path outputDirectory;
private Path recentDirectory;
- private Path statsDirectory;
-
private SensitivePartsSanitizer sensitivePartsSanitizer;
@Override
@@ -106,8 +102,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
.resolve(BRIDGE_DESCRIPTORS);
this.recentDirectory = config.getPath(Key.RecentPath)
.resolve(BRIDGE_DESCRIPTORS);
- this.inputDirectory = config.getPath(Key.BridgeLocalOrigins);
- this.statsDirectory = config.getPath(Key.StatsPath);
+ Path inputDirectory = config.getPath(Key.BridgeLocalOrigins);
+ Path statsDirectory = config.getPath(Key.StatsPath);
boolean replaceIpAddressesWithHashes =
config.getBool(Key.ReplaceIpAddressesWithHashes);
SimpleDateFormat rsyncCatFormat = new SimpleDateFormat(
@@ -126,7 +122,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
// Import bridge descriptors
- this.readBridgeSnapshots(this.inputDirectory, this.statsDirectory);
+ this.readBridgeSnapshots(inputDirectory, statsDirectory);
// Finish writing sanitized bridge descriptors to disk
if (replaceIpAddressesWithHashes) {
@@ -362,11 +358,16 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
/* Parse the given network status line by line. */
- DescriptorBuilder header = new DescriptorBuilder();
boolean includesFingerprintLine = false;
- SortedMap<String, String> scrubbedLines = new TreeMap<>();
+ DescriptorBuilder scrubbed = new DescriptorBuilder();
+ scrubbed.append(Annotation.Status.toString());
+ SortedMap<String, String> scrubbedEntries = new TreeMap<>();
+ StringBuilder publishedStringBuilder = new StringBuilder();
+ scrubbed.append("published ").append(publishedStringBuilder).newLine();
+ DescriptorBuilder header = new DescriptorBuilder();
+ scrubbed.append(header);
+
try {
- DescriptorBuilder scrubbed = new DescriptorBuilder();
BufferedReader br = new BufferedReader(new StringReader(new String(
data, StandardCharsets.US_ASCII)));
String line;
@@ -374,6 +375,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
byte[] fingerprintBytes = null;
String descPublicationTime = null;
String hashedBridgeIdentityHex = null;
+ DescriptorBuilder scrubbedEntry = new DescriptorBuilder();
while ((line = br.readLine()) != null) {
/* Use publication time from "published" line instead of the
@@ -403,10 +405,10 @@ public class SanitizedBridgesWriter extends CollecTorMain {
} else if (line.startsWith("r ")) {
/* Clear buffer from previously scrubbed lines. */
- if (scrubbed.hasContent()) {
- String scrubbedLine = scrubbed.toString();
- scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
- scrubbed = new DescriptorBuilder();
+ if (scrubbedEntry.hasContent()) {
+ scrubbedEntries.put(hashedBridgeIdentityHex,
+ scrubbedEntry.toString());
+ scrubbedEntry = new DescriptorBuilder();
}
/* Parse the relevant parts of this r line. */
@@ -452,7 +454,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
orPort, fingerprintBytes, descPublicationTime);
String scrubbedDirPort = this.sensitivePartsSanitizer.scrubTcpPort(
dirPort, fingerprintBytes, descPublicationTime);
- scrubbed.append("r ").append(nickname).space()
+ scrubbedEntry.append("r ").append(nickname).space()
.append(hashedBridgeIdentityBase64).space()
.append(hashedDescriptorIdentifier).space()
.append(descPublicationTime).space()
@@ -467,7 +469,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
.scrubOrAddress(line.substring("a ".length()), fingerprintBytes,
descPublicationTime);
if (scrubbedOrAddress != null) {
- scrubbed.append("a ").append(scrubbedOrAddress).newLine();
+ scrubbedEntry.append("a ").append(scrubbedOrAddress).newLine();
} else {
logger.warn("Invalid address in line '{}' "
+ "in bridge network status. Skipping line!", line);
@@ -477,7 +479,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
} else if (line.startsWith("s ") || line.equals("s")
|| line.startsWith("w ") || line.equals("w")
|| line.startsWith("p ") || line.equals("p")) {
- scrubbed.append(line).newLine();
+ scrubbedEntry.append(line).newLine();
/* There should be nothing else but r, a, w, p, and s lines in the
* network status. If there is, we should probably learn before
@@ -489,9 +491,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
}
br.close();
- if (scrubbed.hasContent()) {
- String scrubbedLine = scrubbed.toString();
- scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
+ if (scrubbedEntry.hasContent()) {
+ scrubbedEntries.put(hashedBridgeIdentityHex, scrubbedEntry.toString());
}
if (!includesFingerprintLine) {
header.append("fingerprint ").append(authorityFingerprint).newLine();
@@ -523,6 +524,10 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
/* Write the sanitized network status to disk. */
+ publishedStringBuilder.append(publicationTime);
+ for (String scrubbedEntry : scrubbedEntries.values()) {
+ scrubbed.append(scrubbedEntry);
+ }
try {
String syear = publicationTime.substring(0, 4);
String smonth = publicationTime.substring(5, 7);
@@ -536,18 +541,9 @@ public class SanitizedBridgesWriter extends CollecTorMain {
Paths.get(syear, smonth, "statuses", sday, fileName));
Path rsyncFile = this.recentDirectory.resolve(
Paths.get("statuses", fileName));
- Path[] outputFiles = new Path[] { tarballFile, rsyncFile };
- for (Path outputFile : outputFiles) {
+ for (Path outputFile : new Path[] { tarballFile, rsyncFile }) {
Files.createDirectories(outputFile.getParent());
- StringBuilder sanitizedStatus = new StringBuilder();
- sanitizedStatus.append(Annotation.Status.toString());
- sanitizedStatus.append("published ").append(publicationTime)
- .append("\n");
- sanitizedStatus.append(header.toString());
- for (String scrubbed : scrubbedLines.values()) {
- sanitizedStatus.append(scrubbed);
- }
- Files.write(outputFile, sanitizedStatus.toString().getBytes());
+ Files.write(outputFile, scrubbed.toBytes());
}
} catch (IOException e) {
logger.warn("Could not write sanitized bridge "
@@ -922,7 +918,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
break;
}
Files.createDirectories(outputFile.getParent());
- Files.write(outputFile, scrubbed.toString().getBytes(), openOption);
+ Files.write(outputFile, scrubbed.toBytes(), openOption);
}
} catch (IOException e) {
logger.warn("Could not write sanitized server descriptor to disk.", e);
@@ -1185,7 +1181,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
break;
}
Files.createDirectories(outputFile.getParent());
- Files.write(outputFile, scrubbed.toString().getBytes(), openOption);
+ Files.write(outputFile, scrubbed.toBytes(), openOption);
}
} catch (IOException e) {
logger.warn("Could not write sanitized extra-info descriptor to disk.",
1
0
commit 106852425554f6001f114ee711648798c78609ec
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Sat Nov 28 22:14:53 2020 +0100
Simplify the bridgedescs module.
The separation between BridgeSnapshotReader, BridgeDescriptorParser,
and SanitizedBridgesWriter doesn't make much sense anymore:
- BridgeSnapshotReader only has a constructor of more than 200 lines
of code.
- BridgeDescriptorParser actually only determines the descriptor type and
- SanitizedBridgesWriter performs parsing and obfuscation.
There are better ways to structure this code. The first step in that
direction is to remove clutter by moving the code to read bridge
snapshots to SanitizedBridgesWriter and deleting the other two
classes.
Part of #20542.
---
.../bridgedescs/BridgeDescriptorParser.java | 55 -----
.../bridgedescs/BridgeSnapshotReader.java | 248 ---------------------
.../bridgedescs/SanitizedBridgesWriter.java | 228 ++++++++++++++++++-
.../bridgedescs/BridgeDescriptorParserTest.java | 43 ----
4 files changed, 223 insertions(+), 351 deletions(-)
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/BridgeDescriptorParser.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/BridgeDescriptorParser.java
deleted file mode 100644
index b5e30bc..0000000
--- a/src/main/java/org/torproject/metrics/collector/bridgedescs/BridgeDescriptorParser.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright 2010--2020 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.collector.bridgedescs;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-import java.nio.charset.StandardCharsets;
-
-public class BridgeDescriptorParser {
-
- private SanitizedBridgesWriter sbw;
-
- private static final Logger logger = LoggerFactory.getLogger(
- BridgeDescriptorParser.class);
-
- /** Initializes a new bridge descriptor parser and links it to a
- * sanitized bridges writer to sanitize and store bridge descriptors. */
- public BridgeDescriptorParser(SanitizedBridgesWriter sbw) {
- if (null == sbw) {
- throw new IllegalArgumentException("SanitizedBridgesWriter has to be "
- + "provided, but was null.");
- }
- this.sbw = sbw;
- }
-
- /** Parses the first line of the given descriptor data to determine the
- * descriptor type and passes it to the sanitized bridges writer. */
- public void parse(byte[] allData, String dateTime,
- String authorityFingerprint) {
- try {
- BufferedReader br = new BufferedReader(new StringReader(
- new String(allData, StandardCharsets.US_ASCII)));
- String line = br.readLine();
- if (line == null) {
- return;
- }
- if (line.startsWith("router ")) {
- this.sbw.sanitizeAndStoreServerDescriptor(allData);
- } else if (line.startsWith("extra-info ")) {
- this.sbw.sanitizeAndStoreExtraInfoDescriptor(allData);
- } else {
- this.sbw.sanitizeAndStoreNetworkStatus(allData, dateTime,
- authorityFingerprint);
- }
- } catch (IOException e) {
- logger.warn("Could not parse or write bridge descriptor.", e);
- }
- }
-}
-
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/BridgeSnapshotReader.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/BridgeSnapshotReader.java
deleted file mode 100644
index de9cd4b..0000000
--- a/src/main/java/org/torproject/metrics/collector/bridgedescs/BridgeSnapshotReader.java
+++ /dev/null
@@ -1,248 +0,0 @@
-/* Copyright 2010--2020 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.collector.bridgedescs;
-
-import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.nio.charset.StandardCharsets;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TreeSet;
-
-public class BridgeSnapshotReader {
-
- private static final Logger logger = LoggerFactory.getLogger(
- BridgeSnapshotReader.class);
-
- /**
- * Reads the half-hourly snapshots of bridge descriptors from Bifroest.
- */
- public BridgeSnapshotReader(BridgeDescriptorParser bdp,
- File bridgeDirectoriesDir, File statsDirectory) {
-
- if (bdp == null || bridgeDirectoriesDir == null
- || statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- SortedSet<String> parsed = new TreeSet<>();
- File pbdFile = new File(statsDirectory, "parsed-bridge-directories");
- boolean modified = false;
- if (bridgeDirectoriesDir.exists()) {
- if (pbdFile.exists()) {
- logger.debug("Reading file {}...", pbdFile.getAbsolutePath());
- try {
- BufferedReader br = new BufferedReader(new FileReader(pbdFile));
- String line;
- while ((line = br.readLine()) != null) {
- parsed.add(line);
- }
- br.close();
- logger.debug("Finished reading file {}.", pbdFile.getAbsolutePath());
- } catch (IOException e) {
- logger.warn("Failed reading file {}!", pbdFile.getAbsolutePath(), e);
- return;
- }
- }
- logger.debug("Importing files in directory {}/...", bridgeDirectoriesDir);
- Set<String> descriptorImportHistory = new HashSet<>();
- int parsedFiles = 0;
- int skippedFiles = 0;
- int parsedStatuses = 0;
- int parsedServerDescriptors = 0;
- int skippedServerDescriptors = 0;
- int parsedExtraInfoDescriptors = 0;
- int skippedExtraInfoDescriptors = 0;
- Stack<File> filesInInputDir = new Stack<>();
- filesInInputDir.add(bridgeDirectoriesDir);
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- Collections.addAll(filesInInputDir, pop.listFiles());
- } else if (!parsed.contains(pop.getName())) {
- try {
- FileInputStream in = new FileInputStream(pop);
- if (in.available() > 0) {
- TarArchiveInputStream tais;
- if (pop.getName().endsWith(".tar.gz")) {
- GzipCompressorInputStream gcis =
- new GzipCompressorInputStream(in);
- tais = new TarArchiveInputStream(gcis);
- } else if (pop.getName().endsWith(".tar")) {
- tais = new TarArchiveInputStream(in);
- } else {
- continue;
- }
- BufferedInputStream bis = new BufferedInputStream(tais);
- String fn = pop.getName();
- String[] fnParts = fn.split("-");
- if (fnParts.length != 5) {
- logger.warn("Invalid bridge descriptor tarball file name: {}. "
- + "Skipping.", fn);
- continue;
- }
- String authorityPart = String.format("%s-%s-", fnParts[0],
- fnParts[1]);
- String datePart = String.format("%s-%s-%s", fnParts[2],
- fnParts[3], fnParts[4]);
- String authorityFingerprint;
- switch (authorityPart) {
- case "from-tonga-":
- authorityFingerprint =
- "4A0CCD2DDC7995083D73F5D667100C8A5831F16D";
- break;
- case "from-bifroest-":
- authorityFingerprint =
- "1D8F3A91C37C5D1C4C19B1AD1D0CFBE8BF72D8E1";
- break;
- case "from-serge-":
- authorityFingerprint =
- "BA44A889E64B93FAA2B114E02C2A279A8555C533";
- break;
- default:
- logger.warn("Did not recognize the bridge authority that "
- + "generated {}. Skipping.", fn);
- continue;
- }
- String dateTime = datePart.substring(0, 10) + " "
- + datePart.substring(11, 13) + ":"
- + datePart.substring(13, 15) + ":"
- + datePart.substring(15, 17);
- while ((tais.getNextTarEntry()) != null) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- byte[] allData = baos.toByteArray();
- if (allData.length == 0) {
- continue;
- }
- String fileDigest = Hex.encodeHexString(DigestUtils.sha1(
- allData));
- String ascii = new String(allData, StandardCharsets.US_ASCII);
- BufferedReader br3 = new BufferedReader(new StringReader(
- ascii));
- String firstLine;
- do {
- firstLine = br3.readLine();
- } while (firstLine != null && firstLine.startsWith("@"));
- if (firstLine == null) {
- continue;
- }
- if (firstLine.startsWith("published ")
- || firstLine.startsWith("flag-thresholds ")
- || firstLine.startsWith("r ")) {
- bdp.parse(allData, dateTime, authorityFingerprint);
- parsedStatuses++;
- } else if (descriptorImportHistory.contains(fileDigest)) {
- /* Skip server descriptors or extra-info descriptors if
- * we parsed them before. */
- skippedFiles++;
- continue;
- } else {
- int start;
- int sig;
- int end = -1;
- String startToken = firstLine.startsWith("router ")
- ? "router " : "extra-info ";
- String sigToken = "\nrouter-signature\n";
- String endToken = "\n-----END SIGNATURE-----\n";
- while (end < ascii.length()) {
- start = ascii.indexOf(startToken, end);
- if (start < 0) {
- break;
- }
- sig = ascii.indexOf(sigToken, start);
- if (sig < 0) {
- break;
- }
- sig += sigToken.length();
- end = ascii.indexOf(endToken, sig);
- if (end < 0) {
- break;
- }
- end += endToken.length();
- byte[] descBytes = new byte[end - start];
- System.arraycopy(allData, start, descBytes, 0,
- end - start);
- String descriptorDigest = Hex.encodeHexString(
- DigestUtils.sha1(descBytes));
- if (!descriptorImportHistory.contains(
- descriptorDigest)) {
- bdp.parse(descBytes, dateTime, authorityFingerprint);
- descriptorImportHistory.add(descriptorDigest);
- if (firstLine.startsWith("router ")) {
- parsedServerDescriptors++;
- } else {
- parsedExtraInfoDescriptors++;
- }
- } else {
- if (firstLine.startsWith("router ")) {
- skippedServerDescriptors++;
- } else {
- skippedExtraInfoDescriptors++;
- }
- }
- }
- }
- descriptorImportHistory.add(fileDigest);
- parsedFiles++;
- }
- bis.close();
- }
- in.close();
-
- /* Let's give some memory back, or we'll run out of it. */
- System.gc();
-
- parsed.add(pop.getName());
- modified = true;
- } catch (IOException e) {
- logger.warn("Could not parse bridge snapshot {}!", pop.getName(),
- e);
- }
- }
- }
- logger.debug("Finished importing files in directory {}/. In total, we "
- + "parsed {} files (skipped {}) containing {} statuses, {} server "
- + "descriptors (skipped {}), and {} extra-info descriptors (skipped "
- + "{}).", bridgeDirectoriesDir, parsedFiles, skippedFiles,
- parsedStatuses, parsedServerDescriptors, skippedServerDescriptors,
- parsedExtraInfoDescriptors, skippedExtraInfoDescriptors);
- if (!parsed.isEmpty() && modified) {
- logger.debug("Writing file {}...", pbdFile.getAbsolutePath());
- pbdFile.getParentFile().mkdirs();
- try (BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile))) {
- for (String f : parsed) {
- bw.append(f).append("\n");
- }
- logger.debug("Finished writing file {}.", pbdFile.getAbsolutePath());
- } catch (IOException e) {
- logger.warn("Failed writing file {}!", pbdFile.getAbsolutePath(), e);
- }
- }
- }
- }
-}
-
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
index 62288ad..8db7db5 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -19,12 +19,17 @@ import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
+import java.io.ByteArrayOutputStream;
import java.io.File;
+import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
@@ -40,11 +45,17 @@ import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.Stack;
import java.util.TreeMap;
+import java.util.TreeSet;
/**
* <p>Sanitizes bridge descriptors, i.e., removes all possibly sensitive
@@ -217,12 +228,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
logger.info("Using cut-off datetime '{}' for secrets.",
this.bridgeSanitizingCutOffTimestamp);
- // Prepare bridge descriptor parser
- BridgeDescriptorParser bdp = new BridgeDescriptorParser(this);
-
// Import bridge descriptors
- new BridgeSnapshotReader(bdp, this.bridgeDirectoriesDirectory,
- statsDirectory);
+ this.readBridgeSnapshots(this.bridgeDirectoriesDirectory, statsDirectory);
// Finish writing sanitized bridge descriptors to disk
this.finishWriting();
@@ -232,6 +239,217 @@ public class SanitizedBridgesWriter extends CollecTorMain {
this.cleanUpDirectories();
}
+ private void readBridgeSnapshots(File bridgeDirectoriesDir,
+ File statsDirectory) {
+
+ if (bridgeDirectoriesDir == null
+ || statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ SortedSet<String> parsed = new TreeSet<>();
+ File pbdFile = new File(statsDirectory, "parsed-bridge-directories");
+ boolean modified = false;
+ if (bridgeDirectoriesDir.exists()) {
+ if (pbdFile.exists()) {
+ logger.debug("Reading file {}...", pbdFile.getAbsolutePath());
+ try {
+ BufferedReader br = new BufferedReader(new FileReader(pbdFile));
+ String line;
+ while ((line = br.readLine()) != null) {
+ parsed.add(line);
+ }
+ br.close();
+ logger.debug("Finished reading file {}.", pbdFile.getAbsolutePath());
+ } catch (IOException e) {
+ logger.warn("Failed reading file {}!", pbdFile.getAbsolutePath(), e);
+ return;
+ }
+ }
+ logger.debug("Importing files in directory {}/...", bridgeDirectoriesDir);
+ Set<String> descriptorImportHistory = new HashSet<>();
+ int parsedFiles = 0;
+ int skippedFiles = 0;
+ int parsedStatuses = 0;
+ int parsedServerDescriptors = 0;
+ int skippedServerDescriptors = 0;
+ int parsedExtraInfoDescriptors = 0;
+ int skippedExtraInfoDescriptors = 0;
+ Stack<File> filesInInputDir = new Stack<>();
+ filesInInputDir.add(bridgeDirectoriesDir);
+ while (!filesInInputDir.isEmpty()) {
+ File pop = filesInInputDir.pop();
+ if (pop.isDirectory()) {
+ Collections.addAll(filesInInputDir, pop.listFiles());
+ } else if (!parsed.contains(pop.getName())) {
+ try {
+ FileInputStream in = new FileInputStream(pop);
+ if (in.available() > 0) {
+ TarArchiveInputStream tais;
+ if (pop.getName().endsWith(".tar.gz")) {
+ GzipCompressorInputStream gcis =
+ new GzipCompressorInputStream(in);
+ tais = new TarArchiveInputStream(gcis);
+ } else if (pop.getName().endsWith(".tar")) {
+ tais = new TarArchiveInputStream(in);
+ } else {
+ continue;
+ }
+ BufferedInputStream bis = new BufferedInputStream(tais);
+ String fn = pop.getName();
+ String[] fnParts = fn.split("-");
+ if (fnParts.length != 5) {
+ logger.warn("Invalid bridge descriptor tarball file name: {}. "
+ + "Skipping.", fn);
+ continue;
+ }
+ String authorityPart = String.format("%s-%s-", fnParts[0],
+ fnParts[1]);
+ String datePart = String.format("%s-%s-%s", fnParts[2],
+ fnParts[3], fnParts[4]);
+ String authorityFingerprint;
+ switch (authorityPart) {
+ case "from-tonga-":
+ authorityFingerprint =
+ "4A0CCD2DDC7995083D73F5D667100C8A5831F16D";
+ break;
+ case "from-bifroest-":
+ authorityFingerprint =
+ "1D8F3A91C37C5D1C4C19B1AD1D0CFBE8BF72D8E1";
+ break;
+ case "from-serge-":
+ authorityFingerprint =
+ "BA44A889E64B93FAA2B114E02C2A279A8555C533";
+ break;
+ default:
+ logger.warn("Did not recognize the bridge authority that "
+ + "generated {}. Skipping.", fn);
+ continue;
+ }
+ String dateTime = datePart.substring(0, 10) + " "
+ + datePart.substring(11, 13) + ":"
+ + datePart.substring(13, 15) + ":"
+ + datePart.substring(15, 17);
+ while ((tais.getNextTarEntry()) != null) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = bis.read(data, 0, 1024)) >= 0) {
+ baos.write(data, 0, len);
+ }
+ byte[] allData = baos.toByteArray();
+ if (allData.length == 0) {
+ continue;
+ }
+ String fileDigest = Hex.encodeHexString(DigestUtils.sha1(
+ allData));
+ String ascii = new String(allData, StandardCharsets.US_ASCII);
+ BufferedReader br3 = new BufferedReader(new StringReader(
+ ascii));
+ String firstLine;
+ do {
+ firstLine = br3.readLine();
+ } while (firstLine != null && firstLine.startsWith("@"));
+ if (firstLine == null) {
+ continue;
+ }
+ if (firstLine.startsWith("published ")
+ || firstLine.startsWith("flag-thresholds ")
+ || firstLine.startsWith("r ")) {
+ this.sanitizeAndStoreNetworkStatus(allData, dateTime,
+ authorityFingerprint);
+ parsedStatuses++;
+ } else if (descriptorImportHistory.contains(fileDigest)) {
+ /* Skip server descriptors or extra-info descriptors if
+ * we parsed them before. */
+ skippedFiles++;
+ continue;
+ } else {
+ int start;
+ int sig;
+ int end = -1;
+ String startToken = firstLine.startsWith("router ")
+ ? "router " : "extra-info ";
+ String sigToken = "\nrouter-signature\n";
+ String endToken = "\n-----END SIGNATURE-----\n";
+ while (end < ascii.length()) {
+ start = ascii.indexOf(startToken, end);
+ if (start < 0) {
+ break;
+ }
+ sig = ascii.indexOf(sigToken, start);
+ if (sig < 0) {
+ break;
+ }
+ sig += sigToken.length();
+ end = ascii.indexOf(endToken, sig);
+ if (end < 0) {
+ break;
+ }
+ end += endToken.length();
+ byte[] descBytes = new byte[end - start];
+ System.arraycopy(allData, start, descBytes, 0,
+ end - start);
+ String descriptorDigest = Hex.encodeHexString(
+ DigestUtils.sha1(descBytes));
+ if (!descriptorImportHistory.contains(
+ descriptorDigest)) {
+ descriptorImportHistory.add(descriptorDigest);
+ if (firstLine.startsWith("router ")) {
+ this.sanitizeAndStoreServerDescriptor(descBytes);
+ parsedServerDescriptors++;
+ } else {
+ this.sanitizeAndStoreExtraInfoDescriptor(descBytes);
+ parsedExtraInfoDescriptors++;
+ }
+ } else {
+ if (firstLine.startsWith("router ")) {
+ skippedServerDescriptors++;
+ } else {
+ skippedExtraInfoDescriptors++;
+ }
+ }
+ }
+ }
+ descriptorImportHistory.add(fileDigest);
+ parsedFiles++;
+ }
+ bis.close();
+ }
+ in.close();
+
+ /* Let's give some memory back, or we'll run out of it. */
+ System.gc();
+
+ parsed.add(pop.getName());
+ modified = true;
+ } catch (IOException e) {
+ logger.warn("Could not parse bridge snapshot {}!", pop.getName(),
+ e);
+ }
+ }
+ }
+ logger.debug("Finished importing files in directory {}/. In total, we "
+ + "parsed {} files (skipped {}) containing {} statuses, {} server "
+ + "descriptors (skipped {}), and {} extra-info descriptors (skipped "
+ + "{}).", bridgeDirectoriesDir, parsedFiles, skippedFiles,
+ parsedStatuses, parsedServerDescriptors, skippedServerDescriptors,
+ parsedExtraInfoDescriptors, skippedExtraInfoDescriptors);
+ if (!parsed.isEmpty() && modified) {
+ logger.debug("Writing file {}...", pbdFile.getAbsolutePath());
+ pbdFile.getParentFile().mkdirs();
+ try (BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile))) {
+ for (String f : parsed) {
+ bw.append(f).append("\n");
+ }
+ logger.debug("Finished writing file {}.", pbdFile.getAbsolutePath());
+ } catch (IOException e) {
+ logger.warn("Failed writing file {}!", pbdFile.getAbsolutePath(), e);
+ }
+ }
+ }
+ }
+
private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
String published) throws IOException {
if (!orAddress.contains(":")) {
diff --git a/src/test/java/org/torproject/metrics/collector/bridgedescs/BridgeDescriptorParserTest.java b/src/test/java/org/torproject/metrics/collector/bridgedescs/BridgeDescriptorParserTest.java
deleted file mode 100644
index de82ab7..0000000
--- a/src/test/java/org/torproject/metrics/collector/bridgedescs/BridgeDescriptorParserTest.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/* Copyright 2016--2020 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.collector.bridgedescs;
-
-import org.torproject.metrics.collector.conf.Configuration;
-
-import org.junit.Test;
-
-public class BridgeDescriptorParserTest {
-
- @Test(expected = IllegalArgumentException.class)
- public void testNullArgForConstructor() {
- new BridgeDescriptorParser(null);
- }
-
- @Test(expected = NullPointerException.class)
- public void testNullData() {
- BridgeDescriptorParser bdp = new BridgeDescriptorParser(
- new SanitizedBridgesWriter(new Configuration()));
- bdp.parse(null, "", "");
- }
-
- @Test
- /* Empty data is not passed down to the sanitized writer.
- * This test passes when there is no exception. */
- public void testDataEmpty() {
- BridgeDescriptorParser bdp = new BridgeDescriptorParser(
- new SanitizedBridgesWriter(new Configuration()));
- bdp.parse(new byte[]{}, null, null);
- }
-
- @Test(expected = NullPointerException.class)
- /* The SanitizedBridgesWriter wasn't initialized sufficiently.
- * Actually that should be corrected in SanitizedBridgesWriter
- * at some point, but that's a bigger rewrite. */
- public void testMinimalData() {
- BridgeDescriptorParser bdp = new BridgeDescriptorParser(
- new SanitizedBridgesWriter(new Configuration()));
- bdp.parse(new byte[]{0}, "2010-10-10 10:10:10", null);
- }
-
-}
1
0

[collector/master] Move lower-level sanitizing code to its own class.
by karsten@torproject.org 01 Dec '20
commit a2fdbf3c6f67e5ddb735773e1ab456ee4f464555
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Mon Nov 30 21:59:17 2020 +0100
Move lower-level sanitizing code to its own class.
Part of #20542.
---
.../bridgedescs/SanitizedBridgesWriter.java | 404 ++-------------------
.../bridgedescs/SensitivePartsSanitizer.java | 378 +++++++++++++++++++
.../bridgedescs/SanitizedBridgesWriterTest.java | 2 +
3 files changed, 410 insertions(+), 374 deletions(-)
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
index 34156c2..843aa40 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -3,8 +3,6 @@
package org.torproject.metrics.collector.bridgedescs;
-import static java.time.ZoneOffset.UTC;
-
import org.torproject.descriptor.BridgeExtraInfoDescriptor;
import org.torproject.descriptor.BridgeNetworkStatus;
import org.torproject.descriptor.BridgeServerDescriptor;
@@ -35,18 +33,12 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
-import java.security.GeneralSecurityException;
-import java.security.SecureRandom;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
-import java.time.LocalDateTime;
-import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
-import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
@@ -89,26 +81,14 @@ public class SanitizedBridgesWriter extends CollecTorMain {
private Path inputDirectory;
- private boolean replaceIpAddressesWithHashes;
-
- private boolean persistenceProblemWithSecrets;
-
- private SortedMap<String, byte[]> secretsForHashingIpAddresses;
-
- private String bridgeSanitizingCutOffTimestamp;
-
- private boolean haveWarnedAboutInterval;
-
- private Path bridgeIpSecretsFile;
-
- private SecureRandom secureRandom;
-
private Path outputDirectory;
private Path recentDirectory;
private Path statsDirectory;
+ private SensitivePartsSanitizer sensitivePartsSanitizer;
+
@Override
public String module() {
return "bridgedescs";
@@ -128,90 +108,30 @@ public class SanitizedBridgesWriter extends CollecTorMain {
.resolve(BRIDGE_DESCRIPTORS);
this.inputDirectory = config.getPath(Key.BridgeLocalOrigins);
this.statsDirectory = config.getPath(Key.StatsPath);
- this.replaceIpAddressesWithHashes =
+ boolean replaceIpAddressesWithHashes =
config.getBool(Key.ReplaceIpAddressesWithHashes);
SimpleDateFormat rsyncCatFormat = new SimpleDateFormat(
"yyyy-MM-dd-HH-mm-ss");
this.rsyncCatString = rsyncCatFormat.format(
System.currentTimeMillis());
- /* Initialize secure random number generator if we need it. */
- if (this.replaceIpAddressesWithHashes) {
- try {
- this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN");
- } catch (GeneralSecurityException e) {
- logger.warn("Could not initialize secure "
- + "random number generator! Not calculating any IP address "
- + "hashes in this execution!", e);
- this.persistenceProblemWithSecrets = true;
- }
- }
-
- /* Read hex-encoded secrets for replacing IP addresses with hashes
- * from disk. */
- this.secretsForHashingIpAddresses = new TreeMap<>();
- this.bridgeIpSecretsFile = statsDirectory.resolve("bridge-ip-secrets");
- if (Files.exists(this.bridgeIpSecretsFile)) {
- try {
- for (String line : Files.readAllLines(this.bridgeIpSecretsFile)) {
- String[] parts = line.split(",");
- if ((line.length() != ("yyyy-MM,".length() + 31 * 2)
- && line.length() != ("yyyy-MM,".length() + 50 * 2)
- && line.length() != ("yyyy-MM,".length() + 83 * 2))
- || parts.length != 2) {
- logger.warn("Invalid line in bridge-ip-secrets file "
- + "starting with '{}'! "
- + "Not calculating any IP address hashes in this "
- + "execution!", line.substring(0, 7));
- this.persistenceProblemWithSecrets = true;
- break;
- }
- String month = parts[0];
- byte[] secret = Hex.decodeHex(parts[1].toCharArray());
- this.secretsForHashingIpAddresses.put(month, secret);
- }
- if (!this.persistenceProblemWithSecrets) {
- logger.debug("Read {} secrets for hashing bridge IP addresses.",
- this.secretsForHashingIpAddresses.size());
- }
- } catch (DecoderException e) {
- logger.warn("Failed to decode hex string in {}! Not calculating any IP "
- + "address hashes in this execution!", this.bridgeIpSecretsFile, e);
- this.persistenceProblemWithSecrets = true;
- } catch (IOException e) {
- logger.warn("Failed to read {}! Not calculating any IP "
- + "address hashes in this execution!", this.bridgeIpSecretsFile, e);
- this.persistenceProblemWithSecrets = true;
- }
- }
-
- long limitBridgeSanitizingIntervalDays
- = config.getInt(Key.BridgeDescriptorMappingsLimit);
-
- /* If we're configured to keep secrets only for a limited time, define
- * the cut-off day and time. */
- LocalDateTime bridgeSanitizingCutOffDateTime
- = LocalDateTime.of(1999, 12, 31, 23, 59, 59);
- if (limitBridgeSanitizingIntervalDays >= 0L) {
- LocalDateTime configuredBridgeSanitizingCutOffDateTime
- = LocalDateTime.now(UTC).minusDays(limitBridgeSanitizingIntervalDays);
- if (configuredBridgeSanitizingCutOffDateTime.isAfter(
- bridgeSanitizingCutOffDateTime)) {
- bridgeSanitizingCutOffDateTime
- = configuredBridgeSanitizingCutOffDateTime;
- }
+ Path bridgeIpSecretsFile = statsDirectory.resolve("bridge-ip-secrets");
+ if (replaceIpAddressesWithHashes) {
+ long limitBridgeSanitizingIntervalDays
+ = config.getInt(Key.BridgeDescriptorMappingsLimit);
+ this.sensitivePartsSanitizer = new SensitivePartsSanitizer(
+ bridgeIpSecretsFile, limitBridgeSanitizingIntervalDays);
+ } else {
+ this.sensitivePartsSanitizer = new SensitivePartsSanitizer();
}
- this.bridgeSanitizingCutOffTimestamp = bridgeSanitizingCutOffDateTime
- .format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
-
- logger.info("Using cut-off datetime '{}' for secrets.",
- this.bridgeSanitizingCutOffTimestamp);
// Import bridge descriptors
this.readBridgeSnapshots(this.inputDirectory, this.statsDirectory);
// Finish writing sanitized bridge descriptors to disk
- this.finishWriting();
+ if (replaceIpAddressesWithHashes) {
+ this.sensitivePartsSanitizer.finishWriting();
+ }
this.checkStaleDescriptors();
@@ -423,206 +343,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
}
- private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
- String published) throws IOException {
- if (!orAddress.contains(":")) {
- /* Malformed or-address or a line. */
- return null;
- }
- String addressPart = orAddress.substring(0,
- orAddress.lastIndexOf(":"));
- String portPart = orAddress.substring(orAddress.lastIndexOf(":") + 1);
- String scrubbedAddressPart;
- if (addressPart.startsWith("[")) {
- scrubbedAddressPart = this.scrubIpv6Address(addressPart,
- fingerprintBytes, published);
- } else {
- scrubbedAddressPart = this.scrubIpv4Address(addressPart,
- fingerprintBytes, published);
- }
- String scrubbedPort = this.scrubTcpPort(portPart, fingerprintBytes,
- published);
- return (scrubbedAddressPart == null ? null :
- scrubbedAddressPart + ":" + scrubbedPort);
- }
-
- private String scrubIpv4Address(String address, byte[] fingerprintBytes,
- String published) throws IOException {
- if (this.replaceIpAddressesWithHashes) {
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return null;
- }
- byte[] hashInput = new byte[4 + 20 + 31];
- String[] ipParts = address.split("\\.");
- for (int i = 0; i < 4; i++) {
- hashInput[i] = (byte) Integer.parseInt(ipParts[i]);
- }
- System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
- String month = published.substring(0, "yyyy-MM".length());
- byte[] secret = this.getSecretForMonth(month);
- System.arraycopy(secret, 0, hashInput, 24, 31);
- byte[] hashOutput = DigestUtils.sha256(hashInput);
- return "10."
- + (((int) hashOutput[0] + 256) % 256) + "."
- + (((int) hashOutput[1] + 256) % 256) + "."
- + (((int) hashOutput[2] + 256) % 256);
- } else {
- return "127.0.0.1";
- }
- }
-
- private String scrubIpv6Address(String address, byte[] fingerprintBytes,
- String published) throws IOException {
- StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::");
- if (this.replaceIpAddressesWithHashes) {
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
- return null;
- }
- String[] doubleColonSeparatedParts = address.substring(1,
- address.length() - 1).split("::", -1);
- if (doubleColonSeparatedParts.length > 2) {
- /* Invalid IPv6 address. */
- return null;
- }
- List<String> hexParts = new ArrayList<>();
- for (String doubleColonSeparatedPart : doubleColonSeparatedParts) {
- StringBuilder hexPart = new StringBuilder();
- String[] parts = doubleColonSeparatedPart.split(":", -1);
- if (parts.length < 1 || parts.length > 8) {
- /* Invalid IPv6 address. */
- return null;
- }
- for (String part : parts) {
- if (part.contains(".")) {
- String[] ipParts = part.split("\\.");
- byte[] ipv4Bytes = new byte[4];
- if (ipParts.length != 4) {
- /* Invalid IPv4 part in IPv6 address. */
- return null;
- }
- for (int m = 0; m < 4; m++) {
- ipv4Bytes[m] = (byte) Integer.parseInt(ipParts[m]);
- }
- hexPart.append(Hex.encodeHexString(ipv4Bytes));
- } else if (part.length() > 4) {
- /* Invalid IPv6 address. */
- return null;
- } else {
- for (int k = part.length(); k < 4; k++) {
- hexPart.append("0");
- }
- hexPart.append(part);
- }
- }
- hexParts.add(hexPart.toString());
- }
- StringBuilder hex = new StringBuilder();
- hex.append(hexParts.get(0));
- if (hexParts.size() == 2) {
- for (int i = 32 - hexParts.get(0).length()
- - hexParts.get(1).length(); i > 0; i--) {
- hex.append("0");
- }
- hex.append(hexParts.get(1));
- }
- byte[] ipBytes;
- try {
- ipBytes = Hex.decodeHex(hex.toString().toCharArray());
- } catch (DecoderException e) {
- /* TODO Invalid IPv6 address. */
- return null;
- }
- if (ipBytes.length != 16) {
- /* TODO Invalid IPv6 address. */
- return null;
- }
- byte[] hashInput = new byte[16 + 20 + 19];
- System.arraycopy(ipBytes, 0, hashInput, 0, 16);
- System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20);
- String month = published.substring(0, "yyyy-MM".length());
- byte[] secret = this.getSecretForMonth(month);
- System.arraycopy(secret, 31, hashInput, 36, 19);
- String hashOutput = DigestUtils.sha256Hex(hashInput);
- sb.append(hashOutput, hashOutput.length() - 6, hashOutput.length() - 4);
- sb.append(":");
- sb.append(hashOutput.substring(hashOutput.length() - 4));
- }
- sb.append("]");
- return sb.toString();
- }
-
- private String scrubTcpPort(String portString, byte[] fingerprintBytes,
- String published) throws IOException {
- if (portString.equals("0")) {
- return "0";
- } else if (this.replaceIpAddressesWithHashes) {
- if (this.persistenceProblemWithSecrets) {
- /* There's a persistence problem, so we shouldn't scrub more TCP
- * ports in this execution. */
- return null;
- }
- byte[] hashInput = new byte[2 + 20 + 33];
- int portNumber = Integer.parseInt(portString);
- hashInput[0] = (byte) (portNumber >> 8);
- hashInput[1] = (byte) portNumber;
- System.arraycopy(fingerprintBytes, 0, hashInput, 2, 20);
- String month = published.substring(0, "yyyy-MM".length());
- byte[] secret = this.getSecretForMonth(month);
- System.arraycopy(secret, 50, hashInput, 22, 33);
- byte[] hashOutput = DigestUtils.sha256(hashInput);
- int hashedPort = ((((hashOutput[0] & 0b1111_1111) << 8)
- | (hashOutput[1] & 0b1111_1111)) >> 2) | 0b1100_0000_0000_0000;
- return String.valueOf(hashedPort);
- } else {
- return "1";
- }
- }
-
- private byte[] getSecretForMonth(String month) throws IOException {
- if (!this.secretsForHashingIpAddresses.containsKey(month)
- || this.secretsForHashingIpAddresses.get(month).length < 83) {
- byte[] secret = new byte[83];
- this.secureRandom.nextBytes(secret);
- if (this.secretsForHashingIpAddresses.containsKey(month)) {
- System.arraycopy(this.secretsForHashingIpAddresses.get(month), 0,
- secret, 0,
- this.secretsForHashingIpAddresses.get(month).length);
- }
- if (month.compareTo(
- this.bridgeSanitizingCutOffTimestamp) < 0) {
- logger.warn("Generated a secret that we won't make "
- + "persistent, because it's outside our bridge descriptor "
- + "sanitizing interval.");
- } else {
- /* Append secret to file on disk immediately before using it, or
- * we might end with inconsistently sanitized bridges. */
- byte[] newBytes = (month + "," + Hex.encodeHexString(secret) + "\n")
- .getBytes();
- try {
- if (Files.exists(this.bridgeIpSecretsFile)) {
- Files.write(this.bridgeIpSecretsFile, newBytes,
- StandardOpenOption.APPEND);
- } else {
- Files.createDirectories(this.bridgeIpSecretsFile.getParent());
- Files.write(this.bridgeIpSecretsFile, newBytes);
- }
- } catch (IOException e) {
- logger.warn("Could not store new secret "
- + "to disk! Not calculating any IP address or TCP port "
- + "hashes in this execution!", e);
- this.persistenceProblemWithSecrets = true;
- throw new IOException(e);
- }
- }
- this.secretsForHashingIpAddresses.put(month, secret);
- }
- return this.secretsForHashingIpAddresses.get(month);
- }
-
private String maxNetworkStatusPublishedTime = "1970-01-01 00:00:00";
/**
@@ -631,7 +351,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
public void sanitizeAndStoreNetworkStatus(byte[] data,
String publicationTime, String authorityFingerprint) {
- if (this.persistenceProblemWithSecrets) {
+ if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) {
/* There's a persistence problem, so we shouldn't scrub more IP
* addresses in this execution. */
return;
@@ -641,19 +361,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
maxNetworkStatusPublishedTime = publicationTime;
}
- if (this.bridgeSanitizingCutOffTimestamp
- .compareTo(publicationTime) > 0) {
- String text = "Sanitizing and storing network status with "
- + "publication time outside our descriptor sanitizing "
- + "interval.";
- if (this.haveWarnedAboutInterval) {
- logger.debug(text);
- } else {
- logger.warn(text);
- this.haveWarnedAboutInterval = true;
- }
- }
-
/* Parse the given network status line by line. */
DescriptorBuilder header = new DescriptorBuilder();
boolean includesFingerprintLine = false;
@@ -738,14 +445,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
String hashedDescriptorIdentifier = Base64.encodeBase64String(
DigestUtils.sha1(Base64.decodeBase64(descriptorIdentifier
+ "=="))).substring(0, 27);
- String scrubbedAddress = scrubIpv4Address(address,
- fingerprintBytes,
- descPublicationTime);
+ String scrubbedAddress = this.sensitivePartsSanitizer
+ .scrubIpv4Address(address, fingerprintBytes, descPublicationTime);
String nickname = parts[1];
- String scrubbedOrPort = this.scrubTcpPort(orPort,
- fingerprintBytes, descPublicationTime);
- String scrubbedDirPort = this.scrubTcpPort(dirPort,
- fingerprintBytes, descPublicationTime);
+ String scrubbedOrPort = this.sensitivePartsSanitizer.scrubTcpPort(
+ orPort, fingerprintBytes, descPublicationTime);
+ String scrubbedDirPort = this.sensitivePartsSanitizer.scrubTcpPort(
+ dirPort, fingerprintBytes, descPublicationTime);
scrubbed.append("r ").append(nickname).space()
.append(hashedBridgeIdentityBase64).space()
.append(hashedDescriptorIdentifier).space()
@@ -757,8 +463,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
/* Sanitize any addresses in a lines using the fingerprint and
* descriptor publication time from the previous r line. */
} else if (line.startsWith("a ")) {
- String scrubbedOrAddress = scrubOrAddress(
- line.substring("a ".length()), fingerprintBytes,
+ String scrubbedOrAddress = this.sensitivePartsSanitizer
+ .scrubOrAddress(line.substring("a ".length()), fingerprintBytes,
descPublicationTime);
if (scrubbedOrAddress != null) {
scrubbed.append("a ").append(scrubbedOrAddress).newLine();
@@ -856,7 +562,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
*/
public void sanitizeAndStoreServerDescriptor(byte[] data) {
- if (this.persistenceProblemWithSecrets) {
+ if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) {
/* There's a persistence problem, so we shouldn't scrub more IP
* addresses in this execution. */
return;
@@ -922,18 +628,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
if (published.compareTo(maxServerDescriptorPublishedTime) > 0) {
maxServerDescriptorPublishedTime = published;
}
- if (this.bridgeSanitizingCutOffTimestamp
- .compareTo(published) > 0) {
- String text = "Sanitizing and storing "
- + "server descriptor with publication time outside our "
- + "descriptor sanitizing interval.";
- if (this.haveWarnedAboutInterval) {
- logger.debug(text);
- } else {
- logger.warn(text);
- this.haveWarnedAboutInterval = true;
- }
- }
scrubbed.append(line).newLine();
/* Parse the fingerprint to determine the hashed bridge
@@ -1127,8 +821,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
return;
}
try {
- String scrubbedAddressString = scrubIpv4Address(address, fingerprintBytes,
- published);
+ String scrubbedAddressString = this.sensitivePartsSanitizer
+ .scrubIpv4Address(address, fingerprintBytes, published);
if (null == scrubbedAddressString) {
logger.warn("Invalid IP address in \"router\" line in bridge server "
+ "descriptor. Skipping descriptor.");
@@ -1137,8 +831,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
scrubbedAddress.append(scrubbedAddressString);
for (Map.Entry<StringBuilder, String> e
: scrubbedIpAddressesAndTcpPorts.entrySet()) {
- String scrubbedOrAddress = scrubOrAddress(e.getValue(),
- fingerprintBytes, published);
+ String scrubbedOrAddress = this.sensitivePartsSanitizer
+ .scrubOrAddress(e.getValue(), fingerprintBytes, published);
if (null == scrubbedOrAddress) {
logger.warn("Invalid IP address or TCP port in \"or-address\" line "
+ "in bridge server descriptor. Skipping descriptor.");
@@ -1147,8 +841,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
e.getKey().append(scrubbedOrAddress);
}
for (Map.Entry<StringBuilder, String> e : scrubbedTcpPorts.entrySet()) {
- String scrubbedTcpPort = scrubTcpPort(e.getValue(), fingerprintBytes,
- published);
+ String scrubbedTcpPort = this.sensitivePartsSanitizer
+ .scrubTcpPort(e.getValue(), fingerprintBytes, published);
if (null == scrubbedTcpPort) {
logger.warn("Invalid TCP port in \"router\" line in bridge server "
+ "descriptor. Skipping descriptor.");
@@ -1159,7 +853,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
} catch (IOException exception) {
/* There's a persistence problem, so we shouldn't scrub more IP addresses
* or TCP ports in this execution. */
- this.persistenceProblemWithSecrets = true;
return;
}
@@ -1500,43 +1193,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
}
- /**
- * Rewrite all network statuses that might contain references to server
- * descriptors we added or updated in this execution. This applies to
- * all statuses that have been published up to 24 hours after any added
- * or updated server descriptor.
- */
- public void finishWriting() {
-
- /* Delete secrets that we don't need anymore. */
- if (!this.secretsForHashingIpAddresses.isEmpty()
- && this.secretsForHashingIpAddresses.firstKey().compareTo(
- this.bridgeSanitizingCutOffTimestamp) < 0) {
- try {
- int kept = 0;
- int deleted = 0;
- List<String> lines = new ArrayList<>();
- for (Map.Entry<String, byte[]> e :
- this.secretsForHashingIpAddresses.entrySet()) {
- if (e.getKey().compareTo(
- this.bridgeSanitizingCutOffTimestamp) < 0) {
- deleted++;
- } else {
- lines.add(e.getKey() + "," + Hex.encodeHexString(e.getValue()));
- kept++;
- }
- }
- Files.write(this.bridgeIpSecretsFile, lines);
- logger.info("Deleted {} secrets that we don't "
- + "need anymore and kept {}.", deleted, kept);
- } catch (IOException e) {
- logger.warn("Could not store reduced set of "
- + "secrets to disk! This is a bad sign, better check what's "
- + "going on!", e);
- }
- }
- }
-
private void checkStaleDescriptors() {
SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
"yyyy-MM-dd HH:mm:ss");
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SensitivePartsSanitizer.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SensitivePartsSanitizer.java
new file mode 100644
index 0000000..71931dd
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SensitivePartsSanitizer.java
@@ -0,0 +1,378 @@
+/* Copyright 2010--2020 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.bridgedescs;
+
+import static java.time.ZoneOffset.UTC;
+
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.security.GeneralSecurityException;
+import java.security.SecureRandom;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Replaces IP addresses and TCP ports found in bridge descriptors, either
+ * with fixed placeholder values or with hashes that are keyed by a
+ * per-month secret which is persisted to the bridge-ip-secrets file.
+ *
+ * <p>A full secret is 83 bytes long: bytes 0 to 30 are used for hashing
+ * IPv4 addresses, bytes 31 to 49 for IPv6 addresses, and bytes 50 to 82 for
+ * TCP ports.</p>
+ */
+public class SensitivePartsSanitizer {
+
+  private static final Logger logger = LoggerFactory.getLogger(
+      SensitivePartsSanitizer.class);
+
+  /* Whether to replace addresses and ports with keyed hashes (true) or
+   * with fixed placeholder values (false). */
+  private boolean replaceIpAddressesWithHashes = false;
+
+  /* File containing one "yyyy-MM,<hex-encoded secret>" line per month. */
+  private Path bridgeIpSecretsFile;
+
+  /* Set as soon as secrets cannot be read, generated, or stored; once
+   * set, the scrub methods return null instead of computing hashes. */
+  private boolean persistenceProblemWithSecrets;
+
+  /* Secrets keyed by "yyyy-MM" month string. */
+  private final SortedMap<String, byte[]> secretsForHashingIpAddresses
+      = new TreeMap<>();
+
+  /* Cut-off formatted as "yyyy-MM-dd HH:mm:ss"; compared
+   * lexicographically with publication times and month keys. */
+  private String bridgeSanitizingCutOffTimestamp;
+
+  private SecureRandom secureRandom;
+
+  /* Used to log the "outside sanitizing interval" message at warn level
+   * only once and at debug level afterwards. */
+  private boolean haveWarnedAboutInterval;
+
+  /**
+   * Creates a sanitizer that replaces addresses and ports with fixed
+   * placeholder values and does not use secrets.
+   */
+  SensitivePartsSanitizer() {
+    /* Nothing to do, if we're not using secrets for computing hashes. */
+  }
+
+  /**
+   * Creates a sanitizer that replaces addresses and ports with keyed
+   * hashes.
+   *
+   * @param bridgeIpSecretsFile File to read existing secrets from and to
+   *     append newly generated secrets to.
+   * @param limitBridgeSanitizingIntervalDays Number of days for which
+   *     secrets are kept, or a negative value to use the built-in cut-off
+   *     of 1999-12-31 23:59:59.
+   */
+  SensitivePartsSanitizer(Path bridgeIpSecretsFile,
+      long limitBridgeSanitizingIntervalDays) {
+    this.bridgeIpSecretsFile = bridgeIpSecretsFile;
+    this.readBridgeIpSecretsFile();
+    this.determineCutOffTimestamp(limitBridgeSanitizingIntervalDays);
+    this.replaceIpAddressesWithHashes = true;
+    this.initializeSecureRandom();
+  }
+
+  /* Read hex-encoded secrets for replacing IP addresses with hashes
+   * from disk. Any invalid line or read failure sets the persistence
+   * problem flag. */
+  private void readBridgeIpSecretsFile() {
+    if (!Files.exists(this.bridgeIpSecretsFile)) {
+      return;
+    }
+    try {
+      for (String line : Files.readAllLines(this.bridgeIpSecretsFile)) {
+        String[] parts = line.split(",");
+        if ((line.length() != ("yyyy-MM,".length() + 31 * 2)
+            && line.length() != ("yyyy-MM,".length() + 50 * 2)
+            && line.length() != ("yyyy-MM,".length() + 83 * 2))
+            || parts.length != 2) {
+          /* Only log the leading month part, not the secret. The invalid
+           * line may be shorter than that, so clamp the end index. */
+          String lineStart = line.substring(0,
+              Math.min(line.length(), "yyyy-MM".length()));
+          logger.warn("Invalid line in bridge-ip-secrets file "
+              + "starting with '{}'! "
+              + "Not calculating any IP address hashes in this "
+              + "execution!", lineStart);
+          this.persistenceProblemWithSecrets = true;
+          break;
+        }
+        String month = parts[0];
+        byte[] secret = Hex.decodeHex(parts[1].toCharArray());
+        this.secretsForHashingIpAddresses.put(month, secret);
+      }
+      if (!this.persistenceProblemWithSecrets) {
+        logger.debug("Read {} secrets for hashing bridge IP addresses.",
+            this.secretsForHashingIpAddresses.size());
+      }
+    } catch (DecoderException e) {
+      logger.warn("Failed to decode hex string in {}! Not calculating any IP "
+          + "address hashes in this execution!", bridgeIpSecretsFile, e);
+      this.persistenceProblemWithSecrets = true;
+    } catch (IOException e) {
+      logger.warn("Failed to read {}! Not calculating any IP "
+          + "address hashes in this execution!", bridgeIpSecretsFile, e);
+      this.persistenceProblemWithSecrets = true;
+    }
+  }
+
+  /**
+   * Returns whether reading, generating, or storing secrets has failed
+   * in this execution.
+   */
+  boolean hasPersistenceProblemWithSecrets() {
+    return this.persistenceProblemWithSecrets;
+  }
+
+  /* Determine the cut-off timestamp: the current UTC time minus the
+   * configured number of days, but never earlier than
+   * 1999-12-31 23:59:59, which is also used for negative limits. */
+  private void determineCutOffTimestamp(
+      long limitBridgeSanitizingIntervalDays) {
+    LocalDateTime bridgeSanitizingCutOffDateTime
+        = LocalDateTime.of(1999, 12, 31, 23, 59, 59);
+    if (limitBridgeSanitizingIntervalDays >= 0L) {
+      LocalDateTime configuredBridgeSanitizingCutOffDateTime
+          = LocalDateTime.now(UTC).minusDays(limitBridgeSanitizingIntervalDays);
+      if (configuredBridgeSanitizingCutOffDateTime.isAfter(
+          bridgeSanitizingCutOffDateTime)) {
+        bridgeSanitizingCutOffDateTime
+            = configuredBridgeSanitizingCutOffDateTime;
+      }
+    }
+    this.bridgeSanitizingCutOffTimestamp = bridgeSanitizingCutOffDateTime
+        .format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
+
+    logger.info("Using cut-off datetime '{}' for secrets.",
+        this.bridgeSanitizingCutOffTimestamp);
+  }
+
+  /* Initialize secure random number generator; failure is treated like
+   * a persistence problem, so that no hashes are computed. */
+  private void initializeSecureRandom() {
+    try {
+      this.secureRandom = SecureRandom.getInstance("SHA1PRNG", "SUN");
+    } catch (GeneralSecurityException e) {
+      logger.warn("Could not initialize secure "
+          + "random number generator! Not calculating any IP address "
+          + "hashes in this execution!", e);
+      this.persistenceProblemWithSecrets = true;
+    }
+  }
+
+  /**
+   * Scrubs an "address:port" string, where the address is either an IPv4
+   * address or an IPv6 address in square brackets.
+   *
+   * @param orAddress Original address and port, separated by the last
+   *     colon.
+   * @param fingerprintBytes Bridge fingerprint; the first 20 bytes go
+   *     into the hash input.
+   * @param published Publication time starting with "yyyy-MM".
+   * @return Scrubbed address and port, or {@code null} if the input is
+   *     malformed or either part could not be scrubbed.
+   * @throws IOException If a newly generated secret could not be stored.
+   */
+  String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
+      String published) throws IOException {
+    int lastColon = orAddress.lastIndexOf(':');
+    if (lastColon < 0) {
+      /* Malformed or-address or a line. */
+      return null;
+    }
+    String addressPart = orAddress.substring(0, lastColon);
+    String portPart = orAddress.substring(lastColon + 1);
+    String scrubbedAddressPart;
+    if (addressPart.startsWith("[")) {
+      scrubbedAddressPart = this.scrubIpv6Address(addressPart,
+          fingerprintBytes, published);
+    } else {
+      scrubbedAddressPart = this.scrubIpv4Address(addressPart,
+          fingerprintBytes, published);
+    }
+    String scrubbedPort = this.scrubTcpPort(portPart, fingerprintBytes,
+        published);
+    if (scrubbedAddressPart == null || scrubbedPort == null) {
+      /* Never emit something like "10.1.2.3:null". */
+      return null;
+    }
+    return scrubbedAddressPart + ":" + scrubbedPort;
+  }
+
+  /**
+   * Scrubs an IPv4 address. With hashing enabled, the result is "10."
+   * followed by the first three bytes of the SHA-256 hash over address,
+   * fingerprint, and secret; otherwise it is "127.0.0.1".
+   *
+   * @param address Original IPv4 address in dotted-decimal notation.
+   * @param fingerprintBytes Bridge fingerprint; the first 20 bytes go
+   *     into the hash input.
+   * @param published Publication time starting with "yyyy-MM".
+   * @return Scrubbed address, or {@code null} if there is a persistence
+   *     problem or the address is not a valid IPv4 address.
+   * @throws IOException If a newly generated secret could not be stored.
+   */
+  String scrubIpv4Address(String address, byte[] fingerprintBytes,
+      String published) throws IOException {
+    if (!this.replaceIpAddressesWithHashes) {
+      return "127.0.0.1";
+    }
+    if (this.persistenceProblemWithSecrets) {
+      /* There's a persistence problem, so we shouldn't scrub more IP
+       * addresses in this execution. */
+      return null;
+    }
+    byte[] addressBytes = parseIpv4Octets(address);
+    if (null == addressBytes) {
+      /* Invalid IPv4 address. */
+      return null;
+    }
+    byte[] hashInput = new byte[4 + 20 + 31];
+    System.arraycopy(addressBytes, 0, hashInput, 0, 4);
+    System.arraycopy(fingerprintBytes, 0, hashInput, 4, 20);
+    byte[] secret = this.getSecretForMonth(published);
+    System.arraycopy(secret, 0, hashInput, 24, 31);
+    byte[] hashOutput = DigestUtils.sha256(hashInput);
+    return "10."
+        + (hashOutput[0] & 0xff) + "."
+        + (hashOutput[1] & 0xff) + "."
+        + (hashOutput[2] & 0xff);
+  }
+
+  /* Parse a dotted-decimal IPv4 address into its four bytes, or return
+   * null if it doesn't consist of exactly four numbers between 0 and
+   * 255. */
+  private static byte[] parseIpv4Octets(String address) {
+    String[] octets = address.split("\\.");
+    if (octets.length != 4) {
+      return null;
+    }
+    byte[] result = new byte[4];
+    for (int i = 0; i < 4; i++) {
+      int octet;
+      try {
+        octet = Integer.parseInt(octets[i]);
+      } catch (NumberFormatException e) {
+        return null;
+      }
+      if (octet < 0 || octet > 255) {
+        return null;
+      }
+      result[i] = (byte) octet;
+    }
+    return result;
+  }
+
+  /* Scrub an IPv6 address given in square brackets. With hashing
+   * enabled, the result is "[fd9f:2e19:3bcf::xx:xxxx]" with the x's
+   * being the last six hex characters of the SHA-256 hash over address,
+   * fingerprint, and secret; otherwise it is "[fd9f:2e19:3bcf::]".
+   * Returns null if there is a persistence problem or if the address is
+   * invalid. */
+  private String scrubIpv6Address(String address, byte[] fingerprintBytes,
+      String published) throws IOException {
+    StringBuilder sb = new StringBuilder("[fd9f:2e19:3bcf::");
+    if (this.replaceIpAddressesWithHashes) {
+      if (this.persistenceProblemWithSecrets) {
+        /* There's a persistence problem, so we shouldn't scrub more IP
+         * addresses in this execution. */
+        return null;
+      }
+      if (address.length() < 2 || !address.endsWith("]")) {
+        /* Invalid IPv6 address: missing closing bracket. */
+        return null;
+      }
+      String[] doubleColonSeparatedParts = address.substring(1,
+          address.length() - 1).split("::", -1);
+      if (doubleColonSeparatedParts.length > 2) {
+        /* Invalid IPv6 address. */
+        return null;
+      }
+      /* Convert the part before and, if present, the part after "::" to
+       * hex strings with four characters per group. */
+      List<String> hexParts = new ArrayList<>();
+      for (String doubleColonSeparatedPart : doubleColonSeparatedParts) {
+        StringBuilder hexPart = new StringBuilder();
+        String[] parts = doubleColonSeparatedPart.split(":", -1);
+        if (parts.length < 1 || parts.length > 8) {
+          /* Invalid IPv6 address. */
+          return null;
+        }
+        for (String part : parts) {
+          if (part.contains(".")) {
+            byte[] ipv4Bytes = parseIpv4Octets(part);
+            if (null == ipv4Bytes) {
+              /* Invalid IPv4 part in IPv6 address. */
+              return null;
+            }
+            hexPart.append(Hex.encodeHexString(ipv4Bytes));
+          } else if (part.length() > 4) {
+            /* Invalid IPv6 address. */
+            return null;
+          } else {
+            for (int k = part.length(); k < 4; k++) {
+              hexPart.append("0");
+            }
+            hexPart.append(part);
+          }
+        }
+        hexParts.add(hexPart.toString());
+      }
+      /* Fill the gap denoted by "::" with zeros to obtain 32 hex
+       * characters in total. */
+      StringBuilder hex = new StringBuilder();
+      hex.append(hexParts.get(0));
+      if (hexParts.size() == 2) {
+        for (int i = 32 - hexParts.get(0).length()
+            - hexParts.get(1).length(); i > 0; i--) {
+          hex.append("0");
+        }
+        hex.append(hexParts.get(1));
+      }
+      byte[] ipBytes;
+      try {
+        ipBytes = Hex.decodeHex(hex.toString().toCharArray());
+      } catch (DecoderException e) {
+        /* TODO Invalid IPv6 address. */
+        return null;
+      }
+      if (ipBytes.length != 16) {
+        /* TODO Invalid IPv6 address. */
+        return null;
+      }
+      byte[] hashInput = new byte[16 + 20 + 19];
+      System.arraycopy(ipBytes, 0, hashInput, 0, 16);
+      System.arraycopy(fingerprintBytes, 0, hashInput, 16, 20);
+      byte[] secret = this.getSecretForMonth(published);
+      System.arraycopy(secret, 31, hashInput, 36, 19);
+      String hashOutput = DigestUtils.sha256Hex(hashInput);
+      sb.append(hashOutput, hashOutput.length() - 6, hashOutput.length() - 4);
+      sb.append(":");
+      sb.append(hashOutput.substring(hashOutput.length() - 4));
+    }
+    sb.append("]");
+    return sb.toString();
+  }
+
+  /**
+   * Scrubs a TCP port. Port "0" is kept as is. With hashing enabled,
+   * other ports are replaced with a value between 49152 and 65535 that
+   * is derived from the first two bytes of the SHA-256 hash over port,
+   * fingerprint, and secret; otherwise they are replaced with "1".
+   *
+   * @param portString Original TCP port as decimal number.
+   * @param fingerprintBytes Bridge fingerprint; the first 20 bytes go
+   *     into the hash input.
+   * @param published Publication time starting with "yyyy-MM".
+   * @return Scrubbed port, or {@code null} if there is a persistence
+   *     problem or the port is not a number between 0 and 65535.
+   * @throws IOException If a newly generated secret could not be stored.
+   */
+  String scrubTcpPort(String portString, byte[] fingerprintBytes,
+      String published) throws IOException {
+    if (portString.equals("0")) {
+      return "0";
+    } else if (this.replaceIpAddressesWithHashes) {
+      if (this.persistenceProblemWithSecrets) {
+        /* There's a persistence problem, so we shouldn't scrub more TCP
+         * ports in this execution. */
+        return null;
+      }
+      int portNumber;
+      try {
+        portNumber = Integer.parseInt(portString);
+      } catch (NumberFormatException e) {
+        /* Invalid TCP port. */
+        return null;
+      }
+      if (portNumber < 0 || portNumber > 65535) {
+        /* Invalid TCP port. */
+        return null;
+      }
+      byte[] hashInput = new byte[2 + 20 + 33];
+      hashInput[0] = (byte) (portNumber >> 8);
+      hashInput[1] = (byte) portNumber;
+      System.arraycopy(fingerprintBytes, 0, hashInput, 2, 20);
+      byte[] secret = this.getSecretForMonth(published);
+      System.arraycopy(secret, 50, hashInput, 22, 33);
+      byte[] hashOutput = DigestUtils.sha256(hashInput);
+      /* Use the upper 14 bits of the first two hash bytes and set the two
+       * most significant bits of the 16-bit port number. */
+      int hashedPort = ((((hashOutput[0] & 0b1111_1111) << 8)
+          | (hashOutput[1] & 0b1111_1111)) >> 2) | 0b1100_0000_0000_0000;
+      return String.valueOf(hashedPort);
+    } else {
+      return "1";
+    }
+  }
+
+  /* Return the 83-byte secret for the month of the given publication
+   * time. A missing secret is generated, and a shorter existing secret
+   * is extended with random bytes. A new or extended secret is appended
+   * to the secrets file before it is used, unless its month sorts before
+   * the cut-off timestamp. */
+  private byte[] getSecretForMonth(String published) throws IOException {
+    if (this.bridgeSanitizingCutOffTimestamp
+        .compareTo(published) > 0) {
+      String text = "Sanitizing and storing bridge descriptor with publication "
+          + "time outside our descriptor sanitizing interval.";
+      if (this.haveWarnedAboutInterval) {
+        logger.debug(text);
+      } else {
+        logger.warn(text);
+        this.haveWarnedAboutInterval = true;
+      }
+    }
+    String month = published.substring(0, "yyyy-MM".length());
+    if (!this.secretsForHashingIpAddresses.containsKey(month)
+        || this.secretsForHashingIpAddresses.get(month).length < 83) {
+      byte[] secret = new byte[83];
+      this.secureRandom.nextBytes(secret);
+      if (this.secretsForHashingIpAddresses.containsKey(month)) {
+        /* Keep the existing, shorter secret as prefix, so that hashes
+         * computed from those bytes don't change. */
+        System.arraycopy(this.secretsForHashingIpAddresses.get(month), 0,
+            secret, 0,
+            this.secretsForHashingIpAddresses.get(month).length);
+      }
+      if (month.compareTo(
+          this.bridgeSanitizingCutOffTimestamp) < 0) {
+        logger.warn("Generated a secret that we won't make "
+            + "persistent, because it's outside our bridge descriptor "
+            + "sanitizing interval.");
+      } else {
+        /* Append secret to file on disk immediately before using it, or
+         * we might end with inconsistently sanitized bridges. */
+        byte[] newBytes = (month + "," + Hex.encodeHexString(secret) + "\n")
+            .getBytes(StandardCharsets.UTF_8);
+        try {
+          if (Files.exists(this.bridgeIpSecretsFile)) {
+            Files.write(this.bridgeIpSecretsFile, newBytes,
+                StandardOpenOption.APPEND);
+          } else {
+            Files.createDirectories(this.bridgeIpSecretsFile.getParent());
+            Files.write(this.bridgeIpSecretsFile, newBytes);
+          }
+        } catch (IOException e) {
+          logger.warn("Could not store new secret "
+              + "to disk! Not calculating any IP address or TCP port "
+              + "hashes in this execution!", e);
+          this.persistenceProblemWithSecrets = true;
+          throw e;
+        }
+      }
+      this.secretsForHashingIpAddresses.put(month, secret);
+    }
+    return this.secretsForHashingIpAddresses.get(month);
+  }
+
+  /**
+   * Rewrites the secrets file without the secrets whose month sorts
+   * before the cut-off timestamp, if there is at least one such secret.
+   * Failure to write the file is logged and otherwise ignored.
+   */
+  void finishWriting() {
+
+    /* Delete secrets that we don't need anymore. */
+    /* NOTE(review): keys are "yyyy-MM" strings compared with a
+     * "yyyy-MM-dd HH:mm:ss" timestamp, so the secret of the cut-off's own
+     * month sorts before the cut-off and is deleted, too -- confirm that
+     * this is intended. */
+    if (!this.secretsForHashingIpAddresses.isEmpty()
+        && this.secretsForHashingIpAddresses.firstKey().compareTo(
+        this.bridgeSanitizingCutOffTimestamp) < 0) {
+      try {
+        int kept = 0;
+        int deleted = 0;
+        List<String> lines = new ArrayList<>();
+        for (Map.Entry<String, byte[]> e :
+            this.secretsForHashingIpAddresses.entrySet()) {
+          if (e.getKey().compareTo(
+              this.bridgeSanitizingCutOffTimestamp) < 0) {
+            deleted++;
+          } else {
+            lines.add(e.getKey() + "," + Hex.encodeHexString(e.getValue()));
+            kept++;
+          }
+        }
+        Files.write(bridgeIpSecretsFile, lines);
+        logger.info("Deleted {} secrets that we don't "
+            + "need anymore and kept {}.", deleted, kept);
+      } catch (IOException e) {
+        logger.warn("Could not store reduced set of "
+            + "secrets to disk! This is a bad sign, better check what's "
+            + "going on!", e);
+      }
+    }
+  }
+}
+
diff --git a/src/test/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriterTest.java b/src/test/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriterTest.java
index 67e9738..d8c7119 100644
--- a/src/test/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriterTest.java
+++ b/src/test/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriterTest.java
@@ -821,6 +821,8 @@ public class SanitizedBridgesWriterTest {
@Test
public void testBridgeIpSecretsIsDirectory() throws Exception {
+ this.configuration.setProperty(Key.ReplaceIpAddressesWithHashes.name(),
+ "true");
Files.createDirectory(Paths.get(statsDirectory, "bridge-ip-secrets"));
this.runTest();
assertTrue("Sanitized server descriptors without secrets.",
1
0

01 Dec '20
commit 2e8cdf7fe1cd11b6afe599512e4844c4234e257a
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Tue Dec 1 10:35:26 2020 +0100
Move sanitizing code to one class per type.
Part of #20542.
---
.../bridgedescs/SanitizedBridgeDescriptor.java | 118 ++++
.../SanitizedBridgeExtraInfoDescriptor.java | 192 +++++
.../bridgedescs/SanitizedBridgeNetworkStatus.java | 230 ++++++
.../SanitizedBridgeServerDescriptor.java | 360 ++++++++++
.../bridgedescs/SanitizedBridgesWriter.java | 771 +--------------------
5 files changed, 934 insertions(+), 737 deletions(-)
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeDescriptor.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeDescriptor.java
new file mode 100644
index 0000000..5ddeefe
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeDescriptor.java
@@ -0,0 +1,118 @@
+/* Copyright 2010--2020 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.bridgedescs;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Base class of sanitized bridge descriptors, holding the original and
+ * sanitized descriptor bytes and providing helpers for parsing Ed25519
+ * identities and for computing descriptor digests.
+ */
+public abstract class SanitizedBridgeDescriptor {
+
+  private static final Logger logger = LoggerFactory.getLogger(
+      SanitizedBridgeDescriptor.class);
+
+  protected byte[] originalBytes;
+
+  protected SensitivePartsSanitizer sensitivePartsSanitizer;
+
+  protected byte[] sanitizedBytes;
+
+  protected String publishedString;
+
+  SanitizedBridgeDescriptor(byte[] originalBytes,
+      SensitivePartsSanitizer sensitivePartsSanitizer) {
+    this.originalBytes = originalBytes;
+    this.sensitivePartsSanitizer = sensitivePartsSanitizer;
+  }
+
+  /**
+   * Extracts the master key from a base64-encoded identity-ed25519
+   * certificate by looking for an extension of type 4 with 32 bytes of
+   * data.
+   *
+   * <p>Bytes read from the certificate: version at offset 0 (must be 1),
+   * cert type at offset 1 (must be 4), certified key type at offset 6
+   * (must be 1), and number of extensions at offset 39. Each extension
+   * starts with a 4-byte header of which the first two bytes are the
+   * big-endian data length and the third byte is the type.</p>
+   *
+   * @param identityEd25519Base64 Base64-encoded certificate without
+   *     BEGIN and END lines.
+   * @return Base64-encoded master key without trailing padding, or
+   *     {@code null} if it could not be located; a warning is logged in
+   *     that case.
+   */
+  protected String parseMasterKeyEd25519FromIdentityEd25519(
+      String identityEd25519Base64) {
+    byte[] identityEd25519 = Base64.decodeBase64(identityEd25519Base64);
+    if (identityEd25519.length < 40) {
+      logger.warn("Invalid length of identity-ed25519 (in bytes): {}",
+          identityEd25519.length);
+    } else if (identityEd25519[0] != 0x01) {
+      logger.warn("Unknown version in identity-ed25519: {}",
+          identityEd25519[0]);
+    } else if (identityEd25519[1] != 0x04) {
+      logger.warn("Unknown cert type in identity-ed25519: {}",
+          identityEd25519[1]);
+    } else if (identityEd25519[6] != 0x01) {
+      logger.warn("Unknown certified key type in identity-ed25519: {}",
+          identityEd25519[6]);
+    } else if (identityEd25519[39] == 0x00) {
+      logger.warn("No extensions in identity-ed25519 (which "
+          + "would contain the encoded master-key-ed25519): {}",
+          identityEd25519[39]);
+    } else {
+      /* Bytes are signed in Java, so mask them before using them as
+       * unsigned count or length. */
+      int extensions = identityEd25519[39] & 0xff;
+      int extensionStart = 40;
+      for (int i = 0; i < extensions; i++) {
+        if (identityEd25519.length < extensionStart + 4) {
+          logger.warn("Invalid extension with id {} in identity-ed25519.", i);
+          break;
+        }
+        int extensionLength = ((identityEd25519[extensionStart] & 0xff) << 8)
+            | (identityEd25519[extensionStart + 1] & 0xff);
+        int extensionType = identityEd25519[extensionStart + 2];
+        if (extensionLength == 32 && extensionType == 4) {
+          if (identityEd25519.length < extensionStart + 4 + 32) {
+            logger.warn("Invalid extension with id {} in identity-ed25519.", i);
+            break;
+          }
+          byte[] masterKeyEd25519 = new byte[32];
+          System.arraycopy(identityEd25519, extensionStart + 4,
+              masterKeyEd25519, 0, masterKeyEd25519.length);
+          String masterKeyEd25519Base64 = Base64.encodeBase64String(
+              masterKeyEd25519);
+          return masterKeyEd25519Base64.replace("=", "");
+        }
+        extensionStart += 4 + extensionLength;
+      }
+    }
+    logger.warn("Unable to locate master-key-ed25519 in identity-ed25519.");
+    return null;
+  }
+
+  /**
+   * Computes the hex-encoded SHA-1 digest of the SHA-1 digest of the
+   * descriptor bytes from the first occurrence of the start token to the
+   * end of the first occurrence of the signature token.
+   *
+   * @param descriptorBytes Descriptor bytes to search and digest.
+   * @param startToken Token marking the first byte to include.
+   * @param sigToken Token marking the last bytes to include.
+   * @return Hex-encoded digest, or {@code null} if one of the tokens was
+   *     not found or the signature token does not end after the start
+   *     token; a warning is logged in that case.
+   */
+  protected String computeDescriptorDigest(byte[] descriptorBytes,
+      String startToken, String sigToken) {
+    String descriptorDigest = null;
+    byte[] forDigest = extractBytesForDigest(descriptorBytes, startToken,
+        sigToken);
+    if (forDigest != null) {
+      descriptorDigest = DigestUtils.sha1Hex(DigestUtils.sha1(forDigest));
+    }
+    if (descriptorDigest == null) {
+      logger.warn("Could not calculate extra-info descriptor digest.");
+    }
+    return descriptorDigest;
+  }
+
+  /**
+   * Computes the base64-encoded SHA-256 digest, without trailing padding,
+   * of the SHA-256 digest of the descriptor bytes from the first
+   * occurrence of the start token to the end of the first occurrence of
+   * the signature token.
+   *
+   * @param descriptorBytes Descriptor bytes to search and digest.
+   * @param startToken Token marking the first byte to include.
+   * @param sigToken Token marking the last bytes to include.
+   * @return Base64-encoded digest, or {@code null} if one of the tokens
+   *     was not found or the signature token does not end after the start
+   *     token; a warning is logged in that case.
+   */
+  protected String computeSha256Base64Digest(byte[] descriptorBytes,
+      String startToken, String sigToken) {
+    String descriptorDigestSha256Base64 = null;
+    byte[] forDigest = extractBytesForDigest(descriptorBytes, startToken,
+        sigToken);
+    if (forDigest != null) {
+      descriptorDigestSha256Base64 = Base64.encodeBase64String(
+          DigestUtils.sha256(DigestUtils.sha256(forDigest)))
+          .replace("=", "");
+    }
+    if (descriptorDigestSha256Base64 == null) {
+      logger.warn("Could not calculate extra-info "
+          + "descriptor SHA256 digest.");
+    }
+    return descriptorDigestSha256Base64;
+  }
+
+  /* Return the bytes from the start token to the end of the signature
+   * token, or null if either token is missing or the range is empty. */
+  private static byte[] extractBytesForDigest(byte[] descriptorBytes,
+      String startToken, String sigToken) {
+    /* US-ASCII decoding produces exactly one character per byte, so
+     * string indexes can be used as byte offsets. */
+    String ascii = new String(descriptorBytes, StandardCharsets.US_ASCII);
+    int start = ascii.indexOf(startToken);
+    int sigTokenStart = ascii.indexOf(sigToken);
+    if (start < 0 || sigTokenStart < 0) {
+      /* Check for -1 before adding the token length, which would
+       * otherwise hide a missing signature token. */
+      return null;
+    }
+    int end = sigTokenStart + sigToken.length();
+    if (end <= start) {
+      return null;
+    }
+    byte[] forDigest = new byte[end - start];
+    System.arraycopy(descriptorBytes, start, forDigest, 0, end - start);
+    return forDigest;
+  }
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeExtraInfoDescriptor.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeExtraInfoDescriptor.java
new file mode 100644
index 0000000..f2ec992
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeExtraInfoDescriptor.java
@@ -0,0 +1,192 @@
+/* Copyright 2010--2020 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.bridgedescs;
+
+import org.torproject.metrics.collector.conf.Annotation;
+
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Sanitized version of a bridge extra-info descriptor: the bridge identity
+ * is replaced with its SHA-1 hash, transport lines are reduced to the
+ * transport name, transport-info lines and signatures are removed, and
+ * digests of the original descriptor are appended.
+ */
+public class SanitizedBridgeExtraInfoDescriptor
+    extends SanitizedBridgeDescriptor {
+
+  private static final Logger logger = LoggerFactory.getLogger(
+      SanitizedBridgeExtraInfoDescriptor.class);
+
+  private String descriptorDigest;
+
+  SanitizedBridgeExtraInfoDescriptor(byte[] originalBytes,
+      SensitivePartsSanitizer sensitivePartsSanitizer) {
+    super(originalBytes, sensitivePartsSanitizer);
+  }
+
+  /**
+   * Parses the original descriptor line by line and generates the
+   * sanitized descriptor bytes.
+   *
+   * @return {@code true} if the descriptor was sanitized, {@code false} if
+   *     it contains an illegal or unrecognized line, an unparseable
+   *     identity-ed25519 certificate, or mismatching Ed25519 master keys
+   *     and should be skipped.
+   */
+  boolean sanitizeDescriptor() {
+
+    /* Parse descriptor to generate a sanitized version. */
+    String masterKeyEd25519FromIdentityEd25519 = null;
+    DescriptorBuilder scrubbed = new DescriptorBuilder();
+    try (BufferedReader br = new BufferedReader(new StringReader(new String(
+        this.originalBytes, StandardCharsets.US_ASCII)))) {
+      scrubbed.append(Annotation.BridgeExtraInfo.toString());
+      String line;
+      String masterKeyEd25519 = null;
+      while ((line = br.readLine()) != null) {
+        String[] parts = line.split(" ");
+
+        if (line.startsWith("extra-info ")) {
+          /* Parse bridge identity from extra-info line and replace it with
+           * its hash in the sanitized descriptor. */
+          if (parts.length < 3) {
+            logger.debug("Illegal line in extra-info descriptor: '{}'. "
+                + "Skipping descriptor.", line);
+            return false;
+          }
+          String hashedBridgeIdentity = DigestUtils.sha1Hex(Hex.decodeHex(
+              parts[2].toCharArray()));
+          scrubbed.append("extra-info ").append(parts[1])
+              .space().append(hashedBridgeIdentity.toUpperCase()).newLine();
+
+        } else if (line.startsWith("published ")) {
+          /* Copy the publication time and remember it. */
+          scrubbed.append(line).newLine();
+          this.publishedString = line.substring("published ".length());
+
+        } else if (line.startsWith("transport ")) {
+          /* Remove everything from transport lines except the transport
+           * name. */
+          if (parts.length < 3) {
+            logger.debug("Illegal line in extra-info descriptor: '{}'. "
+                + "Skipping descriptor.", line);
+            return false;
+          }
+          scrubbed.append("transport ").append(parts[1]).newLine();
+
+        } else if (line.startsWith("transport-info ")) {
+          /* Skip transport-info lines entirely. */
+          continue;
+
+        } else if (line.equals("identity-ed25519")) {
+          /* Extract master-key-ed25519 from identity-ed25519 and include
+           * its SHA-256 hash in the sanitized descriptor. */
+          masterKeyEd25519FromIdentityEd25519 =
+              this.parseMasterKeyEd25519FromIdentityEd25519(
+              readEd25519CertBody(br));
+          if (null == masterKeyEd25519FromIdentityEd25519) {
+            /* Don't hash the string "null" and pretend it's a key. */
+            logger.warn("Could not parse master-key-ed25519 from "
+                + "identity-ed25519. Skipping.");
+            return false;
+          }
+          if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
+              masterKeyEd25519FromIdentityEd25519)) {
+            logger.warn("Mismatch between identity-ed25519 and "
+                + "master-key-ed25519. Skipping.");
+            return false;
+          }
+          String sha256MasterKeyEd25519 = Base64.encodeBase64String(
+              DigestUtils.sha256(Base64.decodeBase64(
+              masterKeyEd25519FromIdentityEd25519 + "=")))
+              .replace("=", "");
+          scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519)
+              .newLine();
+
+        } else if (line.startsWith("master-key-ed25519 ")) {
+          /* Verify that identity-ed25519 and master-key-ed25519 match. */
+          masterKeyEd25519 = line.substring(line.indexOf(" ") + 1);
+          if (masterKeyEd25519FromIdentityEd25519 != null
+              && !masterKeyEd25519FromIdentityEd25519.equals(
+              masterKeyEd25519)) {
+            logger.warn("Mismatch between identity-ed25519 and "
+                + "master-key-ed25519. Skipping.");
+            return false;
+          }
+
+        } else if (line.startsWith("write-history ")
+            || line.startsWith("read-history ")
+            || line.startsWith("ipv6-write-history ")
+            || line.startsWith("ipv6-read-history ")
+            || line.startsWith("geoip-start-time ")
+            || line.startsWith("geoip-client-origins ")
+            || line.startsWith("geoip-db-digest ")
+            || line.startsWith("geoip6-db-digest ")
+            || line.startsWith("conn-bi-direct ")
+            || line.startsWith("ipv6-conn-bi-direct ")
+            || line.startsWith("bridge-")
+            || line.startsWith("dirreq-")
+            || line.startsWith("cell-")
+            || line.startsWith("entry-")
+            || line.startsWith("exit-")
+            || line.startsWith("hidserv-")
+            || line.startsWith("padding-counts ")) {
+          /* Write these lines unmodified to the sanitized descriptor. */
+          scrubbed.append(line).newLine();
+
+        } else if (line.startsWith("router-signature")) {
+          /* When we reach the signature, we're done. */
+          break;
+
+        } else if (line.startsWith("router-sig-ed25519 ")) {
+          /* Skip the ed25519 signature; we'll include a SHA256 digest of
+           * the SHA256 descriptor digest in router-digest-sha256. */
+          continue;
+
+        } else {
+          /* If we encounter an unrecognized line, stop parsing and print
+           * out a warning. We might have overlooked sensitive information
+           * that we need to remove or replace for the sanitized descriptor
+           * version. */
+          logger.warn("Unrecognized line '{}'. Skipping.", line);
+          return false;
+        }
+      }
+    } catch (DecoderException | IOException e) {
+      logger.warn("Could not parse extra-info descriptor.", e);
+      return false;
+    }
+
+    /* Determine digest(s) of the original extra-info descriptor and append
+     * them to the sanitized descriptor. The SHA-256 digest is only
+     * computed for descriptors containing an identity-ed25519. */
+    this.descriptorDigest = this.computeDescriptorDigest(this.originalBytes,
+        "extra-info ", "\nrouter-signature\n");
+    String descriptorDigestSha256Base64 = null;
+    if (masterKeyEd25519FromIdentityEd25519 != null) {
+      descriptorDigestSha256Base64 = this.computeSha256Base64Digest(
+          this.originalBytes, "extra-info ", "\n-----END SIGNATURE-----\n");
+    }
+    if (null != descriptorDigestSha256Base64) {
+      scrubbed.append("router-digest-sha256 ")
+          .append(descriptorDigestSha256Base64).newLine();
+    }
+    if (null != this.descriptorDigest) {
+      scrubbed.append("router-digest ")
+          .append(this.descriptorDigest.toUpperCase()).newLine();
+    }
+    this.sanitizedBytes = scrubbed.toBytes();
+    return true;
+  }
+
+  /* Read and concatenate the lines following "identity-ed25519" up to,
+   * but excluding, the END line or the end of input, leaving out the
+   * BEGIN line. */
+  private static String readEd25519CertBody(BufferedReader br)
+      throws IOException {
+    StringBuilder sb = new StringBuilder();
+    String line;
+    while ((line = br.readLine()) != null
+        && !line.equals("-----END ED25519 CERT-----")) {
+      if (line.equals("-----BEGIN ED25519 CERT-----")) {
+        continue;
+      }
+      sb.append(line);
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Returns the sanitized descriptor bytes, or {@code null} if the
+   * descriptor has not been sanitized successfully.
+   */
+  byte[] getSanitizedBytes() {
+    return this.sanitizedBytes;
+  }
+
+  /**
+   * Returns the content of the "published" line, or {@code null} if no
+   * such line has been parsed.
+   */
+  public String getPublishedString() {
+    return this.publishedString;
+  }
+
+  /**
+   * Returns the hex-encoded descriptor digest computed while sanitizing,
+   * or {@code null} if it has not been or could not be computed.
+   */
+  public String getDescriptorDigest() {
+    return this.descriptorDigest;
+  }
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeNetworkStatus.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeNetworkStatus.java
new file mode 100644
index 0000000..d94cb0d
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeNetworkStatus.java
@@ -0,0 +1,230 @@
+/* Copyright 2010--2020 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.bridgedescs;
+
+import org.torproject.metrics.collector.conf.Annotation;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Bridge network status that is sanitized line by line: header lines are
+ * copied, "r" and "a" lines are rewritten with hashed identities and scrubbed
+ * addresses and ports, and the resulting entries are written out ordered by
+ * hex-encoded hashed bridge identity.
+ */
+public class SanitizedBridgeNetworkStatus extends SanitizedBridgeDescriptor {
+
+ private static final Logger logger = LoggerFactory.getLogger(
+ SanitizedBridgeNetworkStatus.class);
+
+ /** Authority fingerprint expected by the caller; a "fingerprint" line in
+ * the status must match it exactly. */
+ private final String authorityFingerprint;
+
+ /**
+ * Creates a new instance for the given raw status bytes.
+ *
+ * @param originalBytes raw bytes of the bridge network status
+ * @param sensitivePartsSanitizer helper used to scrub addresses and ports
+ * @param publicationTime initial publication time; replaced if the status
+ * contains a "published" line
+ * @param authorityFingerprint fingerprint the status is expected to carry
+ */
+ SanitizedBridgeNetworkStatus(byte[] originalBytes,
+ SensitivePartsSanitizer sensitivePartsSanitizer, String publicationTime,
+ String authorityFingerprint) {
+ super(originalBytes, sensitivePartsSanitizer);
+ this.publishedString = publicationTime;
+ this.authorityFingerprint = authorityFingerprint;
+ }
+
+ /**
+ * Sanitizes the network status and stores the result for
+ * {@link #getSanitizedBytes()}.
+ *
+ * @return true if the status was sanitized, false if it was rejected
+ * because of a persistence problem, a fingerprint mismatch, a malformed
+ * or unknown line, or a parse error
+ */
+ boolean sanitizeDescriptor() {
+
+ if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return false;
+ }
+
+ /* Parse the given network status line by line. */
+ boolean includesFingerprintLine = false;
+ DescriptorBuilder scrubbed = new DescriptorBuilder();
+ scrubbed.append(Annotation.Status.toString());
+ SortedMap<String, String> scrubbedEntries = new TreeMap<>();
+ /* The publication time and header are appended as still-empty
+ * placeholders here and filled in further below. NOTE(review): this
+ * presumably relies on DescriptorBuilder keeping the references until
+ * toBytes() is called -- confirm. */
+ StringBuilder publishedStringBuilder = new StringBuilder();
+ scrubbed.append("published ").append(publishedStringBuilder).newLine();
+ DescriptorBuilder header = new DescriptorBuilder();
+ scrubbed.append(header);
+
+ /* Use try-with-resources so that the reader is also closed on the early
+ * returns below. */
+ try (BufferedReader br = new BufferedReader(new StringReader(new String(
+ this.originalBytes, StandardCharsets.US_ASCII)))) {
+ String line;
+ String mostRecentDescPublished = null;
+ byte[] fingerprintBytes = null;
+ String descPublicationTime = null;
+ String hashedBridgeIdentityHex = null;
+ DescriptorBuilder scrubbedEntry = new DescriptorBuilder();
+ while ((line = br.readLine()) != null) {
+
+ /* Use publication time from "published" line instead of the
+ * file's last-modified time. Don't copy over the line, because
+ * we're going to write a "published" line below. */
+ if (line.startsWith("published ")) {
+ this.publishedString = line.substring("published ".length());
+
+ /* Additional header lines don't have to be cleaned up. */
+ } else if (line.startsWith("flag-thresholds ")) {
+ header.append(line).newLine();
+
+ /* The authority fingerprint in the "fingerprint" line can go in
+ * unscrubbed. */
+ } else if (line.startsWith("fingerprint ")) {
+ if (!("fingerprint " + authorityFingerprint).equals(line)) {
+ logger.warn("Mismatch between authority fingerprint expected from "
+ + "file name ({}) and parsed from \"fingerprint\" "
+ + "line (\"{}\").", authorityFingerprint, line);
+ return false;
+ }
+ header.append(line).newLine();
+ includesFingerprintLine = true;
+
+ /* r lines contain sensitive information that needs to be removed
+ * or replaced. */
+ } else if (line.startsWith("r ")) {
+
+ /* Clear buffer from previously scrubbed lines. */
+ if (scrubbedEntry.hasContent()) {
+ scrubbedEntries.put(hashedBridgeIdentityHex,
+ scrubbedEntry.toString());
+ scrubbedEntry = new DescriptorBuilder();
+ }
+
+ /* Parse the relevant parts of this r line. */
+ String[] parts = line.split(" ");
+ if (parts.length < 9) {
+ logger.warn("Illegal line '{}' in bridge network "
+ + "status. Skipping descriptor.", line);
+ return false;
+ }
+ if (!Base64.isBase64(parts[2])) {
+ logger.warn("Illegal base64 character in r line '{}'. "
+ + "Skipping descriptor.", parts[2]);
+ return false;
+ }
+ fingerprintBytes = Base64.decodeBase64(parts[2] + "==");
+ descPublicationTime = parts[4] + " " + parts[5];
+ String address = parts[6];
+ String orPort = parts[7];
+ String dirPort = parts[8];
+
+ /* Determine most recent descriptor publication time. */
+ if (descPublicationTime.compareTo(this.publishedString) <= 0
+ && (mostRecentDescPublished == null
+ || descPublicationTime.compareTo(
+ mostRecentDescPublished) > 0)) {
+ mostRecentDescPublished = descPublicationTime;
+ }
+
+ /* Write scrubbed r line to buffer. */
+ byte[] hashedBridgeIdentity = DigestUtils.sha1(fingerprintBytes);
+ String hashedBridgeIdentityBase64 = Base64.encodeBase64String(
+ hashedBridgeIdentity).substring(0, 27);
+ hashedBridgeIdentityHex = Hex.encodeHexString(
+ hashedBridgeIdentity);
+ String descriptorIdentifier = parts[3];
+ String hashedDescriptorIdentifier = Base64.encodeBase64String(
+ DigestUtils.sha1(Base64.decodeBase64(descriptorIdentifier
+ + "=="))).substring(0, 27);
+ String scrubbedAddress = this.sensitivePartsSanitizer
+ .scrubIpv4Address(address, fingerprintBytes, descPublicationTime);
+ String nickname = parts[1];
+ String scrubbedOrPort = this.sensitivePartsSanitizer.scrubTcpPort(
+ orPort, fingerprintBytes, descPublicationTime);
+ String scrubbedDirPort = this.sensitivePartsSanitizer.scrubTcpPort(
+ dirPort, fingerprintBytes, descPublicationTime);
+
+ /* Never write an entry whose address or ports could not be scrubbed;
+ * the server descriptor sanitizer treats null results the same way.
+ * The raw line is deliberately not logged here. */
+ if (null == scrubbedAddress || null == scrubbedOrPort
+ || null == scrubbedDirPort) {
+ logger.warn("Could not scrub IP address or TCP port in r line of "
+ + "bridge network status. Skipping descriptor.");
+ return false;
+ }
+ scrubbedEntry.append("r ").append(nickname).space()
+ .append(hashedBridgeIdentityBase64).space()
+ .append(hashedDescriptorIdentifier).space()
+ .append(descPublicationTime).space()
+ .append(scrubbedAddress).space()
+ .append(scrubbedOrPort).space()
+ .append(scrubbedDirPort).newLine();
+
+ /* Sanitize any addresses in a lines using the fingerprint and
+ * descriptor publication time from the previous r line. */
+ } else if (line.startsWith("a ")) {
+ String scrubbedOrAddress = this.sensitivePartsSanitizer
+ .scrubOrAddress(line.substring("a ".length()), fingerprintBytes,
+ descPublicationTime);
+ if (scrubbedOrAddress != null) {
+ scrubbedEntry.append("a ").append(scrubbedOrAddress).newLine();
+ } else {
+ logger.warn("Invalid address in line '{}' "
+ + "in bridge network status. Skipping line!", line);
+ }
+
+ /* Nothing special about s, w, and p lines; just copy them. */
+ } else if (line.startsWith("s ") || line.equals("s")
+ || line.startsWith("w ") || line.equals("w")
+ || line.startsWith("p ") || line.equals("p")) {
+ scrubbedEntry.append(line).newLine();
+
+ /* There should be nothing else but r, a, w, p, and s lines in the
+ * network status. If there is, we should probably learn before
+ * writing anything to the sanitized descriptors. */
+ } else {
+ logger.debug("Unknown line '{}' in bridge "
+ + "network status. Not writing to disk!", line);
+ return false;
+ }
+ }
+
+ /* Flush the last entry, which has no following r line to trigger it. */
+ if (scrubbedEntry.hasContent()) {
+ scrubbedEntries.put(hashedBridgeIdentityHex, scrubbedEntry.toString());
+ }
+ if (!includesFingerprintLine) {
+ header.append("fingerprint ").append(authorityFingerprint).newLine();
+ }
+
+ /* Check if we can tell from the descriptor publication times
+ * whether this status is possibly stale. */
+ SimpleDateFormat formatter = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ if (null == mostRecentDescPublished) {
+ logger.warn("The bridge network status published at {}"
+ + " does not contain a single entry. Please ask the bridge "
+ + "authority operator to check!", this.publishedString);
+ } else if (formatter.parse(this.publishedString).getTime()
+ - formatter.parse(mostRecentDescPublished).getTime()
+ > 60L * 60L * 1000L) {
+ logger.warn("The most recent descriptor in the bridge "
+ + "network status published at {} was published at {} which is "
+ + "more than 1 hour before the status. This is a sign for "
+ + "the status being stale. Please check!",
+ this.publishedString, mostRecentDescPublished);
+ }
+ } catch (ParseException e) {
+ logger.warn("Could not parse timestamp in bridge network status.", e);
+ return false;
+ } catch (IOException e) {
+ logger.warn("Could not parse bridge network status.", e);
+ return false;
+ }
+
+ /* Fill in the publication time placeholder and append all entries in the
+ * order of the sorted map, then keep the resulting bytes for the caller
+ * to write. */
+ publishedStringBuilder.append(this.publishedString);
+ for (String scrubbedEntry : scrubbedEntries.values()) {
+ scrubbed.append(scrubbedEntry);
+ }
+ this.sanitizedBytes = scrubbed.toBytes();
+ return true;
+ }
+
+
+ /** Returns the sanitized status bytes, which sanitizeDescriptor() assigns
+ * only when it succeeds. */
+ byte[] getSanitizedBytes() {
+ return this.sanitizedBytes;
+ }
+
+ /** Returns the publication time, which is the constructor argument unless
+ * a "published" line was parsed. */
+ public String getPublishedString() {
+ return this.publishedString;
+ }
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeServerDescriptor.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeServerDescriptor.java
new file mode 100644
index 0000000..7f3d4d8
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgeServerDescriptor.java
@@ -0,0 +1,360 @@
+/* Copyright 2010--2020 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.bridgedescs;
+
+import org.torproject.metrics.collector.conf.Annotation;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Bridge server descriptor that is sanitized line by line: addresses, ports,
+ * fingerprints, contact information, and digests are replaced, crypto blocks
+ * are dropped, and descriptors containing unrecognized lines are rejected.
+ */
+public class SanitizedBridgeServerDescriptor
+ extends SanitizedBridgeDescriptor {
+
+ private static final Logger logger = LoggerFactory.getLogger(
+ SanitizedBridgeServerDescriptor.class);
+
+ /** Digest computed over the original bytes by sanitizeDescriptor(). */
+ private String descriptorDigest;
+
+ /**
+ * Creates a new instance for the given raw descriptor bytes.
+ *
+ * @param originalBytes raw bytes of the bridge server descriptor
+ * @param sensitivePartsSanitizer helper used to scrub addresses and ports
+ */
+ SanitizedBridgeServerDescriptor(byte[] originalBytes,
+ SensitivePartsSanitizer sensitivePartsSanitizer) {
+ super(originalBytes, sensitivePartsSanitizer);
+ }
+
+ /**
+ * Sanitizes the server descriptor and stores the result for
+ * {@link #getSanitizedBytes()}.
+ *
+ * @return true if the descriptor was sanitized, false if it was rejected
+ * because of a persistence problem, a malformed or unrecognized line,
+ * a missing required line, or a parse error
+ */
+ boolean sanitizeDescriptor() {
+
+ if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) {
+ /* There's a persistence problem, so we shouldn't scrub more IP
+ * addresses in this execution. */
+ return false;
+ }
+
+ /* Parse descriptor to generate a sanitized version. The StringBuilder
+ * keys in the maps below are still-empty placeholders that are appended
+ * to the scrubbed descriptor right away and filled in after parsing, once
+ * fingerprint and publication time are known. */
+ String address = null;
+ byte[] fingerprintBytes = null;
+ StringBuilder scrubbedAddress = null;
+ Map<StringBuilder, String> scrubbedTcpPorts = new HashMap<>();
+ Map<StringBuilder, String> scrubbedIpAddressesAndTcpPorts = new HashMap<>();
+ String masterKeyEd25519FromIdentityEd25519 = null;
+ DescriptorBuilder scrubbed = new DescriptorBuilder();
+ try (BufferedReader br = new BufferedReader(new StringReader(
+ new String(this.originalBytes, StandardCharsets.US_ASCII)))) {
+ scrubbed.append(Annotation.BridgeServer.toString());
+ String line;
+ String masterKeyEd25519 = null;
+ boolean skipCrypto = false;
+ while ((line = br.readLine()) != null) {
+
+ /* Skip all crypto parts that might be used to derive the bridge's
+ * identity fingerprint. */
+ if (skipCrypto && !line.startsWith("-----END ")) {
+ continue;
+
+ /* Store the router line for later processing, because we may need
+ * the bridge identity fingerprint for replacing the IP address in
+ * the scrubbed version. */
+ } else if (line.startsWith("router ")) {
+ String[] parts = line.split(" ");
+ if (parts.length != 6) {
+ logger.warn("Invalid router line: '{}'. Skipping.", line);
+ return false;
+ }
+ address = parts[2];
+ scrubbedAddress = new StringBuilder();
+ StringBuilder scrubbedOrPort = new StringBuilder();
+ scrubbedTcpPorts.put(scrubbedOrPort, parts[3]);
+ StringBuilder scrubbedDirPort = new StringBuilder();
+ scrubbedTcpPorts.put(scrubbedDirPort, parts[4]);
+ StringBuilder scrubbedSocksPort = new StringBuilder();
+ scrubbedTcpPorts.put(scrubbedSocksPort, parts[5]);
+ scrubbed.append("router ").append(parts[1]).space()
+ .append(scrubbedAddress).space()
+ .append(scrubbedOrPort).space()
+ .append(scrubbedDirPort).space()
+ .append(scrubbedSocksPort).newLine();
+
+ /* Store or-address and sanitize it when we have read the fingerprint
+ * and descriptor publication time. */
+ } else if (line.startsWith("or-address ")) {
+ String orAddress = line.substring("or-address ".length());
+ StringBuilder scrubbedOrAddress = new StringBuilder();
+ scrubbedIpAddressesAndTcpPorts.put(scrubbedOrAddress, orAddress);
+ scrubbed.append("or-address ").append(scrubbedOrAddress).newLine();
+
+ /* Parse the publication time to see if we're still inside the
+ * sanitizing interval. */
+ } else if (line.startsWith("published ")) {
+ this.publishedString = line.substring("published ".length());
+ scrubbed.append(line).newLine();
+
+ /* Parse the fingerprint to determine the hashed bridge
+ * identity. */
+ } else if (line.startsWith("opt fingerprint ")
+ || line.startsWith("fingerprint ")) {
+ String fingerprint = line.substring(line.startsWith("opt ")
+ ? "opt fingerprint".length() : "fingerprint".length())
+ .replaceAll(" ", "").toLowerCase();
+ fingerprintBytes = Hex.decodeHex(fingerprint.toCharArray());
+ String hashedBridgeIdentity = DigestUtils.sha1Hex(fingerprintBytes)
+ .toLowerCase();
+ scrubbed.append(line.startsWith("opt ") ? "opt " : "")
+ .append("fingerprint");
+ /* Write the hashed identity in space-separated groups of four
+ * upper-case hex characters. */
+ for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++) {
+ scrubbed.space().append(hashedBridgeIdentity.substring(4 * i,
+ 4 * (i + 1)).toUpperCase());
+ }
+ scrubbed.newLine();
+
+ /* Replace the contact line (if present) with a generic one. */
+ } else if (line.startsWith("contact ")) {
+ scrubbed.append("contact somebody").newLine();
+
+ /* When we reach the signature, we're done. Write the sanitized
+ * descriptor to disk below. */
+ } else if (line.startsWith("router-signature")) {
+ break;
+
+ /* Replace extra-info digest with the hashed digest of the
+ * non-scrubbed descriptor. */
+ } else if (line.startsWith("opt extra-info-digest ")
+ || line.startsWith("extra-info-digest ")) {
+ String[] parts = line.split(" ");
+ if (line.startsWith("opt ")) {
+ scrubbed.append("opt ");
+ parts = line.substring(4).split(" ");
+ }
+ if (parts.length > 3) {
+ logger.warn("extra-info-digest line contains more arguments "
+ + "than expected: '{}'. Skipping descriptor.", line);
+ return false;
+ }
+ scrubbed.append("extra-info-digest ").append(DigestUtils.sha1Hex(
+ Hex.decodeHex(parts[1].toCharArray())).toUpperCase());
+ if (parts.length > 2) {
+ if (!Base64.isBase64(parts[2])) {
+ logger.warn("Illegal base64 character in extra-info-digest line "
+ + "'{}'. Skipping descriptor.", line);
+ return false;
+ }
+ scrubbed.space().append(Base64.encodeBase64String(
+ DigestUtils.sha256(Base64.decodeBase64(parts[2])))
+ .replaceAll("=", ""));
+ }
+ scrubbed.newLine();
+
+ /* Possibly sanitize reject lines if they contain the bridge's own
+ * IP address. */
+ } else if (line.startsWith("reject ")) {
+ if (address != null && line.startsWith("reject " + address)) {
+ scrubbed.append("reject ").append(scrubbedAddress)
+ .append(line.substring("reject ".length() + address.length()))
+ .newLine();
+ } else {
+ scrubbed.append(line).newLine();
+ }
+
+ /* Extract master-key-ed25519 from identity-ed25519. */
+ } else if (line.equals("identity-ed25519")) {
+ StringBuilder sb = new StringBuilder();
+ while ((line = br.readLine()) != null
+ && !line.equals("-----END ED25519 CERT-----")) {
+ if (line.equals("-----BEGIN ED25519 CERT-----")) {
+ continue;
+ }
+ sb.append(line);
+ }
+ masterKeyEd25519FromIdentityEd25519
+ = this.parseMasterKeyEd25519FromIdentityEd25519(sb.toString());
+ if (masterKeyEd25519FromIdentityEd25519 == null) {
+ logger.warn("Could not parse master-key-ed25519 from "
+ + "identity-ed25519. Skipping descriptor.");
+ return false;
+ }
+ String sha256MasterKeyEd25519 = Base64.encodeBase64String(
+ DigestUtils.sha256(Base64.decodeBase64(
+ masterKeyEd25519FromIdentityEd25519 + "=")))
+ .replaceAll("=", "");
+ scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519)
+ .newLine();
+ if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
+ masterKeyEd25519FromIdentityEd25519)) {
+ logger.warn("Mismatch between identity-ed25519 and "
+ + "master-key-ed25519. Skipping.");
+ return false;
+ }
+
+ /* Verify that identity-ed25519 and master-key-ed25519 match. */
+ } else if (line.startsWith("master-key-ed25519 ")) {
+ masterKeyEd25519 = line.substring(line.indexOf(" ") + 1);
+ if (masterKeyEd25519FromIdentityEd25519 != null
+ && !masterKeyEd25519FromIdentityEd25519.equals(
+ masterKeyEd25519)) {
+ logger.warn("Mismatch between identity-ed25519 and "
+ + "master-key-ed25519. Skipping.");
+ return false;
+ }
+
+ /* Write the following lines unmodified to the sanitized
+ * descriptor. */
+ } else if (line.startsWith("accept ")
+ || line.startsWith("platform ")
+ || line.startsWith("opt protocols ")
+ || line.startsWith("protocols ")
+ || line.startsWith("proto ")
+ || line.startsWith("uptime ")
+ || line.startsWith("bandwidth ")
+ || line.startsWith("opt hibernating ")
+ || line.startsWith("hibernating ")
+ || line.startsWith("ntor-onion-key ")
+ || line.equals("opt hidden-service-dir")
+ || line.equals("hidden-service-dir")
+ || line.equals("opt caches-extra-info")
+ || line.equals("caches-extra-info")
+ || line.equals("opt allow-single-hop-exits")
+ || line.equals("allow-single-hop-exits")
+ || line.startsWith("ipv6-policy ")
+ || line.equals("tunnelled-dir-server")
+ || line.startsWith("bridge-distribution-request ")) {
+ scrubbed.append(line).newLine();
+
+ /* Replace node fingerprints in the family line with their hashes
+ * and leave nicknames unchanged. */
+ } else if (line.startsWith("family ")) {
+ DescriptorBuilder familyLine = new DescriptorBuilder("family");
+ for (String s : line.substring(7).split(" ")) {
+ if (s.startsWith("$")) {
+ familyLine.append(" $").append(DigestUtils.sha1Hex(Hex.decodeHex(
+ s.substring(1).toCharArray())).toUpperCase());
+ } else {
+ familyLine.space().append(s);
+ }
+ }
+ scrubbed.append(familyLine.toString()).newLine();
+
+ /* Skip the purpose line that the bridge authority adds to its
+ * cached-descriptors file. */
+ } else if (line.startsWith("@purpose ")) {
+ continue;
+
+ /* Skip all crypto parts that might leak the bridge's identity
+ * fingerprint. */
+ } else if (line.startsWith("-----BEGIN ")
+ || line.equals("onion-key") || line.equals("signing-key")
+ || line.equals("onion-key-crosscert")
+ || line.startsWith("ntor-onion-key-crosscert ")) {
+ skipCrypto = true;
+
+ /* Stop skipping lines when the crypto parts are over. */
+ } else if (line.startsWith("-----END ")) {
+ skipCrypto = false;
+
+ /* Skip the ed25519 signature; we'll include a SHA256 digest of
+ * the SHA256 descriptor digest in router-digest-sha256. */
+ } else if (line.startsWith("router-sig-ed25519 ")) {
+ continue;
+
+ /* If we encounter an unrecognized line, stop parsing and print
+ * out a warning. We might have overlooked sensitive information
+ * that we need to remove or replace for the sanitized descriptor
+ * version. */
+ } else {
+ logger.warn("Unrecognized line '{}'. Skipping.", line);
+ return false;
+ }
+ }
+ } catch (Exception e) {
+ /* Catching Exception also covers unchecked failures from the parsing
+ * above, for example array index errors on short lines. */
+ logger.warn("Could not parse server descriptor.", e);
+ return false;
+ }
+
+ /* Sanitize the parts that we couldn't sanitize earlier. */
+ if (null == address || null == fingerprintBytes
+ || null == this.publishedString) {
+ logger.warn("Missing either of the following lines that are "
+ + "required to sanitize this server bridge descriptor: "
+ + "\"router\", \"fingerprint\", \"published\". Skipping "
+ + "descriptor.");
+ return false;
+ }
+ try {
+ String scrubbedAddressString = this.sensitivePartsSanitizer
+ .scrubIpv4Address(address, fingerprintBytes,
+ this.getPublishedString());
+ if (null == scrubbedAddressString) {
+ logger.warn("Invalid IP address in \"router\" line in bridge server "
+ + "descriptor. Skipping descriptor.");
+ return false;
+ }
+ scrubbedAddress.append(scrubbedAddressString);
+ for (Map.Entry<StringBuilder, String> e
+ : scrubbedIpAddressesAndTcpPorts.entrySet()) {
+ String scrubbedOrAddress = this.sensitivePartsSanitizer
+ .scrubOrAddress(e.getValue(), fingerprintBytes,
+ this.getPublishedString());
+ if (null == scrubbedOrAddress) {
+ logger.warn("Invalid IP address or TCP port in \"or-address\" line "
+ + "in bridge server descriptor. Skipping descriptor.");
+ return false;
+ }
+ e.getKey().append(scrubbedOrAddress);
+ }
+ for (Map.Entry<StringBuilder, String> e : scrubbedTcpPorts.entrySet()) {
+ String scrubbedTcpPort = this.sensitivePartsSanitizer
+ .scrubTcpPort(e.getValue(), fingerprintBytes,
+ this.getPublishedString());
+ if (null == scrubbedTcpPort) {
+ logger.warn("Invalid TCP port in \"router\" line in bridge server "
+ + "descriptor. Skipping descriptor.");
+ return false;
+ }
+ e.getKey().append(scrubbedTcpPort);
+ }
+ } catch (IOException exception) {
+ /* There's a persistence problem, so we shouldn't scrub more IP addresses
+ * or TCP ports in this execution. */
+ return false;
+ }
+
+ /* Determine digest(s) of sanitized server descriptor. */
+ this.descriptorDigest = this.computeDescriptorDigest(this.originalBytes,
+ "router ", "\nrouter-signature\n");
+ String descriptorDigestSha256Base64 = null;
+ if (masterKeyEd25519FromIdentityEd25519 != null) {
+ descriptorDigestSha256Base64 = this.computeSha256Base64Digest(
+ this.originalBytes, "router ", "\n-----END SIGNATURE-----\n");
+ }
+ if (null != descriptorDigestSha256Base64) {
+ scrubbed.append("router-digest-sha256 ")
+ .append(descriptorDigestSha256Base64).newLine();
+ }
+ if (null != this.descriptorDigest) {
+ scrubbed.append("router-digest ")
+ .append(this.descriptorDigest.toUpperCase()).newLine();
+ }
+ this.sanitizedBytes = scrubbed.toBytes();
+ return true;
+ }
+
+ /** Returns the sanitized descriptor bytes, which sanitizeDescriptor()
+ * assigns only when it succeeds. */
+ byte[] getSanitizedBytes() {
+ return this.sanitizedBytes;
+ }
+
+ /** Returns the publication time parsed from the "published" line. */
+ public String getPublishedString() {
+ return this.publishedString;
+ }
+
+ /** Returns the descriptor digest computed by sanitizeDescriptor(), which
+ * may be null. */
+ public String getDescriptorDigest() {
+ return this.descriptorDigest;
+ }
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
index 77ab406..d5009e1 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -6,15 +6,12 @@ package org.torproject.metrics.collector.bridgedescs;
import org.torproject.descriptor.BridgeExtraInfoDescriptor;
import org.torproject.descriptor.BridgeNetworkStatus;
import org.torproject.descriptor.BridgeServerDescriptor;
-import org.torproject.metrics.collector.conf.Annotation;
import org.torproject.metrics.collector.conf.Configuration;
import org.torproject.metrics.collector.conf.ConfigurationException;
import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.cron.CollecTorMain;
import org.torproject.metrics.collector.persist.PersistenceUtils;
-import org.apache.commons.codec.DecoderException;
-import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
@@ -37,14 +34,10 @@ import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
-import java.util.HashMap;
import java.util.HashSet;
-import java.util.Map;
import java.util.Set;
-import java.util.SortedMap;
import java.util.SortedSet;
import java.util.Stack;
-import java.util.TreeMap;
import java.util.TreeSet;
/**
@@ -347,187 +340,18 @@ public class SanitizedBridgesWriter extends CollecTorMain {
public void sanitizeAndStoreNetworkStatus(byte[] data,
String publicationTime, String authorityFingerprint) {
- if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
+ SanitizedBridgeNetworkStatus sanitizedBridgeNetworkStatus
+ = new SanitizedBridgeNetworkStatus(data, this.sensitivePartsSanitizer,
+ publicationTime, authorityFingerprint);
+ if (!sanitizedBridgeNetworkStatus.sanitizeDescriptor()) {
+ logger.warn("Unable to sanitize bridge network status.");
return;
}
-
+ byte[] scrubbedBytes = sanitizedBridgeNetworkStatus.getSanitizedBytes();
+ publicationTime = sanitizedBridgeNetworkStatus.getPublishedString();
if (publicationTime.compareTo(maxNetworkStatusPublishedTime) > 0) {
maxNetworkStatusPublishedTime = publicationTime;
}
-
- /* Parse the given network status line by line. */
- boolean includesFingerprintLine = false;
- DescriptorBuilder scrubbed = new DescriptorBuilder();
- scrubbed.append(Annotation.Status.toString());
- SortedMap<String, String> scrubbedEntries = new TreeMap<>();
- StringBuilder publishedStringBuilder = new StringBuilder();
- scrubbed.append("published ").append(publishedStringBuilder).newLine();
- DescriptorBuilder header = new DescriptorBuilder();
- scrubbed.append(header);
-
- try {
- BufferedReader br = new BufferedReader(new StringReader(new String(
- data, StandardCharsets.US_ASCII)));
- String line;
- String mostRecentDescPublished = null;
- byte[] fingerprintBytes = null;
- String descPublicationTime = null;
- String hashedBridgeIdentityHex = null;
- DescriptorBuilder scrubbedEntry = new DescriptorBuilder();
- while ((line = br.readLine()) != null) {
-
- /* Use publication time from "published" line instead of the
- * file's last-modified time. Don't copy over the line, because
- * we're going to write a "published" line below. */
- if (line.startsWith("published ")) {
- publicationTime = line.substring("published ".length());
-
- /* Additional header lines don't have to be cleaned up. */
- } else if (line.startsWith("flag-thresholds ")) {
- header.append(line).newLine();
-
- /* The authority fingerprint in the "fingerprint" line can go in
- * unscrubbed. */
- } else if (line.startsWith("fingerprint ")) {
- if (!("fingerprint " + authorityFingerprint).equals(line)) {
- logger.warn("Mismatch between authority fingerprint expected from "
- + "file name ({}) and parsed from \"fingerprint\" "
- + "line (\"{}\").", authorityFingerprint, line);
- return;
- }
- header.append(line).newLine();
- includesFingerprintLine = true;
-
- /* r lines contain sensitive information that needs to be removed
- * or replaced. */
- } else if (line.startsWith("r ")) {
-
- /* Clear buffer from previously scrubbed lines. */
- if (scrubbedEntry.hasContent()) {
- scrubbedEntries.put(hashedBridgeIdentityHex,
- scrubbedEntry.toString());
- scrubbedEntry = new DescriptorBuilder();
- }
-
- /* Parse the relevant parts of this r line. */
- String[] parts = line.split(" ");
- if (parts.length < 9) {
- logger.warn("Illegal line '{}' in bridge network "
- + "status. Skipping descriptor.", line);
- return;
- }
- if (!Base64.isBase64(parts[2])) {
- logger.warn("Illegal base64 character in r line '{}'. "
- + "Skipping descriptor.", parts[2]);
- return;
- }
- fingerprintBytes = Base64.decodeBase64(parts[2] + "==");
- descPublicationTime = parts[4] + " " + parts[5];
- String address = parts[6];
- String orPort = parts[7];
- String dirPort = parts[8];
-
- /* Determine most recent descriptor publication time. */
- if (descPublicationTime.compareTo(publicationTime) <= 0
- && (mostRecentDescPublished == null
- || descPublicationTime.compareTo(
- mostRecentDescPublished) > 0)) {
- mostRecentDescPublished = descPublicationTime;
- }
-
- /* Write scrubbed r line to buffer. */
- byte[] hashedBridgeIdentity = DigestUtils.sha1(fingerprintBytes);
- String hashedBridgeIdentityBase64 = Base64.encodeBase64String(
- hashedBridgeIdentity).substring(0, 27);
- hashedBridgeIdentityHex = Hex.encodeHexString(
- hashedBridgeIdentity);
- String descriptorIdentifier = parts[3];
- String hashedDescriptorIdentifier = Base64.encodeBase64String(
- DigestUtils.sha1(Base64.decodeBase64(descriptorIdentifier
- + "=="))).substring(0, 27);
- String scrubbedAddress = this.sensitivePartsSanitizer
- .scrubIpv4Address(address, fingerprintBytes, descPublicationTime);
- String nickname = parts[1];
- String scrubbedOrPort = this.sensitivePartsSanitizer.scrubTcpPort(
- orPort, fingerprintBytes, descPublicationTime);
- String scrubbedDirPort = this.sensitivePartsSanitizer.scrubTcpPort(
- dirPort, fingerprintBytes, descPublicationTime);
- scrubbedEntry.append("r ").append(nickname).space()
- .append(hashedBridgeIdentityBase64).space()
- .append(hashedDescriptorIdentifier).space()
- .append(descPublicationTime).space()
- .append(scrubbedAddress).space()
- .append(scrubbedOrPort).space()
- .append(scrubbedDirPort).newLine();
-
- /* Sanitize any addresses in a lines using the fingerprint and
- * descriptor publication time from the previous r line. */
- } else if (line.startsWith("a ")) {
- String scrubbedOrAddress = this.sensitivePartsSanitizer
- .scrubOrAddress(line.substring("a ".length()), fingerprintBytes,
- descPublicationTime);
- if (scrubbedOrAddress != null) {
- scrubbedEntry.append("a ").append(scrubbedOrAddress).newLine();
- } else {
- logger.warn("Invalid address in line '{}' "
- + "in bridge network status. Skipping line!", line);
- }
-
- /* Nothing special about s, w, and p lines; just copy them. */
- } else if (line.startsWith("s ") || line.equals("s")
- || line.startsWith("w ") || line.equals("w")
- || line.startsWith("p ") || line.equals("p")) {
- scrubbedEntry.append(line).newLine();
-
- /* There should be nothing else but r, a, w, p, and s lines in the
- * network status. If there is, we should probably learn before
- * writing anything to the sanitized descriptors. */
- } else {
- logger.debug("Unknown line '{}' in bridge "
- + "network status. Not writing to disk!", line);
- return;
- }
- }
- br.close();
- if (scrubbedEntry.hasContent()) {
- scrubbedEntries.put(hashedBridgeIdentityHex, scrubbedEntry.toString());
- }
- if (!includesFingerprintLine) {
- header.append("fingerprint ").append(authorityFingerprint).newLine();
- }
-
- /* Check if we can tell from the descriptor publication times
- * whether this status is possibly stale. */
- SimpleDateFormat formatter = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- if (null == mostRecentDescPublished) {
- logger.warn("The bridge network status published at {}"
- + " does not contain a single entry. Please ask the bridge "
- + "authority operator to check!", publicationTime);
- } else if (formatter.parse(publicationTime).getTime()
- - formatter.parse(mostRecentDescPublished).getTime()
- > 60L * 60L * 1000L) {
- logger.warn("The most recent descriptor in the bridge "
- + "network status published at {} was published at {} which is "
- + "more than 1 hour before the status. This is a sign for "
- + "the status being stale. Please check!",
- publicationTime, mostRecentDescPublished);
- }
- } catch (ParseException e) {
- logger.warn("Could not parse timestamp in bridge network status.", e);
- return;
- } catch (IOException e) {
- logger.warn("Could not parse bridge network status.", e);
- return;
- }
-
- /* Write the sanitized network status to disk. */
- publishedStringBuilder.append(publicationTime);
- for (String scrubbedEntry : scrubbedEntries.values()) {
- scrubbed.append(scrubbedEntry);
- }
try {
String syear = publicationTime.substring(0, 4);
String smonth = publicationTime.substring(5, 7);
@@ -543,7 +367,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
Paths.get("statuses", fileName));
for (Path outputFile : new Path[] { tarballFile, rsyncFile }) {
Files.createDirectories(outputFile.getParent());
- Files.write(outputFile, scrubbed.toBytes());
+ Files.write(outputFile, scrubbedBytes);
}
} catch (IOException e) {
logger.warn("Could not write sanitized bridge "
@@ -558,341 +382,21 @@ public class SanitizedBridgesWriter extends CollecTorMain {
*/
public void sanitizeAndStoreServerDescriptor(byte[] data) {
- if (this.sensitivePartsSanitizer.hasPersistenceProblemWithSecrets()) {
- /* There's a persistence problem, so we shouldn't scrub more IP
- * addresses in this execution. */
+ SanitizedBridgeServerDescriptor sanitizedBridgeServerDescriptor
+ = new SanitizedBridgeServerDescriptor(data,
+ this.sensitivePartsSanitizer);
+ if (!sanitizedBridgeServerDescriptor.sanitizeDescriptor()) {
+ logger.warn("Unable to sanitize bridge server descriptor.");
return;
}
-
- /* Parse descriptor to generate a sanitized version. */
- String address = null;
- String published = null;
- byte[] fingerprintBytes = null;
- StringBuilder scrubbedAddress = null;
- Map<StringBuilder, String> scrubbedTcpPorts = new HashMap<>();
- Map<StringBuilder, String> scrubbedIpAddressesAndTcpPorts = new HashMap<>();
- String masterKeyEd25519FromIdentityEd25519 = null;
- DescriptorBuilder scrubbed = new DescriptorBuilder();
- try (BufferedReader br = new BufferedReader(new StringReader(
- new String(data, StandardCharsets.US_ASCII)))) {
- scrubbed.append(Annotation.BridgeServer.toString());
- String line;
- String masterKeyEd25519 = null;
- boolean skipCrypto = false;
- while ((line = br.readLine()) != null) {
-
- /* Skip all crypto parts that might be used to derive the bridge's
- * identity fingerprint. */
- if (skipCrypto && !line.startsWith("-----END ")) {
- continue;
-
- /* Store the router line for later processing, because we may need
- * the bridge identity fingerprint for replacing the IP address in
- * the scrubbed version. */
- } else if (line.startsWith("router ")) {
- String[] parts = line.split(" ");
- if (parts.length != 6) {
- logger.warn("Invalid router line: '{}'. Skipping.", line);
- return;
- }
- address = parts[2];
- scrubbedAddress = new StringBuilder();
- StringBuilder scrubbedOrPort = new StringBuilder();
- scrubbedTcpPorts.put(scrubbedOrPort, parts[3]);
- StringBuilder scrubbedDirPort = new StringBuilder();
- scrubbedTcpPorts.put(scrubbedDirPort, parts[4]);
- StringBuilder scrubbedSocksPort = new StringBuilder();
- scrubbedTcpPorts.put(scrubbedSocksPort, parts[5]);
- scrubbed.append("router ").append(parts[1]).space()
- .append(scrubbedAddress).space()
- .append(scrubbedOrPort).space()
- .append(scrubbedDirPort).space()
- .append(scrubbedSocksPort).newLine();
-
- /* Store or-address and sanitize it when we have read the fingerprint
- * and descriptor publication time. */
- } else if (line.startsWith("or-address ")) {
- String orAddress = line.substring("or-address ".length());
- StringBuilder scrubbedOrAddress = new StringBuilder();
- scrubbedIpAddressesAndTcpPorts.put(scrubbedOrAddress, orAddress);
- scrubbed.append("or-address ").append(scrubbedOrAddress).newLine();
-
- /* Parse the publication time to see if we're still inside the
- * sanitizing interval. */
- } else if (line.startsWith("published ")) {
- published = line.substring("published ".length());
- if (published.compareTo(maxServerDescriptorPublishedTime) > 0) {
- maxServerDescriptorPublishedTime = published;
- }
- scrubbed.append(line).newLine();
-
- /* Parse the fingerprint to determine the hashed bridge
- * identity. */
- } else if (line.startsWith("opt fingerprint ")
- || line.startsWith("fingerprint ")) {
- String fingerprint = line.substring(line.startsWith("opt ")
- ? "opt fingerprint".length() : "fingerprint".length())
- .replaceAll(" ", "").toLowerCase();
- fingerprintBytes = Hex.decodeHex(fingerprint.toCharArray());
- String hashedBridgeIdentity = DigestUtils.sha1Hex(fingerprintBytes)
- .toLowerCase();
- scrubbed.append(line.startsWith("opt ") ? "opt " : "")
- .append("fingerprint");
- for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++) {
- scrubbed.space().append(hashedBridgeIdentity.substring(4 * i,
- 4 * (i + 1)).toUpperCase());
- }
- scrubbed.newLine();
-
- /* Replace the contact line (if present) with a generic one. */
- } else if (line.startsWith("contact ")) {
- scrubbed.append("contact somebody").newLine();
-
- /* When we reach the signature, we're done. Write the sanitized
- * descriptor to disk below. */
- } else if (line.startsWith("router-signature")) {
- break;
-
- /* Replace extra-info digest with the hashed digest of the
- * non-scrubbed descriptor. */
- } else if (line.startsWith("opt extra-info-digest ")
- || line.startsWith("extra-info-digest ")) {
- String[] parts = line.split(" ");
- if (line.startsWith("opt ")) {
- scrubbed.append("opt ");
- parts = line.substring(4).split(" ");
- }
- if (parts.length > 3) {
- logger.warn("extra-info-digest line contains more arguments than"
- + "expected: '{}'. Skipping descriptor.", line);
- return;
- }
- scrubbed.append("extra-info-digest ").append(DigestUtils.sha1Hex(
- Hex.decodeHex(parts[1].toCharArray())).toUpperCase());
- if (parts.length > 2) {
- if (!Base64.isBase64(parts[2])) {
- logger.warn("Illegal base64 character in extra-info-digest line "
- + "'{}'. Skipping descriptor.", line);
- return;
- }
- scrubbed.space().append(Base64.encodeBase64String(
- DigestUtils.sha256(Base64.decodeBase64(parts[2])))
- .replaceAll("=", ""));
- }
- scrubbed.newLine();
-
- /* Possibly sanitize reject lines if they contain the bridge's own
- * IP address. */
- } else if (line.startsWith("reject ")) {
- if (address != null && line.startsWith("reject " + address)) {
- scrubbed.append("reject ").append(scrubbedAddress)
- .append(line.substring("reject ".length() + address.length()))
- .newLine();
- } else {
- scrubbed.append(line).newLine();
- }
-
- /* Extract master-key-ed25519 from identity-ed25519. */
- } else if (line.equals("identity-ed25519")) {
- StringBuilder sb = new StringBuilder();
- while ((line = br.readLine()) != null
- && !line.equals("-----END ED25519 CERT-----")) {
- if (line.equals("-----BEGIN ED25519 CERT-----")) {
- continue;
- }
- sb.append(line);
- }
- masterKeyEd25519FromIdentityEd25519 =
- this.parseMasterKeyEd25519FromIdentityEd25519(
- sb.toString());
- if (masterKeyEd25519FromIdentityEd25519 == null) {
- logger.warn("Could not parse master-key-ed25519 from "
- + "identity-ed25519. Skipping descriptor.");
- return;
- }
- String sha256MasterKeyEd25519 = Base64.encodeBase64String(
- DigestUtils.sha256(Base64.decodeBase64(
- masterKeyEd25519FromIdentityEd25519 + "=")))
- .replaceAll("=", "");
- scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519)
- .newLine();
- if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
- masterKeyEd25519FromIdentityEd25519)) {
- logger.warn("Mismatch between identity-ed25519 and "
- + "master-key-ed25519. Skipping.");
- return;
- }
-
- /* Verify that identity-ed25519 and master-key-ed25519 match. */
- } else if (line.startsWith("master-key-ed25519 ")) {
- masterKeyEd25519 = line.substring(line.indexOf(" ") + 1);
- if (masterKeyEd25519FromIdentityEd25519 != null
- && !masterKeyEd25519FromIdentityEd25519.equals(
- masterKeyEd25519)) {
- logger.warn("Mismatch between identity-ed25519 and "
- + "master-key-ed25519. Skipping.");
- return;
- }
-
- /* Write the following lines unmodified to the sanitized
- * descriptor. */
- } else if (line.startsWith("accept ")
- || line.startsWith("platform ")
- || line.startsWith("opt protocols ")
- || line.startsWith("protocols ")
- || line.startsWith("proto ")
- || line.startsWith("uptime ")
- || line.startsWith("bandwidth ")
- || line.startsWith("opt hibernating ")
- || line.startsWith("hibernating ")
- || line.startsWith("ntor-onion-key ")
- || line.equals("opt hidden-service-dir")
- || line.equals("hidden-service-dir")
- || line.equals("opt caches-extra-info")
- || line.equals("caches-extra-info")
- || line.equals("opt allow-single-hop-exits")
- || line.equals("allow-single-hop-exits")
- || line.startsWith("ipv6-policy ")
- || line.equals("tunnelled-dir-server")
- || line.startsWith("bridge-distribution-request ")) {
- scrubbed.append(line).newLine();
-
- /* Replace node fingerprints in the family line with their hashes
- * and leave nicknames unchanged. */
- } else if (line.startsWith("family ")) {
- DescriptorBuilder familyLine = new DescriptorBuilder("family");
- for (String s : line.substring(7).split(" ")) {
- if (s.startsWith("$")) {
- familyLine.append(" $").append(DigestUtils.sha1Hex(Hex.decodeHex(
- s.substring(1).toCharArray())).toUpperCase());
- } else {
- familyLine.space().append(s);
- }
- }
- scrubbed.append(familyLine.toString()).newLine();
-
- /* Skip the purpose line that the bridge authority adds to its
- * cached-descriptors file. */
- } else if (line.startsWith("@purpose ")) {
- continue;
-
- /* Skip all crypto parts that might leak the bridge's identity
- * fingerprint. */
- } else if (line.startsWith("-----BEGIN ")
- || line.equals("onion-key") || line.equals("signing-key")
- || line.equals("onion-key-crosscert")
- || line.startsWith("ntor-onion-key-crosscert ")) {
- skipCrypto = true;
-
- /* Stop skipping lines when the crypto parts are over. */
- } else if (line.startsWith("-----END ")) {
- skipCrypto = false;
-
- /* Skip the ed25519 signature; we'll include a SHA256 digest of
- * the SHA256 descriptor digest in router-digest-sha256. */
- } else if (line.startsWith("router-sig-ed25519 ")) {
- continue;
-
- /* If we encounter an unrecognized line, stop parsing and print
- * out a warning. We might have overlooked sensitive information
- * that we need to remove or replace for the sanitized descriptor
- * version. */
- } else {
- logger.warn("Unrecognized line '{}'. Skipping.", line);
- return;
- }
- }
- } catch (Exception e) {
- logger.warn("Could not parse server descriptor.", e);
- return;
+ byte[] scrubbedBytes
+ = sanitizedBridgeServerDescriptor.getSanitizedBytes();
+ String published = sanitizedBridgeServerDescriptor.getPublishedString();
+ if (published.compareTo(maxServerDescriptorPublishedTime) > 0) {
+ maxServerDescriptorPublishedTime = published;
}
-
- /* Sanitize the parts that we couldn't sanitize earlier. */
- if (null == address || null == fingerprintBytes
- || null == published) {
- logger.warn("Missing either of the following lines that are "
- + "required to sanitize this server bridge descriptor: "
- + "\"router\", \"fingerprint\", \"published\". Skipping "
- + "descriptor.");
- return;
- }
- try {
- String scrubbedAddressString = this.sensitivePartsSanitizer
- .scrubIpv4Address(address, fingerprintBytes, published);
- if (null == scrubbedAddressString) {
- logger.warn("Invalid IP address in \"router\" line in bridge server "
- + "descriptor. Skipping descriptor.");
- return;
- }
- scrubbedAddress.append(scrubbedAddressString);
- for (Map.Entry<StringBuilder, String> e
- : scrubbedIpAddressesAndTcpPorts.entrySet()) {
- String scrubbedOrAddress = this.sensitivePartsSanitizer
- .scrubOrAddress(e.getValue(), fingerprintBytes, published);
- if (null == scrubbedOrAddress) {
- logger.warn("Invalid IP address or TCP port in \"or-address\" line "
- + "in bridge server descriptor. Skipping descriptor.");
- return;
- }
- e.getKey().append(scrubbedOrAddress);
- }
- for (Map.Entry<StringBuilder, String> e : scrubbedTcpPorts.entrySet()) {
- String scrubbedTcpPort = this.sensitivePartsSanitizer
- .scrubTcpPort(e.getValue(), fingerprintBytes, published);
- if (null == scrubbedTcpPort) {
- logger.warn("Invalid TCP port in \"router\" line in bridge server "
- + "descriptor. Skipping descriptor.");
- return;
- }
- e.getKey().append(scrubbedTcpPort);
- }
- } catch (IOException exception) {
- /* There's a persistence problem, so we shouldn't scrub more IP addresses
- * or TCP ports in this execution. */
- return;
- }
-
- /* Determine digest(s) of sanitized server descriptor. */
- String descriptorDigest = null;
- String ascii = new String(data, StandardCharsets.US_ASCII);
- String startToken = "router ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorDigest = DigestUtils.sha1Hex(DigestUtils.sha1(forDigest));
- }
- if (descriptorDigest == null) {
- logger.warn("Could not calculate server descriptor digest.");
- return;
- }
- String descriptorDigestSha256Base64 = null;
- if (masterKeyEd25519FromIdentityEd25519 != null) {
- ascii = new String(data, StandardCharsets.US_ASCII);
- startToken = "router ";
- sigToken = "\n-----END SIGNATURE-----\n";
- start = ascii.indexOf(startToken);
- sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorDigestSha256Base64 = Base64.encodeBase64String(
- DigestUtils.sha256(DigestUtils.sha256(forDigest)))
- .replaceAll("=", "");
- }
- if (descriptorDigestSha256Base64 == null) {
- logger.warn("Could not calculate server descriptor SHA256 digest.");
- return;
- }
- }
- if (null != descriptorDigestSha256Base64) {
- scrubbed.append("router-digest-sha256 ")
- .append(descriptorDigestSha256Base64).newLine();
- }
- scrubbed.append("router-digest ").append(descriptorDigest.toUpperCase())
- .newLine();
+ String descriptorDigest
+ = sanitizedBridgeServerDescriptor.getDescriptorDigest();
/* Determine filename of sanitized server descriptor. */
String dyear = published.substring(0, 4);
@@ -918,62 +422,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
break;
}
Files.createDirectories(outputFile.getParent());
- Files.write(outputFile, scrubbed.toBytes(), openOption);
+ Files.write(outputFile, scrubbedBytes, openOption);
}
} catch (IOException e) {
logger.warn("Could not write sanitized server descriptor to disk.", e);
}
}
- private String parseMasterKeyEd25519FromIdentityEd25519(
- String identityEd25519Base64) {
- byte[] identityEd25519 = Base64.decodeBase64(identityEd25519Base64);
- if (identityEd25519.length < 40) {
- logger.warn("Invalid length of identity-ed25519 (in bytes): {}",
- identityEd25519.length);
- } else if (identityEd25519[0] != 0x01) {
- logger.warn("Unknown version in identity-ed25519: {}",
- identityEd25519[0]);
- } else if (identityEd25519[1] != 0x04) {
- logger.warn("Unknown cert type in identity-ed25519: {}",
- identityEd25519[1]);
- } else if (identityEd25519[6] != 0x01) {
- logger.warn("Unknown certified key type in identity-ed25519: {}",
- identityEd25519[1]);
- } else if (identityEd25519[39] == 0x00) {
- logger.warn("No extensions in identity-ed25519 (which "
- + "would contain the encoded master-key-ed25519): {}",
- identityEd25519[39]);
- } else {
- int extensionStart = 40;
- for (int i = 0; i < (int) identityEd25519[39]; i++) {
- if (identityEd25519.length < extensionStart + 4) {
- logger.warn("Invalid extension with id {} in identity-ed25519.", i);
- break;
- }
- int extensionLength = identityEd25519[extensionStart];
- extensionLength <<= 8;
- extensionLength += identityEd25519[extensionStart + 1];
- int extensionType = identityEd25519[extensionStart + 2];
- if (extensionLength == 32 && extensionType == 4) {
- if (identityEd25519.length < extensionStart + 4 + 32) {
- logger.warn("Invalid extension with id {} in identity-ed25519.", i);
- break;
- }
- byte[] masterKeyEd25519 = new byte[32];
- System.arraycopy(identityEd25519, extensionStart + 4,
- masterKeyEd25519, 0, masterKeyEd25519.length);
- String masterKeyEd25519Base64 = Base64.encodeBase64String(
- masterKeyEd25519);
- return masterKeyEd25519Base64.replaceAll("=", "");
- }
- extensionStart += 4 + extensionLength;
- }
- }
- logger.warn("Unable to locate master-key-ed25519 in identity-ed25519.");
- return null;
- }
-
private String maxExtraInfoDescriptorPublishedTime =
"1970-01-01 00:00:00";
@@ -982,179 +437,21 @@ public class SanitizedBridgesWriter extends CollecTorMain {
*/
public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) {
- /* Parse descriptor to generate a sanitized version. */
- String published = null;
- String masterKeyEd25519FromIdentityEd25519 = null;
- DescriptorBuilder scrubbed = new DescriptorBuilder();
- try (BufferedReader br = new BufferedReader(new StringReader(new String(
- data, StandardCharsets.US_ASCII)))) {
- scrubbed.append(Annotation.BridgeExtraInfo.toString());
- String line;
- String hashedBridgeIdentity;
- String masterKeyEd25519 = null;
- while ((line = br.readLine()) != null) {
-
- /* Parse bridge identity from extra-info line and replace it with
- * its hash in the sanitized descriptor. */
- String[] parts = line.split(" ");
- if (line.startsWith("extra-info ")) {
- if (parts.length < 3) {
- logger.debug("Illegal line in extra-info descriptor: '{}'. "
- + "Skipping descriptor.", line);
- return;
- }
- hashedBridgeIdentity = DigestUtils.sha1Hex(Hex.decodeHex(
- parts[2].toCharArray())).toLowerCase();
- scrubbed.append("extra-info ").append(parts[1])
- .space().append(hashedBridgeIdentity.toUpperCase()).newLine();
-
- /* Parse the publication time to determine the file name. */
- } else if (line.startsWith("published ")) {
- scrubbed.append(line).newLine();
- published = line.substring("published ".length());
- if (published.compareTo(maxExtraInfoDescriptorPublishedTime)
- > 0) {
- maxExtraInfoDescriptorPublishedTime = published;
- }
-
- /* Remove everything from transport lines except the transport
- * name. */
- } else if (line.startsWith("transport ")) {
- if (parts.length < 3) {
- logger.debug("Illegal line in extra-info descriptor: '{}'. "
- + "Skipping descriptor.", line);
- return;
- }
- scrubbed.append("transport ").append(parts[1]).newLine();
-
- /* Skip transport-info lines entirely. */
- } else if (line.startsWith("transport-info ")) {
-
- /* Extract master-key-ed25519 from identity-ed25519. */
- } else if (line.equals("identity-ed25519")) {
- StringBuilder sb = new StringBuilder();
- while ((line = br.readLine()) != null
- && !line.equals("-----END ED25519 CERT-----")) {
- if (line.equals("-----BEGIN ED25519 CERT-----")) {
- continue;
- }
- sb.append(line);
- }
- masterKeyEd25519FromIdentityEd25519 =
- this.parseMasterKeyEd25519FromIdentityEd25519(
- sb.toString());
- String sha256MasterKeyEd25519 = Base64.encodeBase64String(
- DigestUtils.sha256(Base64.decodeBase64(
- masterKeyEd25519FromIdentityEd25519 + "=")))
- .replaceAll("=", "");
- scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519)
- .newLine();
- if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
- masterKeyEd25519FromIdentityEd25519)) {
- logger.warn("Mismatch between identity-ed25519 and "
- + "master-key-ed25519. Skipping.");
- return;
- }
-
- /* Verify that identity-ed25519 and master-key-ed25519 match. */
- } else if (line.startsWith("master-key-ed25519 ")) {
- masterKeyEd25519 = line.substring(line.indexOf(" ") + 1);
- if (masterKeyEd25519FromIdentityEd25519 != null
- && !masterKeyEd25519FromIdentityEd25519.equals(
- masterKeyEd25519)) {
- logger.warn("Mismatch between identity-ed25519 and "
- + "master-key-ed25519. Skipping.");
- return;
- }
-
- /* Write the following lines unmodified to the sanitized
- * descriptor. */
- } else if (line.startsWith("write-history ")
- || line.startsWith("read-history ")
- || line.startsWith("ipv6-write-history ")
- || line.startsWith("ipv6-read-history ")
- || line.startsWith("geoip-start-time ")
- || line.startsWith("geoip-client-origins ")
- || line.startsWith("geoip-db-digest ")
- || line.startsWith("geoip6-db-digest ")
- || line.startsWith("conn-bi-direct ")
- || line.startsWith("ipv6-conn-bi-direct ")
- || line.startsWith("bridge-")
- || line.startsWith("dirreq-")
- || line.startsWith("cell-")
- || line.startsWith("entry-")
- || line.startsWith("exit-")
- || line.startsWith("hidserv-")
- || line.startsWith("padding-counts ")) {
- scrubbed.append(line).newLine();
-
- /* When we reach the signature, we're done. Write the sanitized
- * descriptor to disk below. */
- } else if (line.startsWith("router-signature")) {
- break;
-
- /* Skip the ed25519 signature; we'll include a SHA256 digest of
- * the SHA256 descriptor digest in router-digest-sha256. */
- } else if (line.startsWith("router-sig-ed25519 ")) {
- continue;
-
- /* If we encounter an unrecognized line, stop parsing and print
- * out a warning. We might have overlooked sensitive information
- * that we need to remove or replace for the sanitized descriptor
- * version. */
- } else {
- logger.warn("Unrecognized line '{}'. Skipping.", line);
- return;
- }
- }
- br.close();
- } catch (DecoderException | IOException e) {
- logger.warn("Could not parse extra-info descriptor.", e);
+ SanitizedBridgeExtraInfoDescriptor sanitizedBridgeExtraInfoDescriptor
+ = new SanitizedBridgeExtraInfoDescriptor(data,
+ this.sensitivePartsSanitizer);
+ if (!sanitizedBridgeExtraInfoDescriptor.sanitizeDescriptor()) {
+ logger.warn("Unable to sanitize bridge extra-info descriptor.");
return;
}
-
- /* Determine filename of sanitized extra-info descriptor. */
- String descriptorDigest = null;
- String ascii = new String(data, StandardCharsets.US_ASCII);
- String startToken = "extra-info ";
- String sigToken = "\nrouter-signature\n";
- int start = ascii.indexOf(startToken);
- int sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorDigest = DigestUtils.sha1Hex(DigestUtils.sha1(forDigest));
- }
- if (descriptorDigest == null) {
- logger.warn("Could not calculate extra-info descriptor digest.");
- return;
- }
- String descriptorDigestSha256Base64 = null;
- if (masterKeyEd25519FromIdentityEd25519 != null) {
- ascii = new String(data, StandardCharsets.US_ASCII);
- startToken = "extra-info ";
- sigToken = "\n-----END SIGNATURE-----\n";
- start = ascii.indexOf(startToken);
- sig = ascii.indexOf(sigToken) + sigToken.length();
- if (start >= 0 && sig >= 0 && sig > start) {
- byte[] forDigest = new byte[sig - start];
- System.arraycopy(data, start, forDigest, 0, sig - start);
- descriptorDigestSha256Base64 = Base64.encodeBase64String(
- DigestUtils.sha256(DigestUtils.sha256(forDigest)))
- .replaceAll("=", "");
- }
- if (descriptorDigestSha256Base64 == null) {
- logger.warn("Could not calculate extra-info "
- + "descriptor SHA256 digest.");
- return;
- }
- }
- if (descriptorDigestSha256Base64 != null) {
- scrubbed.append("router-digest-sha256 ")
- .append(descriptorDigestSha256Base64).newLine();
+ byte[] scrubbedBytes
+ = sanitizedBridgeExtraInfoDescriptor.getSanitizedBytes();
+ String published = sanitizedBridgeExtraInfoDescriptor.getPublishedString();
+ if (published.compareTo(maxExtraInfoDescriptorPublishedTime) > 0) {
+ maxExtraInfoDescriptorPublishedTime = published;
}
- scrubbed.append("router-digest ").append(descriptorDigest.toUpperCase())
- .newLine();
+ String descriptorDigest
+ = sanitizedBridgeExtraInfoDescriptor.getDescriptorDigest();
/* Determine filename of sanitized extra-info descriptor. */
String dyear = published.substring(0, 4);
@@ -1181,7 +478,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
break;
}
Files.createDirectories(outputFile.getParent());
- Files.write(outputFile, scrubbed.toBytes(), openOption);
+ Files.write(outputFile, scrubbedBytes, openOption);
}
} catch (IOException e) {
logger.warn("Could not write sanitized extra-info descriptor to disk.",
1
0

[translation/communitytpo-contentspot] https://gitweb.torproject.org/translation.git/commit/?h=communitytpo-contentspot
by translation@torproject.org 01 Dec '20
by translation@torproject.org 01 Dec '20
01 Dec '20
commit 15ae4300a38f0a0ffe880b9fc764a08039d3d756
Author: Translation commit bot <translation(a)torproject.org>
Date: Tue Dec 1 09:15:13 2020 +0000
https://gitweb.torproject.org/translation.git/commit/?h=communitytpo-contentspot
---
contents+he.po | 35 ++++++++++++++++++++---------------
1 file changed, 20 insertions(+), 15 deletions(-)
diff --git a/contents+he.po b/contents+he.po
index 3c9782e2cd..2e6153112b 100644
--- a/contents+he.po
+++ b/contents+he.po
@@ -15908,12 +15908,12 @@ msgstr "sudo cp $GOPATH/bin/obfs4proxy /usr/local/bin/"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
msgid "And tell selinux to allow Tor to use obfs4proxy:"
-msgstr ""
+msgstr "ואמרו ל selinux לאפשר ל Tor להשתמש ב obfs4proxy:"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
msgid "/sbin/restorecon -v /usr/local/bin/obfs4proxy"
-msgstr ""
+msgstr "/sbin/restorecon -v /usr/local/bin/obfs4proxy"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
@@ -15922,6 +15922,7 @@ msgstr ""
msgid ""
"sudo semanage port -a -t tor_port_t -p tcp [OR port set earlier, in TODO1]"
msgstr ""
+"sudo semanage port -a -t tor_port_t -p tcp [OR port set earlier, in TODO1]"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
@@ -15931,63 +15932,65 @@ msgid ""
"sudo semanage port -a -t tor_port_t -p tcp [obfs4 port set earlier, in "
"TODO2]"
msgstr ""
+"sudo semanage port -a -t tor_port_t -p tcp [obfs4 port set earlier, in "
+"TODO2]"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
msgid "systemctl restart tor"
-msgstr ""
+msgstr "systemctl restart tor"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "### 5. Monitor your logs (usually in your syslog)"
-msgstr ""
+msgstr "### 5. נטרו את הלוגים שלכם (שבדרך כלל נמצאים ב syslog)"
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.title)
msgid "CentOS / RHEL / OpenSUSE"
-msgstr ""
+msgstr "CentOS / RHEL / OpenSUSE"
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.subtitle)
msgid "How to deploy an obfs4 bridge on CentOS / RHEL / OpenSUSE"
-msgstr ""
+msgstr "כיצד ליישם גשר obfs4 על CentOS / RHEL / OpenSUSE"
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "* CentOS / RHEL:"
-msgstr ""
+msgstr "* CentOS / RHEL:"
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "yum install epel-release -y"
-msgstr ""
+msgstr "yum install epel-release -y"
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "Then add the following to the `/etc/yum.repos.d/tor.repo` file."
-msgstr ""
+msgstr "ואז להוסיף אל קובץ `/etc/yum.repos.d/tor.repo` ."
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "And then install the following packages."
-msgstr ""
+msgstr "ואז להתקין את חבילות התוכנה הבאות:"
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "yum install git golang tor policycoreutils-python-utils"
-msgstr ""
+msgstr "yum install git golang tor policycoreutils-python-utils"
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "* OpenSUSE:"
-msgstr ""
+msgstr "* OpenSUSE:"
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "zypper install tor go git"
-msgstr ""
+msgstr "zypper install tor go git"
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
@@ -15995,16 +15998,18 @@ msgid ""
"Heavily outdated versions of git can make `go get` fail, so try upgrading to"
" a more recent git version if you're running into this problem."
msgstr ""
+"גירסות מאד עתיקות של git יכולות לגרום ל `go get` להכשל, כך שנסו לשדרג "
+"לגירסה מתקדמת יותר אם נתקלתם בקשיים."
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "chcon --reference=/usr/bin/tor /usr/local/bin/obfs4proxy"
-msgstr ""
+msgstr "chcon --reference=/usr/bin/tor /usr/local/bin/obfs4proxy"
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "* CentOS / RHEL / OpenSUSE:"
-msgstr ""
+msgstr "* CentOS / RHEL / OpenSUSE:"
#: https//community.torproject.org/relay/setup/bridge/dragonflybsd/
#: (content/relay-operations/technical-setup/bridge/dragonflybsd/contents+en.lrpage.subtitle)
1
0

[translation/communitytpo-contentspot] https://gitweb.torproject.org/translation.git/commit/?h=communitytpo-contentspot
by translation@torproject.org 01 Dec '20
by translation@torproject.org 01 Dec '20
01 Dec '20
commit b02cf1d14997a0dc1860207a3540df635d5ce4e1
Author: Translation commit bot <translation(a)torproject.org>
Date: Tue Dec 1 06:45:10 2020 +0000
https://gitweb.torproject.org/translation.git/commit/?h=communitytpo-contentspot
---
contents+he.po | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/contents+he.po b/contents+he.po
index 4cb4cd0e88..3c9782e2cd 100644
--- a/contents+he.po
+++ b/contents+he.po
@@ -15845,61 +15845,65 @@ msgid ""
"bridge is now running, check out the [post-install "
"notes](https://community.torproject.org/relay/setup/bridge/post-install/)."
msgstr ""
+"אם נתקלתם בקשיים בהגדרת הגשר שלכם, ניתן לעיין ב [our help "
+"section](https://community.torproject.org/relay/getting-help/). אם הגשר שלכם"
+" פועל כעת, בידקו ב [post-install "
+"notes](https://community.torproject.org/relay/setup/bridge/post-install/)."
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.subtitle)
msgid "How to deploy an obfs4 bridge on Fedora"
-msgstr ""
+msgstr "כיצד ליישם גשר obfs4 על Fedora"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "### 1. Install tor and dependencies"
-msgstr ""
+msgstr "### 1. התקינו את tor וה dependencies"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
msgid "Add the following to /etc/yum.repos.d/tor.repo"
-msgstr ""
+msgstr "הוסיפו אל /etc/yum.repos.d/tor.repo את הטקסט הבא"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
msgid "Then install tor and the dependencies."
-msgstr ""
+msgstr "ואז התקינו את tor והתלויות שלו."
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
msgid "dnf install git golang tor policycoreutils-python-utils"
-msgstr ""
+msgstr "dnf install git golang tor policycoreutils-python-utils"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "### 2. Build obfs4proxy and move it into place."
-msgstr ""
+msgstr "### 2. בנו obfs4proxy והציבו אותו במקומו."
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "export GOPATH=`mktemp -d`"
-msgstr ""
+msgstr "export GOPATH=`mktemp -d`"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "go get gitlab.com/yawning/obfs4.git/obfs4proxy"
-msgstr ""
+msgstr "go get gitlab.com/yawning/obfs4.git/obfs4proxy"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
#: https//community.torproject.org/relay/setup/bridge/centos-rhel-opensuse/
#: (content/relay-operations/technical-setup/bridge/centos-rhel-opensuse/contents+en.lrpage.body)
msgid "sudo cp $GOPATH/bin/obfs4proxy /usr/local/bin/"
-msgstr ""
+msgstr "sudo cp $GOPATH/bin/obfs4proxy /usr/local/bin/"
#: https//community.torproject.org/relay/setup/bridge/fedora/
#: (content/relay-operations/technical-setup/bridge/fedora/contents+en.lrpage.body)
1
0

[translation/communitytpo-contentspot] https://gitweb.torproject.org/translation.git/commit/?h=communitytpo-contentspot
by translation@torproject.org 01 Dec '20
by translation@torproject.org 01 Dec '20
01 Dec '20
commit 0d683fd2364a0a5d3e03d2bfc2ae52549508ea26
Author: Translation commit bot <translation(a)torproject.org>
Date: Tue Dec 1 06:15:18 2020 +0000
https://gitweb.torproject.org/translation.git/commit/?h=communitytpo-conten…
---
contents+he.po | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/contents+he.po b/contents+he.po
index de997f95b0..4cb4cd0e88 100644
--- a/contents+he.po
+++ b/contents+he.po
@@ -15777,7 +15777,7 @@ msgstr "ServerTransportPlugin obfs4 exec /usr/pkg/bin/obfs4proxy"
#: https//community.torproject.org/relay/setup/bridge/netbsd/
#: (content/relay-operations/technical-setup/bridge/netbsd/contents+en.lrpage.body)
msgid "Log notice syslog"
-msgstr ""
+msgstr "Log notice syslog"
#: https//community.torproject.org/relay/setup/bridge/netbsd/
#: (content/relay-operations/technical-setup/bridge/netbsd/contents+en.lrpage.body)
@@ -15789,6 +15789,10 @@ msgid ""
"You can use [our reachability test](https://bridges.torproject.org/scan/) to"
" see if your obfs4 port is reachable from the Internet."
msgstr ""
+"* שימו לב שגם ערוץ Tor OR וגם ערוץ obfs4 שלו חייבים לאפשר גישה אליהם. אם "
+"הגשר שלכם נמצא מאחורי חומת אש או NAT, וודאו שפתחתם את שניהם. אתם יכולים "
+"להעזר ב [our reachability test](https://bridges.torproject.org/scan/) כדי "
+"לבחון אם ערוץ obfs4 ניתן לגישה מהאינטרנט."
#: https//community.torproject.org/relay/setup/bridge/netbsd/
#: (content/relay-operations/technical-setup/bridge/netbsd/contents+en.lrpage.body)
@@ -15797,16 +15801,18 @@ msgid ""
"talk to `tor` over the loopback interface - do not forget to allow the "
"**ExtORPort**."
msgstr ""
+"* האם אתם מגינים על NetBSD בחומת אש? אם כך, וודאו ש `obfs4proxy` יכול לדבר "
+"עם `tor` מעל ממשק loopback - לא לשכוח להרשות את **ExtORPort**."
#: https//community.torproject.org/relay/setup/bridge/netbsd/
#: (content/relay-operations/technical-setup/bridge/netbsd/contents+en.lrpage.body)
msgid "### 3. Start `tor`:"
-msgstr ""
+msgstr "### 3. אתחלו את `tor`:"
#: https//community.torproject.org/relay/setup/bridge/netbsd/
#: (content/relay-operations/technical-setup/bridge/netbsd/contents+en.lrpage.body)
msgid "### 4. Monitor your logs"
-msgstr ""
+msgstr "### 4. נטרו את הלוגים שלכם"
#: https//community.torproject.org/relay/setup/bridge/netbsd/
#: (content/relay-operations/technical-setup/bridge/netbsd/contents+en.lrpage.body)
@@ -15817,17 +15823,17 @@ msgstr ""
msgid ""
"To confirm your bridge is running with no issues, you should see something "
"like this:"
-msgstr ""
+msgstr "לאשר שהגשר שלכם פועל ללא תקלות, אתם צריכים לראות משהו כגון זה:"
#: https//community.torproject.org/relay/setup/bridge/netbsd/
#: (content/relay-operations/technical-setup/bridge/netbsd/contents+en.lrpage.body)
msgid "tail /var/log/messages"
-msgstr ""
+msgstr "tail /var/log/messages"
#: https//community.torproject.org/relay/setup/bridge/netbsd/
#: (content/relay-operations/technical-setup/bridge/netbsd/contents+en.lrpage.body)
msgid "### 5. Final Notes"
-msgstr ""
+msgstr "### 5. הערות אחרונות"
#: https//community.torproject.org/relay/setup/bridge/netbsd/
#: (content/relay-operations/technical-setup/bridge/netbsd/contents+en.lrpage.body)
1
0

[tor-browser/tor-browser-78.5.0esr-10.5-1] squash! TB3: Tor Browser's official .mozconfigs.
by sysrqb@torproject.org 01 Dec '20
by sysrqb@torproject.org 01 Dec '20
01 Dec '20
commit 7a73628afb1c48d48e1b272382f4a51728ef00aa
Author: Georg Koppen <gk(a)torproject.org>
Date: Mon Nov 30 07:54:38 2020 +0000
squash! TB3: Tor Browser's official .mozconfigs.
Bug 40252: Add --enable-rust-simd to our tor-browser mozconfig files
---
.mozconfig | 1 +
.mozconfig-android | 1 +
.mozconfig-asan | 1 +
.mozconfig-mac | 1 +
.mozconfig-mingw | 1 +
5 files changed, 5 insertions(+)
diff --git a/.mozconfig b/.mozconfig
index d71c858844e3..c50c57d410de 100755
--- a/.mozconfig
+++ b/.mozconfig
@@ -14,6 +14,7 @@ mk_add_options MOZ_APP_DISPLAYNAME="Tor Browser"
export MOZILLA_OFFICIAL=1
ac_add_options --enable-optimize
+ac_add_options --enable-rust-simd
ac_add_options --enable-official-branding
# Let's support GTK3 for ESR60
diff --git a/.mozconfig-android b/.mozconfig-android
index 1b5e3f3178b7..50015ec615ef 100755
--- a/.mozconfig-android
+++ b/.mozconfig-android
@@ -3,6 +3,7 @@ mk_add_options MOZ_APP_DISPLAYNAME="Tor Browser"
export MOZILLA_OFFICIAL=1
ac_add_options --enable-optimize
+ac_add_options --enable-rust-simd
ac_add_options --enable-official-branding
# Android
diff --git a/.mozconfig-asan b/.mozconfig-asan
index a1c4e467cf1a..8bee813bfee8 100644
--- a/.mozconfig-asan
+++ b/.mozconfig-asan
@@ -23,6 +23,7 @@ ac_add_options --disable-elf-hack
ac_add_options --with-clang-path=/var/tmp/dist/clang/bin/clang
ac_add_options --enable-optimize
+ac_add_options --enable-rust-simd
ac_add_options --enable-official-branding
# Let's support GTK3 for ESR60
diff --git a/.mozconfig-mac b/.mozconfig-mac
index 9be7751f8241..5b4624ef1f67 100644
--- a/.mozconfig-mac
+++ b/.mozconfig-mac
@@ -38,6 +38,7 @@ ac_add_options --enable-application=browser
ac_add_options --enable-strip
ac_add_options --enable-official-branding
ac_add_options --enable-optimize
+ac_add_options --enable-rust-simd
ac_add_options --disable-debug
ac_add_options --enable-tor-browser-data-outside-app-dir
diff --git a/.mozconfig-mingw b/.mozconfig-mingw
index 29c58d8fdab2..ce6ace1dad67 100644
--- a/.mozconfig-mingw
+++ b/.mozconfig-mingw
@@ -10,6 +10,7 @@ export MOZILLA_OFFICIAL=1
ac_add_options --disable-debug
ac_add_options --enable-optimize
+ac_add_options --enable-rust-simd
ac_add_options --enable-strip
ac_add_options --enable-official-branding
1
0