tor-commits
Threads by month
- ----- 2025 -----
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
February 2018
- 19 participants
- 1579 discussions

[collector/release] Separate parsing and sanitizing steps for bridge descriptors.
by karsten@torproject.org 26 Feb '18
26 Feb '18
commit d5aba97f9b6c4ee74735b183552b8435e5e0661b
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Oct 27 19:26:57 2017 +0200
Separate parsing and sanitizing steps for bridge descriptors.
First step towards implementing #20549.
---
.../SanitizedBridgeDescriptorBuilder.java | 54 +++++
.../bridgedescs/SanitizedBridgesWriter.java | 240 +++++++++------------
2 files changed, 161 insertions(+), 133 deletions(-)
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
new file mode 100644
index 0000000..174a5ae
--- /dev/null
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
@@ -0,0 +1,54 @@
+package org.torproject.collector.bridgedescs;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Builder for sanitized bridge descriptors.
+ *
+ * <p>This builder class can be used while parsing and sanitizing an original
+ * bridge descriptor. It accepts already sanitized {@code String}s and
+ * {@code StringBuilder}s as placeholders for parts that can only be sanitized
+ * after finishing the parsing step.</p> */
+class SanitizedBridgeDescriptorBuilder {
+
+ private List<StringBuilder> descriptorParts;
+
+ private StringBuilder lastDescriptorPart;
+
+ SanitizedBridgeDescriptorBuilder() {
+ this.descriptorParts = new ArrayList<>();
+ this.lastDescriptorPart = new StringBuilder();
+ this.descriptorParts.add(this.lastDescriptorPart);
+ }
+
+ SanitizedBridgeDescriptorBuilder append(String sanitizedString) {
+ this.lastDescriptorPart.append(sanitizedString);
+ return this;
+ }
+
+ SanitizedBridgeDescriptorBuilder append(StringBuilder placeholder) {
+ this.descriptorParts.add(placeholder);
+ this.lastDescriptorPart = new StringBuilder();
+ this.descriptorParts.add(this.lastDescriptorPart);
+ return this;
+ }
+
+ SanitizedBridgeDescriptorBuilder space() {
+ this.lastDescriptorPart.append(' ');
+ return this;
+ }
+
+ SanitizedBridgeDescriptorBuilder newLine() {
+ this.lastDescriptorPart.append('\n');
+ return this;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder fullDescriptor = new StringBuilder();
+ for (StringBuilder descriptorPart : this.descriptorParts) {
+ fullDescriptor.append(descriptorPart);
+ }
+ return fullDescriptor.toString();
+ }
+}
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index e257245..1ef1d60 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -34,6 +34,7 @@ import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
@@ -671,23 +672,20 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
/* Parse descriptor to generate a sanitized version. */
- String scrubbedDesc = null;
+ String address = null;
String published = null;
+ byte[] fingerprintBytes = null;
+ StringBuilder scrubbedAddress = null;
+ Map<StringBuilder, String> scrubbedTcpPorts = new HashMap<>();
+ Map<StringBuilder, String> scrubbedIpAddressesAndTcpPorts = new HashMap<>();
String masterKeyEd25519FromIdentityEd25519 = null;
- try {
- BufferedReader br = new BufferedReader(new StringReader(
- new String(data, "US-ASCII")));
- StringBuilder scrubbed = new StringBuilder();
- String line = null;
- byte[] fingerprintBytes = null;
- String hashedBridgeIdentity = null;
- String address = null;
- String routerLine = null;
- String scrubbedRouterLine = null;
- String scrubbedAddress = null;
+ SanitizedBridgeDescriptorBuilder scrubbed =
+ new SanitizedBridgeDescriptorBuilder();
+ try (BufferedReader br = new BufferedReader(new StringReader(
+ new String(data, "US-ASCII")))) {
+ scrubbed.append(Annotation.BridgeServer.toString());
+ String line;
String masterKeyEd25519 = null;
- List<String> orAddresses = null;
- List<String> scrubbedOrAddresses = null;
boolean skipCrypto = false;
while ((line = br.readLine()) != null) {
@@ -706,15 +704,26 @@ public class SanitizedBridgesWriter extends CollecTorMain {
return;
}
address = parts[2];
- routerLine = line;
-
- /* Store or-address parts in a list and sanitize them when we have
- * read the fingerprint. */
+ scrubbedAddress = new StringBuilder();
+ StringBuilder scrubbedOrPort = new StringBuilder();
+ scrubbedTcpPorts.put(scrubbedOrPort, parts[3]);
+ StringBuilder scrubbedDirPort = new StringBuilder();
+ scrubbedTcpPorts.put(scrubbedDirPort, parts[4]);
+ StringBuilder scrubbedSocksPort = new StringBuilder();
+ scrubbedTcpPorts.put(scrubbedSocksPort, parts[5]);
+ scrubbed.append("router ").append(parts[1]).space()
+ .append(scrubbedAddress).space()
+ .append(scrubbedOrPort).space()
+ .append(scrubbedDirPort).space()
+ .append(scrubbedSocksPort).newLine();
+
+ /* Store or-address and sanitize it when we have read the fingerprint
+ * and descriptor publication time. */
} else if (line.startsWith("or-address ")) {
- if (orAddresses == null) {
- orAddresses = new ArrayList<>();
- }
- orAddresses.add(line.substring("or-address ".length()));
+ String orAddress = line.substring("or-address ".length());
+ StringBuilder scrubbedOrAddress = new StringBuilder();
+ scrubbedIpAddressesAndTcpPorts.put(scrubbedOrAddress, orAddress);
+ scrubbed.append("or-address ").append(scrubbedOrAddress).newLine();
/* Parse the publication time to see if we're still inside the
* sanitizing interval. */
@@ -735,21 +744,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
this.haveWarnedAboutInterval = true;
}
}
- if (null != fingerprintBytes) {
- /* We have read both published and fingerprint lines that we need to
- * scrub the bridge's address which we might need to scrub reject
- * lines. */
- try {
- scrubbedAddress = scrubIpv4Address(address, fingerprintBytes,
- published);
- } catch (IOException e) {
- /* There's a persistence problem, so we shouldn't scrub more
- * IP addresses in this execution. */
- this.persistenceProblemWithSecrets = true;
- return;
- }
- }
- scrubbed.append(line + "\n");
+ scrubbed.append(line).newLine();
/* Parse the fingerprint to determine the hashed bridge
* identity. */
@@ -759,91 +754,23 @@ public class SanitizedBridgesWriter extends CollecTorMain {
? "opt fingerprint".length() : "fingerprint".length())
.replaceAll(" ", "").toLowerCase();
fingerprintBytes = Hex.decodeHex(fingerprint.toCharArray());
- hashedBridgeIdentity = DigestUtils.sha1Hex(fingerprintBytes)
+ String hashedBridgeIdentity = DigestUtils.sha1Hex(fingerprintBytes)
.toLowerCase();
- if (null != published) {
- /* We have read both published and fingerprint lines that we need to
- * scrub the bridge's address which we might need to scrub reject
- * lines. */
- try {
- scrubbedAddress = scrubIpv4Address(address, fingerprintBytes,
- published);
- } catch (IOException e) {
- /* There's a persistence problem, so we shouldn't scrub more
- * IP addresses in this execution. */
- this.persistenceProblemWithSecrets = true;
- return;
- }
- }
- scrubbed.append((line.startsWith("opt ") ? "opt " : "")
- + "fingerprint");
+ scrubbed.append(line.startsWith("opt ") ? "opt " : "")
+ .append("fingerprint");
for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++) {
- scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i,
+ scrubbed.space().append(hashedBridgeIdentity.substring(4 * i,
4 * (i + 1)).toUpperCase());
}
- scrubbed.append("\n");
+ scrubbed.newLine();
/* Replace the contact line (if present) with a generic one. */
} else if (line.startsWith("contact ")) {
- scrubbed.append("contact somebody\n");
+ scrubbed.append("contact somebody").newLine();
/* When we reach the signature, we're done. Write the sanitized
* descriptor to disk below. */
} else if (line.startsWith("router-signature")) {
-
- /* Write the scrubbed "router" line now based on the "router",
- * "fingerprint", and "published" lines that we read before. Also
- * scrub any "or-address" lines. */
- if (null == routerLine || null == fingerprintBytes
- || null == published) {
- logger.warn("Missing either of the following lines that are "
- + "required to sanitize this server bridge descriptor: "
- + "\"router\", \"fingerprint\", \"published\". Skipping "
- + "descriptor.");
- return;
- }
- try {
- if (orAddresses != null) {
- scrubbedOrAddresses = new ArrayList<>();
- for (String orAddress : orAddresses) {
- String scrubbedOrAddress = scrubOrAddress(orAddress,
- fingerprintBytes, published);
- if (scrubbedOrAddress != null) {
- scrubbedOrAddresses.add(scrubbedOrAddress);
- } else {
- logger.warn("Invalid address in line "
- + "'or-address " + orAddress + "' in bridge server "
- + "descriptor. Skipping line!");
- }
- }
- }
- String[] routerLineParts = routerLine.split(" ");
- String nickname = routerLineParts[1];
- String scrubbedOrPort = this.scrubTcpPort(routerLineParts[3],
- fingerprintBytes, published);
- String scrubbedDirPort = this.scrubTcpPort(routerLineParts[4],
- fingerprintBytes, published);
- String scrubbedSocksPort = this.scrubTcpPort(
- routerLineParts[5], fingerprintBytes, published);
- scrubbedRouterLine = String.format("router %s %s %s %s %s%n",
- nickname, scrubbedAddress, scrubbedOrPort,
- scrubbedDirPort, scrubbedSocksPort);
- } catch (IOException e) {
- /* There's a persistence problem, so we shouldn't scrub more
- * IP addresses in this execution. */
- this.persistenceProblemWithSecrets = true;
- return;
- }
-
- /* Put together the scrubbed descriptor from "router" to the newline
- * before the original "router-signature" line. */
- scrubbedDesc = scrubbedRouterLine;
- if (scrubbedOrAddresses != null) {
- for (String scrubbedOrAddress : scrubbedOrAddresses) {
- scrubbedDesc += "or-address " + scrubbedOrAddress + "\n";
- }
- }
- scrubbedDesc += scrubbed.toString();
break;
/* Replace extra-info digest with the hashed digest of the
@@ -860,7 +787,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
+ "expected: '" + line + "'. Skipping descriptor.");
return;
}
- scrubbed.append("extra-info-digest " + DigestUtils.sha1Hex(
+ scrubbed.append("extra-info-digest ").append(DigestUtils.sha1Hex(
Hex.decodeHex(parts[1].toCharArray())).toUpperCase());
if (parts.length > 2) {
if (!Base64.isBase64(parts[2])) {
@@ -868,21 +795,21 @@ public class SanitizedBridgesWriter extends CollecTorMain {
+ line + "'. Skipping descriptor.");
return;
}
- scrubbed.append(" " + Base64.encodeBase64String(
+ scrubbed.space().append(Base64.encodeBase64String(
DigestUtils.sha256(Base64.decodeBase64(parts[2])))
.replaceAll("=", ""));
}
- scrubbed.append("\n");
+ scrubbed.newLine();
/* Possibly sanitize reject lines if they contain the bridge's own
* IP address. */
} else if (line.startsWith("reject ")) {
if (address != null && line.startsWith("reject " + address)) {
- scrubbed.append("reject " + scrubbedAddress
- + line.substring("reject ".length() + address.length())
- + "\n");
+ scrubbed.append("reject ").append(scrubbedAddress)
+ .append(line.substring("reject ".length() + address.length()))
+ .newLine();
} else {
- scrubbed.append(line + "\n");
+ scrubbed.append(line).newLine();
}
/* Extract master-key-ed25519 from identity-ed25519. */
@@ -907,8 +834,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
DigestUtils.sha256(Base64.decodeBase64(
masterKeyEd25519FromIdentityEd25519 + "=")))
.replaceAll("=", "");
- scrubbed.append("master-key-ed25519 " + sha256MasterKeyEd25519
- + "\n");
+ scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519)
+ .newLine();
if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
masterKeyEd25519FromIdentityEd25519)) {
logger.warn("Mismatch between identity-ed25519 and "
@@ -948,7 +875,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
|| line.startsWith("ipv6-policy ")
|| line.equals("tunnelled-dir-server")
|| line.startsWith("bridge-distribution-request ")) {
- scrubbed.append(line + "\n");
+ scrubbed.append(line).newLine();
/* Replace node fingerprints in the family line with their hashes
* and leave nicknames unchanged. */
@@ -956,13 +883,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
StringBuilder familyLine = new StringBuilder("family");
for (String s : line.substring(7).split(" ")) {
if (s.startsWith("$")) {
- familyLine.append(" $" + DigestUtils.sha1Hex(Hex.decodeHex(
+ familyLine.append(" $").append(DigestUtils.sha1Hex(Hex.decodeHex(
s.substring(1).toCharArray())).toUpperCase());
} else {
- familyLine.append(" " + s);
+ familyLine.append(" ").append(s);
}
}
- scrubbed.append(familyLine.toString() + "\n");
+ scrubbed.append(familyLine.toString()).newLine();
/* Skip the purpose line that the bridge authority adds to its
* cached-descriptors file. */
@@ -1003,7 +930,53 @@ public class SanitizedBridgesWriter extends CollecTorMain {
return;
}
- /* Determine filename of sanitized server descriptor. */
+ /* Sanitize the parts that we couldn't sanitize earlier. */
+ if (null == address || null == fingerprintBytes
+ || null == published) {
+ logger.warn("Missing either of the following lines that are "
+ + "required to sanitize this server bridge descriptor: "
+ + "\"router\", \"fingerprint\", \"published\". Skipping "
+ + "descriptor.");
+ return;
+ }
+ try {
+ String scrubbedAddressString = scrubIpv4Address(address, fingerprintBytes,
+ published);
+ if (null == scrubbedAddressString) {
+ logger.warn("Invalid IP address in \"router\" line in bridge server "
+ + "descriptor. Skipping descriptor.");
+ return;
+ }
+ scrubbedAddress.append(scrubbedAddressString);
+ for (Map.Entry<StringBuilder, String> e
+ : scrubbedIpAddressesAndTcpPorts.entrySet()) {
+ String scrubbedOrAddress = scrubOrAddress(e.getValue(),
+ fingerprintBytes, published);
+ if (null == scrubbedOrAddress) {
+ logger.warn("Invalid IP address or TCP port in \"or-address\" line "
+ + "in bridge server descriptor. Skipping descriptor.");
+ return;
+ }
+ e.getKey().append(scrubbedOrAddress);
+ }
+ for (Map.Entry<StringBuilder, String> e : scrubbedTcpPorts.entrySet()) {
+ String scrubbedTcpPort = scrubTcpPort(e.getValue(), fingerprintBytes,
+ published);
+ if (null == scrubbedTcpPort) {
+ logger.warn("Invalid TCP port in \"router\" line in bridge server "
+ + "descriptor. Skipping descriptor.");
+ return;
+ }
+ e.getKey().append(scrubbedTcpPort);
+ }
+ } catch (IOException exception) {
+ /* There's a persistence problem, so we shouldn't scrub more IP addresses
+ * or TCP ports in this execution. */
+ this.persistenceProblemWithSecrets = true;
+ return;
+ }
+
+ /* Determine digest(s) of sanitized server descriptor. */
String descriptorDigest = null;
try {
String ascii = new String(data, "US-ASCII");
@@ -1048,6 +1021,14 @@ public class SanitizedBridgesWriter extends CollecTorMain {
return;
}
}
+ if (null != descriptorDigestSha256Base64) {
+ scrubbed.append("router-digest-sha256 ")
+ .append(descriptorDigestSha256Base64).newLine();
+ }
+ scrubbed.append("router-digest ").append(descriptorDigest.toUpperCase())
+ .newLine();
+
+ /* Determine filename of sanitized server descriptor. */
String dyear = published.substring(0, 4);
String dmonth = published.substring(5, 7);
File tarballFile = new File(
@@ -1073,14 +1054,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
outputFile.getParentFile().mkdirs();
BufferedWriter bw = new BufferedWriter(new FileWriter(
outputFile, appendToFile));
- bw.write(Annotation.BridgeServer.toString());
- bw.write(scrubbedDesc);
- if (descriptorDigestSha256Base64 != null) {
- bw.write("router-digest-sha256 " + descriptorDigestSha256Base64
- + "\n");
- }
- bw.write("router-digest " + descriptorDigest.toUpperCase()
- + "\n");
+ bw.write(scrubbed.toString());
bw.close();
}
} catch (ConfigurationException | IOException e) {
1
0

[collector/release] Make logging statements comply to Metrics' standards.
by karsten@torproject.org 26 Feb '18
26 Feb '18
commit 43cd15876635d763d0f6adbf6bcc5c7df6380406
Author: iwakeh <iwakeh(a)torproject.org>
Date: Fri Oct 27 17:35:18 2017 +0000
Make logging statements comply to Metrics' standards.
Also edit here and there for more readability and less lines.
---
.../bridgedescs/SanitizedBridgesWriter.java | 145 +++++++++------------
1 file changed, 62 insertions(+), 83 deletions(-)
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index b4cd49e..22bf8f7 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -167,9 +167,9 @@ public class SanitizedBridgesWriter extends CollecTorMain {
&& line.length() != ("yyyy-MM,".length() + 83 * 2))
|| parts.length != 2) {
logger.warn("Invalid line in bridge-ip-secrets file "
- + "starting with '" + line.substring(0, 7) + "'! "
+ + "starting with '{}'! "
+ "Not calculating any IP address hashes in this "
- + "execution!");
+ + "execution!", line.substring(0, 7));
this.persistenceProblemWithSecrets = true;
break;
}
@@ -178,19 +178,16 @@ public class SanitizedBridgesWriter extends CollecTorMain {
this.secretsForHashingIpAddresses.put(month, secret);
}
if (!this.persistenceProblemWithSecrets) {
- logger.debug("Read "
- + this.secretsForHashingIpAddresses.size() + " secrets for "
- + "hashing bridge IP addresses.");
+ logger.debug("Read {} secrets for hashing bridge IP addresses.",
+ this.secretsForHashingIpAddresses.size());
}
} catch (DecoderException e) {
- logger.warn("Failed to decode hex string in "
- + this.bridgeIpSecretsFile + "! Not calculating any IP "
- + "address hashes in this execution!", e);
+ logger.warn("Failed to decode hex string in {}! Not calculating any IP "
+ + "address hashes in this execution!", this.bridgeIpSecretsFile, e);
this.persistenceProblemWithSecrets = true;
} catch (IOException e) {
- logger.warn("Failed to read "
- + this.bridgeIpSecretsFile + "! Not calculating any IP "
- + "address hashes in this execution!", e);
+ logger.warn("Failed to read {}! Not calculating any IP "
+ + "address hashes in this execution!", this.bridgeIpSecretsFile, e);
this.persistenceProblemWithSecrets = true;
}
}
@@ -490,8 +487,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
} else if (line.startsWith("fingerprint ")) {
if (!("fingerprint " + authorityFingerprint).equals(line)) {
logger.warn("Mismatch between authority fingerprint expected from "
- + "file name (" + authorityFingerprint + ") and parsed from "
- + "\"fingerprint\" line (\"" + line + "\").");
+ + "file name ({}) and parsed from \"fingerprint\" "
+ + "line (\"{}\").", authorityFingerprint, line);
return;
}
header.append(line).newLine();
@@ -511,13 +508,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
/* Parse the relevant parts of this r line. */
String[] parts = line.split(" ");
if (parts.length < 9) {
- logger.warn("Illegal line '" + line + "' in bridge network "
- + "status. Skipping descriptor.");
+ logger.warn("Illegal line '{}' in bridge network "
+ + "status. Skipping descriptor.", line);
return;
}
if (!Base64.isBase64(parts[2])) {
- logger.warn("Illegal base64 character in r line '" + parts[2]
- + "'. Skipping descriptor.");
+ logger.warn("Illegal base64 character in r line '{}'. "
+ + "Skipping descriptor.", parts[2]);
return;
}
fingerprintBytes = Base64.decodeBase64(parts[2] + "==");
@@ -567,8 +564,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
if (scrubbedOrAddress != null) {
scrubbed.append("a " + scrubbedOrAddress + "\n");
} else {
- logger.warn("Invalid address in line '" + line
- + "' in bridge network status. Skipping line!");
+ logger.warn("Invalid address in line '{}' "
+ + "in bridge network status. Skipping line!", line);
}
/* Nothing special about s, w, and p lines; just copy them. */
@@ -581,8 +578,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
* network status. If there is, we should probably learn before
* writing anything to the sanitized descriptors. */
} else {
- logger.debug("Unknown line '" + line + "' in bridge "
- + "network status. Not writing to disk!");
+ logger.debug("Unknown line '{}' in bridge "
+ + "network status. Not writing to disk!", line);
return;
}
}
@@ -602,25 +599,23 @@ public class SanitizedBridgesWriter extends CollecTorMain {
"yyyy-MM-dd HH:mm:ss");
formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
if (null == mostRecentDescPublished) {
- logger.warn("The bridge network status published at " + publicationTime
+ logger.warn("The bridge network status published at {}"
+ " does not contain a single entry. Please ask the bridge "
- + "authority operator to check!");
+ + "authority operator to check!", publicationTime);
} else if (formatter.parse(publicationTime).getTime()
- formatter.parse(mostRecentDescPublished).getTime()
> 60L * 60L * 1000L) {
logger.warn("The most recent descriptor in the bridge "
- + "network status published at " + publicationTime + " was "
- + "published at " + mostRecentDescPublished + " which is "
+ + "network status published at {} was published at {} which is "
+ "more than 1 hour before the status. This is a sign for "
- + "the status being stale. Please check!");
+ + "the status being stale. Please check!",
+ publicationTime, mostRecentDescPublished);
}
} catch (ParseException e) {
- logger.warn("Could not parse timestamp in "
- + "bridge network status.", e);
+ logger.warn("Could not parse timestamp in bridge network status.", e);
return;
} catch (IOException e) {
- logger.warn("Could not parse bridge network "
- + "status.", e);
+ logger.warn("Could not parse bridge network status.", e);
return;
}
@@ -699,7 +694,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
} else if (line.startsWith("router ")) {
String[] parts = line.split(" ");
if (parts.length != 6) {
- logger.warn("Invalid router line: '" + line + "'. Skipping.");
+ logger.warn("Invalid router line: '{}'. Skipping.", line);
return;
}
address = parts[2];
@@ -783,15 +778,15 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
if (parts.length > 3) {
logger.warn("extra-info-digest line contains more arguments than"
- + "expected: '" + line + "'. Skipping descriptor.");
+ + "expected: '{}'. Skipping descriptor.", line);
return;
}
scrubbed.append("extra-info-digest ").append(DigestUtils.sha1Hex(
Hex.decodeHex(parts[1].toCharArray())).toUpperCase());
if (parts.length > 2) {
if (!Base64.isBase64(parts[2])) {
- logger.warn("Illegal base64 character in extra-info-digest line '"
- + line + "'. Skipping descriptor.");
+ logger.warn("Illegal base64 character in extra-info-digest line "
+ + "'{}'. Skipping descriptor.", line);
return;
}
scrubbed.space().append(Base64.encodeBase64String(
@@ -917,14 +912,12 @@ public class SanitizedBridgesWriter extends CollecTorMain {
* that we need to remove or replace for the sanitized descriptor
* version. */
} else {
- logger.warn("Unrecognized line '" + line
- + "'. Skipping.");
+ logger.warn("Unrecognized line '{}'. Skipping.", line);
return;
}
}
} catch (Exception e) {
- logger.warn("Could not parse server "
- + "descriptor.", e);
+ logger.warn("Could not parse server descriptor.", e);
return;
}
@@ -991,8 +984,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
/* Handle below. */
}
if (descriptorDigest == null) {
- logger.warn("Could not calculate server "
- + "descriptor digest.");
+ logger.warn("Could not calculate server descriptor digest.");
return;
}
String descriptorDigestSha256Base64 = null;
@@ -1014,8 +1006,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
/* Handle below. */
}
if (descriptorDigestSha256Base64 == null) {
- logger.warn("Could not calculate server "
- + "descriptor SHA256 digest.");
+ logger.warn("Could not calculate server descriptor SHA256 digest.");
return;
}
}
@@ -1056,8 +1047,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
bw.close();
}
} catch (ConfigurationException | IOException e) {
- logger.warn("Could not write sanitized server "
- + "descriptor to disk.", e);
+ logger.warn("Could not write sanitized server descriptor to disk.", e);
return;
}
}
@@ -1066,27 +1056,26 @@ public class SanitizedBridgesWriter extends CollecTorMain {
String identityEd25519Base64) {
byte[] identityEd25519 = Base64.decodeBase64(identityEd25519Base64);
if (identityEd25519.length < 40) {
- logger.warn("Invalid length of identity-ed25519 (in "
- + "bytes): " + identityEd25519.length);
+ logger.warn("Invalid length of identity-ed25519 (in bytes): {}",
+ identityEd25519.length);
} else if (identityEd25519[0] != 0x01) {
- logger.warn("Unknown version in identity-ed25519: "
- + identityEd25519[0]);
+ logger.warn("Unknown version in identity-ed25519: {}",
+ identityEd25519[0]);
} else if (identityEd25519[1] != 0x04) {
- logger.warn("Unknown cert type in identity-ed25519: "
- + identityEd25519[1]);
+ logger.warn("Unknown cert type in identity-ed25519: {}",
+ identityEd25519[1]);
} else if (identityEd25519[6] != 0x01) {
logger.warn("Unknown certified key type in "
+ "identity-ed25519: " + identityEd25519[1]);
} else if (identityEd25519[39] == 0x00) {
logger.warn("No extensions in identity-ed25519 (which "
- + "would contain the encoded master-key-ed25519): "
- + identityEd25519[39]);
+ + "would contain the encoded master-key-ed25519): {}",
+ identityEd25519[39]);
} else {
int extensionStart = 40;
for (int i = 0; i < (int) identityEd25519[39]; i++) {
if (identityEd25519.length < extensionStart + 4) {
- logger.warn("Invalid extension with id " + i
- + " in identity-ed25519.");
+ logger.warn("Invalid extension with id {} in identity-ed25519.", i);
break;
}
int extensionLength = identityEd25519[extensionStart];
@@ -1095,8 +1084,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
int extensionType = identityEd25519[extensionStart + 2];
if (extensionLength == 32 && extensionType == 4) {
if (identityEd25519.length < extensionStart + 4 + 32) {
- logger.warn("Invalid extension with id " + i
- + " in identity-ed25519.");
+ logger.warn("Invalid extension with id {} in identity-ed25519.", i);
break;
}
byte[] masterKeyEd25519 = new byte[32];
@@ -1111,8 +1099,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
extensionStart += 4 + extensionLength;
}
}
- logger.warn("Unable to locate master-key-ed25519 in "
- + "identity-ed25519.");
+ logger.warn("Unable to locate master-key-ed25519 in identity-ed25519.");
return null;
}
@@ -1142,8 +1129,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
String[] parts = line.split(" ");
if (line.startsWith("extra-info ")) {
if (parts.length < 3) {
- logger.debug("Illegal line in extra-info descriptor: '" + line
- + "'. Skipping descriptor.");
+ logger.debug("Illegal line in extra-info descriptor: '{}'. "
+ + "Skipping descriptor.", line);
return;
}
hashedBridgeIdentity = DigestUtils.sha1Hex(Hex.decodeHex(
@@ -1164,8 +1151,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
* name. */
} else if (line.startsWith("transport ")) {
if (parts.length < 3) {
- logger.debug("Illegal line in extra-info descriptor: '"
- + line + "'. Skipping descriptor.");
+ logger.debug("Illegal line in extra-info descriptor: '{}'. "
+ + "Skipping descriptor.", line);
return;
}
scrubbed.append("transport " + parts[1] + "\n");
@@ -1244,19 +1231,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
* that we need to remove or replace for the sanitized descriptor
* version. */
} else {
- logger.warn("Unrecognized line '" + line
- + "'. Skipping.");
+ logger.warn("Unrecognized line '{}'. Skipping.", line);
return;
}
}
br.close();
- } catch (IOException e) {
- logger.warn("Could not parse extra-info "
- + "descriptor.", e);
- return;
- } catch (DecoderException e) {
- logger.warn("Could not parse extra-info "
- + "descriptor.", e);
+ } catch (DecoderException | IOException e) {
+ logger.warn("Could not parse extra-info descriptor.", e);
return;
}
@@ -1277,8 +1258,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
/* Handle below. */
}
if (descriptorDigest == null) {
- logger.warn("Could not calculate extra-info "
- + "descriptor digest.");
+ logger.warn("Could not calculate extra-info descriptor digest.");
return;
}
String descriptorDigestSha256Base64 = null;
@@ -1375,8 +1355,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
}
bw.close();
- logger.info("Deleted " + deleted + " secrets that we don't "
- + "need anymore and kept " + kept + ".");
+ logger.info("Deleted {} secrets that we don't "
+ + "need anymore and kept {}.", deleted, kept);
} catch (IOException e) {
logger.warn("Could not store reduced set of "
+ "secrets to disk! This is a bad sign, better check what's "
@@ -1396,8 +1376,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
if (maxNetworkStatusPublishedMillis > 0L
&& maxNetworkStatusPublishedMillis < tooOldMillis) {
logger.warn("The last known bridge network status was "
- + "published " + maxNetworkStatusPublishedTime + ", which is "
- + "more than 5:30 hours in the past.");
+ + "published {}, which is more than 5:30 hours in the past.",
+ maxNetworkStatusPublishedTime);
}
long maxServerDescriptorPublishedMillis =
dateTimeFormat.parse(maxServerDescriptorPublishedTime)
@@ -1405,8 +1385,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
if (maxServerDescriptorPublishedMillis > 0L
&& maxServerDescriptorPublishedMillis < tooOldMillis) {
logger.warn("The last known bridge server descriptor was "
- + "published " + maxServerDescriptorPublishedTime + ", which "
- + "is more than 5:30 hours in the past.");
+ + "published {}, which is more than 5:30 hours in the past.",
+ maxServerDescriptorPublishedTime);
}
long maxExtraInfoDescriptorPublishedMillis =
dateTimeFormat.parse(maxExtraInfoDescriptorPublishedTime)
@@ -1414,12 +1394,11 @@ public class SanitizedBridgesWriter extends CollecTorMain {
if (maxExtraInfoDescriptorPublishedMillis > 0L
&& maxExtraInfoDescriptorPublishedMillis < tooOldMillis) {
logger.warn("The last known bridge extra-info descriptor "
- + "was published " + maxExtraInfoDescriptorPublishedTime
- + ", which is more than 5:30 hours in the past.");
+ + "was published {}, which is more than 5:30 hours in the past.",
+ maxExtraInfoDescriptorPublishedTime);
}
} catch (ParseException e) {
- logger.warn("Unable to parse timestamp for "
- + "stale check.", e);
+ logger.warn("Unable to parse timestamp for stale check.", e);
}
}
1
0

26 Feb '18
commit 2457eb5be72d508c4ec4e2d2c3b6f7a88c69ed4c
Author: iwakeh <iwakeh(a)torproject.org>
Date: Fri Oct 27 17:35:15 2017 +0000
Use Java8 idiom for toString method.
---
.../org/torproject/collector/bridgedescs/DescriptorBuilder.java | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
index cc57662..4ca9dd1 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -5,6 +5,7 @@ package org.torproject.collector.bridgedescs;
import java.util.ArrayList;
import java.util.List;
+import java.util.stream.Collectors;
/** Builder for descriptors.
*
@@ -55,10 +56,7 @@ class DescriptorBuilder {
@Override
public String toString() {
- StringBuilder full = new StringBuilder();
- for (Object part : this.parts) {
- full.append(part.toString());
- }
- return full.toString();
+ return this.parts.stream().map(part -> part.toString())
+ .collect(Collectors.joining(""));
}
}
1
0

[collector/release] Add a finalized state to DescriptorBuilder.
by karsten@torproject.org 26 Feb '18
by karsten@torproject.org 26 Feb '18
26 Feb '18
commit afe07d8efd4dc94b9dfb9b5896002286ba71dc6d
Author: iwakeh <iwakeh(a)torproject.org>
Date: Fri Oct 27 17:35:16 2017 +0000
Add a finalized state to DescriptorBuilder.
To avoid possible inconsistencies, DescriptorBuilder is finalized after the first
call to 'toString' and cannot be altered anymore. Any attempt to add more content
leads to an IllegalStateException.
---
.../collector/bridgedescs/DescriptorBuilder.java | 34 ++++++++++++++++++++--
1 file changed, 31 insertions(+), 3 deletions(-)
diff --git a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
index 4ca9dd1..9c47b5e 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -12,25 +12,43 @@ import java.util.stream.Collectors;
* <p>This builder class can be used while parsing and processing an original
* descriptor. It accepts {@code String}s, {@code DescriptorBuilder}s and
* {@code StringBuilder}s. The latter two as placeholders for parts that can
- * only be processed after finishing the parsing step.</p> */
+ * only be processed after finishing the parsing step.</p>
+ *
+ * <p>Calling {@code toString} finalizes the builder and any subsequent
+ * method calls other than {@code toString} will result in an
+ * {@code IllegalStateException}.
+ */
class DescriptorBuilder {
private List<Object> parts;
private StringBuilder lastPart;
+ private boolean finalized = false;
+
+ private String value;
+
public DescriptorBuilder() {
this.parts = new ArrayList<>();
this.lastPart = new StringBuilder();
this.parts.add(this.lastPart);
}
+ private void throwExceptionIfFinalized() {
+ if (this.finalized) {
+ throw new IllegalStateException("This DescriptorBuilder is finalized and"
+ + " calling anything other than 'toString' is illegal.");
+ }
+ }
+
public DescriptorBuilder append(String sanitizedString) {
+ this.throwExceptionIfFinalized();
this.lastPart.append(sanitizedString);
return this;
}
public DescriptorBuilder append(StringBuilder placeholder) {
+ this.throwExceptionIfFinalized();
this.parts.add(placeholder);
this.lastPart = new StringBuilder();
this.parts.add(this.lastPart);
@@ -38,6 +56,7 @@ class DescriptorBuilder {
}
public DescriptorBuilder append(DescriptorBuilder placeholder) {
+ this.throwExceptionIfFinalized();
this.parts.add(placeholder);
this.lastPart = new StringBuilder();
this.parts.add(this.lastPart);
@@ -45,18 +64,27 @@ class DescriptorBuilder {
}
public DescriptorBuilder space() {
+ this.throwExceptionIfFinalized();
this.lastPart.append(' ');
return this;
}
public DescriptorBuilder newLine() {
+ this.throwExceptionIfFinalized();
this.lastPart.append('\n');
return this;
}
@Override
public String toString() {
- return this.parts.stream().map(part -> part.toString())
- .collect(Collectors.joining(""));
+ if (!this.finalized) {
+ this.finalized = true;
+ this.value = this.parts.stream().map(part -> part.toString())
+ .collect(Collectors.joining(""));
+ this.parts.clear(); // not needed anymore
+ this.lastPart = null;
+ }
+ return value;
}
+
}
1
0

[collector/release] Adapt CollecTor to latest metrics-lib master branch.
by karsten@torproject.org 26 Feb '18
by karsten@torproject.org 26 Feb '18
26 Feb '18
commit fbb35f75da022a23912b937b1825d8f216abad07
Author: iwakeh <iwakeh(a)torproject.org>
Date: Tue Feb 20 16:30:08 2018 +0000
Adapt CollecTor to latest metrics-lib master branch.
---
src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index 53ad118..7601898 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -15,10 +15,10 @@ import org.torproject.collector.cron.CollecTorMain;
import org.torproject.collector.persist.PersistenceUtils;
import org.torproject.collector.persist.WebServerAccessLogPersistence;
import org.torproject.descriptor.DescriptorParseException;
+import org.torproject.descriptor.Method;
import org.torproject.descriptor.WebServerAccessLog;
import org.torproject.descriptor.log.InternalLogDescriptor;
import org.torproject.descriptor.log.InternalWebServerAccessLog;
-import org.torproject.descriptor.log.Method;
import org.torproject.descriptor.log.WebServerAccessLogImpl;
import org.torproject.descriptor.log.WebServerAccessLogLine;
1
0

[collector/release] Rename SanitizedBridgeDescriptorBuilder to DescriptorBuilder.
by karsten@torproject.org 26 Feb '18
by karsten@torproject.org 26 Feb '18
26 Feb '18
commit 266051f3397bb0f676054ce2459502f680bfab6d
Author: iwakeh <iwakeh(a)torproject.org>
Date: Fri Oct 27 17:35:10 2017 +0000
Rename SanitizedBridgeDescriptorBuilder to DescriptorBuilder.
The class doesn't 'know' about descriptor sanitization; it is only a sort of
container for writing descriptors. It could actually be moved to some util
package and used in other parsing steps, too.
Also rename test helper classes to avoid naming conflicts.
Remove 'descriptor' from variable names.
Make DescriptorBuilder's constructor and methods public.
Adapt other classes as well as tests.
---
.../collector/bridgedescs/DescriptorBuilder.java | 57 +++++++++
.../SanitizedBridgeDescriptorBuilder.java | 54 --------
.../bridgedescs/SanitizedBridgesWriter.java | 4 +-
...er.java => ExtraInfoTestDescriptorBuilder.java} | 4 +-
...ava => NetworkStatusTestDescriptorBuilder.java} | 4 +-
.../bridgedescs/SanitizedBridgesWriterTest.java | 142 +++++++++++----------
...ilder.java => ServerTestDescriptorBuilder.java} | 4 +-
...TarballBuilder.java => TarballTestBuilder.java} | 12 +-
...ptorBuilder.java => TestDescriptorBuilder.java} | 2 +-
9 files changed, 145 insertions(+), 138 deletions(-)
diff --git a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
new file mode 100644
index 0000000..12a8956
--- /dev/null
+++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -0,0 +1,57 @@
+/* Copyright 2018 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.collector.bridgedescs;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Builder for descriptors.
+ *
+ * <p>This builder class can be used while parsing and processing an original
+ * descriptor. It accepts {@code String}s, {@code DescriptorBuilder}s and
+ * {@code StringBuilder}s. The latter two as placeholders for parts that can
+ * only be processed after finishing the parsing step.</p> */
+class DescriptorBuilder {
+
+ private List<StringBuilder> parts;
+
+ private StringBuilder lastPart;
+
+ public DescriptorBuilder() {
+ this.parts = new ArrayList<>();
+ this.lastPart = new StringBuilder();
+ this.parts.add(this.lastPart);
+ }
+
+ public DescriptorBuilder append(String sanitizedString) {
+ this.lastPart.append(sanitizedString);
+ return this;
+ }
+
+ public DescriptorBuilder append(StringBuilder placeholder) {
+ this.parts.add(placeholder);
+ this.lastPart = new StringBuilder();
+ this.parts.add(this.lastPart);
+ return this;
+ }
+
+ public DescriptorBuilder space() {
+ this.lastPart.append(' ');
+ return this;
+ }
+
+ public DescriptorBuilder newLine() {
+ this.lastPart.append('\n');
+ return this;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder full = new StringBuilder();
+ for (StringBuilder part : this.parts) {
+ full.append(part);
+ }
+ return full.toString();
+ }
+}
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
deleted file mode 100644
index 174a5ae..0000000
--- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package org.torproject.collector.bridgedescs;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/** Builder for sanitized bridge descriptors.
- *
- * <p>This builder class can be used while parsing and sanitizing an original
- * bridge descriptor. It accepts already sanitized {@code String}s and
- * {@code StringBuilder}s as placeholders for parts that can only be sanitized
- * after finishing the parsing step.</p> */
-class SanitizedBridgeDescriptorBuilder {
-
- private List<StringBuilder> descriptorParts;
-
- private StringBuilder lastDescriptorPart;
-
- SanitizedBridgeDescriptorBuilder() {
- this.descriptorParts = new ArrayList<>();
- this.lastDescriptorPart = new StringBuilder();
- this.descriptorParts.add(this.lastDescriptorPart);
- }
-
- SanitizedBridgeDescriptorBuilder append(String sanitizedString) {
- this.lastDescriptorPart.append(sanitizedString);
- return this;
- }
-
- SanitizedBridgeDescriptorBuilder append(StringBuilder placeholder) {
- this.descriptorParts.add(placeholder);
- this.lastDescriptorPart = new StringBuilder();
- this.descriptorParts.add(this.lastDescriptorPart);
- return this;
- }
-
- SanitizedBridgeDescriptorBuilder space() {
- this.lastDescriptorPart.append(' ');
- return this;
- }
-
- SanitizedBridgeDescriptorBuilder newLine() {
- this.lastDescriptorPart.append('\n');
- return this;
- }
-
- @Override
- public String toString() {
- StringBuilder fullDescriptor = new StringBuilder();
- for (StringBuilder descriptorPart : this.descriptorParts) {
- fullDescriptor.append(descriptorPart);
- }
- return fullDescriptor.toString();
- }
-}
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index 1ef1d60..1d264a5 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -679,8 +679,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
Map<StringBuilder, String> scrubbedTcpPorts = new HashMap<>();
Map<StringBuilder, String> scrubbedIpAddressesAndTcpPorts = new HashMap<>();
String masterKeyEd25519FromIdentityEd25519 = null;
- SanitizedBridgeDescriptorBuilder scrubbed =
- new SanitizedBridgeDescriptorBuilder();
+ DescriptorBuilder scrubbed = new DescriptorBuilder();
try (BufferedReader br = new BufferedReader(new StringReader(
new String(data, "US-ASCII")))) {
scrubbed.append(Annotation.BridgeServer.toString());
@@ -923,7 +922,6 @@ public class SanitizedBridgesWriter extends CollecTorMain {
return;
}
}
- br.close();
} catch (Exception e) {
logger.warn("Could not parse server "
+ "descriptor.", e);
diff --git a/src/test/java/org/torproject/collector/bridgedescs/ExtraInfoDescriptorBuilder.java b/src/test/java/org/torproject/collector/bridgedescs/ExtraInfoTestDescriptorBuilder.java
similarity index 95%
rename from src/test/java/org/torproject/collector/bridgedescs/ExtraInfoDescriptorBuilder.java
rename to src/test/java/org/torproject/collector/bridgedescs/ExtraInfoTestDescriptorBuilder.java
index 2c77d47..71d52e9 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/ExtraInfoDescriptorBuilder.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/ExtraInfoTestDescriptorBuilder.java
@@ -8,10 +8,10 @@ import java.util.Arrays;
/** Builds a non-sanitized bridge extra-info descriptor that comes with an
* original bridge descriptor (of a bundled and therefore publicly known bridge)
* by default. */
-class ExtraInfoDescriptorBuilder extends DescriptorBuilder {
+class ExtraInfoTestDescriptorBuilder extends TestDescriptorBuilder {
/** Initializes the descriptor builder. */
- ExtraInfoDescriptorBuilder() {
+ ExtraInfoTestDescriptorBuilder() {
this.addAll(Arrays.asList(
"extra-info MeekGoogle "
+ "46D4A71197B8FA515A826C6B017C522FE264655B",
diff --git a/src/test/java/org/torproject/collector/bridgedescs/NetworkStatusBuilder.java b/src/test/java/org/torproject/collector/bridgedescs/NetworkStatusTestDescriptorBuilder.java
similarity index 89%
rename from src/test/java/org/torproject/collector/bridgedescs/NetworkStatusBuilder.java
rename to src/test/java/org/torproject/collector/bridgedescs/NetworkStatusTestDescriptorBuilder.java
index 31cccf7..b8c1cc9 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/NetworkStatusBuilder.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/NetworkStatusTestDescriptorBuilder.java
@@ -8,10 +8,10 @@ import java.util.Arrays;
/** Builds a non-sanitized bridge network status that comes with an original
* bridge network status entry (of a bundled and therefore publicly known
* bridge) by default. */
-class NetworkStatusBuilder extends DescriptorBuilder {
+class NetworkStatusTestDescriptorBuilder extends TestDescriptorBuilder {
/** Initializes the descriptor builder. */
- NetworkStatusBuilder() {
+ NetworkStatusTestDescriptorBuilder() {
this.addAll(Arrays.asList(
"published 2016-06-30 23:40:28",
"flag-thresholds stable-uptime=807660 stable-mtbf=1425164 "
diff --git a/src/test/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriterTest.java b/src/test/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriterTest.java
index 7e58497..41a88c6 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriterTest.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriterTest.java
@@ -62,23 +62,23 @@ public class SanitizedBridgesWriterTest {
/** Server descriptor builder used to build the first and only server
* descriptor for this test, unless removed from the tarball builder.*/
- private DescriptorBuilder defaultServerDescriptorBuilder;
+ private TestDescriptorBuilder defaultServerTestDescriptorBuilder;
/** Extra-info descriptor builder used to build the first and only
* extra-info descriptor for this test, unless removed from the tarball
* builder.*/
- private DescriptorBuilder defaultExtraInfoDescriptorBuilder;
+ private TestDescriptorBuilder defaultExtraInfoTestDescriptorBuilder;
/** Network status builder used to build the first and only network
* status for this test, unless removed from the tarball builder.*/
- private DescriptorBuilder defaultNetworkStatusBuilder;
+ private TestDescriptorBuilder defaultNetworkStatusTestDescriptorBuilder;
/** Tarball builder to build the first and only tarball, unless removed
* from the test. */
- private TarballBuilder defaultTarballBuilder;
+ private TarballTestBuilder defaultTarballTestBuilder;
/** Tarball builder(s) for this test. */
- private List<TarballBuilder> tarballBuilders;
+ private List<TarballTestBuilder> tarballBuilders;
/** Parsed sanitized bridge descriptors with keys being file names and
* values being sanitized descriptor lines. */
@@ -104,24 +104,26 @@ public class SanitizedBridgesWriterTest {
this.sanitizedBridgesDirectory =
this.temporaryFolder.newFolder("out", "bridge-descriptors").toPath();
this.initializeTestConfiguration();
- this.defaultServerDescriptorBuilder = new ServerDescriptorBuilder();
- this.defaultExtraInfoDescriptorBuilder = new ExtraInfoDescriptorBuilder();
- this.defaultNetworkStatusBuilder = new NetworkStatusBuilder();
- this.defaultTarballBuilder = new TarballBuilder(
+ this.defaultServerTestDescriptorBuilder = new ServerTestDescriptorBuilder();
+ this.defaultExtraInfoTestDescriptorBuilder
+ = new ExtraInfoTestDescriptorBuilder();
+ this.defaultNetworkStatusTestDescriptorBuilder
+ = new NetworkStatusTestDescriptorBuilder();
+ this.defaultTarballTestBuilder = new TarballTestBuilder(
"from-tonga-2016-07-01T000702Z.tar.gz", 1467331624000L);
- this.defaultTarballBuilder.add("bridge-descriptors", 1467331622000L,
- Arrays.asList(new DescriptorBuilder[] {
- this.defaultServerDescriptorBuilder }));
- this.defaultTarballBuilder.add("cached-extrainfo", 1467327972000L,
- Arrays.asList(new DescriptorBuilder[] {
- this.defaultExtraInfoDescriptorBuilder }));
- this.defaultTarballBuilder.add("cached-extrainfo.new", 1467331623000L,
- Arrays.asList(new DescriptorBuilder[] { }));
- this.defaultTarballBuilder.add("networkstatus-bridges",
- 1467330028000L, Arrays.asList(new DescriptorBuilder[] {
- this.defaultNetworkStatusBuilder }));
+ this.defaultTarballTestBuilder.add("bridge-descriptors", 1467331622000L,
+ Arrays.asList(new TestDescriptorBuilder[] {
+ this.defaultServerTestDescriptorBuilder }));
+ this.defaultTarballTestBuilder.add("cached-extrainfo", 1467327972000L,
+ Arrays.asList(new TestDescriptorBuilder[] {
+ this.defaultExtraInfoTestDescriptorBuilder }));
+ this.defaultTarballTestBuilder.add("cached-extrainfo.new", 1467331623000L,
+ Arrays.asList(new TestDescriptorBuilder[] { }));
+ this.defaultTarballTestBuilder.add("networkstatus-bridges",
+ 1467330028000L, Arrays.asList(new TestDescriptorBuilder[] {
+ this.defaultNetworkStatusTestDescriptorBuilder }));
this.tarballBuilders = new ArrayList<>(
- Arrays.asList(this.defaultTarballBuilder));
+ Arrays.asList(this.defaultTarballTestBuilder));
}
/** Initializes a configuration for the bridge descriptor sanitizer. */
@@ -142,7 +144,7 @@ public class SanitizedBridgesWriterTest {
/** Runs this test by executing all builders, performing the sanitizing
* process, and parsing sanitized bridge descriptors for inspection. */
private void runTest() throws IOException, ConfigurationException {
- for (TarballBuilder tarballBuilder : this.tarballBuilders) {
+ for (TarballTestBuilder tarballBuilder : this.tarballBuilders) {
tarballBuilder.build(new File(this.bridgeDirectoriesDir));
}
SanitizedBridgesWriter sbw = new SanitizedBridgesWriter(configuration);
@@ -206,7 +208,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorEmpty() throws Exception {
- this.defaultServerDescriptorBuilder.clear();
+ this.defaultServerTestDescriptorBuilder.clear();
this.runTest();
assertTrue("No server descriptor provided as input.",
this.parsedServerDescriptors.isEmpty());
@@ -214,14 +216,14 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorOnlyAnnotation() throws Exception {
- this.defaultServerDescriptorBuilder.removeAllExcept("@purpose bridge");
+ this.defaultServerTestDescriptorBuilder.removeAllExcept("@purpose bridge");
this.runTest();
}
@Test
public void testServerDescriptorAdditionalAnnotation()
throws Exception {
- this.defaultServerDescriptorBuilder.insertBeforeLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.insertBeforeLineStartingWith(
"@purpose bridge", Arrays.asList("@source 198.50.200.131"));
this.runTest();
assertEquals("Expected 3 sanitized descriptors.", 3,
@@ -234,7 +236,7 @@ public class SanitizedBridgesWriterTest {
"true");
this.configuration.setProperty(Key.BridgeDescriptorMappingsLimit.name(),
"30000");
- this.defaultServerDescriptorBuilder.insertBeforeLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.insertBeforeLineStartingWith(
"platform ", Arrays.asList("or-address [2:5:2:5:2:5:2:5]:25"));
Path bridgeIpSecretsFile = Paths.get(statsDirectory, "bridge-ip-secrets");
BufferedWriter writer = Files.newBufferedWriter(bridgeIpSecretsFile,
@@ -256,7 +258,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorRouterLineTruncated() throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith("router ",
+ this.defaultServerTestDescriptorBuilder.replaceLineStartingWith("router ",
Arrays.asList("router MeekGoogle"));
this.runTest();
assertTrue("Sanitized server descriptor with invalid router line.",
@@ -265,7 +267,8 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorProtoLine() throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith("protocols ",
+ this.defaultServerTestDescriptorBuilder
+ .replaceLineStartingWith("protocols ",
Arrays.asList("proto Cons=1-2 Desc=1-2 DirCache=1 HSDir=1 HSIntro=3 "
+ "HSRend=1-2 Link=1-4 LinkAuth=1 Microdesc=1-2 Relay=1-2"));
this.runTest();
@@ -275,7 +278,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorFingerprintTruncated() throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.replaceLineStartingWith(
"fingerprint ", Arrays.asList("fingerprint 4"));
this.runTest();
assertTrue("Sanitized server descriptor with invalid fingerprint "
@@ -285,7 +288,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorFingerprintInvalidHex()
throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.replaceLineStartingWith(
"fingerprint ", Arrays.asList("fingerprint FUN!"));
this.runTest();
assertTrue("Sanitized server descriptor with invalid fingerprint "
@@ -294,7 +297,8 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorFingerprintOpt() throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith("fingerprint ",
+ this.defaultServerTestDescriptorBuilder
+ .replaceLineStartingWith("fingerprint ",
Arrays.asList("opt fingerprint 46D4 A711 97B8 FA51 5A82 6C6B 017C 522F "
+ "E264 655B"));
this.runTest();
@@ -305,7 +309,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorExtraInfoDigestInvalidHex()
throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.replaceLineStartingWith(
"extra-info-digest ", Arrays.asList("extra-info-digest 6"));
this.runTest();
assertTrue("Sanitized server descriptor with invalid extra-info "
@@ -315,7 +319,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorExtraInfoDigestInvalidBase64()
throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.replaceLineStartingWith(
"extra-info-digest ", Arrays.asList("extra-info-digest "
+ "6D03E80568DEFA102968D144CB35FFA6E3355B8A "
+ "#*?$%x@nxukmmcT1+UnDg4qh0yKbjVUYKhGL8VksoJA"));
@@ -327,7 +331,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorExtraInfoDigestSha1Only()
throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.replaceLineStartingWith(
"extra-info-digest ", Arrays.asList("extra-info-digest "
+ "6D03E80568DEFA102968D144CB35FFA6E3355B8A"));
this.runTest();
@@ -339,7 +343,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorExtraInfoDigestThirdArgument()
throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.replaceLineStartingWith(
"extra-info-digest ", Arrays.asList("extra-info-digest "
+ "6D03E80568DEFA102968D144CB35FFA6E3355B8A "
+ "cy/LwP7nxukmmcT1+UnDg4qh0yKbjVUYKhGL8VksoJA 00"));
@@ -350,7 +354,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorExtraInfoDigestOpt() throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.replaceLineStartingWith(
"extra-info-digest ", Arrays.asList("opt extra-info-digest "
+ "6D03E80568DEFA102968D144CB35FFA6E3355B8A "
+ "cy/LwP7nxukmmcT1+UnDg4qh0yKbjVUYKhGL8VksoJA"));
@@ -362,7 +366,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorRejectOwnAddress() throws Exception {
- this.defaultServerDescriptorBuilder.insertBeforeLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.insertBeforeLineStartingWith(
"reject *:*", Arrays.asList("reject 198.50.200.131:*", "accept *:80"));
this.runTest();
List<String> parsedLines = this.parsedServerDescriptors.get(0);
@@ -382,7 +386,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorEd25519IdentityMasterKeyMismatch()
throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.replaceLineStartingWith(
"master-key-ed25519 ", Arrays.asList("master-key-ed25519 "
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"));
this.runTest();
@@ -392,7 +396,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorEd25519IdentityA() throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.replaceLineStartingWith(
"identity-ed25519", Arrays.asList("identity-ed25519",
"-----BEGIN ED25519 CERT-----",
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
@@ -408,7 +412,8 @@ public class SanitizedBridgesWriterTest {
public void testServerDescriptorEd25519IdentityEToF() throws Exception {
String change9sTo6s =
"ZEXE7RkiEJ1l5Ij9hc9TJOpM7/9XSPZnF/PbMfE0u3n3JbOO3s82GN6BPuA0v2Cs";
- this.defaultServerDescriptorBuilder.replaceLineStartingWith(change9sTo6s,
+ this.defaultServerTestDescriptorBuilder
+ .replaceLineStartingWith(change9sTo6s,
Arrays.asList(change9sTo6s.replaceAll("9", "6")));
this.runTest();
assertTrue("Mismatch between identity and master key.",
@@ -417,7 +422,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorEd25519IdentitySlash() throws Exception {
- this.defaultServerDescriptorBuilder.replaceLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.replaceLineStartingWith(
"identity-ed25519", Arrays.asList("identity-ed25519",
"-----BEGIN ED25519 CERT-----",
"////////////////////////////////////////////////////////////////",
@@ -432,7 +437,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testServerDescriptorFamilyInvalidFingerprint()
throws Exception {
- this.defaultServerDescriptorBuilder.insertBeforeLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.insertBeforeLineStartingWith(
"hidden-service-dir", Arrays.asList("family $0"));
this.runTest();
assertTrue("Sanitized server descriptor with invalid fingerprint in "
@@ -446,8 +451,8 @@ public class SanitizedBridgesWriterTest {
"true");
String fingerprintLine =
"fingerprint 46D4 A711 97B8 FA51 5A82 6C6B 017C 522F E264 655B";
- this.defaultServerDescriptorBuilder.removeLine(fingerprintLine);
- this.defaultServerDescriptorBuilder.insertBeforeLineStartingWith(
+ this.defaultServerTestDescriptorBuilder.removeLine(fingerprintLine);
+ this.defaultServerTestDescriptorBuilder.insertBeforeLineStartingWith(
"published ", Arrays.asList(fingerprintLine));
this.runTest();
assertFalse(this.parsedServerDescriptors.isEmpty());
@@ -493,7 +498,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testExtraInfoDescriptorExtraInfoLineTruncated()
throws Exception {
- this.defaultExtraInfoDescriptorBuilder.replaceLineStartingWith(
+ this.defaultExtraInfoTestDescriptorBuilder.replaceLineStartingWith(
"extra-info ", Arrays.asList("extra-info "));
this.runTest();
}
@@ -501,7 +506,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testExtraInfoDescriptorExtraInfoInvalidHex()
throws Exception {
- this.defaultExtraInfoDescriptorBuilder.replaceLineStartingWith(
+ this.defaultExtraInfoTestDescriptorBuilder.replaceLineStartingWith(
"extra-info ", Arrays.asList("extra-info MeekGoogle 4"));
this.runTest();
assertTrue("Sanitized extra-info descriptor with invalid extra-info "
@@ -510,7 +515,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testExtraInfoDescriptorTransportSpace() throws Exception {
- this.defaultExtraInfoDescriptorBuilder.replaceLineStartingWith(
+ this.defaultExtraInfoTestDescriptorBuilder.replaceLineStartingWith(
"transport ", Arrays.asList("transport "));
this.runTest();
assertTrue("Sanitized extra-info descriptor with invalid transport "
@@ -519,7 +524,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testExtraInfoDescriptorTransportInfoRemoved() throws Exception {
- this.defaultExtraInfoDescriptorBuilder.insertBeforeLineStartingWith(
+ this.defaultExtraInfoTestDescriptorBuilder.insertBeforeLineStartingWith(
"bridge-stats-end ", Arrays.asList("transport-info secretkey"));
this.runTest();
for (String line : this.parsedExtraInfoDescriptors.get(0)) {
@@ -530,7 +535,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testExtraInfoDescriptorHidservRetained() throws Exception {
- this.defaultExtraInfoDescriptorBuilder.insertBeforeLineStartingWith(
+ this.defaultExtraInfoTestDescriptorBuilder.insertBeforeLineStartingWith(
"transport ",
Arrays.asList("hidserv-stats-end 2016-11-23 14:48:05 (86400 s)",
"hidserv-rend-relayed-cells 27653088 delta_f=2048 epsilon=0.30 "
@@ -551,7 +556,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testExtraInfoDescriptorPaddingCountsRetained() throws Exception {
- this.defaultExtraInfoDescriptorBuilder.insertBeforeLineStartingWith(
+ this.defaultExtraInfoTestDescriptorBuilder.insertBeforeLineStartingWith(
"transport ",
Arrays.asList("padding-counts 2017-05-10 01:48:43 (86400 s) "
+ "bin-size=10000 write-drop=10000 write-pad=10000 "
@@ -575,7 +580,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testExtraInfoDescriptorRouterSignatureLineSpace()
throws Exception {
- this.defaultExtraInfoDescriptorBuilder.replaceLineStartingWith(
+ this.defaultExtraInfoTestDescriptorBuilder.replaceLineStartingWith(
"router-signature", Arrays.asList("router-signature "));
this.runTest();
assertTrue("Sanitized extra-info descriptor with invalid "
@@ -609,7 +614,7 @@ public class SanitizedBridgesWriterTest {
@Test
public void testNetworkStatusPublishedLineMissing() throws Exception {
- this.defaultNetworkStatusBuilder.removeLine(
+ this.defaultNetworkStatusTestDescriptorBuilder.removeLine(
"published 2016-06-30 23:40:28");
this.runTest();
String sanitizedNetworkStatusFileName = "2016/07/statuses/01/"
@@ -622,9 +627,9 @@ public class SanitizedBridgesWriterTest {
@Test
public void testNetworkStatusPublishedLineMissingTarballFileNameChange()
throws Exception {
- this.defaultNetworkStatusBuilder.removeLine(
+ this.defaultNetworkStatusTestDescriptorBuilder.removeLine(
"published 2016-06-30 23:40:28");
- this.defaultTarballBuilder.setTarballFileName(
+ this.defaultTarballTestBuilder.setTarballFileName(
"from-tonga-with-love-2016-07-01T000702Z.tar.gz");
this.runTest();
assertTrue("Sanitized network status without published line and with "
@@ -633,14 +638,14 @@ public class SanitizedBridgesWriterTest {
@Test
public void testNetworkStatusRlineTruncated() throws Exception {
- this.defaultNetworkStatusBuilder.replaceLineStartingWith("r ",
+ this.defaultNetworkStatusTestDescriptorBuilder.replaceLineStartingWith("r ",
Arrays.asList("r MeekGoogle"));
this.runTest();
}
@Test
public void testNetworkStatusRlineInvalidBase64() throws Exception {
- this.defaultNetworkStatusBuilder.replaceLineStartingWith("r ",
+ this.defaultNetworkStatusTestDescriptorBuilder.replaceLineStartingWith("r ",
Arrays.asList("r MeekGoogle R#SnE*e4+lFag:xr_XxSL+J;ZVs "
+ "g+M7'w+lG$mv6NW9&RmvzLO(R0Y 2016-06-30 21:43:52 "
+ "198.50.200.131 8008 0"));
@@ -653,8 +658,8 @@ public class SanitizedBridgesWriterTest {
public void testNetworkStatusAlinePortMissing() throws Exception {
this.configuration.setProperty(Key.ReplaceIpAddressesWithHashes.name(),
"true");
- this.defaultNetworkStatusBuilder.insertBeforeLineStartingWith("s ",
- Arrays.asList("a 198.50.200.132"));
+ this.defaultNetworkStatusTestDescriptorBuilder
+ .insertBeforeLineStartingWith("s ", Arrays.asList("a 198.50.200.132"));
this.runTest();
for (String line : this.parsedNetworkStatuses.get(0)) {
if (line.startsWith("a ")) {
@@ -665,8 +670,8 @@ public class SanitizedBridgesWriterTest {
@Test
public void testNetworkStatusVLineUnknown() throws Exception {
- this.defaultNetworkStatusBuilder.insertBeforeLineStartingWith("w ",
- Arrays.asList("v Tor 0.2.7.6"));
+ this.defaultNetworkStatusTestDescriptorBuilder
+ .insertBeforeLineStartingWith("w ", Arrays.asList("v Tor 0.2.7.6"));
this.runTest();
assertTrue("Should not have sanitized status with v line which is unknown "
+ "in this descriptor type.", this.parsedNetworkStatuses.isEmpty());
@@ -674,7 +679,8 @@ public class SanitizedBridgesWriterTest {
@Test
public void testNetworkStatusNoEntry() throws Exception {
- this.defaultNetworkStatusBuilder.truncateAtLineStartingWith("r ");
+ this.defaultNetworkStatusTestDescriptorBuilder
+ .truncateAtLineStartingWith("r ");
this.runTest();
assertFalse("Skipped network status without entries.",
this.parsedNetworkStatuses.isEmpty());
@@ -687,8 +693,8 @@ public class SanitizedBridgesWriterTest {
@Test
public void testNetworkStatusFromBifroest() throws Exception {
- this.defaultTarballBuilder.setTarballFileName(
- this.defaultTarballBuilder.getTarballFileName()
+ this.defaultTarballTestBuilder.setTarballFileName(
+ this.defaultTarballTestBuilder.getTarballFileName()
.replaceAll("tonga", "bifroest"));
this.runTest();
assertTrue("Sanitized status should contain Bifroest's fingerprint.",
@@ -698,8 +704,8 @@ public class SanitizedBridgesWriterTest {
@Test
public void testNetworkStatusFromTrifroest() throws Exception {
- this.defaultTarballBuilder.setTarballFileName(
- this.defaultTarballBuilder.getTarballFileName()
+ this.defaultTarballTestBuilder.setTarballFileName(
+ this.defaultTarballTestBuilder.getTarballFileName()
.replaceAll("tonga", "trifroest"));
this.runTest();
assertTrue("Should not have recognized unknown bridge authority Trifroest.",
@@ -708,9 +714,9 @@ public class SanitizedBridgesWriterTest {
@Test
public void testTarballContainsSameFileTwice() throws Exception {
- this.defaultTarballBuilder.add("cached-extrainfo.new", 1467331623000L,
- Arrays.asList(new DescriptorBuilder[] {
- this.defaultExtraInfoDescriptorBuilder }));
+ this.defaultTarballTestBuilder.add("cached-extrainfo.new", 1467331623000L,
+ Arrays.asList(new TestDescriptorBuilder[] {
+ this.defaultExtraInfoTestDescriptorBuilder }));
this.runTest();
assertEquals("There should only be one.",
1, this.parsedExtraInfoDescriptors.size());
diff --git a/src/test/java/org/torproject/collector/bridgedescs/ServerDescriptorBuilder.java b/src/test/java/org/torproject/collector/bridgedescs/ServerTestDescriptorBuilder.java
similarity index 97%
rename from src/test/java/org/torproject/collector/bridgedescs/ServerDescriptorBuilder.java
rename to src/test/java/org/torproject/collector/bridgedescs/ServerTestDescriptorBuilder.java
index e49e126..b23c9e9 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/ServerDescriptorBuilder.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/ServerTestDescriptorBuilder.java
@@ -8,10 +8,10 @@ import java.util.Arrays;
/** Builds a non-sanitized bridge server descriptor that comes with an original
* bridge descriptor (of a bundled and therefore publicly known bridge) by
* default. */
-class ServerDescriptorBuilder extends DescriptorBuilder {
+class ServerTestDescriptorBuilder extends TestDescriptorBuilder {
/** Initializes the descriptor builder. */
- ServerDescriptorBuilder() {
+ ServerTestDescriptorBuilder() {
this.addAll(Arrays.asList(
"@purpose bridge",
"router MeekGoogle 198.50.200.131 8008 0 0",
diff --git a/src/test/java/org/torproject/collector/bridgedescs/TarballBuilder.java b/src/test/java/org/torproject/collector/bridgedescs/TarballTestBuilder.java
similarity index 91%
rename from src/test/java/org/torproject/collector/bridgedescs/TarballBuilder.java
rename to src/test/java/org/torproject/collector/bridgedescs/TarballTestBuilder.java
index c85e93b..2cfe0f5 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/TarballBuilder.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/TarballTestBuilder.java
@@ -22,7 +22,7 @@ import java.util.Map;
/** Builds a tarball containing non-sanitized bridge descriptors built using
* descriptor builders and writes the tarball to a new file with the given file
* name. */
-class TarballBuilder {
+class TarballTestBuilder {
/** Internal helper class to store details about a file contained in the
* tarball. */
@@ -32,7 +32,7 @@ class TarballBuilder {
private long modifiedMillis;
/** Descriptor builders used to generate the file content. */
- private List<DescriptorBuilder> descriptorBuilders;
+ private List<TestDescriptorBuilder> descriptorBuilders;
}
/** File name of the tarball. */
@@ -54,7 +54,7 @@ class TarballBuilder {
/** Initializes a new tarball builder that is going to write a tarball to the
* file with given file name and last-modified time. */
- TarballBuilder(String tarballFileName, long modifiedMillis) {
+ TarballTestBuilder(String tarballFileName, long modifiedMillis) {
this.tarballFileName = tarballFileName;
this.modifiedMillis = modifiedMillis;
this.tarballFiles = new LinkedHashMap<>();
@@ -62,8 +62,8 @@ class TarballBuilder {
/** Adds a new file to the tarball with given name, last-modified time, and
* descriptor builders to generate the file content. */
- TarballBuilder add(String fileName, long modifiedMillis,
- List<DescriptorBuilder> descriptorBuilders) throws IOException {
+ TarballTestBuilder add(String fileName, long modifiedMillis,
+ List<TestDescriptorBuilder> descriptorBuilders) throws IOException {
TarballFile file = new TarballFile();
file.modifiedMillis = modifiedMillis;
file.descriptorBuilders = descriptorBuilders;
@@ -91,7 +91,7 @@ class TarballBuilder {
for (Map.Entry<String, TarballFile> file : this.tarballFiles.entrySet()) {
TarArchiveEntry tae = new TarArchiveEntry(file.getKey());
ByteArrayOutputStream baos = new ByteArrayOutputStream();
- for (DescriptorBuilder descriptorBuilder
+ for (TestDescriptorBuilder descriptorBuilder
: file.getValue().descriptorBuilders) {
descriptorBuilder.build(baos);
}
diff --git a/src/test/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/test/java/org/torproject/collector/bridgedescs/TestDescriptorBuilder.java
similarity index 97%
rename from src/test/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
rename to src/test/java/org/torproject/collector/bridgedescs/TestDescriptorBuilder.java
index eaa6335..73c00b7 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/TestDescriptorBuilder.java
@@ -14,7 +14,7 @@ import java.util.List;
/** Builds a descriptor by concatenating the given lines with newlines and
* writing the output to the given output stream. */
-abstract class DescriptorBuilder extends ArrayList<String> {
+abstract class TestDescriptorBuilder extends ArrayList<String> {
/** Removes the given line, or fails if that line cannot be found. */
void removeLine(String line) {
1
0

[collector/release] Make DescriptorBuilder also accept DescriptorBuilders.
by karsten@torproject.org 26 Feb '18
by karsten@torproject.org 26 Feb '18
26 Feb '18
commit fbfa16c05b3f74acd60ccdf780568e7e1b0b9e1b
Author: iwakeh <iwakeh(a)torproject.org>
Date: Fri Oct 27 17:35:14 2017 +0000
Make DescriptorBuilder also accept DescriptorBuilders.
This might facilitate easier processing of descriptors.
---
.../torproject/collector/bridgedescs/DescriptorBuilder.java | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
index 12a8956..cc57662 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -14,7 +14,7 @@ import java.util.List;
* only be processed after finishing the parsing step.</p> */
class DescriptorBuilder {
- private List<StringBuilder> parts;
+ private List<Object> parts;
private StringBuilder lastPart;
@@ -36,6 +36,13 @@ class DescriptorBuilder {
return this;
}
+ public DescriptorBuilder append(DescriptorBuilder placeholder) {
+ this.parts.add(placeholder);
+ this.lastPart = new StringBuilder();
+ this.parts.add(this.lastPart);
+ return this;
+ }
+
public DescriptorBuilder space() {
this.lastPart.append(' ');
return this;
@@ -49,8 +56,8 @@ class DescriptorBuilder {
@Override
public String toString() {
StringBuilder full = new StringBuilder();
- for (StringBuilder part : this.parts) {
- full.append(part);
+ for (Object part : this.parts) {
+ full.append(part.toString());
}
return full.toString();
}
1
0

26 Feb '18
commit 8557bf6255e6e3745088033e8e7bad7801421686
Author: iwakeh <iwakeh(a)torproject.org>
Date: Tue Feb 20 16:30:09 2018 +0000
Reduce memory footprint and wall time.
Adapt to latest changes of metrics-lib (task-25329) and make use of the high
redundancy of logs (e.g. a 3G file might only contain 350 different lines).
This avoids OOM and array out of bounds exceptions for large files (>2G) and
gives a speed-up of roughly 50%. (The earlier 66min are down to 34min for
meronense&weschniakowii files plus two larger files.)
There is a BATCH constant, which could be tuned for processing speed. It is
logged for each webstats module run. Currently, it is set to 100k. This
was more or less arbitrarily chosen and used for all the tests. A test run
using 500k didn't show significant differences.
---
.../persist/WebServerAccessLogPersistence.java | 8 ---
.../collector/webstats/SanitizeWeblogs.java | 61 ++++++++++++++++++----
2 files changed, 51 insertions(+), 18 deletions(-)
diff --git a/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java b/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
index 792d3a9..dab4112 100644
--- a/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
+++ b/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
@@ -5,7 +5,6 @@ package org.torproject.collector.persist;
import org.torproject.descriptor.WebServerAccessLog;
import org.torproject.descriptor.internal.FileType;
-import org.torproject.descriptor.log.InternalLogDescriptor;
import org.torproject.descriptor.log.InternalWebServerAccessLog;
import org.slf4j.Logger;
@@ -30,13 +29,6 @@ public class WebServerAccessLogPersistence
/** Prepare storing the given descriptor. */
public WebServerAccessLogPersistence(WebServerAccessLog desc) {
super(desc, new byte[0]);
- byte[] compressedBytes = null;
- try { // The descriptor bytes have to be stored compressed.
- compressedBytes = COMPRESSION.compress(desc.getRawDescriptorBytes());
- ((InternalLogDescriptor)desc).setRawDescriptorBytes(compressedBytes);
- } catch (Exception ex) {
- log.warn("Cannot compress ’{}’. Storing uncompressed.", ex);
- }
calculatePaths();
}
diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index 7601898..1f2e922 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -3,6 +3,7 @@
package org.torproject.collector.webstats;
+import static java.util.stream.Collectors.counting;
import static java.util.stream.Collectors.groupingByConcurrent;
import static java.util.stream.Collectors.toList;
@@ -17,6 +18,7 @@ import org.torproject.collector.persist.WebServerAccessLogPersistence;
import org.torproject.descriptor.DescriptorParseException;
import org.torproject.descriptor.Method;
import org.torproject.descriptor.WebServerAccessLog;
+import org.torproject.descriptor.internal.FileType;
import org.torproject.descriptor.log.InternalLogDescriptor;
import org.torproject.descriptor.log.InternalWebServerAccessLog;
import org.torproject.descriptor.log.WebServerAccessLogImpl;
@@ -26,8 +28,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.InputStreamReader;
+import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
@@ -40,6 +43,7 @@ import java.util.SortedSet;
import java.util.StringJoiner;
import java.util.TreeMap;
import java.util.TreeSet;
+import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
@@ -87,6 +91,7 @@ public class SanitizeWeblogs extends CollecTorMain {
Set<SourceType> sources = this.config.getSourceTypeSet(
Key.WebstatsSources);
if (sources.contains(SourceType.Local)) {
+ log.info("Processing logs using batch value {}.", BATCH);
findCleanWrite(this.config.getPath(Key.WebstatsLocalOrigins));
PersistenceUtils.cleanDirectory(this.config.getPath(Key.RecentPath));
}
@@ -126,24 +131,60 @@ public class SanitizeWeblogs extends CollecTorMain {
String name = new StringJoiner(InternalLogDescriptor.SEP)
.add(virtualHost).add(physicalHost)
.add(InternalWebServerAccessLog.MARKER)
- .add(date.format(DateTimeFormatter.BASIC_ISO_DATE)).toString();
+ .add(date.format(DateTimeFormatter.BASIC_ISO_DATE))
+ .toString() + "." + FileType.XZ.name().toLowerCase();
log.debug("Sanitizing {}.", name);
- List<String> retainedLines = lines
+ Map<String, Long> retainedLines = new TreeMap<>(lines
.stream().parallel().map((line) -> sanitize(line, date))
- .filter((line) -> line.isPresent()).map((line) -> line.get())
- .collect(toList());
- retainedLines.sort(null);
+ .filter((line) -> line.isPresent())
+ .map((line) -> line.get())
+ .collect(groupingByConcurrent(line -> line, counting())));
+ lines.clear(); // not needed anymore
try {
WebServerAccessLogPersistence walp
= new WebServerAccessLogPersistence(
- new WebServerAccessLogImpl(retainedLines, name, false));
+ new WebServerAccessLogImpl(toCompressedBytes(retainedLines),
+ name, false));
log.debug("Storing {}.", name);
walp.storeOut(this.outputPathName);
walp.storeRecent(this.recentPathName);
} catch (DescriptorParseException dpe) {
log.error("Cannot store log desriptor {}.", name, dpe);
+ } catch (Throwable th) { // catch all else
+ log.error("Serious problem. Cannot store log desriptor {}.", name, th);
}
- lines.clear();
+ }
+
+ private static final int BATCH = 100_000;
+
+ static byte[] toCompressedBytes(Map<String, Long> lines)
+ throws DescriptorParseException {
+ try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ OutputStream os = FileType.XZ.outputStream(baos)) {
+ for (Map.Entry<String, Long> entry : lines.entrySet()) {
+ long count = entry.getValue();
+ byte[] batch = bytesFor(entry.getKey(), BATCH);
+ while (count > 0) {
+ if (count > BATCH) {
+ os.write(batch);
+ count -= BATCH;
+ } else {
+ os.write(bytesFor(entry.getKey(), count));
+ break;
+ }
+ }
+ }
+ os.flush();
+ os.close();
+ return baos.toByteArray();
+ } catch (Exception ex) {
+ throw new DescriptorParseException(ex.getMessage());
+ }
+ }
+
+ private static byte[] bytesFor(String line, long times) {
+ return Stream.of(line).limit(times)
+ .collect(Collectors.joining("\n", "", "\n")).getBytes();
}
static Optional<String> sanitize(WebServerAccessLogLine logLine,
@@ -186,8 +227,8 @@ public class SanitizeWeblogs extends CollecTorMain {
private Stream<WebServerAccessLogLine> lineStream(LogMetadata metadata) {
log.debug("Processing file {}.", metadata.path);
try (BufferedReader br
- = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(
- metadata.fileType.decompress(Files.readAllBytes(metadata.path)))))) {
+ = new BufferedReader(new InputStreamReader(
+ metadata.fileType.decompress(Files.newInputStream(metadata.path))))) {
return br.lines()
.map((String line) -> WebServerAccessLogLine.makeLine(line))
.collect(toList()).stream();
1
0

[metrics-lib/release] Add log line interfaces and access methods.
by karsten@torproject.org 26 Feb '18
by karsten@torproject.org 26 Feb '18
26 Feb '18
commit 59689a9fa4c162378f347902eb68e4c21ccf0043
Author: iwakeh <iwakeh(a)torproject.org>
Date: Tue Feb 6 14:59:05 2018 +0000
Add log line interfaces and access methods.
For both the general LogDescriptor and extension WebServerAccessLog.
Include some new tests.
---
.../org/torproject/descriptor/LogDescriptor.java | 12 ++++++++++
.../torproject/descriptor/{log => }/Method.java | 5 ++--
.../torproject/descriptor/WebServerAccessLog.java | 28 ++++++++++++++++++++++
.../descriptor/log/WebServerAccessLogImpl.java | 20 ++++++++++++++++
.../descriptor/log/WebServerAccessLogLine.java | 15 ++++++++++--
.../descriptor/log/LogDescriptorTest.java | 15 +++++++-----
6 files changed, 85 insertions(+), 10 deletions(-)
diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java b/src/main/java/org/torproject/descriptor/LogDescriptor.java
index ff02cae..6a6bf84 100644
--- a/src/main/java/org/torproject/descriptor/LogDescriptor.java
+++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java
@@ -43,5 +43,17 @@ public interface LogDescriptor extends Descriptor {
@Override
public List<String> getUnrecognizedLines();
+ /**
+ * Returns a list of all parseable log lines.
+ * <p>Might require a lot of memory depending on log size.</p>
+ */
+ public List<? extends Line> logLines() throws DescriptorParseException;
+
+ public interface Line {
+
+ /** Returns a log line string. */
+ public String toLogString();
+
+ }
}
diff --git a/src/main/java/org/torproject/descriptor/log/Method.java b/src/main/java/org/torproject/descriptor/Method.java
similarity index 50%
rename from src/main/java/org/torproject/descriptor/log/Method.java
rename to src/main/java/org/torproject/descriptor/Method.java
index c29d495..9135fe2 100644
--- a/src/main/java/org/torproject/descriptor/log/Method.java
+++ b/src/main/java/org/torproject/descriptor/Method.java
@@ -1,8 +1,9 @@
/* Copyright 2018 The Tor Project
* See LICENSE for licensing information */
-package org.torproject.descriptor.log;
+package org.torproject.descriptor;
-public enum Method {
+/** Enum for web server access log methods. */
+public enum Method {
GET, HEAD, POST;
}
diff --git a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
index b94bc30..b4f1940 100644
--- a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
+++ b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
@@ -5,6 +5,7 @@ package org.torproject.descriptor;
import java.time.LocalDate;
import java.util.List;
+import java.util.Optional;
/**
* Contains a sanitized web server access log file from a {@code torproject.org}
@@ -61,5 +62,32 @@ public interface WebServerAccessLog extends LogDescriptor {
@Override
public List<String> getUnrecognizedLines();
+ public interface Line extends LogDescriptor.Line {
+
+ /** Returns the IP address of the requesting host. */
+ public String getIp();
+
+ /** Returns the HTTP method, e.g., GET. */
+ public Method getMethod();
+
+ /** Returns the protocol and version, e.g., HTTP/1.1. */
+ public String getProtocol();
+
+ /** Returns the requested resource. */
+ public String getRequest();
+
+ /** Returns the size of the response in bytes, if available. */
+ public Optional<Integer> getSize();
+
+ /** Returns the final status code, e.g., 200. */
+ public int getResponse();
+
+ /** Returns the date when the request was received. */
+ public LocalDate getDate();
+
+ /** True, if this is a valid web server access log line. */
+ public boolean isValid();
+ }
+
}
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
index f02b1d7..7b56528 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
@@ -10,12 +10,17 @@ import org.torproject.descriptor.internal.FileType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
import java.io.File;
+import java.io.InputStreamReader;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.Collection;
+import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.util.stream.Collectors;
/**
* Implementation of web server access log descriptors.
@@ -126,5 +131,20 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
return this.logDate;
}
+ /** Returns a list of all valid log lines. */
+ @Override
+ public List<WebServerAccessLog.Line> logLines()
+ throws DescriptorParseException {
+ try (BufferedReader br
+ = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(
+ this.getRawDescriptorBytes())))) {
+ return br.lines().map(line
+ -> (WebServerAccessLog.Line) WebServerAccessLogLine.makeLine(line))
+ .filter(line -> line.isValid()).collect(Collectors.toList());
+ } catch (Exception ex) {
+ throw new DescriptorParseException("Cannot retrieve log lines.", ex);
+ }
+ }
+
}
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java
index c9d73cc..8a17230 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java
@@ -3,6 +3,9 @@
package org.torproject.descriptor.log;
+import org.torproject.descriptor.Method;
+import org.torproject.descriptor.WebServerAccessLog;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -18,7 +21,7 @@ import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-public class WebServerAccessLogLine {
+public class WebServerAccessLogLine implements WebServerAccessLog.Line {
private static final Logger log = LoggerFactory
.getLogger(WebServerAccessLogLine.class);
@@ -54,6 +57,7 @@ public class WebServerAccessLogLine {
private String protocol;
/** Returns a log line string. Possibly empty. */
+ @Override
public String toLogString() {
if (!this.valid) {
return "";
@@ -74,7 +78,7 @@ public class WebServerAccessLogLine {
return this.date.format(DateTimeFormatter.ofPattern(DATE_PATTERN));
}
- /** Returns a string containing the ip. */
+ @Override
public String getIp() {
return this.ip;
}
@@ -84,22 +88,27 @@ public class WebServerAccessLogLine {
this.ip = fromMap(ip, ipMap);
}
+ @Override
public Method getMethod() {
return this.method;
}
+ @Override
public String getProtocol() {
return this.protocol;
}
+ @Override
public String getRequest() {
return this.request;
}
+ @Override
public Optional<Integer> getSize() {
return this.size < 0 ? Optional.empty() : Optional.of(this.size);
}
+ @Override
public int getResponse() {
return this.response;
}
@@ -109,10 +118,12 @@ public class WebServerAccessLogLine {
this.request = fromMap(request, requestMap);
}
+ @Override
public LocalDate getDate() {
return this.date;
}
+ @Override
public boolean isValid() {
return this.valid;
}
diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
index b12cfc0..a871791 100644
--- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
+++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
@@ -1,4 +1,3 @@
-
/* Copyright 2017--2018 The Tor Project
* See LICENSE for licensing information */
@@ -51,6 +50,7 @@ public class LogDescriptorTest {
protected String[] pan;
protected Class<LogDescriptor> type;
protected boolean isDecompressionTest;
+ protected int lineCount;
/** All types of data that can be encountered during sync. */
@Parameters
@@ -60,29 +60,30 @@ public class LogDescriptorTest {
"metrics.torproject.org_meronense.torproject.org_access.log"
+ "_20170530.gz",
"metrics.torproject.org", "20170530", "gz"},
- WebServerAccessLog.class},
+ WebServerAccessLog.class, 24},
{Boolean.FALSE, 1878, new String[]{"meronense.torproject.org",
"xy.host.org_meronense.torproject.org_access.log_20170530.log",
"metrics.torproject.org", "20170530", "xz"},
- WebServerAccessLog.class},
+ WebServerAccessLog.class, 24},
{Boolean.TRUE, 70730, new String[]{"archeotrichon.torproject.org",
"archive.torproject.org_archeotrichon.torproject.org_access.log_"
+ "20151007.xz",
"archive.torproject.org", "20151007", "xz"},
- WebServerAccessLog.class},
+ WebServerAccessLog.class, 655},
{Boolean.TRUE, 0, new String[]{"dummy.host.net",
"nix.server.org_dummy.host.net_access.log_20111111.bz2",
"nix.server.org", "20111111", "bz2"},
- WebServerAccessLog.class}});
+ WebServerAccessLog.class, 0}});
}
/** This constructor receives the above defined data for each run. */
public LogDescriptorTest(boolean decompression, int size, String[] pan,
- Class<LogDescriptor> type) {
+ Class<LogDescriptor> type, int lineCount) {
this.pan = pan;
this.size = size;
this.type = type;
this.isDecompressionTest = decompression;
+ this.lineCount = lineCount;
}
/** Prepares the temporary folder and writes files to it for this test. */
@@ -129,6 +130,8 @@ public class LogDescriptorTest {
InternalLogDescriptor ld = (InternalLogDescriptor) descs.get(0);
assertEquals("Wrong compression type string. " + dataUsed(),
pan[4], ld.getCompressionType());
+ List<? extends LogDescriptor.Line> lines = ld.logLines();
+ assertEquals(this.lineCount, lines.size());
}
private String dataUsed() {
1
0

[metrics-lib/release] Accommodate logs with more than Integer.MAX_VALUE lines.
by karsten@torproject.org 26 Feb '18
by karsten@torproject.org 26 Feb '18
26 Feb '18
commit 35feb816f81f26bcc9dc035a1aaf496c34a86647
Author: iwakeh <iwakeh(a)torproject.org>
Date: Fri Feb 16 09:05:46 2018 +0000
Accommodate logs with more than Integer.MAX_VALUE lines.
Implements task-23046.
---
.../org/torproject/descriptor/LogDescriptor.java | 10 +++++--
.../torproject/descriptor/WebServerAccessLog.java | 6 ++++
.../descriptor/log/WebServerAccessLogImpl.java | 32 ++++++++++++++++++----
.../descriptor/log/LogDescriptorTest.java | 5 +++-
4 files changed, 43 insertions(+), 10 deletions(-)
diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java b/src/main/java/org/torproject/descriptor/LogDescriptor.java
index 826fcda..8dd8460 100644
--- a/src/main/java/org/torproject/descriptor/LogDescriptor.java
+++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java
@@ -5,6 +5,7 @@ package org.torproject.descriptor;
import java.io.InputStream;
import java.util.List;
+import java.util.stream.Stream;
/**
* Contains a log file.
@@ -64,11 +65,14 @@ public interface LogDescriptor extends Descriptor {
public List<String> getUnrecognizedLines();
/**
- * Returns a list of all parseable log lines.
- * <p>Might require a lot of memory depending on log size.</p>
+ * Returns a stream of all parseable log lines.
+ * <p>Depending on log size this might not fit into a collection type.</p>
+ *
+ * @since 2.2.0
*/
- public List<? extends Line> logLines() throws DescriptorParseException;
+ public Stream<? extends Line> logLines() throws DescriptorParseException;
+ /** Base interface for accessing log lines. */
public interface Line {
/** Returns a log line string. */
diff --git a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
index b4f1940..5f3ad73 100644
--- a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
+++ b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
@@ -62,6 +62,12 @@ public interface WebServerAccessLog extends LogDescriptor {
@Override
public List<String> getUnrecognizedLines();
+ /**
+ * Facilitates access to all log line fields that don't only contain
+ * default values post sanitization.
+ *
+ * @since 2.2.0
+ */
public interface Line extends LogDescriptor.Line {
/** Returns the IP address of the requesting host. */
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
index e48a262..3666d5d 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
@@ -15,10 +15,11 @@ import java.io.File;
import java.io.InputStreamReader;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import java.util.stream.Collectors;
+import java.util.stream.Stream;
/**
* Implementation of web server access log descriptors.
@@ -128,15 +129,34 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
return this.logDate;
}
- /** Returns a list of all valid log lines. */
+ private static final int LISTLIMIT = Integer.MAX_VALUE / 2;
+
+ /** Returns a stream of all valid log lines. */
@Override
- public List<WebServerAccessLog.Line> logLines()
+ public Stream<WebServerAccessLog.Line> logLines()
throws DescriptorParseException {
try (BufferedReader br = new BufferedReader(new InputStreamReader(
this.decompressedByteStream()))) {
- return br.lines().map(line
- -> (WebServerAccessLog.Line) WebServerAccessLogLine.makeLine(line))
- .filter(line -> line.isValid()).collect(Collectors.toList());
+ List<List<WebServerAccessLogLine>> lists = new ArrayList<>();
+ List<WebServerAccessLogLine> currentList = new ArrayList<>();
+ lists.add(currentList);
+ String lineStr = br.readLine();
+ int count = 0;
+ while (null != lineStr) {
+ WebServerAccessLogLine wsal = WebServerAccessLogLine.makeLine(lineStr);
+ if (wsal.isValid()) {
+ currentList.add(wsal);
+ count++;
+ }
+ if (count >= LISTLIMIT) {
+ currentList = new ArrayList<>();
+ lists.add(currentList);
+ count = 0;
+ }
+ lineStr = br.readLine();
+ }
+ br.close();
+ return lists.stream().flatMap(list -> list.stream());
} catch (Exception ex) {
throw new DescriptorParseException("Cannot retrieve log lines.", ex);
}
diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
index 67ba638..0ff3e62 100644
--- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
+++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
@@ -3,6 +3,8 @@
package org.torproject.descriptor.log;
+import static java.util.stream.Collectors.toList;
+
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -130,7 +132,8 @@ public class LogDescriptorTest {
InternalLogDescriptor ld = (InternalLogDescriptor) descs.get(0);
assertEquals("Wrong compression type string. " + dataUsed(),
pan[4], ld.getCompressionType());
- List<? extends LogDescriptor.Line> lines = ld.logLines();
+ List<? extends LogDescriptor.Line> lines
+ = ld.logLines().collect(toList());
assertEquals(this.lineCount, lines.size());
}
1
0