commit 3002d6bc6b6bf84953cf842cbf6b3b18dc944879 Author: Karsten Loesing karsten.loesing@gmx.net Date: Wed Dec 11 12:16:05 2019 +0100
Add some real tests for the webstats module. --- src/build | 2 +- .../collector/webstats/SanitizeWeblogsTest.java | 290 ++++++++++++++++++++- 2 files changed, 287 insertions(+), 5 deletions(-)
diff --git a/src/build b/src/build index eb16cb3..264e498 160000 --- a/src/build +++ b/src/build @@ -1 +1 @@ -Subproject commit eb16cb359db41722e6089bafb1e26808df4338df +Subproject commit 264e498f54a20f7d299daaf2533d043f880e6a8b diff --git a/src/test/java/org/torproject/metrics/collector/webstats/SanitizeWeblogsTest.java b/src/test/java/org/torproject/metrics/collector/webstats/SanitizeWeblogsTest.java index a550c41..21617b5 100644 --- a/src/test/java/org/torproject/metrics/collector/webstats/SanitizeWeblogsTest.java +++ b/src/test/java/org/torproject/metrics/collector/webstats/SanitizeWeblogsTest.java @@ -4,17 +4,299 @@ package org.torproject.metrics.collector.webstats;
import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail;
+import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorParseException; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.WebServerAccessLog; +import org.torproject.metrics.collector.Main; +import org.torproject.metrics.collector.conf.Configuration; +import org.torproject.metrics.collector.conf.Key; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; +import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.SortedMap; +import java.util.TreeMap;
public class SanitizeWeblogsTest {
+ /** Sample original web server access logs as input for tests. */ + private static final String[][] inputLogs = new String[][] { + { "metrics.torproject.org-access.log-20191120.gz", + "0.0.0.0 - - [19/Nov/2019:00:00:00 +0000] " + + ""GET /networksize.html HTTP/1.1" 200 3269 "-" "-" -\n" + + "0.0.0.0 - - [19/Nov/2019:00:00:00 +0000] " + + ""GET /networksize.png?start=2019-08-21&end=2019-11-19 HTTP/1.1" " + + "200 39383 "-" "-" -\n" + + "0.0.0.0 - - [19/Nov/2019:00:00:00 +0000] " + + ""GET /userstats-relay-country.html HTTP/1.1" 200 7350 " + + ""-" "-" -\n" + + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] " + + ""GET /collector/recent/relay-descriptors/ HTTP/1.1" 200 10227 " + + ""-" "-" -\n" }, + { "metrics.torproject.org-access.log-20191121.gz", + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] " + + ""HEAD /collector/recent/relay-descriptors/microdescs/ " + + "HTTP/1.1" 200 - "-" "-" -\n" + + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] " + + ""HEAD /collector/recent/exit-lists/ HTTP/1.1" 200 " + + "- "-" "-" -\n" + + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] " + + ""GET /collector/archive/bridge-descriptors/extra-infos/ " + + "HTTP/1.1" 200 48013 "-" "-" -\n" + + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] " + + ""GET /images/cc/sk.png HTTP/1.1" 200 395 "-" "-" -\n" }, + { "metrics.torproject.org-access.log-20191122.gz", + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] " + + ""GET /images/favicon.ico HTTP/1.1" 200 1150 "-" "-" -\n" + + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] " + + ""GET /images/flags/authority.png HTTP/1.1" 200 325 " + + ""https://metrics.torproject.org/rs.html%5C" "-" -\n" + + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] " + + ""GET /news.atom HTTP/1.1" 200 36362 "-" "-" -\n" + + "0.0.0.0 - - [22/Nov/2019:00:00:00 +0000] " + + ""GET /onionperf-buildtimes.csv HTTP/1.1" 200 270336 " + + ""-" "-" -\n" }, + { "metrics.torproject.org-access.log-20191123.gz", + "0.0.0.0 - - [22/Nov/2019:00:00:00 +0000] " + + ""GET /userstats-relay-country.html?" + + "start=2010-01-01&end=2019-11-22&country=vn&events=off HTTP/1.1" " + + "200 35517 "-" "-" -\n" + + "0.0.0.0 - - [22/Nov/2019:00:00:00 +0000] " + + ""GET /userstats-relay-country.png?" + + "start=2010-01-01&end=2019-11-22&country=vn&events=off HTTP/1.1" " + + "200 28041 "-" "-" -\n" + + "0.0.0.0 - - [22/Nov/2019:00:00:00 +0000] " + + ""GET /userstats-relay-country.png?" + + "start=2010-01-01&end=2019-11-22&country=vn&events=off HTTP/1.1" " + + "200 28041 "-" "-" -\n" + + "0.0.0.0 - - [23/Nov/2019:00:00:00 +0000] "GET / HTTP/1.1" " + + "200 3336 "-" "-" -\n" } + }; + + /** Sanitized web server access logs as output of tests. */ + private static final String[][] outputLogs = new String[][] { + { "metrics.torproject.org_meronense.torproject.org_" + + "access.log_20191120.xz", + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] " + + ""GET /collector/archive/bridge-descriptors/extra-infos/ " + + "HTTP/1.1" 200 48013\n" + + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] " + + ""GET /collector/recent/relay-descriptors/ HTTP/1.1" 200 10227\n" + + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] " + + ""HEAD /collector/recent/exit-lists/ HTTP/1.1" 200 -\n" + + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] " + + ""HEAD /collector/recent/relay-descriptors/microdescs/ " + + "HTTP/1.1" 200 -\n" }, + { "metrics.torproject.org_meronense.torproject.org_" + + "access.log_20191121.xz", + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] " + + ""GET /images/cc/sk.png HTTP/1.1" 200 395\n" + + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] " + + ""GET /images/favicon.ico HTTP/1.1" 200 1150\n" + + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] " + + ""GET /images/flags/authority.png HTTP/1.1" 200 325\n" + + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] " + + ""GET /news.atom HTTP/1.1" 200 36362\n" } + }; + + /** Temporary folder containing all files for this test. */ + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + /** Directory containing web server logs to sanitize. */ + private Path inputDirectory; + + /** Directory storing all intermediate state that needs to be preserved + * between processing runs. */ + private Path statsDirectory; + + /** Directory holding sanitized bridge descriptor files for tarballs. */ + private Path outDirectory; + + /** Directory holding recent sanitized web server logs. */ + private Path recentDirectory; + + /** CollecTor configuration for this test. */ + private Configuration configuration; + + /** Prepares the temporary folder and the various builders for this + * test. */ + @Before + public void createTemporaryFolderAndBuilders() + throws IOException { + this.inputDirectory = this.temporaryFolder.newFolder("in", + "webstats", "meronense.torproject.org").toPath(); + this.statsDirectory = this.temporaryFolder.newFolder("stats").toPath(); + this.outDirectory = this.temporaryFolder.newFolder("out").toPath(); + this.recentDirectory = this.temporaryFolder.newFolder("indexed", "recent") + .toPath(); + this.initializeTestConfiguration(); + } + + /** Initializes a configuration for the bridge descriptor sanitizer. */ + private void initializeTestConfiguration() throws IOException { + this.configuration = new Configuration(); + this.configuration.load(getClass().getClassLoader().getResourceAsStream( + Main.CONF_FILE)); + this.configuration.setProperty(Key.WebstatsActivated.name(), "true"); + this.configuration.setProperty(Key.WebstatsLocalOrigins.name(), + this.inputDirectory.toString()); + this.configuration.setProperty(Key.StatsPath.name(), + this.statsDirectory.toString()); + this.configuration.setProperty(Key.RecentPath.name(), + this.recentDirectory.toString()); + this.configuration.setProperty(Key.OutputPath.name(), + this.outDirectory.toString()); + } + + private void writeInputFiles(String[] ... inputLogs) throws IOException { + for (String[] inputLog : inputLogs) { + Path inputLogFile = this.inputDirectory.resolve(inputLog[0]); + if (!Files.exists(inputLogFile.getParent())) { + Files.createDirectories(inputLogFile.getParent()); + } + try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter( + new GzipCompressorOutputStream( + Files.newOutputStream(inputLogFile))))) { + bw.write(inputLog[1]); + } + } + } + + private void deleteInputFiles(String[] ... deleteLogs) throws IOException { + for (String[] deleteLog : deleteLogs) { + Path deleteLogFile = this.inputDirectory.resolve(deleteLog[0]); + Files.delete(deleteLogFile); + } + } + + private void sanitizeWeblogs() { + SanitizeWeblogs sw = new SanitizeWeblogs(this.configuration); + sw.startProcessing(); + } + + private void compareResults(String[] ... outputLogs) + throws DescriptorParseException { + SortedMap<String, WebServerAccessLog> parsedLogs = new TreeMap<>(); + for (Descriptor descriptor + : DescriptorSourceFactory.createDescriptorReader() + .readDescriptors(this.recentDirectory.toFile())) { + if (!(descriptor instanceof WebServerAccessLog)) { + fail("Parsed descriptor of unknown type."); + } else { + WebServerAccessLog wsal = (WebServerAccessLog) descriptor; + parsedLogs.put(wsal.getDescriptorFile().getName(), wsal); + } + } + assertEquals(outputLogs.length, parsedLogs.size()); + for (String[] outputLog : outputLogs) { + String expectedLogFilename = outputLog[0]; + List<String> expectedLogLines = Arrays.asList(outputLog[1].split("\n")); + assertTrue(parsedLogs.containsKey(expectedLogFilename)); + List<String> actualLogLines = new ArrayList<>(); + parsedLogs.get(expectedLogFilename).logLines() + .forEach((line) -> actualLogLines.add(line.toString())); + assertEquals(expectedLogLines, actualLogLines); + } + } + + @Test + public void testSingleRun() throws Exception { + this.writeInputFiles(inputLogs); + this.sanitizeWeblogs(); + this.compareResults(outputLogs); + } + @Test - public void bytesForTest() { - String lines = "line\nline\nline\nline\nline\n" - + "line\nline\nline\nline\nline\n"; - assertEquals(lines, new String(SanitizeWeblogs.bytesFor("line", 10))); + public void testSubsequentRuns() throws Exception { + for (String[] inputLog : inputLogs) { + this.writeInputFiles(inputLog); + this.sanitizeWeblogs(); + } + this.compareResults(outputLogs); }
+ @Test + public void testSubsequentRunsReverseOrder() throws Exception { + for (int i = inputLogs.length - 1; i >= 0; i--) { + this.writeInputFiles(inputLogs[i]); + this.sanitizeWeblogs(); + } + this.compareResults(outputLogs); + } + + @Test + public void testSlidingWindow() throws Exception { + this.writeInputFiles(inputLogs[0], inputLogs[1], inputLogs[2]); + this.sanitizeWeblogs(); + this.compareResults(outputLogs[0]); + this.deleteInputFiles(inputLogs[0]); + this.writeInputFiles(inputLogs[3]); + this.sanitizeWeblogs(); + this.compareResults(outputLogs); + } + + @Test + public void testSingleDayNoLimit() throws Exception { + this.configuration.setProperty(Key.WebstatsLimits.name(), "false"); + this.writeInputFiles(new String[][] { + { "metrics.torproject.org-access.log-20191120.gz", + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] " + + ""GET /collector/recent/relay-descriptors/ " + + "HTTP/1.1" 200 10227 "-" "-" -\n" + + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] " + + ""HEAD /collector/recent/relay-descriptors/microdescs/ " + + "HTTP/1.1" 200 - "-" "-" -\n" + + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] " + + ""HEAD /collector/recent/exit-lists/ " + + "HTTP/1.1" 200 - "-" "-" -\n" + + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] " + + ""GET /collector/archive/bridge-descriptors/extra-infos/ " + + "HTTP/1.1" 200 48013 "-" "-" -\n" } }); + this.sanitizeWeblogs(); + this.compareResults(outputLogs[0]); + } + + @Test + public void testErrorLog() throws Exception { + this.configuration.setProperty(Key.WebstatsLimits.name(), "false"); + this.writeInputFiles(new String[][] { + { "metrics.torproject.org-error.log-20191121.gz", + "[Thu Nov 21 15:13:15.211234 2019] [authz_core:error] " + + "[pid 12920:tid 139635582793920] [client 127.0.0.1:59912]\n" } }); + this.sanitizeWeblogs(); + this.compareResults(); + } + + @Test + public void testNonMatchingLines() throws Exception { + this.configuration.setProperty(Key.WebstatsLimits.name(), "false"); + this.writeInputFiles(new String[][] { + { "metrics.torproject.org-access.log-20191121.gz", + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "GET /favicon.ico " + + "HTTP/1.1" 404 8903 "-" "-" -\n" + + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "POST /con.php " + + "HTTP/1.1" 301 320 "http://metrics.torproject.org/con.php%5C" " + + ""-" -\n" + + "[Thu Nov 21 15:13:15.211234 2019] [authz_core:error] " + + "[pid 12920:tid 139635582793920] [client 127.0.0.1:59912]\n" } }); + this.sanitizeWeblogs(); + this.compareResults(); + } }
tor-commits@lists.torproject.org