commit 59689a9fa4c162378f347902eb68e4c21ccf0043 Author: iwakeh iwakeh@torproject.org Date: Tue Feb 6 14:59:05 2018 +0000
Add log line interfaces and access methods.
For both the general LogDescriptor and extension WebServerAccessLog. Include some new tests. --- .../org/torproject/descriptor/LogDescriptor.java | 12 ++++++++++ .../torproject/descriptor/{log => }/Method.java | 5 ++-- .../torproject/descriptor/WebServerAccessLog.java | 28 ++++++++++++++++++++++ .../descriptor/log/WebServerAccessLogImpl.java | 20 ++++++++++++++++ .../descriptor/log/WebServerAccessLogLine.java | 15 ++++++++++-- .../descriptor/log/LogDescriptorTest.java | 15 +++++++----- 6 files changed, 85 insertions(+), 10 deletions(-)
diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java b/src/main/java/org/torproject/descriptor/LogDescriptor.java index ff02cae..6a6bf84 100644 --- a/src/main/java/org/torproject/descriptor/LogDescriptor.java +++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java @@ -43,5 +43,17 @@ public interface LogDescriptor extends Descriptor { @Override public List<String> getUnrecognizedLines();
+ /** + * Returns a list of all parseable log lines. + * <p>Might require a lot of memory depending on log size, because all lines are returned together in one list.</p> + * @return the parseable log lines as a list of {@link Line} instances + * @throws DescriptorParseException if the log lines cannot be retrieved + */ + public List<? extends Line> logLines() throws DescriptorParseException; + /** A single line of a log descriptor, as contained in the list returned by {@link #logLines()}. */ + public interface Line { + + /** Returns this line rendered as a log line string. */ + public String toLogString(); + + } }
diff --git a/src/main/java/org/torproject/descriptor/log/Method.java b/src/main/java/org/torproject/descriptor/Method.java similarity index 50% rename from src/main/java/org/torproject/descriptor/log/Method.java rename to src/main/java/org/torproject/descriptor/Method.java index c29d495..9135fe2 100644 --- a/src/main/java/org/torproject/descriptor/log/Method.java +++ b/src/main/java/org/torproject/descriptor/Method.java @@ -1,8 +1,9 @@ /* Copyright 2018 The Tor Project * See LICENSE for licensing information */
-package org.torproject.descriptor.log; +package org.torproject.descriptor;
-public enum Method { +/** HTTP request methods ({@code GET}, {@code HEAD}, {@code POST}) used for the lines of web server access logs. */ +public enum Method { GET, HEAD, POST; } diff --git a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java index b94bc30..b4f1940 100644 --- a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java +++ b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java @@ -5,6 +5,7 @@ package org.torproject.descriptor;
import java.time.LocalDate; import java.util.List; +import java.util.Optional;
/** * Contains a sanitized web server access log file from a {@code torproject.org} @@ -61,5 +62,32 @@ public interface WebServerAccessLog extends LogDescriptor { @Override public List<String> getUnrecognizedLines();
+ /** A single line of a web server access log. */ public interface Line extends LogDescriptor.Line { + + /** Returns the IP address of the requesting host. */ + public String getIp(); + + /** Returns the HTTP method, e.g., {@code GET}. */ + public Method getMethod(); + + /** Returns the protocol and version, e.g., {@code HTTP/1.1}. */ + public String getProtocol(); + + /** Returns the requested resource. */ + public String getRequest(); + + /** Returns the size of the response in bytes, or an empty {@code Optional} if the size is not available. */ + public Optional<Integer> getSize(); + + /** Returns the final status code of the response, e.g., 200. */ + public int getResponse(); + + /** Returns the date when the request was received. */ + public LocalDate getDate(); + + /** Returns {@code true} if this is a valid web server access log line. */ + public boolean isValid(); + } + }
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java index f02b1d7..7b56528 100644 --- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java +++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java @@ -10,12 +10,17 @@ import org.torproject.descriptor.internal.FileType; import org.slf4j.Logger; import org.slf4j.LoggerFactory;
+import java.io.BufferedReader; +import java.io.ByteArrayInputStream; import java.io.File; +import java.io.InputStreamReader; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.Collection; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors;
/** * Implementation of web server access log descriptors. @@ -126,5 +131,20 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl return this.logDate; }
+ /** + * Returns a list of all valid log lines. + * <p>Reads the raw descriptor bytes line by line, converts each line with + * {@code WebServerAccessLogLine.makeLine} and keeps only those lines for which + * {@code isValid()} returns {@code true}.</p> + * @return the valid log lines, collected into a list + * @throws DescriptorParseException if any exception occurs while reading or converting the lines; the original exception is attached as the cause + */ + @Override + public List<WebServerAccessLog.Line> logLines() + throws DescriptorParseException { + try (BufferedReader br + = new BufferedReader(new InputStreamReader(new ByteArrayInputStream( + this.getRawDescriptorBytes())))) { /* NOTE(review): the InputStreamReader is created without an explicit charset, so the JVM default charset decodes the bytes; confirm this is intended. */ + return br.lines().map(line + -> (WebServerAccessLog.Line) WebServerAccessLogLine.makeLine(line)) + .filter(line -> line.isValid()).collect(Collectors.toList()); + } catch (Exception ex) { + throw new DescriptorParseException("Cannot retrieve log lines.", ex); + } + } + }
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java index c9d73cc..8a17230 100644 --- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java +++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java @@ -3,6 +3,9 @@
package org.torproject.descriptor.log;
+import org.torproject.descriptor.Method; +import org.torproject.descriptor.WebServerAccessLog; + import org.slf4j.Logger; import org.slf4j.LoggerFactory;
@@ -18,7 +21,7 @@ import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern;
-public class WebServerAccessLogLine { +public class WebServerAccessLogLine implements WebServerAccessLog.Line {
private static final Logger log = LoggerFactory .getLogger(WebServerAccessLogLine.class); @@ -54,6 +57,7 @@ public class WebServerAccessLogLine { private String protocol;
/** Returns a log line string. Possibly empty. */ + @Override public String toLogString() { if (!this.valid) { return ""; @@ -74,7 +78,7 @@ public class WebServerAccessLogLine { return this.date.format(DateTimeFormatter.ofPattern(DATE_PATTERN)); }
- /** Returns a string containing the ip. */ + @Override /* Contract is documented on WebServerAccessLog.Line#getIp(); returns the ip field unchanged. */ public String getIp() { return this.ip; } @@ -84,22 +88,27 @@ public class WebServerAccessLogLine { this.ip = fromMap(ip, ipMap); }
+ @Override /* Contract is documented on WebServerAccessLog.Line#getMethod(); returns the method field unchanged. */ public Method getMethod() { return this.method; }
+ @Override /* Contract is documented on WebServerAccessLog.Line#getProtocol(); returns the protocol field unchanged. */ public String getProtocol() { return this.protocol; }
+ @Override /* Contract is documented on WebServerAccessLog.Line#getRequest(); returns the request field unchanged. */ public String getRequest() { return this.request; }
+ @Override /* A negative size field is reported as an empty Optional; any other value is returned wrapped in an Optional. */ public Optional<Integer> getSize() { return this.size < 0 ? Optional.empty() : Optional.of(this.size); }
+ @Override /* Contract is documented on WebServerAccessLog.Line#getResponse(); returns the response field unchanged. */ public int getResponse() { return this.response; } @@ -109,10 +118,12 @@ public class WebServerAccessLogLine { this.request = fromMap(request, requestMap); }
+ @Override /* Contract is documented on WebServerAccessLog.Line#getDate(); returns the date field unchanged. */ public LocalDate getDate() { return this.date; }
+ @Override /* Exposes the valid flag: toLogString() returns an empty string when it is false, and WebServerAccessLogImpl.logLines() keeps only lines for which it is true. */ public boolean isValid() { return this.valid; } diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java index b12cfc0..a871791 100644 --- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java +++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java @@ -1,4 +1,3 @@ - /* Copyright 2017--2018 The Tor Project * See LICENSE for licensing information */
@@ -51,6 +50,7 @@ public class LogDescriptorTest { protected String[] pan; protected Class<LogDescriptor> type; protected boolean isDecompressionTest; + protected int lineCount;
/** All types of data that can be encountered during sync. */ @Parameters @@ -60,29 +60,30 @@ public class LogDescriptorTest { "metrics.torproject.org_meronense.torproject.org_access.log" + "_20170530.gz", "metrics.torproject.org", "20170530", "gz"}, - WebServerAccessLog.class}, + WebServerAccessLog.class, 24}, {Boolean.FALSE, 1878, new String[]{"meronense.torproject.org", "xy.host.org_meronense.torproject.org_access.log_20170530.log", "metrics.torproject.org", "20170530", "xz"}, - WebServerAccessLog.class}, + WebServerAccessLog.class, 24}, {Boolean.TRUE, 70730, new String[]{"archeotrichon.torproject.org", "archive.torproject.org_archeotrichon.torproject.org_access.log_" + "20151007.xz", "archive.torproject.org", "20151007", "xz"}, - WebServerAccessLog.class}, + WebServerAccessLog.class, 655}, {Boolean.TRUE, 0, new String[]{"dummy.host.net", "nix.server.org_dummy.host.net_access.log_20111111.bz2", "nix.server.org", "20111111", "bz2"}, - WebServerAccessLog.class}}); + WebServerAccessLog.class, 0}}); }
/** This constructor receives the above defined data for each run. */ public LogDescriptorTest(boolean decompression, int size, String[] pan, - Class<LogDescriptor> type) { + Class<LogDescriptor> type, int lineCount) { this.pan = pan; this.size = size; this.type = type; this.isDecompressionTest = decompression; + this.lineCount = lineCount; /* expected size of the list returned by logLines() for this data set */ }
/** Prepares the temporary folder and writes files to it for this test. */ @@ -129,6 +130,8 @@ public class LogDescriptorTest { InternalLogDescriptor ld = (InternalLogDescriptor) descs.get(0); assertEquals("Wrong compression type string. " + dataUsed(), pan[4], ld.getCompressionType()); + List<? extends LogDescriptor.Line> lines = ld.logLines(); + assertEquals(this.lineCount, lines.size()); }
private String dataUsed() {