[tor-commits] [metrics-lib/master] Add log line interfaces and access methods.

karsten at torproject.org karsten at torproject.org
Fri Feb 16 09:06:04 UTC 2018


commit 59689a9fa4c162378f347902eb68e4c21ccf0043
Author: iwakeh <iwakeh at torproject.org>
Date:   Tue Feb 6 14:59:05 2018 +0000

    Add log line interfaces and access methods.
    
    For both the general LogDescriptor and extension WebServerAccessLog.
    Include some new tests.
---
 .../org/torproject/descriptor/LogDescriptor.java   | 12 ++++++++++
 .../torproject/descriptor/{log => }/Method.java    |  5 ++--
 .../torproject/descriptor/WebServerAccessLog.java  | 28 ++++++++++++++++++++++
 .../descriptor/log/WebServerAccessLogImpl.java     | 20 ++++++++++++++++
 .../descriptor/log/WebServerAccessLogLine.java     | 15 ++++++++++--
 .../descriptor/log/LogDescriptorTest.java          | 15 +++++++-----
 6 files changed, 85 insertions(+), 10 deletions(-)

diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java b/src/main/java/org/torproject/descriptor/LogDescriptor.java
index ff02cae..6a6bf84 100644
--- a/src/main/java/org/torproject/descriptor/LogDescriptor.java
+++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java
@@ -43,5 +43,17 @@ public interface LogDescriptor extends Descriptor {
   @Override
   public List<String> getUnrecognizedLines();
 
+  /**
+   * Returns a list of all parseable log lines.
+   * <p>Might require a lot of memory depending on log size.</p>
+   */
+  public List<? extends Line> logLines() throws DescriptorParseException;
+
+  public interface Line {
+
+    /** Returns a log line string. */
+    public String toLogString();
+
+  }
 }
 
diff --git a/src/main/java/org/torproject/descriptor/log/Method.java b/src/main/java/org/torproject/descriptor/Method.java
similarity index 50%
rename from src/main/java/org/torproject/descriptor/log/Method.java
rename to src/main/java/org/torproject/descriptor/Method.java
index c29d495..9135fe2 100644
--- a/src/main/java/org/torproject/descriptor/log/Method.java
+++ b/src/main/java/org/torproject/descriptor/Method.java
@@ -1,8 +1,9 @@
 /* Copyright 2018 The Tor Project
  * See LICENSE for licensing information */
 
-package org.torproject.descriptor.log;
+package org.torproject.descriptor;
 
-public  enum Method {
+/** Enum for web server access log methods. */
+public enum Method {
     GET, HEAD, POST;
 }
diff --git a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
index b94bc30..b4f1940 100644
--- a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
+++ b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
@@ -5,6 +5,7 @@ package org.torproject.descriptor;
 
 import java.time.LocalDate;
 import java.util.List;
+import java.util.Optional;
 
 /**
  * Contains a sanitized web server access log file from a {@code torproject.org}
@@ -61,5 +62,32 @@ public interface WebServerAccessLog extends LogDescriptor {
   @Override
   public List<String> getUnrecognizedLines();
 
+  public interface Line extends LogDescriptor.Line {
+
+    /** Returns the IP address of the requesting host. */
+    public String getIp();
+
+    /** Returns the HTTP method, e.g., GET. */
+    public Method getMethod();
+
+    /** Returns the protocol and version, e.g., HTTP/1.1. */
+    public String getProtocol();
+
+    /** Returns the requested resource. */
+    public String getRequest();
+
+    /** Returns the size of the response in bytes, if available. */
+    public Optional<Integer> getSize();
+
+    /** Returns the final status code, e.g., 200. */
+    public int getResponse();
+
+    /** Returns the date when the request was received. */
+    public LocalDate getDate();
+
+    /** Returns true if this is a valid web server access log line. */
+    public boolean isValid();
+  }
+
 }
 
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
index f02b1d7..7b56528 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
@@ -10,12 +10,17 @@ import org.torproject.descriptor.internal.FileType;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
 import java.io.File;
+import java.io.InputStreamReader;
 import java.time.LocalDate;
 import java.time.format.DateTimeFormatter;
 import java.util.Collection;
+import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 
 /**
  * Implementation of web server access log descriptors.
@@ -126,5 +131,20 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
     return this.logDate;
   }
 
+  /** Returns a list of all valid log lines. */
+  @Override
+  public List<WebServerAccessLog.Line> logLines()
+      throws DescriptorParseException {
+    try (BufferedReader br
+        = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(
+        this.getRawDescriptorBytes())))) {
+      return br.lines().map(line
+          -> (WebServerAccessLog.Line) WebServerAccessLogLine.makeLine(line))
+        .filter(line -> line.isValid()).collect(Collectors.toList());
+    } catch (Exception ex) {
+      throw new DescriptorParseException("Cannot retrieve log lines.", ex);
+    }
+  }
+
 }
 
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java
index c9d73cc..8a17230 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java
@@ -3,6 +3,9 @@
 
 package org.torproject.descriptor.log;
 
+import org.torproject.descriptor.Method;
+import org.torproject.descriptor.WebServerAccessLog;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -18,7 +21,7 @@ import java.util.Optional;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-public class WebServerAccessLogLine {
+public class WebServerAccessLogLine implements WebServerAccessLog.Line {
 
   private static final Logger log = LoggerFactory
       .getLogger(WebServerAccessLogLine.class);
@@ -54,6 +57,7 @@ public class WebServerAccessLogLine {
   private String protocol;
 
   /** Returns a log line string. Possibly empty. */
+  @Override
   public String toLogString() {
     if (!this.valid) {
       return "";
@@ -74,7 +78,7 @@ public class WebServerAccessLogLine {
     return this.date.format(DateTimeFormatter.ofPattern(DATE_PATTERN));
   }
 
-  /** Returns a string containing the ip. */
+  @Override
   public String getIp() {
     return this.ip;
   }
@@ -84,22 +88,27 @@ public class WebServerAccessLogLine {
     this.ip = fromMap(ip, ipMap);
   }
 
+  @Override
   public Method getMethod() {
     return this.method;
   }
 
+  @Override
   public String getProtocol() {
     return this.protocol;
   }
 
+  @Override
   public String getRequest() {
     return this.request;
   }
 
+  @Override
   public Optional<Integer> getSize() {
     return this.size < 0 ? Optional.empty() : Optional.of(this.size);
   }
 
+  @Override
   public int getResponse() {
     return this.response;
   }
@@ -109,10 +118,12 @@ public class WebServerAccessLogLine {
     this.request = fromMap(request, requestMap);
   }
 
+  @Override
   public LocalDate getDate() {
     return this.date;
   }
 
+  @Override
   public boolean isValid() {
     return this.valid;
   }
diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
index b12cfc0..a871791 100644
--- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
+++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
@@ -1,4 +1,3 @@
-
 /* Copyright 2017--2018 The Tor Project
  * See LICENSE for licensing information */
 
@@ -51,6 +50,7 @@ public class LogDescriptorTest {
   protected String[] pan;
   protected Class<LogDescriptor> type;
   protected boolean isDecompressionTest;
+  protected int lineCount;
 
   /** All types of data that can be encountered during sync. */
   @Parameters
@@ -60,29 +60,30 @@ public class LogDescriptorTest {
             "metrics.torproject.org_meronense.torproject.org_access.log"
             + "_20170530.gz",
             "metrics.torproject.org", "20170530", "gz"},
-         WebServerAccessLog.class},
+         WebServerAccessLog.class, 24},
         {Boolean.FALSE, 1878, new String[]{"meronense.torproject.org",
             "xy.host.org_meronense.torproject.org_access.log_20170530.log",
             "metrics.torproject.org", "20170530", "xz"},
-         WebServerAccessLog.class},
+         WebServerAccessLog.class, 24},
         {Boolean.TRUE, 70730, new String[]{"archeotrichon.torproject.org",
             "archive.torproject.org_archeotrichon.torproject.org_access.log_"
             + "20151007.xz",
             "archive.torproject.org", "20151007", "xz"},
-         WebServerAccessLog.class},
+         WebServerAccessLog.class, 655},
         {Boolean.TRUE, 0, new String[]{"dummy.host.net",
             "nix.server.org_dummy.host.net_access.log_20111111.bz2",
             "nix.server.org", "20111111", "bz2"},
-         WebServerAccessLog.class}});
+         WebServerAccessLog.class, 0}});
   }
 
   /** This constructor receives the above defined data for each run. */
   public LogDescriptorTest(boolean decompression, int size, String[] pan,
-      Class<LogDescriptor> type) {
+        Class<LogDescriptor> type, int lineCount) {
     this.pan = pan;
     this.size = size;
     this.type = type;
     this.isDecompressionTest = decompression;
+    this.lineCount = lineCount;
   }
 
   /** Prepares the temporary folder and writes files to it for this test. */
@@ -129,6 +130,8 @@ public class LogDescriptorTest {
     InternalLogDescriptor ld = (InternalLogDescriptor) descs.get(0);
     assertEquals("Wrong compression type string. " + dataUsed(),
         pan[4], ld.getCompressionType());
+    List<? extends LogDescriptor.Line> lines = ld.logLines();
+    assertEquals(this.lineCount, lines.size());
   }
 
   private String dataUsed() {



More information about the tor-commits mailing list