[tor-commits] [metrics-lib/release] Reduce memory footprint of log lines.

karsten at torproject.org karsten at torproject.org
Wed Feb 7 11:20:03 UTC 2018


commit 8578de6d64569b410678b3c847e7b82b267501ad
Author: iwakeh <iwakeh at torproject.org>
Date:   Wed Jan 31 12:35:31 2018 +0000

    Reduce memory footprint of log lines.
    
    Also make validation optional for the internal web log constructor.
    CollecTor uses this for storing freshly sanitized logs,
    which don't need to be validated a second time.
---
 .../descriptor/log/WebServerAccessLogImpl.java     | 19 +++++++--
 .../descriptor/log/WebServerAccessLogLine.java     | 46 ++++++++++++++++------
 2 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
index 6708c3a..f02b1d7 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
@@ -44,6 +44,8 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
 
   private final LocalDate logDate;
 
+  private boolean validate = true;
+
   /**
    * Creates a WebServerAccessLog from the given bytes and filename.
    *
@@ -65,13 +67,20 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
   }
 
   /** For internal use only. */
-  public WebServerAccessLogImpl(Collection<String> lines, String filename)
-      throws DescriptorParseException {
-    this(LogDescriptorImpl.collectionToBytes(lines), new File(filename));
+  public WebServerAccessLogImpl(Collection<String> lines, String filename,
+      boolean validate) throws DescriptorParseException {
+    this(LogDescriptorImpl.collectionToBytes(lines), new File(filename),
+        FileType.XZ, validate);
   }
 
   private WebServerAccessLogImpl(byte[] logBytes, File file,
       FileType defaultCompression) throws DescriptorParseException {
+    this(logBytes, file, defaultCompression, true);
+  }
+
+  private WebServerAccessLogImpl(byte[] logBytes, File file,
+      FileType defaultCompression, boolean validate)
+      throws DescriptorParseException {
     super(logBytes, file, defaultCompression);
     try {
       String fn = file.toPath().getFileName().toString();
@@ -91,7 +100,9 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
       this.logDate = LocalDate.parse(ymd, DateTimeFormatter.BASIC_ISO_DATE);
       this.setValidator((line)
           -> WebServerAccessLogLine.makeLine(line).isValid());
-      this.validate();
+      if (validate) {
+        this.validate();
+      }
     } catch (DescriptorParseException dpe) {
       throw dpe; // escalate
     } catch (Exception pe) {
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java
index fdbf5c1..2f27441 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java
@@ -10,6 +10,9 @@ import java.time.LocalDate;
 import java.time.ZoneOffset;
 import java.time.ZonedDateTime;
 import java.time.format.DateTimeFormatter;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Optional;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -31,15 +34,23 @@ public class WebServerAccessLogLine {
       + "\"([A-Z]+) ([^\"]+) ([A-Z]+/\\d\\.\\d)\" "
       + "(\\d{3}) (\\d+|-)(.*)");
 
+  private static Map<String, String> ipMap
+      = Collections.synchronizedMap(new HashMap<>());
+  private static Map<LocalDate, LocalDate> dateMap
+      = Collections.synchronizedMap(new HashMap<>());
+  private static Map<String, String> protocolMap
+      = Collections.synchronizedMap(new HashMap<>());
+  private static Map<String, String> requestMap
+      = Collections.synchronizedMap(new HashMap<>());
+
   private String ip;
   private int response;
   private String request;
   private Method method;
   private LocalDate date;
-  private String protocol;
   private int size = -1;
   private boolean valid = false;
-  private String type;
+  private String protocol;
 
   /** Returns a log line string. Possibly empty. */
   public String toLogString() {
@@ -53,10 +64,11 @@ public class WebServerAccessLogLine {
   public String toString() {
     return String.format("%s - - [%s:00:00:00 +0000] \"%s %s %s\" %d %s",
         this.ip, this.getDateString(), this.method.name(), this.request,
-        this.type, this.response, this.size < 0 ? DASH : this.size);
+        this.protocol, this.response, this.size < 0 ? DASH : this.size);
   }
 
-  /** Returns the string of the date using 'yyyymmdd' format. */
+  /** Only used internally during sanitization.
+   * Returns the string of the date using 'dd/MMM/yyyy' format. */
   public String getDateString() {
     return this.date.format(DateTimeFormatter.ofPattern(DATE_PATTERN));
   }
@@ -68,7 +80,7 @@ public class WebServerAccessLogLine {
 
   /** Only used internally during sanitization. */
   public void setIp(String ip) {
-    this.ip = ip;
+    this.ip = fromMap(ip, ipMap);
   }
 
   public Method getMethod() {
@@ -93,7 +105,7 @@ public class WebServerAccessLogLine {
 
   /** Only used internally during sanitization. */
   public void setRequest(String request) {
-    this.request = request;
+    this.request = fromMap(request, requestMap);
   }
 
   public LocalDate getDate() {
@@ -112,14 +124,13 @@ public class WebServerAccessLogLine {
       if (mat.find()) {
         res.response = Integer.valueOf(mat.group(10));
         res.method = Method.valueOf(mat.group(7));
-        res.protocol = mat.group(9);
         String dateTimeString = mat.group(4) + mat.group(5) + mat.group(6);
-        res.date = ZonedDateTime.parse(dateTimeString,
+        res.date = fromMap(ZonedDateTime.parse(dateTimeString,
             dateTimeFormatter).withZoneSameInstant(ZoneOffset.UTC)
-            .toLocalDate();
-        res.ip = mat.group(1);
-        res.request = mat.group(8);
-        res.type = mat.group(9);
+            .toLocalDate(), dateMap);
+        res.ip = fromMap(mat.group(1), ipMap);
+        res.request = fromMap(mat.group(8), requestMap);
+        res.protocol = fromMap(mat.group(9), protocolMap);
         if (DASH.equals(mat.group(11))) {
           res.size = -1;
         } else {
@@ -134,5 +145,16 @@ public class WebServerAccessLogLine {
     return res;
   }
 
+  private static <T> T fromMap(T val, Map<T, T> map) {
+    synchronized (map) {
+      T reference = map.get(val);
+      if (null == reference) {
+        map.put(val, val);
+        reference = map.get(val);
+      }
+      return reference;
+    }
+  }
+
 }
 





More information about the tor-commits mailing list