commit 8578de6d64569b410678b3c847e7b82b267501ad Author: iwakeh <iwakeh@torproject.org> Date: Wed Jan 31 12:35:31 2018 +0000
Reduce memory footprint of log lines.
Also make validation optional for internal web log constructor. CollecTor uses this for storing freshly sanitized logs, which don't need to be validated a second time. --- .../descriptor/log/WebServerAccessLogImpl.java | 19 +++++++-- .../descriptor/log/WebServerAccessLogLine.java | 46 ++++++++++++++++------ 2 files changed, 49 insertions(+), 16 deletions(-)
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java index 6708c3a..f02b1d7 100644 --- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java +++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java @@ -44,6 +44,8 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
private final LocalDate logDate;
+ private boolean validate = true; + /** * Creates a WebServerAccessLog from the given bytes and filename. * @@ -65,13 +67,20 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl }
/** For internal use only. */ - public WebServerAccessLogImpl(Collection<String> lines, String filename) - throws DescriptorParseException { - this(LogDescriptorImpl.collectionToBytes(lines), new File(filename)); + public WebServerAccessLogImpl(Collection<String> lines, String filename, + boolean validate) throws DescriptorParseException { + this(LogDescriptorImpl.collectionToBytes(lines), new File(filename), + FileType.XZ, validate); }
private WebServerAccessLogImpl(byte[] logBytes, File file, FileType defaultCompression) throws DescriptorParseException { + this(logBytes, file, defaultCompression, true); + } + + private WebServerAccessLogImpl(byte[] logBytes, File file, + FileType defaultCompression, boolean validate) + throws DescriptorParseException { super(logBytes, file, defaultCompression); try { String fn = file.toPath().getFileName().toString(); @@ -91,7 +100,9 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl this.logDate = LocalDate.parse(ymd, DateTimeFormatter.BASIC_ISO_DATE); this.setValidator((line) -> WebServerAccessLogLine.makeLine(line).isValid()); - this.validate(); + if (validate) { + this.validate(); + } } catch (DescriptorParseException dpe) { throw dpe; // escalate } catch (Exception pe) { diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java index fdbf5c1..2f27441 100644 --- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java +++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogLine.java @@ -10,6 +10,9 @@ import java.time.LocalDate; import java.time.ZoneOffset; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -31,15 +34,23 @@ public class WebServerAccessLogLine { + ""([A-Z]+) ([^"]+) ([A-Z]+/\d\.\d)" " + "(\d{3}) (\d+|-)(.*)");
+ private static Map<String, String> ipMap + = Collections.synchronizedMap(new HashMap<>()); + private static Map<LocalDate, LocalDate> dateMap + = Collections.synchronizedMap(new HashMap<>()); + private static Map<String, String> protocolMap + = Collections.synchronizedMap(new HashMap<>()); + private static Map<String, String> requestMap + = Collections.synchronizedMap(new HashMap<>()); + private String ip; private int response; private String request; private Method method; private LocalDate date; - private String protocol; private int size = -1; private boolean valid = false; - private String type; + private String protocol;
/** Returns a log line string. Possibly empty. */ public String toLogString() { @@ -53,10 +64,11 @@ public class WebServerAccessLogLine { public String toString() { return String.format("%s - - [%s:00:00:00 +0000] "%s %s %s" %d %s", this.ip, this.getDateString(), this.method.name(), this.request, - this.type, this.response, this.size < 0 ? DASH : this.size); + this.protocol, this.response, this.size < 0 ? DASH : this.size); }
- /** Returns the string of the date using 'yyyymmdd' format. */ + /** Only used internally during sanitization. + * Returns the string of the date using 'dd/MMM/yyyy' format. */ public String getDateString() { return this.date.format(DateTimeFormatter.ofPattern(DATE_PATTERN)); } @@ -68,7 +80,7 @@ public class WebServerAccessLogLine {
/** Only used internally during sanitization. */ public void setIp(String ip) { - this.ip = ip; + this.ip = fromMap(ip, ipMap); }
public Method getMethod() { @@ -93,7 +105,7 @@ public class WebServerAccessLogLine {
/** Only used internally during sanitization. */ public void setRequest(String request) { - this.request = request; + this.request = fromMap(request, requestMap); }
public LocalDate getDate() { @@ -112,14 +124,13 @@ public class WebServerAccessLogLine { if (mat.find()) { res.response = Integer.valueOf(mat.group(10)); res.method = Method.valueOf(mat.group(7)); - res.protocol = mat.group(9); String dateTimeString = mat.group(4) + mat.group(5) + mat.group(6); - res.date = ZonedDateTime.parse(dateTimeString, + res.date = fromMap(ZonedDateTime.parse(dateTimeString, dateTimeFormatter).withZoneSameInstant(ZoneOffset.UTC) - .toLocalDate(); - res.ip = mat.group(1); - res.request = mat.group(8); - res.type = mat.group(9); + .toLocalDate(), dateMap); + res.ip = fromMap(mat.group(1), ipMap); + res.request = fromMap(mat.group(8), requestMap); + res.protocol = fromMap(mat.group(9), protocolMap); if (DASH.equals(mat.group(11))) { res.size = -1; } else { @@ -134,5 +145,16 @@ public class WebServerAccessLogLine { return res; }
+ private static <T> T fromMap(T val, Map<T, T> map) { + synchronized (map) { + T reference = map.get(val); + if (null == reference) { + map.put(val, val); + reference = map.get(val); + } + return reference; + } + } + }
tor-commits@lists.torproject.org