[tor-commits] [metrics-web/master] Avoid uncompressing webstats logs in memory.

karsten at torproject.org karsten at torproject.org
Fri Jan 27 14:43:48 UTC 2017


commit 9f412c8450704514e5caa2bd7106f1ee96898348
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Jan 25 09:54:16 2017 +0100

    Avoid uncompressing webstats logs in memory.
---
 .../java/org/torproject/metrics/webstats/Main.java | 58 +++++++++++-----------
 1 file changed, 28 insertions(+), 30 deletions(-)

diff --git a/modules/webstats/src/main/java/org/torproject/metrics/webstats/Main.java b/modules/webstats/src/main/java/org/torproject/metrics/webstats/Main.java
index b6e2f96..919241b 100644
--- a/modules/webstats/src/main/java/org/torproject/metrics/webstats/Main.java
+++ b/modules/webstats/src/main/java/org/torproject/metrics/webstats/Main.java
@@ -171,9 +171,8 @@ public class Main {
         if (metaData == null) {
           continue;
         }
-        List<String> downloadedLogLines = downloadLogFile(urlString);
-        Map<String, Integer> parsedLogLines = parseLogLines(urlString,
-            downloadedLogLines);
+        Map<String, Integer> parsedLogLines = downloadAndParseLogFile(
+            urlString);
         importLogLines(connection, urlString, metaData, parsedLogLines);
       } catch (IOException | ParseException exc) {
         log.warn("Cannot download or parse log file with URL {}.  Retrying "
@@ -210,37 +209,17 @@ public class Main {
     return new Object[] { server, site, new Long(logDateMillis) };
   }
 
-  static List<String> downloadLogFile(String urlString) throws IOException {
-    List<String> downloadedLogLines = new ArrayList<>();
+  static Map<String, Integer> downloadAndParseLogFile(String urlString)
+      throws IOException {
+    int skippedLines = 0;
+    Map<String, Integer> parsedLogLines = new HashMap<>();
     try (BufferedReader br = new BufferedReader(new InputStreamReader(
         new XZCompressorInputStream(new URL(urlString).openStream())))) {
       String line;
       while ((line = br.readLine()) != null) {
-        downloadedLogLines.add(line);
-      }
-    }
-    return downloadedLogLines;
-  }
-
-  static Map<String, Integer> parseLogLines(String urlString,
-      List<String> logLines) {
-    int skippedLines = 0;
-    Map<String, Integer> parsedLogLines = new HashMap<>();
-    for (String logLine : logLines) {
-      Matcher logLineMatcher = LOG_LINE_PATTERN.matcher(logLine);
-      if (!logLineMatcher.matches()) {
-        skippedLines++;
-        continue;
-      }
-      String method = logLineMatcher.group(1);
-      String resource = logLineMatcher.group(2);
-      int responseCode = Integer.parseInt(logLineMatcher.group(3));
-      String combined = String.format("%s %s %d", method, resource,
-          responseCode);
-      if (!parsedLogLines.containsKey(combined)) {
-        parsedLogLines.put(combined, 1);
-      } else {
-        parsedLogLines.put(combined, parsedLogLines.get(combined) + 1);
+        if (!parseLogLine(line, parsedLogLines)) {
+          skippedLines++;
+        }
       }
     }
     if (skippedLines > 0) {
@@ -250,6 +229,25 @@ public class Main {
     return parsedLogLines;
   }
 
+  static boolean parseLogLine(String logLine,
+      Map<String, Integer> parsedLogLines) {
+    Matcher logLineMatcher = LOG_LINE_PATTERN.matcher(logLine);
+    if (!logLineMatcher.matches()) {
+      return false;
+    }
+    String method = logLineMatcher.group(1);
+    String resource = logLineMatcher.group(2);
+    int responseCode = Integer.parseInt(logLineMatcher.group(3));
+    String combined = String.format("%s %s %d", method, resource,
+        responseCode);
+    if (!parsedLogLines.containsKey(combined)) {
+      parsedLogLines.put(combined, 1);
+    } else {
+      parsedLogLines.put(combined, parsedLogLines.get(combined) + 1);
+    }
+    return true;
+  }
+
   private static void importLogLines(Connection connection, String urlString,
       Object[] metaData, Map<String, Integer> parsedLogLines)
       throws SQLException {





More information about the tor-commits mailing list