commit 9f412c8450704514e5caa2bd7106f1ee96898348
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Jan 25 09:54:16 2017 +0100
Avoid uncompressing webstats logs in memory.
---
.../java/org/torproject/metrics/webstats/Main.java | 58 +++++++++++-----------
1 file changed, 28 insertions(+), 30 deletions(-)
diff --git a/modules/webstats/src/main/java/org/torproject/metrics/webstats/Main.java b/modules/webstats/src/main/java/org/torproject/metrics/webstats/Main.java
index b6e2f96..919241b 100644
--- a/modules/webstats/src/main/java/org/torproject/metrics/webstats/Main.java
+++ b/modules/webstats/src/main/java/org/torproject/metrics/webstats/Main.java
@@ -171,9 +171,8 @@ public class Main {
if (metaData == null) {
continue;
}
- List<String> downloadedLogLines = downloadLogFile(urlString);
- Map<String, Integer> parsedLogLines = parseLogLines(urlString,
- downloadedLogLines);
+ Map<String, Integer> parsedLogLines = downloadAndParseLogFile(
+ urlString);
importLogLines(connection, urlString, metaData, parsedLogLines);
} catch (IOException | ParseException exc) {
log.warn("Cannot download or parse log file with URL {}. Retrying "
@@ -210,37 +209,17 @@ public class Main {
return new Object[] { server, site, new Long(logDateMillis) };
}
- static List<String> downloadLogFile(String urlString) throws IOException {
- List<String> downloadedLogLines = new ArrayList<>();
+ static Map<String, Integer> downloadAndParseLogFile(String urlString)
+ throws IOException {
+ int skippedLines = 0;
+ Map<String, Integer> parsedLogLines = new HashMap<>();
try (BufferedReader br = new BufferedReader(new InputStreamReader(
new XZCompressorInputStream(new URL(urlString).openStream())))) {
String line;
while ((line = br.readLine()) != null) {
- downloadedLogLines.add(line);
- }
- }
- return downloadedLogLines;
- }
-
- static Map<String, Integer> parseLogLines(String urlString,
- List<String> logLines) {
- int skippedLines = 0;
- Map<String, Integer> parsedLogLines = new HashMap<>();
- for (String logLine : logLines) {
- Matcher logLineMatcher = LOG_LINE_PATTERN.matcher(logLine);
- if (!logLineMatcher.matches()) {
- skippedLines++;
- continue;
- }
- String method = logLineMatcher.group(1);
- String resource = logLineMatcher.group(2);
- int responseCode = Integer.parseInt(logLineMatcher.group(3));
- String combined = String.format("%s %s %d", method, resource,
- responseCode);
- if (!parsedLogLines.containsKey(combined)) {
- parsedLogLines.put(combined, 1);
- } else {
- parsedLogLines.put(combined, parsedLogLines.get(combined) + 1);
+ if (!parseLogLine(line, parsedLogLines)) {
+ skippedLines++;
+ }
}
}
if (skippedLines > 0) {
@@ -250,6 +229,25 @@ public class Main {
return parsedLogLines;
}
+ static boolean parseLogLine(String logLine,
+ Map<String, Integer> parsedLogLines) {
+ Matcher logLineMatcher = LOG_LINE_PATTERN.matcher(logLine);
+ if (!logLineMatcher.matches()) {
+ return false;
+ }
+ String method = logLineMatcher.group(1);
+ String resource = logLineMatcher.group(2);
+ int responseCode = Integer.parseInt(logLineMatcher.group(3));
+ String combined = String.format("%s %s %d", method, resource,
+ responseCode);
+ if (!parsedLogLines.containsKey(combined)) {
+ parsedLogLines.put(combined, 1);
+ } else {
+ parsedLogLines.put(combined, parsedLogLines.get(combined) + 1);
+ }
+ return true;
+ }
+
private static void importLogLines(Connection connection, String urlString,
Object[] metaData, Map<String, Integer> parsedLogLines)
throws SQLException {