[or-cvs] [metrics-utils/master 2/4] Support parsing .gz-compressed web server logs.

karsten at torproject.org karsten at torproject.org
Fri Sep 24 16:03:47 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Fri, 24 Sep 2010 13:07:59 +0200
Subject: Support parsing .gz-compressed web server logs.
Commit: fb018b0b006a72d455c9d379ae4710984c96c56d

---
 visitor/ChangeLog    |    3 +++
 visitor/HOWTO        |    8 +++++---
 visitor/VisiTor.java |   10 ++++++++--
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/visitor/ChangeLog b/visitor/ChangeLog
index 8b00c97..01c2294 100644
--- a/visitor/ChangeLog
+++ b/visitor/ChangeLog
@@ -1,5 +1,8 @@
 VisiTor change log:
 
+Changes in version 0.0.3 - 2010-09-2?
+  - Support parsing .gz-compressed web server logs. Suggested by murb.
+
 Changes in version 0.0.2 - 2010-09-22
   - Don't break if we're given zero exit lists.
   - If we saw zero requests on a day, write "0", not "NA". Only write "NA"
diff --git a/visitor/HOWTO b/visitor/HOWTO
index 85361ac..57d7a8f 100644
--- a/visitor/HOWTO
+++ b/visitor/HOWTO
@@ -50,8 +50,8 @@ for Linux and Mac OS X; commands for Windows may vary):
   Note that as of August 2010, one month of exit lists is 20M compressed
   and 168M uncompressed.
 
-- Put your web server log in your working directory, too, e.g.
-  /home/you/visitor/access_log .
+- Put your .gz-compressed or uncompressed web server log in your working
+  directory, too, e.g. /home/you/visitor/access_log.gz .
 
 - Compile the (single) Java class using this command:
 
@@ -64,10 +64,12 @@ for Linux and Mac OS X; commands for Windows may vary):
   java VisiTor <web server log> <exit list directory> <output file>
        [<server log part with Tor user requests>]
 
-  A sample invocation might be:
+  Sample invocations might be:
 
   $ java VisiTor access_log exitlists/ out.csv tor_access_log
 
+  $ java VisiTor access_log.gz exitlists/ out.csv tor_access_log
+
 - Find the results in /home/you/visitor/out.csv in a format that can be
   imported by any spreadsheet application like OpenOffice.org Calc or
   processed by R.
diff --git a/visitor/VisiTor.java b/visitor/VisiTor.java
index 0af583d..624fd3a 100644
--- a/visitor/VisiTor.java
+++ b/visitor/VisiTor.java
@@ -5,6 +5,7 @@ import java.io.*;
 import java.text.*;
 import java.util.*;
 import java.util.regex.*;
+import java.util.zip.*;
 
 public final class VisiTor {
 
@@ -61,8 +62,13 @@ public final class VisiTor {
       return;
     }
     try {
-      webServerLogReader = new BufferedReader(new FileReader(
-          webServerLog));
+      if (webServerLog.endsWith(".gz")) {
+        webServerLogReader = new BufferedReader(new InputStreamReader(
+            new GZIPInputStream(new FileInputStream(webServerLog))));
+      } else {
+        webServerLogReader = new BufferedReader(new FileReader(
+            webServerLog));
+      }
       logLine = webServerLogReader.readLine();
     } catch (IOException e) {
       System.out.println("FAILED\nCould not read file! Exiting!");
-- 
1.7.1




More information about the tor-commits mailing list