[or-cvs] [metrics/master] Add script to parse entry stats.

karsten at seul.org karsten at seul.org
Wed Jul 1 19:26:00 UTC 2009


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Wed, 1 Jul 2009 21:04:55 +0200
Subject: Add script to parse entry stats.
Commit: 74017d403859e6cca46fa6f553df713ae9b45c2a

---
 HOWTO                                              |   19 +++
 out/entrystats/statsyrtne.csv                      |   10 ++
 .../torproject/metrics/entry/ParseEntryStats.java  |  146 ++++++++++++++++++++
 3 files changed, 175 insertions(+), 0 deletions(-)
 create mode 100644 out/entrystats/statsyrtne.csv
 create mode 100644 src/org/torproject/metrics/entry/ParseEntryStats.java

diff --git a/HOWTO b/HOWTO
index b817eb2..002285a 100644
--- a/HOWTO
+++ b/HOWTO
@@ -240,3 +240,22 @@ $ java -cp bin/:lib/*
   out/performance/
 
 
+5  Entry-guard statistics
+=========================
+
+Put the entry-stats files in a directory data/entrystats/, giving them
+arbitrary filenames to identify the routers later on. Router nicknames are
+probably a fine choice.
+
+$ mkdir data/
+$ mkdir data/entrystats/
+
+Compile the parsing app:
+
+$ javac -d bin/ -cp src/:lib/* src/org/torproject/metrics/entry/*.java
+
+Run the parsing script:
+
+$ java -cp bin/:lib/* org.torproject.metrics.entry.ParseEntryStats
+  data/entrystats/ out/entrystats/
+
diff --git a/out/entrystats/statsyrtne.csv b/out/entrystats/statsyrtne.csv
new file mode 100644
index 0000000..8007b75
--- /dev/null
+++ b/out/entrystats/statsyrtne.csv
@@ -0,0 +1,10 @@
+time,ipsad,ipsae,ipsai,ipsal,ipsam,ipsar,ipsat,ipsau,ipsaz,ipsba,ipsbd,ipsbe,ipsbg,ipsbh,ipsbr,ipsbs,ipsby,ipsca,ipsch,ipscl,ipscn,ipsco,ipscr,ipscs,ipscy,ipscz,ipsde,ipsdk,ipsdo,ipsec,ipsee,ipseg,ipses,ipsfi,ipsfr,ipsgb,ipsgd,ipsge,ipsgr,ipsgt,ipsgu,ipsgy,ipshk,ipshn,ipshr,ipshu,ipsid,ipsie,ipsil,ipsin,ipsiq,ipsir,ipsis,ipsit,ipsjm,ipsjo,ipsjp,ipske,ipskg,ipskh,ipskr,ipskw,ipskz,ipslb,ipslk,ipslt,ipslu,ipslv,ipsmc,ipsmd,ipsmk,ipsmo,ipsmq,ipsmv,ipsmx,ipsmy,ipsng,ipsni,ipsnl,ipsno,ipsnp,ipsnz,ipsom,ipspa,ipspe,ipsph,ipspk,ipspl,ipspr,ipsps,ipspt,ipsqa,ipsro,ipsrs,ipsru,ipssa,ipsse,ipssg,ipssi,ipssk,ipssv,ipssy,ipsth,ipstr,ipstt,ipstw,ipsua,ipsug,ipsus,ipsuy,ipsve,ipsvn,ipsye,ipszw,ipstotal
+2009-06-17,4,12,0,0,4,4,12,12,0,4,4,4,4,4,28,4,0,20,4,4,244,4,4,4,0,4,124,4,0,0,4,0,12,4,44,36,0,4,12,4,4,0,4,0,4,4,12,4,4,28,4,84,0,28,0,4,36,0,0,0,28,12,4,4,4,4,4,0,0,4,0,0,0,4,4,4,4,0,12,0,12,0,4,4,4,28,4,28,4,4,4,4,4,0,36,20,12,4,4,4,0,4,12,20,4,12,12,4,140,0,4,20,4,0,1368
+2009-06-18,0,4,4,0,4,4,12,12,4,4,0,4,4,4,28,4,4,20,4,4,236,4,0,4,4,4,228,4,4,0,0,4,20,4,60,52,0,4,4,0,0,4,12,4,4,4,20,4,12,44,4,92,0,44,0,4,44,0,0,0,28,12,4,4,0,4,4,4,0,4,4,0,0,0,4,12,0,0,28,4,4,4,4,4,4,20,4,44,0,4,12,4,12,4,36,20,28,4,4,4,0,20,12,12,0,20,12,0,220,0,4,4,0,4,1700
+2009-06-19,0,4,0,0,4,12,20,12,4,4,4,4,4,4,28,0,0,20,4,4,172,4,4,4,4,12,260,4,4,0,4,0,20,12,68,44,0,4,4,0,0,0,4,0,4,12,12,4,12,20,4,60,0,36,4,4,20,0,0,0,28,4,4,4,0,4,4,4,0,4,4,4,4,0,12,12,4,0,28,4,4,0,0,4,4,12,4,36,0,4,4,4,12,4,52,20,44,4,0,4,0,4,12,28,0,4,12,0,292,0,4,12,4,0,1668
+2009-06-20,0,4,4,4,0,12,12,4,4,4,0,4,4,4,20,0,0,20,4,4,204,4,4,4,4,12,268,4,0,0,4,0,20,20,68,44,0,0,12,0,0,0,4,4,4,4,4,4,12,20,4,68,0,36,4,12,20,4,0,4,12,4,0,4,4,4,4,4,0,0,0,0,0,0,4,12,0,0,28,12,4,4,0,4,0,12,4,28,0,4,12,0,4,0,52,20,44,4,4,4,0,4,4,20,4,4,12,0,268,4,0,12,0,0,1616
+2009-06-21,0,4,0,0,4,4,20,4,4,0,0,4,4,4,12,0,4,20,4,4,164,4,4,4,4,4,252,4,4,0,4,4,20,4,68,44,0,0,4,0,4,0,4,0,4,4,4,4,12,12,4,84,0,44,4,4,20,0,0,0,4,12,0,0,0,4,4,4,0,4,0,0,0,0,4,4,0,4,36,4,4,0,4,4,4,4,4,28,0,4,4,4,4,4,44,20,44,4,4,4,0,4,4,12,0,12,12,0,284,0,4,4,4,0,1512
+2009-06-22,0,12,0,0,0,4,28,4,4,4,4,4,4,4,12,0,4,20,20,4,164,0,0,4,0,12,308,4,0,4,0,4,20,20,92,36,0,0,4,0,0,0,4,0,4,4,12,0,20,20,4,68,0,44,4,4,20,0,0,0,20,12,0,4,4,4,4,4,0,4,0,0,0,0,4,12,0,0,36,4,4,0,0,4,4,12,4,28,0,0,4,4,12,4,44,20,68,4,0,4,0,4,4,12,0,12,12,0,332,0,4,12,0,4,1740
+2009-06-23,0,4,0,0,4,4,20,12,4,4,4,4,4,0,12,0,4,20,12,4,164,0,4,4,0,4,292,4,4,0,4,4,12,12,76,36,0,4,4,0,0,0,4,4,4,12,4,4,12,20,4,68,0,44,0,4,20,0,0,0,20,4,4,4,4,4,4,4,0,4,0,0,0,0,4,12,0,0,20,4,4,0,0,4,4,12,4,28,0,0,12,4,4,0,44,20,52,4,0,4,4,4,4,20,0,4,20,0,300,0,4,12,4,0,1616
+2009-06-24,0,4,0,0,0,4,20,12,0,4,0,4,4,4,20,0,4,28,12,4,156,4,4,4,4,12,260,4,0,0,4,0,20,12,76,44,4,4,4,0,0,0,4,0,4,12,12,4,12,28,0,84,0,44,0,4,20,0,0,0,20,12,4,4,0,4,4,4,4,4,0,0,0,4,12,4,0,0,28,4,4,0,0,4,4,4,4,28,0,4,12,4,4,4,44,20,52,4,0,0,0,4,4,20,4,4,4,0,300,0,4,12,0,0,1632
+2009-06-25,0,4,0,0,0,4,28,4,0,4,0,0,4,4,20,0,4,20,12,4,212,4,4,4,4,12,276,4,0,0,4,0,12,12,68,36,0,0,4,0,0,0,12,0,4,4,12,4,12,28,4,92,4,52,0,4,20,0,4,0,20,12,4,4,0,4,4,4,0,0,0,4,0,4,4,12,0,0,28,4,4,4,4,4,4,12,4,36,0,0,12,0,4,0,60,20,52,4,0,4,0,4,4,28,4,12,12,0,316,0,4,12,4,0,1760
diff --git a/src/org/torproject/metrics/entry/ParseEntryStats.java b/src/org/torproject/metrics/entry/ParseEntryStats.java
new file mode 100644
index 0000000..0cb0669
--- /dev/null
+++ b/src/org/torproject/metrics/entry/ParseEntryStats.java
@@ -0,0 +1,146 @@
+/* Copyright 2009 Karsten Loesing
+ * See LICENSE for licensing information */
+package org.torproject.metrics.entry;
+
+import java.io.*;
+import java.text.*;
+import java.util.*;
+
+/**
+ * Command-line tool that converts entry-stats files into CSV files with one
+ * row per date and one column per country.
+ */
+public final class ParseEntryStats {
+
+    /** Client counts per country that one node reported for one date. */
+    private static class DataPoint {
+        String date;
+        // Starts out empty so that a "written" block without an "ips" line
+        // is written as zeros instead of failing with a
+        // NullPointerException when the CSV file is produced.
+        SortedMap<String, Integer> ips = new TreeMap<String, Integer>();
+    }
+
+    private static SortedSet<String> allCountries = new TreeSet<String>();
+    private static SortedSet<String> allDates = new TreeSet<String>();
+    private static SortedMap<String, SortedMap<String, DataPoint>> allDataPoints
+            = new TreeMap<String, SortedMap<String, DataPoint>>();
+
+    /**
+     * Parses a line of the form "ips cc=num,cc=num,..." into a map from
+     * country to count and adds every parsed country to allCountries.
+     * Each count is reduced by 4 before it is stored; entries that are not
+     * of the form cc=num are skipped.
+     * NOTE(review): presumably the reported counts are rounded up to a
+     * multiple of 8 and 4 is a bin-midpoint correction -- confirm.
+     *
+     * @param line the complete "ips" line
+     * @return the parsed counts; empty if the line carries no values
+     * @throws NumberFormatException if a count is not an integer
+     */
+    private static SortedMap<String, Integer> parseCountryLine(String line) {
+        SortedMap<String, Integer> result = new TreeMap<String, Integer>();
+        String[] fields = line.split(" ");
+        if (fields.length < 2) {
+            return result;
+        }
+        for (String part : fields[1].split(",")) {
+            String[] keyValue = part.split("=");
+            if (keyValue.length < 2) {
+                continue;
+            }
+            Integer count = Integer.parseInt(keyValue[1]) - 4;
+            allCountries.add(keyValue[0]);
+            result.put(keyValue[0], count);
+        }
+        return result;
+    }
+
+    private ParseEntryStats() {
+    }
+
+    /**
+     * Reads all entry-stats files in the input directory and writes one CSV
+     * file per input file, named after the input file, to the output
+     * directory.
+     *
+     * @param args the input directory followed by the output directory
+     * @throws Exception if listing, reading or writing files fails
+     */
+    public static void main(final String[] args) throws Exception {
+
+        // check input parameters
+        if (args.length < 2) {
+            System.err.println("Usage: java "
+                    + ParseEntryStats.class.getSimpleName()
+                    + " <input directory> <output directory>");
+            System.exit(1);
+        }
+        File inputDirectory = new File(args[0]);
+        if (!inputDirectory.exists() || !inputDirectory.isDirectory()) {
+            System.err.println("Input directory '"
+                    + inputDirectory.getAbsolutePath()
+                    + "' does not exist or is not a directory.");
+            System.exit(1);
+        }
+        File outputDirectory = new File(args[1]);
+        if (outputDirectory.exists() && !outputDirectory.isDirectory()) {
+            System.err.println("Output directory '"
+                    + outputDirectory.getAbsolutePath()
+                    + "' exists, but is not a directory.");
+            System.exit(1);
+        }
+        // mkdirs() also creates missing parent directories
+        outputDirectory.mkdirs();
+
+        // listFiles() returns null if the directory cannot be read
+        File[] inputFiles = inputDirectory.listFiles();
+        if (inputFiles == null) {
+            throw new IOException("Cannot list files in '"
+                    + inputDirectory.getAbsolutePath() + "'.");
+        }
+
+        long started = System.currentTimeMillis();
+
+        // parse input files; anything that is not a regular file is skipped
+        for (File inputFile : inputFiles) {
+            if (inputFile.isFile()) {
+                allDataPoints.put(inputFile.getName(), parseFile(inputFile));
+            }
+        }
+
+        System.out.printf("We have seen %d countries on %d days on %d "
+                + "entry nodes.%n", allCountries.size(), allDates.size(),
+                allDataPoints.size());
+
+        for (Map.Entry<String, SortedMap<String, DataPoint>> e
+                : allDataPoints.entrySet()) {
+            writeCsv(new File(outputDirectory, e.getKey() + ".csv"),
+                    e.getValue());
+        }
+
+        System.out.println("Parsing finished after "
+            + ((System.currentTimeMillis() - started) / 1000)
+            + " seconds.");
+    }
+
+    /**
+     * Reads one entry-stats file and returns its data points keyed by date.
+     * A "written" line starts a new data point and adds its date to
+     * allDates. Data points are only kept once the file has contained an
+     * "ips" line with values; all other lines are ignored.
+     *
+     * @param inputFile the entry-stats file to read
+     * @return the data points found in the file, possibly empty
+     * @throws IOException if the file cannot be read
+     */
+    private static SortedMap<String, DataPoint> parseFile(File inputFile)
+            throws IOException {
+        SortedMap<String, DataPoint> dataPoints
+                = new TreeMap<String, DataPoint>();
+        BufferedReader br = new BufferedReader(new FileReader(inputFile));
+        try {
+            String line = null;
+            String currentDate = null;
+            DataPoint current = null;
+            boolean haveSeenActualNumbers = false;
+            while ((line = br.readLine()) != null) {
+                if (line.startsWith("written ")) {
+                    if (haveSeenActualNumbers) {
+                        dataPoints.put(currentDate, current);
+                    }
+                    current = new DataPoint();
+                    currentDate = line.split(" ")[1];
+                    allDates.add(currentDate);
+                } else if (line.startsWith("ips ") && current != null) {
+                    // an "ips" line before the first "written" line has no
+                    // date to belong to and is therefore ignored
+                    current.ips = parseCountryLine(line);
+                    haveSeenActualNumbers |= line.split(" ").length > 1;
+                }
+            }
+            if (haveSeenActualNumbers) {
+                dataPoints.put(currentDate, current);
+            }
+        } finally {
+            // close the reader even if parsing fails
+            br.close();
+        }
+        return dataPoints;
+    }
+
+    /**
+     * Writes one CSV file with a row for every date in allDates and a
+     * column for every country in allCountries plus a total column. Dates
+     * without a data point get NA in all columns; countries missing from a
+     * data point count as 0. Every row ends in a single line feed.
+     *
+     * @param outFile the CSV file to create or overwrite
+     * @param dataPoints the data points of one input file, keyed by date
+     * @throws IOException if the file cannot be written
+     */
+    private static void writeCsv(File outFile,
+            SortedMap<String, DataPoint> dataPoints) throws IOException {
+        BufferedWriter out = new BufferedWriter(new FileWriter(
+                outFile, false));
+        try {
+            out.write("time,");
+            for (String f : allCountries) {
+                out.write("ips" + f + ",");
+            }
+            out.write("ipstotal\n");
+            for (String date : allDates) {
+                out.write(date);
+                DataPoint dataPoint = dataPoints.get(date);
+                if (dataPoint == null) {
+                    // one NA per country column plus one for the total
+                    for (int i = 0; i <= allCountries.size(); i++) {
+                        out.write(",NA");
+                    }
+                } else {
+                    int ipstotal = 0;
+                    for (String f : allCountries) {
+                        int ips = dataPoint.ips.containsKey(f)
+                                ? dataPoint.ips.get(f) : 0;
+                        ipstotal += ips;
+                        out.write("," + ips);
+                    }
+                    out.write("," + ipstotal);
+                }
+                out.write("\n");
+            }
+        } finally {
+            // close the writer even if writing fails
+            out.close();
+        }
+    }
+}
+
-- 
1.5.6.5




More information about the tor-commits mailing list