[tor-commits] [flashproxy/master] Extract proxy requests for graphing with R.

dcf at torproject.org dcf at torproject.org
Wed Feb 20 17:30:39 UTC 2013


commit 5713db780884c581f53a8045b370fa32b4a7b5ec
Author: David Fifield <david at bamsoftware.com>
Date:   Wed Feb 20 03:57:48 2013 -0800

    Extract proxy requests for graphing with R.
---
 experiments/proxy-extract.py |   82 ++++++++++++++++++++++++++++++++++++++++++
 experiments/proxy-graph.r    |    5 +++
 2 files changed, 87 insertions(+), 0 deletions(-)

diff --git a/experiments/proxy-extract.py b/experiments/proxy-extract.py
new file mode 100755
index 0000000..e8b02ce
--- /dev/null
+++ b/experiments/proxy-extract.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+
+import datetime
+import getopt
+import re
+import sys
+
+def usage(f = sys.stdout):
+    print >> f, """\
+Usage: %s [INPUTFILE]
+Extract proxy connections from a facilitator log. Each output line is
+    date\tcount\n
+where count is the approximate poll interval in effect at date.
+
+  -h, --help           show this help.
+""" % sys.argv[0]
+
+opts, args = getopt.gnu_getopt(sys.argv[1:], "h", ["help"])
+for o, a in opts:
+    if o == "-h" or o == "--help":
+        usage()
+        sys.exit()
+
+if len(args) == 0:
+    input_file = sys.stdin
+elif len(args) == 1:
+    input_file = open(args[0])
+else:
+    usage()
+    sys.exit()
+
+def timedelta_to_seconds(delta):
+    return delta.days * (24 * 60 * 60) + delta.seconds + delta.microseconds / 1000000.0
+
+# commit 49de7bf689ee989997a1edbf2414a7bdbc2164f9
+# Author: David Fifield <david at bamsoftware.com>
+# Date:   Thu Jan 3 21:01:39 2013 -0800
+#
+#     Bump poll interval from 10 s to 60 s.
+#
+# commit 69d429db12cedc90dac9ccefcace80c86af7eb51
+# Author: David Fifield <david at bamsoftware.com>
+# Date:   Tue Jan 15 14:02:02 2013 -0800
+#
+#     Increase facilitator_poll_interval from 1 m to 10 m.
+
+BEGIN_60S = datetime.datetime(2013, 1, 3, 21, 0, 0)
+BEGIN_600S = datetime.datetime(2013, 1, 15, 14, 0, 0)
+
+# Proxies refresh themselves once a day, so interpolate across a day when the
+# polling interval historically changed.
+def get_poll_interval(date):
+    if date < BEGIN_60S:
+        return 10
+    elif BEGIN_60S <= date < BEGIN_60S + datetime.timedelta(1):
+        return timedelta_to_seconds(date-BEGIN_60S) / timedelta_to_seconds(datetime.timedelta(1)) * (60-10) + 10
+    elif date < BEGIN_600S:
+        return 60
+    elif BEGIN_600S <= date < BEGIN_600S + datetime.timedelta(1):
+        return timedelta_to_seconds(date-BEGIN_600S) / timedelta_to_seconds(datetime.timedelta(1)) * (600-60) + 60
+    else:
+        return 600
+
+prev_output = None
+count = 0.0
+
+for line in input_file:
+    m = re.match(r'^(\d+-\d+-\d+ \d+:\d+:\d+) proxy gets', line)
+    if not m:
+        continue
+    date_str, = m.groups()
+    date = datetime.datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
+
+    count += get_poll_interval(date)
+
+    rounded_date = date.replace(minute=0, second=0, microsecond=0)
+    prev_output = prev_output or rounded_date
+    if prev_output is None or rounded_date != prev_output:
+        avg = float(count) / 10.0
+        print date.strftime("%Y-%m-%d %H:%M:%S") + "\t" + "%.2f" % avg
+        prev_output = rounded_date
+        count = 0.0
diff --git a/experiments/proxy-graph.r b/experiments/proxy-graph.r
new file mode 100644
index 0000000..6b3e1cc
--- /dev/null
+++ b/experiments/proxy-graph.r
@@ -0,0 +1,5 @@
+library(ggplot2)
+
+# Read two tab-separated, headerless columns: a timestamp and a number.
+# Presumably proxy.dat is the saved output of proxy-extract.py -- confirm.
+x <- read.delim("proxy.dat", header=FALSE, col.names=c("date", "interval"), colClasses=c("POSIXct", "numeric"))
+
+# One bar per day (binwidth is 86400 seconds), each row weighted by interval/10.
+# NOTE(review): proxy-extract.py already divides its totals by 10 before
+# printing them; confirm that dividing by 10 again here is intended.
+png("proxy-count.png", width=720, height=480)
+p <- qplot(date, data=x, geom="bar", weight=interval/10, binwidth=86400, ylab="proxy requests per day")
+# Print explicitly: a ggplot object is only drawn when printed, and top-level
+# values are not auto-printed when this file is run through source().
+print(p)
+# Close the PNG device so the image is completely written to disk.
+dev.off()





More information about the tor-commits mailing list