commit 178f753a451959cb78c6d0bf13a929ed1940a2e1 Author: Karsten Loesing karsten.loesing@gmx.net Date: Mon Mar 5 16:33:41 2012 +0100
Truncate .data and .extradata files to contain only the last 4 days. --- extra_stats.py | 57 +++++++++++++++++++++++++++++++++++---- measurements-HOWTO | 18 ++++++++---- truncate-data.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 12 deletions(-)
diff --git a/extra_stats.py b/extra_stats.py index 69bb8a5..662cf43 100755 --- a/extra_stats.py +++ b/extra_stats.py @@ -1,6 +1,6 @@ #!/usr/bin/python
-import sys, time +import os, re, sys, time import TorCtl.TorUtil as TorUtil import TorCtl.TorCtl as TorCtl
@@ -22,15 +22,18 @@ class Circuit: self.stream_fail_reason = None
class WriteStats(TorCtl.PostEventListener): - def __init__(self, port, filename): + def __init__(self, port, filename, truncate): TorCtl.PostEventListener.__init__(self) self._port = int(port) self._filename = filename + self.truncate = truncate + self.first_launched = None self._conn = None self.all_circs = {} self.ignore_streams = {} self.current_timeout = None self.current_quantile = None + self.truncate_statsfile()
def connect(self): self._conn = TorCtl.connect(HOST, self._port) @@ -52,6 +55,7 @@ class WriteStats(TorCtl.PostEventListener): self.current_quantile = b.cutoff_quantile result = b.event_name + " " +b.body self.write_result(result) + self.truncate_statsfile()
def circ_status_event(self, c): if c.status == "LAUNCHED": @@ -94,6 +98,7 @@ class WriteStats(TorCtl.PostEventListener): (self.current_timeout, self.current_quantile)
self.write_result(result) + self.truncate_statsfile()
def stream_status_event(self, event): if event.status == "NEW": @@ -140,19 +145,59 @@ class WriteStats(TorCtl.PostEventListener):
def write_result(self, result): # XXX: hrmm. seems wasteful to keep opening+closing.. + # XXX: When changing this, also change truncate_statsfile(). statsfile = open(self._filename, 'a') statsfile.write(result+"\n") statsfile.close()
+ def truncate_statsfile(self): + if not self.truncate: + return + launched_str = "^.*LAUNCH=([\d]*).*$" + if not self.first_launched: + if os.path.isfile(self._filename): + launched_re = re.compile(launched_str) + with open(self._filename) as statsfile: + for line in statsfile: + m = launched_re.match(line) + if m: + self.first_launched = int(m.group(1)) + break + if not self.first_launched: + self.first_launched = time.time() + now = time.time() + if self.first_launched < now - 7 * 24 * 60 * 60: + copylines = False + statsfilebak_path = self._filename + ".bak" + statsfilebak_file = open(statsfilebak_path, "w") + launched_re = re.compile(launched_str) + with open(self._filename) as statsfile: + for line in statsfile: + if copylines: + statsfilebak_file.write(line) + else: + m = launched_re.match(line) + if m and int(m.group(1)) >= now - 4 * 24 * 60 * 60: + statsfilebak_file.write(line) + copylines = True + statsfilebak_file.close() + os.rename(statsfilebak_path, self._filename) + def main(): - if len(sys.argv) < 3: + if len(sys.argv) < 3 or len(sys.argv) > 4: print "Bad arguments" sys.exit(1)
- port = sys.argv[1] - filename = sys.argv[2] + truncate = False + if (sys.argv[1] == "--truncate"): + truncate = True + port = sys.argv[2] + filename = sys.argv[3] + else: + port = sys.argv[1] + filename = sys.argv[2]
- stats = WriteStats(port, filename) + stats = WriteStats(port, filename, truncate) stats.connect() stats.setup_listener() try: diff --git a/measurements-HOWTO b/measurements-HOWTO index 67de0c5..2cefb68 100644 --- a/measurements-HOWTO +++ b/measurements-HOWTO @@ -103,14 +103,17 @@ cd ~/torperf/torclient50kb && tor -f ~/torperf/torclient50kb/torrc cd ~/torperf/torclient1mb && tor -f ~/torperf/torclient1mb/torrc cd ~/torperf/torclient5mb && tor -f ~/torperf/torclient5mb/torrc sleep 5 -cd ~/torperf/torclient50kb && python ../extra_stats.py 10020 +cd ~/torperf/torclient50kb && python ../extra_stats.py --truncate 10020 ../50kb.extradata & -cd ~/torperf/torclient1mb && python ../extra_stats.py 10021 +cd ~/torperf/torclient1mb && python ../extra_stats.py --truncate 10021 ../1mb.extradata & -cd ~/torperf/torclient5mb && python ../extra_stats.py 10022 +cd ~/torperf/torclient5mb && python ../extra_stats.py --truncate 10022 ../5mb.extradata & EOF
+(Omit the --truncate switch if you don't want .extradata files to be +truncated once per week to contain only the last 4 days of data.) + $ chmod a+x start-tors $ ./start-tors
@@ -125,13 +128,16 @@ $ crontab -e
*/5 * * * * timeout -s2 295 ~/torperf/trivsocks-client torperf.torproject.org 127.0.0.1:9020 /.50kbfile >> ~/torperf/50kb.data - 2>/dev/null + 2>/dev/null; ~/torperf/truncate-data.py ~/torperf/50kb.data 2,32 * * * * timeout -s2 1795 ~/torperf/trivsocks-client torperf.torproject.org 127.0.0.1:9021 /.1mbfile >> ~/torperf/1mb.data - 2>/dev/null + 2>/dev/null; ~/torperf/truncate-data.py ~/torperf/1mb.data 8 * * * * timeout -s2 3595 ~/torperf/trivsocks-client torperf.torproject.org 127.0.0.1:9022 /.5mbfile >> ~/torperf/5mb.data - 2>/dev/null + 2>/dev/null; ~/torperf/truncate-data.py ~/torperf/5mb.data + +(Omit the truncate-data.py command if you don't want .data files to be +truncated once per week to contain only the last 4 days of data.)
From now on, the three files 50kb.data, 1mb.data, and 5mb.data should accumulate lines like this (50kb.data shown here; line breaks are only for diff --git a/truncate-data.py b/truncate-data.py new file mode 100755 index 0000000..a70e3fc --- /dev/null +++ b/truncate-data.py @@ -0,0 +1,75 @@ +#!/usr/bin/python +import os +import re +import sys +import time + +# Truncate Torperf .data file by deleting lines older than 4 days, but +# only truncate once a week. +def main(): + + # Check usage. + if len(sys.argv) != 2: + print "Usage: ./truncate.py <.data file>" + return + data_path = sys.argv[1] + if not os.path.isfile(data_path): + print "%s is not a .data file." % data_path + return + + # Prepare for parsing. + parselines = False + copylines = False + databak_path = data_path + ".bak" + databak_file = None + started_re = re.compile('(^[\d]*) .*') + now = time.time() + + # Parse the .data file line by line, possibly stopping early if the + # first timestamp we find isn't older than a week. + with open(data_path) as data_file: + for line in data_file: + + # Copy lines written in the past 4 days. We have already decided to + # copy this part of the .data file before, so just copy the line and + # continue. + if copylines: + databak_file.write(line) + continue + + # Skip empty lines. + if line.strip() == "": + continue + + # Extract the first timestamp from the current line. + m = started_re.match(line) + if not m: + print "%s is not a valid .data file." % data_path + return + started_ts = int(m.group(1)) + + # Decide whether to start copying lines. We have already decided to + # truncate this file before. + if parselines: + if started_ts >= now - 4 * 24 * 60 * 60: + databak_file.write(line) + copylines = True + continue + + # Decide whether to truncate this file at all. + if started_ts >= now - 7 * 24 * 60 * 60: + return + + # Open a .bak file to write into and start parsing lines to copy in + # the next iteration. 
+ databak_file = open(databak_path, "w") + parselines = True + + # Close the .bak file and replace the original .data file with it. + if databak_file: + databak_file.close() + os.rename(databak_path, data_path) + +if __name__ == "__main__": + main() +
tor-commits@lists.torproject.org