commit 17182144e3f597b74772b1ff9299385f4695fd0d Author: Mike Perry mikeperry-git@fscked.org Date: Fri Mar 4 18:57:24 2011 -0800
Update consolidate_stats to new .data and .extradata formats. --- consolidate_stats.py | 150 ++++++++++++++++++++++++++++++++++---------------- measurements-HOWTO | 10 +-- 2 files changed, 106 insertions(+), 54 deletions(-)
diff --git a/consolidate_stats.py b/consolidate_stats.py old mode 100644 new mode 100755 index 40e8d0a..4c8a990 --- a/consolidate_stats.py +++ b/consolidate_stats.py @@ -1,29 +1,49 @@ -### -# Call this with 4 parameters: the file to read data from, the file to read -# extradata from, the file to write the combined data to, the slack interval -# to match data and extradata timestamps. +#!/usr/bin/python +# +# This script consolidates a .data file and an .extradata file together, +# matching the lines based on the completion time. # -# IMPORTANT: You need to manually sort -g the data file, because torperf -# might screw up ordering and this script expects sorted lines! +# The resulting output will be the union of both files. It will match lines +# where possible, and include unmatched lines from both files as well. +# +# Usage: +# ./consolidate_stats.py <.data file> <.extradata file> <.mergedata file> ###
-import sys, time +import sys
class Data: def __init__(self, filename, mode="r"): self._filename = filename self._file = open(filename, mode) + self._curData = None + self._retCurrent = False
def prepline(self): + if self._retCurrent: + self._retCurrent = False + return self._curData line = self._file.readline() if line == "" or line == "\n": raise StopIteration - if line[-1] == "\n": - line = line[:-1] - return line.split(" ") + line = line.strip() + self._curData = line.split(" ") + return self._curData + + def keepCurrent(self): + self._retCurrent = True + +class TorperfData(Data): + def __init__(self, filename): + Data.__init__(self, filename) + self.fields = "STARTSEC STARTUSEC SOCKETSEC SOCKETUSEC CONNECTSEC CONNECTUSEC NEGOTIATESEC NEGOTIATEUSEC REQUESTSEC REQUESTUSEC RESPONSESEC RESPONSEUSEC DATAREQUESTSEC DATAREQUESTUSEC DATARESPONSESEC DATARESPONSEUSEC DATACOMPLETESEC DATACOMPLETEUSEC WRITEBYTES READBYTES DIDTIMEOUT".split(" ")
def next(self): - return self.prepline() + ret = {} + values = self.prepline() + for i in xrange(len(values)): + ret[self.fields[i]] = values[i] + return ret
def __iter__(self): return self @@ -31,58 +51,94 @@ class Data: class ExtraData(Data): def __init__(self, filename): Data.__init__(self, filename) - self._curData = None - self._retCurrent = False
def next(self): - if self._retCurrent == True: - self._retCurrent = False - return self._curData cont = self.prepline() - if cont[0] == "ok": - self._curData = cont[1:] - return self._curData - print('Ignoring line "' + " ".join(cont) + '"') - return self.next()
- def keepCurrent(self): - self._retCurrent = True + ret = {} + for i in cont: + if not "=" in i: + ret[i] = "" + continue + pair = i.split("=") + ret[pair[0]] = pair[1]
-class NormalData(Data): - def __init__(self, filename): - Data.__init__(self, filename) + if not "CIRC_ID" in ret: + #print('Ignoring line "' + " ".join(cont) + '"') + return self.next() + return ret
-class BetterData(Data): + def __iter__(self): + return self + +class MergeData(Data): def __init__(self, filename): Data.__init__(self, filename, "w")
- def writeLine(self, line): + def writeLine(self, data): + line = [] + for key in data.iterkeys(): + line.append(key+"="+data[key]) + line.sort() self._file.write(" ".join(line) + "\n")
def main(): - if len(sys.argv) < 5: - print("Bad arguments") + if len(sys.argv) != 4: + print("See script header for usage") sys.exit(1)
- normalData = NormalData(sys.argv[1]) - extraData = ExtraData(sys.argv[2]) - betterData = BetterData(sys.argv[3]) - slack = int(sys.argv[4]) - for normal in normalData: - normalTime = int(normal[0]) - for extra in extraData: - extraTime = int(extra[0]) - if normalTime > extraTime: - print("Got unexpected extradata entry" + " ".join(extra)) - continue - if normalTime + slack < extraTime: - print("Got a data entry without extradata " + " ".join(normal)) - extraData.keepCurrent() + torperfdata = TorperfData(sys.argv[1]) + extradata = ExtraData(sys.argv[2]) + mergedata = MergeData(sys.argv[3]) + slack = 1.0 # More than 1s means something is really, really wrong + lastDataTime = 0 + lastExtraTime = 0 + dataLine = 0 + extraLine = 0 + mergedYet = False + for data in torperfdata: + dataLine += 1 + dataEndTime = int(data["DATACOMPLETESEC"]) + dataEndTime += int(data["DATACOMPLETEUSEC"])/1000000.0 + if not dataEndTime: + # Skip failures + continue + + if lastDataTime > dataEndTime: + print "Torperf .data is not monotonic! Sort it by completion time!" + print "Line "+str(dataLine)+" "+str(lastDataTime)+" > "+str(dataEndTime) + sys.exit(0) + lastDataTime = dataEndTime + for extra in extradata: + extraLine += 1 + if not "USED_AT" in extra or not extra["USED_AT"]: + mergedata.writeLine(extra) + continue # Failed circ + + extraEndTime = float(extra["USED_AT"]) + if lastExtraTime > extraEndTime: + print "The .extradata is not monotonic! Sort it by USED_AT!" 
+ print "Line "+str(extraLine)+" "+str(lastExtraTime)+" > "+str(extraEndTime) + sys.exit(0) + lastExtraTime = extraEndTime + if abs(dataEndTime - extraEndTime) > slack: + if dataEndTime < extraEndTime: + if mergedYet: + print("Got a data line at "+str(dataLine)+ " without extradata (line "+str(extraLine)+")") + extradata.keepCurrent() + extraLine -= 1 + mergedata.writeLine(data) + else: + torperfdata.keepCurrent() + dataLine -= 1 + mergedata.writeLine(extra) break - normal.extend(extra) - betterData.writeLine(normal) + + mergedYet = True + data.update(extra) + mergedata.writeLine(data) break - +
if __name__ == "__main__": main() diff --git a/measurements-HOWTO b/measurements-HOWTO index 8fc3a24..e8978e3 100644 --- a/measurements-HOWTO +++ b/measurements-HOWTO @@ -211,14 +211,10 @@ the .data files that is closest to USED_AT is "datacompletesec datacompleteusec". If the times are more than a second apart, something is really wrong.
-XXX: consolidate-stats needs update, and no longer works. +You may want to consolidate a .data file and an .extradata file together +by calling the consolidate-stats script like this:
-Call it like this: - -$ ~/torperf/consolidate-stats DATAFILE EXTRADATAFILE OUTFILE SLACK - -Relays are written as $fingerprint=nickname for Named relays and as -$fingerprint~nickname for relays without the Named flag. +$ ~/torperf/consolidate-stats DATAFILE EXTRADATAFILE OUTFILE
If everything works, you might want to let your system start these Tor clients on system startup. On Debian, this can be done using a crontab