[tor-commits] [torperf/master] Update consolidate_stats to new .data and .extradata formats.

karsten at torproject.org karsten at torproject.org
Wed Mar 16 12:47:18 UTC 2011


commit 17182144e3f597b74772b1ff9299385f4695fd0d
Author: Mike Perry <mikeperry-git at fscked.org>
Date:   Fri Mar 4 18:57:24 2011 -0800

    Update consolidate_stats to new .data and .extradata formats.
---
 consolidate_stats.py |  150 ++++++++++++++++++++++++++++++++++----------------
 measurements-HOWTO   |   10 +--
 2 files changed, 106 insertions(+), 54 deletions(-)

diff --git a/consolidate_stats.py b/consolidate_stats.py
old mode 100644
new mode 100755
index 40e8d0a..4c8a990
--- a/consolidate_stats.py
+++ b/consolidate_stats.py
@@ -1,29 +1,49 @@
-###
-#   Call this with 4 parameters: the file to read data from, the file to read
-#   extradata from, the file to write the combined data to, the slack interval
-#   to match data and extradata timestamps.
+#!/usr/bin/python
+#
+# This script consolidates a .data file and an .extradata file together,
+# matching the lines based on the completion time.
 #
-#   IMPORTANT: You need to manually sort -g the data file, because torperf
-#   might screw up ordering and this script expects sorted lines!
+# The resulting output will be the union of both files. It will match lines
+# where possible, and include unmatched lines from both files as well.
+#
+# Usage:
+#   ./consolidate_stats.py <.data file> <.extradata file> <.mergedata file>
 ###
 
-import sys, time
+import sys
 
 class Data:
   def __init__(self, filename, mode="r"):
     self._filename = filename
     self._file = open(filename, mode)
+    self._curData = None
+    self._retCurrent = False
 
   def prepline(self):
+    if self._retCurrent:
+      self._retCurrent = False
+      return self._curData
     line = self._file.readline()
     if line == "" or line == "\n":
       raise StopIteration
-    if line[-1] == "\n":
-      line = line[:-1]
-    return line.split(" ")
+    line = line.strip()
+    self._curData = line.split(" ")
+    return self._curData
+
+  def keepCurrent(self):
+    self._retCurrent = True
+
+class TorperfData(Data):
+  def __init__(self, filename):
+    Data.__init__(self, filename)
+    self.fields = "STARTSEC STARTUSEC SOCKETSEC SOCKETUSEC CONNECTSEC CONNECTUSEC NEGOTIATESEC NEGOTIATEUSEC REQUESTSEC REQUESTUSEC RESPONSESEC RESPONSEUSEC DATAREQUESTSEC DATAREQUESTUSEC DATARESPONSESEC DATARESPONSEUSEC DATACOMPLETESEC DATACOMPLETEUSEC WRITEBYTES READBYTES DIDTIMEOUT".split(" ")
 
   def next(self):
-    return self.prepline()
+    ret = {}
+    values = self.prepline()
+    for i in xrange(len(values)):
+      ret[self.fields[i]] = values[i]
+    return ret
 
   def __iter__(self):
     return self
@@ -31,58 +51,94 @@ class Data:
 class ExtraData(Data):
   def __init__(self, filename):
     Data.__init__(self, filename)
-    self._curData = None
-    self._retCurrent = False
 
   def next(self):
-    if self._retCurrent == True:
-      self._retCurrent = False
-      return self._curData
     cont = self.prepline()
-    if cont[0] == "ok":
-      self._curData = cont[1:]
-      return self._curData
-    print('Ignoring line "' + " ".join(cont) + '"')
-    return self.next()
 
-  def keepCurrent(self):
-    self._retCurrent = True
+    ret = {}
+    for i in cont:
+      if not "=" in i:
+        ret[i] = ""
+        continue
+      pair = i.split("=")
+      ret[pair[0]] = pair[1]
 
-class NormalData(Data):
-  def __init__(self, filename):
-    Data.__init__(self, filename)
+    if not "CIRC_ID" in ret:
+      #print('Ignoring line "' + " ".join(cont) + '"')
+      return self.next()
+    return ret
 
-class BetterData(Data):
+  def __iter__(self):
+    return self
+
+class MergeData(Data):
   def __init__(self, filename):
     Data.__init__(self, filename, "w")
 
-  def writeLine(self, line):
+  def writeLine(self, data):
+    line = []
+    for key in data.iterkeys():
+      line.append(key+"="+data[key])
+    line.sort()
     self._file.write(" ".join(line) + "\n")
 
 def main():
-  if len(sys.argv) < 5:
-    print("Bad arguments")
+  if len(sys.argv) != 4:
+    print("See script header for usage")
     sys.exit(1)
 
-  normalData = NormalData(sys.argv[1])
-  extraData = ExtraData(sys.argv[2])
-  betterData = BetterData(sys.argv[3])
-  slack = int(sys.argv[4])
-  for normal in normalData:
-    normalTime = int(normal[0])
-    for extra in extraData:
-      extraTime = int(extra[0])
-      if normalTime > extraTime:
-        print("Got unexpected extradata entry" + " ".join(extra))
-        continue
-      if normalTime + slack < extraTime:
-        print("Got a data entry without extradata " + " ".join(normal))
-        extraData.keepCurrent()
+  torperfdata = TorperfData(sys.argv[1])
+  extradata = ExtraData(sys.argv[2])
+  mergedata = MergeData(sys.argv[3])
+  slack = 1.0 # More than 1s means something is really, really wrong
+  lastDataTime = 0
+  lastExtraTime = 0
+  dataLine = 0
+  extraLine = 0
+  mergedYet = False
+  for data in torperfdata:
+    dataLine += 1
+    dataEndTime = int(data["DATACOMPLETESEC"])
+    dataEndTime += int(data["DATACOMPLETEUSEC"])/1000000.0
+    if not dataEndTime:
+      # Skip failures
+      continue
+
+    if lastDataTime > dataEndTime:
+      print "Torperf .data is not monotonic! Sort it by completion time!"
+      print "Line "+str(dataLine)+" "+str(lastDataTime)+" > "+str(dataEndTime)
+      sys.exit(0)
+    lastDataTime = dataEndTime
+    for extra in extradata:
+      extraLine += 1
+      if not "USED_AT" in extra or not extra["USED_AT"]:
+        mergedata.writeLine(extra)
+        continue # Failed circ
+
+      extraEndTime = float(extra["USED_AT"])
+      if lastExtraTime > extraEndTime:
+        print "The .extradata is not monotonic! Sort it by USED_AT!"
+        print "Line "+str(extraLine)+" "+str(lastExtraTime)+" > "+str(extraEndTime)
+        sys.exit(0)
+      lastExtraTime = extraEndTime
+      if abs(dataEndTime - extraEndTime) > slack:
+        if dataEndTime < extraEndTime:
+          if mergedYet:
+            print("Got a data line at "+str(dataLine)+ " without extradata (line "+str(extraLine)+")")
+          extradata.keepCurrent()
+          extraLine -= 1
+          mergedata.writeLine(data)
+        else:
+          torperfdata.keepCurrent()
+          dataLine -= 1
+          mergedata.writeLine(extra)
         break
-      normal.extend(extra)
-      betterData.writeLine(normal)
+
+      mergedYet = True
+      data.update(extra)
+      mergedata.writeLine(data)
       break
-  
+
 
 if __name__ == "__main__":
   main()
diff --git a/measurements-HOWTO b/measurements-HOWTO
index 8fc3a24..e8978e3 100644
--- a/measurements-HOWTO
+++ b/measurements-HOWTO
@@ -211,14 +211,10 @@ the .data files that is closest to USED_AT is "datacompletesec
 datacompleteusec". If the times are more than a second apart, something is
 really wrong.
 
-XXX: consolidate-stats needs update, and no longer works.
+You may want to consolidate a .data file and an .extradata file together
+by calling the consolidate_stats.py script like this:
 
-Call it like this:
-
-$ ~/torperf/consolidate-stats DATAFILE EXTRADATAFILE OUTFILE SLACK
-
-Relays are written as $fingerprint=nickname for Named relays and as
-$fingerprint~nickname for relays without the Named flag.
+$ ~/torperf/consolidate_stats.py DATAFILE EXTRADATAFILE OUTFILE
 
 If everything works, you might want to let your system start these Tor
 clients on system startup. On Debian, this can be done using a crontab



More information about the tor-commits mailing list