[or-cvs] r17929: {torflow} Improve the buildtimes gathering script to be more flexible (torflow/trunk/CircuitAnalysis/BuildTimes)

mikeperry at seul.org mikeperry at seul.org
Mon Jan 5 22:43:54 UTC 2009


Author: mikeperry
Date: 2009-01-05 17:43:54 -0500 (Mon, 05 Jan 2009)
New Revision: 17929

Modified:
   torflow/trunk/CircuitAnalysis/BuildTimes/buildtimes.py
Log:

Improve the buildtimes gathering script to be more flexible and more accurate
with aggregate stats.



Modified: torflow/trunk/CircuitAnalysis/BuildTimes/buildtimes.py
===================================================================
--- torflow/trunk/CircuitAnalysis/BuildTimes/buildtimes.py	2009-01-05 22:41:12 UTC (rev 17928)
+++ torflow/trunk/CircuitAnalysis/BuildTimes/buildtimes.py	2009-01-05 22:43:54 UTC (rev 17929)
@@ -2,11 +2,12 @@
 # uses metatroller to collect circuit build times for 5% slices of guard nodes
 # [OUTPUT] one directory, with three files: StatsHandler aggregate stats file, file with all circuit events (for detailed reference), file with just buildtimes
 
-import socket,sys,time,getopt,os
+import socket,sys,time,getopt,os,threading
 sys.path.append("../../")
 from TorCtl.TorUtil import meta_port,meta_host,control_port,control_host
 from TorCtl.StatsSupport import StatsHandler
 from TorCtl import PathSupport, TorCtl
+
 __selmgr = PathSupport.SelectionManager(
       pathlen=3,
       order_exits=True,
@@ -39,9 +40,9 @@
         cb(reply)
 
       if self._handler is not None:
-        if self._handler.circ_failed + self._handler.circ_built >= self._handler.nstats:
-          print 'Finished gathering',self._handler.circ_failed + self._handler.circ_built,'circuits'
-          print self._handler.circ_failed,'failed',self._handler.circ_built,'built'
+        if self._handler.done:
+          print 'Finished gathering',self._handler.circ_failed + self._handler.circ_succeeded,'circuits'
+          print self._handler.circ_failed,'failed',self._handler.circ_succeeded,'built'
           return 
 
 class StatsGatherer(StatsHandler):
@@ -52,6 +53,7 @@
     self.buildtimesfile = open(basefile_name + '.buildtimes','w')
     self.circ_built = 0
     self.nstats = nstats
+    self.done = False
 
     # sometimes relevant CircEvents occur before the circ_id is 
     # added to self.circuits, which means they get discarded
@@ -87,7 +89,7 @@
     now = time.time()
     now = '%3.10f' % now
 
-    if circ_event.circ_id in self.circuits.keys():
+    if circ_event.circ_id in self.circuits:
       self.add_missed_events(circ_event.circ_id)
       if circ_event.status == 'EXTENDED':
         extend_time = circ_event.arrived_at-self.circuits[circ_event.circ_id].last_extended_at
@@ -109,7 +111,7 @@
     else:
       #eventstr = 
       #if circ_event.circ_id in self.othercircs.keys():
-      if circ_event.circ_id not in self.othercircs.keys():
+      if circ_event.circ_id not in self.othercircs:
         self.othercircs[circ_event.circ_id] = []
       self.othercircs[circ_event.circ_id] += [self.circ_event_str(now,circ_event)]
     StatsHandler.circ_status_event(self,circ_event)
@@ -132,7 +134,7 @@
                 TorCtl.EVENT_TYPE.NEWDESC], True)
   return c
 
-def setargs():
+def getargs():
   ncircuits = ""
   dirname = ""
   filename = ""
@@ -140,31 +142,39 @@
     usage()
     sys.exit(2)
   try:
-    opts,args = getopt.getopt(sys.argv[1:],"p:n:d:")
+    opts,args = getopt.getopt(sys.argv[1:],"b:e:s:n:d:")
   except getopt.GetoptError,err:
     print str(err)
     usage()
   ncircuits=None
-  percentile=None
+  begin=0
+  end=80
+  slice=5
   dirname=""
   for o,a in opts:
     if o == '-n': 
       if a.isdigit(): ncircuits = int(a)
       else: usage()
     elif o == '-d': dirname = a  #directory where output files go 
-    elif o == '-p':
-      if a.isdigit(): percentile = int(a)
+    elif o == '-b':
+      if a.isdigit(): begin = int(a)
       else: usage()
+    elif o == '-e':
+      if a.isdigit(): end = int(a)
+      else: usage()
+    elif o == '-s':
+      if a.isdigit(): slice = int(a)
+      else: usage()
     else:
       assert False, "Bad option"
-  return ncircuits,percentile,dirname
+  return ncircuits,begin,end,slice,dirname
 
 def usage():
-    print 'usage: statscontroller.py [-p <#percentile>]  -n <# circuits> -d <output dir name>'
+    print 'usage: statscontroller.py [-b <#begin percentile>] [-e <end percentile] [-s <percentile slice>] -n <# circuits> -d <output dir name>'
     sys.exit(1)
 
 
-def guardslice(p,ncircuits,dirname):
+def guardslice(p,s,end,ncircuits,dirname):
 
   print 'Making new directory:',dirname
   if not os.path.isdir(dirname):
@@ -172,15 +182,22 @@
   else:
     print 'Directory',dirname,'exists, not making a new one.'
 
-  print 'Guard percentiles:',p,'to',p+5
+  print 'Guard percentiles:',p,'to',s
   print '#Circuits',ncircuits
 
-  basefile_name = dirname + '/' + str(p) + '-' + str(p+5) + '.' + str(ncircuits)
+  basefile_name = dirname + '/' + str(p) + '-' + str(s) + '.' + str(ncircuits)
   aggfile_name =  basefile_name + '.agg'
 
-  __selmgr.percent_fast = p+5
+  # Ok, since we create a new StatsGatherer each segment..
+  __selmgr.percent_fast = s
   __selmgr.percent_skip = p
-  
+  if s-p >= end:
+    print 'Using bandwidth weighted selection'
+    __selmgr.uniform = False 
+  else: 
+    print 'Using uniform weighted selection'
+    __selmgr.uniform = True
+ 
   c = getdata(basefile_name,ncircuits)
 
   for i in xrange(0,ncircuits):
@@ -189,27 +206,39 @@
       # XXX: hrmm.. race conditions on the path_selectior members 
       # for the event handler thread?
       # Probably only if streams end up coming in during this test..
-      circ = c.build_circuit(__selmgr.pathlen,__selmgr.path_selector)
-      c._handler.circuits[circ.circ_id] = circ
+      def notlambda(h):
+        circ = h.c.build_circuit(h.selmgr.pathlen, h.selmgr.path_selector)   
+        h.circuits[circ.circ_id] = circ
+      c._handler.schedule_low_prio(notlambda)
     except TorCtl.ErrorReply,e:
       plog("NOTICE","Error building circuit: " + str(e.args))
 
   while True:
     time.sleep(1)
     if c._handler.circ_built + c._handler.circ_failed >= ncircuits:
-      print 'Done gathering stats for slice',p,'to',p+5,'on',ncircuits
+      print 'Done gathering stats for slice',p,'to',s,'on',ncircuits
       print c._handler.circ_built,'built',c._handler.circ_failed,'failed' 
       break
-  c._handler.write_stats(aggfile_name)
 
+  cond = threading.Condition() 
+  def notlambda(h):
+    cond.acquire()
+    h.close_all_circuits()
+    h.write_stats(aggfile_name)
+    cond.notify()
+    cond.release()
+    h.done = True
+    print "Wrote stats."
+  cond.acquire()
+  c._handler.schedule_low_prio(notlambda)
+  cond.wait()
+  cond.release()
+  print "Done in main."
+
 def main():
-  ncircuits,p,dirname = setargs()
+  ncircuits,begin,end,pct,dirname = getargs()
 
-  if p is None:
-    # do all
-    for p in xrange(0,100,5):
-      guardslice(p,ncircuits,dirname)
-  else:
-    guardslice(p,ncircuits,dirname)
+  for p in xrange(begin,end,pct):
+    guardslice(p,p+pct,end,ncircuits,dirname)
 if __name__ == '__main__':
   main()



More information about the tor-commits mailing list