[or-cvs] r15329: Modified to test out the gamma distribution; adjust k via th (torflow/branches/gsoc2008/tools/BTAnalysis)

fallon at seul.org fallon at seul.org
Tue Jun 17 11:32:15 UTC 2008


Author: fallon
Date: 2008-06-17 07:32:15 -0400 (Tue, 17 Jun 2008)
New Revision: 15329

Modified:
   torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
Log:
Modified to test out the gamma distribution; adjust k via the -k parameter.


Modified: torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
===================================================================
--- torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py	2008-06-17 10:51:59 UTC (rev 15328)
+++ torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py	2008-06-17 11:32:15 UTC (rev 15329)
@@ -17,6 +17,7 @@
     for line in self.f:
       self.values += [float(line[:-1])]
     self.f.close()
+    self.buckets = {}
   def mean(self):
     # Borrowed from TorUtil
     if len(self.values) > 0:
@@ -61,23 +62,49 @@
     values.sort()
     count = 0
     i = 1
-    buckets = {} 
+    self.buckets = {} 
     for v in values:
       if v < res * i: count += 1
       else:
         count += 1
-        buckets[res * i] = count
+        self.buckets[res * i] = count
         i += 1
         count = 0
     f = open(histname,'w')
     f.write('#build time <\t#circuits\n') 
-    sortedkeys = buckets.keys()
+    sortedkeys = self.buckets.keys()
     sortedkeys.sort()
     for b in sortedkeys:
-      towrite = str(b) + '\t' + str(buckets[b]) + '\n'
+      towrite = str(b) + '\t' + str(self.buckets[b]) + '\n'
       f.write(towrite)
     f.close()
 
+  def maxlikelihood(self,k):
+    # theta estimator for gamma PDF
+    # maxlikelihood estimator
+    # theta = sum(values) / N*k 
+     return sum(self.values)/(k * len(self.values)) 
+
+  def bayesian(self,k):
+    # bayesian estimator for gamma PDF
+    # y = sum(values)
+    # theta = y/(Nk - 1) +/- y^2/((Nk-1)^2(Nk -2))
+    y = sum(self.values)
+    N = len(self.values)
+    mean = y/(N*k - 1)
+    sdev = (y*y)/((N*k - 1)* (N*k - 1) * (N*k - 2))
+    plus = mean + sdev
+    minus = mean - sdev
+    return plus,minus
+
+## Functions that return a gnuplot function string for a given distribution
+def gamma(k,theta, N,fname):
+  # gnuplot string for gamma PDF
+  # g(x,k,B) = (x**(k - 1) * B**k * exp(-B*x))/gamma(k)
+  B = 1.0/theta
+   
+  ps = fname + '(x) = '+str(N)+'*((x**' + str(k-1) + ')*(' +str(B**k)+ ')*(exp(-' + str(B) +'*x)))' +'/gamma('+str(k)+')\n'
+  return ps
 def poisson(u,N,fname):
   ps = fname + "(x) = " + str(N) + "*(" + str(u) + "**x)*exp(-"+str(u)+")/gamma(x + 1)\n"
   return ps
@@ -86,8 +113,9 @@
   ps = fname + "(x)="+str(int(N)/d)+"*(exp(-((x-"+str(u)+ ")**2)/"+str(2*d*d)+"))/sqrt(2*pi)\n"
   return ps
 
+
 def usage():
-  print "shufflebt.py -n <# circuits> -f <timefile> -d <outdirname>"
+  print "shufflebt.py -n <# circuits> -f <timefile> -d <outdirname> -k <k val.>"
 
 
 def setargs():
@@ -98,7 +126,7 @@
     usage()
     sys.exit(2)
   try:
-    opts,args = getopt.getopt(sys.argv[1:],"n:f:d:")
+    opts,args = getopt.getopt(sys.argv[1:],"n:f:d:k:")
   except getopt.GetoptError,err:
     print str(err)
     usage()
@@ -111,11 +139,12 @@
         usage()
     elif o == '-f': filename = a
     elif o == '-d': dirname = a
+    elif o == '-k': k = int(a)
     else:
       assert False, "Bad option"
-  return ncircuits,filename,dirname
+  return ncircuits,filename,dirname,k
 if __name__ == "__main__":
-  ncircuits,filename,dirname = setargs()
+  ncircuits,filename,dirname,k = setargs()
   print 'Num. Circuits:[',ncircuits,'] Filename:[',filename,'] Dir. Name:[',dirname,']'
 
   # make new directory
@@ -130,6 +159,7 @@
 
   newfile = dirname + '/' + filename + '.' + ncircuits 
   cmd = 'sort -R ' + filename + ' | head -n ' + ncircuits  + ' > ' + newfile
+
   p = popen2.Popen4(cmd)
   p.wait()
   print 'Done'
@@ -149,31 +179,40 @@
   # create gnuplot file
   print 'Creating gnuplot plot file...',
   plotname =  newfile + '.plt'
-  plotstr = "set terminal png transparent nocrop enhanced size 800,600\nset output '" + newfile + ".png'\nset style fill  solid 1.00 border -1\nset style histogram clustered gap 1 title  offset character 0, 0, 0\nset datafile missing '-'\nset style data histograms\nset title 'Buildtime Distribution Function for "+ ncircuits +" Circuits'\nset ylabel '# Circuits'\nset xlabel 'time (in 100ms)'\n"
-  plotstr += "set label 'std dev=" + str(stddev) + "' at 450,1380\n"
+ 
+  plotstr = "set terminal png transparent nocrop enhanced size 800,600\nset output '" + newfile + ".png'\nset style fill  solid 1.00 border -1\nset style histogram clustered gap 1 title  offset character 0, 0, 0\nset datafile missing '-'\nset style data histograms\nset title 'Buildtime Distribution Function for "+ ncircuits +" Circuits k=" + str(k) + "\nset ylabel '# Circuits'\nset xlabel 'time (in 100ms)'\n"
+  plotstr += "set label 'std dev=" + str(stddev) + "' at 170,15\n"
 
   # FIXME: Hrmm... http://en.wikipedia.org/wiki/Skewness? Seems like a hack
   # Or better: http://en.wikipedia.org/wiki/Gamma_distribution with k=3?
   # Would make sense if this is the sum of 3 paretos for the individual
   # hop distributions.
-  plotstr += poisson(mean*10,ncircuits,'pmean')
-  plotstr += poisson(median*10,ncircuits,'pmedian')
-  plotstr += poisson(mode*10,ncircuits,'pmode')
-  plotstr += normal(mean*10,stddev*10,ncircuits,'nmean')
-  plotstr += normal(median*10,stddev*10,ncircuits,'nmedian')
-  plotstr += normal(mode*10,stddev*10,ncircuits,'nmode')
 
+  # Theta estimations 
+  maxliketheta = s.maxlikelihood(k)
+  baytheta = s.bayesian(k)
+
+  # N is the value to multipy the probabilities by
+  N = len(s.values)
+
+  #FIX? Other potential values of N: #circuits that match mode? median? mean?
+  #print 'Mean:',mean,'Median:', median,'Mode:', mode
+  #i = float("%3.1f" % mean) # crappy way of rounding
+  #N = s.buckets[i]            # num. circuits that have buildtimes 
+                              #close to mean/median/mode from histogram 
+
+  plotstr += gamma(k,maxliketheta,N, 'maxl')
+  plotstr += gamma(k,baytheta[0],N,'bayplus') # + stddev
+  plotstr += gamma(k,baytheta[1],N,'bayminus') # - stddev
+
   plotstr += "plot '" + newfile + ".hist' using 2,\\\n"
 
-  plotstr += "nmean(x) title '" + "mean=" + str(mean) + "' ,\\\n"
-  plotstr += "nmedian(x) title '" + "median=" + str(median) + "' ,\\\n"
-  plotstr += "nmode(x) title '" + "mode=" + str(mode) + "' ,\\\n"
+  plotstr += "maxl(x) title '" + "max. likelihood', \\\n"
+  plotstr += "bayplus(x) title '" + "bayesian  theta +', \\\n"
+  plotstr += "bayminus(x) title '" + "bayesian theta -' \n"
 
-  plotstr += "pmean(x) title '" + "mean=" + str(mean) + "' ,\\\n"
-  plotstr += "pmedian(x) title '" + "median=" + str(median) + "' ,\\\n"
-  plotstr += "pmode(x) title '" + "mode=" + str(mode) + "' \\\n"
+  
 
-
   f = open(plotname,'w')
   f.write(plotstr)
   f.close()



More information about the tor-commits mailing list