[or-cvs] r15347: Aha! It is paretto. Should have tried this first. Just didn' (torflow/branches/gsoc2008/tools/BTAnalysis)

mikeperry at seul.org mikeperry at seul.org
Wed Jun 18 07:29:15 UTC 2008


Author: mikeperry
Date: 2008-06-18 03:29:15 -0400 (Wed, 18 Jun 2008)
New Revision: 15347

Modified:
   torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
Log:

Aha! It is paretto. Should have tried this first. Just didn't
expect latency to match the bandwidth distribution.



Modified: torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
===================================================================
--- torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py	2008-06-18 05:35:19 UTC (rev 15346)
+++ torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py	2008-06-18 07:29:15 UTC (rev 15347)
@@ -41,21 +41,17 @@
       return values[(len(values) - 1)/2]
     else:
       return 0.0
-  def mode(self):
+
+  def mode(self): # Requires makehistogram runs first
     counts = {}
     greatest = 0
     greatest_val = 0
-    for v in self.values:
-      if v not in counts.keys():
-        counts[v] = 1
-      else:
-        counts[v] += 1
-      if counts[v] > greatest:
+    for v in self.buckets.keys():
+      if self.buckets[v] > greatest:
         greatest_val = v
-        greatest = counts[v]
+        greatest = self.buckets[v]
     return greatest_val
     
-
   def makehistogram(self,res,ncircuits,histname):
     res = res /1000.0 # convert ms to s
     values = copy.copy(self.values) 
@@ -71,19 +67,45 @@
         i += 1
         count = 0
     f = open(histname,'w')
-    f.write('#build time <\t#circuits\n') 
+    f.write('#build time <\t#circuits\n')
     sortedkeys = self.buckets.keys()
     sortedkeys.sort()
     for b in sortedkeys:
       towrite = str(b) + '\t' + str(self.buckets[b]) + '\n'
       f.write(towrite)
     f.close()
+  
+  def paretoK(self, Xm):
+    n = 0
+    log_sum = 0
+    for x in self.values:
+      if x < Xm: continue
+      n += 1
+      log_sum += math.log(x)
+    return n/(log_sum - n*math.log(Xm))
 
+  # Calculate the mean beyond a mode value
+  def modeMean(self, Xm):
+    n = 0
+    tot = 0
+    for x in self.values:
+      if x < Xm: continue
+      n += 1
+      tot += x
+    return tot/n
+
+  def modeN(self, Xm):
+    n = 0
+    for x in self.values:
+      if x < Xm: continue
+      n += 1
+    return n
+
   def maxlikelihood(self,k):
     # theta estimator for gamma PDF
     # maxlikelihood estimator
     # theta = sum(values) / N*k 
-     return sum(self.values * 10)/(k * len(self.values)) 
+    return 10*sum(self.values)/(k * len(self.values))
 
   def bayesian(self,k):
     # bayesian estimator for gamma PDF
@@ -105,6 +127,27 @@
    
   ps = fname + '(x) = '+str(N)+'*((x**' + str(k-1) + ')*(' +str(B**k)+ ')*(exp(-' + str(B) +'*x)))' +'/gamma('+str(k)+')\n'
   return ps
+
+def pareto(k,Xm,N,fname):
+  # gnuplot string for shifted, normalized exponential PDF
+  # g(x,k,B) = (N * k*(Xm**k)/x**(k+1)))
+  ps = fname+'(x)=(x<'+str(Xm)+') ? 0 : ('+str(N*k*(Xm**k))+'/x**('+str(k+1)+'))\n'
+  return ps
+
+def exp(mean,shift,N,fname):
+  # gnuplot string for normalized exponential PDF
+  # g(x,k,B) = N * l*exp(-l*(x-shift))
+  l = 1.0/mean
+  ps = fname+'(x)=(x<'+str(shift)+')?0:('+str(N*l)+'*exp(-abs('+str(l)+'*(x-'+str(shift)+'))))\n'
+  return ps
+
+def shiftedExp(mean,shift,N,fname):
+  # gnuplot string for shifted, normalized exponential PDF
+  # g(x,k,B) = N * l*exp(-l*(x-shift))/(1+(1-exp(-l*shift)))
+  l = 1.0/mean
+  ps = fname+'(x)='+str(N*l)+'*exp(-abs('+str(l)+'*(x-'+str(shift)+')))/(1+(1-exp(-'+str(l*shift)+')))\n'
+  return ps
+
 def poisson(u,N,fname):
   ps = fname + "(x) = " + str(N) + "*(" + str(u) + "**x)*exp(-"+str(u)+")/gamma(x + 1)\n"
   return ps
@@ -139,7 +182,7 @@
         usage()
     elif o == '-f': filename = a
     elif o == '-d': dirname = a
-    elif o == '-k': k = int(a)
+    elif o == '-k': k = float(a)
     else:
       assert False, "Bad option"
   return ncircuits,filename,dirname,k
@@ -158,7 +201,7 @@
   print 'Shuffling...',
 
   newfile = dirname + '/' + filename + '.' + ncircuits 
-  cmd = '/usr/local/bin/sort -R ' + filename + ' | head -n ' + ncircuits  + ' > ' + newfile
+  cmd = 'sort -R ' + filename + ' | head -n ' + ncircuits  + ' > ' + newfile
 
   p = popen2.Popen4(cmd)
   p.wait()
@@ -167,12 +210,17 @@
   # create histogram from file
   print 'Calculating statistics and creating histogram...',
   s = Stats(newfile)
+  s.makehistogram(100,newfile,newfile + '.hist')
   mean = s.mean()
   stddev = s.stddev()
   median = s.median()
-  mode = s.mode()
-  s.makehistogram(100,newfile,newfile + '.hist')
-  print 'Done'
+  mode = s.mode()/10.0 # relies on s.makehistogram for buckets
+  parK = s.paretoK(mode)
+  modeN = s.modeN(mode)
+  modeMean = s.modeMean(mode)
+  print 'Done.'
+  print 'Mean: '+str(mean)+', mode: '+str(mode)
+  print 'ParK: '+str(parK)
 
   # get stats
 
@@ -202,17 +250,18 @@
   #N = s.buckets[i]            # num. circuits that have buildtimes 
                               #close to mean/median/mode from histogram 
 
-  plotstr += gamma(k,maxliketheta,N, 'maxl')
-  plotstr += gamma(k,baytheta[0],N,'bayplus') # + stddev
-  plotstr += gamma(k,baytheta[1],N,'bayminus') # - stddev
+#  plotstr += gamma(k,maxliketheta,N, 'maxl')
+#  plotstr += gamma(k,baytheta[0],N,'bayplus') # + stddev
+#  plotstr += gamma(k,baytheta[1],N,'bayminus') # - stddev
 
+  plotstr += pareto(parK,mode*10,modeN,'pareto')
+  plotstr += exp(modeMean*10,mode*10,modeN,'expShifted')
+
   plotstr += "plot '" + newfile + ".hist' using 2,\\\n"
 
-  plotstr += "maxl(x) title '" + "max. likelihood', \\\n"
-  plotstr += "bayplus(x) title '" + "bayesian  theta +', \\\n"
-  plotstr += "bayminus(x) title '" + "bayesian theta -' \n"
+  plotstr += "pareto(x) title '" + "Shifted Pareto', \\\n"
+  plotstr += "expShifted(x) title '" + "Shifted Exp' \n"
 
-  
 
   f = open(plotname,'w')
   f.write(plotstr)
@@ -221,8 +270,8 @@
 
   # plot the file
   print 'Plotting...',
-#  p = popen2.Popen4('gnuplot ' + plotname)
-  p = popen2.Popen4('gp4.2 ' + plotname)
+  p = popen2.Popen4('gnuplot ' + plotname)
+#  p = popen2.Popen4('gp4.2 ' + plotname)
 
   p.wait()
   for err in p.fromchild:



More information about the tor-commits mailing list