[tor-commits] [tor/master] Hack up the scripts/maint/*callgraph* scripts to do more, better

nickm at torproject.org nickm at torproject.org
Sun Aug 16 02:41:16 UTC 2015


commit bb46630513ddfebd6e6dc79491912412bb8985ff
Author: Nick Mathewson <nickm at torproject.org>
Date:   Tue Aug 11 09:07:54 2015 -0400

    Hack up the scripts/maint/*callgraph* scripts to do more, better
    
    These scripts are now a little more bulletproof, cache data a little
    better, and generate more information.  Notably, they search for the
    vectors or edges to cut that would lower the size of the largest
    SCC.
---
 scripts/maint/analyze_callgraph.py |  120 +++++++++++++++++++++++++++++++++---
 scripts/maint/display_callgraph.py |   25 ++++++--
 2 files changed, 132 insertions(+), 13 deletions(-)

diff --git a/scripts/maint/analyze_callgraph.py b/scripts/maint/analyze_callgraph.py
index 69ae128..b284604 100755
--- a/scripts/maint/analyze_callgraph.py
+++ b/scripts/maint/analyze_callgraph.py
@@ -13,7 +13,7 @@ class Parser:
   def enter_func(self, name):
     if self.infunc and not self.extern:
       self.calls.setdefault(self.infunc, set()).update( self.calledfns )
- 
+
     self.calledfns = set()
     self.infunc = name
     self.extern = False
@@ -23,7 +23,7 @@ class Parser:
     self.extern = False
     self.calledfns = set()
     for line in inp:
-       m = re.match(r"Call graph node for function: '([^']+)'", line) 
+       m = re.match(r"Call graph node for function: '([^']+)'", line)
        if m:
            self.enter_func(m.group(1))
            continue
@@ -42,18 +42,28 @@ class Parser:
 
 
 def transitive_closure(g):
+    passno = 0
     changed = True
     g = copy.deepcopy(g)
+    import random
     while changed:
+      passno += 1
       changed = False
-      print "X"
-      for k in g.keys():
+      keys = g.keys()
+      idx = 0
+      for k in keys:
+         idx += 1
+         print "Pass %d/?: %d/%d\r" %(passno, idx, len(keys)),
+         sys.stdout.flush()
          newset = g[k].copy()
          for fn in g[k]:
             newset.update(g.get(fn, set()))
          if len(newset) != len(g[k]):
             g[k].update( newset )
             changed = True
+
+      print
+
     return g
 
 def strongly_connected_components(g):
@@ -96,22 +106,114 @@ def strongly_connected_components(g):
 
   return all_sccs
 
+def biggest_component(sccs):
+  return max(len(c) for c in sccs)
+
+def connection_bottlenecks(callgraph):
+
+  callers = {}
+  for fn in callgraph:
+    for fn2 in callgraph[fn]:
+      callers.setdefault(fn2, set()).add(fn)
+
+  components = strongly_connected_components(callgraph)
+  components.sort(key=len)
+  big_component_fns = components[-1]
+  size = len(big_component_fns)
+
+  function_bottlenecks = fn_results = []
+
+  total = len(big_component_fns)
+  idx = 0
+  for fn in big_component_fns:
+    idx += 1
+    print "Pass 1/3: %d/%d\r"%(idx, total),
+    sys.stdout.flush()
+    cg2 = copy.deepcopy(callgraph)
+    del cg2[fn]
+
+    fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), fn) )
+
+  print
+  bcf_set = set(big_component_fns)
+
+  call_bottlenecks = fn_results = []
+  result_set = set()
+  total = len(big_component_fns)
+  idx = 0
+  for fn in big_component_fns:
+    fn_callers = callers[fn].intersection(bcf_set)
+    idx += 1
+    if len(fn_callers) != 1:
+      continue
+
+    print "Pass 2/3: %d/%d\r"%(idx, total),
+    sys.stdout.flush()
+
+    caller = fn_callers.pop()
+    assert len(fn_callers) == 0
+    cg2 = copy.deepcopy(callgraph)
+    cg2[caller].remove(fn)
+
+    fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), fn, "called by", caller) )
+    result_set.add( (caller, fn) )
+
+  print
+
+  total = len(big_component_fns)
+  idx = 0
+  for fn in big_component_fns:
+    fn_calls = callgraph[fn].intersection(bcf_set)
+    idx += 1
+    if len(fn_calls) != 1:
+      continue
+
+    print "Pass 3/3: %d/%d\r"%(idx, total),
+    sys.stdout.flush()
+
+    callee = fn_calls.pop()
+    if (fn, callee) in result_set:
+      continue
+
+    assert len(fn_calls) == 0
+    cg2 = copy.deepcopy(callgraph)
+    cg2[fn].remove(callee)
+
+    fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), callee, "called by", fn) )
+
+  print
+
+
+  return (function_bottlenecks, call_bottlenecks)
+
 if __name__ == '__main__':
     p = Parser()
     for fname in sys.argv[1:]:
       with open(fname, 'r') as f:
         p.parse_callgraph_file(f)
+
+    sys.stdout.flush
+
+    print "Building callgraph"
     callgraph = p.extract_callgraph()
 
+    print "Finding strongly connected components"
     sccs = strongly_connected_components(callgraph)
 
+    print "Finding the transitive closure of the callgraph.."
     closure = transitive_closure(callgraph)
 
+    print "Finding bottlenecks..."
+    bottlenecks = connection_bottlenecks(callgraph)
+
+    data = {
+      'callgraph' : callgraph,
+      'sccs' : sccs,
+      'closure' : closure,
+      'bottlenecks' : bottlenecks }
+
     with open('callgraph.pkl', 'w') as f:
-      cPickle.dump(callgraph, f)
+      cPickle.dump(data, f)
+
 
-    with open('callgraph_closure.pkl', 'w') as f:
-      cPickle.dump(closure, f)
 
-    with open('callgraph_sccs.pkl', 'w') as f:
-      cPickle.dump(sccs, f)
diff --git a/scripts/maint/display_callgraph.py b/scripts/maint/display_callgraph.py
index 9ead56c..211bfda 100755
--- a/scripts/maint/display_callgraph.py
+++ b/scripts/maint/display_callgraph.py
@@ -2,9 +2,12 @@
 
 import cPickle
 
-callgraph = cPickle.load(open("callgraph.pkl"))
-closure = cPickle.load(open("callgraph_closure.pkl"))
-sccs = cPickle.load(open("callgraph_sccs.pkl"))
+data = cPickle.load(open("callgraph.pkl"))
+
+callgraph = data['callgraph']
+closure = data['closure']
+sccs = data['sccs']
+fn_bottle, call_bottle = data['bottlenecks']
 
 for n_reachable, fn in sorted(list((len(r), fn) for fn, r in closure.iteritems())):
    print "%s can reach %s other functions." %(fn, n_reachable)
@@ -13,10 +16,24 @@ for n_reachable, fn in sorted(list((len(r), fn) for fn, r in closure.iteritems()
 c = [ (len(component), component) for component in sccs ]
 c.sort()
 
+print "\n================================"
+
 for n, component in c:
    if n < 2:
       continue
-   print n, component
+   print "Strongly connected component of size %d:"%n
+   print component
+
+
+print "\n================================"
 
+print "====== Number of functions pulled into blob, by function in blob."
+fn_bottle.sort()
+for n, fn in fn_bottle[-30:]:
+   print "%3d: %s"%(n, fn)
 
+print "====== Number of functions pulled into blob, by call in blob."
+call_bottle.sort()
+for n, fn1, _, fn2 in call_bottle[-30:]:
+   print "%3d: %s -> %s "%(n, fn2, fn1)
 





More information about the tor-commits mailing list