[tor/master] Add more intelligent problem tracking.

13 Mar 2019

commit 26c4f6cfd07fea6d49109e60a8b591b92ec59c49
Author: George Kadianakis <desnacked@riseup.net>
Date:   Wed Feb 27 18:24:10 2019 +0200

    Add more intelligent problem tracking.
---
 scripts/maint/practracker/practracker.py | 55 +++++++++---------
 scripts/maint/practracker/problem.py     | 96 ++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+), 28 deletions(-)

diff --git a/scripts/maint/practracker/practracker.py b/scripts/maint/practracker/practracker.py
index c3b8a9783..4fbca615c 100755
--- a/scripts/maint/practracker/practracker.py
+++ b/scripts/maint/practracker/practracker.py
@@ -5,7 +5,7 @@ Tor code best-practices tracker
 
 Go through the various .c files and collect metrics about them. If the metrics
 violate some of our best practices and they are not found in the optional
-exceptions file ("./exceptions.txt"), then log a violation about them.
+exceptions file ("./exceptions.txt"), then log a problem about them.
 
 The exceptions file is meant to be initialized with the current state of the
 source code as follows: ./practracker.py > ./exceptions.txt
@@ -22,6 +22,7 @@ import os, sys
 
 import metrics
 import util
+import problem
 
 # We don't want to run metrics for unittests, automatically-generated C files,
 # external libraries or git leftovers.
@@ -41,76 +42,74 @@ MAX_INCLUDE_COUNT = 50
 
 #######################################################
 
-def print_violation_if_not_exception(violation_str, exceptions_str):
-    # Check if this violation is already in the optional exceptions file
-    if exceptions_str and violation_str in exceptions_str:
-        return
-
-    print violation_str
+ProblemVault = None
 
 #######################################################
 
-def consider_file_size(fname, f, exceptions_str):
+def consider_file_size(fname, f):
     file_size = metrics.file_len(f)
     if file_size > MAX_FILE_SIZE:
-        violation_str = "violation file-size %s %d" % (fname, file_size)
-        print_violation_if_not_exception(violation_str, exceptions_str)
+        v = problem.FileSizeProblem(fname, file_size)
+        ProblemVault.register_problem(v)
 
-def consider_includes(fname, f, exceptions_str):
+def consider_includes(fname, f):
     include_count = metrics.get_include_count(f)
 
     if include_count > MAX_INCLUDE_COUNT:
-        violation_str = "violation include-count %s %d" % (fname, include_count)
-        print_violation_if_not_exception(violation_str, exceptions_str)
+        v = problem.IncludeCountProblem(fname, include_count)
+        ProblemVault.register_problem(v)
 
-def consider_function_size(fname, f, exceptions_str):
+def consider_function_size(fname, f):
     for name, lines in metrics.function_lines(f):
         # Don't worry about functions within our limits
         if lines <= MAX_FUNCTION_SIZE:
             continue
 
-        # That's a big function! Issue a violation!
+        # That's a big function! Issue a problem!
         canonical_function_name = "%s:%s()" % (fname,name)
-        violation_str = "violation function-size %s %s" % (lines, canonical_function_name)
-        print_violation_if_not_exception(violation_str, exceptions_str)
+        v = problem.FunctionSizeProblem(canonical_function_name, lines)
+        ProblemVault.register_problem(v)
 
 #######################################################
 
-def consider_all_metrics(files_list, exceptions_str):
+def consider_all_metrics(files_list):
     """Consider metrics for all files"""
     for fname in files_list:
         with open(fname, 'r') as f:
-            consider_metrics_for_file(fname, f, exceptions_str)
+            consider_metrics_for_file(fname, f)
 
-def consider_metrics_for_file(fname, f, exceptions_str):
+def consider_metrics_for_file(fname, f):
     """
     Get metrics for file with filename 'fname' and file descriptor 'f'.
     """
     # Get file length
-    consider_file_size(fname, f, exceptions_str)
+    consider_file_size(fname, f)
 
     # Consider number of #includes
     f.seek(0)
-    consider_includes(fname, f, exceptions_str)
+    consider_includes(fname, f)
 
     # Get function length
     f.seek(0)
-    consider_function_size(fname, f, exceptions_str)
+    consider_function_size(fname, f)
 
 def main():
+    global ProblemVault
+
     # 1) Get all the .c files we care about
     files_list = util.get_tor_c_files(TOR_TOPDIR, EXCLUDE_SOURCE_DIRS)
 
-    # 2) Read an optional exceptions file so that we don't warn about the past
-    exceptions_str = None
+    # 2) Initialize problem vault and load an optional exceptions file so that
+    # we don't warn about the past
     try:
         with open(EXCEPTIONS_FILE, 'r') as exception_f:
-            exceptions_str = exception_f.read()
+            ProblemVault = problem.ProblemVault(exception_f)
     except IOError:
         print "No exception file provided"
+        ProblemVault = problem.ProblemVault(None)
 
-    # 3) Go through all the files and report violations if they are not exceptions
-    consider_all_metrics(files_list, exceptions_str)
+    # 3) Go through all the files and report problems if they are not exceptions
+    consider_all_metrics(files_list)
 
 if __name__ == '__main__':
     main()
diff --git a/scripts/maint/practracker/problem.py b/scripts/maint/practracker/problem.py
new file mode 100644
index 000000000..fe6b30915
--- /dev/null
+++ b/scripts/maint/practracker/problem.py
@@ -0,0 +1,96 @@
+"""
+In this file we define a ProblemVault class where we store all the
+exceptions and all the problems we find with the code.
+
+The ProblemVault is capable of registering problems and also figuring out if a
+problem is worse than a registered exception so that it only warns when things
+get worse.
+"""
+
+class ProblemVault(object):
+    """
+    Singleton where we store the various new problems we
+    found in the code, and also the old problems we read from the exception
+    file.
+    """
+    def __init__(self, exception_file):
+        # Exception dictionary: { problem.key() : Problem object }
+        self.exceptions = {}
+
+        if exception_file:
+            self.register_exceptions(exception_file)
+
+    def register_exceptions(self, exception_file):
+        # Register exceptions
+        for line in exception_file:
+            problem = get_old_problem_from_exception_str(line)
+            if problem is None:
+                continue
+
+            # XXX this might overwrite problems with the same key (e.g. MOCK_IMPL)
+            self.exceptions[problem.key()] = problem
+            #print "Registering exception: %s" % problem
+
+    def register_problem(self, problem):
+        # This is a new problem, print it
+        if problem.key() not in self.exceptions:
+            print problem
+            return
+
+        # If it's an old problem, we don't warn if the situation got better
+        # (e.g. we went from 4k LoC to 3k LoC), but we do warn if the
+        # situation worsened (e.g. we went from 60 includes to 80).
+        if problem.is_worse_than(self.exceptions[problem.key()]):
+            print problem
+            return
+#        else:
+#            print "OK %s better than %s" % (problem, self.exceptions[problem.key()])
+
+
+class Problem(object):
+    def __init__(self, problem_type, problem_location, metric_value):
+        self.problem_location = problem_location
+        self.metric_value = int(metric_value)
+        self.problem_type = problem_type
+
+    def is_worse_than(self, other_problem):
+        """Return True if this is a worse problem than other_problem"""
+        if self.metric_value > other_problem.metric_value:
+            return True
+        return False
+
+    def key(self):
+        """Generate a unique key that describes this problem that can be used as a dictionary key"""
+        return "%s:%s" % (self.problem_location, self.problem_type)
+
+    def __str__(self):
+        return "problem %s %s %s" % (self.problem_type, self.problem_location, self.metric_value)
+
+class FileSizeProblem(Problem):
+    def __init__(self, problem_location, metric_value):
+        super(FileSizeProblem, self).__init__("file-size", problem_location, metric_value)
+
+class IncludeCountProblem(Problem):
+    def __init__(self, problem_location, metric_value):
+        super(IncludeCountProblem, self).__init__("include-count", problem_location, metric_value)
+
+class FunctionSizeProblem(Problem):
+    def __init__(self, problem_location, metric_value):
+        super(FunctionSizeProblem, self).__init__("function-size", problem_location, metric_value)
+
+def get_old_problem_from_exception_str(exception_str):
+    try:
+        _, problem_type, problem_location, metric_value = exception_str.split(" ")
+    except ValueError:
+        return None
+
+    if problem_type == "file-size":
+        return FileSizeProblem(problem_location, metric_value)
+    elif problem_type == "include-count":
+        return IncludeCountProblem(problem_location, metric_value)
+    elif problem_type == "function-size":
+        return FunctionSizeProblem(problem_location, metric_value)
+    else:
+        print "Unknown exception line %s" % exception_str
+        return None
+

    

nickm＠torproject.org

tags

participants (1)