[stem/master] Use pep8's api rather than shelling out

2 Jan 2014

commit 44ae69fbeedee60c86550b52804d85a24ca33637
Author: Damian Johnson <atagar@torproject.org>
Date:   Wed Jan 1 17:55:29 2014 -0800

    Use pep8's api rather than shelling out
    
    Like pyflakes, pep8 is a python library (... albeit a lot more confusing to
    use). This provides us a 63% drop in the runtime of our style checks (from 8 to
    3 seconds on my system). This is down to the realm that I'm a little tempted to
    always run them...
---
 run_tests.py |   15 ++++++++---
 test/util.py |   79 ++++++++++++++++++++++++++++++++++------------------------
 2 files changed, 59 insertions(+), 35 deletions(-)

diff --git a/run_tests.py b/run_tests.py
index 0711b98..5f65fd4 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -372,7 +372,7 @@ def _print_static_issues(args):
       println("Static error checking requires pyflakes. Please install it from ...\n  http://pypi.python.org/pypi/pyflakes\n", ERROR)
 
   if args.run_style:
-    if stem.util.system.is_available("pep8"):
+    if test.util.is_pep8_available():
       static_check_issues.update(test.util.get_stylistic_issues(SRC_PATHS))
     else:
       println("Style checks require pep8. Please install it from...\n  http://pypi.python.org/pypi/pep8\n", ERROR)
@@ -383,9 +383,18 @@ def _print_static_issues(args):
     for file_path in static_check_issues:
       println("* %s" % file_path, STATUS)
 
+      # Make a dict of line numbers to its issues. This is so we can both sort
+      # by the line number and clear any duplicate messages.
+
+      line_to_issues = {}
+
       for line_number, msg in static_check_issues[file_path]:
-        line_count = "%-4s" % line_number
-        println("  line %s - %s" % (line_count, msg))
+        line_to_issues.setdefault(line_number, set()).add(msg)
+
+      for line_number in sorted(line_to_issues.keys()):
+        for msg in line_to_issues[line_number]:
+          line_count = "%-4s" % line_number
+          println("  line %s - %s" % (line_count, msg))
 
       println()
 
diff --git a/test/util.py b/test/util.py
index 706a5b2..e2faaf5 100644
--- a/test/util.py
+++ b/test/util.py
@@ -10,6 +10,8 @@ Helper functions for our test framework.
   get_integ_tests - provides our integration tests
 
   is_pyflakes_available - checks if pyflakes is available
+  is_pep8_available - checks if pep8 is available
+
   get_prereq - provides the tor version required to run the given target
   get_torrc_entries - provides the torrc entries for a given target
   get_help_message - provides usage information for running our tests
@@ -170,6 +172,20 @@ def is_pyflakes_available():
     return False
 
 
+def is_pep8_available():
+  """
+  Checks if pep8 is availalbe.
+
+  :returns: **True** if we can use pep8 and **False** otherwise
+  """
+
+  try:
+    import pep8
+    return True
+  except ImportError:
+    return False
+
+
 def get_prereq(target):
   """
   Provides the tor version required to run the given target. If the target
@@ -244,32 +260,25 @@ def get_stylistic_issues(paths):
   :returns: **dict** of the form ``path => [(line_number, message)...]``
   """
 
-  # The pep8 command give output of the form...
-  #
-  #   FILE:LINE:CHARACTER ISSUE
-  #
-  # ... for instance...
-  #
-  #   ./test/mocking.py:868:31: E225 missing whitespace around operator
-
-  ignored_issues = ','.join(CONFIG["pep8.ignore"])
   issues = {}
 
-  for path in paths:
-    pep8_output = stem.util.system.call(
-      "pep8 --ignore %s %s" % (ignored_issues, path),
-      ignore_exit_status = True,
-    )
+  if is_pep8_available():
+    import pep8
+
+    class StyleReport(pep8.BaseReport):
+      def __init__(self, options):
+        super(StyleReport, self).__init__(options)
 
-    for line in pep8_output:
-      line_match = re.match("^(.*):(\d+):(\d+): (.*)$", line)
+      def error(self, line_number, offset, text, check):
+        code = super(StyleReport, self).error(line_number, offset, text, check)
 
-      if line_match:
-        file_path, line, _, issue = line_match.groups()
+        if code:
+          issues.setdefault(self.filename, []).append((offset + line_number, "%s %s" % (code, text)))
 
-        if not _is_test_data(file_path):
-          issues.setdefault(file_path, []).append((int(line), issue))
+    style_checker = pep8.StyleGuide(ignore = CONFIG["pep8.ignore"], reporter = StyleReport)
+    style_checker.check_files(_get_python_files(paths))
 
+  for path in paths:
     for file_path in _get_files_with_suffix(path):
       if _is_test_data(file_path):
         continue
@@ -277,7 +286,7 @@ def get_stylistic_issues(paths):
       with open(file_path) as f:
         file_contents = f.read()
 
-      lines, file_issues, prev_indent = file_contents.split("\n"), [], 0
+      lines, prev_indent = file_contents.split("\n"), 0
       is_block_comment = False
 
       for index, line in enumerate(lines):
@@ -291,11 +300,11 @@ def get_stylistic_issues(paths):
           is_block_comment = not is_block_comment
 
         if "\t" in whitespace:
-          file_issues.append((index + 1, "indentation has a tab"))
+          issues.setdefault(file_path, []).append((index + 1, "indentation has a tab"))
         elif "\r" in content:
-          file_issues.append((index + 1, "contains a windows newline"))
+          issues.setdefault(file_path, []).append((index + 1, "contains a windows newline"))
         elif content != content.rstrip():
-          file_issues.append((index + 1, "line has trailing whitespace"))
+          issues.setdefault(file_path, []).append((index + 1, "line has trailing whitespace"))
         elif content.lstrip().startswith("except") and content.endswith(", exc:"):
           # Python 2.6 - 2.7 supports two forms for exceptions...
           #
@@ -305,10 +314,7 @@ def get_stylistic_issues(paths):
           # The former is the old method and no longer supported in python 3
           # going forward.
 
-          file_issues.append((index + 1, "except clause should use 'as', not comma"))
-
-      if file_issues:
-        issues[file_path] = file_issues
+          issues.setdefault(file_path, []).append((index + 1, "except clause should use 'as', not comma"))
 
   return issues
 
@@ -329,10 +335,8 @@ def get_pyflakes_issues(paths):
 
   reporter = PyflakesReporter()
 
-  for path in paths:
-    for file_path in _get_files_with_suffix(path):
-      if not _is_test_data(file_path):
-        pyflakes.api.checkPath(file_path, reporter)
+  for path in _get_python_files(paths):
+    pyflakes.api.checkPath(path, reporter)
 
   return reporter.issues
 
@@ -551,6 +555,17 @@ def run_tasks(category, *tasks):
   println()
 
 
+def _get_python_files(paths):
+  results = []
+
+  for path in paths:
+    for file_path in _get_files_with_suffix(path):
+      if not _is_test_data(file_path):
+        results.append(file_path)
+
+  return results
+
+
 class PyflakesReporter(object):
   """
   Implementation of the pyflakes.reporter.Reporter interface. This populates

    

atagar＠torproject.org

tags

participants (1)