commit 5bd86b50b58ffcad48416df6fa6b411e92c4636e Author: Nick Mathewson nickm@torproject.org Date: Fri Jan 10 08:32:56 2020 -0500
codetool: post-processor for clang-format
This code transformer makes a couple of changes that we want for our source code, and can be expanded to handle more. --- scripts/maint/codetool.py | 174 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+)
diff --git a/scripts/maint/codetool.py b/scripts/maint/codetool.py new file mode 100755 index 000000000..6336e6843 --- /dev/null +++ b/scripts/maint/codetool.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +# Copyright (c) 2020, The Tor Project, Inc. +# See LICENSE for licensing information. + +""" + This program uses a set of plugable filters to inspect and transform + our C code. +""" + +import os +import re +import sys + +class Filter: + """A Filter transforms a string containing a C program.""" + def __init__(self): + pass + + def transform(self, s): + return s + +class CompoundFilt(Filter): + """A CompoundFilt runs another set of filters, in sequence.""" + def __init__(self, items=()): + super().__init__() + self._filters = list(items) + + def add(self, filt): + self._filters.append(filt) + return self + + def transform(self, s): + for f in self._filters: + s = f.transform(s) + + return s + +class SplitError(Exception): + """Exception: raised if split_comments() can't understand a C file.""" + pass + +def split_comments(s): + r"""Iterate over the C code in 's', and yield a sequence of (code, + comment) pairs. Each pair will contain either a nonempty piece + of code, a nonempty comment, or both. + + >>> list(split_comments("hello // world\n")) + [('hello ', '// world'), ('\n', '')] + + >>> list(split_comments("a /* b cd */ efg // hi")) + [('a ', '/* b cd */'), (' efg ', '// hi')] + """ + + # Matches a block of code without any comments. + PAT_CODE = re.compile(r'''^(?: [^/"']+ | + "(?:[^\"]+|\.)*" | + '(?:[^\']+|\.)*' | + /[^/*] + )*''', re.VERBOSE|re.DOTALL) + + # Matches a C99 "//" comment. + PAT_C99_COMMENT = re.compile(r'^//.*$', re.MULTILINE) + + # Matches a C "/* */" comment. + PAT_C_COMMENT = re.compile(r'^/*(?:[^*]|*+[^*/])**+/', re.DOTALL) + + while True: + # Find some non-comment code at the start of the string. + m = PAT_CODE.match(s) + + # If we found some code here, save it and advance the string. + # Otherwise set 'code' to "". + if m: + code = m.group(0) + s = s[m.end():] + else: + code = "" + + # Now we have a comment, or the end of the string. Find out which + # one, and how long it is. + if s.startswith("//"): + m = PAT_C99_COMMENT.match(s) + else: + m = PAT_C_COMMENT.match(s) + + # If we got a comment, save it and advance the string. Otherwise + # set 'comment' to "". + if m: + comment = m.group(0) + s = s[m.end():] + else: + comment = "" + + # If we found no code and no comment, we should be at the end of + # the string... + if code == "" and comment == "": + if s: + # But in case we *aren't* at the end of the string, raise + # an error. + raise SplitError() + # ... all is well, we're done scanning the code. + return + + yield (code, comment) + +class IgnoreCommentsFilt(Filter): + """Wrapper: applies another filter to C code only, excluding comments. + """ + def __init__(self, filt): + super().__init__() + self._filt = filt + + def transform(self, s): + result = [] + for code, comment in split_comments(s): + result.append(self._filt.transform(code)) + result.append(comment) + return "".join(result) + + +class RegexFilt(Filter): + """A regex filter applies a regular expression to some C code.""" + def __init__(self, pat, replacement, flags=0): + super().__init__() + self._pat = re.compile(pat, flags) + self._replacement = replacement + + def transform(self, s): + s, _ = self._pat.subn(self._replacement, s) + return s + +def revise(fname, filt): + """Run 'filt' on the contents of the file in 'fname'. If any + changes are made, then replace the file with its new contents. + Otherwise, leave the file alone. + """ + contents = open(fname, 'r').read() + result = filt.transform(contents) + if result == contents: + return + + tmpname = "{}_codetool_tmp".format(fname) + try: + with open(tmpname, 'w') as f: + f.write(result) + os.rename(tmpname, fname) + except: + os.unlink(tmpname) + raise + +############################## +# Filtering rules. +############################## + +# Make sure that there is a newline after the first comma in a MOCK_IMPL() +BREAK_MOCK_IMPL = RegexFilt( + r'^MOCK_IMPL(([^,]+),\s*(\S+)', + r'MOCK_IMPL(\1,\n\2', + re.MULTILINE) + +# Make sure there is no newline between } and a loop iteration terminator. +RESTORE_SMARTLIST_END = RegexFilt( + r'}\s*(SMARTLIST|DIGESTMAP|DIGEST256MAP|STRMAP|MAP)_FOREACH_END\s*(', + r'} \1_FOREACH_END (', + re.MULTILINE) + +F = CompoundFilt() +F.add(IgnoreCommentsFilt(CompoundFilt([ + RESTORE_SMARTLIST_END, + BREAK_MOCK_IMPL]))) + +if __name__ == '__main__': + for fname in sys.argv[1:]: + revise(fname, F)
tor-commits@lists.torproject.org