[tor-commits] [sbws/master] Add find-dead-funcs maint script

pastly at torproject.org pastly at torproject.org
Tue Jun 26 15:36:49 UTC 2018


commit 5a445c608ff4b00bb190799af2843e80ef2bbf90
Author: Matt Traudt <sirmatt at ksu.edu>
Date:   Tue Jun 19 12:01:03 2018 -0400

    Add find-dead-funcs maint script
    
    GH: ref #143
---
 CHANGELOG.md                  |  5 +++
 scripts/maint/find-dead-funcs | 81 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6e40965..18708da 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [Unreleased]
 
+### Added
+
+- Maintenance script to help us find functions that are (probably) no longer
+  being called.
+
 ### Changed
 
 - Change the path where the Bandwidth List files are generated: now they are
diff --git a/scripts/maint/find-dead-funcs b/scripts/maint/find-dead-funcs
new file mode 100755
index 0000000..aa81d68
--- /dev/null
+++ b/scripts/maint/find-dead-funcs
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+
+SOURCE_DIR=sbws  # tree searched for function definitions and for usages
+TESTS_DIR=tests  # tree searched for usages only
+
+UNUSED_LOG=unused-funcs.txt  # receives names of functions that look unused
+USED_LOG=used-funcs.txt  # receives used function names and matching lines
+# Start both logs afresh, each holding the same explanatory header.
+cat > "$USED_LOG" <<EOF
+# This file was generated by a script on $(date).
+# It is not going to be perfectly accurate.
+# For example, it thinks that all @property are functions that are never called.
+EOF
+cp "$USED_LOG" "$UNUSED_LOG"
+
+function get_all_funcs {
+    # Prints the unique function names defined under $SOURCE_DIR, sorted,
+    # one per line. Both 'foo' and 'bar' below are found and printed:
+    #     def foo(a, b='c'):
+    #         pass
+    #     class Something:
+    #         def bar(self):
+    #             pass
+    # -exec keeps odd file names intact; -h stops a FILE: prefix confusing cut.
+    find "$SOURCE_DIR" -type f -name '*.py' \
+        -exec grep -h --only-matching -E 'def [A-Za-z0-9_]+\(' {} + |
+        cut -d ' ' -f 2 |
+        cut -d '(' -f 1 |
+        sort -u
+}
+
+function conditionally_print_usage_info {
+    # Expects a FUNC name as the only argument.
+    # Expects a stream of FILENAME:COUNT items (as 'grep --count' prints
+    # them) on stdin.
+    #
+    # An item whose COUNT is one or less is assumed to show no use of FUNC
+    # and is skipped without output.
+    #
+    # For any other item, prints the item indented by four spaces, then every
+    # line of FILENAME matching "FUNC(" (with its line number) indented by
+    # eight spaces.
+    local func="$1" item filename count
+    while IFS= read -r item
+    do
+        # Split at the LAST colon so a ':' inside FILENAME does no harm.
+        filename="${item%:*}"
+        count="${item##*:}"
+        (( count <= 1 )) && continue
+        # Quoted: an item containing whitespace must stay on one output line.
+        printf "    %s\n" "$item"
+        grep --line-number -E "${func}\(" -- "$filename" | sed 's/^/        /'
+    done
+}
+
+function get_func_usage_count {
+    # Reads function names from stdin. A name for which
+    # conditionally_print_usage_info prints nothing is assumed unused and goes
+    # to UNUSED_LOG; any other goes to USED_LOG, followed by that output.
+    local func usage_info py_file py_files=()
+    # NUL-delimited, so paths with whitespace or glob characters stay whole.
+    while IFS= read -r -d '' py_file; do py_files+=("$py_file")
+    done < <(find "$SOURCE_DIR" "$TESTS_DIR" -type f -name '*.py' -print0)
+    while IFS= read -r func
+    do
+        # /dev/null keeps FILE: prefixes for a lone file and grep off our stdin.
+        usage_info="$(grep --count -E "${func}\(" /dev/null "${py_files[@]}" |
+            conditionally_print_usage_info "$func")"
+        if [[ "$usage_info" == "" ]]
+        then
+            printf '%s\n' "$func" >> "$UNUSED_LOG"
+        else
+            printf '%s\n' "$func" "$usage_info" >> "$USED_LOG"
+        fi
+    done
+}
+
+# Tell the user where results go, then classify every defined function.
+printf '%s\n' "Writing unused function names to $UNUSED_LOG" \
+    "Writing used function names and information to $USED_LOG"
+get_all_funcs | get_func_usage_count





More information about the tor-commits mailing list