[tor-commits] [tor/master] Script to check sorting of manpage entries

dgoulet at torproject.org dgoulet at torproject.org
Tue Feb 18 13:47:31 UTC 2020


commit 55f088bb2998e70cb88e17ce21c3541966ae94ad
Author: Taylor Yu <catalyst at torproject.org>
Date:   Fri Feb 14 11:02:16 2020 -0600

    Script to check sorting of manpage entries
    
    Add a script to help check the alphabetical ordering of option names
    in a manpage.  Closes ticket 33339.
---
 changes/ticket33339                |  3 ++
 scripts/maint/checkManpageAlpha.py | 72 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)

diff --git a/changes/ticket33339 b/changes/ticket33339
new file mode 100644
index 000000000..75ccb3546
--- /dev/null
+++ b/changes/ticket33339
@@ -0,0 +1,3 @@
+  o Minor feature (developer tools):
+    - Add a script to help check the alphabetical ordering of option
+      names in a manpage.  Closes ticket 33339.
diff --git a/scripts/maint/checkManpageAlpha.py b/scripts/maint/checkManpageAlpha.py
new file mode 100755
index 000000000..70421c2fd
--- /dev/null
+++ b/scripts/maint/checkManpageAlpha.py
@@ -0,0 +1,72 @@
+#!/usr/bin/python
+
+import difflib
+import re
+import sys
+
+# Assume we only use the "== Section Name" section title syntax
+sectionheader_re = re.compile(r'^==+\s(.*)\s*$')
+
+# Assume we only use the "[[ItemName]]" anchor syntax
+anchor_re = re.compile(r'^\[\[([^]]+)\]\]')
+
+class Reader(object):
+    def __init__(self):
+        self.d = {}
+        # Initial state is to gather section headers
+        self.getline = self._getsec
+        self.section = None
+
+    def _getsec(self, line):
+        """Read a section header
+
+        Prepare to gather anchors from subsequent lines.  Don't change
+        state if the line isn't a section header.
+        """
+        m = sectionheader_re.match(line)
+        if not m:
+            return
+        self.anchors = anchors = []
+        self.d[m.group(1)] = anchors
+        self.getline = self._getanchor
+
+    def _getanchor(self, line):
+        """Read an anchor for an item definition
+
+        Append the anchor names to the list of items in the current
+        section.
+        """
+        m = anchor_re.match(line)
+        if not m:
+            return self._getsec(line)
+        self.anchors.append(m.group(1))
+
+    def diffsort(self, key):
+        """Unified diff of unsorted and sorted item lists
+        """
+        # Append newlines because difflib works better with them
+        a = [s + '\n' for s in self.d[key]]
+        b = sorted(a, key=str.lower)
+        return difflib.unified_diff(a, b, fromfile=key+' unsorted',
+                                    tofile=key+' sorted')
+
+def main():
+    """Diff unsorted and sorted lists of option names in a manpage
+
+    Use the file named by the first argument, or standard input if
+    there is none.
+    """
+    try:
+        fname = sys.argv[1]
+        f = open(fname, 'r')
+    except IndexError:
+        f = sys.stdin
+
+    reader = Reader()
+    for line in f:
+        reader.getline(line)
+    for key in sorted(reader.d.keys(), key=str.lower):
+        sys.stdout.writelines(reader.diffsort(key))
+
+if __name__ == '__main__':  # run only as a script, not on import
+    main()





More information about the tor-commits mailing list