commit 8a173635bcd85ad7375d996fb39efeaddd8c611d
Author: Nick Mathewson <nickm(a)torproject.org>
Date: Fri May 2 12:50:23 2014 -0400
Tweak the changelog formatter a little.
(I had a bad clone of Knuth's algorithm sitting around in an old code
repository of mine. I added orphan detection and smarter hyphenation;
it seems to give marginally better results than we had before.)
---
scripts/maint/format_changelog.py | 139 +++++++++++++++++++++++++++++++++++--
1 file changed, 134 insertions(+), 5 deletions(-)
diff --git a/scripts/maint/format_changelog.py b/scripts/maint/format_changelog.py
index 6997d95..35044b3 100755
--- a/scripts/maint/format_changelog.py
+++ b/scripts/maint/format_changelog.py
@@ -12,7 +12,135 @@
import os
import re
import sys
-import textwrap
+
+# ==============================
+# Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
+# algorithm, with totally ad hoc parameters!
+#
+# We're trying to minimize:
+# The total of the cubes of ragged space on underflowed intermediate lines,
+# PLUS
+# 100 * the fourth power of overflowed characters
+# PLUS
+# .1 * a bit more than the cube of ragged space on the last line.
+#
+# We use an obvious dynamic programming algorithm to sorta approximate this.
+# It's not coded right or optimally, but it's fast enough for changelogs
+#
+# (Code found in an old directory of mine, lightly cleaned. -NM)
+
+NO_HYPHENATE=set("""
+pf-divert
+""".split())
+
+LASTLINE_UNDERFLOW_EXPONENT = 1
+LASTLINE_UNDERFLOW_PENALTY = 1
+
+UNDERFLOW_EXPONENT = 3
+UNDERFLOW_PENALTY = 1
+
+OVERFLOW_EXPONENT = 4
+OVERFLOW_PENALTY = 2000
+
+ORPHAN_PENALTY = 10000
+
+def generate_wrapping(words, divisions):
+ lines = []
+ last = 0
+ for i in divisions:
+ w = words[last:i]
+ last = i
+ line = " ".join(w).replace("\xff ","-").replace("\xff","-")
+ lines.append(line)
+ return lines
+
+def wrapping_quality(words, divisions, width1, width2):
+ total = 0.0
+
+ lines = generate_wrapping(words, divisions)
+ for line in lines:
+ length = len(line)
+ if line is lines[0]:
+ width = width1
+ else:
+ width = width2
+
+ if length > width:
+ total += OVERFLOW_PENALTY * (
+ (length - width) ** OVERFLOW_EXPONENT )
+ else:
+ if line is lines[-1]:
+ e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
+ if " " not in line:
+ total += ORPHAN_PENALTY
+ else:
+ e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
+
+ total += p * ((width - length) ** e)
+
+ return total
+
+def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
+ wrapping_after = [ (0,), ]
+
+ w1 = width - prefix_len1
+ w2 = width - prefix_len2
+
+ for i in range(1, len(words)+1):
+ best_so_far = None
+ best_score = 1e300
+ for j in range(i):
+ t = wrapping_after[j]
+ t1 = t[:-1] + (i,)
+ t2 = t + (i,)
+ wq1 = wrapping_quality(words, t1, w1, w2)
+ wq2 = wrapping_quality(words, t2, w1, w2)
+
+ if wq1 < best_score:
+ best_so_far = t1
+ best_score = wq1
+ if wq2 < best_score:
+ best_so_far = t2
+ best_score = wq2
+ wrapping_after.append( best_so_far )
+
+ lines = generate_wrapping(words, wrapping_after[-1])
+
+ return lines
+
+def hyphenateable(word):
+ if re.match(r'^[^\d\-].*-', word):
+ stripped = re.sub(r'^\W+','',word)
+ stripped = re.sub(r'\W+$','',word)
+ return stripped not in NO_HYPHENATE
+ else:
+ return False
+
+def split_paragraph(s):
+ "Split paragraph into words; tuned for Tor."
+
+ r = []
+ for word in s.split():
+ if hyphenateable(word):
+ while "-" in word:
+ a,word = word.split("-",1)
+ r.append(a+"\xff")
+ r.append(word)
+ return r
+
+def fill(text, width, initial_indent, subsequent_indent):
+ words = split_paragraph(text)
+ lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
+ width)
+ res = [ initial_indent, lines[0], "\n" ]
+ for line in lines[1:]:
+ res.append(subsequent_indent)
+ res.append(line)
+ res.append("\n")
+ return "".join(res)
+
+# ==============================
+
TP_MAINHEAD = 0
TP_HEADTEXT = 1
@@ -108,10 +236,11 @@ class ChangeLog(object):
if indent2 == -1:
indent2 = indent1
text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
- print textwrap.fill(text, width=72,
- initial_indent=" "*indent1,
- subsequent_indent=" "*indent2,
- break_on_hyphens=False)
+
+ sys.stdout.write(fill(text,
+ width=72,
+ initial_indent=" "*indent1,
+ subsequent_indent=" "*indent2))
def dump(self):
print self.mainhead