tor/scripts/maint/format_changelog.py

561 lines
15 KiB
Python
Raw Normal View History

#!/usr/bin/python
# Copyright (c) 2014, The Tor Project, Inc.
# See LICENSE for licensing information
#
# This script reformats a section of the changelog to wrap everything to
# the right width and put blank lines in the right places. Eventually,
# it might include a linter.
#
# To run it, pipe a section of the changelog (starting with "Changes
# in Tor 0.x.y.z-alpha") through the script.
import os
import re
import sys
import optparse
# ==============================
# Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
# algorithm, with totally ad hoc parameters!
#
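# We're trying to minimize:
# UNDERFLOW_PENALTY * (ragged space ** UNDERFLOW_EXPONENT) on each
# underflowed intermediate line,
# PLUS
# OVERFLOW_PENALTY * (overflowed characters ** OVERFLOW_EXPONENT)
# on each overflowed line,
# PLUS
# LASTLINE_UNDERFLOW_PENALTY * (ragged space ** LASTLINE_UNDERFLOW_EXPONENT)
# on the last line,
# PLUS
# ORPHAN_PENALTY if the last line contains no space (a lone word),
# PLUS
# OPENPAREN_PENALTY for each line that starts with (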
#
# We use an obvious dynamic programming algorithm to sorta approximate this.
# It's not coded right or optimally, but it's fast enough for changelogs
#
# (Code found in an old directory of mine, lightly cleaned. -NM)
# Hyphenated names that hyphenatable() refuses to split at their hyphens.
NO_HYPHENATE = set([
    "pf-divert",
    "tor-resolve",
    "tor-gencert",
    "tor-fw-helper",
])

# Weights used by wrapping_quality() when scoring a candidate wrapping.

# Slack left on the final line: penalty * slack ** exponent.
LASTLINE_UNDERFLOW_EXPONENT = 1
LASTLINE_UNDERFLOW_PENALTY = 1

# Slack left on every other line.
UNDERFLOW_EXPONENT = 3
UNDERFLOW_PENALTY = 1

# Characters past the target width.
OVERFLOW_EXPONENT = 4
OVERFLOW_PENALTY = 2000

# Flat charge when the final line contains no space at all.
ORPHAN_PENALTY = 10000

# Flat charge for each line that begins with "(".
OPENPAREN_PENALTY = 200
def generate_wrapping(words, divisions):
    """Join `words` into lines, ending each line at an index in `divisions`.

    `divisions` is a sequence of end offsets into `words`; line k holds the
    words between the previous offset (0 for the first line) and offset k.
    A "\xff" marker inside a word stands for a possible hyphenation point:
    when the next fragment lands on the same line the marker and the joining
    space collapse into "-", and a marker left at the end of a line also
    becomes "-".
    """
    bounds = [0]
    bounds.extend(divisions)
    return [
        " ".join(words[start:end]).replace("\xff ","-").replace("\xff","-")
        for start, end in zip(bounds, bounds[1:])
    ]
def wrapping_quality(words, divisions, width1, width2):
    """Score the wrapping of `words` described by `divisions`; lower is better.

    The lines are produced by generate_wrapping(words, divisions).  The first
    line is measured against `width1` and every later line against `width2`.
    Each line contributes:

      * OPENPAREN_PENALTY if it starts with "(";
      * OVERFLOW_PENALTY * excess ** OVERFLOW_EXPONENT if it is too long;
      * otherwise an underflow charge on the unused space, using the
        LASTLINE_* weights for the final line and the UNDERFLOW_* weights for
        all others, plus ORPHAN_PENALTY when the final line has no space.

    Returns the total as a float.
    """
    total = 0.0

    lines = generate_wrapping(words, divisions)
    last_index = len(lines) - 1

    # Decide "first" and "last" by position.  Comparing the strings by
    # identity is wrong because equal lines (e.g. one-character or empty
    # lines) may be the very same object.
    for index, line in enumerate(lines):
        length = len(line)
        if index == 0:
            width = width1
        else:
            width = width2

        if line[0:1] == '(':
            total += OPENPAREN_PENALTY

        if length > width:
            total += OVERFLOW_PENALTY * (
                (length - width) ** OVERFLOW_EXPONENT )
            continue

        if index == last_index:
            e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
            if " " not in line:
                total += ORPHAN_PENALTY
        else:
            e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)

        total += p * ((width - length) ** e)

    return total
def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
    """Choose line breaks for `words` and return the wrapped lines.

    `prefix_len1` and `prefix_len2` are the indent widths of the first and of
    the following lines; they are subtracted from `width` to get the room
    available for text.  For each prefix of `words` the cheapest division
    tuple (as scored by wrapping_quality) is remembered, built by extending
    the remembered tuples of the shorter prefixes.
    """
    first_width = width - prefix_len1
    other_width = width - prefix_len2

    # best[k] is the division tuple chosen for the first k words.
    best = [(0,)]

    for end in range(1, len(words) + 1):
        choice = None
        choice_score = 1e300
        for earlier in best[:end]:
            # Either stretch the final line of `earlier` up to `end`, or
            # start a fresh line that runs up to `end`.
            for candidate in (earlier[:-1] + (end,), earlier + (end,)):
                score = wrapping_quality(words, candidate,
                                         first_width, other_width)
                if score < choice_score:
                    choice = candidate
                    choice_score = score
        best.append(choice)

    return generate_wrapping(words, best[-1])
def hyphenatable(word):
    """Return True if `word` may be broken across lines at its hyphens.

    A word qualifies when it contains no "--", starts with a character that
    is neither a digit nor "-", has no digit before its first hyphen, and,
    once surrounding punctuation is removed, is not listed in NO_HYPHENATE.
    """
    if "--" in word:
        return False

    if not re.match(r'^[^\d\-]\D*-', word):
        return False

    # Strip punctuation from BOTH ends before the lookup, so that forms such
    # as "(tor-resolve)" are still recognised as protected names.
    stripped = re.sub(r'^\W+','',word)
    stripped = re.sub(r'\W+$','',stripped)
    return stripped not in NO_HYPHENATE
def split_paragraph(s):
    """Break paragraph text `s` into wrappable pieces; tuned for Tor.

    The text is split on whitespace.  A word accepted by hyphenatable() is
    further split at every "-"; each fragment except the last gets a
    trailing "\xff" marker in place of the hyphen.
    """
    pieces = []
    for token in s.split():
        if hyphenatable(token):
            fragments = token.split("-")
            pieces.extend(frag + "\xff" for frag in fragments[:-1])
            token = fragments[-1]
        pieces.append(token)
    return pieces
def fill(text, width, initial_indent, subsequent_indent):
    """Wrap `text` to `width` columns and return it as one string.

    The first output line is prefixed with `initial_indent`, every later line
    with `subsequent_indent`, and every line ends with a newline.
    """
    wrapped = wrap_graf(split_paragraph(text),
                        len(initial_indent), len(subsequent_indent),
                        width)
    head = initial_indent + wrapped[0] + "\n"
    tail = "".join(subsequent_indent + line + "\n" for line in wrapped[1:])
    return head + tail
# ==============================
# Line-type tags produced by head_parser()/body_parser() and consumed by
# ChangeLog.addLine():
#   TP_MAINHEAD  - the "Changes in ..." title line
#   TP_HEADTEXT  - text of the introduction under the title
#   TP_BLANK     - empty or whitespace-only line
#   TP_SECHEAD   - section header (an "o" bullet)
#   TP_ITEMFIRST - first line of an item (a "-" bullet)
#   TP_ITEMBODY  - continuation line of an item
#   TP_END       - a "Changes in" line met after the sections started
#   TP_PREHEAD   - line starting with a letter, seen before the sections
(TP_MAINHEAD, TP_HEADTEXT, TP_BLANK, TP_SECHEAD,
 TP_ITEMFIRST, TP_ITEMBODY, TP_END, TP_PREHEAD) = range(8)
def head_parser(line):
    """Classify `line` while still in the header part of the changelog.

    Returns the TP_* tag of the first rule whose pattern matches, or
    TP_HEADTEXT when none does.
    """
    rules = (
        (r'^Changes in', TP_MAINHEAD),
        (r'^[A-Za-z]', TP_PREHEAD),
        (r'^ o ', TP_SECHEAD),
        (r'^\s*$', TP_BLANK),
    )
    for pattern, tag in rules:
        if re.match(pattern, line):
            return tag
    return TP_HEADTEXT
def body_parser(line):
    """Classify `line` once the first section header has been seen.

    Returns the TP_* tag of the first rule whose pattern matches.  When no
    rule matches, a "Weird line" message is written to standard output and
    None is returned.
    """
    rules = (
        (r'^ o ', TP_SECHEAD),
        (r'^ -', TP_ITEMFIRST),
        (r'^ \S', TP_ITEMBODY),
        (r'^\s*$', TP_BLANK),
        (r'^Changes in', TP_END),
        (r'^\s+\S', TP_HEADTEXT),
    )
    for pattern, tag in rules:
        if re.match(pattern, line):
            return tag
    sys.stdout.write(("Weird line %r"%line) + "\n")
def clean_head(head):
    """Return the section header `head` unchanged.

    Hook called by ChangeLog.collateAndSortSections() before headers are
    compared; currently a no-op.
    """
    return head
def head_score(s):
    """Return the sort key for section header `s`; lower scores sort first.

    `s` must look like " o <title>".  When it does not, a message is written
    to standard error and 99999 is returned.  Otherwise a base score is
    picked from the lower-cased title and then nudged by three small
    adjustments (see the end of the function).
    """
    found = re.match(r'^ +o (.*)', s)
    if found is None:
        sys.stderr.write("Can't score %r\n" % s)
        return 99999

    lw = found.group(1).lower()

    def mentions(first, second):
        # True when both substrings occur somewhere in the title.
        return first in lw and second in lw

    # Checked in order; the first matching prefix supplies the base score.
    prefix_scores = (
        ("major feature", 0),
        ("major bug", 50),
        ("major", 70),
        ("minor feature", 200),
        ("minor bug", 250),
        ("minor", 270),
    )

    if lw.startswith("security") and "feature" not in lw:
        score = -300
    elif lw.startswith("deprecated version"):
        score = -200
    elif (mentions('new', 'requirement') or
          mentions('new', 'dependenc') or
          mentions('build', 'requirement') or
          mentions('removed', 'platform')):
        score = -100
    else:
        score = 1000
        for prefix, value in prefix_scores:
            if lw.startswith(prefix):
                score = value
                break

    # Fine adjustments among headers sharing a base score.
    if 'secur' in lw:
        score -= 2
    if "(other)" in lw:
        score += 2
    if '(' not in lw:
        score -= 1

    return score
class ChangeLog(object):
    """One changelog release entry, with a plain-text writer.

    Lines are fed in one at a time through addLine() together with their
    TP_* type tag.  The collected data is:

      prehead   - list of lines seen before the main header
      mainhead  - the main header line, or None
      headtext  - list of paragraphs (each a list of lines) of introduction
      sections  - list of [lineno, header, items]; each item is a
                  (lineno, paragraphs) tuple and each paragraph a list of
                  lines

    dump() writes everything to sys.stdout by calling the dump* hook
    methods, which subclasses may override.
    """

    def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False):
        """Create an empty changelog.

        wrapText    - re-wrap paragraphs when dumping (otherwise lines are
                      written back unchanged)
        blogOrder   - write the main header after the introduction instead
                      of before it
        drupalBreak - call dumpDrupalBreak() after the second section when
                      there are more than four sections
        """
        self.prehead = []
        self.mainhead = None
        self.headtext = []
        # Paragraph currently being extended, or None after a blank line.
        self.curgraf = None
        self.sections = []
        self.cursection = None
        self.lineno = 0
        self.wrapText = wrapText
        self.blogOrder = blogOrder
        self.drupalBreak = drupalBreak

    def addLine(self, tp, line):
        """Store `line`, whose type tag is `tp` (one of the TP_* values).

        Raises AssertionError when a second main header is supplied or when
        `tp` is not a type this method handles.
        """
        self.lineno += 1

        if tp == TP_MAINHEAD:
            assert not self.mainhead
            self.mainhead = line

        elif tp == TP_PREHEAD:
            self.prehead.append(line)

        elif tp == TP_HEADTEXT:
            if self.curgraf is None:
                self.curgraf = []
                self.headtext.append(self.curgraf)
            self.curgraf.append(line)

        elif tp == TP_BLANK:
            self.curgraf = None

        elif tp == TP_SECHEAD:
            self.cursection = [ self.lineno, line, [] ]
            self.sections.append(self.cursection)

        elif tp == TP_ITEMFIRST:
            item = ( self.lineno, [ [line] ])
            self.curgraf = item[1][0]
            self.cursection[2].append(item)

        elif tp == TP_ITEMBODY:
            if self.curgraf is None:
                # A blank line ended the previous paragraph: open a new one
                # inside the most recent item.
                self.curgraf = []
                self.cursection[2][-1][1].append(self.curgraf)
            self.curgraf.append(line)

        else:
            # Raise explicitly instead of asserting on the identity of two
            # string literals: that depended on interpreter details and
            # vanished under "python -O", silently dropping the line.
            raise AssertionError(
                "unexpected line type %r at input line %d"
                % (tp, self.lineno))

    def lint_head(self, line, head):
        """Warn on standard error if section header `head` looks malformed.

        `line` is the input line number used in the message.
        """
        m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
        if not m:
            sys.stderr.write("Weird header format on line %s\n" % line)

    def lint_item(self, line, grafs, head_type):
        """Placeholder for per-item checks; does nothing."""
        pass

    def lint(self):
        """Run lint_head() on every section and lint_item() on every item."""
        self.head_lines = {}
        for sec_line, sec_head, items in self.sections:
            head_type = self.lint_head(sec_line, sec_head)
            for item_line, grafs in items:
                self.lint_item(item_line, grafs, head_type)

    def dumpGraf(self,par,indent1,indent2=-1):
        """Write paragraph `par` (a list of lines) to standard output.

        Without wrapText the lines are written unchanged.  Otherwise they
        are joined, runs of whitespace are collapsed, and the text is
        re-wrapped to 72 columns with `indent1` spaces before the first line
        and `indent2` before the rest (`indent2` defaults to `indent1`).
        """
        if not self.wrapText:
            for line in par:
                sys.stdout.write("%s\n" % (line,))
            return

        if indent2 == -1:
            indent2 = indent1
        text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)

        sys.stdout.write(fill(text,
                              width=72,
                              initial_indent=" "*indent1,
                              subsequent_indent=" "*indent2))

    def dumpPreheader(self, graf):
        """Write the pre-header lines followed by a blank line."""
        self.dumpGraf(graf, 0)
        sys.stdout.write("\n")

    def dumpMainhead(self, head):
        """Write the main header line."""
        sys.stdout.write("%s\n" % (head,))

    def dumpHeadGraf(self, graf):
        """Write one introduction paragraph followed by a blank line."""
        self.dumpGraf(graf, 2)
        sys.stdout.write("\n")

    def dumpSectionHeader(self, header):
        """Write a section header line."""
        sys.stdout.write("%s\n" % (header,))

    def dumpStartOfSections(self):
        """Hook called before the first section; no output here."""
        pass

    def dumpEndOfSections(self):
        """Hook called after the last section; no output here."""
        pass

    def dumpEndOfSection(self):
        """Write the blank line that closes a section."""
        sys.stdout.write("\n")

    def dumpEndOfChangelog(self):
        """Write the blank line that closes the changelog."""
        sys.stdout.write("\n")

    def dumpDrupalBreak(self):
        """Hook for the Drupal break marker; no output here."""
        pass

    def dumpItem(self, grafs):
        """Write one item: its paragraphs separated by blank lines."""
        self.dumpGraf(grafs[0],4,6)
        for par in grafs[1:]:
            sys.stdout.write("\n")
            self.dumpGraf(par,6,6)

    def collateAndSortSections(self):
        """Merge sections with identical headers and order them.

        Sections are ordered by head_score(), then by lower-cased header.
        The stored line number of every resulting section is 0.
        """
        heads = []
        sectionsByHead = { }
        for _, head, items in self.sections:
            head = clean_head(head)
            try:
                s = sectionsByHead[head]
            except KeyError:
                s = sectionsByHead[head] = []
                heads.append( (head_score(head), head.lower(), head, s) )

            s.extend(items)

        heads.sort()
        self.sections = [ (0, head, items) for _1,_2,head,items in heads ]

    def dump(self):
        """Write the whole changelog to standard output."""
        if self.prehead:
            self.dumpPreheader(self.prehead)

        if not self.blogOrder:
            self.dumpMainhead(self.mainhead)

        for par in self.headtext:
            self.dumpHeadGraf(par)

        if self.blogOrder:
            self.dumpMainhead(self.mainhead)

        # Item list of the section after which the break marker is written.
        drupalBreakAfter = None
        if self.drupalBreak and len(self.sections) > 4:
            drupalBreakAfter = self.sections[1][2]

        self.dumpStartOfSections()
        for _,head,items in self.sections:
            if not head.endswith(':'):
                sys.stderr.write("adding : to %r\n" % head)
                head = head + ":"
            self.dumpSectionHeader(head)
            for _,grafs in items:
                self.dumpItem(grafs)
            self.dumpEndOfSection()
            if items is drupalBreakAfter:
                self.dumpDrupalBreak()
        self.dumpEndOfSections()
        self.dumpEndOfChangelog()
class HTMLChangeLog(ChangeLog):
    """ChangeLog variant whose dump() writes an HTML fragment.

    Only the dump* hooks are overridden; parsing, linting and sorting are
    inherited from ChangeLog.
    """

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as ChangeLog."""
        ChangeLog.__init__(self, *args, **kwargs)

    def htmlText(self, graf):
        """Write the lines of `graf`, escaping &, < and >.

        Each line is stripped and followed by a single space.
        """
        for line in graf:
            # "&" has to be escaped first so the entities added next are
            # not escaped again.
            line = line.rstrip().replace("&","&amp;")
            line = line.rstrip().replace("<","&lt;").replace(">","&gt;")
            sys.stdout.write(line.strip())
            sys.stdout.write(" ")

    def htmlPar(self, graf):
        """Write `graf` as one <p> element."""
        sys.stdout.write("<p>")
        self.htmlText(graf)
        sys.stdout.write("</p>\n")

    def dumpPreheader(self, graf):
        """Write the pre-header lines as a paragraph."""
        self.htmlPar(graf)

    def dumpMainhead(self, head):
        """Write the main header as an <h2> element."""
        sys.stdout.write("<h2>%s</h2>"%head)

    def dumpHeadGraf(self, graf):
        """Write one introduction paragraph."""
        self.htmlPar(graf)

    def dumpSectionHeader(self, header):
        """Open a list entry for the section and the nested item list."""
        header = header.replace(" o ", "", 1).lstrip()
        sys.stdout.write(" <li>%s\n"%header)
        sys.stdout.write(" <ul>\n")

    def dumpEndOfSection(self):
        """Close the nested item list of a section."""
        sys.stdout.write(" </ul>\n\n")

    def dumpEndOfChangelog(self):
        """Nothing is written after the last section list."""
        pass

    def dumpStartOfSections(self):
        """Open the outer list that holds the sections."""
        sys.stdout.write("<ul>\n\n")

    def dumpEndOfSections(self):
        """Close the outer list that holds the sections."""
        sys.stdout.write("</ul>\n\n")

    def dumpDrupalBreak(self):
        """Close the list, write the <!--break--> marker, reopen the list."""
        sys.stdout.write("\n</ul>\n\n")
        sys.stdout.write("<p>&nbsp;</p>\n")
        sys.stdout.write("\n<!--break-->\n\n\n")
        sys.stdout.write("<ul>\n")

    def dumpItem(self, grafs):
        """Write one item as a list entry.

        The bullet is removed from the first line.  An item with several
        paragraphs gets one <p> element per paragraph; a single-paragraph
        item is written as bare text.
        """
        # Work on a copy of the first paragraph.  Overwriting grafs[0][0]
        # would alter the stored changelog, and a second dump() would then
        # remove a further " - " from inside the item text.
        first = list(grafs[0])
        first[0] = first[0].replace(" - ", "", 1).lstrip()

        sys.stdout.write(" <li>")
        if len(grafs) > 1:
            self.htmlPar(first)
            for par in grafs[1:]:
                self.htmlPar(par)
        else:
            self.htmlText(first)
        sys.stdout.write("\n")
# ------------------------------------------------------------------
# Command line handling and main program.

op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
op.add_option('-W', '--no-wrap', action='store_false',
              dest='wrapText', default=True,
              help='Do not re-wrap paragraphs')
op.add_option('-S', '--no-sort', action='store_false',
              dest='sort', default=True,
              help='Do not sort or collate sections')
op.add_option('-o', '--output', dest='output',
              default='-', metavar='FILE', help="write output to FILE")
op.add_option('-H', '--html', action='store_true',
              dest='html', default=False,
              help="generate an HTML fragment")
op.add_option('-1', '--first', action='store_true',
              dest='firstOnly', default=False,
              help="write only the first section")
op.add_option('-b', '--blog-header', action='store_true',
              dest='blogOrder', default=False,
              help="Write the header in blog order")
op.add_option('-B', '--blog', action='store_true',
              dest='blogFormat', default=False,
              help="Set all other options as appropriate for a blog post")
op.add_option('--inplace', action='store_true',
              dest='inplace', default=False,
              help="Alter the ChangeLog in place")
op.add_option('--drupal-break', action='store_true',
              dest='drupalBreak', default=False,
              help='Insert a drupal-friendly <!--break--> as needed')

options,args = op.parse_args()

# --blog is a shorthand that switches on/off a fixed set of other options.
if options.blogFormat:
    options.blogOrder = True
    options.html = True
    options.sort = False
    options.wrapText = False
    options.firstOnly = True
    options.drupalBreak = True

if len(args) > 1:
    op.error("Too many arguments")
elif len(args) == 0:
    fname = 'ChangeLog'
else:
    fname = args[0]

if options.inplace:
    # Report option conflicts as usage errors; an assert would disappear
    # under "python -O".
    if options.output != '-':
        op.error("--inplace cannot be combined with --output")
    if options.firstOnly:
        # Writing only the first section back over the input would throw
        # away the rest of the file.
        op.error("--inplace cannot be combined with --first or --blog")
    options.output = fname

if fname != '-':
    sys.stdin = open(fname, 'r')

# The "Changes in" line that starts the following release, if one was read.
nextline = None

if options.html:
    ChangeLogClass = HTMLChangeLog
else:
    ChangeLogClass = ChangeLog

CL = ChangeLogClass(wrapText=options.wrapText,
                    blogOrder=options.blogOrder,
                    drupalBreak=options.drupalBreak)
parser = head_parser

for line in sys.stdin:
    line = line.rstrip()
    tp = parser(line)

    if tp == TP_SECHEAD:
        # From the first section header on, lines follow the body rules.
        parser = body_parser
    elif tp == TP_END:
        nextline = line
        break

    CL.addLine(tp,line)

CL.lint()

if options.output != '-':
    # Write to a temporary name and rename it over the target when done.
    fname_new = options.output+".new"
    fname_out = options.output
    sys.stdout = open(fname_new, 'w')
else:
    fname_new = fname_out = None

if options.sort:
    CL.collateAndSortSections()

CL.dump()

if not options.firstOnly:
    # Copy the rest of the input (the older releases) through unchanged.
    if nextline is not None:
        sys.stdout.write(nextline + "\n")

    for line in sys.stdin:
        sys.stdout.write(line)

if fname != '-':
    sys.stdin.close()
    sys.stdin = sys.__stdin__

if fname_new is not None:
    # Close the output so it is complete on disk before the rename.  The
    # rename now also happens with --first; previously the early exit left
    # the result stranded in FILE.new.
    sys.stdout.close()
    sys.stdout = sys.__stdout__
    os.rename(fname_new, fname_out)

if options.firstOnly:
    sys.exit(0)