mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-24 12:23:32 +01:00
560 lines
15 KiB
Python
Executable File
560 lines
15 KiB
Python
Executable File
#!/usr/bin/python
|
|
# Copyright (c) 2014-2015, The Tor Project, Inc.
|
|
# See LICENSE for licensing information
|
|
#
|
|
# This script reformats a section of the changelog to wrap everything to
|
|
# the right width and put blank lines in the right places. Eventually,
|
|
# it might include a linter.
|
|
#
|
|
# To run it, pipe a section of the changelog (starting with "Changes
|
|
# in Tor 0.x.y.z-alpha" through the script.)
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
import optparse
|
|
|
|
# ==============================
|
|
# Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
|
|
# algorithm, with totally ad hoc parameters!
|
|
#
|
|
# We're trying to minimize:
|
|
# The total of the cubes of ragged space on underflowed intermediate lines,
|
|
# PLUS
|
|
# 100 * the fourth power of overflowed characters
|
|
# PLUS
|
|
# .1 * a bit more than the cube of ragged space on the last line.
|
|
# PLUS
|
|
# OPENPAREN_PENALTY for each line that starts with (
|
|
#
|
|
# We use an obvious dynamic programming algorithm to sorta approximate this.
|
|
# It's not coded right or optimally, but it's fast enough for changelogs
|
|
#
|
|
# (Code found in an old directory of mine, lightly cleaned. -NM)
|
|
|
|
NO_HYPHENATE=set("""
|
|
pf-divert
|
|
tor-resolve
|
|
tor-gencert
|
|
""".split())
|
|
|
|
LASTLINE_UNDERFLOW_EXPONENT = 1
|
|
LASTLINE_UNDERFLOW_PENALTY = 1
|
|
|
|
UNDERFLOW_EXPONENT = 3
|
|
UNDERFLOW_PENALTY = 1
|
|
|
|
OVERFLOW_EXPONENT = 4
|
|
OVERFLOW_PENALTY = 2000
|
|
|
|
ORPHAN_PENALTY = 10000
|
|
|
|
OPENPAREN_PENALTY = 200
|
|
|
|
def generate_wrapping(words, divisions):
|
|
lines = []
|
|
last = 0
|
|
for i in divisions:
|
|
w = words[last:i]
|
|
last = i
|
|
line = " ".join(w).replace("\xff ","-").replace("\xff","-")
|
|
lines.append(line.strip())
|
|
return lines
|
|
|
|
def wrapping_quality(words, divisions, width1, width2):
|
|
total = 0.0
|
|
|
|
lines = generate_wrapping(words, divisions)
|
|
for line in lines:
|
|
length = len(line)
|
|
if line is lines[0]:
|
|
width = width1
|
|
else:
|
|
width = width2
|
|
|
|
if line[0:1] == '(':
|
|
total += OPENPAREN_PENALTY
|
|
|
|
if length > width:
|
|
total += OVERFLOW_PENALTY * (
|
|
(length - width) ** OVERFLOW_EXPONENT )
|
|
else:
|
|
if line is lines[-1]:
|
|
e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
|
|
if " " not in line:
|
|
total += ORPHAN_PENALTY
|
|
else:
|
|
e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
|
|
|
|
total += p * ((width - length) ** e)
|
|
|
|
return total
|
|
|
|
def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
|
|
wrapping_after = [ (0,), ]
|
|
|
|
w1 = width - prefix_len1
|
|
w2 = width - prefix_len2
|
|
|
|
for i in range(1, len(words)+1):
|
|
best_so_far = None
|
|
best_score = 1e300
|
|
for j in range(i):
|
|
t = wrapping_after[j]
|
|
t1 = t[:-1] + (i,)
|
|
t2 = t + (i,)
|
|
wq1 = wrapping_quality(words, t1, w1, w2)
|
|
wq2 = wrapping_quality(words, t2, w1, w2)
|
|
|
|
if wq1 < best_score:
|
|
best_so_far = t1
|
|
best_score = wq1
|
|
if wq2 < best_score:
|
|
best_so_far = t2
|
|
best_score = wq2
|
|
wrapping_after.append( best_so_far )
|
|
|
|
lines = generate_wrapping(words, wrapping_after[-1])
|
|
|
|
return lines
|
|
|
|
def hyphenatable(word):
|
|
if "--" in word:
|
|
return False
|
|
|
|
if re.match(r'^[^\d\-]\D*-', word):
|
|
stripped = re.sub(r'^\W+','',word)
|
|
stripped = re.sub(r'\W+$','',word)
|
|
return stripped not in NO_HYPHENATE
|
|
else:
|
|
return False
|
|
|
|
def split_paragraph(s):
|
|
"Split paragraph into words; tuned for Tor."
|
|
|
|
r = []
|
|
for word in s.split():
|
|
if hyphenatable(word):
|
|
while "-" in word:
|
|
a,word = word.split("-",1)
|
|
r.append(a+"\xff")
|
|
r.append(word)
|
|
return r
|
|
|
|
def fill(text, width, initial_indent, subsequent_indent):
|
|
words = split_paragraph(text)
|
|
lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
|
|
width)
|
|
res = [ initial_indent, lines[0], "\n" ]
|
|
for line in lines[1:]:
|
|
res.append(subsequent_indent)
|
|
res.append(line)
|
|
res.append("\n")
|
|
return "".join(res)
|
|
|
|
# ==============================
|
|
|
|
|
|
TP_MAINHEAD = 0
|
|
TP_HEADTEXT = 1
|
|
TP_BLANK = 2
|
|
TP_SECHEAD = 3
|
|
TP_ITEMFIRST = 4
|
|
TP_ITEMBODY = 5
|
|
TP_END = 6
|
|
TP_PREHEAD = 7
|
|
|
|
def head_parser(line):
|
|
if re.match(r'^Changes in', line):
|
|
return TP_MAINHEAD
|
|
elif re.match(r'^[A-Za-z]', line):
|
|
return TP_PREHEAD
|
|
elif re.match(r'^ o ', line):
|
|
return TP_SECHEAD
|
|
elif re.match(r'^\s*$', line):
|
|
return TP_BLANK
|
|
else:
|
|
return TP_HEADTEXT
|
|
|
|
def body_parser(line):
|
|
if re.match(r'^ o ', line):
|
|
return TP_SECHEAD
|
|
elif re.match(r'^ -',line):
|
|
return TP_ITEMFIRST
|
|
elif re.match(r'^ \S', line):
|
|
return TP_ITEMBODY
|
|
elif re.match(r'^\s*$', line):
|
|
return TP_BLANK
|
|
elif re.match(r'^Changes in', line):
|
|
return TP_END
|
|
elif re.match(r'^\s+\S', line):
|
|
return TP_HEADTEXT
|
|
else:
|
|
print "Weird line %r"%line
|
|
|
|
def clean_head(head):
|
|
return head
|
|
|
|
def head_score(s):
|
|
m = re.match(r'^ +o (.*)', s)
|
|
if not m:
|
|
print >>sys.stderr, "Can't score %r"%s
|
|
return 99999
|
|
lw = m.group(1).lower()
|
|
if lw.startswith("security") and "feature" not in lw:
|
|
score = -300
|
|
elif lw.startswith("deprecated version"):
|
|
score = -200
|
|
elif (('new' in lw and 'requirement' in lw) or
|
|
('new' in lw and 'dependenc' in lw) or
|
|
('build' in lw and 'requirement' in lw) or
|
|
('removed' in lw and 'platform' in lw)):
|
|
score = -100
|
|
elif lw.startswith("major feature"):
|
|
score = 00
|
|
elif lw.startswith("major bug"):
|
|
score = 50
|
|
elif lw.startswith("major"):
|
|
score = 70
|
|
elif lw.startswith("minor feature"):
|
|
score = 200
|
|
elif lw.startswith("minor bug"):
|
|
score = 250
|
|
elif lw.startswith("minor"):
|
|
score = 270
|
|
else:
|
|
score = 1000
|
|
|
|
if 'secur' in lw:
|
|
score -= 2
|
|
|
|
if "(other)" in lw:
|
|
score += 2
|
|
|
|
if '(' not in lw:
|
|
score -= 1
|
|
|
|
return score
|
|
|
|
class ChangeLog(object):
|
|
def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False):
|
|
self.prehead = []
|
|
self.mainhead = None
|
|
self.headtext = []
|
|
self.curgraf = None
|
|
self.sections = []
|
|
self.cursection = None
|
|
self.lineno = 0
|
|
self.wrapText = wrapText
|
|
self.blogOrder = blogOrder
|
|
self.drupalBreak = drupalBreak
|
|
|
|
def addLine(self, tp, line):
|
|
self.lineno += 1
|
|
|
|
if tp == TP_MAINHEAD:
|
|
assert not self.mainhead
|
|
self.mainhead = line
|
|
|
|
elif tp == TP_PREHEAD:
|
|
self.prehead.append(line)
|
|
|
|
elif tp == TP_HEADTEXT:
|
|
if self.curgraf is None:
|
|
self.curgraf = []
|
|
self.headtext.append(self.curgraf)
|
|
self.curgraf.append(line)
|
|
|
|
elif tp == TP_BLANK:
|
|
self.curgraf = None
|
|
|
|
elif tp == TP_SECHEAD:
|
|
self.cursection = [ self.lineno, line, [] ]
|
|
self.sections.append(self.cursection)
|
|
|
|
elif tp == TP_ITEMFIRST:
|
|
item = ( self.lineno, [ [line] ])
|
|
self.curgraf = item[1][0]
|
|
self.cursection[2].append(item)
|
|
|
|
elif tp == TP_ITEMBODY:
|
|
if self.curgraf is None:
|
|
self.curgraf = []
|
|
self.cursection[2][-1][1].append(self.curgraf)
|
|
self.curgraf.append(line)
|
|
|
|
else:
|
|
assert "This" is "unreachable"
|
|
|
|
def lint_head(self, line, head):
|
|
m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
|
|
if not m:
|
|
print >>sys.stderr, "Weird header format on line %s"%line
|
|
|
|
def lint_item(self, line, grafs, head_type):
|
|
pass
|
|
|
|
def lint(self):
|
|
self.head_lines = {}
|
|
for sec_line, sec_head, items in self.sections:
|
|
head_type = self.lint_head(sec_line, sec_head)
|
|
for item_line, grafs in items:
|
|
self.lint_item(item_line, grafs, head_type)
|
|
|
|
def dumpGraf(self,par,indent1,indent2=-1):
|
|
if not self.wrapText:
|
|
for line in par:
|
|
print line
|
|
return
|
|
|
|
if indent2 == -1:
|
|
indent2 = indent1
|
|
text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
|
|
|
|
sys.stdout.write(fill(text,
|
|
width=72,
|
|
initial_indent=" "*indent1,
|
|
subsequent_indent=" "*indent2))
|
|
|
|
def dumpPreheader(self, graf):
|
|
self.dumpGraf(graf, 0)
|
|
print
|
|
|
|
def dumpMainhead(self, head):
|
|
print head
|
|
|
|
def dumpHeadGraf(self, graf):
|
|
self.dumpGraf(graf, 2)
|
|
print
|
|
|
|
def dumpSectionHeader(self, header):
|
|
print header
|
|
|
|
def dumpStartOfSections(self):
|
|
pass
|
|
|
|
def dumpEndOfSections(self):
|
|
pass
|
|
|
|
def dumpEndOfSection(self):
|
|
print
|
|
|
|
def dumpEndOfChangelog(self):
|
|
print
|
|
|
|
def dumpDrupalBreak(self):
|
|
pass
|
|
|
|
def dumpItem(self, grafs):
|
|
self.dumpGraf(grafs[0],4,6)
|
|
for par in grafs[1:]:
|
|
print
|
|
self.dumpGraf(par,6,6)
|
|
|
|
def collateAndSortSections(self):
|
|
heads = []
|
|
sectionsByHead = { }
|
|
for _, head, items in self.sections:
|
|
head = clean_head(head)
|
|
try:
|
|
s = sectionsByHead[head]
|
|
except KeyError:
|
|
s = sectionsByHead[head] = []
|
|
heads.append( (head_score(head), head.lower(), head, s) )
|
|
|
|
s.extend(items)
|
|
|
|
heads.sort()
|
|
self.sections = [ (0, head, items) for _1,_2,head,items in heads ]
|
|
|
|
def dump(self):
|
|
if self.prehead:
|
|
self.dumpPreheader(self.prehead)
|
|
|
|
if not self.blogOrder:
|
|
self.dumpMainhead(self.mainhead)
|
|
|
|
for par in self.headtext:
|
|
self.dumpHeadGraf(par)
|
|
|
|
if self.blogOrder:
|
|
self.dumpMainhead(self.mainhead)
|
|
|
|
drupalBreakAfter = None
|
|
if self.drupalBreak and len(self.sections) > 4:
|
|
drupalBreakAfter = self.sections[1][2]
|
|
|
|
self.dumpStartOfSections()
|
|
for _,head,items in self.sections:
|
|
if not head.endswith(':'):
|
|
print >>sys.stderr, "adding : to %r"%head
|
|
head = head + ":"
|
|
self.dumpSectionHeader(head)
|
|
for _,grafs in items:
|
|
self.dumpItem(grafs)
|
|
self.dumpEndOfSection()
|
|
if items is drupalBreakAfter:
|
|
self.dumpDrupalBreak()
|
|
self.dumpEndOfSections()
|
|
self.dumpEndOfChangelog()
|
|
|
|
class HTMLChangeLog(ChangeLog):
|
|
def __init__(self, *args, **kwargs):
|
|
ChangeLog.__init__(self, *args, **kwargs)
|
|
|
|
def htmlText(self, graf):
|
|
for line in graf:
|
|
line = line.rstrip().replace("&","&")
|
|
line = line.rstrip().replace("<","<").replace(">",">")
|
|
sys.stdout.write(line.strip())
|
|
sys.stdout.write(" ")
|
|
|
|
def htmlPar(self, graf):
|
|
sys.stdout.write("<p>")
|
|
self.htmlText(graf)
|
|
sys.stdout.write("</p>\n")
|
|
|
|
def dumpPreheader(self, graf):
|
|
self.htmlPar(graf)
|
|
|
|
def dumpMainhead(self, head):
|
|
sys.stdout.write("<h2>%s</h2>"%head)
|
|
|
|
def dumpHeadGraf(self, graf):
|
|
self.htmlPar(graf)
|
|
|
|
def dumpSectionHeader(self, header):
|
|
header = header.replace(" o ", "", 1).lstrip()
|
|
sys.stdout.write(" <li>%s\n"%header)
|
|
sys.stdout.write(" <ul>\n")
|
|
|
|
def dumpEndOfSection(self):
|
|
sys.stdout.write(" </ul>\n\n")
|
|
|
|
def dumpEndOfChangelog(self):
|
|
pass
|
|
|
|
def dumpStartOfSections(self):
|
|
print "<ul>\n"
|
|
|
|
def dumpEndOfSections(self):
|
|
print "</ul>\n"
|
|
|
|
def dumpDrupalBreak(self):
|
|
print "\n</ul>\n"
|
|
print "<p> </p>"
|
|
print "\n<!--break-->\n\n"
|
|
print "<ul>"
|
|
|
|
def dumpItem(self, grafs):
|
|
grafs[0][0] = grafs[0][0].replace(" - ", "", 1).lstrip()
|
|
sys.stdout.write(" <li>")
|
|
if len(grafs) > 1:
|
|
for par in grafs:
|
|
self.htmlPar(par)
|
|
else:
|
|
self.htmlText(grafs[0])
|
|
print
|
|
|
|
op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
|
|
op.add_option('-W', '--no-wrap', action='store_false',
|
|
dest='wrapText', default=True,
|
|
help='Do not re-wrap paragraphs')
|
|
op.add_option('-S', '--no-sort', action='store_false',
|
|
dest='sort', default=True,
|
|
help='Do not sort or collate sections')
|
|
op.add_option('-o', '--output', dest='output',
|
|
default='-', metavar='FILE', help="write output to FILE")
|
|
op.add_option('-H', '--html', action='store_true',
|
|
dest='html', default=False,
|
|
help="generate an HTML fragment")
|
|
op.add_option('-1', '--first', action='store_true',
|
|
dest='firstOnly', default=False,
|
|
help="write only the first section")
|
|
op.add_option('-b', '--blog-header', action='store_true',
|
|
dest='blogOrder', default=False,
|
|
help="Write the header in blog order")
|
|
op.add_option('-B', '--blog', action='store_true',
|
|
dest='blogFormat', default=False,
|
|
help="Set all other options as appropriate for a blog post")
|
|
op.add_option('--inplace', action='store_true',
|
|
dest='inplace', default=False,
|
|
help="Alter the ChangeLog in place")
|
|
op.add_option('--drupal-break', action='store_true',
|
|
dest='drupalBreak', default=False,
|
|
help='Insert a drupal-friendly <!--break--> as needed')
|
|
|
|
options,args = op.parse_args()
|
|
|
|
if options.blogFormat:
|
|
options.blogOrder = True
|
|
options.html = True
|
|
options.sort = False
|
|
options.wrapText = False
|
|
options.firstOnly = True
|
|
options.drupalBreak = True
|
|
|
|
if len(args) > 1:
|
|
op.error("Too many arguments")
|
|
elif len(args) == 0:
|
|
fname = 'ChangeLog'
|
|
else:
|
|
fname = args[0]
|
|
|
|
if options.inplace:
|
|
assert options.output == '-'
|
|
options.output = fname
|
|
|
|
if fname != '-':
|
|
sys.stdin = open(fname, 'r')
|
|
|
|
nextline = None
|
|
|
|
if options.html:
|
|
ChangeLogClass = HTMLChangeLog
|
|
else:
|
|
ChangeLogClass = ChangeLog
|
|
|
|
CL = ChangeLogClass(wrapText=options.wrapText,
|
|
blogOrder=options.blogOrder,
|
|
drupalBreak=options.drupalBreak)
|
|
parser = head_parser
|
|
|
|
for line in sys.stdin:
|
|
line = line.rstrip()
|
|
tp = parser(line)
|
|
|
|
if tp == TP_SECHEAD:
|
|
parser = body_parser
|
|
elif tp == TP_END:
|
|
nextline = line
|
|
break
|
|
|
|
CL.addLine(tp,line)
|
|
|
|
CL.lint()
|
|
|
|
if options.output != '-':
|
|
fname_new = options.output+".new"
|
|
fname_out = options.output
|
|
sys.stdout = open(fname_new, 'w')
|
|
else:
|
|
fname_new = fname_out = None
|
|
|
|
if options.sort:
|
|
CL.collateAndSortSections()
|
|
|
|
CL.dump()
|
|
|
|
if options.firstOnly:
|
|
sys.exit(0)
|
|
|
|
if nextline is not None:
|
|
print nextline
|
|
|
|
for line in sys.stdin:
|
|
sys.stdout.write(line)
|
|
|
|
if fname_new is not None:
|
|
os.rename(fname_new, fname_out)
|