tor/scripts/maint/annotate_ifdef_directives.py
Nick Mathewson fc1134e3e5 annotate_ifdef_directives: test edge-case of 80-char line
An 80-character line (79 characters if you don't count the newline)
should not be truncated, and should not have a "..." insterted.
2019-09-27 21:04:22 -04:00

318 lines
9.5 KiB
Python
Executable File

#!/usr/bin/python
# Copyright (c) 2017-2019, The Tor Project, Inc.
# See LICENSE for licensing information
r"""
This script iterates over a list of C files. For each file, it looks at the
#if/#else C macros, and annotates them with comments explaining what they
match.
For example, it replaces this kind of input...
>>> INPUT = '''
... #ifdef HAVE_OCELOT
... C code here
... #if MIMSY == BOROGROVE
... block 1
... block 1
... block 1
... block 1
... #else
... block 2
... block 2
... block 2
... block 2
... #endif
... #endif
... '''
With this kind of output:
>>> EXPECTED_OUTPUT = '''
... #ifdef HAVE_OCELOT
... C code here
... #if MIMSY == BOROGROVE
... block 1
... block 1
... block 1
... block 1
... #else /* !(MIMSY == BOROGROVE) */
... block 2
... block 2
... block 2
... block 2
... #endif /* MIMSY == BOROGROVE */
... #endif /* defined(HAVE_OCELOT) */
... '''
Here's how to use it:
>>> import sys
>>> if sys.version_info.major < 3: from cStringIO import StringIO
>>> if sys.version_info.major >= 3: from io import StringIO
>>> OUTPUT = StringIO()
>>> translate(StringIO(INPUT), OUTPUT)
>>> assert OUTPUT.getvalue() == EXPECTED_OUTPUT
Note that only #else and #endif lines are annotated. Existing comments
on those lines are removed.
"""
import re
# Any block with fewer than this many lines does not need annotations.
LINE_OBVIOUSNESS_LIMIT = 4
# Maximum line width. This includes a terminating newline character.
#
# (This is the maximum before encoding, so that if the the operating system
# uses multiple characers to encode newline, that's still okay.)
LINE_WIDTH=80
class Problem(Exception):
pass
def close_parens_needed(expr):
"""Return the number of left-parentheses needed to make 'expr'
balanced.
>>> close_parens_needed("1+2")
0
>>> close_parens_needed("(1 + 2)")
0
>>> close_parens_needed("(1 + 2")
1
>>> close_parens_needed("(1 + (2 *")
2
>>> close_parens_needed("(1 + (2 * 3) + (4")
2
"""
return expr.count("(") - expr.count(")")
def truncate_expression(expr, new_width):
"""Given a parenthesized C expression in 'expr', try to return a new
expression that is similar to 'expr', but no more than 'new_width'
characters long.
Try to return an expression with balanced parentheses.
>>> truncate_expression("1+2+3", 8)
'1+2+3'
>>> truncate_expression("1+2+3+4+5", 8)
'1+2+3...'
>>> truncate_expression("(1+2+3+4)", 8)
'(1+2...)'
>>> truncate_expression("(1+(2+3+4))", 8)
'(1+...)'
>>> truncate_expression("(((((((((", 8)
'((...))'
"""
if len(expr) <= new_width:
# The expression is already short enough.
return expr
ellipsis = "..."
# Start this at the minimum that we might truncate.
n_to_remove = len(expr) + len(ellipsis) - new_width
# Try removing characters, one by one, until we get something where
# re-balancing the parentheses still fits within the limit.
while n_to_remove < len(expr):
truncated = expr[:-n_to_remove] + ellipsis
truncated += ")" * close_parens_needed(truncated)
if len(truncated) <= new_width:
return truncated
n_to_remove += 1
return ellipsis
def commented_line(fmt, argument, maxwidth=LINE_WIDTH):
# (This is a raw docstring so that our doctests can use \.)
r"""
Return fmt%argument, for use as a commented line. If the line would
be longer than maxwidth, truncate argument but try to keep its
parentheses balanced.
Requires that fmt%"..." will fit into maxwidth characters.
Requires that fmt ends with a newline.
>>> commented_line("/* %s */\n", "hello world", 32)
'/* hello world */\n'
>>> commented_line("/* %s */\n", "hello world", 15)
'/* hello... */\n'
>>> commented_line("#endif /* %s */\n", "((1+2) && defined(FOO))", 32)
'#endif /* ((1+2) && defi...) */\n'
The default line limit is 80 characters including the newline:
>>> long_argument = "long " * 100
>>> long_line = commented_line("#endif /* %s */\n", long_argument)
>>> len(long_line)
80
>>> long_line[:40]
'#endif /* long long long long long long '
>>> long_line[40:]
'long long long long long long lon... */\n'
If a line works out to being 80 characters naturally, it isn't truncated,
and no ellipsis is added.
>>> medium_argument = "a"*66
>>> medium_line = commented_line("#endif /* %s */\n", medium_argument)
>>> len(medium_line)
80
>>> "..." in medium_line
False
>>> medium_line[:40]
'#endif /* aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
>>> medium_line[40:]
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa */\n'
"""
assert fmt.endswith("\n")
result = fmt % argument
if len(result) <= maxwidth:
return result
else:
# How long can we let the argument be? Try filling in the
# format with an empty argument to find out.
max_arg_width = maxwidth - len(fmt % "")
result = fmt % truncate_expression(argument, max_arg_width)
assert len(result) <= maxwidth
return result
def negate(expr):
"""Return a negated version of expr; try to avoid double-negation.
We usually wrap expressions in parentheses and add a "!".
>>> negate("A && B")
'!(A && B)'
But if we recognize the expression as negated, we can restore it.
>>> negate(negate("A && B"))
'A && B'
The same applies for defined(FOO).
>>> negate("defined(FOO)")
'!defined(FOO)'
>>> negate(negate("defined(FOO)"))
'defined(FOO)'
Internal parentheses don't confuse us:
>>> negate("!(FOO) && !(BAR)")
'!(!(FOO) && !(BAR))'
"""
expr = expr.strip()
# See whether we match !(...), with no intervening close-parens.
m = re.match(r'^!\s*\(([^\)]*)\)$', expr)
if m:
return m.group(1)
# See whether we match !?defined(...), with no intervening close-parens.
m = re.match(r'^(!?)\s*(defined\([^\)]*\))$', expr)
if m:
if m.group(1) == "!":
prefix = ""
else:
prefix = "!"
return prefix + m.group(2)
return "!(%s)" % expr
def uncomment(s):
"""
Remove existing trailing comments from an #else or #endif line.
"""
s = re.sub(r'//.*','',s)
s = re.sub(r'/\*.*','',s)
return s.strip()
def translate(f_in, f_out):
"""
Read a file from f_in, and write its annotated version to f_out.
"""
# A stack listing our current if/else state. Each member of the stack
# is a list of directives. Each directive is a 3-tuple of
# (command, rest, lineno)
# where "command" is one of if/ifdef/ifndef/else/elif, and where
# "rest" is an expression in a format suitable for use with #if, and where
# lineno is the line number where the directive occurred.
stack = []
# the stack element corresponding to the top level of the file.
whole_file = []
cur_level = whole_file
lineno = 0
for line in f_in:
lineno += 1
m = re.match(r'\s*#\s*(if|ifdef|ifndef|else|endif|elif)\b\s*(.*)',
line)
if not m:
# no directive, so we can just write it out.
f_out.write(line)
continue
command,rest = m.groups()
if command in ("if", "ifdef", "ifndef"):
# The #if directive pushes us one level lower on the stack.
if command == 'ifdef':
rest = "defined(%s)"%uncomment(rest)
elif command == 'ifndef':
rest = "!defined(%s)"%uncomment(rest)
elif rest.endswith("\\"):
rest = rest[:-1]+"..."
rest = uncomment(rest)
new_level = [ (command, rest, lineno) ]
stack.append(cur_level)
cur_level = new_level
f_out.write(line)
elif command in ("else", "elif"):
# We stay at the same level on the stack. If we have an #else,
# we comment it.
if len(cur_level) == 0 or cur_level[-1][0] == 'else':
raise Problem("Unexpected #%s on %d"% (command,lineno))
if (len(cur_level) == 1 and command == 'else' and
lineno > cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT):
f_out.write(commented_line("#else /* %s */\n",
negate(cur_level[0][1])))
else:
f_out.write(line)
cur_level.append((command, rest, lineno))
else:
# We pop one element on the stack, and comment an endif.
assert command == 'endif'
if len(stack) == 0:
raise Problem("Unmatched #%s on %s"% (command,lineno))
if lineno <= cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT:
f_out.write(line)
elif len(cur_level) == 1 or (
len(cur_level) == 2 and cur_level[1][0] == 'else'):
f_out.write(commented_line("#endif /* %s */\n",
cur_level[0][1]))
else:
f_out.write(commented_line("#endif /* %s || ... */\n",
cur_level[0][1]))
cur_level = stack.pop()
if len(stack) or cur_level != whole_file:
raise Problem("Missing #endif")
if __name__ == '__main__':
import sys,os
if sys.argv[1] == "--self-test":
import doctest
doctest.testmod()
sys.exit(0)
for fn in sys.argv[1:]:
with open(fn+"_OUT", 'w') as output_file:
translate(open(fn, 'r'), output_file)
os.rename(fn+"_OUT", fn)