tor/scripts/maint/annotate_ifdef_directives

#!/usr/bin/python
# Copyright (c) 2017-2019, The Tor Project, Inc.
# See LICENSE for licensing information

# This script iterates over a list of C files. For each file, it looks at the
# #if/#else C macros, and annotates them with comments explaining what they
# match.
#
# For example, it replaces this:
#
#  #ifdef HAVE_OCELOT
#   // 500 lines of ocelot code
#  #endif
#
# with this:
#
#  #ifdef HAVE_OCELOT
#   // 500 lines of ocelot code
#  #endif /* defined(HAVE_OCELOT) */
#
# Note that only #else and #endif lines are annotated.  Existing comments
# on those lines are removed.

import re

# An #else or #endif that appears no more than this many lines after its
# opening #if is considered obvious, and is left without an annotation.
LINE_OBVIOUSNESS_LIMIT = 4

# Widest line we will generate, counting the terminating newline as a single
# character.
#
# (The limit applies before the newline is encoded, so an operating system
# that writes a newline as several characters is still okay.)
LINE_WIDTH = 80

class Problem(Exception):
    """Raised when the #if/#else/#endif directives in a file do not nest."""

def commented_line(fmt, argument, maxwidth=LINE_WIDTH):
    """
    Format argument into fmt and return the resulting commented line.

    When the formatted line is wider than maxwidth characters, the tail of
    argument is cut off and replaced with "..." so that the line fits.

    fmt must end with a newline, and fmt%"..." must itself fit within
    maxwidth characters.
    """
    assert fmt.endswith("\n")

    line = fmt % argument
    if len(line) > maxwidth:
        ellipsis = "..."
        # Measure the line with the ellipsis already in place: that tells us
        # how many characters of the argument have to go to make room.
        excess = len(fmt % (argument + ellipsis)) - maxwidth
        line = fmt % (argument[:-excess] + ellipsis)
        assert len(line) <= maxwidth

    return line

def negate(expr):
    """Return the logical negation of the preprocessor expression expr.

    An expression that is visibly negated already is un-negated instead of
    being wrapped a second time; everything else gets "!(...)" around it.
    Surrounding whitespace is ignored.

    The general case:
    >>> negate("A && B")
    '!(A && B)'

    A whole-expression "!(...)" is simply unwrapped:
    >>> negate(negate("A && B"))
    'A && B'

    A lone defined(...) test just gains or loses its "!":
    >>> negate("defined(FOO)")
    '!defined(FOO)'
    >>> negate(negate("defined(FOO)"))
    'defined(FOO)'

    Parentheses that do not span the whole expression are not mistaken
    for a wrapper:
    >>> negate("!(FOO) && !(BAR)")
    '!(!(FOO) && !(BAR))'

    """
    stripped = expr.strip()

    # "!(...)" where the only close-paren is the final character.
    wrapped = re.match(r'^!\s*\(([^\)]*)\)$', stripped)
    if wrapped is not None:
        return wrapped.group(1)

    # "defined(...)" or "!defined(...)", again with a single close-paren.
    test = re.match(r'^(!?)\s*(defined\([^\)]*\))$', stripped)
    if test is not None:
        bang, defined_call = test.groups()
        return ("" if bang == "!" else "!") + defined_call

    return "!(%s)" % stripped

def uncomment(s):
    """
    Remove C comments from the text that follows a preprocessor directive.

    s is a single line (no embedded newline).  The result is s without its
    comments and without leading or trailing whitespace.

    A "//" comment, or a "/*" comment that is not closed on this line, runs
    to the end of the line and is dropped together with the whitespace
    before it.  A closed "/* ... */" comment is replaced, along with the
    whitespace around it, by a single space, so any expression text that
    follows the comment is kept rather than thrown away.
    """
    def replacement(found):
        # Only the closed-comment alternative sets group 1.  It may sit
        # between two tokens, so it has to leave a separator behind.
        if found.group(1) is not None:
            return " "
        return ""

    # One left-to-right pass, so that whichever comment opener comes first
    # on the line decides how the text after it is read.
    s = re.sub(r'\s*(?:(/\*.*?\*/)\s*|/\*.*|//.*)', replacement, s)
    return s.strip()

def translate(f_in, f_out):
    """
    Copy the C source in f_in to f_out, annotating #else and #endif lines.

    f_in is iterated line by line, and each output line is passed to
    f_out.write().  Lines that are not if/ifdef/ifndef/elif/else/endif
    directives are copied unchanged, and so are directives that sit within
    LINE_OBVIOUSNESS_LIMIT lines of their opening #if.  An annotated
    directive is replaced outright by the generated line, so anything else
    that was on the original line is dropped.

    Raises Problem for an #else or #elif that has no open conditional or
    that follows an #else, for an #endif that has no open conditional, and
    when the input ends while a conditional is still open.
    """
    directive_re = re.compile(
        r'\s*#\s*(if|ifdef|ifndef|else|endif|elif)\b\s*(.*)')

    # "level" holds the directives seen so far in the innermost open
    # conditional, as (command, expression, lineno) tuples with the opening
    # if/ifdef/ifndef first.  Outside of any conditional it is the (always
    # empty) top_level list.
    top_level = []
    level = top_level
    # The suspended "level" lists of the enclosing conditionals, outermost
    # first.
    enclosing = []

    for lineno, line in enumerate(f_in, 1):
        found = directive_re.match(line)
        if found is None:
            # Ordinary line: pass it through untouched.
            f_out.write(line)
            continue

        command, rest = found.group(1), found.group(2)

        if command in ("if", "ifdef", "ifndef"):
            # Opening directive: copy it, then start a new nesting level
            # that remembers its condition in #if syntax.
            if command == 'if':
                if rest.endswith("\\"):
                    # The condition continues on the next line; we only
                    # keep the first line of it.
                    expression = rest[:-1]+"..."
                else:
                    expression = rest
            elif command == 'ifdef':
                expression = "defined(%s)"%uncomment(rest)
            else:
                expression = "!defined(%s)"%uncomment(rest)
            expression = uncomment(expression)

            f_out.write(line)
            enclosing.append(level)
            level = [ (command, expression, lineno) ]

        elif command in ("else", "elif"):
            # Same nesting level.  Only a plain #if ... #else (no #elif in
            # between) that is far enough from its #if gets annotated.
            if not level or level[-1][0] == 'else':
                raise Problem("Unexpected #%s on %d"% (command,lineno))

            annotate = (command == 'else' and len(level) == 1 and
                        lineno - level[0][2] > LINE_OBVIOUSNESS_LIMIT)
            if annotate:
                f_out.write(commented_line("#else /* %s */\n",
                                           negate(level[0][1])))
            else:
                f_out.write(line)
            level.append((command, rest, lineno))

        else:
            # Closing directive: annotate it if needed, then return to the
            # enclosing nesting level.
            assert command == 'endif'
            if not enclosing:
                raise Problem("Unmatched #%s on %s"% (command,lineno))

            opening_expr, opening_lineno = level[0][1], level[0][2]
            if lineno - opening_lineno <= LINE_OBVIOUSNESS_LIMIT:
                f_out.write(line)
            else:
                only_an_else = len(level) == 2 and level[1][0] == 'else'
                if len(level) == 1 or only_an_else:
                    fmt = "#endif /* %s */\n"
                else:
                    # There were #elif branches; we only name the first
                    # condition.
                    fmt = "#endif /* %s || ... */\n"
                f_out.write(commented_line(fmt, opening_expr))
            level = enclosing.pop()

    if enclosing or level != top_level:
        raise Problem("Missing #endif")

import os
import sys

# Annotate every file named on the command line in place: the annotated
# text goes to "<name>_OUT", which then replaces the original file.
for fn in sys.argv[1:]:
    tmp_fn = fn + "_OUT"
    # Open the input first, so that an unreadable input does not leave an
    # empty output file behind, and close both files before renaming.
    with open(fn, 'r') as input_file:
        with open(tmp_fn, 'w') as output_file:
            translate(input_file, output_file)
    os.rename(tmp_fn, fn)
Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`#!/usr/bin/python`
Bump copyright date to 2019 2019-01-16 18:33:22 +01:00			`# Copyright (c) 2017-2019, The Tor Project, Inc.`
Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`# See LICENSE for licensing information`

Add comments to annotate_ifdef_directives 2019-09-18 16:41:05 +02:00			`# This script iterates over a list of C files. For each file, it looks at the`
			`# #if/#else C macros, and annotates them with comments explaining what they`
			`# match.`
			`#`
			`# For example, it replaces this:`
			`#`
			`# #ifdef HAVE_OCELOT`
			`# // 500 lines of ocelot code`
			`# #endif`
			`#`
			`# with this:`
			`#`
			`# #ifdef HAVE_OCELOT`
			`# // 500 lines of ocelot code`
			`# #endif /* defined(HAVE_OCELOT) */`
			`#`
			`# Note that only #else and #endif lines are annotated. Existing comments`
			`# on those lines are removed.`

Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`import re`

Add comments to annotate_ifdef_directives 2019-09-18 16:41:05 +02:00			`# Any block with fewer than this many lines does not need annotations.`
Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`LINE_OBVIOUSNESS_LIMIT = 4`

annotate_ifdef_directives: clarify situation with newlines Our line limit is 80 characters, assuming that there is a single terminating newline character that counts towards the limit. On Windows, this might go as high as 81 characters, if we count CRLF as two characters. 2019-09-26 03:13:30 +02:00			`# Maximum line width. This includes a terminating newline character.`
			`#`
			`# (This is the maximum before encoding, so that if the the operating system`
			`# uses multiple characers to encode newline, that's still okay.)`
annotate_ifdef_directives: obey an 80-column line-limit If we would add a comment making a line longer than 80 columns, instead truncate the variable portion of the comment until it just fits into 80 columns, with an ellipsis. 2019-09-18 16:51:05 +02:00			`LINE_WIDTH=80`

Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`class Problem(Exception):`
			`pass`

annotate_ifdef_directives: obey an 80-column line-limit If we would add a comment making a line longer than 80 columns, instead truncate the variable portion of the comment until it just fits into 80 columns, with an ellipsis. 2019-09-18 16:51:05 +02:00			`def commented_line(fmt, argument, maxwidth=LINE_WIDTH):`
annotate_ifdef_directives: introduce a function to make commented lines No functional change in this commit. 2019-09-18 16:46:47 +02:00			`"""`
annotate_ifdef_directives: obey an 80-column line-limit If we would add a comment making a line longer than 80 columns, instead truncate the variable portion of the comment until it just fits into 80 columns, with an ellipsis. 2019-09-18 16:51:05 +02:00			`Return fmt%argument, for use as a commented line. If the line would`
			`be longer than maxwidth, truncate argument.`

			`Requires that fmt%"..." will fit into maxwidth characters.`
annotate_ifdef_directives: clarify situation with newlines Our line limit is 80 characters, assuming that there is a single terminating newline character that counts towards the limit. On Windows, this might go as high as 81 characters, if we count CRLF as two characters. 2019-09-26 03:13:30 +02:00
			`Requires that fmt ends with a newline.`
annotate_ifdef_directives: introduce a function to make commented lines No functional change in this commit. 2019-09-18 16:46:47 +02:00			`"""`
annotate_ifdef_directives: clarify situation with newlines Our line limit is 80 characters, assuming that there is a single terminating newline character that counts towards the limit. On Windows, this might go as high as 81 characters, if we count CRLF as two characters. 2019-09-26 03:13:30 +02:00			`assert fmt.endswith("\n")`
annotate_ifdef_directives: obey an 80-column line-limit If we would add a comment making a line longer than 80 columns, instead truncate the variable portion of the comment until it just fits into 80 columns, with an ellipsis. 2019-09-18 16:51:05 +02:00			`result = fmt % argument`
			`if len(result) <= maxwidth:`
			`return result`
			`else:`
			`# figure out how much we need to truncate by to fit the argument,`
			`# plus an ellipsis.`
			`ellipsis = "..."`
			`result = fmt % (argument + ellipsis)`
			`overrun = len(result) - maxwidth`
			`truncated_argument = argument[:-overrun] + ellipsis`

			`result = fmt % truncated_argument`
			`assert len(result) <= maxwidth`
			`return result`
annotate_ifdef_directives: introduce a function to make commented lines No functional change in this commit. 2019-09-18 16:46:47 +02:00
annotate_ifdef_directives: remove some cases of double negation This change should reduce the number of cases where we say "/* !(!defined(foo)) */" . This only does cases where we can use a regex to make sure that the simplification is guaranteed to be correct. Full boolean simplification would require this script to parse C, and nobody wants that. 2019-09-18 16:59:35 +02:00			`def negate(expr):`
			`"""Return a negated version of expr; try to avoid double-negation.`

			`We usually wrap expressions in parentheses and add a "!".`
			`>>> negate("A && B")`
			`'!(A && B)'`

			`But if we recognize the expression as negated, we can restore it.`
			`>>> negate(negate("A && B"))`
			`'A && B'`

			`The same applies for defined(FOO).`
			`>>> negate("defined(FOO)")`
			`'!defined(FOO)'`
			`>>> negate(negate("defined(FOO)"))`
			`'defined(FOO)'`

			`Internal parentheses don't confuse us:`
			`>>> negate("!(FOO) && !(BAR)")`
			`'!(!(FOO) && !(BAR))'`

			`"""`
			`expr = expr.strip()`
			`# See whether we match !(...), with no intervening close-parens.`
			`m = re.match(r'^!\s\(([^\)])\)$', expr)`
			`if m:`
			`return m.group(1)`


			`# See whether we match !?defined(...), with no intervening close-parens.`
			`m = re.match(r'^(!?)\s(defined\([^\)]\))$', expr)`
			`if m:`
			`if m.group(1) == "!":`
			`prefix = ""`
			`else:`
			`prefix = "!"`
			`return prefix + m.group(2)`

			`return "!(%s)" % expr`

Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`def uncomment(s):`
Add comments to annotate_ifdef_directives 2019-09-18 16:41:05 +02:00			`"""`
			`Remove existing trailing comments from an #else or #endif line.`
			`"""`
Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`s = re.sub(r'//.*','',s)`
			`s = re.sub(r'/\.','',s)`
			`return s.strip()`

			`def translate(f_in, f_out):`
Add comments to annotate_ifdef_directives 2019-09-18 16:41:05 +02:00			`"""`
			`Read a file from f_in, and write its annotated version to f_out.`
			`"""`
			`# A stack listing our current if/else state. Each member of the stack`
			`# is a list of directives. Each directive is a 3-tuple of`
			`# (command, rest, lineno)`
			`# where "command" is one of if/ifdef/ifndef/else/elif, and where`
			`# "rest" is an expression in a format suitable for use with #if, and where`
			`# lineno is the line number where the directive occurred.`
Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`stack = []`
Add comments to annotate_ifdef_directives 2019-09-18 16:41:05 +02:00			`# the stack element corresponding to the top level of the file.`
			`whole_file = []`
Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`cur_level = whole_file`
			`lineno = 0`
			`for line in f_in:`
			`lineno += 1`
			`m = re.match(r'\s#\s(if\|ifdef\|ifndef\|else\|endif\|elif)\b\s(.)',`
			`line)`
			`if not m:`
Add comments to annotate_ifdef_directives 2019-09-18 16:41:05 +02:00			`# no directive, so we can just write it out.`
Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`f_out.write(line)`
			`continue`
			`command,rest = m.groups()`
			`if command in ("if", "ifdef", "ifndef"):`
			`# The #if directive pushes us one level lower on the stack.`
			`if command == 'ifdef':`
			`rest = "defined(%s)"%uncomment(rest)`
			`elif command == 'ifndef':`
			`rest = "!defined(%s)"%uncomment(rest)`
			`elif rest.endswith("\\"):`
			`rest = rest[:-1]+"..."`

			`rest = uncomment(rest)`

			`new_level = [ (command, rest, lineno) ]`
			`stack.append(cur_level)`
			`cur_level = new_level`
			`f_out.write(line)`
			`elif command in ("else", "elif"):`
Add comments to annotate_ifdef_directives 2019-09-18 16:41:05 +02:00			`# We stay at the same level on the stack. If we have an #else,`
			`# we comment it.`
Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`if len(cur_level) == 0 or cur_level[-1][0] == 'else':`
			`raise Problem("Unexpected #%s on %d"% (command,lineno))`
			`if (len(cur_level) == 1 and command == 'else' and`
			`lineno > cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT):`
annotate_ifdef_directives: remove some cases of double negation This change should reduce the number of cases where we say "/* !(!defined(foo)) */" . This only does cases where we can use a regex to make sure that the simplification is guaranteed to be correct. Full boolean simplification would require this script to parse C, and nobody wants that. 2019-09-18 16:59:35 +02:00			`f_out.write(commented_line("#else /* %s */\n",`
			`negate(cur_level[0][1])))`
Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`else:`
			`f_out.write(line)`
			`cur_level.append((command, rest, lineno))`
			`else:`
Add comments to annotate_ifdef_directives 2019-09-18 16:41:05 +02:00			`# We pop one element on the stack, and comment an endif.`
Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`assert command == 'endif'`
			`if len(stack) == 0:`
			`raise Problem("Unmatched #%s on %s"% (command,lineno))`
			`if lineno <= cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT:`
			`f_out.write(line)`
			`elif len(cur_level) == 1 or (`
			`len(cur_level) == 2 and cur_level[1][0] == 'else'):`
annotate_ifdef_directives: introduce a function to make commented lines No functional change in this commit. 2019-09-18 16:46:47 +02:00			`f_out.write(commented_line("#endif /* %s */\n",`
			`cur_level[0][1]))`
Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`else:`
annotate_ifdef_directives: introduce a function to make commented lines No functional change in this commit. 2019-09-18 16:46:47 +02:00			`f_out.write(commented_line("#endif /* %s \|\| ... */\n",`
			`cur_level[0][1]))`
Add a python script to annotate our #elses and #endifs 2017-09-06 17:27:31 +02:00			`cur_level = stack.pop()`
			`if len(stack) or cur_level != whole_file:`
			`raise Problem("Missing #endif")`

			`import sys,os`
			`for fn in sys.argv[1:]:`
			`with open(fn+"_OUT", 'w') as output_file:`
			`translate(open(fn, 'r'), output_file)`
			`os.rename(fn+"_OUT", fn)`