tor/scripts/maint/codetool.py

183 lines
5.1 KiB
Python
Executable File

#!/usr/bin/env python3
# Copyright (c) 2020, The Tor Project, Inc.
# See LICENSE for licensing information.
#
# DO NOT COMMIT OR MERGE CODE THAT IS RUN THROUGH THIS TOOL YET.
#
# WE ARE STILL DISCUSSING OUR DESIRED STYLE AND ITERATING ON IT,
# ALONG WITH THE TOOLS THAT ACHIEVE IT.
# (12 Feb 2020)
#
"""
This program uses a set of pluggable filters to inspect and transform
our C code.
"""
import os
import re
import sys
class Filter:
    """Base class for an object that rewrites the text of a C program.

    Subclasses override transform(); the version here is the identity.
    """

    def __init__(self):
        """Create a filter.  The base class keeps no state."""

    def transform(self, s):
        """Return the transformed form of the string 's' (unchanged here)."""
        return s
class CompoundFilt(Filter):
    """A filter that chains other filters, applying them one after another."""

    def __init__(self, items=()):
        """Start with the filters in the iterable 'items', kept in order."""
        super().__init__()
        self._filters = []
        self._filters.extend(items)

    def add(self, filt):
        """Append 'filt' to the end of the chain.

        Returns this object, so that calls to add() can be chained.
        """
        self._filters.append(filt)
        return self

    def transform(self, s):
        """Feed 's' through every filter in turn; return the final text."""
        text = s
        for stage in self._filters:
            # The output of each stage is the input of the next one.
            text = stage.transform(text)
        return text
class SplitError(Exception):
    """Raised by split_comments() when it cannot make sense of a C file."""
def split_comments(s):
    r"""Split the C source text 's' into pieces of code and comments.

    This is a generator that yields (code, comment) pairs in source
    order.  In every pair at least one of the two strings is nonempty;
    a missing piece is given as "".  Quoted string and character
    literals count as code, so comment markers inside them do not
    start a comment.

    Raises SplitError, while iterating, if some text is left over that
    is neither code nor a complete comment (for example, a comment that
    is never closed).

    >>> list(split_comments("hello // world\n"))
    [('hello ', '// world'), ('\n', '')]
    >>> list(split_comments("a /* b cd */ efg // hi"))
    [('a ', '/* b cd */'), (' efg ', '// hi')]
    """
    # A (possibly empty) run of comment-free code: ordinary characters,
    # double-quoted literals, single-quoted literals, or a '/' that does
    # not open a comment.  The pattern is compiled with re.VERBOSE, so its
    # layout whitespace is not significant.
    code_re = re.compile(r'''^(?: [^/"']+ |
"(?:[^\\"]+|\\.)*" |
'(?:[^\\']+|\\.)*' |
/[^/*]
)*''', re.VERBOSE|re.DOTALL)
    # A C99 "//" comment, running to the end of its line.
    line_comment_re = re.compile(r'^//.*$', re.MULTILINE)
    # A classic "/* ... */" comment, which may span several lines.
    block_comment_re = re.compile(r'^/\*(?:[^*]|\*+[^*/])*\*+/', re.DOTALL)

    rest = s
    while True:
        # Peel any leading code off the front of the remaining text.
        code_match = code_re.match(rest)
        code = code_match.group(0) if code_match else ""
        rest = rest[len(code):]

        # What follows is either a comment or the end of the input; choose
        # the comment pattern from the way the remaining text begins.
        if rest.startswith("//"):
            comment_match = line_comment_re.match(rest)
        else:
            comment_match = block_comment_re.match(rest)
        comment = comment_match.group(0) if comment_match else ""
        rest = rest[len(comment):]

        if code or comment:
            yield (code, comment)
            continue

        # Nothing was consumed this time round: either the input has been
        # used up, or we are stuck on text that we cannot understand.
        if rest:
            raise SplitError()
        return
class IgnoreCommentsFilt(Filter):
    """Wrapper that runs another filter on C code while leaving the
       comments untouched.
    """

    def __init__(self, filt):
        """Wrap 'filt', the filter to be applied to the non-comment text."""
        super().__init__()
        self._filt = filt

    def transform(self, s):
        """Return 's' with the wrapped filter applied to each piece of code.

        The input is divided with split_comments(); comments are copied
        through unchanged, in their original positions.
        """
        pieces = []
        for code, comment in split_comments(s):
            # Only the code half of each pair goes through the filter.
            pieces += [self._filt.transform(code), comment]
        return "".join(pieces)
class RegexFilt(Filter):
    """A filter that performs a regular-expression substitution on C code."""

    def __init__(self, pat, replacement, flags=0):
        """Compile 'pat' with the given re 'flags', and remember the
           'replacement' to substitute for each match.
        """
        super().__init__()
        self._pat = re.compile(pat, flags)
        self._replacement = replacement

    def transform(self, s):
        """Return 's' with every match of the pattern replaced."""
        return self._pat.sub(self._replacement, s)
def revise(fname, filt):
    """Run 'filt' on the contents of the file in 'fname'.  If any
       changes are made, then replace the file with its new contents.
       Otherwise, leave the file alone.

       'filt' may be any object with a transform() method that takes a
       string and returns a string.

       The new contents are first written to a temporary file named
       "<fname>_codetool_tmp", which is then moved over 'fname'.  If
       anything goes wrong, the temporary file is removed (best effort)
       and the original exception is re-raised.
    """
    # Use a context manager so that the input handle is closed promptly.
    with open(fname, 'r') as f:
        contents = f.read()

    result = filt.transform(contents)
    if result == contents:
        return

    tmpname = "{}_codetool_tmp".format(fname)
    try:
        with open(tmpname, 'w') as f:
            f.write(result)
        # os.replace() overwrites an existing destination on every
        # platform; os.rename() fails on Windows if 'fname' exists.
        os.replace(tmpname, fname)
    except BaseException:
        # Clean up even on KeyboardInterrupt, but never let a failed
        # cleanup (for example, the temporary file was never created)
        # hide the error that brought us here.
        try:
            os.unlink(tmpname)
        except OSError:
            pass
        raise
##############################
# Filtering rules.
##############################
# Put a line break after the first comma of a MOCK_IMPL() that starts a
# line, so that the rest of the declaration begins on its own line.
BREAK_MOCK_IMPL = RegexFilt(
    r'^MOCK_IMPL\(([^,]+),\s*(\S+)',
    r'MOCK_IMPL(\1,\n\2',
    flags=re.MULTILINE)

# Join a closing brace and the *_FOREACH_END terminator that follows it
# onto one line, separated by a single space.
RESTORE_SMARTLIST_END = RegexFilt(
    r'}\s*(SMARTLIST|DIGESTMAP|DIGEST256MAP|STRMAP|MAP)_FOREACH_END\s*\(',
    r'} \1_FOREACH_END (',
    flags=re.MULTILINE)

# The complete filter: both rules above, applied in this order, and only
# to code outside of comments.
F = CompoundFilt([
    IgnoreCommentsFilt(
        CompoundFilt().add(RESTORE_SMARTLIST_END).add(BREAK_MOCK_IMPL)),
])
if __name__ == '__main__':
    # Every command-line argument names a file to be revised in place.
    for path in sys.argv[1:]:
        revise(path, F)