mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2024-11-24 20:33:31 +01:00
e5b234e358
Use tuples for bname.startswith and fname.endswith in "fname_is_c" function.
384 lines
12 KiB
Python
Executable File
384 lines
12 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# Copyright 2018 The Tor Project, Inc. See LICENSE file for licensing info.
|
|
|
|
"""This script looks through all the directories for files matching *.c or
|
|
*.h, and checks their #include directives to make sure that only "permitted"
|
|
headers are included.
|
|
|
|
Any #include directives with angle brackets (like #include <stdio.h>) are
|
|
ignored -- only directives with quotes (like #include "foo.h") are
|
|
considered.
|
|
|
|
To decide what includes are permitted, this script looks at a .may_include
|
|
file in each directory. This file contains empty lines, #-prefixed
|
|
comments, filenames (like "lib/foo/bar.h") and file globs (like lib/*/*.h)
|
|
for files that are permitted.
|
|
|
|
The script exits with an error if any non-permitted includes are found.
|
|
.may_include files that contain "!advisory" are considered advisory.
|
|
Advisory .may_include files only result in warnings, rather than errors.
|
|
"""
|
|
|
|
# Future imports for Python 2.7, mandatory in 3.0
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
|
|
import fnmatch
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
# Pick the open_file() implementation once, at import time.  Python 3's
# open() takes an explicit encoding; Python 2's built-in open() does not.
if sys.version_info[0] >= 3:
    def open_file(fname):
        """Open 'fname' for reading as UTF-8 text."""
        return open(fname, 'r', encoding='utf-8')
else:
    def open_file(fname):
        """Open 'fname' for reading."""
        return open(fname, 'r')
|
|
|
|
def warn(msg):
    """Write 'msg', followed by a newline, to standard error."""
    stream = sys.stderr
    print(msg, file=stream)
|
|
|
|
def fname_is_c(fname):
    """
    Tell whether 'fname' names a file that should be scanned for
    possibly disallowed #include directives.

    A file qualifies when its name ends in ".c" or ".h" and its basename
    does not begin with "." or "#".
    """
    if not fname.endswith((".c", ".h")):
        return False
    base = os.path.basename(fname)
    hidden_or_temp = base.startswith((".", "#"))
    return not hidden_or_temp
|
|
|
|
# Matches a quoted #include directive; group 1 is the included path.
# Angle-bracket includes do not match.
INCLUDE_PATTERN = re.compile(r'\s*#\s*include\s+"([^"]*)"')

# Name of the per-directory rules file.
RULES_FNAME = ".may_include"

# Shapes of rules-file entries that are considered normal.  An entry that
# matches none of these draws an "Unusual pattern" warning from
# Rules.addPattern().  Literal dots are escaped so that, for example,
# "orconfigXh" is not mistaken for "orconfig.h".
ALLOWED_PATTERNS = [
    re.compile(r'^.*\*\.(h|inc)$'),
    re.compile(r'^.*/.*\.h$'),
    re.compile(r'^ext/.*\.c$'),
    re.compile(r'^orconfig\.h$'),
    re.compile(r'^micro-revision\.i$'),
]

# Top-level source directory; main() overwrites this with its "topdir"
# command-line argument.
TOPDIR = "src"
|
|
|
|
def pattern_is_normal(s):
    """Return True if 's' matches at least one entry of ALLOWED_PATTERNS,
    and False otherwise.
    """
    return any(allowed.match(s) for allowed in ALLOWED_PATTERNS)
|
|
|
|
class Error(object):
    """A single problem report: a message plus the location it refers to.

    'is_advisory' records whether the report is only advisory; it
    defaults to False.
    """

    def __init__(self, location, msg, is_advisory=False):
        self.location, self.msg = location, msg
        self.is_advisory = is_advisory

    def __str__(self):
        # Rendered as "<message> at <location>".
        return "{0} at {1}".format(self.msg, self.location)
|
|
|
|
class Rules(object):
    """The parsed form of one .may_include file.

    Attributes:
      dirpath      -- directory the rules were loaded for.
      incpath      -- 'dirpath' with a leading "src/" removed, if present.
      patterns     -- glob patterns, in the order they were added.
      usedPatterns -- the subset of 'patterns' that has matched an include.
      is_advisory  -- True once the "!advisory" marker has been seen.
    """

    def __init__(self, dirpath):
        self.dirpath = dirpath
        self.incpath = dirpath[4:] if dirpath.startswith("src/") else dirpath
        self.patterns = []
        self.usedPatterns = set()
        self.is_advisory = False

    def addPattern(self, pattern):
        """Record one entry from the rules file.

        The special entry "!advisory" marks the whole rule set as
        advisory and is not stored as a pattern.  Any other entry is
        stored; one that does not look normal also produces a warning.
        """
        if pattern == "!advisory":
            self.is_advisory = True
            return
        looks_normal = pattern_is_normal(pattern)
        if not looks_normal:
            warn("Unusual pattern {} in {}".format(pattern, self.dirpath))
        self.patterns.append(pattern)

    def includeOk(self, path):
        """Return True if 'path' matches one of our patterns
        (case-sensitively), remembering the first pattern that matched.
        """
        hit = next((candidate for candidate in self.patterns
                    if fnmatch.fnmatchcase(path, candidate)), None)
        if hit is None:
            return False
        self.usedPatterns.add(hit)
        return True

    def applyToLines(self, lines, loc_prefix=""):
        """Yield an Error for every quoted #include in 'lines' that no
        pattern permits.  Locations are 'loc_prefix' followed by the
        1-based line number.
        """
        for lineno, text in enumerate(lines, 1):
            found = INCLUDE_PATTERN.match(text)
            if not found:
                continue
            include = found.group(1)
            if self.includeOk(include):
                continue
            where = "{}{}".format(loc_prefix, str(lineno))
            what = "Forbidden include of {}".format(include)
            yield Error(where, what, is_advisory=self.is_advisory)

    def applyToFile(self, fname, f):
        """Yield the Errors found in the open file 'f'; 'fname' is used
        only to label locations as "fname:lineno".
        """
        prefix = "{}:".format(fname)
        for problem in self.applyToLines(iter(f), prefix):
            yield problem

    def noteUnusedRules(self):
        """Warn about every pattern that has not matched any include."""
        never_matched = [p for p in self.patterns
                         if p not in self.usedPatterns]
        for p in never_matched:
            warn("Pattern {} in {} was never used.".format(p, self.dirpath))

    def getAllowedDirectories(self):
        """Return the directory part of every pattern that contains a
        slash, in pattern order.  Patterns without a slash contribute
        nothing.
        """
        directories = []
        for pat in self.patterns:
            # Try the "<dir>/*.h" / "<dir>/*.inc" shape first, then any
            # "<dir>/<name>" shape; either way group 1 is the directory.
            found = (re.match(r'^(.*)/\*\.(h|inc)$', pat) or
                     re.match(r'^(.*)/[^/]*$', pat))
            if found:
                directories.append(found.group(1))
        return directories
|
|
|
|
|
|
def normalize_srcdir(fname):
    """Given the name of a source directory or file, return its directory
    name relative to `src`, with components joined by "/".

    If 'fname' ends in ".c" or ".h", the file name is dropped first.
    Components are then collected from right to left until a component
    named "src" is found or the path runs out.  When there is no "src"
    component, the path is taken to be relative to `src` already, so all
    of its components are kept ("lib/cc" stays "lib/cc"); "." components
    are skipped.

    Raises AssertionError if no end is reached within 100 components
    (for example, an absolute path with no "src" component).
    """
    dirname, dirfile = os.path.split(fname)
    if re.match(r'.*\.[ch]$', dirfile):
        fname = dirname

    # Now we have a directory.
    dirname, result = os.path.split(fname)
    for _ in range(100):
        # prevent excess looping in case I missed a tricky case
        dirname, dirpart = os.path.split(dirname)
        if dirpart == 'src':
            return result
        # Keep this component *before* checking whether the path is
        # exhausted, so that the top-most directory is not lost.
        if dirpart not in ("", os.curdir):
            result = "{}/{}".format(dirpart, result)
        if dirname == "":
            return result

    # Raise explicitly: a bare "assert False" disappears under "python -O".
    print("No progress!")
    raise AssertionError("No progress!")
|
|
|
|
# Maps a rules-file name to its parsed Rules object, or to None when the
# file did not exist at the time it was first requested.
include_rules_cache = {}

def load_include_rules(fname):
    """ Parse the rules file 'fname' and return the resulting Rules object,
        or None if 'fname' does not exist.

        Results (including None) are remembered in include_rules_cache, so
        each file is read at most once.
    """
    try:
        return include_rules_cache[fname]
    except KeyError:
        pass

    parsed = None
    if os.path.exists(fname):
        parsed = Rules(os.path.split(fname)[0])
        with open_file(fname) as f:
            for raw in f:
                entry = raw.strip()
                # Skip blank lines and #-comments.
                if entry and not entry.startswith("#"):
                    parsed.addPattern(entry)

    include_rules_cache[fname] = parsed
    return parsed
|
|
|
|
def get_all_include_rules():
    """Return every Rules object loaded so far, ordered by the name of
       the rules file it came from.  Files that were looked for but did
       not exist are left out."""
    loaded = []
    for fname in sorted(include_rules_cache):
        rules = include_rules_cache[fname]
        if rules is not None:
            loaded.append(rules)
    return loaded
|
|
|
|
def remove_self_edges(graph):
    """Drop every edge that leads from a node back to itself.

       'graph' is a directed graph given as an adjacency mapping (node ->
       list of nodes it connects to).  It is modified in place: each
       value is replaced by a new list without the node's own name.
    """
    for node, targets in list(graph.items()):
        graph[node] = [t for t in targets if t != node]
|
|
|
|
def closure(graph):
    """Compute the transitive closure of a directed graph.

       'graph' is an adjacency mapping (node -> iterable of the nodes it
       connects to).  The result is a new mapping from each node to the
       set of all nodes reachable from it; the input mapping itself is
       left unchanged.
    """
    reach = {node: set(targets) for node, targets in graph.items()}
    while True:
        grew = False
        for node in reach:
            reachable = reach[node]
            before = len(reachable)
            # Pull in everything reachable from what we can already reach.
            for via in list(reachable):
                reachable.update(reach.get(via, []))
            if len(reachable) != before:
                grew = True
        # A full pass that adds nothing means we are done.
        if not grew:
            return reach
|
|
|
|
def toposort(graph, limit=100):
    """Try to topologically sort a directed graph into "levels".

       'graph' is an adjacency mapping (node -> list of the nodes it
       connects to).  A node is placed in a level once every node it
       connects to has been placed in an earlier level; edges to nodes
       that are not keys of the graph are ignored.

       Returns a list of levels, each a non-empty list of nodes.

       Modifies the input graph, removing every node that could be
       sorted.  If the graph does not become empty, then it contains a
       cycle.

       "limit" is the maximum number of levels to produce before giving
       up.  Sorting also stops as soon as a pass places no node, since
       no later pass could do any better.
    """
    all_levels = []

    n = 0
    while graph:
        cur_level = []
        for k in list(graph):
            # Forget edges to nodes that have already been sorted.
            graph[k] = [ d for d in graph[k] if d in graph ]
            if not graph[k]:
                cur_level.append(k)
        if not cur_level:
            # Every remaining node waits on another one: a cycle.
            break
        all_levels.append(cur_level)
        for k in cur_level:
            del graph[k]
        n += 1
        if n > limit:
            break

    return all_levels
|
|
|
|
def consider_include_rules(fname, f):
    """Yield an Error for each forbidden include in the open file 'f'.

       'fname' is the path of the file; the rules applied are those of
       the RULES_FNAME file in the same directory.  Nothing is yielded
       if that rules file does not exist.
    """
    dirpath = os.path.split(fname)[0]
    rules_fname = os.path.join(dirpath, RULES_FNAME)
    rules = load_include_rules(rules_fname)
    if rules is None:
        return

    for err in rules.applyToFile(fname, f):
        yield err
|
|
|
|
# Module-level flags.  NOTE(review): nothing in this file reads them;
# run_check_includes() takes parameters with the same names instead.
list_unused = False
log_sorted_levels = False

def walk_c_files(topdir="src"):
    """Run through all .c and .h files under topdir, looking for
       include-rule violations.  Yield those violations.

       Files are opened with open_file(), the same helper used for the
       rules files, so that decoding does not depend on the locale.
    """
    for dirpath, _dirnames, fnames in os.walk(topdir):
        for fname in fnames:
            if not fname_is_c(fname):
                continue
            fullpath = os.path.join(dirpath, fname)
            with open_file(fullpath) as f:
                for err in consider_include_rules(fullpath, f):
                    yield err
|
|
|
|
def open_or_stdin(fname):
    """Return standard input if 'fname' is "-"; otherwise open 'fname'
       for reading and return the file object.
    """
    return sys.stdin if fname == '-' else open(fname)
|
|
|
|
def check_subsys_file(fname, uses_dirs):
    """Check an ordered subsystem list against the include rules.

       'fname' names a file ("-" for standard input) whose non-blank
       lines each have three whitespace-separated fields; the third is
       the subsystem's source file.  'uses_dirs' maps each directory to
       the directories it may include from.

       An "INVERSION" is printed whenever a subsystem's directory is
       reachable, through 'uses_dirs', from the directory of a subsystem
       listed before it.

       Returns True if any inversion was found.  Returns False if none
       was found, or if 'uses_dirs' is empty, in which case "SKIPPING"
       is printed and nothing is checked.
    """
    if not uses_dirs:
        # We're doing a distcheck build, or for some other reason there are
        # no .may_include files.
        print("SKIPPING")
        return False

    uses_dirs = { normalize_srcdir(k) : { normalize_srcdir(d) for d in v }
                  for (k,v) in uses_dirs.items() }
    uses_closure = closure(uses_dirs)
    ok = True
    previous_subsystems = []

    with open_or_stdin(fname) as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline) in the list.
            if not line.strip():
                continue
            _, _name, subsys_fname = line.split()
            subsys_dir = normalize_srcdir(subsys_fname)
            for prev in previous_subsystems:
                if subsys_dir in uses_closure[prev]:
                    print("INVERSION: {} uses {}".format(prev,subsys_dir))
                    ok = False
            previous_subsystems.append(subsys_dir)
    return not ok
|
|
|
|
def run_check_includes(topdir, list_unused=False, log_sorted_levels=False,
                       list_advisories=False, check_subsystem_order=None):
    """Check every C source and header under 'topdir' against the include
       rules, then check the rules themselves for circular dependencies.

       Forbidden includes are printed to standard error; advisory ones
       are printed only if 'list_advisories' is set.  The process exits
       with status 1 if a non-advisory violation is found, if the
       subsystem-order check named by 'check_subsystem_order' reports a
       problem, or if the directory dependencies are circular.

       'list_unused' warns about patterns that never matched;
       'log_sorted_levels' prints the sorted dependency levels.
    """
    found_forbidden = False

    for problem in walk_c_files(topdir):
        advisory = problem.is_advisory
        if advisory and not list_advisories:
            continue
        print(problem, file=sys.stderr)
        if not advisory:
            found_forbidden = True

    if found_forbidden:
        hint = ("To change which includes are allowed in a C file, edit the {}\n"
                "files in its enclosing directory.")
        warn(hint.format(RULES_FNAME))
        sys.exit(1)

    if list_unused:
        for rules in get_all_include_rules():
            rules.noteUnusedRules()

    # Directory -> directories it is allowed to include from.
    uses_dirs = { rules.incpath : rules.getAllowedDirectories()
                  for rules in get_all_include_rules() }
    remove_self_edges(uses_dirs)

    if check_subsystem_order and check_subsys_file(check_subsystem_order,
                                                   uses_dirs):
        sys.exit(1)

    # toposort() empties 'uses_dirs' as it goes; anything left over is
    # part of a cycle.
    all_levels = toposort(uses_dirs)

    if log_sorted_levels:
        for (n, cur_level) in enumerate(all_levels):
            if cur_level:
                print(n, cur_level)

    if uses_dirs:
        print("There are circular .may_include dependencies in here somewhere:",
              uses_dirs)
        sys.exit(1)
|
|
|
|
def main(argv):
    """Parse the command line in 'argv' (program name first) and run the
       include checks accordingly.
    """
    import argparse

    parser = argparse.ArgumentParser(prog=argv[0])
    boolean_flags = (
        ("--toposort", "Print a topologically sorted list of modules"),
        ("--list-unused", "List unused lines in .may_include files."),
        ("--list-advisories", "List advisories as well as forbidden includes"),
    )
    for flag, description in boolean_flags:
        parser.add_argument(flag, action="store_true", help=description)
    parser.add_argument("--check-subsystem-order", action="store",
                        help="Check a list of subsystems for ordering")
    parser.add_argument("topdir", default="src", nargs="?",
                        help="Top-level directory for the tor source")
    options = parser.parse_args(argv[1:])

    global TOPDIR
    TOPDIR = options.topdir
    run_check_includes(topdir=options.topdir,
                       log_sorted_levels=options.toposort,
                       list_unused=options.list_unused,
                       list_advisories=options.list_advisories,
                       check_subsystem_order=options.check_subsystem_order)

if __name__ == '__main__':
    main(sys.argv)