diff --git a/Makefile.am b/Makefile.am index 309696ede9..8eeba5edb7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -371,7 +371,7 @@ endif check-best-practices: if USEPYTHON - $(PYTHON) $(top_srcdir)/scripts/maint/practracker/practracker.py $(top_srcdir) + @$(PYTHON) $(top_srcdir)/scripts/maint/practracker/practracker.py $(top_srcdir) endif practracker-regen: diff --git a/changes/ticket29746 b/changes/ticket29746 new file mode 100644 index 0000000000..63b9edb391 --- /dev/null +++ b/changes/ticket29746 @@ -0,0 +1,4 @@ + o Minor bugfixes (best practices tracker): + - Fix a few issues in the best-practices script, including tests, tab + tolerance, error reporting, and directory-exclusion logic. Fixes bug + 29746; bugfix on 0.4.1.1-alpha. diff --git a/changes/ticket30752 b/changes/ticket30752 new file mode 100644 index 0000000000..044c7c7d93 --- /dev/null +++ b/changes/ticket30752 @@ -0,0 +1,6 @@ + o Minor features (best practices tracker): + - Give a warning rather than an error when a practracker exception is + violated by a small amount; add a --list-overbroad option to + practracker that lists exceptions that are stricter than they need to + be, and provide an environment variable for disabling + practracker. Closes ticekt 30752. diff --git a/scripts/maint/practracker/metrics.py b/scripts/maint/practracker/metrics.py index 5fa305a868..82f1cd64e9 100644 --- a/scripts/maint/practracker/metrics.py +++ b/scripts/maint/practracker/metrics.py @@ -16,7 +16,7 @@ def get_include_count(f): """Get number of #include statements in the file""" include_count = 0 for line in f: - if re.match(r' *# *include', line): + if re.match(r'\s*#\s*include', line): include_count += 1 return include_count @@ -31,6 +31,7 @@ def get_function_lines(f): "DISABLE_GCC_WARNING", "DISABLE_GCC_WARNINGS"} in_function = False + found_openbrace = False for lineno, line in enumerate(f): if not in_function: # find the start of a function @@ -41,10 +42,13 @@ def get_function_lines(f): continue func_start = lineno in_function = True - + elif not found_openbrace and line.startswith("{"): + found_openbrace = True + func_start = lineno else: # Find the end of a function if line.startswith("}"): - n_lines = lineno - func_start + n_lines = lineno - func_start + 1 in_function = False + found_openbrace = False yield (func_name, n_lines) diff --git a/scripts/maint/practracker/practracker.py b/scripts/maint/practracker/practracker.py index febb14639d..a4e951ac7c 100755 --- a/scripts/maint/practracker/practracker.py +++ b/scripts/maint/practracker/practracker.py @@ -36,10 +36,14 @@ MAX_FUNCTION_SIZE = 100 # lines # Recommended number of #includes MAX_INCLUDE_COUNT = 50 -####################################################### +# Map from problem type to functions that adjust for tolerance +TOLERANCE_FNS = { + 'include-count': lambda n: int(n*1.1), + 'function-size': lambda n: int(n*1.1), + 'file-size': lambda n: int(n*1.02) +} -# ProblemVault singleton -ProblemVault = None +####################################################### # The Tor source code topdir TOR_TOPDIR = None @@ -54,71 +58,59 @@ else: return open(fname, 'r', encoding='utf-8') def consider_file_size(fname, f): - """Consider file size issues for 'f' and return True if a new issue was found""" + """Consider the size of 'f' and yield an FileSizeItem for it. + """ file_size = metrics.get_file_len(f) - if file_size > MAX_FILE_SIZE: - p = problem.FileSizeProblem(fname, file_size) - return ProblemVault.register_problem(p) - return False + yield problem.FileSizeItem(fname, file_size) def consider_includes(fname, f): - """Consider #include issues for 'f' and return True if a new issue was found""" + """Consider the #include count in for 'f' and yield an IncludeCountItem + for it. + """ include_count = metrics.get_include_count(f) - if include_count > MAX_INCLUDE_COUNT: - p = problem.IncludeCountProblem(fname, include_count) - return ProblemVault.register_problem(p) - return False + yield problem.IncludeCountItem(fname, include_count) def consider_function_size(fname, f): - """Consider the function sizes for 'f' and return True if a new issue was found""" - found_new_issues = False + """yield a FunctionSizeItem for every function in f. + """ for name, lines in metrics.get_function_lines(f): - # Don't worry about functions within our limits - if lines <= MAX_FUNCTION_SIZE: - continue - - # That's a big function! Issue a problem! canonical_function_name = "%s:%s()" % (fname, name) - p = problem.FunctionSizeProblem(canonical_function_name, lines) - found_new_issues |= ProblemVault.register_problem(p) - - return found_new_issues + yield problem.FunctionSizeItem(canonical_function_name, lines) ####################################################### def consider_all_metrics(files_list): - """Consider metrics for all files, and return True if new issues were found""" - found_new_issues = False + """Consider metrics for all files, and yield a sequence of problem.Item + object for those issues.""" for fname in files_list: with open_file(fname) as f: - found_new_issues |= consider_metrics_for_file(fname, f) - return found_new_issues + for item in consider_metrics_for_file(fname, f): + yield item def consider_metrics_for_file(fname, f): """ - Consider the various metrics for file with filename 'fname' and file descriptor 'f'. - Return True if we found new issues. + Yield a sequence of problem.Item objects for all of the metrics in + 'f'. """ # Strip the useless part of the path if fname.startswith(TOR_TOPDIR): fname = fname[len(TOR_TOPDIR):] - found_new_issues = False - # Get file length - found_new_issues |= consider_file_size(fname, f) + for item in consider_file_size(fname, f): + yield item # Consider number of #includes f.seek(0) - found_new_issues |= consider_includes(fname, f) + for item in consider_includes(fname, f): + yield item # Get function length f.seek(0) - found_new_issues |= consider_function_size(fname, f) - - return found_new_issues + for item in consider_function_size(fname, f): + yield item HEADER="""\ # Welcome to the exceptions file for Tor's best-practices tracker! @@ -161,8 +153,20 @@ def main(argv): parser = argparse.ArgumentParser(prog=progname) parser.add_argument("--regen", action="store_true", help="Regenerate the exceptions file") + parser.add_argument("--list-overbroad", action="store_true", + help="List over-strict exceptions") parser.add_argument("--exceptions", help="Override the location for the exceptions file") + parser.add_argument("--strict", action="store_true", + help="Make all warnings into errors") + parser.add_argument("--terse", action="store_true", + help="Do not emit helpful instructions.") + parser.add_argument("--max-file-size", default=MAX_FILE_SIZE, + help="Maximum lines per C file size") + parser.add_argument("--max-include-count", default=MAX_INCLUDE_COUNT, + help="Maximum includes per C file") + parser.add_argument("--max-function-size", default=MAX_FUNCTION_SIZE, + help="Maximum lines per function") parser.add_argument("topdir", default=".", nargs="?", help="Top-level directory for the tor source") args = parser.parse_args(argv[1:]) @@ -174,24 +178,41 @@ def main(argv): else: exceptions_file = os.path.join(TOR_TOPDIR, "scripts/maint/practracker", EXCEPTIONS_FNAME) + # 0) Configure our thresholds of "what is a problem actually" + filt = problem.ProblemFilter() + filt.addThreshold(problem.FileSizeItem("*", int(args.max_file_size))) + filt.addThreshold(problem.IncludeCountItem("*", int(args.max_include_count))) + filt.addThreshold(problem.FunctionSizeItem("*", int(args.max_function_size))) + # 1) Get all the .c files we care about files_list = util.get_tor_c_files(TOR_TOPDIR) # 2) Initialize problem vault and load an optional exceptions file so that # we don't warn about the past - global ProblemVault - if args.regen: tmpname = exceptions_file + ".tmp" tmpfile = open(tmpname, "w") - sys.stdout = tmpfile - sys.stdout.write(HEADER) + problem_file = tmpfile ProblemVault = problem.ProblemVault() else: ProblemVault = problem.ProblemVault(exceptions_file) + problem_file = sys.stdout + + # 2.1) Adjust the exceptions so that we warn only about small problems, + # and produce errors on big ones. + if not (args.regen or args.list_overbroad or args.strict): + ProblemVault.set_tolerances(TOLERANCE_FNS) # 3) Go through all the files and report problems if they are not exceptions - found_new_issues = consider_all_metrics(files_list) + found_new_issues = 0 + for item in filt.filter(consider_all_metrics(files_list)): + status = ProblemVault.register_problem(item) + if status == problem.STATUS_ERR: + print(item, file=problem_file) + found_new_issues += 1 + elif status == problem.STATUS_WARN: + # warnings always go to stdout. + print("(warning) {}".format(item)) if args.regen: tmpfile.close() @@ -199,18 +220,32 @@ def main(argv): sys.exit(0) # If new issues were found, try to give out some advice to the developer on how to resolve it. - if found_new_issues and not args.regen: + if found_new_issues and not args.regen and not args.terse: new_issues_str = """\ -FAILURE: practracker found new problems in the code: see warnings above. +FAILURE: practracker found {} new problem(s) in the code: see warnings above. Please fix the problems if you can, and update the exceptions file ({}) if you can't. See doc/HACKING/HelpfulTools.md for more information on using practracker.\ -""".format(exceptions_file) + +You can disable this message by setting the TOR_DISABLE_PRACTRACKER environment +variable. +""".format(found_new_issues, exceptions_file) print(new_issues_str) + if args.list_overbroad: + def k_fn(tup): + return tup[0].key() + for (ex,p) in sorted(ProblemVault.list_overbroad_exceptions(), key=k_fn): + if p is None: + print(ex, "->", 0) + else: + print(ex, "->", p.metric_value) + sys.exit(found_new_issues) if __name__ == '__main__': + if os.environ.get("TOR_DISABLE_PRACTRACKER"): + sys.exit(0) main(sys.argv) diff --git a/scripts/maint/practracker/practracker_tests.py b/scripts/maint/practracker/practracker_tests.py index cdbab2908e..865f68d186 100755 --- a/scripts/maint/practracker/practracker_tests.py +++ b/scripts/maint/practracker/practracker_tests.py @@ -1,3 +1,5 @@ +#!/usr/bin/python + """Some simple tests for practracker metrics""" import unittest @@ -38,13 +40,23 @@ class TestFunctionLength(unittest.TestCase): def test_function_length(self): funcs = StringIO.StringIO(function_file) # All functions should have length 2 - for name, lines in metrics.function_lines(funcs): + for name, lines in metrics.get_function_lines(funcs): self.assertEqual(name, "fun") funcs.seek(0) - for name, lines in metrics.function_lines(funcs): - self.assertEqual(lines, 2) + for name, lines in metrics.get_function_lines(funcs): + self.assertEqual(lines, 4) + +class TestIncludeCount(unittest.TestCase): + def test_include_count(self): + f = StringIO.StringIO(""" + # include + # include "def.h" +#include "ghi.h" +\t#\t include "jkl.h" +""") + self.assertEqual(metrics.get_include_count(f),4) if __name__ == '__main__': unittest.main() diff --git a/scripts/maint/practracker/problem.py b/scripts/maint/practracker/problem.py index c82c5db572..73519d446f 100644 --- a/scripts/maint/practracker/problem.py +++ b/scripts/maint/practracker/problem.py @@ -13,6 +13,10 @@ import os.path import re import sys +STATUS_ERR = 2 +STATUS_WARN = 1 +STATUS_OK = 0 + class ProblemVault(object): """ Singleton where we store the various new problems we @@ -22,6 +26,9 @@ class ProblemVault(object): def __init__(self, exception_fname=None): # Exception dictionary: { problem.key() : Problem object } self.exceptions = {} + # Exception dictionary: maps key to the problem it was used to + # suppress. + self.used_exception_for = {} if exception_fname == None: return @@ -57,42 +64,90 @@ class ProblemVault(object): def register_problem(self, problem): """ - Register this problem to the problem value. Return True if it was a new - problem or it worsens an already existing problem. + Register this problem to the problem value. Return true if it was a new + problem or it worsens an already existing problem. A true + value may be STATUS_ERR to indicate a hard violation, or STATUS_WARN + to indicate a warning. """ # This is a new problem, print it if problem.key() not in self.exceptions: - print(problem) - return True + return STATUS_ERR # If it's an old problem, we don't warn if the situation got better # (e.g. we went from 4k LoC to 3k LoC), but we do warn if the # situation worsened (e.g. we went from 60 includes to 80). - if problem.is_worse_than(self.exceptions[problem.key()]): - print(problem) - return True + status = problem.is_worse_than(self.exceptions[problem.key()]) + if status == STATUS_OK: + self.used_exception_for[problem.key()] = problem - return False + return status -class Problem(object): + def list_overbroad_exceptions(self): + """Return an iterator of tuples containing (ex,prob) where ex is an + exceptions in this vault that are stricter than it needs to be, and + prob is the worst problem (if any) that it covered. + """ + for k in self.exceptions: + e = self.exceptions[k] + p = self.used_exception_for.get(k) + if p is None or e.is_worse_than(p): + yield (e, p) + + def set_tolerances(self, fns): + """Adjust the tolerances for the exceptions in this vault. Takes + a map of problem type to a function that adjusts the permitted + function to its new maximum value.""" + for k in self.exceptions: + ex = self.exceptions[k] + fn = fns.get(ex.problem_type) + if fn is not None: + ex.metric_value = fn(ex.metric_value) + +class ProblemFilter(object): + def __init__(self): + self.thresholds = dict() + + def addThreshold(self, item): + self.thresholds[item.get_type()] = item + + def matches(self, item): + filt = self.thresholds.get(item.get_type(), None) + if filt is None: + return False + return item.is_worse_than(filt) + + def filter(self, sequence): + for item in iter(sequence): + if self.matches(item): + yield item + +class Item(object): """ - A generic problem in our source code. See the subclasses below for the - specific problems we are trying to tackle. + A generic measurement about some aspect of our source code. See + the subclasses below for the specific problems we are trying to tackle. """ def __init__(self, problem_type, problem_location, metric_value): self.problem_location = problem_location self.metric_value = int(metric_value) + self.warning_threshold = self.metric_value self.problem_type = problem_type def is_worse_than(self, other_problem): - """Return True if this is a worse problem than other_problem""" + """Return STATUS_ERR if this is a worse problem than other_problem. + Return STATUS_WARN if it is a little worse, but falls within the + warning threshold. Return STATUS_OK if this problem is not + at all worse than other_problem. + """ if self.metric_value > other_problem.metric_value: - return True - return False + return STATUS_ERR + elif self.metric_value > other_problem.warning_threshold: + return STATUS_WARN + else: + return STATUS_OK def key(self): """Generate a unique key that describes this problem that can be used as a dictionary key""" - # Problem location is a filesystem path, so we need to normalize this + # Item location is a filesystem path, so we need to normalize this # across platforms otherwise same paths are not gonna match. canonical_location = os.path.normcase(self.problem_location) return "%s:%s" % (canonical_location, self.problem_type) @@ -100,7 +155,10 @@ class Problem(object): def __str__(self): return "problem %s %s %s" % (self.problem_type, self.problem_location, self.metric_value) -class FileSizeProblem(Problem): + def get_type(self): + return self.problem_type + +class FileSizeItem(Item): """ Denotes a problem with the size of a .c file. @@ -108,9 +166,9 @@ class FileSizeProblem(Problem): 'metric_value' is the number of lines in the .c file. """ def __init__(self, problem_location, metric_value): - super(FileSizeProblem, self).__init__("file-size", problem_location, metric_value) + super(FileSizeItem, self).__init__("file-size", problem_location, metric_value) -class IncludeCountProblem(Problem): +class IncludeCountItem(Item): """ Denotes a problem with the number of #includes in a .c file. @@ -118,9 +176,9 @@ class IncludeCountProblem(Problem): 'metric_value' is the number of #includes in the .c file. """ def __init__(self, problem_location, metric_value): - super(IncludeCountProblem, self).__init__("include-count", problem_location, metric_value) + super(IncludeCountItem, self).__init__("include-count", problem_location, metric_value) -class FunctionSizeProblem(Problem): +class FunctionSizeItem(Item): """ Denotes a problem with a size of a function in a .c file. @@ -131,7 +189,7 @@ class FunctionSizeProblem(Problem): The 'metric_value' is the size of the offending function in lines. """ def __init__(self, problem_location, metric_value): - super(FunctionSizeProblem, self).__init__("function-size", problem_location, metric_value) + super(FunctionSizeItem, self).__init__("function-size", problem_location, metric_value) comment_re = re.compile(r'#.*$') @@ -149,10 +207,10 @@ def get_old_problem_from_exception_str(exception_str): raise ValueError("Misformatted line {!r}".format(orig_str)) if problem_type == "file-size": - return FileSizeProblem(problem_location, metric_value) + return FileSizeItem(problem_location, metric_value) elif problem_type == "include-count": - return IncludeCountProblem(problem_location, metric_value) + return IncludeCountItem(problem_location, metric_value) elif problem_type == "function-size": - return FunctionSizeProblem(problem_location, metric_value) + return FunctionSizeItem(problem_location, metric_value) else: raise ValueError("Unknown exception type {!r}".format(orig_str)) diff --git a/scripts/maint/practracker/test_practracker.sh b/scripts/maint/practracker/test_practracker.sh new file mode 100755 index 0000000000..590525660b --- /dev/null +++ b/scripts/maint/practracker/test_practracker.sh @@ -0,0 +1,50 @@ +#!/bin/sh + +umask 077 + +TMPDIR="" +clean () { + if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then + rm -rf "$TMPDIR" + fi +} +trap clean EXIT HUP INT TERM + +if test "${PRACTRACKER_DIR}" = "" || + test ! -e "${PRACTRACKER_DIR}/practracker.py" ; then + PRACTRACKER_DIR=$(dirname "$0") +fi + +TMPDIR="$(mktemp -d -t pracktracker.test.XXXXXX)" +if test -z "${TMPDIR}" || test ! -d "${TMPDIR}" ; then + echo >&2 "mktemp failed." + exit 1; +fi + +DATA="${PRACTRACKER_DIR}/testdata" + +run_practracker() { + "${PYTHON:-python}" "${PRACTRACKER_DIR}/practracker.py" \ + --max-include-count=0 --max-file-size=0 --max-function-size=0 --terse \ + "${DATA}/" "$@"; +} + +echo "ex0:" + +run_practracker --exceptions "${DATA}/ex0.txt" > "${TMPDIR}/ex0-received.txt" + +if cmp "${TMPDIR}/ex0-received.txt" "${DATA}/ex0-expected.txt" ; then + echo " OK" +else + exit 1 +fi + +echo "ex1:" + +run_practracker --exceptions "${DATA}/ex1.txt" > "${TMPDIR}/ex1-received.txt" + +if cmp "${TMPDIR}/ex1-received.txt" "${DATA}/ex1-expected.txt" ;then + echo " OK" +else + exit 1 +fi diff --git a/scripts/maint/practracker/testdata/a.c b/scripts/maint/practracker/testdata/a.c new file mode 100644 index 0000000000..b52a14f56a --- /dev/null +++ b/scripts/maint/practracker/testdata/a.c @@ -0,0 +1,38 @@ + +#include "one.h" +#include "two.h" +#incldue "three.h" + +# include "four.h" + +int +i_am_a_function(void) +{ + call(); + call(); + /* comment + + another */ + + return 3; +} + +# include "five.h" + +long +another_function(long x, + long y) +{ + int abcd; + + abcd = x+y; + abcd *= abcd; + + /* comment here */ + + return abcd + + abcd + + abcd; +} + +/* And a comment to grow! */ diff --git a/scripts/maint/practracker/testdata/b.c b/scripts/maint/practracker/testdata/b.c new file mode 100644 index 0000000000..bef277aaae --- /dev/null +++ b/scripts/maint/practracker/testdata/b.c @@ -0,0 +1,15 @@ + +MOCK_IMPL(int, +foo,(void)) +{ + // blah1 + return 0; +} + +MOCK_IMPL(int, +bar,( long z)) +{ + // blah2 + + return (int)(z+2); +} diff --git a/scripts/maint/practracker/testdata/ex.txt b/scripts/maint/practracker/testdata/ex.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/scripts/maint/practracker/testdata/ex0-expected.txt b/scripts/maint/practracker/testdata/ex0-expected.txt new file mode 100644 index 0000000000..c021e6f710 --- /dev/null +++ b/scripts/maint/practracker/testdata/ex0-expected.txt @@ -0,0 +1,7 @@ +problem file-size a.c 38 +problem include-count a.c 4 +problem function-size a.c:i_am_a_function() 9 +problem function-size a.c:another_function() 12 +problem file-size b.c 15 +problem function-size b.c:foo() 4 +problem function-size b.c:bar() 5 diff --git a/scripts/maint/practracker/testdata/ex0.txt b/scripts/maint/practracker/testdata/ex0.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/scripts/maint/practracker/testdata/ex1-expected.txt b/scripts/maint/practracker/testdata/ex1-expected.txt new file mode 100644 index 0000000000..58140a4d9a --- /dev/null +++ b/scripts/maint/practracker/testdata/ex1-expected.txt @@ -0,0 +1,3 @@ +problem function-size a.c:i_am_a_function() 9 +(warning) problem function-size a.c:another_function() 12 +problem function-size b.c:foo() 4 diff --git a/scripts/maint/practracker/testdata/ex1.txt b/scripts/maint/practracker/testdata/ex1.txt new file mode 100644 index 0000000000..db42ae8450 --- /dev/null +++ b/scripts/maint/practracker/testdata/ex1.txt @@ -0,0 +1,11 @@ + +problem file-size a.c 38 +problem include-count a.c 4 +# this problem will produce an error +problem function-size a.c:i_am_a_function() 8 +# this problem will produce a warning +problem function-size a.c:another_function() 11 +problem file-size b.c 15 +# This is removed, and so will produce an error. +# problem function-size b.c:foo() 4 +problem function-size b.c:bar() 5 diff --git a/scripts/maint/practracker/testdata/not_c_file b/scripts/maint/practracker/testdata/not_c_file new file mode 100644 index 0000000000..e150962c02 --- /dev/null +++ b/scripts/maint/practracker/testdata/not_c_file @@ -0,0 +1,2 @@ + +This isn't a C file, so practracker shouldn't care about it. diff --git a/scripts/maint/practracker/util.py b/scripts/maint/practracker/util.py index b0ca73b997..5a8876a0f6 100644 --- a/scripts/maint/practracker/util.py +++ b/scripts/maint/practracker/util.py @@ -2,15 +2,24 @@ import os # We don't want to run metrics for unittests, automatically-generated C files, # external libraries or git leftovers. -EXCLUDE_SOURCE_DIRS = {"/src/test/", "/src/trunnel/", "/src/ext/", "/.git/"} +EXCLUDE_SOURCE_DIRS = {"src/test/", "src/trunnel/", "src/rust/", + "src/ext/", ".git/"} + +def _norm(p): + return os.path.normcase(os.path.normpath(p)) def get_tor_c_files(tor_topdir): """ Return a list with the .c filenames we want to get metrics of. """ files_list = [] + exclude_dirs = { _norm(os.path.join(tor_topdir, p)) for p in EXCLUDE_SOURCE_DIRS } + for root, directories, filenames in os.walk(tor_topdir): + # Remove all the directories that are excluded. + directories[:] = [ d for d in directories + if _norm(os.path.join(root,d)) not in exclude_dirs ] directories.sort() filenames.sort() for filename in filenames: @@ -18,10 +27,7 @@ def get_tor_c_files(tor_topdir): if not filename.endswith(".c"): continue - # Exclude the excluded paths full_path = os.path.join(root,filename) - if any(os.path.normcase(exclude_dir) in full_path for exclude_dir in EXCLUDE_SOURCE_DIRS): - continue files_list.append(full_path)