Merge branch 'tor-github/pr/1177'

This commit is contained in:
David Goulet 2019-08-01 10:18:10 -04:00
commit c4864de573
17 changed files with 331 additions and 80 deletions

View File

@ -371,7 +371,7 @@ endif
check-best-practices:
if USEPYTHON
$(PYTHON) $(top_srcdir)/scripts/maint/practracker/practracker.py $(top_srcdir)
@$(PYTHON) $(top_srcdir)/scripts/maint/practracker/practracker.py $(top_srcdir)
endif
practracker-regen:

4
changes/ticket29746 Normal file
View File

@ -0,0 +1,4 @@
o Minor bugfixes (best practices tracker):
- Fix a few issues in the best-practices script, including tests, tab
tolerance, error reporting, and directory-exclusion logic. Fixes bug
29746; bugfix on 0.4.1.1-alpha.

6
changes/ticket30752 Normal file
View File

@ -0,0 +1,6 @@
o Minor features (best practices tracker):
- Give a warning rather than an error when a practracker exception is
violated by a small amount; add a --list-overbroad option to
practracker that lists exceptions that are stricter than they need to
be, and provide an environment variable for disabling
practracker. Closes ticket 30752.
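
A minimal sketch of the opt-out described in this entry, mirroring the guard that the diff below adds to practracker.py's __main__ block: when TOR_DISABLE_PRACTRACKER is set in the environment, the script exits successfully without running any checks.

    import os
    import sys

    # If TOR_DISABLE_PRACTRACKER is set, skip the best-practices check entirely.
    if os.environ.get("TOR_DISABLE_PRACTRACKER"):
        sys.exit(0)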

View File

@ -16,7 +16,7 @@ def get_include_count(f):
"""Get number of #include statements in the file"""
include_count = 0
for line in f:
if re.match(r' *# *include', line):
if re.match(r'\s*#\s*include', line):
include_count += 1
return include_count
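
The tightened pattern tolerates tabs as well as spaces around the '#'. A small self-contained check of the new regex (illustrative only; it uses re directly rather than the metrics module, and the header names are made up):

    import re

    INCLUDE_RE = re.compile(r'\s*#\s*include')

    lines = ['#include "one.h"',
             ' # include "two.h"',
             '\t#\t include "three.h"']   # tab-indented includes now count too
    assert all(INCLUDE_RE.match(line) for line in lines)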
@ -31,6 +31,7 @@ def get_function_lines(f):
"DISABLE_GCC_WARNING", "DISABLE_GCC_WARNINGS"}
in_function = False
found_openbrace = False
for lineno, line in enumerate(f):
if not in_function:
# find the start of a function
@ -41,10 +42,13 @@ def get_function_lines(f):
continue
func_start = lineno
in_function = True
elif not found_openbrace and line.startswith("{"):
found_openbrace = True
func_start = lineno
else:
# Find the end of a function
if line.startswith("}"):
n_lines = lineno - func_start
n_lines = lineno - func_start + 1
in_function = False
found_openbrace = False
yield (func_name, n_lines)
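
The added '+ 1' makes the count inclusive of both the opening and closing brace lines, which is why the unit test further down now expects a larger line count for the sample function. A worked example of the arithmetic (the line numbers are illustrative):

    # A function whose opening brace is on line 10 and whose closing
    # brace is on line 13 spans four lines, not three.
    func_start, lineno = 10, 13
    n_lines = lineno - func_start + 1
    assert n_lines == 4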

View File

@ -36,10 +36,14 @@ MAX_FUNCTION_SIZE = 100 # lines
# Recommended number of #includes
MAX_INCLUDE_COUNT = 50
#######################################################
# Map from problem type to functions that adjust for tolerance
TOLERANCE_FNS = {
'include-count': lambda n: int(n*1.1),
'function-size': lambda n: int(n*1.1),
'file-size': lambda n: int(n*1.02)
}
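
For illustration, applying these tolerance functions to the default limits above gives roughly 10% slack on include-count and function-size exceptions, and 2% on file sizes; anything in the slack band becomes a warning rather than an error. A sketch of the arithmetic:

    TOLERANCE_FNS = {
        'include-count': lambda n: int(n*1.1),
        'function-size': lambda n: int(n*1.1),
        'file-size': lambda n: int(n*1.02),
    }
    assert TOLERANCE_FNS['include-count'](50) == 55     # 50 includes -> warn up to 55
    assert TOLERANCE_FNS['function-size'](100) == 110   # 100 lines   -> warn up to 110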
# ProblemVault singleton
ProblemVault = None
#######################################################
# The Tor source code topdir
TOR_TOPDIR = None
@ -54,71 +58,59 @@ else:
return open(fname, 'r', encoding='utf-8')
def consider_file_size(fname, f):
"""Consider file size issues for 'f' and return True if a new issue was found"""
"""Consider the size of 'f' and yield an FileSizeItem for it.
"""
file_size = metrics.get_file_len(f)
if file_size > MAX_FILE_SIZE:
p = problem.FileSizeProblem(fname, file_size)
return ProblemVault.register_problem(p)
return False
yield problem.FileSizeItem(fname, file_size)
def consider_includes(fname, f):
"""Consider #include issues for 'f' and return True if a new issue was found"""
"""Consider the #include count in for 'f' and yield an IncludeCountItem
for it.
"""
include_count = metrics.get_include_count(f)
if include_count > MAX_INCLUDE_COUNT:
p = problem.IncludeCountProblem(fname, include_count)
return ProblemVault.register_problem(p)
return False
yield problem.IncludeCountItem(fname, include_count)
def consider_function_size(fname, f):
"""Consider the function sizes for 'f' and return True if a new issue was found"""
found_new_issues = False
"""yield a FunctionSizeItem for every function in f.
"""
for name, lines in metrics.get_function_lines(f):
# Don't worry about functions within our limits
if lines <= MAX_FUNCTION_SIZE:
continue
# That's a big function! Issue a problem!
canonical_function_name = "%s:%s()" % (fname, name)
p = problem.FunctionSizeProblem(canonical_function_name, lines)
found_new_issues |= ProblemVault.register_problem(p)
return found_new_issues
yield problem.FunctionSizeItem(canonical_function_name, lines)
#######################################################
def consider_all_metrics(files_list):
"""Consider metrics for all files, and return True if new issues were found"""
found_new_issues = False
"""Consider metrics for all files, and yield a sequence of problem.Item
object for those issues."""
for fname in files_list:
with open_file(fname) as f:
found_new_issues |= consider_metrics_for_file(fname, f)
return found_new_issues
for item in consider_metrics_for_file(fname, f):
yield item
def consider_metrics_for_file(fname, f):
"""
Consider the various metrics for file with filename 'fname' and file descriptor 'f'.
Return True if we found new issues.
Yield a sequence of problem.Item objects for all of the metrics in
'f'.
"""
# Strip the useless part of the path
if fname.startswith(TOR_TOPDIR):
fname = fname[len(TOR_TOPDIR):]
found_new_issues = False
# Get file length
found_new_issues |= consider_file_size(fname, f)
for item in consider_file_size(fname, f):
yield item
# Consider number of #includes
f.seek(0)
found_new_issues |= consider_includes(fname, f)
for item in consider_includes(fname, f):
yield item
# Get function length
f.seek(0)
found_new_issues |= consider_function_size(fname, f)
return found_new_issues
for item in consider_function_size(fname, f):
yield item
HEADER="""\
# Welcome to the exceptions file for Tor's best-practices tracker!
@ -161,8 +153,20 @@ def main(argv):
parser = argparse.ArgumentParser(prog=progname)
parser.add_argument("--regen", action="store_true",
help="Regenerate the exceptions file")
parser.add_argument("--list-overbroad", action="store_true",
help="List over-strict exceptions")
parser.add_argument("--exceptions",
help="Override the location for the exceptions file")
parser.add_argument("--strict", action="store_true",
help="Make all warnings into errors")
parser.add_argument("--terse", action="store_true",
help="Do not emit helpful instructions.")
parser.add_argument("--max-file-size", default=MAX_FILE_SIZE,
help="Maximum lines per C file size")
parser.add_argument("--max-include-count", default=MAX_INCLUDE_COUNT,
help="Maximum includes per C file")
parser.add_argument("--max-function-size", default=MAX_FUNCTION_SIZE,
help="Maximum lines per function")
parser.add_argument("topdir", default=".", nargs="?",
help="Top-level directory for the tor source")
args = parser.parse_args(argv[1:])
@ -174,24 +178,41 @@ def main(argv):
else:
exceptions_file = os.path.join(TOR_TOPDIR, "scripts/maint/practracker", EXCEPTIONS_FNAME)
# 0) Configure our thresholds of "what is a problem actually"
filt = problem.ProblemFilter()
filt.addThreshold(problem.FileSizeItem("*", int(args.max_file_size)))
filt.addThreshold(problem.IncludeCountItem("*", int(args.max_include_count)))
filt.addThreshold(problem.FunctionSizeItem("*", int(args.max_function_size)))
# 1) Get all the .c files we care about
files_list = util.get_tor_c_files(TOR_TOPDIR)
# 2) Initialize problem vault and load an optional exceptions file so that
# we don't warn about the past
global ProblemVault
if args.regen:
tmpname = exceptions_file + ".tmp"
tmpfile = open(tmpname, "w")
sys.stdout = tmpfile
sys.stdout.write(HEADER)
problem_file = tmpfile
ProblemVault = problem.ProblemVault()
else:
ProblemVault = problem.ProblemVault(exceptions_file)
problem_file = sys.stdout
# 2.1) Adjust the exceptions so that we warn only about small problems,
# and produce errors on big ones.
if not (args.regen or args.list_overbroad or args.strict):
ProblemVault.set_tolerances(TOLERANCE_FNS)
# 3) Go through all the files and report problems if they are not exceptions
found_new_issues = consider_all_metrics(files_list)
found_new_issues = 0
for item in filt.filter(consider_all_metrics(files_list)):
status = ProblemVault.register_problem(item)
if status == problem.STATUS_ERR:
print(item, file=problem_file)
found_new_issues += 1
elif status == problem.STATUS_WARN:
# warnings always go to stdout.
print("(warning) {}".format(item))
if args.regen:
tmpfile.close()
@ -199,18 +220,32 @@ def main(argv):
sys.exit(0)
# If new issues were found, try to give the developer some advice on how to resolve them.
if found_new_issues and not args.regen:
if found_new_issues and not args.regen and not args.terse:
new_issues_str = """\
FAILURE: practracker found new problems in the code: see warnings above.
FAILURE: practracker found {} new problem(s) in the code: see warnings above.
Please fix the problems if you can, and update the exceptions file
({}) if you can't.
See doc/HACKING/HelpfulTools.md for more information on using practracker.\
""".format(exceptions_file)
You can disable this message by setting the TOR_DISABLE_PRACTRACKER environment
variable.
""".format(found_new_issues, exceptions_file)
print(new_issues_str)
if args.list_overbroad:
def k_fn(tup):
return tup[0].key()
for (ex,p) in sorted(ProblemVault.list_overbroad_exceptions(), key=k_fn):
if p is None:
print(ex, "->", 0)
else:
print(ex, "->", p.metric_value)
sys.exit(found_new_issues)
if __name__ == '__main__':
if os.environ.get("TOR_DISABLE_PRACTRACKER"):
sys.exit(0)
main(sys.argv)

View File

@ -1,3 +1,5 @@
#!/usr/bin/python
"""Some simple tests for practracker metrics"""
import unittest
@ -38,13 +40,23 @@ class TestFunctionLength(unittest.TestCase):
def test_function_length(self):
funcs = StringIO.StringIO(function_file)
# All functions should have length 2
for name, lines in metrics.function_lines(funcs):
for name, lines in metrics.get_function_lines(funcs):
self.assertEqual(name, "fun")
funcs.seek(0)
for name, lines in metrics.function_lines(funcs):
self.assertEqual(lines, 2)
for name, lines in metrics.get_function_lines(funcs):
self.assertEqual(lines, 4)
class TestIncludeCount(unittest.TestCase):
def test_include_count(self):
f = StringIO.StringIO("""
# include <abc.h>
# include "def.h"
#include "ghi.h"
\t#\t include "jkl.h"
""")
self.assertEqual(metrics.get_include_count(f),4)
if __name__ == '__main__':
unittest.main()

View File

@ -13,6 +13,10 @@ import os.path
import re
import sys
STATUS_ERR = 2
STATUS_WARN = 1
STATUS_OK = 0
class ProblemVault(object):
"""
Singleton where we store the various new problems we
@ -22,6 +26,9 @@ class ProblemVault(object):
def __init__(self, exception_fname=None):
# Exception dictionary: { problem.key() : Problem object }
self.exceptions = {}
# Exception dictionary: maps key to the problem it was used to
# suppress.
self.used_exception_for = {}
if exception_fname == None:
return
@ -57,42 +64,90 @@ class ProblemVault(object):
def register_problem(self, problem):
"""
Register this problem to the problem value. Return True if it was a new
problem or it worsens an already existing problem.
Register this problem in the problem vault. Return true if it was a new
problem or it worsens an already existing problem. A true
value may be STATUS_ERR to indicate a hard violation, or STATUS_WARN
to indicate a warning.
"""
# This is a new problem, print it
if problem.key() not in self.exceptions:
print(problem)
return True
return STATUS_ERR
# If it's an old problem, we don't warn if the situation got better
# (e.g. we went from 4k LoC to 3k LoC), but we do warn if the
# situation worsened (e.g. we went from 60 includes to 80).
if problem.is_worse_than(self.exceptions[problem.key()]):
print(problem)
return True
status = problem.is_worse_than(self.exceptions[problem.key()])
if status == STATUS_OK:
self.used_exception_for[problem.key()] = problem
return False
return status
class Problem(object):
def list_overbroad_exceptions(self):
"""Return an iterator of tuples containing (ex,prob) where ex is an
exceptions in this vault that are stricter than it needs to be, and
prob is the worst problem (if any) that it covered.
"""
for k in self.exceptions:
e = self.exceptions[k]
p = self.used_exception_for.get(k)
if p is None or e.is_worse_than(p):
yield (e, p)
def set_tolerances(self, fns):
"""Adjust the tolerances for the exceptions in this vault. Takes
a map of problem type to a function that adjusts the permitted
function to its new maximum value."""
for k in self.exceptions:
ex = self.exceptions[k]
fn = fns.get(ex.problem_type)
if fn is not None:
ex.metric_value = fn(ex.metric_value)
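
To sketch how a widened exception interacts with is_worse_than() below: the original exception value stays behind as the warning threshold, while the widened value becomes the hard limit. The names come from this module; the adjustment is applied by hand here for clarity instead of calling set_tolerances().

    from problem import FunctionSizeItem, STATUS_WARN, STATUS_ERR

    ex = FunctionSizeItem("a.c:f()", 100)          # exception allows 100 lines
    ex.metric_value = int(ex.metric_value * 1.1)   # what set_tolerances() does
    # warning_threshold is still 100; metric_value is now 110.
    assert FunctionSizeItem("a.c:f()", 105).is_worse_than(ex) == STATUS_WARN
    assert FunctionSizeItem("a.c:f()", 115).is_worse_than(ex) == STATUS_ERR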
class ProblemFilter(object):
def __init__(self):
self.thresholds = dict()
def addThreshold(self, item):
self.thresholds[item.get_type()] = item
def matches(self, item):
filt = self.thresholds.get(item.get_type(), None)
if filt is None:
return False
return item.is_worse_than(filt)
def filter(self, sequence):
for item in iter(sequence):
if self.matches(item):
yield item
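
A hypothetical usage sketch of ProblemFilter (assuming the snippet sits next to problem.py so the import works): a threshold is itself an Item, and only measurements worse than their threshold pass through the filter.

    from problem import ProblemFilter, FunctionSizeItem

    filt = ProblemFilter()
    filt.addThreshold(FunctionSizeItem("*", 100))

    items = [FunctionSizeItem("a.c:ok()", 90),     # within the limit: dropped
             FunctionSizeItem("a.c:big()", 120)]   # over the limit: kept
    assert [i.metric_value for i in filt.filter(items)] == [120]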
class Item(object):
"""
A generic problem in our source code. See the subclasses below for the
specific problems we are trying to tackle.
A generic measurement about some aspect of our source code. See
the subclasses below for the specific problems we are trying to tackle.
"""
def __init__(self, problem_type, problem_location, metric_value):
self.problem_location = problem_location
self.metric_value = int(metric_value)
self.warning_threshold = self.metric_value
self.problem_type = problem_type
def is_worse_than(self, other_problem):
"""Return True if this is a worse problem than other_problem"""
"""Return STATUS_ERR if this is a worse problem than other_problem.
Return STATUS_WARN if it is a little worse, but falls within the
warning threshold. Return STATUS_OK if this problem is not
at all worse than other_problem.
"""
if self.metric_value > other_problem.metric_value:
return True
return False
return STATUS_ERR
elif self.metric_value > other_problem.warning_threshold:
return STATUS_WARN
else:
return STATUS_OK
def key(self):
"""Generate a unique key that describes this problem that can be used as a dictionary key"""
# Problem location is a filesystem path, so we need to normalize this
# Item location is a filesystem path, so we need to normalize this
# across platforms; otherwise the same paths will not match.
canonical_location = os.path.normcase(self.problem_location)
return "%s:%s" % (canonical_location, self.problem_type)
@ -100,7 +155,10 @@ class Problem(object):
def __str__(self):
return "problem %s %s %s" % (self.problem_type, self.problem_location, self.metric_value)
class FileSizeProblem(Problem):
def get_type(self):
return self.problem_type
class FileSizeItem(Item):
"""
Denotes a problem with the size of a .c file.
@ -108,9 +166,9 @@ class FileSizeProblem(Problem):
'metric_value' is the number of lines in the .c file.
"""
def __init__(self, problem_location, metric_value):
super(FileSizeProblem, self).__init__("file-size", problem_location, metric_value)
super(FileSizeItem, self).__init__("file-size", problem_location, metric_value)
class IncludeCountProblem(Problem):
class IncludeCountItem(Item):
"""
Denotes a problem with the number of #includes in a .c file.
@ -118,9 +176,9 @@ class IncludeCountProblem(Problem):
'metric_value' is the number of #includes in the .c file.
"""
def __init__(self, problem_location, metric_value):
super(IncludeCountProblem, self).__init__("include-count", problem_location, metric_value)
super(IncludeCountItem, self).__init__("include-count", problem_location, metric_value)
class FunctionSizeProblem(Problem):
class FunctionSizeItem(Item):
"""
Denotes a problem with the size of a function in a .c file.
@ -131,7 +189,7 @@ class FunctionSizeProblem(Problem):
The 'metric_value' is the size of the offending function in lines.
"""
def __init__(self, problem_location, metric_value):
super(FunctionSizeProblem, self).__init__("function-size", problem_location, metric_value)
super(FunctionSizeItem, self).__init__("function-size", problem_location, metric_value)
comment_re = re.compile(r'#.*$')
@ -149,10 +207,10 @@ def get_old_problem_from_exception_str(exception_str):
raise ValueError("Misformatted line {!r}".format(orig_str))
if problem_type == "file-size":
return FileSizeProblem(problem_location, metric_value)
return FileSizeItem(problem_location, metric_value)
elif problem_type == "include-count":
return IncludeCountProblem(problem_location, metric_value)
return IncludeCountItem(problem_location, metric_value)
elif problem_type == "function-size":
return FunctionSizeProblem(problem_location, metric_value)
return FunctionSizeItem(problem_location, metric_value)
else:
raise ValueError("Unknown exception type {!r}".format(orig_str))

View File

@ -0,0 +1,50 @@
#!/bin/sh
umask 077
TMPDIR=""
clean () {
if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then
rm -rf "$TMPDIR"
fi
}
trap clean EXIT HUP INT TERM
if test "${PRACTRACKER_DIR}" = "" ||
test ! -e "${PRACTRACKER_DIR}/practracker.py" ; then
PRACTRACKER_DIR=$(dirname "$0")
fi
TMPDIR="$(mktemp -d -t pracktracker.test.XXXXXX)"
if test -z "${TMPDIR}" || test ! -d "${TMPDIR}" ; then
echo >&2 "mktemp failed."
exit 1;
fi
DATA="${PRACTRACKER_DIR}/testdata"
run_practracker() {
"${PYTHON:-python}" "${PRACTRACKER_DIR}/practracker.py" \
--max-include-count=0 --max-file-size=0 --max-function-size=0 --terse \
"${DATA}/" "$@";
}
echo "ex0:"
run_practracker --exceptions "${DATA}/ex0.txt" > "${TMPDIR}/ex0-received.txt"
if cmp "${TMPDIR}/ex0-received.txt" "${DATA}/ex0-expected.txt" ; then
echo " OK"
else
exit 1
fi
echo "ex1:"
run_practracker --exceptions "${DATA}/ex1.txt" > "${TMPDIR}/ex1-received.txt"
if cmp "${TMPDIR}/ex1-received.txt" "${DATA}/ex1-expected.txt" ;then
echo " OK"
else
exit 1
fi

38
scripts/maint/practracker/testdata/a.c vendored Normal file
View File

@ -0,0 +1,38 @@
#include "one.h"
#include "two.h"
#incldue "three.h"
# include "four.h"
int
i_am_a_function(void)
{
call();
call();
/* comment
another */
return 3;
}
# include "five.h"
long
another_function(long x,
long y)
{
int abcd;
abcd = x+y;
abcd *= abcd;
/* comment here */
return abcd +
abcd +
abcd;
}
/* And a comment to grow! */

15
scripts/maint/practracker/testdata/b.c vendored Normal file
View File

@ -0,0 +1,15 @@
MOCK_IMPL(int,
foo,(void))
{
// blah1
return 0;
}
MOCK_IMPL(int,
bar,( long z))
{
// blah2
return (int)(z+2);
}

View File

View File

@ -0,0 +1,7 @@
problem file-size a.c 38
problem include-count a.c 4
problem function-size a.c:i_am_a_function() 9
problem function-size a.c:another_function() 12
problem file-size b.c 15
problem function-size b.c:foo() 4
problem function-size b.c:bar() 5

View File

View File

@ -0,0 +1,3 @@
problem function-size a.c:i_am_a_function() 9
(warning) problem function-size a.c:another_function() 12
problem function-size b.c:foo() 4

View File

@ -0,0 +1,11 @@
problem file-size a.c 38
problem include-count a.c 4
# this problem will produce an error
problem function-size a.c:i_am_a_function() 8
# this problem will produce a warning
problem function-size a.c:another_function() 11
problem file-size b.c 15
# This is removed, and so will produce an error.
# problem function-size b.c:foo() 4
problem function-size b.c:bar() 5

View File

@ -0,0 +1,2 @@
This isn't a C file, so practracker shouldn't care about it.

View File

@ -2,15 +2,24 @@ import os
# We don't want to run metrics for unittests, automatically-generated C files,
# external libraries or git leftovers.
EXCLUDE_SOURCE_DIRS = {"/src/test/", "/src/trunnel/", "/src/ext/", "/.git/"}
EXCLUDE_SOURCE_DIRS = {"src/test/", "src/trunnel/", "src/rust/",
"src/ext/", ".git/"}
def _norm(p):
return os.path.normcase(os.path.normpath(p))
def get_tor_c_files(tor_topdir):
"""
Return a list with the .c filenames we want to get metrics of.
"""
files_list = []
exclude_dirs = { _norm(os.path.join(tor_topdir, p)) for p in EXCLUDE_SOURCE_DIRS }
for root, directories, filenames in os.walk(tor_topdir):
# Remove all the directories that are excluded.
directories[:] = [ d for d in directories
if _norm(os.path.join(root,d)) not in exclude_dirs ]
directories.sort()
filenames.sort()
for filename in filenames:
@ -18,10 +27,7 @@ def get_tor_c_files(tor_topdir):
if not filename.endswith(".c"):
continue
# Exclude the excluded paths
full_path = os.path.join(root,filename)
if any(os.path.normcase(exclude_dir) in full_path for exclude_dir in EXCLUDE_SOURCE_DIRS):
continue
files_list.append(full_path)
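
A self-contained sketch of the pruning idiom used here (the function name and the excluded-directory subset are illustrative): assigning to directories[:] inside os.walk() keeps the walk from ever descending into excluded subtrees, instead of filtering file paths after the fact.

    import os

    EXCLUDE_SOURCE_DIRS = {"src/test/", "src/ext/"}   # abbreviated for the example

    def _norm(p):
        return os.path.normcase(os.path.normpath(p))

    def get_c_files(topdir):
        exclude_dirs = {_norm(os.path.join(topdir, p)) for p in EXCLUDE_SOURCE_DIRS}
        for root, directories, filenames in os.walk(topdir):
            # Prune excluded directories in place so os.walk() never enters them.
            directories[:] = [d for d in directories
                              if _norm(os.path.join(root, d)) not in exclude_dirs]
            for filename in sorted(filenames):
                if filename.endswith(".c"):
                    yield os.path.join(root, filename)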