diff options
Diffstat (limited to 'scripts/checkkconfigsymbols.py')
-rwxr-xr-x | scripts/checkkconfigsymbols.py | 482 |
1 files changed, 482 insertions, 0 deletions
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only

"""Find Kconfig symbols that are referenced but not defined."""

# (c) 2014-2017 Valentin Rothberg <valentinrothberg@gmail.com>
# (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de>
#


import argparse
import difflib
import os
import re
import signal
import subprocess
import sys
from multiprocessing import Pool, cpu_count


# regex expressions
OPERATORS = r"&|\(|\)|\||\!"
SYMBOL = r"(?:\w*[A-Z0-9]\w*){2,}"
DEF = r"^\s*(?:menu){,1}config\s+(" + SYMBOL + r")\s*"
EXPR = r"(?:" + OPERATORS + r"|\s|" + SYMBOL + r")+"
DEFAULT = r"default\s+.*?(?:if\s.+){,1}"
STMT = r"^\s*(?:if|select|imply|depends\s+on|(?:" + DEFAULT + r"))\s+" + EXPR
SOURCE_SYMBOL = r"(?:\W|\b)+[D]{,1}CONFIG_(" + SYMBOL + r")"

# regex objects
REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
REGEX_SYMBOL = re.compile(r'(?!\B)' + SYMBOL + r'(?!\B)')
REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL)
REGEX_KCONFIG_DEF = re.compile(DEF)
REGEX_KCONFIG_EXPR = re.compile(EXPR)
REGEX_KCONFIG_STMT = re.compile(STMT)
REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$")
REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
REGEX_QUOTES = re.compile("(\"(.*?)\")")

# Whether yel()/red() emit ANSI escapes.  print_undefined_symbols() overwrites
# this from the command line; the default keeps both helpers usable before.
COLOR = False


def parse_options():
    """The user interface of this module.

    Parse and validate the command line and return the argparse namespace.
    Exits the process with a message on contradictory or malformed options,
    and when --commit/--diff is requested on a dirty tree without --force.
    """
    usage = "Run this tool to detect Kconfig symbols that are referenced but " \
            "not defined in Kconfig. If no option is specified, " \
            "checkkconfigsymbols defaults to check your current tree. " \
            "Please note that specifying commits will 'git reset --hard\' " \
            "your current tree! You may save uncommitted changes to avoid " \
            "losing data."

    parser = argparse.ArgumentParser(description=usage)

    parser.add_argument('-c', '--commit', dest='commit', action='store',
                        default="",
                        help="check if the specified commit (hash) introduces "
                             "undefined Kconfig symbols")

    parser.add_argument('-d', '--diff', dest='diff', action='store',
                        default="",
                        help="diff undefined symbols between two commits "
                             "(e.g., -d commmit1..commit2)")

    parser.add_argument('-f', '--find', dest='find', action='store_true',
                        default=False,
                        help="find and show commits that may cause symbols to be "
                             "missing (required to run with --diff)")

    parser.add_argument('-i', '--ignore', dest='ignore', action='store',
                        default="",
                        help="ignore files matching this Python regex "
                             "(e.g., -i '.*defconfig')")

    parser.add_argument('-s', '--sim', dest='sim', action='store', default="",
                        help="print a list of max. 10 string-similar symbols")

    parser.add_argument('--force', dest='force', action='store_true',
                        default=False,
                        help="reset current Git tree even when it's dirty")

    parser.add_argument('--no-color', dest='color', action='store_false',
                        default=True,
                        help="don't print colored output (default when not "
                             "outputting to a terminal)")

    args = parser.parse_args()

    if args.commit and args.diff:
        sys.exit("Please specify only one option at once.")

    if args.diff and not re.match(r"^[\w\-\.\^]+\.\.[\w\-\.\^]+$", args.diff):
        sys.exit("Please specify valid input in the following format: "
                 "\'commit1..commit2\'")

    if args.commit or args.diff:
        if not args.force and tree_is_dirty():
            sys.exit("The current Git tree is dirty (see 'git status'). "
                     "Running this script may\ndelete important data since it "
                     "calls 'git reset --hard' for some performance\nreasons. "
                     " Please run this script in a clean Git tree or pass "
                     "'--force' if you\nwant to ignore this warning and "
                     "continue.")

    if args.commit:
        if args.commit.startswith('HEAD'):
            sys.exit("The --commit option can't use the HEAD ref")

        # --find needs a commit range, which only --diff provides
        args.find = False

    if args.ignore:
        try:
            re.compile(args.ignore)
        except re.error:
            # only an invalid pattern is reported; anything else is a real bug
            sys.exit("Please specify a valid Python regex.")

    return args


def print_undefined_symbols():
    """Main function of this module.

    Depending on the options, either list symbols similar to --sim, report
    undefined symbols introduced between two commits (--commit/--diff), or
    report all undefined symbols of the current tree.
    """
    args = parse_options()

    global COLOR
    COLOR = args.color and sys.stdout.isatty()

    if args.sim and not args.commit and not args.diff:
        sims = find_sims(args.sim, args.ignore)
        if sims:
            print("%s: %s" % (yel("Similar symbols"), ', '.join(sims)))
        else:
            print("%s: no similar symbols found" % yel("Similar symbols"))
        sys.exit(0)

    # dictionary of (un)defined symbols
    defined = {}
    undefined = {}

    if args.commit or args.diff:
        head = get_head()

        # get commit range
        if args.commit:
            commit_a = args.commit + "~"
            commit_b = args.commit
        else:
            commit_a, commit_b = args.diff.split("..")[:2]

        try:
            # get undefined items before the commit
            reset(commit_a)
            undefined_a, _ = check_symbols(args.ignore)

            # get undefined items for the commit
            reset(commit_b)
            undefined_b, defined = check_symbols(args.ignore)
        finally:
            # always return to the commit the user started from, even when a
            # check is interrupted or a git command fails half-way
            reset(head)

        # report cases that are present for the commit but not before
        for symbol in sorted(undefined_b):
            if symbol not in undefined_a:
                # symbol has not been undefined before
                files = sorted(undefined_b.get(symbol))
            else:
                # only new files that reference the undefined symbol
                files = sorted(undefined_b.get(symbol) -
                               undefined_a.get(symbol))
            if files:
                undefined[symbol] = files

    # default to check the entire tree
    else:
        undefined, defined = check_symbols(args.ignore)

    # now print the output
    for symbol in sorted(undefined):
        print(red(symbol))

        files = sorted(undefined.get(symbol))
        print("%s: %s" % (yel("Referencing files"), ", ".join(files)))

        sims = find_sims(symbol, args.ignore, defined)
        sims_out = yel("Similar symbols")
        if sims:
            print("%s: %s" % (sims_out, ', '.join(sims)))
        else:
            print("%s: %s" % (sims_out, "no similar symbols found"))

        if args.find:
            print("%s:" % yel("Commits changing symbol"))
            commits = find_commits(symbol, args.diff)
            if commits:
                for commit in commits:
                    # "<abbrev-hash> <subject>"; the subject may be empty
                    sha, _, subject = commit.partition(" ")
                    print("\t- %s (\"%s\")" % (yel(sha), subject))
            else:
                print("\t- no commit found")
        print()  # new line


def reset(commit):
    """Reset current git tree to %commit."""
    execute(["git", "reset", "--hard", commit])


def yel(string):
    """
    Color %string yellow.
    """
    return "\033[33m%s\033[0m" % string if COLOR else string


def red(string):
    """
    Color %string red.
    """
    return "\033[31m%s\033[0m" % string if COLOR else string


def execute(cmd):
    """Execute %cmd and return stdout.  Exit in case of error.

    %cmd is an argument list (no shell is involved).  stderr is merged into
    the returned text, which is decoded with undecodable bytes replaced.
    """
    try:
        stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                         shell=False)
    except subprocess.CalledProcessError as fail:
        sys.exit(fail)
    return stdout.decode(errors='replace')


def find_commits(symbol, diff):
    """Find commits changing %symbol in the given range of %diff.

    Returns the non-empty lines of 'git log --pretty=oneline'.  An empty
    %diff is not passed on to git, so the log then starts from HEAD.
    """
    cmd = ["git", "log", "--pretty=oneline", "--abbrev-commit", "-G", symbol]
    if diff:
        cmd.append(diff)
    return [x for x in execute(cmd).split("\n") if x]


def tree_is_dirty(status=None):
    """Return true if the current working tree is dirty (i.e., if any file has
    been added, deleted, modified, renamed or copied but not committed).

    %status may carry already fetched 'git status --porcelain' output; when
    it is None the command is run here.  Only the two status columns of each
    line are inspected, so untracked ('??') entries do not count as dirty.
    """
    if status is None:
        status = execute(["git", "status", "--porcelain"])
    return any(re.search(r"[URMADC]", line[:2])
               for line in status.splitlines())


def get_head():
    """Return commit hash of current HEAD."""
    return execute(["git", "rev-parse", "HEAD"]).strip('\n')


def partition(lst, size):
    """Partition list @lst into @size interleaved, evenly sized lists."""
    return [lst[i::size] for i in range(size)]


def init_worker():
    """Set signal handler to ignore SIGINT."""
    signal.signal(signal.SIGINT, signal.SIG_IGN)


def find_sims(symbol, ignore, defined=None):
    """Return a list of max. ten Kconfig symbols that are string-similar to
    @symbol.

    @defined is an optional collection of known symbols.  When it is missing
    or empty, all Kconfig files of the tree are parsed to collect them; the
    caller's collection is never modified.
    """
    if defined:
        return difflib.get_close_matches(symbol, set(defined), 10)

    kfiles = [gitfile for gitfile in get_files()
              if REGEX_FILE_KCONFIG.match(gitfile)]
    arglist = [(part, ignore) for part in partition(kfiles, cpu_count())]

    found = set()
    # the context manager tears the workers down once the results are in
    with Pool(cpu_count(), init_worker) as pool:
        for res in pool.map(parse_kconfig_files, arglist):
            found.update(res[0])

    return difflib.get_close_matches(symbol, found, 10)


def get_files():
    """Return a list of all files in the current git directory.

    Paths containing ".git", "ChangeLog" or ".log", directories, and
    everything below tools/ are left out.
    """
    # use 'git ls-files' to get the worklist
    stdout = execute(["git", "ls-files"])

    files = []
    for gitfile in stdout.split("\n"):
        if not gitfile:
            # trailing newline or empty output
            continue
        if ".git" in gitfile or "ChangeLog" in gitfile or \
           ".log" in gitfile or os.path.isdir(gitfile) or \
           gitfile.startswith("tools/"):
            continue
        files.append(gitfile)
    return files


def check_symbols(ignore):
    """Find undefined Kconfig symbols and return a tuple of a dict with the
    symbol as key and the set of referencing files as value, and the set of
    defined symbols.  Files matching %ignore are not checked for undefined
    symbols."""
    pool = Pool(cpu_count(), init_worker)
    try:
        return check_symbols_helper(pool, ignore)
    except KeyboardInterrupt:
        sys.exit(1)
    finally:
        # release the worker processes on every path, not only on Ctrl-C
        pool.terminate()
        pool.join()


def check_symbols_helper(pool, ignore):
    """Helper method for check_symbols().  Used to catch keyboard interrupts in
    check_symbols() in order to properly terminate running worker processes."""
    source_files = []
    kconfig_files = []
    defined_symbols = []
    referenced_symbols = dict()  # {file: [symbols]}

    for gitfile in get_files():
        if REGEX_FILE_KCONFIG.match(gitfile):
            kconfig_files.append(gitfile)
        elif not (ignore and re.match(ignore, gitfile)):
            # add source files that do not match the ignore pattern
            source_files.append(gitfile)

    # parse source files
    arglist = partition(source_files, cpu_count())
    for res in pool.map(parse_source_files, arglist):
        referenced_symbols.update(res)

    # parse kconfig files
    arglist = [(part, ignore)
               for part in partition(kconfig_files, cpu_count())]
    for res in pool.map(parse_kconfig_files, arglist):
        defined_symbols.extend(res[0])
        referenced_symbols.update(res[1])
    defined_symbols = set(defined_symbols)

    # inverse mapping of referenced_symbols to dict(symbol: {files})
    inv_map = dict()
    for _file, symbols in referenced_symbols.items():
        for symbol in symbols:
            inv_map.setdefault(symbol, set()).add(_file)
    referenced_symbols = inv_map

    undefined = {}  # {symbol: {files}}
    for symbol in sorted(referenced_symbols):
        # filter some false positives
        if symbol in ("FOO", "BAR", "FOO_BAR", "XXX"):
            continue
        if symbol not in defined_symbols:
            if symbol.endswith("_MODULE"):
                # avoid false positives for kernel modules
                if symbol[:-len("_MODULE")] in defined_symbols:
                    continue
            undefined[symbol] = referenced_symbols.get(symbol)
    return undefined, defined_symbols


def parse_source_files(source_files):
    """Parse each source file in @source_files and return dictionary with source
    files as keys and lists of references Kconfig symbols as values."""
    return {sfile: parse_source_file(sfile) for sfile in source_files}


def parse_source_file(sfile):
    """Parse @sfile and return a list of referenced Kconfig symbols.

    Returns an empty list when @sfile is not a regular file.
    """
    references = []

    if not os.path.isfile(sfile):
        return references

    with open(sfile, "r", encoding='utf-8', errors='replace') as stream:
        for line in stream:
            if "CONFIG_" not in line:
                continue
            # drop matches that do not end in a letter or digit
            references.extend(
                symbol for symbol in REGEX_SOURCE_SYMBOL.findall(line)
                if REGEX_FILTER_SYMBOLS.search(symbol))

    return references


def get_symbols_in_line(line):
    """Return mentioned Kconfig symbols in @line."""
    return REGEX_SYMBOL.findall(line)


def parse_kconfig_files(args):
    """Parse kconfig files and return tuple of defined and references Kconfig
    symbols.  Note, @args is a tuple of a list of files and the @ignore
    pattern."""
    kconfig_files, ignore = args[0], args[1]
    defined_symbols = []
    referenced_symbols = dict()

    for kfile in kconfig_files:
        defined, references = parse_kconfig_file(kfile)
        defined_symbols.extend(defined)
        if ignore and re.match(ignore, kfile):
            # do not collect references for files that match the ignore pattern
            continue
        referenced_symbols[kfile] = references
    return (defined_symbols, referenced_symbols)


def parse_kconfig_file(kfile):
    """Parse @kfile and return a tuple of the symbols it defines and the
    symbols it references.

    Both lists are empty when @kfile is not a regular file.
    """
    defined = []
    references = []

    if not os.path.isfile(kfile):
        return defined, references

    with open(kfile, "r", encoding='utf-8', errors='replace') as stream:
        lines = stream.readlines()

    for index, raw in enumerate(lines):
        line = raw.strip('\n').split("#")[0]  # ignore comments

        definition = REGEX_KCONFIG_DEF.match(line)
        if definition:
            defined.append(definition.group(1))
        elif REGEX_KCONFIG_STMT.match(line):
            line = REGEX_QUOTES.sub("", line)
            symbols = get_symbols_in_line(line)
            # multi-line statements; stop at end of file so that a dangling
            # backslash on the last line cannot index past the list
            following = index + 1
            while line.endswith("\\") and following < len(lines):
                line = lines[following].strip('\n')
                symbols.extend(get_symbols_in_line(line))
                following += 1
            # de-duplicate while keeping first-seen order
            for symbol in dict.fromkeys(symbols):
                if REGEX_NUMERIC.match(symbol):
                    # ignore numeric values
                    continue
                references.append(symbol)

    return defined, references


def main():
    """Run the checker and exit quietly when the output pipe is closed."""
    try:
        print_undefined_symbols()
    except BrokenPipeError:
        # Python flushes standard streams on exit; redirect remaining output
        # to devnull to avoid another BrokenPipeError at shutdown
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
        sys.exit(1)  # Python exits with error code 1 on EPIPE


if __name__ == "__main__":
    main()