diff options
Diffstat (limited to 'js/src/devtools/rootAnalysis/analyze.py')
-rwxr-xr-x | js/src/devtools/rootAnalysis/analyze.py | 462 |
1 files changed, 462 insertions, 0 deletions
#!/usr/bin/env python3

#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Runs the static rooting analysis
"""

import argparse
import os
import subprocess
import sys
from subprocess import Popen

try:
    from shlex import quote
except ImportError:
    from pipes import quote


def execfile(thefile, globals):
    """Compile and execute the Python file `thefile` using `globals` as its
    global namespace.

    As a side effect of exec(), `globals` gains a `__builtins__` key.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(thefile) as f:
        source = f.read()
    exec(compile(source, filename=thefile, mode="exec"), globals)


# Label a string as an output.
class Output(str):
    pass


# Label a string as a pattern for multiple inputs.
class MultiInput(str):
    pass


# Construct a new environment by merging in some settings needed for running the individual scripts.
def env(config):
    """Return a copy of os.environ with PATH, XDB and SOURCE set from `config`.

    Raises KeyError if `config` lacks "sixgill_bin" or "source".
    """
    # Add config['sixgill_bin'] to $PATH if not already there.
    path = os.environ["PATH"].split(":")
    sixgill_bin = config.get("sixgill_bin")
    if sixgill_bin and sixgill_bin not in path:
        path.insert(0, sixgill_bin)

    return dict(
        os.environ,
        PATH=":".join(path),
        XDB=f"{config['sixgill_bin']}/xdb.so",
        SOURCE=config["source"],
    )


def fill(command, config):
    """Substitute `config` values into every fragment of `command`.

    Each fragment is expanded with str.format(**config). Output fragments stay
    labeled as Output. A MultiInput fragment is expanded once per job number:
    its substituted text is formatted again with i=1..N and n=N, where N is
    config["jobs"], producing N plain strings.

    Returns a tuple of the expanded fragments. Raises Exception("substitution
    failure") if a fragment names a key that is missing from `config`.
    """
    filled = []
    for s in command:
        try:
            rep = s.format(**config)
        except KeyError as err:
            print("Substitution failed: %s" % s)
            raise Exception("substitution failure") from err

        if isinstance(s, Output):
            filled.append(Output(rep))
        elif isinstance(s, MultiInput):
            N = int(config["jobs"])
            filled.extend(rep.format(i=i, n=N) for i in range(1, N + 1))
        else:
            filled.append(rep)

    return tuple(filled)


def print_command(job, config, env=None):
    """Print a shell command line roughly equivalent to what `job` runs.

    If `env` is given, only the variables that differ from os.environ are
    shown as leading VAR="value" settings.
    """
    # Display a command to run that has roughly the same effect as what was
    # actually run. The actual command uses temporary files that get renamed at
    # the end, and run some commands in parallel chunks. The printed command
    # will substitute in the actual output and run in a single chunk, so that
    # it is easier to cut & paste and add a --function flag for debugging.
    cfg = dict(config, n=1, i=1, jobs=1)
    cmd = job_command_with_final_output_names(job)
    cmd = fill(cmd, cfg)

    cmd = [quote(s) for s in cmd]
    if outfile := job.get("redirect-output"):
        cmd.extend([">", quote(outfile.format(**cfg))])
    if HOME := os.environ.get("HOME"):
        cmd = [s.replace(HOME, "~") for s in cmd]

    if env:
        # Try to keep the command as short as possible by only displaying
        # modified environment variable settings.
        e = os.environ
        changed = {key: value for key, value in env.items() if value != e.get(key)}
        if changed:
            settings = []
            for key, value in changed.items():
                if key in e and e[key] in value:
                    # Display modifications as V=prefix${V}suffix when
                    # possible. This can make a huge difference for $PATH.
                    start = value.index(e[key])
                    end = start + len(e[key])
                    setting = '%s="%s${%s}%s"' % (key, value[:start], key, value[end:])
                else:
                    setting = '%s="%s"' % (key, value)
                if HOME:
                    setting = setting.replace(HOME, "$HOME")
                settings.append(setting)

            cmd = settings + cmd

    print(" " + " ".join(cmd))


# Table of analysis steps. Each entry has a "command" (a list of fragments
# that fill() expands) and optionally:
#   - "outputs": final filenames, one per Output fragment in the command
#   - "redirect-output": filename that the command's stdout is written to
#   - "multi-output": run the command once per parallel job number
JOBS = {
    "list-dbs": {"command": ["ls", "-l"]},
    "rawcalls": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeCallgraph.js",
            "{typeInfo}",
            Output("{rawcalls}"),
            "{i}",
            "{n}",
        ],
        "multi-output": True,
        "outputs": ["rawcalls.{i}.of.{n}"],
    },
    "gcFunctions": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeGCFunctions.js",
            MultiInput("{rawcalls}"),
            "--outputs",
            Output("{callgraph}"),
            Output("{gcFunctions}"),
            Output("{gcFunctions_list}"),
            Output("{limitedFunctions_list}"),
        ],
        "outputs": [
            "callgraph.txt",
            "gcFunctions.txt",
            "gcFunctions.lst",
            "limitedFunctions.lst",
        ],
    },
    "gcTypes": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeGCTypes.js",
            Output("{gcTypes}"),
            Output("{typeInfo}"),
        ],
        "outputs": ["gcTypes.txt", "typeInfo.txt"],
    },
    "allFunctions": {
        "command": ["{sixgill_bin}/xdbkeys", "src_body.xdb"],
        "redirect-output": "allFunctions.txt",
    },
    "hazards": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/analyzeRoots.js",
            "{gcFunctions_list}",
            "{limitedFunctions_list}",
            "{gcTypes}",
            "{typeInfo}",
            "{i}",
            "{n}",
            "tmp.{i}.of.{n}",
        ],
        "multi-output": True,
        "redirect-output": "rootingHazards.{i}.of.{n}",
    },
    "gather-hazards": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/mergeJSON.js",
            MultiInput("{hazards}"),
            Output("{all_hazards}"),
        ],
        "outputs": ["rootingHazards.json"],
    },
    "explain": {
        "command": [
            sys.executable,
            "{analysis_scriptdir}/explain.py",
            "{all_hazards}",
            "{gcFunctions}",
            Output("{explained_hazards}"),
            Output("{unnecessary}"),
            Output("{refs}"),
            Output("{html}"),
        ],
        "outputs": ["hazards.txt", "unnecessary.txt", "refs.txt", "hazards.html"],
    },
    "heapwrites": {
        "command": ["{js}", "{analysis_scriptdir}/analyzeHeapWrites.js"],
        "redirect-output": "heapWriteHazards.txt",
    },
}


# Generator of (i, j, item) tuples corresponding to outputs:
#  - i is just the index of the yielded tuple (a la enumerate())
#  - j is the index of the item in the command list
#  - item is command[j]
def out_indexes(command):
    i = 0
    for j, fragment in enumerate(command):
        if isinstance(fragment, Output):
            yield (i, j, fragment)
            i += 1


def job_command_with_final_output_names(job):
    """Return a copy of job["command"] (as a list) in which each Output
    fragment is replaced by the corresponding entry of job["outputs"]."""
    outfiles = job.get("outputs", [])
    command = list(job["command"])
    for i, j, _ in out_indexes(job["command"]):
        command[j] = outfiles[i]
    return command


def run_job(name, config):
    """Run the step `name` from JOBS and wait for it to finish.

    A "multi-output" job is spawned config["jobs"] times in parallel, with
    config["i"] set to the 1-based job number and config["n"] to the job
    count; other jobs run once. Note that `config` is modified in place.

    Raises Exception if any spawned process reports a nonzero status.
    """
    job = JOBS[name]
    outs = job.get("outputs") or job.get("redirect-output")
    print("Running " + name + " to generate " + str(outs))
    if "function" in job:
        job["function"](config, job["redirect-output"])
        return

    N = int(config["jobs"]) if job.get("multi-output") else 1
    config["n"] = N
    jobs = {}
    for i in range(1, N + 1):
        config["i"] = i
        cmd = fill(job["command"], config)
        info = spawn_command(cmd, job, name, config)
        jobs[info["proc"].pid] = info

    if config["verbose"] > 0:
        print_command(job, config, env=env(config))

    final_status = 0
    while jobs:
        # os.wait() reports the raw wait status, not the plain exit code, so
        # the number in the failure message below is in that encoding.
        pid, status = os.wait()
        final_status = final_status or status
        info = jobs.pop(pid)
        if "redirect" in info:
            info["redirect"].close()

        # Rename the temporary files to their final names.
        # NOTE(review): this happens whether or not the process succeeded;
        # confirm that is intended before making it conditional on `status`.
        for temp, final in info["rename_map"].items():
            try:
                if config["verbose"] > 1:
                    print("Renaming %s -> %s" % (temp, final))
                os.rename(temp, final)
            except OSError:
                print("Error renaming %s -> %s" % (temp, final))
                raise

    if final_status != 0:
        raise Exception("job {} returned status {}".format(name, final_status))


def spawn_command(cmdspec, job, name, config):
    """Start one process for `job` and return a dict describing it.

    `cmdspec` is the already-filled command. Outputs are written to temporary
    names: stdout goes to "<name>.tmp<i>" for a "redirect-output" job,
    otherwise each Output fragment becomes "<output>.tmp<i>".

    The returned dict has "proc" (the Popen object), "rename_map" (temporary
    name -> final name) and, for redirected jobs, "redirect" (the open stdout
    file, which the caller must close).
    """
    rename_map = {}
    suffix = config.get("i", "")

    if "redirect-output" in job:
        stdout_filename = "{}.tmp{}".format(name, suffix)
        final_outfile = job["redirect-output"].format(**config)
        rename_map[stdout_filename] = final_outfile
        command = cmdspec
    else:
        # Jobs such as "list-dbs" declare no outputs at all.
        outfiles = fill(job.get("outputs", []), config)
        stdout_filename = None

        # Replace the Outputs with temporary filenames, and record a mapping
        # from those temp names to their actual final names for the caller to
        # apply once the process has finished.
        command = list(cmdspec)
        for i, j, raw_name in out_indexes(cmdspec):
            # An output name may itself contain {i}/{n} placeholders, so it is
            # filled a second time here.
            [out_name] = fill([raw_name], config)
            command[j] = "{}.tmp{}".format(out_name, suffix)
            rename_map[command[j]] = outfiles[i]

    sys.stdout.flush()
    info = {"rename_map": rename_map}
    if stdout_filename:
        info["redirect"] = open(stdout_filename, "w")
        try:
            info["proc"] = Popen(command, stdout=info["redirect"], env=env(config))
        except BaseException:
            # Do not leak the redirect file if the process cannot be started.
            info["redirect"].close()
            raise
    else:
        info["proc"] = Popen(command, env=env(config))

    if config["verbose"] > 1:
        print("Spawned process {}".format(info["proc"].pid))

    return info


# Default to conservatively assuming 4GB/job.
def max_parallel_jobs(job_size=4 * 2**30):
    """Return the max number of parallel jobs we can run without overfilling
    memory, assuming heavyweight jobs.

    `job_size` is the assumed memory use of one job, in bytes. The result is
    the smaller of the CPU-based and memory-based limits, and is never less
    than 1 so that callers always run at least one job.
    """
    try:
        from_cores = int(subprocess.check_output(["nproc", "--ignore=1"]).strip())
    except (OSError, subprocess.CalledProcessError, ValueError):
        # nproc is missing or misbehaving: fall back to Python's own CPU
        # count, leaving one core free like `--ignore=1` does.
        from_cores = max(1, (os.cpu_count() or 1) - 1)
    mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
    from_mem = round(mem_bytes / job_size)
    return max(1, min(from_cores, from_mem))


def main():
    """Parse the command line, load defaults.py settings and run the
    requested analysis steps."""
    # dirname() is empty when the script is invoked by bare filename; fall
    # back to "." so derived paths do not start at the filesystem root.
    config = {"analysis_scriptdir": os.path.dirname(__file__) or "."}

    defaults = [
        "%s/defaults.py" % config["analysis_scriptdir"],
        "%s/defaults.py" % os.getcwd(),
    ]

    parser = argparse.ArgumentParser(
        description="Statically analyze build tree for rooting hazards."
    )
    parser.add_argument(
        "step", metavar="STEP", type=str, nargs="?", help="run only step STEP"
    )
    parser.add_argument(
        "--source", metavar="SOURCE", type=str, nargs="?", help="source code to analyze"
    )
    parser.add_argument(
        "--js",
        metavar="JSSHELL",
        type=str,
        nargs="?",
        help="full path to ctypes-capable JS shell",
    )
    parser.add_argument(
        "--first",
        metavar="STEP",
        type=str,
        nargs="?",
        help="execute all jobs starting with STEP",
    )
    parser.add_argument(
        "--last", metavar="STEP", type=str, nargs="?", help="stop at step STEP"
    )
    parser.add_argument(
        "--jobs",
        "-j",
        default=None,
        metavar="JOBS",
        type=int,
        help="number of simultaneous analyzeRoots.js jobs",
    )
    parser.add_argument(
        "--list", const=True, nargs="?", type=bool, help="display available steps"
    )
    parser.add_argument(
        "--expect-file",
        type=str,
        nargs="?",
        help="deprecated option, temporarily still present for backwards compatibility",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="count",
        default=1,
        help="Display cut & paste commands to run individual steps (give twice for more output)",
    )
    parser.add_argument(
        "--quiet", "-q", action="count", default=0, help="Suppress output"
    )

    args = parser.parse_args()
    args.verbose = max(0, args.verbose - args.quiet)

    for default in defaults:
        try:
            execfile(default, config)
        except OSError:
            # A missing or unreadable defaults.py is expected; skip it.
            continue
        except Exception as err:
            # Loading defaults stays best-effort, but do not hide real errors
            # inside a defaults.py from the user.
            if args.verbose > 0:
                print(
                    "Warning: ignoring %s: %r" % (default, err), file=sys.stderr
                )
            continue
        if args.verbose > 1:
            print("Loaded %s" % default)

    # execfile() used config as the globals for running the
    # defaults.py script, and will have set a __builtins__ key as a side effect.
    # The key is absent when no defaults.py could be loaded.
    config.pop("__builtins__", None)
    data = config.copy()

    for k, v in vars(args).items():
        if v is not None:
            data[k] = v

    if args.jobs is not None:
        data["jobs"] = args.jobs
    if not data.get("jobs"):
        data["jobs"] = max_parallel_jobs()

    # Environment variables take precedence over --source and defaults.py.
    if "GECKO_PATH" in os.environ:
        data["source"] = os.environ["GECKO_PATH"]
    if "SOURCE" in os.environ:
        data["source"] = os.environ["SOURCE"]

    steps = [
        "gcTypes",
        "rawcalls",
        "gcFunctions",
        "allFunctions",
        "hazards",
        "gather-hazards",
        "explain",
        "heapwrites",
    ]

    if args.list:
        for step in steps:
            job = JOBS[step]
            outfiles = job.get("outputs") or job.get("redirect-output")
            if outfiles:
                print(
                    "%s\n ->%s %s"
                    % (step, "*" if job.get("multi-output") else "", outfiles)
                )
            else:
                print(step)
        sys.exit(0)

    # Publish each step's output filename(s) in `data` so that later steps
    # can refer to them as {key} in their commands.
    for step in steps:
        job = JOBS[step]
        if "redirect-output" in job:
            data[step] = job["redirect-output"]
        elif "outputs" in job and "command" in job:
            outfiles = job["outputs"]
            params = [name for _, _, name in out_indexes(job["command"])]
            assert len(outfiles) == len(
                params
            ), 'step "%s": mismatched number of output files (%d) and params (%d)' % (
                step,
                len(outfiles),
                len(params),
            )
            for name, outfile in zip(params, outfiles):
                # Trim the {curly brackets} off of the output keys.
                data[name[1:-1]] = outfile

    if args.step:
        if args.first or args.last:
            raise Exception(
                "--first and --last cannot be used when a step argument is given"
            )
        if args.step not in JOBS:
            parser.error("unknown step '%s'" % args.step)
        steps = [args.step]
    else:
        for option in (args.first, args.last):
            if option and option not in steps:
                parser.error("unknown step '%s'" % option)
        if args.first:
            steps = steps[steps.index(args.first) :]
        if args.last:
            steps = steps[: steps.index(args.last) + 1]

    for step in steps:
        run_job(step, data)


if __name__ == "__main__":
    main()