From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- js/src/devtools/rootAnalysis/CFG.js | 1178 +++++++++++++++++ js/src/devtools/rootAnalysis/README.md | 3 + js/src/devtools/rootAnalysis/analyze.py | 462 +++++++ js/src/devtools/rootAnalysis/analyzeHeapWrites.js | 1396 ++++++++++++++++++++ js/src/devtools/rootAnalysis/analyzeRoots.js | 963 ++++++++++++++ js/src/devtools/rootAnalysis/annotations.js | 489 +++++++ js/src/devtools/rootAnalysis/build.js | 15 + .../rootAnalysis/build/sixgill-b2g.manifest | 10 + .../devtools/rootAnalysis/build/sixgill.manifest | 10 + js/src/devtools/rootAnalysis/callgraph.js | 233 ++++ js/src/devtools/rootAnalysis/computeCallgraph.js | 434 ++++++ js/src/devtools/rootAnalysis/computeGCFunctions.js | 113 ++ js/src/devtools/rootAnalysis/computeGCTypes.js | 550 ++++++++ js/src/devtools/rootAnalysis/dumpCFG.js | 273 ++++ js/src/devtools/rootAnalysis/expect.b2g.json | 3 + js/src/devtools/rootAnalysis/expect.browser.json | 3 + js/src/devtools/rootAnalysis/expect.shell.json | 3 + js/src/devtools/rootAnalysis/explain.py | 345 +++++ js/src/devtools/rootAnalysis/gen-hazards.sh | 15 + js/src/devtools/rootAnalysis/loadCallgraph.js | 590 +++++++++ js/src/devtools/rootAnalysis/mach_commands.py | 690 ++++++++++ js/src/devtools/rootAnalysis/mergeJSON.js | 26 + js/src/devtools/rootAnalysis/mozconfig.browser | 19 + js/src/devtools/rootAnalysis/mozconfig.common | 37 + js/src/devtools/rootAnalysis/mozconfig.haz_shell | 18 + js/src/devtools/rootAnalysis/mozconfig.js | 16 + js/src/devtools/rootAnalysis/run-analysis.sh | 4 + js/src/devtools/rootAnalysis/run-test.py | 154 +++ js/src/devtools/rootAnalysis/run_complete | 384 ++++++ .../devtools/rootAnalysis/t/exceptions/source.cpp | 57 + js/src/devtools/rootAnalysis/t/exceptions/test.py | 21 + js/src/devtools/rootAnalysis/t/graph/source.cpp | 90 ++ 
js/src/devtools/rootAnalysis/t/graph/test.py | 54 + js/src/devtools/rootAnalysis/t/hazards/source.cpp | 566 ++++++++ js/src/devtools/rootAnalysis/t/hazards/test.py | 121 ++ .../rootAnalysis/t/sixgill-tree/source.cpp | 76 ++ .../devtools/rootAnalysis/t/sixgill-tree/test.py | 63 + js/src/devtools/rootAnalysis/t/sixgill.py | 70 + .../devtools/rootAnalysis/t/suppression/source.cpp | 72 + js/src/devtools/rootAnalysis/t/suppression/test.py | 21 + js/src/devtools/rootAnalysis/t/testlib.py | 249 ++++ js/src/devtools/rootAnalysis/t/types/source.cpp | 167 +++ js/src/devtools/rootAnalysis/t/types/test.py | 16 + js/src/devtools/rootAnalysis/t/virtual/source.cpp | 366 +++++ js/src/devtools/rootAnalysis/t/virtual/test.py | 99 ++ js/src/devtools/rootAnalysis/utility.js | 422 ++++++ 46 files changed, 10966 insertions(+) create mode 100644 js/src/devtools/rootAnalysis/CFG.js create mode 100644 js/src/devtools/rootAnalysis/README.md create mode 100755 js/src/devtools/rootAnalysis/analyze.py create mode 100644 js/src/devtools/rootAnalysis/analyzeHeapWrites.js create mode 100644 js/src/devtools/rootAnalysis/analyzeRoots.js create mode 100644 js/src/devtools/rootAnalysis/annotations.js create mode 100644 js/src/devtools/rootAnalysis/build.js create mode 100644 js/src/devtools/rootAnalysis/build/sixgill-b2g.manifest create mode 100644 js/src/devtools/rootAnalysis/build/sixgill.manifest create mode 100644 js/src/devtools/rootAnalysis/callgraph.js create mode 100644 js/src/devtools/rootAnalysis/computeCallgraph.js create mode 100644 js/src/devtools/rootAnalysis/computeGCFunctions.js create mode 100644 js/src/devtools/rootAnalysis/computeGCTypes.js create mode 100644 js/src/devtools/rootAnalysis/dumpCFG.js create mode 100644 js/src/devtools/rootAnalysis/expect.b2g.json create mode 100644 js/src/devtools/rootAnalysis/expect.browser.json create mode 100644 js/src/devtools/rootAnalysis/expect.shell.json create mode 100755 js/src/devtools/rootAnalysis/explain.py create mode 100755 
js/src/devtools/rootAnalysis/gen-hazards.sh create mode 100644 js/src/devtools/rootAnalysis/loadCallgraph.js create mode 100644 js/src/devtools/rootAnalysis/mach_commands.py create mode 100644 js/src/devtools/rootAnalysis/mergeJSON.js create mode 100644 js/src/devtools/rootAnalysis/mozconfig.browser create mode 100644 js/src/devtools/rootAnalysis/mozconfig.common create mode 100644 js/src/devtools/rootAnalysis/mozconfig.haz_shell create mode 100644 js/src/devtools/rootAnalysis/mozconfig.js create mode 100755 js/src/devtools/rootAnalysis/run-analysis.sh create mode 100755 js/src/devtools/rootAnalysis/run-test.py create mode 100755 js/src/devtools/rootAnalysis/run_complete create mode 100644 js/src/devtools/rootAnalysis/t/exceptions/source.cpp create mode 100644 js/src/devtools/rootAnalysis/t/exceptions/test.py create mode 100644 js/src/devtools/rootAnalysis/t/graph/source.cpp create mode 100644 js/src/devtools/rootAnalysis/t/graph/test.py create mode 100644 js/src/devtools/rootAnalysis/t/hazards/source.cpp create mode 100644 js/src/devtools/rootAnalysis/t/hazards/test.py create mode 100644 js/src/devtools/rootAnalysis/t/sixgill-tree/source.cpp create mode 100644 js/src/devtools/rootAnalysis/t/sixgill-tree/test.py create mode 100644 js/src/devtools/rootAnalysis/t/sixgill.py create mode 100644 js/src/devtools/rootAnalysis/t/suppression/source.cpp create mode 100644 js/src/devtools/rootAnalysis/t/suppression/test.py create mode 100644 js/src/devtools/rootAnalysis/t/testlib.py create mode 100644 js/src/devtools/rootAnalysis/t/types/source.cpp create mode 100644 js/src/devtools/rootAnalysis/t/types/test.py create mode 100644 js/src/devtools/rootAnalysis/t/virtual/source.cpp create mode 100644 js/src/devtools/rootAnalysis/t/virtual/test.py create mode 100644 js/src/devtools/rootAnalysis/utility.js (limited to 'js/src/devtools/rootAnalysis') diff --git a/js/src/devtools/rootAnalysis/CFG.js b/js/src/devtools/rootAnalysis/CFG.js new file mode 100644 index 
// 0000000000..1b6f714279 --- /dev/null +++ b/js/src/devtools/rootAnalysis/CFG.js @@ -0,0 +1,1178 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */

// Utility code for traversing the JSON data structures produced by sixgill.

"use strict";

var TRACING = false;

// When edge.Kind == "Pointer", these are the meanings of the edge.Reference field.
var PTR_POINTER = 0;
var PTR_REFERENCE = 1;
var PTR_RVALUE_REF = 2;

// Find all points (positions within the code) of the body given by the list of
// bodies and the blockId to match (which will specify an outer function or a
// loop within it), recursing into loops if needed.
//
// Returns an array of [body, point, bits] triples. The array is empty (never
// undefined) when the matching body has no edges, so that callers can always
// spread the result.
function findAllPoints(bodies, blockId, bits)
{
    let body;
    for (const candidate of bodies) {
        if (sameBlockId(candidate.BlockId, blockId)) {
            // There must be exactly one body with the requested id.
            assert(!body);
            body = candidate;
        }
    }
    assert(body);

    const points = [];
    if (!("PEdge" in body))
        return points;

    for (const edge of body.PEdge) {
        points.push([body, edge.Index[0], bits]);
        if (edge.Kind == "Loop")
            points.push(...findAllPoints(bodies, edge.BlockId, bits));
    }

    return points;
}

// Visitor of a graph of vertexes and sixgill-generated edges,
// where the edges represent the actual computation happening.
//
// Uses the syntax `var Visitor = class { ... }` rather than `class Visitor`
// to allow reloading this file with the JS debugger.
var Visitor = class {
    constructor(bodies) {
        // Per-body table mapping each visited ppoint to its stored info.
        this.visited_bodies = new Map();
        for (const body of bodies) {
            this.visited_bodies.set(body, new Map());
        }
    }

    // Prepend `edge` to the info stored at the successor node, returning
    // the updated info value. This should be overridden by pretty much any
    // subclass, as a traversal's semantics are largely determined by this method.
    extend_path(edge, body, ppoint, successor_value) { return true; }

    // Default implementation does a basic "only visit nodes once" search.
    // (Whether this is BFS/DFS/other is determined by the caller.)

    // Override if you need to revisit nodes. Valid actions are "continue",
    // "prune", and "done". "continue" means continue with the search. "prune"
    // means do not continue to predecessors of this node, only continue with
    // the remaining entries in the work queue. "done" means the
    // whole search is complete even if unvisited nodes remain.
    next_action(prev, current) { return prev ? "prune" : "continue"; }

    // Update the info at a node. If this is the first time the node has been
    // seen, `prev` will be undefined. `current` will be the info computed by
    // `extend_path`. The node will be updated with the return value.
    merge_info(prev, current) { return true; }

    // Default visit() implementation. Subclasses will usually leave this alone
    // and use the other methods as extension points.
    //
    // Take a body, a point within that body, and the info computed by
    // extend_path() for that point when traversing an edge. Return whether the
    // search should continue ("continue"), the search should be pruned and
    // other paths followed ("prune"), or that the whole search is complete and
    // it is time to return a value ("done", and the value returned by
    // merge_info() will be returned by the overall search).
    //
    // Persistently record the value computed so far at each point, and call
    // (overridable) next_action() and merge_info() methods with the previous
    // and freshly-computed value for each point.
    //
    // Often, extend_path() will decide how/whether to continue the search and
    // will return the search action to take, and next_action() will blindly
    // return it if the point has not yet been visited. (And if it has, it will
    // prune this branch of the search so that no point is visited multiple
    // times.)
    visit(body, ppoint, info) {
        const visited_value_table = this.visited_bodies.get(body);
        const existing_value_if_visited = visited_value_table.get(ppoint);
        const action = this.next_action(existing_value_if_visited, info);
        const merged = this.merge_info(existing_value_if_visited, info);
        visited_value_table.set(ppoint, merged);
        return [action, merged];
    }
};

// Return the body in `bodies` whose BlockId matches `blockId`. It is an
// assertion failure for no such body to exist.
function findMatchingBlock(bodies, blockId) {
    for (const body of bodies) {
        if (sameBlockId(body.BlockId, blockId)) {
            return body;
        }
    }
    assert(false);
}

// For a given function containing a set of bodies, each containing a set of
// ppoints, perform a mostly breadth-first traversal through the complete graph
// of all nodes throughout all the bodies of the function.
//
// When traversing, every node is associated with a value that
// is assigned or updated whenever it is visited. The overall traversal
// terminates when a given condition is reached, and an arbitrary custom value
// is returned. If the search completes without the termination condition
// being reached, it will return the value associated with the entrypoint
// node, which is initialized to `entrypoint_fallback_value` (and thus serves as
// the fallback return value if all search paths are pruned before reaching
// the entrypoint.)
//
// The traversal is only *mostly* breadth-first because the visitor decides
// whether to stop searching when it sees a node. If a node is visited for a
// second time, the visitor can choose to continue (and thus revisit the node)
// in order to find "better" paths that may include a node more than once.
// The search is done in the "upwards" direction -- as in, it starts at the
// exit point and searches through predecessors.
//
// Override visitor.visit() to return an action and a value. The action
// determines whether the overall search should terminate ('done'), or
// continues looking through the predecessors of the current node ('continue'),
// or whether it should just continue processing the work queue without
// looking at predecessors ('prune').
//
// This allows this function to be used in different ways. If the visitor
// associates a value with each node that chains onto its forward-flow successors
// (predecessors in the "upwards" search order), then a complete path through
// the graph will be returned.
//
// Alternatively, BFS_upwards() can be used to test whether a condition holds
// (eg "the exit point is reachable only after calling SomethingImportant()"),
// in which case no path is needed and the visitor can compute a simple boolean
// every time it encounters a point. Note that `entrypoint_fallback_value` will
// still be returned if the search terminates without ever reaching the
// entrypoint, which is useful for dominator analyses.
//
// See the Visitor base class's implementation of visit(), above, for the
// most commonly used visit logic.
function BFS_upwards(start_body, start_ppoint, bodies, visitor,
                     initial_successor_value = {},
                     entrypoint_fallback_value=null)
{
    let entrypoint_value = entrypoint_fallback_value;

    // Work items are [body, ppoint, edge leading to the successor, successor's value].
    const work = [[start_body, start_ppoint, null, initial_successor_value]];
    if (TRACING) {
        printErr(`BFS start at ${blockIdentifier(start_body)}:${start_ppoint}`);
    }

    while (work.length > 0) {
        const [body, ppoint, edgeToAdd, successor_value] = work.shift();
        if (TRACING) {
            const s = edgeToAdd ? " : " + str(edgeToAdd) : "";
            printErr(`prepending edge from ${ppoint} to state '${successor_value}'${s}`);
        }
        let value = visitor.extend_path(edgeToAdd, body, ppoint, successor_value);

        const [action, merged_value] = visitor.visit(body, ppoint, value);
        if (action === "done") {
            return merged_value;
        }
        if (action === "prune") {
            // Do not push anything else to the work queue, but continue processing
            // other branches.
            continue;
        }
        assert(action == "continue");
        value = merged_value;

        const predecessors = getPredecessors(body);
        for (const edge of (predecessors[ppoint] || [])) {
            if (edge.Kind == "Loop") {
                // Propagate the search into the exit point of the loop body.
                const loopBody = findMatchingBlock(bodies, edge.BlockId);
                const loopEnd = loopBody.Index[1];
                work.push([loopBody, loopEnd, null, value]);
                // Don't continue to predecessors here without going through
                // the loop. (The points in this body that enter the loop will
                // be traversed when we reach the entry point of the loop.)
            }
            work.push([body, edge.Index[0], edge, value]);
        }

        // Check for hitting the entry point of a loop body.
        if (ppoint == body.Index[0] && body.BlockId.Kind == "Loop") {
            // Propagate to outer body parents that enter the loop body.
            for (const parent of (body.BlockPPoint || [])) {
                const parentBody = findMatchingBlock(bodies, parent.BlockId);
                work.push([parentBody, parent.Index, null, value]);
            }

            // This point is also preceded by the *end* of this loop, for the
            // previous iteration.
            work.push([body, body.Index[1], null, value]);
        }

        // Check for reaching the entrypoint of the function.
        if (body === start_body && ppoint == body.Index[0]) {
            entrypoint_value = value;
        }
    }

    // The search space was exhausted without finding a 'done' state. That
    // might be because all search paths were pruned before reaching the entry
    // point of the function, in which case entrypoint_value will still be its initial
    // value. (If entrypoint_value has been set, then we may still not have visited the
    // entire graph, if some paths were pruned but at least one made it to the entrypoint.)
    return entrypoint_value;
}

// Given the CFG for the constructor call of some RAII, return whether the
// given edge is the matching destructor call.
function isMatchingDestructor(constructor, edge)
{
    if (edge.Kind != "Call")
        return false;
    const callee = edge.Exp[0];
    if (callee.Kind != "Var")
        return false;
    const variable = callee.Variable;
    assert(variable.Kind == "Func");
    if (variable.Name[1].charAt(0) != '~')
        return false;

    // Note that in some situations, a regular function can begin with '~', so
    // we don't necessarily have a destructor in hand. This is probably a
    // sixgill artifact, but in js::wasm::ModuleGenerator::~ModuleGenerator, a
    // templatized static inline EraseIf is invoked, and it gets named ~EraseIf
    // for some reason.
    if (!("PEdgeCallInstance" in edge))
        return false;

    const constructExp = constructor.PEdgeCallInstance.Exp;
    assert(constructExp.Kind == "Var");

    const destructExp = edge.PEdgeCallInstance.Exp;
    if (destructExp.Kind != "Var")
        return false;

    return sameVariable(constructExp.Variable, destructExp.Variable);
}

// Return all calls within the RAII scope of any constructor matched by
// isConstructor(). (Note that this would be insufficient if you needed to
// treat each instance separately, such as when different regions of a function
// body were guarded by these constructors and you needed to do something
// different with each.)
// Collect the [body, point, bits] triples for every point that lies inside
// the RAII scope of a constructor call in `body`. `isConstructor(typeInfo,
// edge.Type, calleeName)` decides which calls count; its truthy return value
// is passed through as `bits`. Returns [] for a body without edges.
function allRAIIGuardedCallPoints(typeInfo, bodies, body, isConstructor)
{
    if (!("PEdge" in body))
        return [];

    const points = [];

    for (const edge of body.PEdge) {
        if (edge.Kind != "Call")
            continue;
        const callee = edge.Exp[0];
        if (callee.Kind != "Var")
            continue;
        const variable = callee.Variable;
        assert(variable.Kind == "Func");
        const bits = isConstructor(typeInfo, edge.Type, variable.Name);
        if (!bits)
            continue;
        // Only constructors invoked on a plain variable define a scope that
        // can be matched against a destructor call.
        if (!("PEdgeCallInstance" in edge))
            continue;
        if (edge.PEdgeCallInstance.Exp.Kind != "Var")
            continue;

        points.push(...pointsInRAIIScope(bodies, body, edge, bits));
    }

    return points;
}

// Test whether the given edge is the constructor corresponding to the given
// destructor edge.
function isMatchingConstructor(destructor, edge)
{
    if (edge.Kind != "Call")
        return false;
    const callee = edge.Exp[0];
    if (callee.Kind != "Var")
        return false;
    const variable = callee.Variable;
    if (variable.Kind != "Func")
        return false;

    const name = readable(variable.Name[0]);
    const destructorName = readable(destructor.Exp[0].Variable.Name[0]);
    const match = destructorName.match(/^(.*?::)~(\w+)\(/);
    if (!match) {
        printErr("Unhandled destructor syntax: " + destructorName);
        return false;
    }
    const constructorSubstring = match[1] + match[2];
    if (name.indexOf(constructorSubstring) == -1)
        return false;

    // The name test above is only a substring match, so it also accepts calls
    // that have no instance at all (e.g. a static method whose name merely
    // starts with the class name). Those cannot be the constructor.
    if (!("PEdgeCallInstance" in destructor) || !("PEdgeCallInstance" in edge))
        return false;

    const destructExp = destructor.PEdgeCallInstance.Exp;
    if (destructExp.Kind != "Var")
        return false;

    const constructExp = edge.PEdgeCallInstance.Exp;
    if (constructExp.Kind != "Var")
        return false;

    return sameVariable(constructExp.Variable, destructExp.Variable);
}

// Walk backwards from `destructorEdge` through the predecessor edges of
// `body` and return the first edge that isMatchingConstructor() accepts, or
// undefined if there is none (optionally logging a warning).
function findMatchingConstructor(destructorEdge, body, warnIfNotFound=true)
{
    const predecessors = getPredecessors(body);
    const worklist = [destructorEdge];
    // Edges already examined. An edge reached again through a different path
    // has nothing new to offer, so skipping it avoids re-walking shared
    // predecessors.
    const examined = new Set();

    while (worklist.length > 0) {
        const edge = worklist.pop();
        if (examined.has(edge))
            continue;
        examined.add(edge);

        if (isMatchingConstructor(destructorEdge, edge))
            return edge;
        if (edge.Index[0] in predecessors) {
            for (const e of predecessors[edge.Index[0]])
                worklist.push(e);
        }
    }

    if (warnIfNotFound)
        printErr("Could not find matching constructor!");
    return undefined;
}

// Return [body, point, bits] for every point reachable from the destination
// of `constructorEdge` without crossing the matching destructor call,
// including all points of any loop bodies entered along the way.
function pointsInRAIIScope(bodies, body, constructorEdge, bits) {
    const successors = getSuccessors(body);
    const seen = new Set();
    const worklist = [constructorEdge.Index[1]];
    const points = [];

    while (worklist.length) {
        const point = worklist.pop();
        if (seen.has(point))
            continue;
        seen.add(point);
        points.push([body, point, bits]);

        if (!(point in successors))
            continue;
        for (const nedge of successors[point]) {
            // The destructor ends the scope; do not look past it.
            if (isMatchingDestructor(constructorEdge, nedge))
                continue;
            if (nedge.Kind == "Loop") {
                // Tolerate findAllPoints() returning nothing for an edge-less
                // loop body.
                points.push(...(findAllPoints(bodies, nedge.BlockId, bits) || []));
            }
            worklist.push(nedge.Index[1]);
        }
    }

    return points;
}

// Return whether `exp` is the integer literal 0 (as produced for nullptr).
function isImmobileValue(exp) {
    return exp.Kind == "Int" && exp.String == "0";
}

// Returns whether decl is a body.DefineVariable[] entry for a non-temporary reference.
function isReferenceDecl(decl) {
    return decl.Type.Kind == "Pointer" && decl.Type.Reference != PTR_POINTER && decl.Variable.Kind != "Temp";
}

// Return whether `exp` is `variable`, or a (possibly nested) field access
// whose base is `variable`.
function expressionIsVariableAddress(exp, variable)
{
    while (exp.Kind == "Fld")
        exp = exp.Exp[0];
    return exp.Kind == "Var" && sameVariable(exp.Variable, variable);
}

// Return whether `edge` stores `variable` (or a field of it) as the right-hand
// side of an assignment or passes it as a call argument, unless the edge is
// excluded by ignoreEdgeUse() or ignoreEdgeAddressTaken().
function edgeTakesVariableAddress(edge, variable, body)
{
    if (ignoreEdgeUse(edge, variable, body))
        return false;
    if (ignoreEdgeAddressTaken(edge))
        return false;
    switch (edge.Kind) {
    case "Assign":
        return expressionIsVariableAddress(edge.Exp[1], variable);
    case "Call":
        if ("PEdgeCallArguments" in edge) {
            for (const exp of edge.PEdgeCallArguments.Exp) {
                if (expressionIsVariableAddress(exp, variable))
                    return true;
            }
        }
        return false;
    default:
        return false;
    }
}

// Look at an invocation of a virtual method or function pointer contained in a
// field, and return the static type of the invocant (or the containing struct,
// for a function pointer field.)
function getFieldCallInstanceCSU(edge, field)
{
    if (!("FieldInstanceFunction" in field)) {
        // somevar.foo() where somevar is a field of some CSU.
        return field.FieldCSU.Type.Name;
    }

    // We have a 'this'.
    const instanceExp = edge.PEdgeCallInstance.Exp;
    if (instanceExp.Kind == 'Drf') {
        // somevar->foo()
        return edge.Type.TypeFunctionCSU.Type.Name;
    } else if (instanceExp.Kind == 'Fld') {
        // somevar.foo()
        return instanceExp.Field.FieldCSU.Type.Name;
    } else if (instanceExp.Kind == 'Index') {
        // A strange construct.
        // C++ code: static_cast<JS::CustomAutoRooter*>(this)->trace(trc);
        // CFG: Call(21,30, this*[-1]{JS::CustomAutoRooter}.trace*(trc*))
        return instanceExp.Type.Name;
    } else if (instanceExp.Kind == 'Var') {
        // C++: reinterpret_cast<SimpleTimeZone*>(gRawGMT)->~SimpleTimeZone();
        // CFG:
        //   # icu_64::SimpleTimeZone::icu_64::SimpleTimeZone.__comp_dtor
        //   [6,7] Call gRawGMT.icu_64::SimpleTimeZone.__comp_dtor ()
        return field.FieldCSU.Type.Name;
    } else {
        printErr("------------------ edge -------------------");
        printErr(JSON.stringify(edge, null, 4));
        printErr("------------------ field -------------------");
        printErr(JSON.stringify(field, null, 4));
        assert(false, `unrecognized FieldInstanceFunction Kind ${instanceExp.Kind}`);
    }
}

// Return whether `variable` appears anywhere within the expression tree `exp`.
function expressionUsesVariable(exp, variable)
{
    if (exp.Kind == "Var" && sameVariable(exp.Variable, variable))
        return true;
    if (!("Exp" in exp))
        return false;
    for (const childExp of exp.Exp) {
        if (expressionUsesVariable(childExp, variable))
            return true;
    }
    return false;
}

// Return whether some dereference ('Drf') nested within `exp` mentions
// `variable`. A bare mention of the variable outside of a dereference does
// not count.
function expressionUsesVariableContents(exp, variable)
{
    if (!("Exp" in exp))
        return false;
    for (const childExp of exp.Exp) {
        if (childExp.Kind == 'Drf') {
            if (expressionUsesVariable(childExp, variable))
                return true;
        } else if (expressionUsesVariableContents(childExp, variable)) {
            return true;
        }
    }
    return false;
}

// Detect simple |return nullptr;| statements.
function isReturningImmobileValue(edge, variable)
{
    if (variable.Kind == "Return") {
        if (edge.Exp[0].Kind == "Var" && sameVariable(edge.Exp[0].Variable, variable)) {
            if (isImmobileValue(edge.Exp[1]))
                return true;
        }
    }
    return false;
}

// If the edge uses the given variable's value, return the earliest point at
// which the use is definite. Usually, that means the source of the edge
// (anything that reaches that source point will end up using the variable, but
// there may be other ways to reach the destination of the edge.)
//
// Return values are implicitly used at the very last point in the function.
// This makes a difference: if an RAII class GCs in its destructor, we need to
// start looking at the final point in the function, not one point back from
// that, since that would skip over the GCing call.
//
// Certain references may be annotated to be live to the end of the function
// as well (eg AutoCheckCannotGC&& parameters).
//
// Note that this returns a nonzero value only if the variable's incoming value is used.
// So this would return 0 for 'obj':
//
//   obj = someFunction();
//
// but these would return a positive value:
//
//   obj = someFunction(obj);
//   obj->foo = someFunction();
//
function edgeUsesVariable(edge, variable, body, liveToEnd=false)
{
    if (ignoreEdgeUse(edge, variable, body))
        return 0;

    if (variable.Kind == "Return") {
        liveToEnd = true;
    }

    if (liveToEnd && body.Index[1] == edge.Index[1] && body.BlockId.Kind == "Function") {
        // The last point in the function body is treated as using the return
        // value. This is the only time the destination point is returned
        // rather than the source point.
        return edge.Index[1];
    }

    const src = edge.Index[0];

    switch (edge.Kind) {

    case "Assign": {
        // Detect `Return := nullptr`.
        if (isReturningImmobileValue(edge, variable))
            return 0;
        const [lhs, rhs] = edge.Exp;
        // Detect `lhs := ...variable...`
        if (expressionUsesVariable(rhs, variable))
            return src;
        // Detect `...variable... := rhs` but not `variable := rhs`. The latter
        // overwrites the previous value of `variable` without using it.
        if (expressionUsesVariable(lhs, variable) && !expressionIsVariable(lhs, variable))
            return src;
        return 0;
    }

    case "Assume":
        return expressionUsesVariableContents(edge.Exp[0], variable) ? src : 0;

    case "Call": {
        const callee = edge.Exp[0];
        if (expressionUsesVariable(callee, variable))
            return src;
        if ("PEdgeCallInstance" in edge) {
            if (expressionUsesVariable(edge.PEdgeCallInstance.Exp, variable)) {
                if (edgeStartsValueLiveRange(edge, variable)) {
                    // If the variable is being constructed, then the incoming
                    // value is not used here; it didn't exist before
                    // construction. (The analysis doesn't get told where
                    // variables are defined, so must infer it from
                    // construction. If the variable does not have a
                    // constructor, its live range may be larger than it really
                    // ought to be if it is defined within a loop body, but
                    // that is conservative.)
                } else {
                    return src;
                }
            }
        }
        if ("PEdgeCallArguments" in edge) {
            for (const exp of edge.PEdgeCallArguments.Exp) {
                if (expressionUsesVariable(exp, variable))
                    return src;
            }
        }
        if (edge.Exp.length == 1)
            return 0;

        // Assigning call result to a variable.
        const lhs = edge.Exp[1];
        if (expressionUsesVariable(lhs, variable) && !expressionIsVariable(lhs, variable))
            return src;
        return 0;
    }

    case "Loop":
        return 0;

    case "Assembly":
        return 0;

    default:
        // Name the offending kind so the failure is diagnosable.
        assert(false, `unhandled edge kind ${edge.Kind}`);
    }
}

// If `decl` is the body.DefineVariable[] declaration of a reference type, then
// return the expression without the outer dereference. Otherwise, return the
// original expression.
function maybeDereference(exp, decl) {
    if (exp.Kind == "Drf" && exp.Exp[0].Kind == "Var") {
        // Without a declaration the variable cannot be known to be a
        // reference, so leave the expression untouched.
        if (decl && isReferenceDecl(decl)) {
            return exp.Exp[0];
        }
    }
    return exp;
}

// Return whether `exp` is exactly the given variable (no field access, no
// dereference).
function expressionIsVariable(exp, variable)
{
    return exp.Kind == "Var" && sameVariable(exp.Variable, variable);
}

// Similar to the above, except treat uses of a reference as if they were uses
// of the dereferenced contents. This requires knowing the type of the
// variable, and so takes its declaration rather than the variable itself.
//
// Returns false when `decl` is missing (a declaration lookup can come back
// undefined), since there is then no variable to compare against.
function expressionIsDeclaredVariable(exp, decl)
{
    if (!decl)
        return false;
    exp = maybeDereference(exp, decl);
    return expressionIsVariable(exp, decl.Variable);
}

function expressionIsMethodOnVariableDecl(exp, decl)
{
    // This might be calling a method on a base class, in which case exp will
    // be an unnamed field of the variable instead of the variable itself.
    while (exp.Kind == "Fld" && exp.Field.Name[0].startsWith("field:"))
        exp = exp.Exp[0];
    return expressionIsDeclaredVariable(exp, decl);
}

// Return whether the edge starts the live range of a variable's value, by setting
// it to some new value. Examples of starting obj's live range:
//
//   obj = foo;
//   obj = foo();
//   obj = foo(obj);         // uses previous value but then sets to new value
//   SomeClass obj(true, 1); // constructor
//
function edgeStartsValueLiveRange(edge, variable)
{
    // Direct assignments start live range of lhs: var = value
    if (edge.Kind == "Assign") {
        const lhs = edge.Exp[0];
        return (expressionIsVariable(lhs, variable) &&
                !isReturningImmobileValue(edge, variable));
    }

    if (edge.Kind != "Call")
        return false;

    // Assignments of call results start live range: var = foo()
    if (1 in edge.Exp) {
        const lhs = edge.Exp[1];
        if (expressionIsVariable(lhs, variable))
            return true;
    }

    // Constructor calls start live range of instance: SomeClass var(...)
    if ("PEdgeCallInstance" in edge) {
        let instance = edge.PEdgeCallInstance.Exp;

        // Kludge around incorrect dereference on some constructor calls.
        if (instance.Kind == "Drf")
            instance = instance.Exp[0];

        if (!expressionIsVariable(instance, variable))
            return false;

        const callee = edge.Exp[0];
        if (callee.Kind != "Var")
            return false;

        assert(callee.Variable.Kind == "Func");
        let calleeName = readable(callee.Variable.Name[0]);

        // Constructor calls include the text 'Name::Name(' or 'Name<...>::Name('.
        const openParen = calleeName.indexOf('(');
        if (openParen < 0)
            return false;
        calleeName = calleeName.substring(0, openParen);

        const lastColon = calleeName.lastIndexOf('::');
        if (lastColon < 0)
            return false;
        const constructorName = calleeName.slice(lastColon + 2);
        calleeName = calleeName.slice(0, lastColon);

        const lastTemplateOpen = calleeName.lastIndexOf('<');
        if (lastTemplateOpen >= 0)
            calleeName = calleeName.slice(0, lastTemplateOpen);

        if (calleeName.endsWith(constructorName))
            return true;
    }

    return false;
}

// Return the result of a `matcher` callback on the call found in the given
// `edge`, if the edge is a direct call to a named function (if not, return false).
// `matcher` is given the name of the callee (actually, a tuple
// [fully qualified name, base name]), an array of expressions containing the
// arguments, and if the result of the call is assigned to a variable,
// the expression representing that variable(the lhs).
//
// https://firefox-source-docs.mozilla.org/js/HazardAnalysis/CFG.html for
// documentation of the data structure used here.
function matchEdgeCall(edge, matcher) {
    if (edge.Kind != "Call") {
        return false;
    }

    const callee = edge.Exp[0];

    if (edge.Type.Kind == 'Function' &&
        callee.Kind == 'Var' &&
        callee.Variable.Kind == 'Func') {
        const calleeName = callee.Variable.Name;
        const args = edge.PEdgeCallArguments;
        const argExprs = args ? args.Exp : [];
        const lhs = edge.Exp[1]; // May be undefined
        return matcher(calleeName, argExprs, lhs);
    }

    return false;
}

// Return whether `edge` is a call to JS::detail::MarkVariableAsGCSafe whose
// single argument is exactly `variable`.
function edgeMarksVariableGCSafe(edge, variable) {
    return matchEdgeCall(edge, (calleeName, argExprs, _lhs) => {
        // explicit JS_HAZ_VARIABLE_IS_GC_SAFE annotation
        return (calleeName[1] == 'MarkVariableAsGCSafe' &&
                calleeName[0].includes("JS::detail::MarkVariableAsGCSafe") &&
                argExprs.length == 1 &&
                expressionIsVariable(argExprs[0], variable));
    });
}

// Match an optional <namespace>:: followed by the class name,
// and then an optional template parameter marker.
//
// Example: mozilla::dom::UniquePtr<...
//
// Returns undefined if `typeName` does not start with such a name. Otherwise
// returns { type, namespace, classname, is_specialized }, where `namespace`
// is "" when the name is unqualified and `is_specialized` is "<" or undefined.
function parseTypeName(typeName) {
    const m = typeName.match(/^(((?:\w|::)+::)?(\w+))\b(\<)?/);
    if (!m) {
        return undefined;
    }
    const [, type, raw_namespace, classname, is_specialized] = m;
    // A capture group that did not participate in the match is undefined
    // (not null), so normalize any missing namespace to the empty string.
    const namespace = raw_namespace || "";
    return { type, namespace, classname, is_specialized }
}

// Return whether an edge "clears out" a variable's value. A simple example
// would be
//
//   var = nullptr;
//
// for analyses for which nullptr is a "safe" value (eg GC rooting hazards; you
// can't get in trouble by holding a nullptr live across a GC.)
// A more complex example is a Maybe<T> that gets reset:
//
//     Maybe<T> nogc;
//     nogc.emplace(cx);
//     nogc.reset();
//     gc();             // <-- not a problem; nogc is invalidated by prev line
//     nogc.emplace(cx);
//     foo(nogc);
//
// Yet another example is a UniquePtr being passed by value, which means the
// receiver takes ownership:
//
//     UniquePtr<T> uobj(obj);
//     foo(uobj);
//     gc();
//
// Parameters: `edge` is the CFG edge to inspect, `variable` the variable of
// interest, and `body` the function body containing the edge (used to look up
// the variable's declaration and to scan forward from a std::move).
// Returns true if the edge ends the variable's live range, false otherwise.
function edgeEndsValueLiveRange(edge, variable, body)
{
    // var = nullptr;
    if (edge.Kind == "Assign") {
        const [lhs, rhs] = edge.Exp;
        return expressionIsVariable(lhs, variable) && isImmobileValue(rhs);
    }

    if (edge.Kind != "Call")
        return false;

    if (edgeMarksVariableGCSafe(edge, variable)) {
        // explicit JS_HAZ_VARIABLE_IS_GC_SAFE annotation
        return true;
    }

    const decl = lookupVariable(body, variable);

    if (matchEdgeCall(edge, (calleeName, argExprs, lhs) => {
        return calleeName[1] == 'move' && calleeName[0].includes('std::move(') &&
            expressionIsDeclaredVariable(argExprs[0], decl) &&
            lhs &&
            lhs.Kind == 'Var' &&
            lhs.Variable.Kind == 'Temp';
    })) {
        // temp = std::move(var)
        //
        // If var is a UniquePtr, and we pass it into something that takes
        // ownership, then it should be considered to be invalid. Example:
        //
        //     consume(std::move(var));
        //
        // where consume takes a UniquePtr. This will compile to something like
        //
        //     UniquePtr* __temp_1 = &std::move(var);
        //     UniquePtr&& __temp_2(*temp_1); // move constructor
        //     consume(__temp_2);
        //     ~UniquePtr(__temp_2);
        //
        // The line commented with "// move constructor" is a result of passing
        // a UniquePtr as a parameter. If consume() took a UniquePtr&&
        // directly, this would just be:
        //
        //     UniquePtr* __temp_1 = &std::move(var);
        //     consume(__temp_1);
        //
        // which is not guaranteed to move from the reference. It might just
        // ignore the parameter. We can't predict what consume(UniquePtr&&)
        // will do. We do know that UniquePtr(UniquePtr&& other) moves out of
        // `other`.
        //
        // The std::move() technically is irrelevant, but because we only care
        // about bare variables, it has to be used, which is fortunate because
        // the UniquePtr&& constructor operates on a temporary, not the
        // variable we care about.

        const lhs = edge.Exp[1].Variable;
        if (basicBlockEatsVariable(lhs, body, edge.Index[1]))
            return true;
    }

    const callee = edge.Exp[0];

    if (edge.Type.Kind == 'Function' &&
        edge.Type.TypeFunctionCSU &&
        edge.PEdgeCallInstance &&
        expressionIsMethodOnVariableDecl(edge.PEdgeCallInstance.Exp, decl))
    {
        const typeName = edge.Type.TypeFunctionCSU.Type.Name;

        // Synthesize a zero-arg constructor name like
        // mozilla::dom::UniquePtr<T>::UniquePtr(). Note that the `<T>` is
        // literal -- the pretty name from sixgill will render the actual
        // constructor name as something like
        //
        //     UniquePtr<T>::UniquePtr() [where T = int]
        //
        const parsed = parseTypeName(typeName);
        if (parsed) {
            // Default the namespace so that an unqualified type can never be
            // rendered with the text "undefined" in front of it.
            const { type, namespace = "", classname, is_specialized } = parsed;

            // special-case: the initial constructor that doesn't provide a value.
            // Useful for things like Maybe<T>.
            //
            // NOTE(review): both branches of this conditional were the empty
            // string, which made `is_specialized` meaningless. The literal
            // "<T>" follows the description above -- confirm against a real
            // sixgill constructor name.
            const template = is_specialized ? '<T>' : '';
            const ctorName = `${namespace}${classname}${template}::${classname}()`;
            if (callee.Kind == 'Var' &&
                typesWithSafeConstructors.has(type) &&
                callee.Variable.Name[0].includes(ctorName))
            {
                return true;
            }

            // special-case: UniquePtr::reset() and similar.
            if (callee.Kind == 'Var' &&
                type in resetterMethods &&
                resetterMethods[type].has(callee.Variable.Name[1]))
            {
                return true;
            }
        }
    }

    // special-case: passing UniquePtr<T> by value.
    if (edge.Type.Kind == 'Function' &&
        edge.Type.TypeFunctionArgument &&
        edge.PEdgeCallArguments)
    {
        // The parameter list and the argument list are walked in parallel,
        // using the same key for both.
        for (const i in edge.Type.TypeFunctionArgument) {
            const param = edge.Type.TypeFunctionArgument[i];
            if (param.Type.Kind != 'CSU')
                continue;
            if (!param.Type.Name.startsWith("mozilla::UniquePtr<"))
                continue;
            const arg = edge.PEdgeCallArguments.Exp[i];
            if (expressionIsVariable(arg, variable)) {
                return true;
            }
        }
    }

    return false;
}

// Look up a variable in the list of declarations for this body.
// Returns the matching entry of body.DefineVariable, or undefined if the body
// has no declarations or none of them is for `variable`.
function lookupVariable(body, variable) {
    for (const decl of (body.DefineVariable || [])) {
        if (sameVariable(decl.Variable, variable)) {
            return decl;
        }
    }
    return undefined;
}

// Return whether `edge` is a call to a named function that is passed a
// dereference of `variable`, where `variable` is declared in `body` as an
// rvalue reference.
function edgeMovesVariable(edge, variable, body)
{
    if (edge.Kind != 'Call')
        return false;

    const callee = edge.Exp[0];
    if (callee.Kind != 'Var' || callee.Variable.Kind != 'Func')
        return false;

    // Match an rvalue parameter.
    const args = edge.PEdgeCallArguments && edge.PEdgeCallArguments.Exp;
    if (!args)
        return false;

    for (const arg of args) {
        if (arg.Kind != 'Drf') continue;
        const val = arg.Exp[0];
        if (val.Kind == 'Var' && sameVariable(val.Variable, variable)) {
            // This argument is the variable we're looking for. Return true
            // if it is passed as an rvalue reference. A variable with no
            // declaration in this body cannot be shown to be one, so it is
            // treated as "not moved" instead of throwing.
            const decl = lookupVariable(body, variable);
            const type = decl && decl.Type;
            if (type && type.Kind == "Pointer" && type.Reference == PTR_RVALUE_REF) {
                return true;
            }
        }
    }

    return false;
}

// Scan forward through the basic block in 'body' starting at 'startpoint',
// looking for a call that passes 'variable' to a move constructor that
// "consumes" it (eg UniquePtr::UniquePtr(UniquePtr&&)).
//
// Returns true if such a call is found before the block forks, ends, or
// gives 'variable' a new value; false otherwise.
function basicBlockEatsVariable(variable, body, startpoint)
{
    const successors = getSuccessors(body);
    let point = startpoint;
    while (point in successors) {
        // Only handle a single basic block. If it forks, stop looking.
        const edges = successors[point];
        if (edges.length != 1) {
            return false;
        }
        const edge = edges[0];

        if (edgeMovesVariable(edge, variable, body)) {
            return true;
        }

        // edgeStartsValueLiveRange will find places where 'variable' is given
        // a new value. Never observed in practice, since this function is only
        // called with a temporary resulting from std::move(), which is used
        // immediately for a call. But just to be robust to future uses:
        if (edgeStartsValueLiveRange(edge, variable)) {
            return false;
        }

        point = edge.Index[1];
    }

    return false;
}

// Bit flags returned by getCalleeProperties().
var PROP_REFCNT = 1 << 0;
var PROP_SHARED_PTR_DTOR = 1 << 1;

// Classify a callee by name. Returns a bitmask: PROP_REFCNT if
// isRefcountedDtor() accepts the name, PROP_SHARED_PTR_DTOR if the name
// contains "~shared_ptr()".
function getCalleeProperties(calleeName) {
    let props = 0;

    if (isRefcountedDtor(calleeName)) {
        props |= PROP_REFCNT;
    }
    if (calleeName.includes("~shared_ptr()")) {
        props |= PROP_SHARED_PTR_DTOR;
    }
    return props;
}

// Basic C++ ABI mangling: prefix an identifier with its length, in decimal.
function mangle(name) {
    return name.length + name;
}

var TriviallyDestructibleTypes = new Set([
    // Single-token types from
    // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-builtin
    "void", "wchar_t", "bool", "char", "short", "int", "long", "float", "double",
    "__int64", "__int128", "__float128", "char32_t", "char16_t", "char8_t",
    // Remaining observed cases. These are types T in shared_ptr<T> that have
    // been observed, where the types themselves have trivial destructors, and
    // the custom deleter doesn't do anything nontrivial that we might care about.
    "_IO_FILE"
]);

// Build the "mangled$pretty" name of the destructor of `className`.
//
// Returns undefined when no name is synthesized: for names containing "<",
// a space or "{", and for members of TriviallyDestructibleTypes.
function synthesizeDestructorName(className) {
    if (className.includes("<") || className.includes(" ") || className.includes("{")) {
        return;
    }
    if (TriviallyDestructibleTypes.has(className)) {
        return;
    }
    const parts = className.split("::");
    const mangled_dtor = "_ZN" + parts.map(p => mangle(p)).join("") + "D2Ev";
    const pretty_dtor = `void ${className}::~${parts.at(-1)}()`;
    // Note that there will be a later check to verify that the function name
    // synthesized here is an actual function, and assert if not (see
    // assertFunctionExists() in computeCallgraph.js.)
    return mangled_dtor + "$" + pretty_dtor;
}

// Compute extra information about a call edge.
//
// Returns { attrs, extraCalls }: `attrs` is a bitmask of ATTR_* flags for the
// edge, and `extraCalls` is a list of { attrs, name } records for synthesized
// calls that stand in for it. Non-call edges yield { attrs: 0, extraCalls: [] }.
function getCallEdgeProperties(body, edge, calleeName, functionBodies) {
    let attrs = 0;
    let extraCalls = [];

    if (edge.Kind !== "Call") {
        return { attrs, extraCalls };
    }

    const props = getCalleeProperties(calleeName);
    if (props & PROP_REFCNT) {
        // std::swap of two refcounted values thinks it can drop the
        // ref count to zero. Or rather, it just calls operator=() in a context
        // where the refcount will never drop to zero.
        const blockId = blockIdentifier(body);
        if (blockId.includes("std::swap") || blockId.includes("mozilla::Swap")) {
            // Replace the refcnt release call with nothing. It's not going to happen.
            attrs |= ATTR_REPLACED;
        }
    }

    if (props & PROP_SHARED_PTR_DTOR) {
        // Replace shared_ptr<T>::~shared_ptr() calls to T::~T() calls.
        // Note that this will only apply to simple cases.
        // Any templatized type, in particular, will be ignored and the original
        // call tree will be left alone. If this triggers a hazard, then we can
        // consider extending the mangling support.
        //
        // If the call to ~shared_ptr is not replaced, then it might end up calling
        // an unknown function pointer. This does not always happen-- in some cases,
        // the call tree below ~shared_ptr will invoke the correct destructor without
        // going through function pointers.
        const m = calleeName.match(/shared_ptr<(.*?)>::~shared_ptr\(\)(?: \[with T = ([\w:]+))?/);
        assert(m);
        let className = m[1] == "T" ? m[2] : m[1];
        // Rejects both "" and a missing "[with T = ...]" suffix (undefined),
        // which would otherwise fail on the replace() calls below.
        assert(className);
        // cv qualification does not apply to destructors.
        className = className.replace("const ", "");
        className = className.replace("volatile ", "");
        const dtor = synthesizeDestructorName(className);
        if (dtor) {
            attrs |= ATTR_REPLACED;
            extraCalls.push({
                attrs: ATTR_SYNTHETIC,
                name: dtor,
            });
        }
    }

    if ((props & PROP_REFCNT) == 0) {
        return { attrs, extraCalls };
    }

    const instance = edge.PEdgeCallInstance.Exp;
    if (instance.Kind !== "Var") {
        // TODO: handle field destructors
        return { attrs, extraCalls };
    }

    // Test whether the dtor call is dominated by operations on the variable
    // that mean it will not go to a zero refcount in the dtor: either because
    // it's already dead (eg r.forget() was called) or because it can be proven
    // to have a ref count of greater than 1. This is implemented by looking
    // for the reverse: find a path scanning backwards from the dtor call where
    // the variable is used in any way that does *not* ensure that it is
    // trivially destructible.

    const variable = instance.Variable;

    const visitor = new class DominatorVisitor extends Visitor {
        // Do not revisit nodes. For new nodes, relay the decision made by
        // extend_path.
        next_action(seen, current) { return seen ? "prune" : current; }

        // We don't revisit, so always use the new.
        merge_info(seen, current) { return current; }

        // Return the action to take from this node.
        extend_path(edge, body, ppoint, successor_value) {
            if (!edge) {
                // Dummy edge to join two points.
                return "continue";
            }

            if (!edgeUsesVariable(edge, variable, body)) {
                // Nothing of interest on this edge, keep searching.
                return "continue";
            }

            if (edgeEndsValueLiveRange(edge, variable, body)) {
                // This path is safe!
                return "prune";
            }

            // Unsafe. Found a use that might set the variable to a
            // nonzero refcount.
            return "done";
        }
    }(functionBodies);

    // Searching upwards from a destructor call, return the opposite of: is
    // there a path to a use or the start of the function that does NOT hit a
    // safe assignment like refptr.forget() first?
    //
    // In graph terms: return whether the destructor call is dominated by forget() calls (or similar).
    const edgeIsNonReleasingDtor = !BFS_upwards(
        body, edge.Index[0], functionBodies, visitor, "start",
        false // Return value if we do not reach the root without finding a non-forget() use.
    );
    if (edgeIsNonReleasingDtor) {
        attrs |= ATTR_GC_SUPPRESSED | ATTR_NONRELEASING;
    }
    return { attrs, extraCalls };
}

// gcc uses something like "__dt_del " for virtual destructors that it
// generates.
function isSyntheticVirtualDestructor(funcName) {
    return funcName.endsWith(" ");
}

// Return the name used to identify `field` within its CSU: "name:nargs" for
// a virtual method, the bare name for a synthetic virtual destructor, and
// the field name for a function pointer field.
function typedField(field)
{
    if ("FieldInstanceFunction" in field) {
        // Virtual call
        //
        // This makes a minimal attempt at dealing with overloading, by
        // incorporating the number of parameters. So far, that is all that has
        // been needed. If more is needed, sixgill will need to produce a full
        // mangled type.
        const {Type, Name: [name]} = field;

        // Virtual destructors don't need a type or argument count,
        // and synthetic ones don't have them filled in.
        if (isSyntheticVirtualDestructor(name)) {
            return name;
        }

        // NOTE(review): this reads `TypeFunctionArguments` (plural) while
        // edgeEndsValueLiveRange reads `TypeFunctionArgument` -- confirm
        // which spelling sixgill emits.
        let nargs = 0;
        if (Type.Kind == "Function" && "TypeFunctionArguments" in Type)
            nargs = Type.TypeFunctionArguments.Type.length;
        return name + ":" + nargs;
    } else {
        // Function pointer field
        return field.Name[0];
    }
}

// Return the key identifying `field` of the CSU named `csuName`:
// "<csuName>.<typedField(field)>".
function fieldKey(csuName, field)
{
    return csuName + "." + typedField(field);
}
+ typedField(field); +} diff --git a/js/src/devtools/rootAnalysis/README.md b/js/src/devtools/rootAnalysis/README.md new file mode 100644 index 0000000000..08a4fcde29 --- /dev/null +++ b/js/src/devtools/rootAnalysis/README.md @@ -0,0 +1,3 @@ +# Spidermonkey JSAPI rooting analysis + +See js/src/docs/HazardAnalysis/index.md diff --git a/js/src/devtools/rootAnalysis/analyze.py b/js/src/devtools/rootAnalysis/analyze.py new file mode 100755 index 0000000000..dd37991d41 --- /dev/null +++ b/js/src/devtools/rootAnalysis/analyze.py @@ -0,0 +1,462 @@ +#!/usr/bin/env python3 + +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Runs the static rooting analysis +""" + +import argparse +import os +import subprocess +import sys +from subprocess import Popen + +try: + from shlex import quote +except ImportError: + from pipes import quote + + +def execfile(thefile, globals): + exec(compile(open(thefile).read(), filename=thefile, mode="exec"), globals) + + +# Label a string as an output. +class Output(str): + pass + + +# Label a string as a pattern for multiple inputs. +class MultiInput(str): + pass + + +# Construct a new environment by merging in some settings needed for running the individual scripts. +def env(config): + # Add config['sixgill_bin'] to $PATH if not already there. 
    # (Body of env(config).) Start from the current $PATH entries and put the
    # configured sixgill bin directory first if it is not already listed.
    path = os.environ["PATH"].split(":")
    if dir := config.get("sixgill_bin"):
        if dir not in path:
            path.insert(0, dir)

    # Copy of os.environ with PATH, XDB and SOURCE overridden. Note that XDB
    # and SOURCE index config directly, so both keys must be present.
    return dict(
        os.environ,
        PATH=":".join(path),
        XDB=f"{config['sixgill_bin']}/xdb.so",
        SOURCE=config["source"],
    )


def fill(command, config):
    """Substitute config values into the fragments of a command.

    Each fragment is formatted with the entries of `config`. Output fragments
    stay wrapped as Output. A MultiInput fragment expands to one entry per
    job, formatted a second time with i=1..N and n=N where N is
    config["jobs"].

    Returns the filled fragments as a tuple. Raises Exception if a fragment
    names a key that is missing from `config`.
    """
    filled = []
    for s in command:
        try:
            rep = s.format(**config)
        except KeyError:
            print("Substitution failed: %s" % s)
            filled = None
            break

        if isinstance(s, Output):
            filled.append(Output(rep))
        elif isinstance(s, MultiInput):
            N = int(config["jobs"])
            for i in range(1, N + 1):
                filled.append(rep.format(i=i, n=N))
        else:
            filled.append(rep)

    if filled is None:
        raise Exception("substitution failure")

    return tuple(filled)


def print_command(job, config, env=None):
    # Display a command to run that has roughly the same effect as what was
    # actually run. The actual command uses temporary files that get renamed at
    # the end, and run some commands in parallel chunks. The printed command
    # will substitute in the actual output and run in a single chunk, so that
    # it is easier to cut & paste and add a --function flag for debugging.
    cfg = dict(config, n=1, i=1, jobs=1)
    cmd = job_command_with_final_output_names(job)
    cmd = fill(cmd, cfg)

    cmd = [quote(s) for s in cmd]
    if outfile := job.get("redirect-output"):
        cmd.extend([">", quote(outfile.format(**cfg))])
    if HOME := os.environ.get("HOME"):
        cmd = [s.replace(HOME, "~") for s in cmd]

    if env:
        # Try to keep the command as short as possible by only displaying
        # modified environment variable settings.
        e = os.environ
        changed = {key: value for key, value in env.items() if value != e.get(key)}
        if changed:
            settings = []
            for key, value in changed.items():
                if key in e and e[key] in value:
                    # Display modifications as V=prefix${V}suffix when
                    # possible. This can make a huge difference for $PATH.
                    start = value.index(e[key])
                    end = start + len(e[key])
                    setting = '%s="%s${%s}%s"' % (key, value[:start], key, value[end:])
                else:
                    setting = '%s="%s"' % (key, value)
                if HOME:
                    setting = setting.replace(HOME, "$HOME")
                settings.append(setting)

            cmd = settings + cmd

    print(" " + " ".join(cmd))


# Table of analysis steps, keyed by step name. Keys used by the code below:
#   "command"         - argv template; "{key}" fields are filled from the config
#   "outputs"         - final file names, one per Output(...) in the command
#   "redirect-output" - file that receives the command's stdout
#   "multi-output"    - if set, the step is run once per parallel job
JOBS = {
    "list-dbs": {"command": ["ls", "-l"]},
    "rawcalls": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeCallgraph.js",
            "{typeInfo}",
            Output("{rawcalls}"),
            "{i}",
            "{n}",
        ],
        "multi-output": True,
        "outputs": ["rawcalls.{i}.of.{n}"],
    },
    "gcFunctions": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeGCFunctions.js",
            MultiInput("{rawcalls}"),
            "--outputs",
            Output("{callgraph}"),
            Output("{gcFunctions}"),
            Output("{gcFunctions_list}"),
            Output("{limitedFunctions_list}"),
        ],
        "outputs": [
            "callgraph.txt",
            "gcFunctions.txt",
            "gcFunctions.lst",
            "limitedFunctions.lst",
        ],
    },
    "gcTypes": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeGCTypes.js",
            Output("{gcTypes}"),
            Output("{typeInfo}"),
        ],
        "outputs": ["gcTypes.txt", "typeInfo.txt"],
    },
    "allFunctions": {
        "command": ["{sixgill_bin}/xdbkeys", "src_body.xdb"],
        "redirect-output": "allFunctions.txt",
    },
    "hazards": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/analyzeRoots.js",
            "{gcFunctions_list}",
            "{limitedFunctions_list}",
            "{gcTypes}",
            "{typeInfo}",
            "{i}",
            "{n}",
            "tmp.{i}.of.{n}",
        ],
        "multi-output": True,
        "redirect-output": "rootingHazards.{i}.of.{n}",
    },
    "gather-hazards": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/mergeJSON.js",
            MultiInput("{hazards}"),
            Output("{all_hazards}"),
        ],
        "outputs": ["rootingHazards.json"],
    },
    "explain": {
        "command": [
            sys.executable,
            "{analysis_scriptdir}/explain.py",
            "{all_hazards}",
            "{gcFunctions}",
            Output("{explained_hazards}"),
            Output("{unnecessary}"),
            Output("{refs}"),
            Output("{html}"),
        ],
        "outputs": ["hazards.txt", "unnecessary.txt", "refs.txt", "hazards.html"],
    },
    "heapwrites": {
        "command": ["{js}", "{analysis_scriptdir}/analyzeHeapWrites.js"],
        "redirect-output": "heapWriteHazards.txt",
    },
}


# Generator of (i, j, item) tuples corresponding to outputs:
# - i is just the index of the yielded tuple (a la enumerate())
# - j is the index of the item in the command list
# - item is command[j]
def out_indexes(command):
    i = 0
    for j, fragment in enumerate(command):
        if isinstance(fragment, Output):
            yield (i, j, fragment)
            i += 1


def job_command_with_final_output_names(job):
    """Return a copy of the job's command list in which the i-th Output
    fragment is replaced by the i-th entry of the job's "outputs" list."""
    outfiles = job.get("outputs", [])
    command = list(job["command"])
    for i, j, name in out_indexes(job["command"]):
        command[j] = outfiles[i]
    return command


def run_job(name, config):
    """Run the step called `name` and wait for all of its processes.

    Spawns one process per parallel job for "multi-output" steps and a single
    process otherwise, storing the job count and index in config["n"] and
    config["i"]. As each child is reaped, its temporary output files are
    renamed to their final names. Raises Exception if any child reported a
    nonzero status.
    """
    job = JOBS[name]
    outs = job.get("outputs") or job.get("redirect-output")
    print("Running " + name + " to generate " + str(outs))
    if "function" in job:
        job["function"](config, job["redirect-output"])
        return

    N = int(config["jobs"]) if job.get("multi-output") else 1
    config["n"] = N
    jobs = {}  # pid -> info dict returned by spawn_command()
    for i in range(1, N + 1):
        config["i"] = i
        cmd = fill(job["command"], config)
        info = spawn_command(cmd, job, name, config)
        jobs[info["proc"].pid] = info

    if config["verbose"] > 0:
        print_command(job, config, env=env(config))

    final_status = 0
    while jobs:
        pid, status = os.wait()
        # Keep the first nonzero status seen.
        final_status = final_status or status
        info = jobs[pid]
        del jobs[pid]
        if "redirect" in info:
            info["redirect"].close()

        # Rename the temporary files to their final names.
        for temp, final in info["rename_map"].items():
            try:
                if config["verbose"] > 1:
                    print("Renaming %s -> %s" % (temp, final))
                os.rename(temp, final)
            except OSError:
                print("Error renaming %s -> %s" % (temp, final))
                raise

    if final_status != 0:
        raise Exception("job {} returned status {}".format(name, final_status))


def spawn_command(cmdspec, job, name, config):
    """Start one process for `job` and return a record describing it.

    `cmdspec` is the filled command. The returned dict has "proc" (the Popen
    object), "rename_map" (temporary file name -> final file name) and, when
    stdout is redirected, "redirect" (the open output file, which the caller
    is expected to close).
    """
    rename_map = {}

    if "redirect-output" in job:
        stdout_filename = "{}.tmp{}".format(name, config.get("i", ""))
        final_outfile = job["redirect-output"].format(**config)
        rename_map[stdout_filename] = final_outfile
        command = cmdspec
    else:
        outfiles = fill(job["outputs"], config)
        stdout_filename = None

        # Replace the Outputs with temporary filenames, and record a mapping
        # from those temp names to their actual final names that will be used
        # if the command succeeds.
        command = list(cmdspec)
        for i, j, raw_name in out_indexes(cmdspec):
            # Note: this rebinds the `name` parameter to the output's name.
            [name] = fill([raw_name], config)
            command[j] = "{}.tmp{}".format(name, config.get("i", ""))
            rename_map[command[j]] = outfiles[i]

    sys.stdout.flush()
    info = {"rename_map": rename_map}
    if stdout_filename:
        info["redirect"] = open(stdout_filename, "w")
        info["proc"] = Popen(command, stdout=info["redirect"], env=env(config))
    else:
        info["proc"] = Popen(command, env=env(config))

    if config["verbose"] > 1:
        print("Spawned process {}".format(info["proc"].pid))

    return info


# Default to conservatively assuming 4GB/job.
+def max_parallel_jobs(job_size=4 * 2**30): + """Return the max number of parallel jobs we can run without overfilling + memory, assuming heavyweight jobs.""" + from_cores = int(subprocess.check_output(["nproc", "--ignore=1"]).strip()) + mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") + from_mem = round(mem_bytes / job_size) + return min(from_cores, from_mem) + + +config = {"analysis_scriptdir": os.path.dirname(__file__)} + +defaults = [ + "%s/defaults.py" % config["analysis_scriptdir"], + "%s/defaults.py" % os.getcwd(), +] + +parser = argparse.ArgumentParser( + description="Statically analyze build tree for rooting hazards." +) +parser.add_argument( + "step", metavar="STEP", type=str, nargs="?", help="run only step STEP" +) +parser.add_argument( + "--source", metavar="SOURCE", type=str, nargs="?", help="source code to analyze" +) +parser.add_argument( + "--js", + metavar="JSSHELL", + type=str, + nargs="?", + help="full path to ctypes-capable JS shell", +) +parser.add_argument( + "--first", + metavar="STEP", + type=str, + nargs="?", + help="execute all jobs starting with STEP", +) +parser.add_argument( + "--last", metavar="STEP", type=str, nargs="?", help="stop at step STEP" +) +parser.add_argument( + "--jobs", + "-j", + default=None, + metavar="JOBS", + type=int, + help="number of simultaneous analyzeRoots.js jobs", +) +parser.add_argument( + "--list", const=True, nargs="?", type=bool, help="display available steps" +) +parser.add_argument( + "--expect-file", + type=str, + nargs="?", + help="deprecated option, temporarily still present for backwards " "compatibility", +) +parser.add_argument( + "--verbose", + "-v", + action="count", + default=1, + help="Display cut & paste commands to run individual steps (give twice for more output)", +) +parser.add_argument("--quiet", "-q", action="count", default=0, help="Suppress output") + +args = parser.parse_args() +args.verbose = max(0, args.verbose - args.quiet) + +for default in defaults: + try: + 
execfile(default, config) + if args.verbose > 1: + print("Loaded %s" % default) + except Exception: + pass + +# execfile() used config as the globals for running the +# defaults.py script, and will have set a __builtins__ key as a side effect. +del config["__builtins__"] +data = config.copy() + +for k, v in vars(args).items(): + if v is not None: + data[k] = v + +if args.jobs is not None: + data["jobs"] = args.jobs +if not data.get("jobs"): + data["jobs"] = max_parallel_jobs() + +if "GECKO_PATH" in os.environ: + data["source"] = os.environ["GECKO_PATH"] +if "SOURCE" in os.environ: + data["source"] = os.environ["SOURCE"] + +steps = [ + "gcTypes", + "rawcalls", + "gcFunctions", + "allFunctions", + "hazards", + "gather-hazards", + "explain", + "heapwrites", +] + +if args.list: + for step in steps: + job = JOBS[step] + outfiles = job.get("outputs") or job.get("redirect-output") + if outfiles: + print( + "%s\n ->%s %s" + % (step, "*" if job.get("multi-output") else "", outfiles) + ) + else: + print(step) + sys.exit(0) + +for step in steps: + job = JOBS[step] + if "redirect-output" in job: + data[step] = job["redirect-output"] + elif "outputs" in job and "command" in job: + outfiles = job["outputs"] + num_outputs = 0 + for i, j, name in out_indexes(job["command"]): + # Trim the {curly brackets} off of the output keys. 
+ data[name[1:-1]] = outfiles[i] + num_outputs += 1 + assert ( + len(outfiles) == num_outputs + ), 'step "%s": mismatched number of output files (%d) and params (%d)' % ( + step, + num_outputs, + len(outfiles), + ) # NOQA: E501 + +if args.step: + if args.first or args.last: + raise Exception( + "--first and --last cannot be used when a step argument is given" + ) + steps = [args.step] +else: + if args.first: + steps = steps[steps.index(args.first) :] + if args.last: + steps = steps[: steps.index(args.last) + 1] + +for step in steps: + run_job(step, data) diff --git a/js/src/devtools/rootAnalysis/analyzeHeapWrites.js b/js/src/devtools/rootAnalysis/analyzeHeapWrites.js new file mode 100644 index 0000000000..28679676a5 --- /dev/null +++ b/js/src/devtools/rootAnalysis/analyzeHeapWrites.js @@ -0,0 +1,1396 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); +loadRelativeToScript('callgraph.js'); +loadRelativeToScript('dumpCFG.js'); + +/////////////////////////////////////////////////////////////////////////////// +// Annotations +/////////////////////////////////////////////////////////////////////////////// + +function checkExternalFunction(entry) +{ + var whitelist = [ + "__builtin_clz", + "__builtin_expect", + "isprint", + "ceilf", + "floorf", + /^rusturl/, + "memcmp", + "strcmp", + "fmod", + "floor", + "ceil", + "atof", + /memchr/, + "strlen", + /Servo_DeclarationBlock_GetCssText/, + "Servo_GetArcStringData", + "Servo_IsWorkerThread", + /nsIFrame::AppendOwnedAnonBoxes/, + // Assume that atomic accesses are threadsafe. + /^__atomic_/, + ]; + if (entry.matches(whitelist)) + return; + + // memcpy and memset are safe if the target pointer is threadsafe. 
+ const simpleWrites = [ + "memcpy", + "memset", + "memmove", + ]; + + if (entry.isSafeArgument(1) && simpleWrites.includes(entry.name)) + return; + + dumpError(entry, null, "External function"); +} + +function hasThreadsafeReferenceCounts(entry, regexp) +{ + // regexp should match some nsISupports-operating function and produce the + // name of the nsISupports class via exec(). + + // nsISupports classes which have threadsafe reference counting. + var whitelist = [ + "nsIRunnable", + + // I don't know if these always have threadsafe refcounts. + "nsAtom", + "nsIPermissionManager", + "nsIURI", + ]; + + var match = regexp.exec(entry.name); + return match && nameMatchesArray(match[1], whitelist); +} + +function checkOverridableVirtualCall(entry, location, callee) +{ + // We get here when a virtual call is made on a structure which might be + // overridden by script or by a binary extension. This includes almost + // everything under nsISupports, however, so for the most part we ignore + // this issue. The exception is for nsISupports AddRef/Release, which are + // not in general threadsafe and whose overrides will not be generated by + // the callgraph analysis. + if (callee != "nsISupports.AddRef" && callee != "nsISupports.Release") + return; + + if (hasThreadsafeReferenceCounts(entry, /::~?nsCOMPtr\(.*?\[with T = (.*?)\]$/)) + return; + if (hasThreadsafeReferenceCounts(entry, /RefPtrTraits.*?::Release.*?\[with U = (.*?)\]/)) + return; + if (hasThreadsafeReferenceCounts(entry, /nsCOMPtr::assign_assuming_AddRef.*?\[with T = (.*?)\]/)) + return; + if (hasThreadsafeReferenceCounts(entry, /nsCOMPtr::assign_with_AddRef.*?\[with T = (.*?)\]/)) + return; + + // Watch for raw addref/release. 
+ var whitelist = [ + "Gecko_AddRefAtom", + "Gecko_ReleaseAtom", + /nsPrincipal::Get/, + /CounterStylePtr::Reset/, + ]; + if (entry.matches(whitelist)) + return; + + dumpError(entry, location, "AddRef/Release on nsISupports"); +} + +function checkIndirectCall(entry, location, callee) +{ + var name = entry.name; + + // These hash table callbacks should be threadsafe. + if (/PLDHashTable/.test(name) && (/matchEntry/.test(callee) || /hashKey/.test(callee))) + return; + if (/PL_HashTable/.test(name) && /keyCompare/.test(callee)) + return; + + dumpError(entry, location, "Indirect call " + callee); +} + +function checkVariableAssignment(entry, location, variable) +{ + var name = entry.name; + + dumpError(entry, location, "Variable assignment " + variable); +} + +// Annotations for function parameters, based on function name and parameter +// name + type. +function treatAsSafeArgument(entry, varName, csuName) +{ + var whitelist = [ + // These iterator classes should all be thread local. They are passed + // in to some Servo bindings and are created on the heap by others, so + // just ignore writes to them. + [null, null, /StyleChildrenIterator/], + [null, null, /ExplicitChildIterator/], + + // The use of BeginReading() to instantiate this class confuses the + // analysis. + [null, null, /nsReadingIterator/], + + // These classes are passed to some Servo bindings to fill in. + [/^Gecko_/, null, "nsStyleImageLayers"], + [/^Gecko_/, null, /FontFamilyList/], + + // Various Servo binding out parameters. This is a mess and there needs + // to be a way to indicate which params are out parameters, either using + // an attribute or a naming convention. 
+ ["Gecko_SetCounterStyleToName", "aPtr", null], + ["Gecko_SetCounterStyleToSymbols", "aPtr", null], + ["Gecko_SetCounterStyleToString", "aPtr", null], + ["Gecko_CopyCounterStyle", "aDst", null], + ["Gecko_SetMozBinding", "aDisplay", null], + [/ClassOrClassList/, /aClass/, null], + ["Gecko_GetAtomAsUTF16", "aLength", null], + ["Gecko_CopyMozBindingFrom", "aDest", null], + ["Gecko_SetNullImageValue", "aImage", null], + ["Gecko_SetGradientImageValue", "aImage", null], + ["Gecko_SetImageElement", "aImage", null], + ["Gecko_SetLayerImageImageValue", "aImage", null], + ["Gecko_CopyImageValueFrom", "aImage", null], + ["Gecko_SetCursorArrayLength", "aStyleUI", null], + ["Gecko_CopyCursorArrayFrom", "aDest", null], + ["Gecko_SetCursorImageValue", "aCursor", null], + ["Gecko_SetListStyleImageImageValue", "aList", null], + ["Gecko_SetListStyleImageNone", "aList", null], + ["Gecko_CopyListStyleImageFrom", "aList", null], + ["Gecko_ClearStyleContents", "aContent", null], + ["Gecko_CopyStyleContentsFrom", "aContent", null], + ["Gecko_CopyStyleGridTemplateValues", "aGridTemplate", null], + ["Gecko_ResetStyleCoord", null, null], + ["Gecko_CopyClipPathValueFrom", "aDst", null], + ["Gecko_DestroyClipPath", "aClip", null], + ["Gecko_ResetFilters", "effects", null], + [/Gecko_CSSValue_Set/, "aCSSValue", null], + ["Gecko_CSSValue_Drop", "aCSSValue", null], + ["Gecko_CSSFontFaceRule_GetCssText", "aResult", null], + ["Gecko_EnsureTArrayCapacity", "aArray", null], + ["Gecko_ClearPODTArray", "aArray", null], + ["Gecko_SetStyleGridTemplate", "aGridTemplate", null], + ["Gecko_ResizeTArrayForStrings", "aArray", null], + ["Gecko_ClearAndResizeStyleContents", "aContent", null], + [/Gecko_ClearAndResizeCounter/, "aContent", null], + [/Gecko_CopyCounter.*?From/, "aContent", null], + [/Gecko_SetContentDataImageValue/, "aList", null], + [/Gecko_SetContentData/, "aContent", null], + ["Gecko_SetCounterFunction", "aContent", null], + [/Gecko_EnsureStyle.*?ArrayLength/, "aArray", null], + 
["Gecko_GetOrCreateKeyframeAtStart", "aKeyframes", null], + ["Gecko_GetOrCreateInitialKeyframe", "aKeyframes", null], + ["Gecko_GetOrCreateFinalKeyframe", "aKeyframes", null], + ["Gecko_AppendPropertyValuePair", "aProperties", null], + ["Gecko_SetStyleCoordCalcValue", null, null], + ["Gecko_StyleClipPath_SetURLValue", "aClip", null], + ["Gecko_nsStyleFilter_SetURLValue", "aEffects", null], + ["Gecko_nsStyleSVG_SetDashArrayLength", "aSvg", null], + ["Gecko_nsStyleSVG_CopyDashArray", "aDst", null], + ["Gecko_nsStyleFont_SetLang", "aFont", null], + ["Gecko_nsStyleFont_CopyLangFrom", "aFont", null], + ["Gecko_ClearWillChange", "aDisplay", null], + ["Gecko_AppendWillChange", "aDisplay", null], + ["Gecko_CopyWillChangeFrom", "aDest", null], + ["Gecko_InitializeImageCropRect", "aImage", null], + ["Gecko_CopyShapeSourceFrom", "aDst", null], + ["Gecko_DestroyShapeSource", "aShape", null], + ["Gecko_StyleShapeSource_SetURLValue", "aShape", null], + ["Gecko_NewBasicShape", "aShape", null], + ["Gecko_NewShapeImage", "aShape", null], + ["Gecko_nsFont_InitSystem", "aDest", null], + ["Gecko_nsFont_SetFontFeatureValuesLookup", "aFont", null], + ["Gecko_nsFont_ResetFontFeatureValuesLookup", "aFont", null], + ["Gecko_nsStyleFont_FixupNoneGeneric", "aFont", null], + ["Gecko_StyleTransition_SetUnsupportedProperty", "aTransition", null], + ["Gecko_AddPropertyToSet", "aPropertySet", null], + ["Gecko_CalcStyleDifference", "aAnyStyleChanged", null], + ["Gecko_CalcStyleDifference", "aOnlyResetStructsChanged", null], + ["Gecko_nsStyleSVG_CopyContextProperties", "aDst", null], + ["Gecko_nsStyleFont_PrefillDefaultForGeneric", "aFont", null], + ["Gecko_nsStyleSVG_SetContextPropertiesLength", "aSvg", null], + ["Gecko_ClearAlternateValues", "aFont", null], + ["Gecko_AppendAlternateValues", "aFont", null], + ["Gecko_CopyAlternateValuesFrom", "aDest", null], + ["Gecko_CounterStyle_GetName", "aResult", null], + ["Gecko_CounterStyle_GetSingleString", "aResult", null], + 
["Gecko_nsTArray_FontFamilyName_AppendNamed", "aNames", null], + ["Gecko_nsTArray_FontFamilyName_AppendGeneric", "aNames", null], + ]; + for (var [entryMatch, varMatch, csuMatch] of whitelist) { + assert(entryMatch || varMatch || csuMatch); + if (entryMatch && !nameMatches(entry.name, entryMatch)) + continue; + if (varMatch && !nameMatches(varName, varMatch)) + continue; + if (csuMatch && (!csuName || !nameMatches(csuName, csuMatch))) + continue; + return true; + } + return false; +} + +function isSafeAssignment(entry, edge, variable) +{ + if (edge.Kind != 'Assign') + return false; + + var [mangled, unmangled] = splitFunction(entry.name); + + // The assignment + // + // nsFont* font = fontTypes[eType]; + // + // ends up with 'font' pointing to a member of 'this', so it should inherit + // the safety of 'this'. + if (unmangled.includes("mozilla::LangGroupFontPrefs::Initialize") && + variable == 'font') + { + const [lhs, rhs] = edge.Exp; + const {Kind, Exp: [{Kind: indexKind, Exp: [collection, index]}]} = rhs; + if (Kind == 'Drf' && + indexKind == 'Index' && + collection.Kind == 'Var' && + collection.Variable.Name[0] == 'fontTypes') + { + return entry.isSafeArgument(0); // 'this' + } + } + + return false; +} + +function checkFieldWrite(entry, location, fields) +{ + var name = entry.name; + for (var field of fields) { + // The analysis is having some trouble keeping track of whether + // already_AddRefed and nsCOMPtr structures are safe to access. + // Hopefully these will be thread local, but it would be better to + // improve the analysis to handle these. + if (/already_AddRefed.*?.mRawPtr/.test(field)) + return; + if (/nsCOMPtr<.*?>.mRawPtr/.test(field)) + return; + + if (/\bThreadLocal<\b/.test(field)) + return; + + // Debugging check for string corruption. 
+ if (field == "nsStringBuffer.mCanary") + return; + } + + var str = ""; + for (var field of fields) + str += " " + field; + + dumpError(entry, location, "Field write" + str); +} + +function checkDereferenceWrite(entry, location, variable) +{ + var name = entry.name; + + // Maybe uses placement new on local storage in a way we don't understand. + // Allow this if the Maybe<> value itself is threadsafe. + if (/Maybe.*?::emplace/.test(name) && entry.isSafeArgument(0)) + return; + + // UniquePtr writes through temporaries referring to its internal storage. + // Allow this if the UniquePtr<> is threadsafe. + if (/UniquePtr.*?::reset/.test(name) && entry.isSafeArgument(0)) + return; + + // Operations on nsISupports reference counts. + if (hasThreadsafeReferenceCounts(entry, /nsCOMPtr::swap\(.*?\[with T = (.*?)\]/)) + return; + + // ConvertToLowerCase::write writes through a local pointer into the first + // argument. + if (/ConvertToLowerCase::write/.test(name) && entry.isSafeArgument(0)) + return; + + dumpError(entry, location, "Dereference write " + (variable ? variable : "")); +} + +function ignoreCallEdge(entry, callee) +{ + var name = entry.name; + + // nsPropertyTable::GetPropertyInternal has the option of removing data + // from the table, but when it is called by nsPropertyTable::GetProperty + // this will not occur. + if (/nsPropertyTable::GetPropertyInternal/.test(callee) && + /nsPropertyTable::GetProperty/.test(name)) + { + return true; + } + + // Document::PropertyTable calls GetExtraPropertyTable (which has side + // effects) if the input category is non-zero. If a literal zero was passed + // in for the category then we treat it as a safe argument, per + // isEdgeSafeArgument, so just watch for that. 
+ if (/Document::GetExtraPropertyTable/.test(callee) && + /Document::PropertyTable/.test(name) && + entry.isSafeArgument(1)) + { + return true; + } + + // This function has an explicit test for being on the main thread if the + // style has non-threadsafe refcounts, but the analysis isn't smart enough + // to understand what the actual styles that can be involved are. + if (/nsStyleList::SetCounterStyle/.test(callee)) + return true; + + // CachedBorderImageData is exclusively owned by nsStyleImage, but the + // analysis is not smart enough to know this. + if (/CachedBorderImageData::PurgeCachedImages/.test(callee) && + /nsStyleImage::/.test(name) && + entry.isSafeArgument(0)) + { + return true; + } + + // StyleShapeSource exclusively owns its UniquePtr. + if (/nsStyleImage::SetURLValue/.test(callee) && + /StyleShapeSource::SetURL/.test(name) && + entry.isSafeArgument(0)) + { + return true; + } + + // The AddRef through a just-assigned heap pointer here is not handled by + // the analysis. + if (/nsCSSValue::Array::AddRef/.test(callee) && + /nsStyleContentData::SetCounters/.test(name) && + entry.isSafeArgument(2)) + { + return true; + } + + // AllChildrenIterator asks AppendOwnedAnonBoxes to append into an nsTArray + // local variable. + if (/nsIFrame::AppendOwnedAnonBoxes/.test(callee) && + /AllChildrenIterator::AppendNativeAnonymousChildren/.test(name)) + { + return true; + } + + // Runnables are created and named on one thread, then dispatched + // (possibly to another). Writes on the origin thread are ok. + if (/::SetName/.test(callee) && + /::UnlabeledDispatch/.test(name)) + { + return true; + } + + // We manually lock here + if (name == "Gecko_nsFont_InitSystem" || + name == "Gecko_GetFontMetrics" || + name == "Gecko_nsStyleFont_FixupMinFontSize" || + /ThreadSafeGetDefaultFontHelper/.test(name)) + { + return true; + } + + return false; +} + +function ignoreContents(entry) +{ + var whitelist = [ + // We don't care what happens when we're about to crash. 
+ "abort", + /MOZ_ReportAssertionFailure/, + /MOZ_ReportCrash/, + /MOZ_Crash/, + /MOZ_CrashPrintf/, + /AnnotateMozCrashReason/, + /InvalidArrayIndex_CRASH/, + /NS_ABORT_OOM/, + + // These ought to be threadsafe. + "NS_DebugBreak", + /mozalloc_handle_oom/, + /^NS_Log/, /log_print/, /LazyLogModule::operator/, + /SprintfLiteral/, "PR_smprintf", "PR_smprintf_free", + /NS_DispatchToMainThread/, /NS_ReleaseOnMainThread/, + /NS_NewRunnableFunction/, /NS_Atomize/, + /nsCSSValue::BufferFromString/, + /NS_xstrdup/, + /Assert_NoQueryNeeded/, + /AssertCurrentThreadOwnsMe/, + /PlatformThread::CurrentId/, + /imgRequestProxy::GetProgressTracker/, // Uses an AutoLock + /Smprintf/, + "malloc", + "calloc", + "free", + "realloc", + "memalign", + "strdup", + "strndup", + "moz_xmalloc", + "moz_xcalloc", + "moz_xrealloc", + "moz_xmemalign", + "moz_xstrdup", + "moz_xstrndup", + "jemalloc_thread_local_arena", + + // These all create static strings in local storage, which is threadsafe + // to do but not understood by the analysis yet. + / EmptyString\(\)/, + + // These could probably be handled by treating the scope of PSAutoLock + // aka BaseAutoLock as threadsafe. + /profiler_register_thread/, + /profiler_unregister_thread/, + + // The analysis thinks we'll write to mBits in the DoGetStyleFoo + // call. Maybe the template parameter confuses it? + /ComputedStyle::PeekStyle/, + + // The analysis can't cope with the indirection used for the objects + // being initialized here, from nsCSSValue::Array::Create to the return + // value of the Item(i) getter. + /nsCSSValue::SetCalcValue/, + + // Unable to analyze safety of linked list initialization. + "Gecko_NewCSSValueSharedList", + "Gecko_CSSValue_InitSharedList", + + // Unable to trace through dataflow, but straightforward if inspected. + "Gecko_NewNoneTransform", + + // Need main thread assertions or other fixes. 
+ /EffectCompositor::GetServoAnimationRule/, + ]; + if (entry.matches(whitelist)) + return true; + + if (entry.isSafeArgument(0)) { + var heapWhitelist = [ + // Operations on heap structures pointed to by arrays and strings are + // threadsafe as long as the array/string itself is threadsafe. + /nsTArray_Impl.*?::AppendElement/, + /nsTArray_Impl.*?::RemoveElementsAt/, + /nsTArray_Impl.*?::ReplaceElementsAt/, + /nsTArray_Impl.*?::InsertElementAt/, + /nsTArray_Impl.*?::SetCapacity/, + /nsTArray_Impl.*?::SetLength/, + /nsTArray_base.*?::EnsureCapacity/, + /nsTArray_base.*?::ShiftData/, + /AutoTArray.*?::Init/, + /(nsTSubstring|nsAC?String)::SetCapacity/, + /(nsTSubstring|nsAC?String)::SetLength/, + /(nsTSubstring|nsAC?String)::Assign/, + /(nsTSubstring|nsAC?String)::Append/, + /(nsTSubstring|nsAC?String)::Replace/, + /(nsTSubstring|nsAC?String)::Trim/, + /(nsTSubstring|nsAC?String)::Truncate/, + /(nsTSubstring|nsAC?String)::StripTaggedASCII/, + /(nsTSubstring|nsAC?String)::operator=/, + /nsTAutoStringN::nsTAutoStringN/, + + // Similar for some other data structures + /nsCOMArray_base::SetCapacity/, + /nsCOMArray_base::Clear/, + /nsCOMArray_base::AppendElement/, + + // UniquePtr is similar. + /mozilla::UniquePtr/, + + // The use of unique pointers when copying mCropRect here confuses + // the analysis. + /nsStyleImage::DoCopy/, + ]; + if (entry.matches(heapWhitelist)) + return true; + } + + if (entry.isSafeArgument(1)) { + var firstArgWhitelist = [ + /nsTextFormatter::snprintf/, + /nsTextFormatter::ssprintf/, + /_ASCIIToUpperInSitu/, + + // Handle some writes into an array whose safety we don't have a good way + // of tracking currently. 
+ /FillImageLayerList/, + /FillImageLayerPositionCoordList/, + ]; + if (entry.matches(firstArgWhitelist)) + return true; + } + + if (entry.isSafeArgument(2)) { + var secondArgWhitelist = [ + /nsStringBuffer::ToString/, + /AppendUTF\d+toUTF\d+/, + /AppendASCIItoUTF\d+/, + ]; + if (entry.matches(secondArgWhitelist)) + return true; + } + + return false; +} + +/////////////////////////////////////////////////////////////////////////////// +// Sixgill Utilities +/////////////////////////////////////////////////////////////////////////////// + +function variableName(variable) +{ + return (variable && variable.Name) ? variable.Name[0] : null; +} + +function stripFields(exp) +{ + // Fields and index operations do not involve any dereferences. Remove them + // from the expression but remember any encountered fields for use by + // annotations later on. + var fields = []; + while (true) { + if (exp.Kind == "Index") { + exp = exp.Exp[0]; + continue; + } + if (exp.Kind == "Fld") { + var csuName = exp.Field.FieldCSU.Type.Name; + var fieldName = exp.Field.Name[0]; + assert(csuName && fieldName); + fields.push(csuName + "." + fieldName); + exp = exp.Exp[0]; + continue; + } + break; + } + return [exp, fields]; +} + +function isLocalVariable(variable) +{ + switch (variable.Kind) { + case "Return": + case "Temp": + case "Local": + case "Arg": + return true; + } + return false; +} + +function isDirectCall(edge, regexp) +{ + return edge.Kind == "Call" + && edge.Exp[0].Kind == "Var" + && regexp.test(variableName(edge.Exp[0].Variable)); +} + +function isZero(exp) +{ + return exp.Kind == "Int" && exp.String == "0"; +} + +/////////////////////////////////////////////////////////////////////////////// +// Analysis Structures +/////////////////////////////////////////////////////////////////////////////// + +// Safe arguments are those which may be written through (directly, not through +// pointer fields etc.) without concerns about thread safety. 
// This includes pointers to stack data, null pointers, and other data we know
// is thread local, such as certain arguments to the root functions.
//
// Entries in the worklist keep track of the pointer arguments to the function
// which are safe using a sorted array, so that this can be propagated down the
// stack. Zero is |this|, and arguments are indexed starting at one.

// One unit of work: a function name plus the argument indexes known to be
// safe to write through, the call stack (array of CallSite, innermost first)
// that led here, and a map from argument index to parameter name.
function WorklistEntry(name, safeArguments, stack, parameterNames)
{
    this.name = name;
    this.safeArguments = safeArguments;
    this.stack = stack;
    this.parameterNames = parameterNames;
}

// Return the second component produced by splitFunction() for this entry's
// name (the first component is ignored).
WorklistEntry.prototype.readable = function()
{
    const [ mangled, readable ] = splitFunction(this.name);
    return readable;
};

// Key identifying this (function, safe argument set) pair. The key depends on
// the order of safeArguments, which is why addSafeArgument keeps it sorted.
WorklistEntry.prototype.mangledName = function()
{
    var str = this.name;
    for (var safe of this.safeArguments)
        str += " SAFE " + safe;
    return str;
};

// Whether the argument at |index| (0 is |this|) is recorded as safe.
WorklistEntry.prototype.isSafeArgument = function(index)
{
    for (var safe of this.safeArguments) {
        if (index == safe)
            return true;
    }
    return false;
};

// Record the parameter name for |index|, used when printing call sites.
WorklistEntry.prototype.setParameterName = function(index, name)
{
    this.parameterNames[index] = name;
};

// Mark the argument at |index| as safe, keeping safeArguments in ascending
// numeric order.
WorklistEntry.prototype.addSafeArgument = function(index)
{
    if (this.isSafeArgument(index))
        return;
    this.safeArguments.push(index);

    // Keep the array in numeric order. The default sort() compares elements as
    // strings, which would order 10 before 2 and make mangledName() differ
    // from the numerically ordered arrays built for call sites.
    this.safeArguments.sort((a, b) => a - b);
};

// Map a sixgill variable to its argument index: 0 for |this|, Index + 1 for
// an "Arg" variable, and -1 for anything else.
function safeArgumentIndex(variable)
{
    if (variable.Kind == "This")
        return 0;
    if (variable.Kind == "Arg")
        return variable.Index + 1;
    return -1;
}

// Whether |name| matches |match|: exact equality for a string, test() for a
// RegExp. Any other kind of matcher trips the assertion.
function nameMatches(name, match)
{
    if (typeof match == "string") {
        if (name == match)
            return true;
    } else {
        assert(match instanceof RegExp);
        if (match.test(name))
            return true;
    }
    return false;
}

// Whether |name| matches at least one string/RegExp in |matchArray|.
function nameMatchesArray(name, matchArray)
{
    for (var match of matchArray) {
        if (nameMatches(name, match))
            return true;
    }
    return false;
}

// Whether this entry's function name matches any element of |matchArray|.
WorklistEntry.prototype.matches = function(matchArray)
{
    return nameMatchesArray(this.name, matchArray);
};

// A call to |callee| made at |location|, with the argument indexes that are
// safe at that call and the known parameter names of the callee.
function CallSite(callee, safeArguments, location, parameterNames)
{
    this.callee = callee;
    this.safeArguments = safeArguments;
    this.location = location;
    this.parameterNames = parameterNames;
}

// Render the safe arguments for diagnostics, preferring parameter names and
// falling back to <this> / <argN>. Returns "" when there are none.
CallSite.prototype.safeString = function()
{
    if (this.safeArguments.length) {
        var str = "";
        for (var i = 0; i < this.safeArguments.length; i++) {
            var arg = this.safeArguments[i];
            if (arg in this.parameterNames)
                str += " " + this.parameterNames[arg];
            else
                str += " <" + ((arg == 0) ? "this" : "arg" + (arg - 1)) + ">";
        }
        return " ### SafeArguments:" + str;
    }
    return "";
};

///////////////////////////////////////////////////////////////////////////////
// Analysis Core
///////////////////////////////////////////////////////////////////////////////

var errorCount = 0;
var errorLimit = 100;

// We want to suppress output for functions that ended up not having any
// hazards, for brevity of the final output. So each new toplevel function will
// initialize this to a string, which should be printed only if an error is
// seen.
var errorHeader;

var startTime = new Date;

// Time since the script started, formatted as "[12.34s] " for log prefixes.
function elapsedTime()
{
    var seconds = (new Date - startTime) / 1000;
    return "[" + seconds.toFixed(2) + "s] ";
}

var options = parse_options([
    {
        name: '--strip-prefix',
        default: os.getenv('SOURCE') || '',
        type: 'string'
    },
    {
        name: '--add-prefix',
        default: os.getenv('URLPREFIX') || '',
        type: 'string'
    },
    {
        name: '--verbose',
        type: 'bool'
    },
]);

// Append "/" to a non-empty string that does not already end with one.
function add_trailing_slash(str) {
    if (str == '')
        return str;
    return str.endsWith("/") ? str : str + "/";
}

var removePrefix = add_trailing_slash(options.strip_prefix);
var addPrefix = add_trailing_slash(options.add_prefix);

if (options.verbose) {
    printErr(`Removing prefix ${removePrefix} from paths`);
    printErr(`Prepending ${addPrefix} to paths`);
}

print(elapsedTime() + "Loading types...");
if (os.getenv("TYPECACHE"))
    loadTypesWithCache('src_comp.xdb', os.getenv("TYPECACHE"));
else
    loadTypes('src_comp.xdb');
print(elapsedTime() + "Starting analysis...");

var xdb = xdbLibrary();
xdb.open("src_body.xdb");

var minStream = xdb.min_data_stream();
var maxStream = xdb.max_data_stream();
var roots = [];

// Either analyze the single function named on the command line, or collect
// every Gecko_* function whose entry mentions ServoBindings.cpp as a root.
var [flag, arg] = scriptArgs;
if (flag && (flag == '-f' || flag == '--function')) {
    roots = [arg];
} else {
    for (var bodyIndex = minStream; bodyIndex <= maxStream; bodyIndex++) {
        var key = xdb.read_key(bodyIndex);
        var name = key.readString();
        if (/^Gecko_/.test(name)) {
            var data = xdb.read_entry(key);
            if (/ServoBindings.cpp/.test(data.readString()))
                roots.push(name);
            xdb.free_string(data);
        }
        xdb.free_string(key);
    }
}

print(elapsedTime() + "Found " + roots.length + " roots.");
for (var i = 0; i < roots.length; i++) {
    var root = roots[i];
    errorHeader = elapsedTime() + "#" + (i + 1) + " Analyzing " + root + " ...";
    try {
        processRoot(root);
    } catch (e) {
        // dumpError() throws the string "Error!" to abandon the current root
        // after reporting; anything else is a real failure.
        if (e != "Error!")
            throw e;
    }
}

print(`${elapsedTime()}Completed analysis, found ${errorCount}/${errorLimit} allowed errors`);

var currentBody;

// All local variable assignments we have seen in either the outer or inner
// function. This crosses loop boundaries, and currently has an unsoundness
// where later assignments in a loop are not taken into account.
var assignments;

// All loops in the current function which are reachable off main thread.
var reachableLoops;

// Functions that are reachable from the current root.
var reachable = {};

// Print an error with its location and stack trace, count it (quitting once
// errorLimit is reached), then throw "Error!" to unwind to the root loop.
function dumpError(entry, location, text)
{
    if (errorHeader) {
        print(errorHeader);
        errorHeader = undefined;
    }

    var stack = entry.stack;
    print("Error: " + text);
    print("Location: " + entry.name + (location ? " @ " + location : "") + stack[0].safeString());
    print("Stack Trace:");
    // Include the callers in the stack trace instead of the callees. Make sure
    // the dummy stack entry we added for the original roots is in place.
    assert(stack[stack.length - 1].location == null);
    for (var i = 0; i < stack.length - 1; i++)
        print(stack[i + 1].callee + " @ " + stack[i].location + stack[i + 1].safeString());
    print("\n");

    if (++errorCount == errorLimit) {
        print("Maximum number of errors encountered, exiting...");
        quit();
    }

    throw "Error!";
}

// If edge is an assignment from a local variable, return the rhs variable.
function variableAssignRhs(edge)
{
    if (edge.Kind == "Assign" && edge.Exp[1].Kind == "Drf" && edge.Exp[1].Exp[0].Kind == "Var") {
        var variable = edge.Exp[1].Exp[0].Variable;
        if (isLocalVariable(variable))
            return variable;
    }
    return null;
}

// Handle a write to |lhs| performed by |edge|: record assignments to local
// variables, and report writes through unsafe variables, dereferences or
// fields.
function processAssign(body, entry, location, lhs, edge)
{
    var fields;
    [lhs, fields] = stripFields(lhs);

    switch (lhs.Kind) {
      case "Var":
        var name = variableName(lhs.Variable);
        if (isLocalVariable(lhs.Variable)) {
            // Remember any assignments to local variables in this function.
            // Note that we ignore any points where the variable's address is
            // taken and indirect assignments might occur. This is an
            // unsoundness in the analysis.

            let assign = [body, edge];

            // Chain assignments if the RHS has only been assigned once.
            var rhsVariable = variableAssignRhs(edge);
            if (rhsVariable) {
                var rhsAssign = singleAssignment(variableName(rhsVariable));
                if (rhsAssign)
                    assign = rhsAssign;
            }

            if (!(name in assignments))
                assignments[name] = [];
            assignments[name].push(assign);
        } else {
            checkVariableAssignment(entry, location, name);
        }
        return;
      case "Drf":
        var variable = null;
        if (lhs.Exp[0].Kind == "Var") {
            variable = lhs.Exp[0].Variable;
            if (isSafeVariable(entry, variable))
                return;
        } else if (lhs.Exp[0].Kind == "Fld") {
            const {
                Name: [ fieldName ],
                Type: {Kind, Type: fieldType},
                FieldCSU: {Type: {Kind: containerTypeKind,
                                  Name: containerTypeName}}
            } = lhs.Exp[0].Field;
            const [containerExpr] = lhs.Exp[0].Exp;

            if (containerTypeKind == 'CSU' &&
                Kind == 'Pointer' &&
                isEdgeSafeArgument(entry, containerExpr) &&
                isSafeMemberPointer(containerTypeName, fieldName, fieldType))
            {
                return;
            }
        }
        if (fields.length)
            checkFieldWrite(entry, location, fields);
        else
            checkDereferenceWrite(entry, location, variableName(variable));
        return;
      case "Int":
        if (isZero(lhs)) {
            // This shows up under MOZ_ASSERT, to crash the process.
            return;
        }
    }
    dumpError(entry, location, "Unknown assignment " + JSON.stringify(lhs));
}

// Build "<addPrefix><file>#<line>" from a sixgill location, dropping the
// first occurrence of removePrefix from the file name.
function get_location(rawLocation) {
    const filename = rawLocation.CacheString.replace(removePrefix, '');
    return addPrefix + filename + "#" + rawLocation.Line;
}

// Analyze one body of the function described by |entry|: seed safe arguments
// from annotations, then walk the edges that are reachable off the main
// thread, checking writes and handing direct callees to |addCallee|.
function process(entry, body, addCallee)
{
    if (!("PEdge" in body))
        return;

    // Add any arguments which are safe due to annotations.
    if ("DefineVariable" in body) {
        for (var defvar of body.DefineVariable) {
            var index = safeArgumentIndex(defvar.Variable);
            if (index >= 0) {
                var varName = index ? variableName(defvar.Variable) : "this";
                assert(varName);
                entry.setParameterName(index, varName);
                var csuName = null;
                var type = defvar.Type;
                if (type.Kind == "Pointer" && type.Type.Kind == "CSU")
                    csuName = type.Type.Name;
                if (treatAsSafeArgument(entry, varName, csuName))
                    entry.addSafeArgument(index);
            }
        }
    }

    // Points in the body which are reachable if we are not on the main thread.
    var nonMainThreadPoints = [];
    nonMainThreadPoints[body.Index[0]] = true;

    for (var edge of body.PEdge) {
        // Ignore code that only executes on the main thread.
        if (!(edge.Index[0] in nonMainThreadPoints))
            continue;

        var location = get_location(body.PPoint[edge.Index[0] - 1].Location);

        var callees = getCallees(edge);
        for (var callee of callees) {
            switch (callee.kind) {
              case "direct":
                var safeArguments = getEdgeSafeArguments(entry, edge, callee.name);
                addCallee(new CallSite(callee.name, safeArguments, location, {}));
                break;
              case "resolved-field":
                break;
              case "field":
                var field = callee.csu + "." + callee.field;
                if (callee.isVirtual)
                    checkOverridableVirtualCall(entry, location, field);
                else
                    checkIndirectCall(entry, location, field);
                break;
              case "indirect":
                checkIndirectCall(entry, location, callee.variable);
                break;
              default:
                dumpError(entry, location, "Unknown call " + callee.kind);
                break;
            }
        }

        var fallthrough = true;

        if (edge.Kind == "Assign") {
            assert(edge.Exp.length == 2);
            processAssign(body, entry, location, edge.Exp[0], edge);
        } else if (edge.Kind == "Call") {
            assert(edge.Exp.length <= 2);
            if (edge.Exp.length == 2)
                processAssign(body, entry, location, edge.Exp[1], edge);

            // Treat assertion failures as if they don't return, so that
            // asserting NS_IsMainThread() is sufficient to prevent the
            // analysis from considering a block of code.
            if (isDirectCall(edge, /MOZ_ReportAssertionFailure/))
                fallthrough = false;
        } else if (edge.Kind == "Loop") {
            reachableLoops[edge.BlockId.Loop] = true;
        } else if (edge.Kind == "Assume") {
            if (testFailsOffMainThread(edge.Exp[0], edge.PEdgeAssumeNonZero))
                fallthrough = false;
        }

        if (fallthrough)
            nonMainThreadPoints[edge.Index[1]] = true;
    }
}

// Called when no body was found for |entry|. If the name looks like one of
// the known aliasing cases below, queue the substitute name through
// |addCallee| (keeping the entry's safe arguments, location and parameter
// names) and return true; otherwise return false.
function maybeProcessMissingFunction(entry, addCallee)
{
    // If a function is missing it might be because a destructor Foo::~Foo() is
    // being called but GCC only gave us an implementation for
    // Foo::~Foo(int32). See computeCallgraph.js for a little more info.
    var name = entry.name;
    if (name.indexOf("::~") > 0 && name.indexOf("()") > 0) {
        var callee = name.replace("()", "(int32)");
        // Queue the substitute, not |name|: an entry for |name| with the same
        // safe arguments is already marked reachable and would be dropped.
        addCallee(new CallSite(callee, entry.safeArguments, entry.stack[0].location, entry.parameterNames));
        return true;
    }

    // Similarly, a call to a C1 constructor might invoke the C4 constructor. A
    // mangled constructor will be something like _ZN<length><name>C1E... or in
    // the case of a templatized constructor, _ZN<length><name>C1I...EE... so
    // we hack it and look for "C1E" or "C1I" and replace them with their C4
    // variants. This will have rare false matches, but so far we haven't hit
    // any external function calls of that sort.
    if (entry.mangledName().includes("C1E") || entry.mangledName().includes("C1I")) {
        var callee = name.replace("C1E", "C4E").replace("C1I", "C4I");
        addCallee(new CallSite(callee, entry.safeArguments, entry.stack[0].location, entry.parameterNames));
        return true;
    }

    // Hack to manually follow some typedefs that show up on some functions.
    // This is a bug in the sixgill GCC plugin I think, since sixgill is
    // supposed to follow any typedefs itself.
    if (/mozilla::dom::Element/.test(name)) {
        var callee = name.replace("mozilla::dom::Element", "Document::Element");
        addCallee(new CallSite(callee, entry.safeArguments, entry.stack[0].location, entry.parameterNames));
        return true;
    }

    // Hack for contravariant return types. When overriding a virtual method
    // with a method that returns a different return type (a subtype of the
    // original return type), we are getting the right mangled name but the
    // wrong return type in the unmangled name.
    // NOTE(review): the trailing '*' quantifies the final 'e'; it may have been
    // meant as a literal pointer star ("nsTextFrame\*"). Left as-is -- confirm.
    if (/\$nsTextFrame*/.test(name)) {
        var callee = name.replace("nsTextFrame", "nsIFrame");
        addCallee(new CallSite(callee, entry.safeArguments, entry.stack[0].location, entry.parameterNames));
        return true;
    }

    return false;
}

// Analyze everything reachable from the root function |name| with a
// depth-first worklist, visiting each (function, safe argument set) once.
function processRoot(name)
{
    var safeArguments = [];
    var parameterNames = {};
    var worklist = [new WorklistEntry(name, safeArguments, [new CallSite(name, safeArguments, null, parameterNames)], parameterNames)];

    reachable = {};

    while (worklist.length > 0) {
        var entry = worklist.pop();

        // In principle we would be better off doing a meet-over-paths here to get
        // the common subset of arguments which are safe to write through. However,
        // analyzing functions separately for each subset is simpler, ensures that
        // the stack traces we produce accurately characterize the stack arguments,
        // and should be fast enough for now.

        if (entry.mangledName() in reachable)
            continue;
        reachable[entry.mangledName()] = true;

        if (ignoreContents(entry))
            continue;

        var data = xdb.read_entry(entry.name);
        var callees = [];
        try {
            var dataString = data.readString();
            if (dataString.length) {
                // Reverse the order of the bodies we process so that we visit the
                // outer function and see its assignments before the inner loops.
                assignments = {};
                reachableLoops = {};
                var bodies = JSON.parse(dataString).reverse();
                for (var body of bodies) {
                    if (!body.BlockId.Loop || body.BlockId.Loop in reachableLoops) {
                        currentBody = body;
                        process(entry, body, Array.prototype.push.bind(callees));
                    }
                }
            } else {
                if (!maybeProcessMissingFunction(entry, Array.prototype.push.bind(callees)))
                    checkExternalFunction(entry);
            }
        } finally {
            // dumpError() unwinds by throwing, so release the entry here to
            // avoid leaking it on every reported error.
            xdb.free_string(data);
        }

        for (var callee of callees) {
            if (!ignoreCallEdge(entry, callee.callee)) {
                var nstack = [callee, ...entry.stack];
                worklist.push(new WorklistEntry(callee.callee, callee.safeArguments, nstack, callee.parameterNames));
            }
        }
    }
}

// Whether |exp|, used as a call argument, is safe to write through: a local
// variable (after stripping field/index accesses), the dereference of a safe
// variable, or a literal zero.
function isEdgeSafeArgument(entry, exp)
{
    var fields;
    [exp, fields] = stripFields(exp);

    if (exp.Kind == "Var" && isLocalVariable(exp.Variable))
        return true;
    if (exp.Kind == "Drf" && exp.Exp[0].Kind == "Var") {
        var variable = exp.Exp[0].Variable;
        return isSafeVariable(entry, variable);
    }
    if (isZero(exp))
        return true;
    return false;
}

// Ascending list of safe argument indexes for the call on |edge| (0 for the
// instance, i + 1 for the i'th argument). |callee| is currently unused.
function getEdgeSafeArguments(entry, edge, callee)
{
    assert(edge.Kind == "Call");
    var res = [];
    if ("PEdgeCallInstance" in edge) {
        if (isEdgeSafeArgument(entry, edge.PEdgeCallInstance.Exp))
            res.push(0);
    }
    if ("PEdgeCallArguments" in edge) {
        var args = edge.PEdgeCallArguments.Exp;
        for (var i = 0; i < args.length; i++) {
            if (isEdgeSafeArgument(entry, args[i]))
                res.push(i + 1);
        }
    }
    return res;
}

// Return the single recorded [body, edge] assignment to |name|, or null if
// there are zero or several.
function singleAssignment(name)
{
    if (name in assignments) {
        var edges = assignments[name];
        if (edges.length == 1)
            return edges[0];
    }
    return null;
}

// For a temporary with exactly one recorded assignment, return the edge that
// assigned it; otherwise null.
function expressionValueEdge(exp) {
    if (!(exp.Kind == "Var" && exp.Variable.Kind == "Temp"))
        return null;
    const assign = singleAssignment(variableName(exp.Variable));
    if (!assign)
        return null;
    const [body, edge] = assign;
    return edge;
}

// Examples:
//
// void foo(type* aSafe) {
//   type* safeBecauseNew = new type(...);
//   type*
//       unsafeBecauseMultipleAssignments = new type(...);
//   if (rand())
//     unsafeBecauseMultipleAssignments = bar();
//   type* safeBecauseSingleAssignmentOfSafe = aSafe;
// }
//
// Whether writes through |variable| are considered safe for |entry|.
// Arguments and |this| defer to the entry's safe argument list; temporaries
// and locals are classified by isSafeLocalVariable and the answer is cached
// per entry in entry.safeLocals. Anything else is unsafe.
function isSafeVariable(entry, variable)
{
    var index = safeArgumentIndex(variable);
    if (index >= 0)
        return entry.isSafeArgument(index);

    if (variable.Kind != "Temp" && variable.Kind != "Local")
        return false;
    var name = variableName(variable);

    if (!entry.safeLocals)
        entry.safeLocals = new Map;
    if (entry.safeLocals.has(name))
        return entry.safeLocals.get(name);

    const safe = isSafeLocalVariable(entry, name);
    entry.safeLocals.set(name, safe);
    return safe;
}

// Classify the local/temporary called |name| by looking at its single
// recorded assignment (if any) and, failing that, at whether it is an
// unassigned structure-typed temporary in the current body.
function isSafeLocalVariable(entry, name)
{
    // If there is a single place where this variable has been assigned on
    // edges we are considering, look at that edge.
    var assign = singleAssignment(name);
    if (assign) {
        const [body, edge] = assign;

        // Treat temporary pointers to DebugOnly contents as thread local.
        if (isDirectCall(edge, /DebugOnly.*?::operator/))
            return true;

        // Treat heap allocated pointers as thread local during construction.
        // Hopefully the construction code doesn't leak pointers to the object
        // to places where other threads might access it.
        if (isDirectCall(edge, /operator new/) ||
            isDirectCall(edge, /nsCSSValue::Array::Create/))
        {
            return true;
        }

        if ("PEdgeCallInstance" in edge) {
            // References to the contents of an array are threadsafe if the array
            // itself is threadsafe.
            if ((isDirectCall(edge, /operator\[\]/) ||
                 isDirectCall(edge, /nsTArray.*?::InsertElementAt\b/) ||
                 isDirectCall(edge, /nsStyleContent::ContentAt/) ||
                 isDirectCall(edge, /nsTArray_base.*?::GetAutoArrayBuffer\b/)) &&
                isEdgeSafeArgument(entry, edge.PEdgeCallInstance.Exp))
            {
                return true;
            }

            // Watch for the coerced result of a getter_AddRefs or getter_Copies call.
            if (isDirectCall(edge, /operator /)) {
                var otherEdge = expressionValueEdge(edge.PEdgeCallInstance.Exp);
                if (otherEdge &&
                    isDirectCall(otherEdge, /getter_(?:AddRefs|Copies)/) &&
                    isEdgeSafeArgument(entry, otherEdge.PEdgeCallArguments.Exp[0]))
                {
                    return true;
                }
            }

            // RefPtr::operator->() and operator* transmit the safety of the
            // RefPtr to the return value.
            if (isDirectCall(edge, /RefPtr<.*?>::operator(->|\*)\(\)/) &&
                isEdgeSafeArgument(entry, edge.PEdgeCallInstance.Exp))
            {
                return true;
            }

            // Placement-new returns a pointer that is as safe as the pointer
            // passed to it. Exp[0] is the size, Exp[1] is the pointer/address.
            // Note that the invocation of the constructor is a separate call,
            // and so need not be considered here.
            // NOTE(review): this branch looks unreachable -- every
            // /operator new/ call already returned true above, and the
            // operands are read from PEdgeCallInstance rather than
            // PEdgeCallArguments. Left unchanged; confirm the intent.
            if (isDirectCall(edge, /operator new/) &&
                edge.PEdgeCallInstance.Exp.length == 2 &&
                isEdgeSafeArgument(entry, edge.PEdgeCallInstance.Exp[1]))
            {
                return true;
            }

            // Coercion via AsAString preserves safety.
            if (isDirectCall(edge, /AsAString/) &&
                isEdgeSafeArgument(entry, edge.PEdgeCallInstance.Exp))
            {
                return true;
            }

            // Special case:
            //
            //   keyframe->mTimingFunction.emplace()
            //   keyframe->mTimingFunction->Init()
            //
            // The object calling Init should be considered safe here because
            // we just emplaced it, though in general keyframe::operator->
            // could do something crazy.
            if (isDirectCall(edge, /operator->/)) do {
                const predges = getPredecessors(body)[edge.Index[0]];
                if (!predges || predges.length != 1)
                    break;
                const predge = predges[0];
                if (!isDirectCall(predge, /\bemplace\b/))
                    break;
                const instance = predge.PEdgeCallInstance;
                if (JSON.stringify(instance) == JSON.stringify(edge.PEdgeCallInstance))
                    return true;
            } while (false);
        }

        if (isSafeAssignment(entry, edge, name))
            return true;

        // Watch out for variables which were assigned arguments.
        var rhsVariable = variableAssignRhs(edge);
        if (rhsVariable)
            return isSafeVariable(entry, rhsVariable);
    }

    // When temporary stack structures are created (either to return or to call
    // methods on without assigning them a name), the generated sixgill JSON is
    // rather strange. The temporary has structure type and is never assigned
    // to, but is dereferenced. GCC is probably not showing us everything it is
    // doing to compile this code. Pattern match for this case here.

    // The variable should have structure type. A body may carry no
    // DefineVariable list at all (process() checks for the key as well), in
    // which case the variable's type is simply unknown.
    var type = null;
    for (var defvar of currentBody.DefineVariable || []) {
        if (variableName(defvar.Variable) == name) {
            type = defvar.Type;
            break;
        }
    }
    if (!type || type.Kind != "CSU")
        return false;

    // The variable should not have been written to anywhere up to this point.
    // If it is initialized at this point we should have seen *some* write
    // already, since the CFG edges are visited in reverse post order.
    if (name in assignments)
        return false;

    return true;
}

// Whether the pointer member |memberName| of |containerType| may be written
// through whenever the container itself is safe. Only nsTArray_base's mHdr
// currently qualifies.
function isSafeMemberPointer(containerType, memberName, memberType)
{
    // nsTArray owns its header.
    if (containerType.includes("nsTArray_base") && memberName == "mHdr")
        return true;

    if (memberType.Kind != 'Pointer')
        return false;

    // Special-cases go here :)
    return false;
}

// Return whether 'exp == value' holds only when execution is on the main thread.
/**
 * Decide whether the test `exp == value` can only succeed on the main thread.
 *
 * @param exp    Expression node; only its Kind, OpCode, String and Exp fields
 *               are inspected here.
 * @param value  The truth value the expression is being compared against.
 * @returns {boolean} true when one of the recognized patterns below matches,
 *          false otherwise.
 */
function testFailsOffMainThread(exp, value) {
    const kind = exp.Kind;

    // Integer literals: the literal 0 paired with a truthy value, or the
    // literal 1 paired with a falsy value.
    if (kind === "Int") {
        if (exp.String == "0" && value)
            return true;
        if (exp.String == "1" && !value)
            return true;
        return false;
    }

    // Logical negation flips the value being tested for.
    if (kind === "Unop") {
        if (exp.OpCode == "LogicalNot")
            return testFailsOffMainThread(exp.Exp[0], !value);
        return false;
    }

    // Comparisons against zero reduce to a test on the other operand:
    // `x != 0` keeps the value, `x == 0` inverts it.
    if (kind === "Binop") {
        if (exp.OpCode != "NotEqual" && exp.OpCode != "Equal")
            return false;

        let operand = null;
        if (isZero(exp.Exp[0]))
            operand = exp.Exp[1];
        else if (isZero(exp.Exp[1]))
            operand = exp.Exp[0];
        if (!operand)
            return false;

        const wanted = exp.OpCode == "NotEqual" ? value : !value;
        return testFailsOffMainThread(operand, wanted);
    }

    if (kind !== "Drf")
        return false;

    // A dereferenced variable: look at the edge that produced its value.
    const producer = expressionValueEdge(exp.Exp[0]);
    if (!producer)
        return false;

    if (isDirectCall(producer, /NS_IsMainThread/) && value)
        return true;
    if (isDirectCall(producer, /IsInServoTraversal/) && !value)
        return true;
    if (isDirectCall(producer, /IsCurrentThreadInServoTraversal/) && !value)
        return true;

    // __builtin_expect(x, ...) is judged by its first argument.
    if (isDirectCall(producer, /__builtin_expect/))
        return testFailsOffMainThread(producer.PEdgeCallArguments.Exp[0], value);

    // A plain assignment is judged by its right-hand side.
    if (producer.Kind == "Assign")
        return testFailsOffMainThread(producer.Exp[1], value);

    return false;
}
/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */

"use strict";

loadRelativeToScript('utility.js');
loadRelativeToScript('annotations.js');
loadRelativeToScript('callgraph.js');
loadRelativeToScript('CFG.js');
loadRelativeToScript('dumpCFG.js');

// Prefix stripped from reported file names (see findLocation).
var sourceRoot = (os.getenv('SOURCE') || '') + '/';

// Name and parsed bodies of the function currently being analyzed; both are
// reassigned for every function processed.
var functionName;
var functionBodies;

try {
    var options = parse_options([
        {
            name: "--function",
            type: 'string',
        },
        {
            name: "-f",
            type: "string",
            dest: "function",
        },
        {
            name: "gcFunctions",
            default: "gcFunctions.lst"
        },
        {
            name: "limitedFunctions",
            default: "limitedFunctions.lst"
        },
        {
            name: "gcTypes",
            default: "gcTypes.txt"
        },
        {
            name: "typeInfo",
            default: "typeInfo.txt"
        },
        {
            name: "batch",
            type: "number",
            default: 1
        },
        {
            name: "numBatches",
            type: "number",
            default: 1
        },
        {
            name: "tmpfile",
            default: "tmp.txt"
        },
    ]);
} catch (e) {
    printErr(e);
    printErr("Usage: analyzeRoots.js [-f function_name] [start end [tmpfile]]");
    quit(1);
}

// Map from mangled function name to readable name, for every function listed
// in the gcFunctions file.
var gcFunctions = {};
var text = snarf(options.gcFunctions).split("\n");
// The file must end with a newline, leaving an empty final element.
assert(text.pop().length == 0);
for (const line of text)
    gcFunctions[mangled(line)] = readable(line);

var limitedFunctions = JSON.parse(snarf(options.limitedFunctions));
text = null;

var typeInfo = loadTypeInfo(options.typeInfo);

// Scratch variable shared by the regexp tests throughout this script.
var match;
var gcThings = new Set();
var gcPointers = new Set();
var gcRefs = new Set(typeInfo.GCRefs);

text = snarf(options.gcTypes).split("\n");
for (var line of text) {
    if (match = /^GCThing: (.*)/.exec(line))
        gcThings.add(match[1]);
    if (match = /^GCPointer: (.*)/.exec(line))
        gcPointers.add(match[1]);
}
text = null;

// Whether `type` is a CSU whose name is listed in typeInfo.GCRefs.
function isGCRef(type)
{
    if (type.Kind == "CSU")
        return gcRefs.has(type.Name);
    return false;
}

// Whether `type` is a CSU recorded as a GCThing, or an array (of arrays...)
// of such a CSU.
function isGCType(type)
{
    if (type.Kind == "CSU")
        return gcThings.has(type.Name);
    else if (type.Kind == "Array")
        return isGCType(type.Type);
    return false;
}

// Whether a declaration has a type that holds an unrooted GC pointer: a
// pointer to a GCThing type, or a CSU recorded as a GCPointer. Array types
// are judged by their element type.
function isUnrootedPointerDeclType(decl)
{
    // Treat non-temporary T& references as if they were the underlying type T.
    // For now, restrict this to only the types specifically annotated with JS_HAZ_GC_REF
    // to avoid lots of false positives with other types.
    let type = isReferenceDecl(decl) && isGCRef(decl.Type.Type) ? decl.Type.Type : decl.Type;

    while (type.Kind == "Array") {
        type = type.Type;
    }

    if (type.Kind == "Pointer") {
        return isGCType(type.Type);
    } else if (type.Kind == "CSU") {
        return gcPointers.has(type.Name);
    } else {
        return false;
    }
}

// Determine whether a CFG edge is a call that may GC.
//
// Returns false when the edge is not a call or no callee is known to GC;
// otherwise returns a non-empty string describing the GCing callee.
function edgeCanGC(functionName, body, edge, scopeAttrs, functionBodies)
{
    if (edge.Kind != "Call") {
        return false;
    }

    for (const { callee, attrs } of getCallees(body, edge, scopeAttrs, functionBodies)) {
        if (attrs & (ATTR_GC_SUPPRESSED | ATTR_REPLACED)) {
            continue;
        }

        if (callee.kind == "direct") {
            const func = mangled(callee.name);
            // The callee may be recorded under its plain mangled name or
            // under the internal-marker variant. Read the readable name from
            // whichever key is present, so the description never contains
            // "undefined".
            const key = [func, func + internalMarker].find(k => k in gcFunctions);
            if (key !== undefined)
                return `'${func}$${gcFunctions[key]}'`;
            return false;
        } else if (callee.kind == "indirect") {
            if (!indirectCallCannotGC(functionName, callee.variable)) {
                return "'*" + callee.variable + "'";
            }
        } else if (callee.kind == "field") {
            if (fieldCallCannotGC(callee.staticCSU, callee.field)) {
                continue;
            }
            const fieldkey = callee.fieldKey;
            if (fieldkey in gcFunctions) {
                return `'${fieldkey}'`;
            }
        } else {
            // Unrecognized callee kind. An empty string here would be falsy
            // and so indistinguishable from "cannot GC"; return a non-empty
            // marker so the call is conservatively treated as able to GC.
            return "<unknown>";
        }
    }

    return false;
}

// Search upwards through a function's control flow graph (CFG) to find a path containing:
//
// - a use of a variable, preceded by
//
// - a function call that can GC, preceded by
//
// - a use of the variable that shows that the live range starts at least that
//   far back, preceded by
//
// - an informative use of the variable (which might be the same use), one that
//   assigns to it a value that might contain a GC pointer (or is the start of
//   the function for parameters or 'this'.)
//   This is not necessary for
//   correctness, it just makes it easier to understand why something might be
//   a hazard. The output of the analysis will include the whole path from the
//   informative use to the post-GC use, to make the problem as understandable
//   as possible.
//
// A canonical example might be:
//
//     void foo() {
//         JS::Value val = lookupValue(); <-- informative use
//         if (!val.isUndefined()) {      <-- any use
//             GC();                      <-- GC call
//         }
//         putValue(val);                 <-- a use after a GC
//     }
//
// The search is performed on an underlying CFG that we traverse in
// breadth-first order (to find the shortest path). We build a path starting
// from an empty path and conditionally lengthening and improving it according
// to the computation occurring on each incoming edge. (If that path so far
// does not have a GC call and we traverse an edge with a GC call, then we
// lengthen the path by that edge and record it as including a GC call.) The
// resulting path may include a point or edge more than once! For example, in:
//
//     void foo(JS::Value val) {
//         for (int i = 0; i < N; i++) {
//             GC();
//             val = processValue(val);
//         }
//     }
//
// the path would start at the point after processValue(), go through the GC(),
// then back to the processValue() (for the call in the previous loop
// iteration).
//
// While searching, each point is annotated with a path node corresponding to
// the best path found to that node so far. When a later search ends up at the
// same point, the best path node is kept. (But the path that it heads may
// include an earlier path node for the same point, as in the case above.)
//
// What info we want depends on whether the variable turns out to be live
// across a GC call. We are looking for both hazards (unrooted variables live
// across GC calls) and unnecessary roots (rooted variables that have no GC
// calls in their live ranges.)
//
// If not:
//
//  - 'minimumUse': the earliest point in each body that uses the variable, for
//    reporting on unnecessary roots.
//
// If so:
//
//  - 'successor': a path from the GC call to a use of the variable after the GC
//    call, chained through 'successor' field in the returned edge descriptor
//
//  - 'gcInfo': a direct pointer to the GC call edge
//
// Parameters:
//   start_body, start_point - the body and point of the variable use from
//       which the upward search begins.
//   funcAttrs - attribute bits OR'ed into the attributes of every edge.
//   variable - the variable whose live range is being examined.
//
// Returns the search result when it has both `gcInfo` and `anyUse`; otherwise
// the first path found with any use after a GC, which may be null.
function findGCBeforeValueUse(start_body, start_point, funcAttrs, variable)
{
    // Scan through all edges preceding an unrooted variable use, using an
    // explicit worklist, looking for a GC call and a preceding point where the
    // variable is known to be live. A worklist contains an incoming edge
    // together with a description of where it or one of its successors GC'd
    // (if any).

    // One node of a path through the CFG. Each node records its body and
    // point, links to the node after it via `successor`, and inherits the
    // progress properties of that successor.
    class Path {
        // Properties that measure how complete a path is, in the order used
        // by compare().
        get ProgressProperties() { return ["informativeUse", "anyUse", "gcInfo"]; }

        constructor(successor_path, body, ppoint) {
            Object.assign(this, {body, ppoint});
            if (successor_path !== undefined) {
                this.successor = successor_path;
                for (const prop of this.ProgressProperties) {
                    if (prop in successor_path) {
                        this[prop] = successor_path[prop];
                    }
                }
            }
        }

        // Comma-separated list of the points on this path, stopping at the
        // first node without a (truthy) ppoint.
        toString() {
            const trail = [];
            for (let path = this; path.ppoint; path = path.successor) {
                trail.push(path.ppoint);
            }
            return trail.join();
        }

        // Return -1, 0, or 1 to indicate how complete this Path is compared
        // to another one.
        compare(other) {
            for (const prop of this.ProgressProperties) {
                const a = this.hasOwnProperty(prop);
                const b = other.hasOwnProperty(prop);
                if (a != b) {
                    return a - b;
                }
            }
            return 0;
        }
    };

    // In case we never find an informative use, keep track of the best path
    // found with any use.
    let bestPathWithAnyUse = null;

    const visitor = new class extends Visitor {
        constructor() {
            super(functionBodies);
        }

        // Do a BFS upwards through the CFG, starting from a use of the
        // variable and searching for a path containing a GC followed by an
        // initializing use of the variable (or, in forward direction, a start
        // of the variable's live range, a GC within that live range, and then
        // a use showing that the live range extends past the GC call.)
        // Actually, possibly two uses: any use at all, and then if available
        // an "informative" use that is more convincing (they may be the same).
        //
        // The CFG is a graph (a 'body' here is acyclic, but they can contain
        // loop nodes that bridge to additional bodies for the loop, so the
        // overall graph can be cyclic.) That means there may be multiple paths
        // from point A to point B, and we want paths with a GC on them. This
        // can be thought of as searching for a "maximal GCing" path from a use
        // A to an initialization B.
        //
        // This is implemented as a BFS search that when it reaches a point
        // that has been visited before, stops if and only if the current path
        // being advanced is a less GC-ful path. The traversal pushes a
        // `gcInfo` token, initially empty, up through the graph and stores the
        // maximal one visited so far at every point.
        //
        // Note that this means we may traverse through the same point more
        // than once, and so in theory this scan is superlinear -- if you visit
        // every point twice, once for a non GC path and once for a GC path, it
        // would be 2^n. But that is unlikely to matter, since you'd need lots
        // of split/join pairs that GC on one side and not the other, and you'd
        // have to visit them in an unlucky order. This could be fixed by
        // updating the gcInfo for past points in a path when a GC is found,
        // but it hasn't been found to matter in practice yet.

        // Returns "prune", "done" or "continue" for a path arriving at a
        // point, given the path previously stored there (if any).
        next_action(prev, current) {
            // Continue if first visit, or the new path is more complete than the old path. This
            // could be enhanced at some point to choose paths with 'better'
            // examples of GC (eg a call that invokes GC through concrete
            // functions rather than going through a function pointer that is
            // conservatively assumed to GC.)

            if (!current) {
                // This search path has been terminated.
                return "prune";
            }

            if (current.informativeUse) {
                // We have a path with an informative use leading to a GC
                // leading to the starting point.
                assert(current.gcInfo);
                return "done";
            }

            if (prev === undefined) {
                // first visit
                return "continue";
            }

            if (!prev.gcInfo && current.gcInfo) {
                // More GC.
                return "continue";
            } else {
                return "prune";
            }
        }

        merge_info(prev, current) {
            // Keep the most complete path.

            if (!prev || !current) {
                return prev || current;
            }

            // Tie goes to the first found, since it will be shorter when doing a BFS-like search.
            return prev.compare(current) >= 0 ? prev : current;
        }

        // Build the path node for `ppoint`, reached by walking `edge`
        // backwards from `successor_path`. Returns null to stop the search
        // along this branch.
        extend_path(edge, body, ppoint, successor_path) {
            // Clone the successor path node and then tack on the new point. Other values
            // will be updated during the rest of this function, according to what is
            // happening on the edge.
            const path = new Path(successor_path, body, ppoint);
            if (edge === null) {
                // Artificial edge to connect loops to their surrounding nodes in the outer body.
                // Does not influence "completeness" of path.
                return path;
            }

            assert(ppoint == edge.Index[0]);

            if (edgeEndsValueLiveRange(edge, variable, body)) {
                // Terminate the search through this point.
                return null;
            }

            const edge_starts = edgeStartsValueLiveRange(edge, variable);
            const edge_uses = edgeUsesVariable(edge, variable, body);

            // Track the lowest-numbered point in this body that starts or
            // uses the variable (0 means "none recorded yet").
            if (edge_starts || edge_uses) {
                if (!body.minimumUse || ppoint < body.minimumUse)
                    body.minimumUse = ppoint;
            }

            if (edge_starts) {
                // This is a beginning of the variable's live range. If we can
                // reach a GC call from here, then we're done -- we have a path
                // from the beginning of the live range, through the GC call, to a
                // use after the GC call that proves its live range extends at
                // least that far.
                if (path.gcInfo) {
                    path.anyUse = path.anyUse || edge;
                    path.informativeUse = path.informativeUse || edge;
                    return path;
                }

                // Otherwise, truncate this particular branch of the search at this
                // edge -- there is no GC after this use, and traversing the edge
                // would lead to a different live range.
                return null;
            }

            // The value is live across this edge. Check whether this edge can
            // GC (if we don't have a GC yet on this path.)
            const had_gcInfo = Boolean(path.gcInfo);
            const edgeAttrs = body.attrs[ppoint] | funcAttrs;
            if (!path.gcInfo && !(edgeAttrs & (ATTR_GC_SUPPRESSED | ATTR_REPLACED))) {
                var gcName = edgeCanGC(functionName, body, edge, edgeAttrs, functionBodies);
                if (gcName) {
                    path.gcInfo = {name:gcName, body, ppoint, edge: edge.Index};
                }
            }

            // Beginning of function?
            if (ppoint == body.Index[0] && body.BlockId.Kind != "Loop") {
                if (path.gcInfo && (variable.Kind == "Arg" || variable.Kind == "This")) {
                    // The scope of arguments starts at the beginning of the
                    // function.
                    path.anyUse = path.informativeUse = true;
                }

                if (path.anyUse) {
                    // We know the variable was live across the GC. We may or
                    // may not have found an "informative" explanation
                    // beginning of the live range. (This can happen if the
                    // live range started when a variable is used as a
                    // retparam.)
                    return path;
                }
            }

            if (!path.gcInfo) {
                // We haven't reached a GC yet, so don't start looking for uses.
                return path;
            }

            if (!edge_uses) {
                // We have a GC. If this edge doesn't use the value, then there
                // is no change to the completeness of the path.
                return path;
            }

            // The live range starts at least this far back, so we're done for
            // the same reason as with edge_starts. The only difference is that
            // a GC on this edge indicates a hazard, whereas if we're killing a
            // live range in the GC call then it's not live *across* the call.
            //
            // However, we may want to generate a longer usage chain for the
            // variable than is minimally necessary. For example, consider:
            //
            //   Value v = f();
            //   if (v.isUndefined())
            //     return false;
            //   gc();
            //   return v;
            //
            // The call to .isUndefined() is considered to be a use and
            // therefore indicates that v must be live at that point. But it's
            // more helpful to the user to continue the 'successor' path to
            // include the ancestor where the value was generated. So we will
            // only stop here if edge.Kind is Assign; otherwise, we'll pass a
            // "preGCLive" value up through the worklist to remember that the
            // variable *is* alive before the GC and so this function should be
            // returning a true value even if we don't find an assignment.
            //
            // NOTE(review): nothing named "preGCLive" exists in this function;
            // `anyUse` / `bestPathWithAnyUse` appear to play that role --
            // confirm and update the paragraph above.

            // One special case: if the use of the variable is on the
            // destination part of the edge (which currently only happens for
            // the return value and a terminal edge in the body), and this edge
            // is also GCing, then that usage happens *after* the GC and so
            // should not be used for anyUse or informativeUse. This matters
            // for a hazard involving a destructor GC'ing after an immobile
            // return value has been assigned:
            //
            //   GCInDestructor guard(cx);
            //   if (cond()) {
            //     return nullptr;
            //   }
            //
            // which boils down to
            //
            //   p1 --(construct guard)-->
            //   p2 --(call cond)-->
            //   p3 --(returnval := nullptr) -->
            //   p4 --(destruct guard, possibly GCing)-->
            //   p5
            //
            // The return value is considered to be live at p5. The live range
            // of the return value would ordinarily be from p3->p4->p5, except
            // that the nullptr assignment means it needn't be considered live
            // back that far, and so the live range is *just* p5. The GC on the
            // 4->5 edge happens just before that range, so the value was not
            // live across the GC.
            //
            if (!had_gcInfo && edge_uses == edge.Index[1]) {
                return path; // New GC does not cross this variable use.
            }

            path.anyUse = path.anyUse || edge;
            bestPathWithAnyUse = bestPathWithAnyUse || path;
            if (edge.Kind == 'Assign') {
                path.informativeUse = edge; // Done! Setting this terminates the search.
            }

            return path;
        };
    };

    const result = BFS_upwards(start_body, start_point, functionBodies, visitor, new Path());
    if (result && result.gcInfo && result.anyUse) {
        return result;
    } else {
        return bestPathWithAnyUse;
    }
}

// Look for a use of `variable` that is preceded by a GC call within the
// variable's live range. Returns the path found by findGCBeforeValueUse (with
// `afterGCUse` set to the point of the use), or null if there is none.
//
// Side effect: resets `minimumUse` on every body to 0 before searching; the
// search then records the earliest use it sees in each body.
function variableLiveAcrossGC(funcAttrs, variable, liveToEnd=false)
{
    // A variable is live across a GC if (1) it is used by an edge (as in, it
    // was at least initialized), and (2) it is used after a GC in a successor
    // edge.

    for (var body of functionBodies)
        body.minimumUse = 0;

    for (var body of functionBodies) {
        if (!("PEdge" in body))
            continue;
        for (var edge of body.PEdge) {
            // Examples:
            //
            //   JSObject* obj = NewObject();
            //   cangc();
            //   obj = NewObject();     <-- mentions 'obj' but kills previous value
            //
            // This is not a hazard. Contrast this with:
            //
            //   JSObject* obj = NewObject();
            //   cangc();
            //   obj = LookAt(obj);  <-- uses 'obj' and kills previous value
            //
            // This is a hazard; the initial value of obj is live across
            // cangc(). And a third possibility:
            //
            //   JSObject* obj = NewObject();
            //   obj = CopyObject(obj);
            //
            // This is not a hazard, because even though CopyObject can GC, obj
            // is not live across it. (obj is live before CopyObject, and
            // probably after, but not across.) There may be a hazard within
            // CopyObject, of course.
            //

            // Ignore uses that are just invalidating the previous value.
            if (edgeEndsValueLiveRange(edge, variable, body))
                continue;

            var usePoint = edgeUsesVariable(edge, variable, body, liveToEnd);
            if (usePoint) {
                var call = findGCBeforeValueUse(body, usePoint, funcAttrs, variable);
                if (!call)
                    continue;

                call.afterGCUse = usePoint;
                return call;
            }
        }
    }
    return null;
}

// An unrooted variable has its address stored in another variable via
// assignment, or passed into a function that can GC. If the address is
// assigned into some other variable, we can't track it to see if it is held
// live across a GC. If it is passed into a function that can GC, then it's
// sort of like a Handle to an unrooted location, and the callee could GC
// before overwriting it or rooting it.
//
// Returns {body, ppoint} for the first such edge found, or null.
function unsafeVariableAddressTaken(funcAttrs, variable)
{
    for (var body of functionBodies) {
        if (!("PEdge" in body))
            continue;
        for (var edge of body.PEdge) {
            if (edgeTakesVariableAddress(edge, variable, body)) {
                if (funcAttrs & (ATTR_GC_SUPPRESSED | ATTR_REPLACED)) {
                    continue;
                }
                if (edge.Kind == "Assign" || edgeCanGC(functionName, body, edge, funcAttrs, functionBodies)) {
                    return {body:body, ppoint:edge.Index[0]};
                }
            }
        }
    }
    return null;
}

// Read out the brief (non-JSON, semi-human-readable) CFG description for the
// given function and store it.
// Runs `xdbfind` on src_body.xdb for the function, then stores each line of
// its output in the `lines` array of the body (function or loop) whose
// "block:" header most recently preceded it.
function loadPrintedLines(functionName)
{
    assert(!os.system("xdbfind src_body.xdb '" + functionName + "' > " + options.tmpfile));
    var lines = snarf(options.tmpfile).split('\n');

    for (var body of functionBodies)
        body.lines = [];

    // Distribute lines of output to the block they originate from.
    var currentBody = null;
    for (var line of lines) {
        if (/^block:/.test(line)) {
            if (match = /:(loop#[\d#]+)/.exec(line)) {
                var loop = match[1];
                var found = false;
                for (var body of functionBodies) {
                    if (body.BlockId.Kind == "Loop" && body.BlockId.Loop == loop) {
                        assert(!found);
                        found = true;
                        currentBody = body;
                    }
                }
                assert(found);
            } else {
                for (var body of functionBodies) {
                    if (body.BlockId.Kind == "Function")
                        currentBody = body;
                }
            }
        }
        if (currentBody)
            currentBody.lines.push(line);
    }
}

// Returns "file:line" for a program point of a body. A falsy ppoint maps to
// the body's first point. The sourceRoot prefix is stripped; with
// opts.brief, only the part after the last '/' is kept.
function findLocation(body, ppoint, opts={brief: false})
{
    var location = body.PPoint[ppoint ? ppoint - 1 : 0].Location;
    var file = location.CacheString;

    if (file.indexOf(sourceRoot) == 0)
        file = file.substring(sourceRoot.length);

    if (opts.brief) {
        var m = /.*\/(.*)/.exec(file);
        if (m)
            file = m[1];
    }

    return file + ":" + location.Line;
}

// Extracts the trailing line number from a "file:line" string, or 0 if there
// is none. The result is always a number, so that callers comparing two
// locations compare numerically ("100" > "99" is false when compared as
// strings).
function locationLine(text)
{
    const found = /:(\d+)$/.exec(text);
    return found ? Number(found[1]) : 0;
}

// Builds the list of {lineText, edgeText} records describing a path, walking
// from `entry` along its `successor` links. The final node (the one without a
// successor) is not included.
function getEntryTrace(functionName, entry)
{
    const trace = [];

    var gcPoint = entry.gcInfo ? entry.gcInfo.ppoint : 0;

    if (!functionBodies[0].lines)
        loadPrintedLines(functionName);

    while (entry.successor) {
        var ppoint = entry.ppoint;
        var lineText = findLocation(entry.body, ppoint, {"brief": true});

        var edgeText = "";
        if (entry.successor && entry.successor.body == entry.body) {
            // If the next point in the trace is in the same block, look for an
            // edge between them.
            var next = entry.successor.ppoint;

            if (!entry.body.edgeTable) {
                // Lazily index the printed lines by their "src,dst" point pair.
                var table = {};
                entry.body.edgeTable = table;
                for (var line of entry.body.lines) {
                    if (match = /^\w+\((\d+,\d+),/.exec(line))
                        table[match[1]] = line; // May be multiple?
                }
                if (entry.body.BlockId.Kind == 'Loop') {
                    const [startPoint, endPoint] = entry.body.Index;
                    table[`${endPoint},${startPoint}`] = '(loop to next iteration)';
                }
            }

            edgeText = entry.body.edgeTable[ppoint + "," + next];
            assert(edgeText);
            if (ppoint == gcPoint)
                edgeText += " [[GC call]]";
        } else {
            // Look for any outgoing edge from the chosen point.
            for (var line of entry.body.lines) {
                if (match = /\((\d+),/.exec(line)) {
                    if (match[1] == ppoint) {
                        edgeText = line;
                        break;
                    }
                }
            }
            if (ppoint == entry.body.Index[1] && entry.body.BlockId.Kind == "Function")
                edgeText += " [[end of function]]";
        }

        // TODO: Store this in a more structured form for better markup, and perhaps
        // linking to line numbers.
        trace.push({lineText, edgeText});
        entry = entry.successor;
    }

    return trace;
}

// Whether a declaration's type is a CSU listed in typeInfo.RootedPointers or
// typeInfo.RootedGCThings.
function isRootedDeclType(decl)
{
    // Treat non-temporary T& references as if they were the underlying type T.
    const type = isReferenceDecl(decl) ? decl.Type.Type : decl.Type;
    return type.Kind == "CSU" && ((type.Name in typeInfo.RootedPointers) ||
                                  (type.Name in typeInfo.RootedGCThings));
}

// Emit one record as a line of JSON.
function printRecord(record) {
    print(JSON.stringify(record));
}

// Analyze every variable defined in the current function (functionBodies) and
// print a record for each finding:
//   "unnecessary" - a rooted variable that is not live across any GC call
//   "unrooted"    - an unrooted GC pointer live across a GC call
//   "address"     - an unrooted variable whose address is taken unsafely
//   "missing"     - the function is annotated "Expect Hazards" but no
//                   unrooted hazard was found
//
// wholeBodyAttrs is OR'ed into the function's attributes from
// limitedFunctions; it defaults to 0 (no extra attributes).
function processBodies(functionName, wholeBodyAttrs = 0)
{
    if (!("DefineVariable" in functionBodies[0]))
        return;
    const funcInfo = limitedFunctions[mangled(functionName)] || { attributes: 0 };
    const funcAttrs = funcInfo.attributes | wholeBodyAttrs;

    // Look for the JS_EXPECT_HAZARDS annotation, so as to output a different
    // message in that case that won't be counted as a hazard.
    var annotations = new Set();
    for (const variable of functionBodies[0].DefineVariable) {
        if (variable.Variable.Kind == "Func" && variable.Variable.Name[0] == functionName) {
            for (const { Name: [tag, value] } of (variable.Type.Annotation || [])) {
                if (tag == 'annotate')
                    annotations.add(value);
            }
        }
    }

    let missingExpectedHazard = annotations.has("Expect Hazards");

    // Awful special case, hopefully temporary:
    //
    // The DOM bindings code generator uses "holders" to externally root
    // variables. So for example:
    //
    //       StringObjectRecordOrLong arg0;
    //       StringObjectRecordOrLongArgument arg0_holder(arg0);
    //       arg0_holder.TrySetToStringObjectRecord(cx, args[0]);
    //       GC();
    //       self->PassUnion22(cx, arg0);
    //
    // This appears to be a rooting hazard on arg0, but it is rooted by
    // arg0_holder if you set it to any of its union types that requires
    // rooting.
    //
    // Additionally, the holder may be reported as a hazard because it's not
    // itself a Rooted or a subclass of AutoRooter; it contains a
    // Maybe> that will get emplaced if rooting is required.
    //
    // Hopefully these will be simplified at some point (see bug 1517829), but
    // for now we special-case functions in the mozilla::dom namespace that
    // contain locals with types ending in "Argument". Or
    // Maybe. Or Maybe>. It's
    // a harsh world.
    //
    // (The template arguments of the "Maybe" types named above are missing
    // from this copy of the comment.)
    const ignoreVars = new Set();
    if (functionName.match(/mozilla::dom::/)) {
        const vars = functionBodies[0].DefineVariable.filter(
            v => v.Type.Kind == 'CSU' && v.Variable.Kind == 'Local'
        ).map(
            v => [ v.Variable.Name[0], v.Type.Name ]
        );

        const holders = vars.filter(
            ([n, t]) => n.match(/^arg\d+_holder$/) &&
                        (t.includes("Argument") || t.includes("Rooter")));
        for (const [holder,] of holders) {
            ignoreVars.add(holder); // Ignore the holder.
            ignoreVars.add(holder.replace("_holder", "")); // Ignore the "managed" arg.
        }
    }

    const [mangledSymbol, readable] = splitFunction(functionName);

    for (let decl of functionBodies[0].DefineVariable) {
        var name;
        if (decl.Variable.Kind == "This")
            name = "this";
        else if (decl.Variable.Kind == "Return")
            name = "";
        else
            name = decl.Variable.Name[0];

        if (ignoreVars.has(name))
            continue;

        let liveToEnd = false;
        if (decl.Variable.Kind == "Arg" && isReferenceDecl(decl) && decl.Type.Reference == 2) {
            // References won't run destructors, so they would normally not be
            // considered live at the end of the function. In order to handle
            // the pattern of moving a GC-unsafe value into a function (eg an
            // AutoCheckCannotGC&&), assume all argument rvalue references live to the
            // end of the function unless their liveness is terminated by
            // calling reset() or moving them into another function call.
            liveToEnd = true;
        }

        if (isRootedDeclType(decl)) {
            if (!variableLiveAcrossGC(funcAttrs, decl.Variable)) {
                // The earliest use of the variable should be its constructor.
                //
                // Block-scoped on purpose: a function-scoped `var` would keep
                // the location computed for a previous variable and report
                // it for this one.
                let lineText;
                for (var body of functionBodies) {
                    if (body.minimumUse) {
                        var text = findLocation(body, body.minimumUse);
                        if (!lineText || locationLine(lineText) > locationLine(text))
                            lineText = text;
                    }
                }
                const record = {
                    record: "unnecessary",
                    functionName,
                    mangled: mangledSymbol,
                    readable,
                    variable: name,
                    type: str_Type(decl.Type),
                    loc: lineText || "???",
                };
                print(",");
                printRecord(record);
            }
        } else if (isUnrootedPointerDeclType(decl)) {
            var result = variableLiveAcrossGC(funcAttrs, decl.Variable, liveToEnd);
            if (result) {
                assert(result.gcInfo);
                const edge = result.gcInfo.edge;
                const body = result.gcInfo.body;
                const lineText = findLocation(body, result.gcInfo.ppoint);
                const makeLoc = l => [l.Location.CacheString, l.Location.Line];
                const range = [makeLoc(body.PPoint[edge[0] - 1]), makeLoc(body.PPoint[edge[1] - 1])];
                const record = {
                    record: "unrooted",
                    expected: annotations.has("Expect Hazards"),
                    functionName,
                    mangled: mangledSymbol,
                    readable,
                    variable: name,
                    type: str_Type(decl.Type),
                    gccall: result.gcInfo.name.replaceAll("'", ""),
                    gcrange: range,
                    loc: lineText,
                    trace: getEntryTrace(functionName, result),
                };
                missingExpectedHazard = false;
                print(",");
                printRecord(record);
            }
            result = unsafeVariableAddressTaken(funcAttrs, decl.Variable);
            if (result) {
                const lineText = findLocation(result.body, result.ppoint);
                const record = {
                    record: "address",
                    functionName,
                    mangled: mangledSymbol,
                    readable,
                    variable: name,
                    loc: lineText,
                    trace: getEntryTrace(functionName, {body:result.body, ppoint:result.ppoint}),
                };
                print(",");
                printRecord(record);
            }
        }
    }

    if (missingExpectedHazard) {
        const {
            Location: [
                { CacheString: startfile, Line: startline },
                { CacheString: endfile, Line: endline }
            ]
        } = functionBodies[0];

        const loc = (startfile == endfile) ? `${startfile}:${startline}-${endline}`
              : `${startfile}:${startline}`;

        const record = {
            record: "missing",
            functionName,
            mangled: mangledSymbol,
            readable,
            loc,
        };
        print(",");
        printRecord(record);
    }
}

// The output is a JSON array: a "time" record first, then one record per
// finding, each preceded by a "," line.
print("[\n");
var now = new Date();
printRecord({record: "time", iso: "" + now, t: now.getTime()});

var xdb = xdbLibrary();
xdb.open("src_body.xdb");

var minStream = xdb.min_data_stream()|0;
var maxStream = xdb.max_data_stream()|0;

var start = batchStart(options.batch, options.numBatches, minStream, maxStream);
var end = batchLast(options.batch, options.numBatches, minStream, maxStream);

// Parse one function's bodies from JSON, annotate them with scope
// attributes, and analyze them.
function process(name, json) {
    functionName = name;
    functionBodies = JSON.parse(json);

    // Annotate body with a table of all points within the body that may be in
    // a limited scope (eg within the scope of a GC suppression RAII class.)
    // body.attrs is an array indexed by point, with the value being a
    // bit set stored in an integer.
    for (var body of functionBodies)
        body.attrs = [];

    for (var body of functionBodies) {
        for (var [pbody, id, attrs] of allRAIIGuardedCallPoints(typeInfo, functionBodies, body, isLimitConstructor))
        {
            if (attrs)
                pbody.attrs[id] = attrs;
        }
    }

    processBodies(functionName);
}

// Single-function mode: analyze just the requested function and exit.
if (options.function) {
    var data = xdb.read_entry(options.function);
    var json = data.readString();
    debugger;
    process(options.function, json);
    xdb.free_string(data);
    print("\n]\n");
    quit(0);
}

// Batch mode: analyze every function in this batch's key range.
for (var nameIndex = start; nameIndex <= end; nameIndex++) {
    var name = xdb.read_key(nameIndex);
    var functionName = name.readString();
    var data = xdb.read_entry(name);
    xdb.free_string(name);
    var json = data.readString();
    try {
        process(functionName, json);
    } catch (e) {
        printErr("Exception caught while handling " + functionName);
        throw(e);
    }
    xdb.free_string(data);
}

print("\n]\n");
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */

"use strict";

// Ignore calls made through these function pointers
var ignoreIndirectCalls = {
    "mallocSizeOf" : true,
    "aMallocSizeOf" : true,
    "__conv" : true,
    "__convf" : true,
    "callback_newtable" : true,
};

// Types that when constructed with no arguments, are "safe" values (they do
// not contain GC pointers, or values with nontrivial destructors.)
var typesWithSafeConstructors = new Set([
    "mozilla::Maybe",
    "mozilla::dom::Nullable",
    "mozilla::dom::Optional",
    "mozilla::UniquePtr",
    "js::UniquePtr"
]);

// For each listed type, a set of method names.
// NOTE(review): judging by the name and entries such as "reset" these look
// like the methods that clear a value of that type -- confirm against the
// code that consumes this table.
var resetterMethods = {
    'mozilla::Maybe': new Set(["reset"]),
    'mozilla::UniquePtr': new Set(["reset"]),
    'js::UniquePtr': new Set(["reset"]),
    'mozilla::dom::Nullable': new Set(["SetNull"]),
    'mozilla::dom::TypedArray_base': new Set(["Reset"]),
    'RefPtr': new Set(["forget"]),
    'nsCOMPtr': new Set(["forget"]),
    'JS::AutoAssertNoGC': new Set(["reset"]),
};

// Whether a function name is that of a RefPtr or nsCOMPtr destructor.
function isRefcountedDtor(name) {
    return name.includes("::~RefPtr(") || name.includes("::~nsCOMPtr(");
}

// Whether a call through the function pointer `fullVariable`, made from the
// function `fullCaller`, is known not to GC. Returns true only for the
// specific exemptions listed below.
function indirectCallCannotGC(fullCaller, fullVariable)
{
    var caller = readable(fullCaller);

    // This is usually a simple variable name, but sometimes a full name gets
    // passed through. And sometimes that name is truncated. Examples:
    //   _ZL13gAbortHandler$mozalloc_oom.cpp:void (* gAbortHandler)(size_t)
    //   _ZL14pMutexUnlockFn$umutex.cpp:void (* pMutexUnlockFn)(const void*
    var name = readable(fullVariable);

    // Own-property test only: a bare `in` would also match names inherited
    // from Object.prototype (eg a function pointer called "constructor" or
    // "toString"), wrongly exempting them.
    if (Object.prototype.hasOwnProperty.call(ignoreIndirectCalls, name))
        return true;

    if (name == "mapper" && caller == "ptio.c:pt_MapError")
        return true;

    if (name == "params" && caller == "PR_ExplodeTime")
        return true;

    // hook called during script finalization which cannot GC.
    if (/CallDestroyScriptHook/.test(caller))
        return true;

    // Call through a 'callback' function pointer, in a place where we're going
    // to be throwing a JS exception.
    if (name == "callback" && caller.includes("js::ErrorToException"))
        return true;

    // The math cache only gets called with non-GC math functions.
    if (name == "f" && caller.includes("js::MathCache::lookup"))
        return true;

    // It would probably be better to somehow rewrite PR_CallOnce(foo) into a
    // call of foo, but for now just assume that nobody is crazy enough to use
    // PR_CallOnce with a function that can GC.
    if (name == "func" && caller == "PR_CallOnce")
        return true;

    return false;
}

// Ignore calls through functions pointers with these types
var ignoreClasses = {
    "JSStringFinalizer" : true,
    "SprintfState" : true,
    "SprintfStateStr" : true,
    "JSLocaleCallbacks" : true,
    "JSC::ExecutableAllocator" : true,
    "PRIOMethods": true,
    "_MD_IOVector" : true,
    "malloc_table_t": true, // replace_malloc
    "malloc_hook_table_t": true, // replace_malloc
    "mozilla::MallocSizeOf": true,
    "MozMallocSizeOf": true,
};

// Ignore calls through TYPE.FIELD, where TYPE is the class or struct name containing
// a function pointer field named FIELD.
+var ignoreCallees = { + "js::Class.trace" : true, + "js::Class.finalize" : true, + "JSClassOps.trace" : true, + "JSClassOps.finalize" : true, + "JSRuntime.destroyPrincipals" : true, + "icu_50::UObject.__deleting_dtor" : true, // destructors in ICU code can't cause GC + "mozilla::CycleCollectedJSRuntime.DescribeCustomObjects" : true, // During tracing, cannot GC. + "mozilla::CycleCollectedJSRuntime.NoteCustomGCThingXPCOMChildren" : true, // During tracing, cannot GC. + "PLDHashTableOps.hashKey" : true, + "PLDHashTableOps.clearEntry" : true, + "z_stream_s.zfree" : true, + "z_stream_s.zalloc" : true, + "GrGLInterface.fCallback" : true, + "std::strstreambuf._M_alloc_fun" : true, + "std::strstreambuf._M_free_fun" : true, + "struct js::gc::Callback.op" : true, + "mozilla::ThreadSharedFloatArrayBufferList::Storage.mFree" : true, + "mozilla::SizeOfState.mMallocSizeOf": true, + "mozilla::gfx::SourceSurfaceRawData.mDeallocator": true, +}; + +function fieldCallCannotGC(csu, fullfield) +{ + if (csu in ignoreClasses) + return true; + if (fullfield in ignoreCallees) + return true; + return false; +} + +function ignoreEdgeUse(edge, variable, body) +{ + // Horrible special case for ignoring a false positive in xptcstubs: there + // is a local variable 'paramBuffer' holding an array of nsXPTCMiniVariant + // on the stack, which appears to be live across a GC call because its + // constructor is called when the array is initialized, even though the + // constructor is a no-op. So we'll do a very narrow exclusion for the use + // that incorrectly started the live range, which was basically "__temp_1 = + // paramBuffer". + // + // By scoping it so narrowly, we can detect most hazards that would be + // caused by modifications in the PrepareAndDispatch code. It just barely + // avoids having a hazard already. 
+ if (('Name' in variable) && (variable.Name[0] == 'paramBuffer')) { + if (body.BlockId.Kind == 'Function' && body.BlockId.Variable.Name[0] == 'PrepareAndDispatch') + if (edge.Kind == 'Assign' && edge.Type.Kind == 'Pointer') + if (edge.Exp[0].Kind == 'Var' && edge.Exp[1].Kind == 'Var') + if (edge.Exp[1].Variable.Kind == 'Local' && edge.Exp[1].Variable.Name[0] == 'paramBuffer') + return true; + } + + // Functions which should not be treated as using variable. + if (edge.Kind == "Call") { + var callee = edge.Exp[0]; + if (callee.Kind == "Var") { + var name = callee.Variable.Name[0]; + if (/~DebugOnly/.test(name)) + return true; + if (/~ScopedThreadSafeStringInspector/.test(name)) + return true; + } + } + + return false; +} + +function ignoreEdgeAddressTaken(edge) +{ + // Functions which may take indirect pointers to unrooted GC things, + // but will copy them into rooted locations before calling anything + // that can GC. These parameters should usually be replaced with + // handles or mutable handles. + if (edge.Kind == "Call") { + var callee = edge.Exp[0]; + if (callee.Kind == "Var") { + var name = callee.Variable.Name[0]; + if (/js::Invoke\(/.test(name)) + return true; + } + } + + return false; +} + +// Ignore calls of these functions (so ignore any stack containing these) +var ignoreFunctions = { + "ptio.c:pt_MapError" : true, + "je_malloc_printf" : true, + "malloc_usable_size" : true, + "vprintf_stderr" : true, + "PR_ExplodeTime" : true, + "PR_ErrorInstallTable" : true, + "PR_SetThreadPrivate" : true, + "uint8 NS_IsMainThread()" : true, + + // Has an indirect call under it by the name "__f", which seemed too + // generic to ignore by itself. 
+ "void* std::_Locale_impl::~_Locale_impl(int32)" : true, + + // Bug 1056410 - devirtualization prevents the standard nsISupports::Release heuristic from working + "uint32 nsXPConnect::Release()" : true, + "uint32 nsAtom::Release()" : true, + + // Allocation API + "malloc": true, + "calloc": true, + "realloc": true, + "free": true, + + // FIXME! + "NS_LogInit": true, + "NS_LogTerm": true, + "NS_LogAddRef": true, + "NS_LogRelease": true, + "NS_LogCtor": true, + "NS_LogDtor": true, + "NS_LogCOMPtrAddRef": true, + "NS_LogCOMPtrRelease": true, + + // FIXME! + "NS_DebugBreak": true, + + // Similar to heap snapshot mock classes, and GTests below. This posts a + // synchronous runnable when a GTest fails, and we are pretty sure that the + // particular runnable it posts can't even GC, but the analysis isn't + // currently smart enough to determine that. In either case, this is (a) + // only in GTests, and (b) only when the Gtest has already failed. We have + // static and dynamic checks for no GC in the non-test code, and in the test + // code we fall back to only the dynamic checks. + "void test::RingbufferDumper::OnTestPartResult(testing::TestPartResult*)" : true, + + "float64 JS_GetCurrentEmbedderTime()" : true, + + // This calls any JSObjectMovedOp for the tenured object via an indirect call. + "JSObject* js::TenuringTracer::moveToTenuredSlow(JSObject*)" : true, + + "void js::Nursery::freeMallocedBuffers()" : true, + + "void js::AutoEnterOOMUnsafeRegion::crash(uint64, int8*)" : true, + "void js::AutoEnterOOMUnsafeRegion::crash_impl(uint64, int8*)" : true, + + "void mozilla::dom::WorkerPrivate::AssertIsOnWorkerThread() const" : true, + + // It would be cool to somehow annotate that nsTHashtable will use + // nsTHashtable::s_MatchEntry for its matchEntry function pointer, but + // there is no mechanism for that. So we will just annotate a particularly + // troublesome logging-related usage. 
+ "EntryType* nsTHashtable::PutEntry(nsTHashtable::KeyType, const fallible_t&) [with EntryType = nsBaseHashtableET >; nsTHashtable::KeyType = const char*; nsTHashtable::fallible_t = mozilla::fallible_t]" : true, + "EntryType* nsTHashtable::GetEntry(nsTHashtable::KeyType) const [with EntryType = nsBaseHashtableET >; nsTHashtable::KeyType = const char*]" : true, + "EntryType* nsTHashtable::PutEntry(nsTHashtable::KeyType) [with EntryType = nsBaseHashtableET, nsAutoPtr::OrderingEntry> >; nsTHashtable::KeyType = const mozilla::BlockingResourceBase*]" : true, + "EntryType* nsTHashtable::GetEntry(nsTHashtable::KeyType) const [with EntryType = nsBaseHashtableET, nsAutoPtr::OrderingEntry> >; nsTHashtable::KeyType = const mozilla::BlockingResourceBase*]" : true, + + // VTune internals that lazy-load a shared library and make IndirectCalls. + "iJIT_IsProfilingActive" : true, + "iJIT_NotifyEvent": true, + + // The big hammers. + "PR_GetCurrentThread" : true, + "calloc" : true, + + // This will happen early enough in initialization to not matter. + "_PR_UnixInit" : true, + + "uint8 nsContentUtils::IsExpandedPrincipal(nsIPrincipal*)" : true, + + "void mozilla::AutoProfilerLabel::~AutoProfilerLabel(int32)" : true, + + // Stores a function pointer in an AutoProfilerLabelData struct and calls it. + // And it's in mozglue, which doesn't have access to the attributes yet. + "void mozilla::ProfilerLabelEnd(std::tuple*)" : true, + + // This gets into PLDHashTable function pointer territory, and should get + // set up early enough to not do anything when it matters anyway. + "mozilla::LogModule* mozilla::LogModule::Get(int8*)": true, + + // This annotation is correct, but the reasoning is still being hashed out + // in bug 1582326 comment 8 and on. 
+ "nsCycleCollector.cpp:nsISupports* CanonicalizeXPCOMParticipant(nsISupports*)": true, + + // PLDHashTable again + "void mozilla::DeadlockDetector::Add(const T*) [with T = mozilla::BlockingResourceBase]": true, + + // OOM handling during logging + "void mozilla::detail::log_print(mozilla::LogModule*, int32, int8*)": true, + + // This would need to know that the nsCOMPtr refcount will not go to zero. + "uint8 XPCJSRuntime::DescribeCustomObjects(JSObject*, JSClass*, int8[72]*)[72]) const": true, + + // As the comment says "Refcount isn't zero, so Suspect won't delete anything." + "uint64 nsCycleCollectingAutoRefCnt::incr(void*, nsCycleCollectionParticipant*) [with void (* suspect)(void*, nsCycleCollectionParticipant*, nsCycleCollectingAutoRefCnt*, bool*) = NS_CycleCollectorSuspect3; uintptr_t = long unsigned int]": true, + + // Calls MergeSort + "uint8 v8::internal::RegExpDisjunction::SortConsecutiveAtoms(v8::internal::RegExpCompiler*)": true, + + // nsIEventTarget.IsOnCurrentThreadInfallible does not get resolved, and + // this is called on non-JS threads so cannot use AutoSuppressGCAnalysis. + "uint8 nsAutoOwningEventTarget::IsCurrentThread() const": true, + + // ~JSStreamConsumer calls 2 ~RefCnt/~nsCOMPtr destructors for its fields, + // but the body of the destructor is written so that all Releases + // are proxied, and the members will all be empty at destruction time. 
+ "void mozilla::dom::JSStreamConsumer::~JSStreamConsumer() [[base_dtor]]": true, +}; + +function extraGCFunctions(readableNames) { + return ["ffi_call"].filter(f => f in readableNames); +} + +function isProtobuf(name) +{ + return name.match(/\bgoogle::protobuf\b/) || + name.match(/\bmozilla::devtools::protobuf\b/); +} + +function isHeapSnapshotMockClass(name) +{ + return name.match(/\bMockWriter\b/) || + name.match(/\bMockDeserializedNode\b/); +} + +function isGTest(name) +{ + return name.match(/\btesting::/); +} + +function isICU(name) +{ + return name.match(/\bicu_\d+::/) || + name.match(/u(prv_malloc|prv_realloc|prv_free|case_toFullLower)_\d+/) +} + +function ignoreGCFunction(mangled, readableNames) +{ + // Field calls will not be in readableNames + if (!(mangled in readableNames)) + return false; + + const fun = readableNames[mangled][0]; + + if (fun in ignoreFunctions) + return true; + + // The protobuf library, and [de]serialization code generated by the + // protobuf compiler, uses a _ton_ of function pointers but they are all + // internal. The same is true for ICU. Easiest to just ignore that mess + // here. + if (isProtobuf(fun) || isICU(fun)) + return true; + + // Ignore anything that goes through heap snapshot GTests or mocked classes + // used in heap snapshot GTests. GTest and GMock expose a lot of virtual + // methods and function pointers that could potentially GC after an + // assertion has already failed (depending on user-provided code), but don't + // exhibit that behavior currently. For non-test code, we have dynamic and + // static checks that ensure we don't GC. However, for test code we opt out + // of static checks here, because of the above stated GMock/GTest issues, + // and rely on only the dynamic checks provided by AutoAssertCannotGC. 
+ if (isHeapSnapshotMockClass(fun) || isGTest(fun)) + return true; + + // Templatized function + if (fun.includes("void nsCOMPtr::Assert_NoQueryNeeded()")) + return true; + + // Bug 1577915 - Sixgill is ignoring a template param that makes its CFG + // impossible. + if (fun.includes("UnwrapObjectInternal") && fun.includes("mayBeWrapper = false")) + return true; + + // These call through an 'op' function pointer. + if (fun.includes("js::WeakMap::getDelegate(")) + return true; + + // TODO: modify refillFreeList to not need data flow analysis to + // understand it cannot GC. As of gcc 6, the same problem occurs with + // tryNewTenuredThing, tryNewNurseryObject, and others. + if (/refillFreeList|tryNew/.test(fun) && /= js::NoGC/.test(fun)) + return true; + + return false; +} + +function stripUCSAndNamespace(name) +{ + name = name.replace(/(struct|class|union|const) /g, ""); + name = name.replace(/(js::ctypes::|js::|JS::|mozilla::dom::|mozilla::)/g, ""); + return name; +} + +function extraRootedGCThings() +{ + return [ 'JSAddonId' ]; +} + +function extraRootedPointers() +{ + return [ + ]; +} + +function isRootedGCPointerTypeName(name) +{ + name = stripUCSAndNamespace(name); + + if (name.startsWith('MaybeRooted<')) + return /\(js::AllowGC\)1u>::RootType/.test(name); + + return false; +} + +function isUnsafeStorage(typeName) +{ + typeName = stripUCSAndNamespace(typeName); + return typeName.startsWith('UniquePtr<'); +} + +// If edgeType is a constructor type, return whatever bits it implies for its +// scope (or zero if not matching). +function isLimitConstructor(typeInfo, edgeType, varName) +{ + // Check whether this could be a constructor + if (edgeType.Kind != 'Function') + return 0; + if (!('TypeFunctionCSU' in edgeType)) + return 0; + if (edgeType.Type.Kind != 'Void') + return 0; + + // Check whether the type is a known suppression type. 
+ var type = edgeType.TypeFunctionCSU.Type.Name; + let attrs = 0; + if (type in typeInfo.GCSuppressors) + attrs = attrs | ATTR_GC_SUPPRESSED; + + // And now make sure this is the constructor, not some other method on a + // suppression type. varName[0] contains the qualified name. + var [ mangled, unmangled ] = splitFunction(varName[0]); + if (mangled.search(/C\d[EI]/) == -1) + return 0; // Mangled names of constructors have CE or CI + var m = unmangled.match(/([~\w]+)(?:<.*>)?\(/); + if (!m) + return 0; + var type_stem = type.replace(/\w+::/g, '').replace(/\<.*\>/g, ''); + if (m[1] != type_stem) + return 0; + + return attrs; +} + +// XPIDL-generated methods may invoke JS code, depending on the IDL +// attributes. This is not visible in the static callgraph since it +// goes through generated asm code. We can use the JS_HAZ_CAN_RUN_SCRIPT +// annotation to tell whether this is possible, which is set programmatically +// by the code generator when needed (bug 1347999): +// https://searchfox.org/mozilla-central/rev/81c52abeec336685330af5956c37b4bcf8926476/xpcom/idl-parser/xpidl/header.py#213-219 +// +// Note that WebIDL callbacks can also invoke JS code, but our code generator +// produces regular C++ code and so does not need any annotations. (There will +// be a call to JS::Call() or similar.) +function virtualCanRunJS(csu, field) +{ + const tags = typeInfo.OtherFieldTags; + const iface = tags[csu] + if (!iface) { + return false; + } + const virtual_method_tags = iface[field]; + return virtual_method_tags && virtual_method_tags.includes("Can run script"); +} + +function listNonGCPointers() { + return [ + // Safe only because jsids are currently only made from pinned strings. 
+ 'NPIdentifier', + ]; +} + +function isJSNative(mangled) +{ + // _Z...E = function + // 9JSContext = JSContext* + // j = uint32 + // PN2JS5Value = JS::Value* + // P = pointer + // N2JS = JS:: + // 5Value = Value + return mangled.endsWith("P9JSContextjPN2JS5ValueE") && mangled.startsWith("_Z"); +} diff --git a/js/src/devtools/rootAnalysis/build.js b/js/src/devtools/rootAnalysis/build.js new file mode 100644 index 0000000000..78ef04fea1 --- /dev/null +++ b/js/src/devtools/rootAnalysis/build.js @@ -0,0 +1,15 @@ +#!/bin/sh +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + + +set -e + +cd $SOURCE +./mach configure +./mach build export +./mach build -X nsprpub mfbt memory memory/mozalloc modules/zlib mozglue js/src xpcom/glue js/xpconnect/loader js/xpconnect/wrappers js/xpconnect/src +status=$? +echo "[[[[ build.js complete, exit code $status ]]]]" +exit $status diff --git a/js/src/devtools/rootAnalysis/build/sixgill-b2g.manifest b/js/src/devtools/rootAnalysis/build/sixgill-b2g.manifest new file mode 100644 index 0000000000..1ecb5d0665 --- /dev/null +++ b/js/src/devtools/rootAnalysis/build/sixgill-b2g.manifest @@ -0,0 +1,10 @@ +[ +{ +"hg_id" : "ec7b7d2442e8", +"algorithm" : "sha512", +"digest" : "49627d734df52cb9e7319733da5a6be1812b9373355dc300ee5600b431122570e00d380d50c7c5b5003c462c2c2cb022494b42c4ad00f8eba01c2259cbe6e502", +"filename" : "sixgill.tar.xz", +"size" : 2628868, +"unpack" : true +} +] diff --git a/js/src/devtools/rootAnalysis/build/sixgill.manifest b/js/src/devtools/rootAnalysis/build/sixgill.manifest new file mode 100644 index 0000000000..49ccdcbd3f --- /dev/null +++ b/js/src/devtools/rootAnalysis/build/sixgill.manifest @@ -0,0 +1,10 @@ +[ +{ +"digest" : "2e56a3cf84764b8e63720e5f961cff7ba8ba5cf2f353dac55c69486489bcd89f53a757e09469a07700b80cd09f09666c2db4ce375b67060ac3be967714597231", +"size" : 
2629600, +"hg_id" : "221d0d2eead9", +"unpack" : true, +"filename" : "sixgill.tar.xz", +"algorithm" : "sha512" +} +] diff --git a/js/src/devtools/rootAnalysis/callgraph.js b/js/src/devtools/rootAnalysis/callgraph.js new file mode 100644 index 0000000000..750324f0ed --- /dev/null +++ b/js/src/devtools/rootAnalysis/callgraph.js @@ -0,0 +1,233 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); +loadRelativeToScript('CFG.js'); + +// Map from csu => set of immediate subclasses +var subclasses = new Map(); + +// Map from csu => set of immediate superclasses +var superclasses = new Map(); + +// Map from "csu.name:nargs" => set of full method name +var virtualDefinitions = new Map(); + +// Every virtual method declaration, anywhere. +// +// Map from csu => Set of function-info. +// function-info: { +// name : simple string +// typedfield : "name:nargs" ("mangled" field name) +// field: full Field datastructure +// annotations : Set of [annotation-name, annotation-value] 2-element arrays +// inherited : whether the method is inherited from a base class +// pureVirtual : whether the method is pure virtual on this CSU +// dtor : if this is a virtual destructor with a definition in this class or +// a superclass, then the full name of the definition as if it were defined +// in this class. This is weird, but it's how gcc emits it. We will add a +// synthetic call from this function to its immediate base classes' dtors, +// so even if the function does not actually exist and is inherited from a +// base class, we will get a path to the inherited function. (Regular +// virtual methods are *not* claimed to exist when they don't.) 
// }
var virtualDeclarations = new Map();

// Names ("csu.field") of virtual-method resolutions that have already been
// written out, so that computeCallgraph.js emits each resolution only once.
var virtualResolutionsSeen = new Set();

// Fixed IDs of the special pseudo-functions. computeCallgraph.js asserts that
// functionId() hands out exactly these values for "(js-code)",
// "(any-function)", "(nogc-function)" and "(GC)", in that order.
var ID = {
    jscode: 1,
    anyfunc: 2,
    nogcfunc: 3,
    gc: 4,
};

// Add `entry` to the set stored under `name` in `map`, creating the set on
// first use.
//
// map   - Map from names to Sets of entries.
// name  - key to file the entry under.
// entry - value to add to the set.
//
// Returns the (possibly newly created) Set for `name`.
function addToNamedSet(map, name, entry)
{
    let entries = map.get(name);
    if (entries === undefined) {
        entries = new Set();
        map.set(name, entries);
    }
    entries.add(entry);
    return entries;
}

// Record the inheritance links and virtual methods of one CSU
// ("Class/Struct/Union").
//
// csuName - name of the CSU.
// csu     - its type record; `FunctionField` lists the virtual methods and
//           `CSUBaseClass` (optional) lists the immediate base classes.
//
// CSUs without a `FunctionField` property are skipped entirely, which means
// their base-class links are not recorded either.
function processCSU(csuName, csu)
{
    if (!("FunctionField" in csu))
        return;

    // Record the inheritance edge in both directions.
    for (const {Base} of (csu.CSUBaseClass || [])) {
        addToNamedSet(subclasses, Base, csuName);
        addToNamedSet(superclasses, csuName, Base);
    }

    for (const {Field, Variable} of csu.FunctionField) {
        // Virtual method
        const info = Field[0];
        const name = info.Name[0];
        const annotations = new Set();
        const funcInfo = {
            name,
            typedfield: typedField(info),
            field: info,
            annotations,
            inherited: (info.FieldCSU.Type.Name != csuName), // Always false for virtual dtors
            // NOTE(review): this is true exactly when a defining Variable is
            // present; confirm that matches the "pure virtual" wording in the
            // function-info description above.
            pureVirtual: Boolean(Variable),
            dtor: false,
        };

        if (Variable && isSyntheticVirtualDestructor(name)) {
            // This is one of gcc's artificial dtors.
            funcInfo.dtor = Variable.Name[0];
            funcInfo.pureVirtual = false;
        }

        addToNamedSet(virtualDeclarations, csuName, funcInfo);
        if ('Annotation' in info) {
            // funcInfo.annotations is this same Set, so filling it after the
            // record has been stored is still visible through the record.
            for (const {Name: [annType, annValue]} of info.Annotation) {
                annotations.add([annType, annValue]);
            }
        }

        if (Variable) {
            // Note: not dealing with overloading correctly.
            const definitionName = Variable.Name[0];
            addToNamedSet(virtualDefinitions, fieldKey(csuName, info), definitionName);
        }
    }
}

// Return a list of all callees that the given edge might be a call to. Each
// one is represented by an object with a 'kind' field that is one of
// ('direct', 'field', 'resolved-field', 'indirect', 'unknown'), though note
// that 'resolved-field' is really a global record of virtual method
// resolutions, independent of this particular edge.
+function translateCallees(edge) +{ + if (edge.Kind != "Call") + return []; + + const callee = edge.Exp[0]; + if (callee.Kind == "Var") { + assert(callee.Variable.Kind == "Func"); + return [{'kind': 'direct', 'name': callee.Variable.Name[0]}]; + } + + // At some point, we were intentionally invoking invalid function pointers + // (as in, a small integer cast to a function pointer type) to convey a + // small amount of information in the crash address. + if (callee.Kind == "Int") + return []; // Intentional crash + + assert(callee.Kind == "Drf"); + let called = callee.Exp[0]; + let indirection = 1; + if (called.Kind == "Drf") { + // This is probably a reference to a function pointer (`func*&`). It + // would be possible to determine that for certain by looking up the + // variable's type, which is doable but unnecessary. Indirect calls + // are assumed to call anything (any function in the codebase) unless they + // are annotated otherwise, and the `funkyName` annotation applies to + // `(**funkyName)(args)` as well as `(*funkyName)(args)`, it's ok. + called = called.Exp[0]; + indirection += 1; + } + + if (called.Kind == "Var") { + // indirect call through a variable. Note that the `indirection` field is + // currently unused by the later analysis. It is the number of dereferences + // applied to the variable before invoking the resulting function. + // + // The variable name passed through is the simplified one, since that is + // what annotations.js uses and we don't want the annotation to be missed + // if eg there is another variable of the same name in a sibling scope such + // that the fully decorated name no longer matches. + const [decorated, bare] = called.Variable.Name; + return [{'kind': "indirect", 'variable': bare, indirection}]; + } + + if (called.Kind != "Fld") { + // unknown call target. 
+ return [{'kind': "unknown"}]; + } + + // Return one 'field' callee record giving the full description of what's + // happening here (which is either a virtual method call, or a call through + // a function pointer stored in a field), and then boil the call down to a + // synthetic function that incorporates both the name of the field and the + // static type of whatever you're calling the method on. Both refer to the + // same call; they're just different ways of describing it. + const callees = []; + const field = called.Field; + const staticCSU = getFieldCallInstanceCSU(edge, field); + callees.push({'kind': "field", 'csu': field.FieldCSU.Type.Name, staticCSU, + 'field': field.Name[0], 'fieldKey': fieldKey(staticCSU, field), + 'isVirtual': ("FieldInstanceFunction" in field)}); + callees.push({'kind': "direct", 'name': fieldKey(staticCSU, field)}); + + return callees; +} + +function getCallees(body, edge, scopeAttrs, functionBodies) { + const calls = []; + + // getCallEdgeProperties can set the ATTR_REPLACED attribute, which + // means that the call in the edge has been replaced by zero or + // more edges to other functions. This is used when the original + // edge will end up calling through a function pointer or something + // (eg ~shared_ptr calls a function pointer that can only be + // T::~T()). The original call edges are left in the graph in case + // they are useful for other purposes. 
+ for (const callee of translateCallees(edge)) { + if (callee.kind != "direct") { + calls.push({ callee, attrs: scopeAttrs }); + } else { + const edgeInfo = getCallEdgeProperties(body, edge, callee.name, functionBodies); + for (const extra of (edgeInfo.extraCalls || [])) { + calls.push({ attrs: scopeAttrs | extra.attrs, callee: { name: extra.name, 'kind': "direct", } }); + } + calls.push({ callee, attrs: scopeAttrs | edgeInfo.attrs}); + } + } + + return calls; +} + +function loadTypes(type_xdb_filename) { + const xdb = xdbLibrary(); + xdb.open(type_xdb_filename); + + const minStream = xdb.min_data_stream(); + const maxStream = xdb.max_data_stream(); + + for (var csuIndex = minStream; csuIndex <= maxStream; csuIndex++) { + const csu = xdb.read_key(csuIndex); + const data = xdb.read_entry(csu); + const json = JSON.parse(data.readString()); + processCSU(csu.readString(), json[0]); + + xdb.free_string(csu); + xdb.free_string(data); + } +} + +function loadTypesWithCache(type_xdb_filename, cache_filename) { + try { + const cacheAB = os.file.readFile(cache_filename, "binary"); + const cb = serialize(); + cb.clonebuffer = cacheAB.buffer; + const cacheData = deserialize(cb); + subclasses = cacheData.subclasses; + superclasses = cacheData.superclasses; + virtualDefinitions = cacheData.virtualDefinitions; + } catch (e) { + loadTypes(type_xdb_filename); + const cb = serialize({subclasses, superclasses, virtualDefinitions}); + os.file.writeTypedArrayToFile(cache_filename, + new Uint8Array(cb.arraybuffer)); + } +} diff --git a/js/src/devtools/rootAnalysis/computeCallgraph.js b/js/src/devtools/rootAnalysis/computeCallgraph.js new file mode 100644 index 0000000000..d847465678 --- /dev/null +++ b/js/src/devtools/rootAnalysis/computeCallgraph.js @@ -0,0 +1,434 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('callgraph.js'); + +var options = parse_options([ + { + name: '--verbose', + type: 'bool' + }, + { + name: '--function', + type: 'string' + }, + { + name: 'typeInfo_filename', + type: 'string', + default: "typeInfo.txt" + }, + { + name: 'callgraphOut_filename', + type: 'string', + default: "rawcalls.txt" + }, + { + name: 'batch', + default: 1, + type: 'number' + }, + { + name: 'numBatches', + default: 1, + type: 'number' + }, +]); + +var origOut = os.file.redirect(options.callgraphOut_filename); + +var memoized = new Map(); + +var unmangled2id = new Set(); + +// Insert a string into the name table and return the ID. Do not use for +// functions, which must be handled specially. +function getId(name) +{ + let id = memoized.get(name); + if (id !== undefined) + return id; + + id = memoized.size + 1; + memoized.set(name, id); + print(`#${id} ${name}`); + + return id; +} + +// Split a function into mangled and unmangled parts and return the ID for the +// function. +function functionId(name) +{ + const [mangled, unmangled] = splitFunction(name); + const id = getId(mangled); + + // Only produce a mangled -> unmangled mapping once, unless there are + // multiple unmangled names for the same mangled name. + if (unmangled2id.has(unmangled)) + return id; + + print(`= ${id} ${unmangled}`); + unmangled2id.add(unmangled); + return id; +} + +var lastline; +function printOnce(line) +{ + if (line != lastline) { + print(line); + lastline = line; + } +} + +// Returns a table mapping function name to lists of +// [annotation-name, annotation-value] pairs: +// { function-name => [ [annotation-name, annotation-value] ] } +// +// Note that sixgill will only store certain attributes (annotation-names), so +// this won't be *all* the attributes in the source, just the ones that sixgill +// watches for. 
function getAllAttributes(body)
{
    // Only 'Func' variables are recorded. Each function name maps to the
    // `Name` of every annotation on its type; a later definition with the
    // same name replaces the earlier list.
    const table = {};
    for (const def of (body.DefineVariable || [])) {
        if (def.Variable.Kind != 'Func')
            continue;
        const funcName = def.Variable.Name[0];
        table[funcName] = Array.from(def.Type.Annotation || [], ann => ann.Name);
    }

    return table;
}

// Return the Set of 'annotate' attribute values attached to functionName in
// this body; these are the tags the hazard analysis understands.
function getAnnotations(functionName, body) {
    const tags = new Set();
    const attributes = getAllAttributes(body);
    if (!(functionName in attributes))
        return tags;

    for (const [ attrName, attrValue ] of attributes[functionName]) {
        if (attrName == 'annotate')
            tags.add(attrValue);
    }
    return tags;
}

// Walk one function body and print its callgraph records to the redirected
// output (see options.callgraphOut_filename):
//   T <id> <tag>                                    hazard-analysis tag
//   D [/attrs] <caller> <callee>                    direct call
//   V|F [/attrs] <caller> <field> CLASS c FIELD f   virtual / field call
//   R <field> <function>                            resolved virtual method
//   I [/attrs] <caller> VARIABLE <name>             indirect call
// Bodies without a PEdge list produce nothing.
function processBody(functionName, body, functionBodies)
{
    if (!('PEdge' in body))
        return;

    for (const tag of getAnnotations(functionName, body)) {
        const callerId = functionId(functionName);
        print(`T ${callerId} ${tag}`);
        if (tag == "Calls JSNatives")
            printOnce(`D ${callerId} ${functionId("(js-code)")}`);
    }

    // Direct callees already written out, used to suppress repeated edges.
    // It maps each callee to the intersection of the attrs it has been
    // emitted with, because a limited edge must not stop a less limited one
    // from being recorded later. An edge is skipped when it is at least as
    // limited as what has been seen for that callee.
    //
    // Limit sets are implemented as integers interpreted as bitfields.
    //
    const loosestSeen = new Map();

    lastline = null;
    for (const edge of body.PEdge) {
        if (edge.Kind != "Call")
            continue;

        // The attrs (eg ATTR_GC_SUPPRESSED) come from whatever RAII scopes
        // are active at this point; they were computed for every point in
        // the body before this function runs.
        const scopeAttrs = body.attrs[edge.Index[0]] | 0;

        for (const { callee, attrs } of getCallees(body, edge, scopeAttrs, functionBodies)) {
            // Some callee names are synthesized by constructing the name by
            // hand. Check here that such a function really exists; it cannot
            // be checked later from the callers/callees tables because the
            // function may be an otherwise uncalled leaf.
            if (attrs & ATTR_SYNTHETIC)
                assertFunctionExists(callee.name);

            // Individual callees may carry extra attrs. Currently the only
            // such bit is that nsISupports.{AddRef,Release} are assumed to
            // never GC.
            const attrPrefix = attrs ? `/${attrs} ` : "";
            const prologue = `${attrPrefix}${functionId(functionName)} `;

            if (callee.kind == 'direct') {
                const target = callee.name;
                const prevAttrs = loosestSeen.has(target) ? loosestSeen.get(target) : ATTRS_UNVISITED;
                if (prevAttrs & ~attrs) {
                    // Only output an edge if it loosens a limit.
                    loosestSeen.set(target, prevAttrs & attrs);
                    printOnce(`D ${prologue}${functionId(target)}`);
                }
            } else if (callee.kind == 'field') {
                const { csu, field, isVirtual } = callee;
                const recordType = isVirtual ? 'V' : 'F';
                const fullfield = `${csu}.${field}`;
                printOnce(`${recordType} ${prologue}${getId(fullfield)} CLASS ${csu} FIELD ${field}`);
            } else if (callee.kind == 'resolved-field') {
                // Fully-resolved field (virtual method) call. This writes a
                // global record, so the attrs, which are local to this
                // callsite, are deliberately left out.
                //
                // Any field call that does *not* have an R entry must be
                // assumed to call anything.
                const { csu, field, callees } = callee;
                const resolvedKey = `${csu}.${field}`;
                if (!virtualResolutionsSeen.has(resolvedKey)) {
                    virtualResolutionsSeen.add(resolvedKey);
                    for (const target of callees)
                        printOnce(`R ${getId(resolvedKey)} ${functionId(target.name)}`);
                }
            } else if (callee.kind == 'indirect') {
                printOnce(`I ${prologue}VARIABLE ${callee.variable}`);
            } else if (callee.kind == 'unknown') {
                printOnce(`I ${prologue}VARIABLE UNKNOWN`);
            } else {
                printErr(`invalid ${callee.kind} callee`);
                debugger;
            }
        }
    }
}

// Reserve IDs for special function names. The order of these calls matters:
// IDs are handed out sequentially and must match the ID table.

// represents anything that can run JS
assert(ID.jscode == functionId("(js-code)"));

// function pointers will get an edge to this in loadCallgraph.js; only the ID
// reservation is present in callgraph.txt
assert(ID.anyfunc == functionId("(any-function)"));

// same as above, but for fields annotated to never GC
assert(ID.nogcfunc == functionId("(nogc-function)"));

// garbage collection
assert(ID.gc == functionId("(GC)"));

var typeInfo = loadTypeInfo(options.typeInfo_filename);

loadTypes("src_comp.xdb");

// Arbitrary JS code must always be assumed to GC. In real code, there would
// always be a path anyway through some arbitrary JSNative, but this route will be shorter.
print(`D ${ID.jscode} ${ID.gc}`);

// An unknown function is assumed to GC.
print(`D ${ID.anyfunc} ${ID.gc}`);

// Output call edges for all virtual methods defined anywhere, from
// Class.methodname to what a (dynamic) instance of Class would run when
// methodname was called (either Class::methodname() if defined, or some
// Base::methodname() for inherited method definitions).
for (const [fieldkey, methods] of virtualDefinitions) {
    const caller = getId(fieldkey);
    for (const definition of methods)
        printOnce(`D ${caller} ${functionId(definition)}`);
}

// Output call edges from C.methodname -> S.methodname for all subclasses S of
// class C.
This is for when you are calling methodname on a pointer/ref of +// dynamic type C, so that the callgraph contains calls to all descendant +// subclasses' implementations. +for (const [csu, methods] of virtualDeclarations) { + for (const {field, dtor} of methods) { + const caller = getId(fieldKey(csu, field)); + if (virtualCanRunJS(csu, field.Name[0])) + printOnce(`D ${caller} ${functionId("(js-code)")}`); + if (dtor) + printOnce(`D ${caller} ${functionId(dtor)}`); + if (!subclasses.has(csu)) + continue; + for (const sub of subclasses.get(csu)) { + printOnce(`D ${caller} ${getId(fieldKey(sub, field))}`); + } + } +} + +var xdb = xdbLibrary(); +xdb.open("src_body.xdb"); + +if (options.verbose) { + printErr("Finished loading data structures"); +} + +var minStream = xdb.min_data_stream(); +var maxStream = xdb.max_data_stream(); + +if (options.function) { + var index = xdb.lookup_key(options.function); + if (!index) { + printErr("Function not found"); + quit(1); + } + minStream = maxStream = index; +} + +function assertFunctionExists(name) { + var data = xdb.read_entry(name); + assert(data.contents != 0, `synthetic function '${name}' not found!`); +} + +function process(functionName, functionBodies) +{ + for (var body of functionBodies) + body.attrs = []; + + for (var body of functionBodies) { + for (var [pbody, id, attrs] of allRAIIGuardedCallPoints(typeInfo, functionBodies, body, isLimitConstructor)) { + pbody.attrs[id] = attrs; + } + } + + if (options.function) { + debugger; + } + for (var body of functionBodies) { + processBody(functionName, body, functionBodies); + } + + // Not strictly necessary, but add an edge from the synthetic "(js-code)" + // to RunScript to allow better stacks than just randomly selecting a + // JSNative to blame things on. 
+ if (functionName.includes("js::RunScript")) + print(`D ${functionId("(js-code)")} ${functionId(functionName)}`); + + // GCC generates multiple constructors and destructors ("in-charge" and + // "not-in-charge") to handle virtual base classes. They are normally + // identical, and it appears that GCC does some magic to alias them to the + // same thing. But this aliasing is not visible to the analysis. So we'll + // add a dummy call edge from "foo" -> "foo *INTERNAL* ", since only "foo" + // will show up as called but only "foo *INTERNAL* " will be emitted in the + // case where the constructors are identical. + // + // This is slightly conservative in the case where they are *not* + // identical, but that should be rare enough that we don't care. + var markerPos = functionName.indexOf(internalMarker); + if (markerPos > 0) { + var inChargeXTor = functionName.replace(internalMarker, ""); + printOnce("D " + functionId(inChargeXTor) + " " + functionId(functionName)); + } + + const [ mangled, unmangled ] = splitFunction(functionName); + + // Further note: from https://itanium-cxx-abi.github.io/cxx-abi/abi.html the + // different kinds of constructors/destructors are: + // C1 # complete object constructor + // C2 # base object constructor + // C3 # complete object allocating constructor + // D0 # deleting destructor + // D1 # complete object destructor + // D2 # base object destructor + // + // In actual practice, I have observed C4 and D4 xtors generated by gcc + // 4.9.3 (but not 4.7.3). The gcc source code says: + // + // /* This is the old-style "[unified]" constructor. + // In some cases, we may emit this function and call + // it from the clones in order to share code and save space. */ + // + // Unfortunately, that "call... from the clones" does not seem to appear in + // the CFG we get from GCC. So if we see a C4 constructor or D4 destructor, + // inject an edge to it from C1, C2, and C3 (or D1, D2, and D3). 
(Note that + // C3 isn't even used in current GCC, but add the edge anyway just in + // case.) + // + // from gcc/cp/mangle.c: + // + // ::= D0 # deleting (in-charge) destructor + // ::= D1 # complete object (in-charge) destructor + // ::= D2 # base object (not-in-charge) destructor + // ::= C1 # complete object constructor + // ::= C2 # base object constructor + // ::= C3 # complete object allocating constructor + // + // Currently, allocating constructors are never used. + // + if (functionName.indexOf("C4") != -1) { + // E terminates the method name (and precedes the method parameters). + // If eg "C4E" shows up in the mangled name for another reason, this + // will create bogus edges in the callgraph. But it will affect little + // and is somewhat difficult to avoid, so we will live with it. + // + // Another possibility! A templatized constructor will contain C4I...E + // for template arguments. + // + for (let [synthetic, variant, desc] of [ + ['C4E', 'C1E', 'complete_ctor'], + ['C4E', 'C2E', 'base_ctor'], + ['C4E', 'C3E', 'complete_alloc_ctor'], + ['C4I', 'C1I', 'complete_ctor'], + ['C4I', 'C2I', 'base_ctor'], + ['C4I', 'C3I', 'complete_alloc_ctor']]) + { + if (mangled.indexOf(synthetic) == -1) + continue; + + let variant_mangled = mangled.replace(synthetic, variant); + let variant_full = `${variant_mangled}$${unmangled} [[${desc}]]`; + printOnce("D " + functionId(variant_full) + " " + functionId(functionName)); + } + } + + // For destructors: + // + // I've never seen D4Ev() + D4Ev(int32), only one or the other. So + // for a D4Ev of any sort, create: + // + // D0() -> D1() # deleting destructor calls complete destructor, then deletes + // D1() -> D2() # complete destructor calls base destructor, then destroys virtual bases + // D2() -> D4(?) # base destructor might be aliased to unified destructor + // # use whichever one is defined, in-charge or not. + // # ('?') means either () or (int32). 
+ // + // Note that this doesn't actually make sense -- D0 and D1 should be + // in-charge, but gcc doesn't seem to give them the in-charge parameter?! + // + if (functionName.indexOf("D4Ev") != -1 && functionName.indexOf("::~") != -1) { + const not_in_charge_dtor = functionName.replace("(int32)", "()"); + const D0 = not_in_charge_dtor.replace("D4Ev", "D0Ev") + " [[deleting_dtor]]"; + const D1 = not_in_charge_dtor.replace("D4Ev", "D1Ev") + " [[complete_dtor]]"; + const D2 = not_in_charge_dtor.replace("D4Ev", "D2Ev") + " [[base_dtor]]"; + printOnce("D " + functionId(D0) + " " + functionId(D1)); + printOnce("D " + functionId(D1) + " " + functionId(D2)); + printOnce("D " + functionId(D2) + " " + functionId(functionName)); + } + + if (isJSNative(mangled)) + printOnce(`D ${functionId("(js-code)")} ${functionId(functionName)}`); +} + +var start = batchStart(options.batch, options.numBatches, minStream, maxStream); +var end = batchLast(options.batch, options.numBatches, minStream, maxStream); + +for (var nameIndex = start; nameIndex <= end; nameIndex++) { + var name = xdb.read_key(nameIndex); + var data = xdb.read_entry(name); + process(name.readString(), JSON.parse(data.readString())); + xdb.free_string(name); + xdb.free_string(data); +} + +os.file.close(os.file.redirect(origOut)); diff --git a/js/src/devtools/rootAnalysis/computeGCFunctions.js b/js/src/devtools/rootAnalysis/computeGCFunctions.js new file mode 100644 index 0000000000..99410efdf8 --- /dev/null +++ b/js/src/devtools/rootAnalysis/computeGCFunctions.js @@ -0,0 +1,113 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); +loadRelativeToScript('loadCallgraph.js'); + +function usage() { + throw "Usage: computeGCFunctions.js ... --outputs "; +} + +if (typeof scriptArgs[0] != 'string') + usage(); + +var start = "Time: " + new Date; + +try { + var options = parse_options([ + { + name: '--verbose', + type: 'bool' + }, + { + name: 'inputs', + dest: 'rawcalls_filenames', + nargs: '+' + }, + { + name: '--outputs', + type: 'bool' + }, + { + name: 'callgraph', + type: 'string', + default: 'callgraph.txt' + }, + { + name: 'gcFunctions', + type: 'string', + default: 'gcFunctions.txt' + }, + { + name: 'gcFunctionsList', + type: 'string', + default: 'gcFunctions.lst' + }, + { + name: 'limitedFunctions', + type: 'string', + default: 'limitedFunctions.lst' + }, + ]); +} catch { + printErr("Usage: computeGCFunctions.js [--verbose] ... --outputs "); + quit(1); +}; + +function info(message) { + if (options.verbose) { + printErr(message); + } +} + +var { + gcFunctions, + functions, + calleesOf, + limitedFunctions +} = loadCallgraph(options.rawcalls_filenames, options.verbose); + +info("Writing " + options.gcFunctions); +redirect(options.gcFunctions); + +for (var name in gcFunctions) { + for (let readable of (functions.readableName[name] || [name])) { + print(""); + const fullname = (name == readable) ? 
name : name + "$" + readable; + print("GC Function: " + fullname); + let current = name; + do { + current = gcFunctions[current]; + if (current === 'internal') + ; // Hit the end + else if (current in functions.readableName) + print(" " + functions.readableName[current][0]); + else + print(" " + current); + } while (current in gcFunctions); + } +} + +info("Writing " + options.gcFunctionsList); +redirect(options.gcFunctionsList); +for (var name in gcFunctions) { + if (name in functions.readableName) { + for (var readable of functions.readableName[name]) + print(name + "$" + readable); + } else { + print(name); + } +} + +info("Writing " + options.limitedFunctions); +redirect(options.limitedFunctions); +print(JSON.stringify(limitedFunctions, null, 4)); + +info("Writing " + options.callgraph); +redirect(options.callgraph); +saveCallgraph(functions, calleesOf); diff --git a/js/src/devtools/rootAnalysis/computeGCTypes.js b/js/src/devtools/rootAnalysis/computeGCTypes.js new file mode 100644 index 0000000000..c38a13dabb --- /dev/null +++ b/js/src/devtools/rootAnalysis/computeGCTypes.js @@ -0,0 +1,550 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); + +var options = parse_options([ + { name: '--verbose', type: 'bool' }, + { name: "gcTypes", default: "gcTypes.txt" }, + { name: "typeInfo", default: "typeInfo.txt" } +]); + +var typeInfo = { + 'GCPointers': [], + 'GCThings': [], + 'GCInvalidated': [], + 'GCRefs': [], + 'NonGCTypes': {}, // unused + 'NonGCPointers': {}, + 'RootedGCThings': {}, + 'RootedPointers': {}, + 'RootedBases': {'JS::AutoGCRooter': true}, + 'InheritFromTemplateArgs': {}, + 'OtherCSUTags': {}, + 'OtherFieldTags': {}, + + // RAII types within which we should assume GC is suppressed, eg + // AutoSuppressGC. + 'GCSuppressors': {}, +}; + +var gDescriptors = new Map; // Map from descriptor string => Set of typeName + +var structureParents = {}; // Map from field => list of +var pointerParents = {}; // Map from field => list of +var baseClasses = {}; // Map from struct name => list of base class name strings +var subClasses = {}; // Map from struct name => list of subclass name strings + +var gcTypes = {}; // map from parent struct => Set of GC typed children +var gcPointers = {}; // map from parent struct => Set of GC typed children +var gcFields = new Map; + +var rootedPointers = {}; + +// Accumulate the base GC types before propagating info through the type graph, +// so that we can include edges from types processed later +// (eg MOZ_INHERIT_TYPE_ANNOTATIONS_FROM_TEMPLATE_ARGS). 
// Queue of argument lists for markGCType(), filled by addGCType() and
// addGCPointer(): [typeName, child, childType, typePtrLevel, fieldPtrLevel].
var pendingGCTypes = [];

// Return the per-field tag table for |csu|, creating it on first use.
//
// The table must be a plain object, not an Array: its keys are field names,
// and JSON.stringify() (used to write typeInfo out) only serializes the index
// properties of an Array, so tags stored on an Array would be lost.
function otherFieldTagsFor(csu)
{
    if (!(csu in typeInfo.OtherFieldTags))
        typeInfo.OtherFieldTags[csu] = {};
    return typeInfo.OtherFieldTags[csu];
}

// Record everything of interest about one CSU (class/struct/union):
//  - its 'annotate' tags, sorted into the matching typeInfo table
//  - its base classes
//  - for each data field, which CSU it embeds (directly or as an array
//    element) or points to
//  - the annotation tags on its data fields and function fields
//
// csu is the type name; body is its parsed entry from src_comp.xdb.
function processCSU(csu, body)
{
    for (let { 'Name': [ annType, tag ] } of (body.Annotation || [])) {
        if (annType != 'annotate')
            continue;

        if (tag == 'GC Pointer')
            typeInfo.GCPointers.push(csu);
        else if (tag == 'Invalidated by GC')
            typeInfo.GCInvalidated.push(csu);
        else if (tag == 'GC Pointer or Reference')
            typeInfo.GCRefs.push(csu);
        else if (tag == 'GC Thing')
            typeInfo.GCThings.push(csu);
        else if (tag == 'Suppressed GC Pointer')
            typeInfo.NonGCPointers[csu] = true;
        else if (tag == 'Rooted Pointer')
            typeInfo.RootedPointers[csu] = true;
        else if (tag == 'Rooted Base')
            typeInfo.RootedBases[csu] = true;
        else if (tag == 'Suppress GC')
            typeInfo.GCSuppressors[csu] = true;
        else if (tag == 'moz_inherit_type_annotations_from_template_args')
            typeInfo.InheritFromTemplateArgs[csu] = true;
        else
            addToKeyedList(typeInfo.OtherCSUTags, csu, tag);
    }

    for (let { 'Base': base } of (body.CSUBaseClass || []))
        addBaseClass(csu, base);

    for (const field of (body.DataField || [])) {
        var type = field.Field.Type;
        var fieldName = field.Field.Name[0];
        if (type.Kind == "Pointer") {
            var target = type.Type;
            if (target.Kind == "CSU")
                addNestedPointer(csu, target.Name, fieldName);
        }
        if (type.Kind == "Array") {
            var target = type.Type;
            if (target.Kind == "CSU")
                addNestedStructure(csu, target.Name, fieldName);
        }
        if (type.Kind == "CSU")
            addNestedStructure(csu, type.Name, fieldName);

        // Note that every annotation on a data field is recorded, whatever
        // its annotation type.
        for (const { 'Name': [ annType, tag ] } of (field.Annotation || []))
            addToKeyedList(otherFieldTagsFor(csu), fieldName, tag);
    }

    for (const funcfield of (body.FunctionField || [])) {
        // Pure virtual functions will not have field.Variable; others will.
        for (const field of funcfield.Field) {
            for (const {'Name': [annType, tag]} of (field.Annotation || []))
                addToKeyedList(otherFieldTagsFor(csu), field.Name[0], tag);
        }
    }
}

// csu.field is of type inner
function addNestedStructure(csu, inner, field)
{
    if (!(inner in structureParents))
        structureParents[inner] = [];

    // Skip fields that are really base classes, to avoid duplicating the base
    // fields; addBaseClass already added a "<base-N>" name.
    if (field.match(/^field:\d+$/) && (csu in baseClasses) && (baseClasses[csu].indexOf(inner) != -1))
        return;

    structureParents[inner].push([ csu, field ]);
}

// Record that |base| is a direct base class of |csu|, in both directions, and
// treat the base subobject as an embedded structure of |csu|.
function addBaseClass(csu, base) {
    if (!(csu in baseClasses))
        baseClasses[csu] = [];
    baseClasses[csu].push(base);
    if (!(base in subClasses))
        subClasses[base] = [];
    subClasses[base].push(csu);

    // Name the base subobject "<base-N>", N being its 1-based position among
    // the bases of csu. The leading '<' is what lets explain() report the
    // entry as inheritance rather than as a named field.
    var k = baseClasses[csu].length;
    addNestedStructure(csu, base, `<base-${k}>`);
}

// csu.field is a pointer to type inner
function addNestedPointer(csu, inner, field)
{
    if (!(inner in pointerParents))
        pointerParents[inner] = [];
    pointerParents[inner].push([ csu, field ]);
}

var xdb = xdbLibrary();
xdb.open("src_comp.xdb");

var minStream = xdb.min_data_stream();
var maxStream = xdb.max_data_stream();

// Process every CSU in the database. Each entry is a JSON array holding
// exactly one CSU description.
for (var csuIndex = minStream; csuIndex <= maxStream; csuIndex++) {
    var csu = xdb.read_key(csuIndex);
    var data = xdb.read_entry(csu);
    var json = JSON.parse(data.readString());
    assert(json.length == 1);
    processCSU(csu.readString(), json[0]);

    xdb.free_string(csu);
    xdb.free_string(data);
}

for (const typename of extraRootedGCThings())
    typeInfo.RootedGCThings[typename] = true;

for (const typename of extraRootedPointers())
    typeInfo.RootedPointers[typename] = true;

// Everything that inherits from a "Rooted Base" is considered to be rooted.
// This is for things like CustomAutoRooter and its subclasses.
var basework = Object.keys(typeInfo.RootedBases);
while (basework.length) {
    const base = basework.pop();
    typeInfo.RootedPointers[base] = true;
    if (base in subClasses)
        basework.push(...subClasses[base]);
}

// Now that we have the whole hierarchy set up, add all the types and propagate
// info.
for (const csu of typeInfo.GCThings)
    addGCType(csu);
for (const csu of typeInfo.GCPointers)
    addGCPointer(csu);
for (const csu of typeInfo.GCInvalidated)
    addGCPointer(csu);

// Split a templatized type name into [templateName, [arg, ...]], looking only
// at the template argument list that ends the name.
//
// A name that does not end in '>' is not considered templatized and yields
// [typeName, undefined].
//
// With validate=false the function returns as soon as it reaches the '<'
// matching the final '>'. With validate=true it keeps scanning the rest of the
// name and asserts that all angle brackets are balanced.
function parseTemplateType(typeName, validate=false) {
    // We only want templatized types. `Foo<T>::Member` doesn't count.
    // `Foo<T>::Bar<U>` does count. Which turns out to be a simple rule:
    // check whether the type ends in '>'.
    if (!typeName.endsWith(">")) {
        return [typeName, undefined];
    }

    // "Tokenize" into angle brackets, commas, and everything else. We store
    // match objects as tokens because we'll need the string offset after we
    // finish grabbing the template parameters.
    const tokens = [];
    const tokenizer = /[<>,]|[^<>,]+/g;
    let match;
    while ((match = tokenizer.exec(typeName)) !== null) {
        tokens.push(match);
    }

    // Walk backwards through the tokens, stopping when we find the matching
    // open bracket.
    const args = [];
    let depth = 0;
    let arg;
    let first_result;
    for (const match of tokens.reverse()) {
        const token = match[0];
        if (depth == 1 && (token == ',' || token == '<')) {
            // We've walked back to the beginning of a template parameter,
            // where we will see either a comma or open bracket.
            args.unshift(arg);
            arg = '';
        } else if (depth == 0 && token == '>') {
            arg = ''; // We just started.
        } else {
            arg = token + arg;
        }

        // Maintain the depth.
        if (token == '<') {
            // This could be bug 1728151.
            assert(depth > 0, `Invalid type: too many '<' signs in '${typeName}'`);
            depth--;
        } else if (token == '>') {
            depth++;
        }

        if (depth == 0) {
            // We've walked out of the template parameter list.
            // Record the results.
            assert(args.length > 0);
            const templateName = typeName.slice(0, match.index);
            const result = [templateName, args.map(arg => arg.trim())];
            if (!validate) {
                // Normal processing is to return the result the first time we
                // get to the '<' that matches the terminal '>', without validating
                // that the rest of the type name is balanced.
                return result;
            } else if (!first_result) {
                // If we are validating, remember the result when we hit the
                // first matching '<', but then keep processing the rest of
                // the input string to count brackets.
                first_result = result;
            }
        }
    }

    // This could be bug 1728151.
    assert(depth == 0, `Invalid type: too many '>' signs in '${typeName}'`);
    return first_result;
}

if (os.getenv("HAZARD_RUN_INTERNAL_TESTS")) {
    // NOTE(review): the template argument lists in these self-tests were
    // missing from the copy this was written from (every "<...>" run had been
    // dropped, leaving cases such as "Type" -> ["Type", ["int"]] that can never
    // pass). They have been rebuilt so that each input is consistent with its
    // expected result; the concrete type spellings inside the brackets are
    // reconstructions and should be confirmed against the canonical file.
    function check_parse(typeName, result) {
        assertEq(JSON.stringify(parseTemplateType(typeName)), JSON.stringify(result));
    }

    check_parse("int", ["int", undefined]);
    check_parse("Type<int>", ["Type", ["int"]]);
    check_parse("Container<int, double>", ["Container", ["int", "double"]]);
    check_parse("Container<Container<void, void>, double>", ["Container", ["Container<void, void>", "double"]]);
    check_parse("Foo<Bar<a,b>,Bar<a,b>>::Container<Container<void, void>, double>", ["Foo<Bar<a,b>,Bar<a,b>>::Container", ["Container<void, void>", "double"]]);
    check_parse("AlignedStorage2<TypedArray<foo>>", ["AlignedStorage2", ["TypedArray<foo>"]]);
    check_parse("mozilla::AlignedStorage2<mozilla::dom::TypedArray<unsigned char, JS::UnwrapArrayBufferMaybeShared, JS::GetArrayBufferMaybeSharedData, JS::GetArrayBufferMaybeSharedLengthAndData, JS::NewArrayBuffer> >",
        [
            "mozilla::AlignedStorage2",
            [
                "mozilla::dom::TypedArray<unsigned char, JS::UnwrapArrayBufferMaybeShared, JS::GetArrayBufferMaybeSharedData, JS::GetArrayBufferMaybeSharedLengthAndData, JS::NewArrayBuffer>"
            ]
        ]
    );
    check_parse(
        "mozilla::ArrayIterator<const mozilla::dom::binding_detail::RecordEntry<nsTString<char16_t>, mozilla::dom::Nullable<mozilla::dom::Sequence<double> > >&, nsTArray_Impl<mozilla::dom::binding_detail::RecordEntry<nsTString<char16_t>, mozilla::dom::Nullable<mozilla::dom::Sequence<double> > >, nsTArrayInfallibleAllocator> >",
        [
            "mozilla::ArrayIterator",
            [
                "const mozilla::dom::binding_detail::RecordEntry<nsTString<char16_t>, mozilla::dom::Nullable<mozilla::dom::Sequence<double> > >&",
                "nsTArray_Impl<mozilla::dom::binding_detail::RecordEntry<nsTString<char16_t>, mozilla::dom::Nullable<mozilla::dom::Sequence<double> > >, nsTArrayInfallibleAllocator>"
            ]
        ]
    );

    function check_throws(f, exc) {
        try {
            f();
        } catch (e) {
            assertEq(e.message.includes(exc), true, "incorrect exception: " + e.message);
            return;
        }
        assertEq(undefined, exc);
    }
    // Note that these need to end in '>' or the whole thing will be ignored.
    check_throws(() => parseTemplateType("foo>", true), "too many '>' signs");
    check_throws(() => parseTemplateType("foo<<>", true), "too many '<' signs");
    check_throws(() => parseTemplateType("foo<bar<>", true), "too many '<' signs");
    check_throws(() => parseTemplateType("foo<bar>*>::bar<a>", true), "too many '>' signs");
}

// GC Thing and GC Pointer annotations can be inherited from template args if
// this annotation is used. Think of Maybe<T> for example: Maybe<JSObject*> has
// the same GC rules as JSObject*.

var inheritors = Object.keys(typeInfo.InheritFromTemplateArgs).sort((a, b) => a.length - b.length);
for (const csu of inheritors) {
    const [, templateArgs] = parseTemplateType(csu);
    for (const param of (templateArgs || [])) {
        // Split the parameter into its core type and its run of trailing '*'s.
        // The pattern can match the empty string at the end, so search() never
        // returns -1 here.
        const pos = param.search(/\**$/);
        const ptrdness = param.length - pos;
        const core_type = param.slice(0, pos);
        if (ptrdness == 0) {
            addToKeyedList(structureParents, core_type, [csu, "template-param-" + param]);
        } else if (ptrdness == 1) {
            addToKeyedList(pointerParents, core_type, [csu, "template-param-" + param]);
        }
    }
}

// Render a pointer level for diagnostics: |level| '*'s when non-negative,
// |-level| '&'s when negative.
function Ptr(level) {
    if (level < 0)
        return "&".repeat(-level);
    else
        return "*".repeat(level);
}

// "typeName is a (pointer to a)^'typePtrLevel' GC type because it contains a field
// named 'child' of type 'childType' (or pointer to 'childType' if fieldPtrLevel == 1),
// which is itself a GCThing or GCPointer."
function markGCType(typeName, child, childType, typePtrLevel, fieldPtrLevel, indent = "") {
    // Some types, like UniquePtr, do not mark/trace/relocate their contained
    // pointers and so should not hold them live across a GC. UniquePtr in
    // particular should be the only thing pointing to a structure containing a
    // GCPointer, so nothing else can possibly trace it and it'll die when the
    // UniquePtr goes out of scope. So we say that memory pointed to by a
    // UniquePtr is just as unsafe as the stack for storing GC pointers.
    if (isUnsafeStorage(typeName)) {
        // If a UniquePtr<T> itself is on the stack, then there's a problem if
        // T contains a Cell*. But the UniquePtr itself stores a T*, not a T,
        // so set fieldPtrLevel=-1 to "undo" the pointer. When the type T is
        // scanned for pointers and a Cell* is found, then when unwrapping the
        // types, UniquePtr<T> will be seen as a T*=Cell** that should be
        // treated as a Cell*.
        //
        // However, that creates the possibility of an infinite loop, if you
        // have a type T that contains a UniquePtr<T> (which is allowed, because
        // it's storing a T* not a T.)
        const ptrLevel = typePtrLevel + fieldPtrLevel - 1;
        if (options.verbose) {
            printErr(`.${child} : (${childType} : "Cell${Ptr(typePtrLevel)}")${Ptr(fieldPtrLevel)} is-field-of ${typeName} : "Cell${Ptr(ptrLevel)}" [unsafe]`);
        }
        markGCTypeImpl(typeName, child, childType, ptrLevel, indent);

        // Also treat UniquePtr as if it were any other struct.
    }

    // Example: with:
    //    struct Pair { JSObject* foo; int bar; };
    //    struct { Pair** info }***
    // make a call to:
    //    child='info' typePtrLevel=3 fieldPtrLevel=2
    // for a final ptrLevel of 5, used to later call:
    //    child='foo' typePtrLevel=5 fieldPtrLevel=1
    //
    const ptrLevel = typePtrLevel + fieldPtrLevel;
    if (options.verbose) {
        printErr(`.${child} : (${childType} : "Cell${Ptr(typePtrLevel)}")${Ptr(fieldPtrLevel)} is-field-of ${typeName} : "Cell${Ptr(ptrLevel)}"`);
    }
    markGCTypeImpl(typeName, child, childType, ptrLevel, indent);
}

// Record that typeName is ptrLevel pointers away from a GC thing through its
// field |child| (of type childType), then propagate to every type that embeds
// or points to typeName.
function markGCTypeImpl(typeName, child, childType, ptrLevel, indent) {
    // ...except when > 2 levels of pointers away from an actual GC thing, stop
    // searching the graph. (This would just be > 1, except that a UniquePtr
    // field might still have a GC pointer.)
    if (ptrLevel > 2)
        return;

    if (isRootedGCPointerTypeName(typeName) && !(typeName in typeInfo.RootedPointers))
        printErr("FIXME: use in-source annotation for " + typeName);

    if (ptrLevel == 0 && (typeName in typeInfo.RootedGCThings))
        return;
    if (ptrLevel == 1 && (isRootedGCPointerTypeName(typeName) || (typeName in typeInfo.RootedPointers)))
        return;

    if (ptrLevel == 0) {
        if (typeName in typeInfo.NonGCTypes)
            return;
        if (!(typeName in gcTypes))
            gcTypes[typeName] = new Set();
        gcTypes[typeName].add(childType);
    } else if (ptrLevel == 1) {
        if (typeName in typeInfo.NonGCPointers)
            return;
        if (!(typeName in gcPointers))
            gcPointers[typeName] = new Set();
        gcPointers[typeName].add(childType);
    }

    if (ptrLevel < 2) {
        if (!gcFields.has(typeName))
            gcFields.set(typeName, new Map());
        const fields = gcFields.get(typeName);
        if (fields.has(child)) {
            const [orig_childType, orig_ptrLevel] = fields.get(child);
            if (ptrLevel >= orig_ptrLevel) {
                // Do not recurse for things more levels of pointers away from Cell.
                // This will prevent infinite loops when types are defined recursively
                // (eg a struct containing a UniquePtr of itself).
                return;
            }
        }
        fields.set(child, [childType, ptrLevel]);
    }

    if (typeName in structureParents) {
        for (var field of structureParents[typeName]) {
            var [ holderType, fieldName ] = field;
            markGCType(holderType, fieldName, typeName, ptrLevel, 0, indent + " ");
        }
    }
    if (typeName in pointerParents) {
        for (var field of pointerParents[typeName]) {
            var [ holderType, fieldName ] = field;
            markGCType(holderType, fieldName, typeName, ptrLevel, 1, indent + " ");
        }
    }
}

// Queue typeName as a GC thing by annotation. The pseudo-field name
// '<annotation>' is what explain() looks for to report "annotated as a
// GCThing"; it must differ from the name used by addGCPointer() so that a type
// carrying both kinds of annotation keeps two separate gcFields entries.
function addGCType(typeName)
{
    pendingGCTypes.push([typeName, '<annotation>', '(annotation)', 0, 0]);
}

// Queue typeName as a GC pointer by annotation, under the pseudo-field name
// '<pointer-annotation>' (see addGCType).
function addGCPointer(typeName)
{
    pendingGCTypes.push([typeName, '<pointer-annotation>', '(annotation)', 1, 0]);
}

for (const pending of pendingGCTypes) {
    markGCType(...pending);
}

// Call a function for a type and every type that contains the type in a field
// or as a base class (which internally is pretty much the same thing --
// subclasses are structs beginning with the base class and adding on their
// local fields.)
//
// func is called as func(container, typeName), where container describes how
// typeName was reached: '<annotation>' for the starting type, "subclass of X"
// for a subclass, or "field : X" for a type holding X in a field. Types already
// in |seen| are skipped; pass the same Set across calls to visit each type at
// most once overall.
function foreachContainingStruct(typeName, func, seen = new Set())
{
    function recurse(container, typeName) {
        if (seen.has(typeName))
            return;
        seen.add(typeName);

        func(container, typeName);

        if (typeName in subClasses) {
            for (const sub of subClasses[typeName])
                recurse("subclass of " + typeName, sub);
        }
        if (typeName in structureParents) {
            for (const [holder, field] of structureParents[typeName])
                recurse(field + " : " + typeName, holder);
        }
    }

    recurse('<annotation>', typeName);
}

for (var type of listNonGCPointers())
    typeInfo.NonGCPointers[type] = true;

// Print, one line per step and indented by |indent|, the chain of fields and
// base classes that makes csu a GC type. |seen| holds the types already
// explained on this chain so that recursive types terminate.
function explain(csu, indent, seen) {
    if (!seen)
        seen = new Set();
    seen.add(csu);
    if (!gcFields.has(csu))
        return;
    var fields = gcFields.get(csu);

    if (fields.has('<annotation>')) {
        print(indent + "which is annotated as a GCThing");
        return;
    }
    if (fields.has('<pointer-annotation>')) {
        print(indent + "which is annotated as a GCPointer");
        return;
    }
    for (var [ field, [ child, ptrdness ] ] of fields) {
        var msg = indent;
        // Pseudo-fields such as "<base-N>" denote base class subobjects.
        if (field[0] == '<')
            msg += "inherits from ";
        else {
            if (field.startsWith("template-param-")) {
                msg += "inherits annotations from template parameter '" + field.slice("template-param-".length) + "' ";
            } else {
                msg += "contains field '" + field + "' ";
            }
            if (ptrdness == -1)
                msg += "(with a pointer to unsafe storage) holding a ";
            else if (ptrdness == 0)
                msg += "of type ";
            else
                msg += "pointing to type ";
        }
        msg += child;
        print(msg);
        if (!seen.has(child))
            explain(child, indent + " ", seen);
    }
}

var origOut = os.file.redirect(options.gcTypes);

for (var csu in gcTypes) {
    print("GCThing: " + csu);
    explain(csu, " ");
}
for (var csu in gcPointers) {
    print("GCPointer: " + csu);
    explain(csu, " ");
}

// Redirect output to the typeInfo file and close the gcTypes file.
os.file.close(os.file.redirect(options.typeInfo));

// Compute the set of types that suppress GC within their RAII scopes (eg
// AutoSuppressGC, AutoSuppressGCForAnalysis).
+var seen = new Set(); +for (let csu in typeInfo.GCSuppressors) + foreachContainingStruct(csu, + (holder, typeName) => { typeInfo.GCSuppressors[typeName] = holder }, + seen); + +print(JSON.stringify(typeInfo, null, 4)); + +os.file.close(os.file.redirect(origOut)); diff --git a/js/src/devtools/rootAnalysis/dumpCFG.js b/js/src/devtools/rootAnalysis/dumpCFG.js new file mode 100644 index 0000000000..0ac220840c --- /dev/null +++ b/js/src/devtools/rootAnalysis/dumpCFG.js @@ -0,0 +1,273 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// const cfg = loadCFG(scriptArgs[0]); +// dump_CFG(cfg); + +function loadCFG(filename) { + const data = os.file.readFile(filename); + return JSON.parse(data); +} + +function dump_CFG(cfg) { + for (const body of cfg) + dump_body(body); +} + +function dump_body(body, src, dst) { + const {BlockId,Command,DefineVariable,Index,Location,PEdge,PPoint,Version} = body; + + const [mangled, unmangled] = splitFunction(BlockId.Variable.Name[0]); + print(`${unmangled} at ${Location[0].CacheString}:${Location[0].Line}`); + + if (src === undefined) { + for (const def of DefineVariable) + print(str_definition(def)); + print(""); + } + + for (const edge of PEdge) { + if (src === undefined || edge.Index[0] == src) { + if (dst == undefined || edge.Index[1] == dst) + print(str_edge(edge, body)); + } + } +} + +function str_definition(def) { + const {Type, Variable} = def; + return `define ${str_Variable(Variable)} : ${str_Type(Type)}`; +} + +function badFormat(what, val) { + printErr("Bad format of " + what + ": " + JSON.stringify(val, null, 4)); + printErr((new Error).stack); +} + +function str_Variable(variable) { + if (variable.Kind == 'Return') + return ''; + else if (variable.Kind == 'This') + return 'this'; + + try { + return variable.Name[1]; + } catch(e) { + badFormat("variable", variable); + 
} +} + +function str_Type(type) { + try { + const {Kind, Type, Name, TypeFunctionArguments} = type; + if (Kind == 'Pointer') + return str_Type(Type) + ["*", "&", "&&"][type.Reference]; + else if (Kind == 'CSU') + return Name; + else if (Kind == 'Array') + return str_Type(Type) + "[]"; + else if (Kind == 'Function') + return str_Type(Type) + "()"; + + return Kind; + } catch(e) { + badFormat("type", type); + } +} + +var OpCodeNames = { + 'LessEqual': ['<=', '>'], + 'LessThan': ['<', '>='], + 'GreaterEqual': ['>=', '<'], + 'Greater': ['>', '<='], + 'Plus': '+', + 'Minus': '-', +}; + +function opcode_name(opcode, invert) { + if (opcode in OpCodeNames) { + const name = OpCodeNames[opcode]; + if (invert === undefined) + return name; + return name[invert ? 1 : 0]; + } else { + if (invert === undefined) + return opcode; + return (invert ? '!' : '') + opcode; + } +} + +function str_value(val, env, options) { + const {Kind, Variable, String, Exp} = val; + if (Kind == 'Var') + return str_Variable(Variable); + else if (Kind == 'Drf') { + // Suppress the vtable lookup dereference + if (Exp[0].Kind == 'Fld' && "FieldInstanceFunction" in Exp[0].Field) + return str_value(Exp[0], env); + const exp = str_value(Exp[0], env); + if (options && options.noderef) + return exp; + return "*" + exp; + } else if (Kind == 'Fld') { + const {Exp, Field} = val; + const name = Field.Name[0]; + if ("FieldInstanceFunction" in Field) { + return Field.FieldCSU.Type.Name + "." + name; + } + const container = str_value(Exp[0]); + if (container.startsWith("*")) + return container.substring(1) + "->" + name; + return container + "." 
+ name; + } else if (Kind == 'Empty') { + return ''; + } else if (Kind == 'Binop') { + const {OpCode} = val; + const op = opcode_name(OpCode); + return `${str_value(Exp[0], env)} ${op} ${str_value(Exp[1], env)}`; + } else if (Kind == 'Unop') { + const exp = str_value(Exp[0], env); + const {OpCode} = val; + if (OpCode == 'LogicalNot') + return `not ${exp}`; + return `${OpCode}(${exp})`; + } else if (Kind == 'Index') { + const index = str_value(Exp[1], env); + if (Exp[0].Kind == 'Drf') + return `${str_value(Exp[0], env, {noderef:true})}[${index}]`; + else + return `&${str_value(Exp[0], env)}[${index}]`; + } else if (Kind == 'NullTest') { + return `nullptr == ${str_value(Exp[0], env)}`; + } else if (Kind == "String") { + return '"' + String + '"'; + } else if (String !== undefined) { + return String; + } + badFormat("value", val); +} + +function str_thiscall_Exp(exp) { + return exp.Kind == 'Drf' ? str_value(exp.Exp[0]) + "->" : str_value(exp) + "."; +} + +function stripcsu(s) { + return s.replace("class ", "").replace("struct ", "").replace("union "); +} + +function str_call(prefix, edge, env) { + const {Exp, Type, PEdgeCallArguments, PEdgeCallInstance} = edge; + const {Kind, Type:cType, TypeFunctionArguments, TypeFunctionCSU} = Type; + + if (Kind == 'Function') { + const params = PEdgeCallArguments ? 
PEdgeCallArguments.Exp : []; + const strParams = params.map(str_value); + + let func; + let comment = ""; + let assign_exp; + if (PEdgeCallInstance) { + const csu = TypeFunctionCSU.Type.Name; + const method = str_value(Exp[0], env); + + // Heuristic to only display the csu for constructors + if (csu.includes(method)) { + func = stripcsu(csu) + "::" + method; + } else { + func = method; + comment = "# " + csu + "::" + method + "\n"; + } + + const {Exp: thisExp} = PEdgeCallInstance; + func = str_thiscall_Exp(thisExp) + func; + } else { + func = str_value(Exp[0]); + } + assign_exp = Exp[1]; + + let assign = ""; + if (assign_exp) { + assign = str_value(assign_exp) + " := "; + } + return `${comment}${prefix} Call ${assign}${func}(${strParams.join(", ")})`; + } + + print(JSON.stringify(edge, null, 4)); + throw new Error("unhandled format error"); +} + +function str_assign(prefix, edge) { + const {Exp} = edge; + const [lhs, rhs] = Exp; + return `${prefix} Assign ${str_value(lhs)} := ${str_value(rhs)}`; +} + +function str_loop(prefix, edge) { + const {BlockId: {Loop}} = edge; + return `${prefix} Loop ${Loop}`; +} + +function str_assume(prefix, edge) { + const {Exp, PEdgeAssumeNonZero} = edge; + const cmp = PEdgeAssumeNonZero ? 
"" : "!"; + + const {Exp: aExp, Kind, OpCode} = Exp[0]; + if (Kind == 'Binop') { + const [lhs, rhs] = aExp; + const op = opcode_name(OpCode, !PEdgeAssumeNonZero); + return `${prefix} Assume ${str_value(lhs)} ${op} ${str_value(rhs)}`; + } else if (Kind == 'Unop') { + return `${prefix} Assume ${cmp}${OpCode} ${str_value(aExp[0])}`; + } else if (Kind == 'NullTest') { + return `${prefix} Assume nullptr ${cmp}== ${str_value(aExp[0])}`; + } else if (Kind == 'Drf') { + return `${prefix} Assume ${cmp}${str_value(Exp[0])}`; + } + + print(JSON.stringify(edge, null, 4)); + throw new Error("unhandled format error"); +} + +function str_edge(edge, env) { + const {Index, Kind} = edge; + const [src, dst] = Index; + const prefix = `[${src},${dst}]`; + + if (Kind == "Call") + return str_call(prefix, edge, env); + if (Kind == 'Assign') + return str_assign(prefix, edge); + if (Kind == 'Assume') + return str_assume(prefix, edge); + if (Kind == 'Loop') + return str_loop(prefix, edge); + + print(JSON.stringify(edge, null, 4)); + throw "unhandled edge type"; +} + +function str(unknown) { + if ("Name" in unknown) { + return str_Variable(unknown); + } else if ("Index" in unknown) { + // Note: Variable also has .Index, with a different meaning. 
+ return str_edge(unknown); + } else if ("Type" in unknown) { + if ("Variable" in unknown) { + return str_definition(unknown); + } else { + return str_Type(unknown); + } + } else if ("Kind" in unknown) { + if ("BlockId" in unknown) + return str_Variable(unknown); + return str_value(unknown); + } + return "unknown"; +} + +function jdump(x) { + print(JSON.stringify(x, null, 4)); + quit(0); +} diff --git a/js/src/devtools/rootAnalysis/expect.b2g.json b/js/src/devtools/rootAnalysis/expect.b2g.json new file mode 100644 index 0000000000..06f2beb36f --- /dev/null +++ b/js/src/devtools/rootAnalysis/expect.b2g.json @@ -0,0 +1,3 @@ +{ + "expect-hazards": 0 +} diff --git a/js/src/devtools/rootAnalysis/expect.browser.json b/js/src/devtools/rootAnalysis/expect.browser.json new file mode 100644 index 0000000000..06f2beb36f --- /dev/null +++ b/js/src/devtools/rootAnalysis/expect.browser.json @@ -0,0 +1,3 @@ +{ + "expect-hazards": 0 +} diff --git a/js/src/devtools/rootAnalysis/expect.shell.json b/js/src/devtools/rootAnalysis/expect.shell.json new file mode 100644 index 0000000000..06f2beb36f --- /dev/null +++ b/js/src/devtools/rootAnalysis/expect.shell.json @@ -0,0 +1,3 @@ +{ + "expect-hazards": 0 +} diff --git a/js/src/devtools/rootAnalysis/explain.py b/js/src/devtools/rootAnalysis/explain.py new file mode 100755 index 0000000000..2fb45e07f9 --- /dev/null +++ b/js/src/devtools/rootAnalysis/explain.py @@ -0,0 +1,345 @@ +#!/usr/bin/python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + +import argparse +import json +import pathlib +import re +from html import escape + +SRCDIR = pathlib.Path(__file__).parent.parent.parent.absolute() + +parser = argparse.ArgumentParser( + description="Convert the JSON output of the hazard analysis into various text files describing the results.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, +) +parser.add_argument("--verbose", type=bool, default=False, help="verbose output") + +inputs = parser.add_argument_group("Input") +inputs.add_argument( + "rootingHazards", + nargs="?", + default="rootingHazards.json", + help="JSON input file describing the output of the hazard analysis", +) + +outputs = parser.add_argument_group("Output") +outputs.add_argument( + "gcFunctions", + nargs="?", + default="gcFunctions.txt", + help="file containing a list of functions that can GC", +) +outputs.add_argument( + "hazards", + nargs="?", + default="hazards.txt", + help="file containing the rooting hazards found", +) +outputs.add_argument( + "extra", + nargs="?", + default="unnecessary.txt", + help="file containing unnecessary roots", +) +outputs.add_argument( + "refs", + nargs="?", + default="refs.txt", + help="file containing a list of unsafe references to unrooted values", +) +outputs.add_argument( + "html", + nargs="?", + default="hazards.html", + help="HTML-formatted file with the hazards found", +) + +args = parser.parse_args() + + +# Imitate splitFunction from utility.js. 
+def splitfunc(full): + idx = full.find("$") + if idx == -1: + return (full, full) + return (full[0:idx], full[idx + 1 :]) + + +def print_header(outfh): + print( + """\ + + + + + +""", + file=outfh, + ) + + +def print_footer(outfh): + print("", file=outfh) + + +def sourcelink(symbol=None, loc=None, range=None): + if symbol: + return f"https://searchfox.org/mozilla-central/search?q=symbol:{symbol}" + elif range: + filename, lineno = loc.split(":") + [f0, l0] = range[0] + [f1, l1] = range[1] + if f0 == f1 and l1 > l0: + return f"../{filename}?L={l0}-{l1 - 1}#{l0}" + else: + return f"../{filename}?L={l0}#{l0}" + elif loc: + filename, lineno = loc.split(":") + return f"../{filename}?L={lineno}#{lineno}" + else: + raise Exception("missing argument to sourcelink()") + + +def quoted_dict(d): + return {k: escape(v) for k, v in d.items() if type(v) == str} + + +num_hazards = 0 +num_refs = 0 +num_missing = 0 + +try: + with open(args.rootingHazards) as rootingHazards, open( + args.hazards, "w" + ) as hazards, open(args.extra, "w") as extra, open(args.refs, "w") as refs, open( + args.html, "w" + ) as html: + current_gcFunction = None + + hazardousGCFunctions = set() + + results = json.load(rootingHazards) + print_header(html) + + when = min((r for r in results if r["record"] == "time"), key=lambda r: r["t"])[ + "iso" + ] + line = f"Time: {when}" + print(line, file=hazards) + print(line, file=extra) + print(line, file=refs) + + checkboxCounter = 0 + hazard_results = [] + seen_time = False + for result in results: + if result["record"] == "unrooted": + hazard_results.append(result) + gccall_mangled, _ = splitfunc(result["gccall"]) + hazardousGCFunctions.add(gccall_mangled) + if not result.get("expected"): + num_hazards += 1 + + elif result["record"] == "unnecessary": + print( + "\nFunction '{mangled}' has unnecessary root '{variable}' of type {type} at {loc}".format( + **result + ), + file=extra, + ) + + elif result["record"] == "address": + print( + ( + "\nFunction 
'{functionName}'" + " takes unsafe address of unrooted '{variable}'" + " at {loc}" + ).format(**result), + file=refs, + ) + num_refs += 1 + + elif result["record"] == "missing": + print( + "\nFunction '{functionName}' expected hazard(s) but none were found at {loc}".format( + **result + ), + file=hazards, + ) + num_missing += 1 + + readable2mangled = {} + with open(args.gcFunctions) as gcFunctions: + gcExplanations = {} # gcFunction => stack showing why it can GC + + current_func = None + explanation = [] + for line in gcFunctions: + if m := re.match(r"^GC Function: (.*)", line): + if current_func: + gcExplanations[splitfunc(current_func)[0]] = explanation + functionName = m.group(1) + mangled, readable = splitfunc(functionName) + if mangled not in hazardousGCFunctions: + current_func = None + continue + current_func = functionName + if readable != mangled: + readable2mangled[readable] = mangled + # TODO: store the mangled name here, and change + # gcFunctions.txt -> gcFunctions.json and key off of the mangled name. + explanation = [readable] + elif current_func: + explanation.append(line.strip()) + if current_func: + gcExplanations[splitfunc(current_func)[0]] = explanation + + print( + "Found %d hazards, %d unsafe references, %d missing." + % (num_hazards, num_refs, num_missing), + file=html, + ) + print("
    ", file=html) + + for result in hazard_results: + (result["gccall_mangled"], result["gccall_readable"]) = splitfunc( + result["gccall"] + ) + # Attempt to extract out the function name. Won't handle `Foo>::Foo()`. + if m := re.search(r"((?:\w|:|<[^>]*?>)+)\(", result["gccall_readable"]): + result["gccall_short"] = m.group(1) + "()" + else: + result["gccall_short"] = result["gccall_readable"] + if result.get("expected"): + print("\nThis is expected, but ", end="", file=hazards) + else: + print("\nFunction ", end="", file=hazards) + print( + "'{readable}' has unrooted '{variable}'" + " of type '{type}' live across GC call '{gccall_readable}' at {loc}".format( + **result + ), + file=hazards, + ) + for edge in result["trace"]: + print(" {lineText}: {edgeText}".format(**edge), file=hazards) + explanation = gcExplanations.get(result["gccall_mangled"]) + explanation = explanation or gcExplanations.get( + readable2mangled.get( + result["gccall_readable"], result["gccall_readable"] + ), + [], + ) + if explanation: + print("GC Function: " + explanation[0], file=hazards) + for func in explanation[1:]: + print(" " + func, file=hazards) + print(file=hazards) + + if result.get("expected"): + continue + + cfgid = f"CFG_{checkboxCounter}" + gcid = f"GC_{checkboxCounter}" + checkboxCounter += 1 + print( + ( + "
    • \n" + "
    • Function {readable}\n" + "
    • has unrooted {variable} of type '{type}'\n" + "
    • \n" + "
      \n" + ).format( + **quoted_dict(result), + symbol_url=sourcelink(symbol=result["mangled"]), + cfgid=cfgid, + ), + file=html, + ) + for edge in result["trace"]: + print( + "
          {lineText}: {edgeText}
      ".format(**quoted_dict(edge)), + file=html, + ) + print("
      ", file=html) + print( + "
    • \n" + "
      ".format( + **quoted_dict(result), + loc_url=sourcelink(range=result["gcrange"], loc=result["loc"]), + gcid=gcid, + ), + file=html, + ) + for func in explanation: + print(f"
      {escape(func)}
      ", file=html) + print("

    ", file=html) + + print_footer(html) + +except IOError as e: + print("Failed: %s" % str(e)) + +if args.verbose: + print("Wrote %s" % args.hazards) + print("Wrote %s" % args.extra) + print("Wrote %s" % args.refs) + print("Wrote %s" % args.html) + +print( + "Found %d hazards %d unsafe references %d missing" + % (num_hazards, num_refs, num_missing) +) diff --git a/js/src/devtools/rootAnalysis/gen-hazards.sh b/js/src/devtools/rootAnalysis/gen-hazards.sh new file mode 100755 index 0000000000..7007969a14 --- /dev/null +++ b/js/src/devtools/rootAnalysis/gen-hazards.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +set -e + +JOBS="$1" + +for j in $(seq $JOBS); do + env PATH=$PATH:$SIXGILL/bin XDB=$SIXGILL/bin/xdb.so $JS $ANALYZE gcFunctions.lst suppressedFunctions.lst gcTypes.txt $j $JOBS tmp.$j > rootingHazards.$j & +done + +wait + +for j in $(seq $JOBS); do + cat rootingHazards.$j +done diff --git a/js/src/devtools/rootAnalysis/loadCallgraph.js b/js/src/devtools/rootAnalysis/loadCallgraph.js new file mode 100644 index 0000000000..0a388f4de1 --- /dev/null +++ b/js/src/devtools/rootAnalysis/loadCallgraph.js @@ -0,0 +1,590 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('callgraph.js'); + +// Functions come out of sixgill in the form "mangled$readable". The mangled +// name is Truth. 
One mangled name might correspond to multiple readable names, +// for multiple reasons, including (1) sixgill/gcc doesn't always qualify types +// the same way or de-typedef the same amount; (2) sixgill's output treats +// references and pointers the same, and so doesn't distinguish them, but C++ +// treats them as separate for overloading and linking; (3) (identical) +// destructors sometimes have an int32 parameter, sometimes not. +// +// The readable names are useful because they're far more meaningful to the +// user, and are what should show up in reports and questions to mrgiggles. At +// least in most cases, it's fine to have the extra mangled name tacked onto +// the beginning for these. +// +// The strategy used is to separate out the pieces whenever they are read in, +// create a table mapping mangled names to all readable names, and use the +// mangled names in all computation -- except for limited circumstances when +// the readable name is used in annotations. +// +// Note that callgraph.txt uses a compressed representation -- each name is +// mapped to an integer, and those integers are what is recorded in the edges. +// But the integers depend on the full name, whereas the true edge should only +// consider the mangled name. And some of the names encoded in callgraph.txt +// are FieldCalls, not just function names. + +var gcEdges = {}; + +// Returns whether the function was added. (It will be refused if it was +// already there, or if attrs or annotations say it shouldn't be added.) 
+function addGCFunction(caller, reason, gcFunctions, functionAttrs, functions) +{ + if (functionAttrs[caller] && functionAttrs[caller][1] & ATTR_GC_SUPPRESSED) + return false; + + if (ignoreGCFunction(functions.name[caller], functions.readableName)) + return false; + + if (!(caller in gcFunctions)) { + gcFunctions[caller] = reason; + return true; + } + + return false; +} + +// Every caller->callee callsite is associated with attrs saying what is +// allowed at that callsite (eg if it's in a GC suppression zone, it would have +// ATTR_GC_SUPPRESSED set.) A given caller might call the same callee multiple +// times, with different attributes. Associate the edge with +// the intersection (AND) and disjunction (OR) of all of the callsites' attrs. +// The AND ('all') says what attributes are present for all callers; the OR +// ('any') says what attributes are present on any caller. Preserve the +// original order. +// +// During the same scan, build callersOf from calleesOf. +function generate_callgraph(rawCallees) { + const callersOf = new Map(); + const calleesOf = new Map(); + + for (const [caller, callee_attrs] of rawCallees) { + const ordered_callees = []; + + // callee_attrs is a list of {callee,any,all} objects. + const callee2any = new Map(); + const callee2all = new Map(); + for (const {callee, any, all} of callee_attrs) { + const prev_any = callee2any.get(callee); + if (prev_any === undefined) { + assert(!callee2all.has(callee)); + callee2any.set(callee, any); + callee2all.set(callee, all); + ordered_callees.push(callee); + } else { + const prev_all = callee2all.get(callee); + callee2any.set(callee, prev_any | any); + callee2all.set(callee, prev_all & all); + } + } + + // Update the contents of callee_attrs to contain a single entry for + // each callee, with its attrs set to the AND of the attrs observed at + // all callsites within this caller function. 
+ callee_attrs.length = 0; + for (const callee of ordered_callees) { + const any = callee2any.get(callee); + const all = callee2all.get(callee); + if (!calleesOf.has(caller)) + calleesOf.set(caller, new Map()); + calleesOf.get(caller).set(callee, {any, all}); + if (!callersOf.has(callee)) + callersOf.set(callee, new Map()); + callersOf.get(callee).set(caller, {any, all}); + } + } + + return {callersOf, calleesOf}; +} + +// Returns object mapping mangled => reason for GCing +function loadRawCallgraphFile(file, verbose) +{ + const functions = { + // "Map" from identifier to mangled name, or sometimes to a Class.Field name. + name: [""], + + // map from mangled name => list of readable names + readableName: {}, + + mangledToId: {} + }; + + const fieldCallAttrs = {}; + const fieldCallCSU = new Map(); // map from full field name id => csu name + + // set of mangled names (map from mangled name => [any,all]) + var functionAttrs = {}; + + const gcCalls = []; + const indirectCalls = []; + + // map from mangled => list of tuples of {'callee':mangled, 'any':intset, 'all':intset} + const rawCallees = new Map(); + + for (let line of readFileLines_gen(file)) { + line = line.replace(/\n/, ""); + + let match; + if (match = line.charAt(0) == "#" && /^\#(\d+) (.*)/.exec(line)) { + const [ _, id, mangled ] = match; + assert(functions.name.length == id); + functions.name.push(mangled); + functions.mangledToId[mangled] = id|0; + continue; + } + if (match = line.charAt(0) == "=" && /^= (\d+) (.*)/.exec(line)) { + const [ _, id, readable ] = match; + const mangled = functions.name[id]; + if (mangled in functions.readableName) + functions.readableName[mangled].push(readable); + else + functions.readableName[mangled] = [ readable ]; + continue; + } + + let attrs = 0; + // Example line: D /17 6 7 + // + // This means a direct call from 6 -> 7, but within a scope that + // applies attrs 0x1 and 0x10 to the callee. + // + // Look for a bit specifier and remove it from the line if found. 
+ if (line.indexOf("/") != -1) { + match = /^(..)\/(\d+) (.*)/.exec(line); + line = match[1] + match[3]; + attrs = match[2]|0; + } + const tag = line.charAt(0); + if (match = tag == 'I' && /^I (\d+) VARIABLE ([^\,]*)/.exec(line)) { + const caller = match[1]|0; + const name = match[2]; + if (indirectCallCannotGC(functions.name[caller], name)) + attrs |= ATTR_GC_SUPPRESSED; + indirectCalls.push([caller, "IndirectCall: " + name, attrs]); + } else if (match = tag == 'F' && /^F (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) { + const caller = match[1]|0; + const fullfield = match[2]|0; + const csu = match[3]; + const fullfield_str = csu + "." + match[4]; + assert(functions.name[fullfield] == fullfield_str); + if (attrs) + fieldCallAttrs[fullfield] = attrs; + addToMappedList(rawCallees, caller, {callee:fullfield, any:attrs, all:attrs}); + fieldCallCSU.set(fullfield, csu); + + if (fieldCallCannotGC(csu, fullfield_str)) + addToMappedList(rawCallees, fullfield, {callee:ID.nogcfunc, any:0, all:0}); + else + addToMappedList(rawCallees, fullfield, {callee:ID.anyfunc, any:0, all:0}); + } else if (match = tag == 'V' && /^V (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) { + // V tag is no longer used, but we are still emitting it because it + // can be helpful to understand what's going on. 
+ } else if (match = tag == 'D' && /^D (\d+) (\d+)/.exec(line)) { + const caller = match[1]|0; + const callee = match[2]|0; + addToMappedList(rawCallees, caller, {callee, any:attrs, all:attrs}); + } else if (match = tag == 'R' && /^R (\d+) (\d+)/.exec(line)) { + assert(false, "R tag is no longer used"); + } else if (match = tag == 'T' && /^T (\d+) (.*)/.exec(line)) { + const id = match[1]|0; + let tag = match[2]; + if (tag == 'GC Call') + gcCalls.push(id); + } else { + assert(false, "Invalid format in callgraph line: " + line); + } + } + + if (verbose) { + printErr("Loaded[verbose=" + verbose + "] " + file); + } + + return { + fieldCallAttrs, + fieldCallCSU, + gcCalls, + indirectCalls, + rawCallees, + functions + }; +} + +// Take a set of rawcalls filenames (as in, the raw callgraph data output by +// computeCallgraph.js) and combine them into a global callgraph, renumbering +// the IDs as needed. +function mergeRawCallgraphs(filenames, verbose) { + let d; + for (const filename of filenames) { + const raw = loadRawCallgraphFile(filename, verbose); + if (!d) { + d = raw; + continue; + } + + const { + fieldCallAttrs, + fieldCallCSU, + gcCalls, + indirectCalls, + rawCallees, + functions + } = raw; + + // Compute the ID mapping. Incoming functions that already have an ID + // will be mapped to that ID; new ones will allocate a fresh ID. 
+ const remap = new Array(functions.name.length); + for (let i = 1; i < functions.name.length; i++) { + const mangled = functions.name[i]; + const old_id = d.functions.mangledToId[mangled] + if (old_id) { + remap[i] = old_id; + } else { + const newid = d.functions.name.length; + d.functions.mangledToId[mangled] = newid; + d.functions.name.push(mangled); + remap[i] = newid; + assert(!(mangled in d.functions.readableName), mangled + " readable name is already found"); + const readables = functions.readableName[mangled]; + if (readables !== undefined) + d.functions.readableName[mangled] = readables; + } + } + + for (const [fullfield, attrs] of Object.entries(fieldCallAttrs)) + d.fieldCallAttrs[remap[fullfield]] = attrs; + for (const [fullfield, csu] of fieldCallCSU.entries()) + d.fieldCallCSU.set(remap[fullfield], csu); + for (const call of gcCalls) + d.gcCalls.push(remap[call]); + for (const [caller, name, attrs] of indirectCalls) + d.indirectCalls.push([remap[caller], name, attrs]); + for (const [caller, callees] of rawCallees) { + for (const {callee, any, all} of callees) { + addToMappedList(d.rawCallees, remap[caller]|0, {callee:remap[callee], any, all}); + } + } + } + + return d; +} + +function loadCallgraph(files, verbose) +{ + const { + fieldCallAttrs, + fieldCallCSU, + gcCalls, + indirectCalls, + rawCallees, + functions + } = mergeRawCallgraphs(files, verbose); + + assert(ID.jscode == functions.mangledToId["(js-code)"]); + assert(ID.anyfunc == functions.mangledToId["(any-function)"]); + assert(ID.nogcfunc == functions.mangledToId["(nogc-function)"]); + assert(ID.gc == functions.mangledToId["(GC)"]); + + addToMappedList(rawCallees, functions.mangledToId["(any-function)"], {callee:ID.gc, any:0, all:0}); + + // Compute functionAttrs: it should contain the set of functions that + // are *always* called within some sort of limited context (eg GC + // suppression). 
+ + // set of mangled names (map from mangled name => [any,all]) + const functionAttrs = {}; + + // Initialize to field calls with attrs set. + for (var [name, attrs] of Object.entries(fieldCallAttrs)) + functionAttrs[name] = [attrs, attrs]; + + // map from ID => reason + const gcFunctions = { [ID.gc]: 'internal' }; + + // Add in any extra functions at the end. (If we did this early, it would + // mess up the id <-> name correspondence. Also, we need to know if the + // functions even exist in the first place.) + for (var func of extraGCFunctions(functions.readableName)) { + addGCFunction(functions.mangledToId[func], "annotation", gcFunctions, functionAttrs, functions); + } + + for (const func of gcCalls) + addToMappedList(rawCallees, func, {callee:ID.gc, any:0, all:0}); + + for (const [caller, indirect, attrs] of indirectCalls) { + const id = functions.name.length; + functions.name.push(indirect); + functions.mangledToId[indirect] = id; + addToMappedList(rawCallees, caller, {callee:id, any:attrs, all:attrs}); + addToMappedList(rawCallees, id, {callee:ID.anyfunc, any:0, all:0}); + } + + // Callers have a list of callees, with duplicates (if the same function is + // called more than once.) Merge the repeated calls, only keeping attrs + // that are in force for *every* callsite of that callee. Also, generate + // the callersOf table at the same time. + // + // calleesOf : map from mangled => {mangled callee => {'any':intset, 'all':intset}} + // callersOf : map from mangled => {mangled caller => {'any':intset, 'all':intset}} + const {callersOf, calleesOf} = generate_callgraph(rawCallees); + + // Compute functionAttrs: it should contain the set of functions that + // are *always* called within some sort of limited context (eg GC + // suppression). + + // Initialize to field calls with attrs set. 
+ for (var [name, attrs] of Object.entries(fieldCallAttrs)) + functionAttrs[name] = [attrs, attrs]; + + // Initialize functionAttrs to the set of all functions, where each one is + // maximally attributed, and return a worklist containing all simple roots + // (nodes with no callers). + const simple_roots = gather_simple_roots(functionAttrs, calleesOf, callersOf); + + // Traverse the graph, spreading the attrs down from the roots. + propagate_attrs(simple_roots, functionAttrs, calleesOf); + + // There are a surprising number of "recursive roots", where there is a + // cycle of functions calling each other but not called by anything else, + // and these roots may also have descendants. Now that the above traversal + // has eliminated everything reachable from simple roots, traverse the + // remaining graph to gather up a representative function from each root + // cycle. + // + // Simple example: in the JS shell build, moz_xstrdup calls itself, but + // there are no calls to it from within js/src. + const recursive_roots = gather_recursive_roots(functionAttrs, calleesOf, callersOf, functions); + + // And do a final traversal starting with the recursive roots. + propagate_attrs(recursive_roots, functionAttrs, calleesOf); + + for (const [f, [any, all]] of Object.entries(functionAttrs)) { + // Throw out all functions with no attrs set, to reduce the size of the + // output. From now on, "not in functionAttrs" means [any=0, all=0]. + if (any == 0 && all == 0) + delete functionAttrs[f]; + + // Remove GC-suppressed functions from the set of functions known to GC. + // Also remove functions only reachable through calls that have been + // replaced. + if (all & (ATTR_GC_SUPPRESSED | ATTR_REPLACED)) + delete gcFunctions[name]; + } + + // functionAttrs now contains all functions that are ever called in an + // attributed context, based on the known callgraph (i.e., calls through + // function pointers are not taken into consideration.) 
+ + // Sanity check to make sure the callgraph has some functions annotated as + // GC Calls. This is mostly a check to be sure the earlier processing + // succeeded (as opposed to, say, running on empty xdb files because you + // didn't actually compile anything interesting.) + assert(gcCalls.length > 0, "No GC functions found!"); + + // Initialize the worklist to all known gcFunctions. + const worklist = [ID.gc]; + + // Include all field calls (but not virtual method calls). + for (const [name, csuName] of fieldCallCSU) { + const fullFieldName = functions.name[name]; + if (!fieldCallCannotGC(csuName, fullFieldName)) { + gcFunctions[name] = 'arbitrary function pointer ' + fullFieldName; + worklist.push(name); + } + } + + // Recursively find all callers not always called in a GC suppression + // context, and add them to the set of gcFunctions. + while (worklist.length) { + name = worklist.shift(); + assert(name in gcFunctions, "gcFunctions does not contain " + name); + if (!callersOf.has(name)) + continue; + for (const [caller, {any, all}] of callersOf.get(name)) { + if ((all & (ATTR_GC_SUPPRESSED | ATTR_REPLACED)) == 0) { + if (addGCFunction(caller, name, gcFunctions, functionAttrs, functions)) + worklist.push(caller); + } + } + } + + // Convert functionAttrs to limitedFunctions (using mangled names instead + // of ids.) + + // set of mangled names (map from mangled name => {any,all,recursive_root:bool} + var limitedFunctions = {}; + + for (const [id, [any, all]] of Object.entries(functionAttrs)) { + if (all) { + limitedFunctions[functions.name[id]] = { attributes: all }; + } + } + + for (const [id, limits, label] of recursive_roots) { + const name = functions.name[id]; + const s = limitedFunctions[name] || (limitedFunctions[name] = {}); + s.recursive_root = true; + } + + // Remap ids to mangled names. 
+ const namedGCFunctions = {}; + for (const [caller, reason] of Object.entries(gcFunctions)) { + namedGCFunctions[functions.name[caller]] = functions.name[reason] || reason; + } + + return { + gcFunctions: namedGCFunctions, + functions, + calleesOf, + callersOf, + limitedFunctions + }; +} + +function saveCallgraph(functions, calleesOf) { + // Write out all the ids and their readable names. + let id = -1; + for (const name of functions.name) { + id += 1; + if (id == 0) continue; + print(`#${id} ${name}`); + for (const readable of (functions.readableName[name] || [])) { + if (readable != name) + print(`= ${id} ${readable}`); + } + } + + // Omit field calls for now; let them appear as if they were functions. + + const attrstring = range => range.any || range.all ? `${range.all}:${range.any} ` : ''; + for (const [caller, callees] of calleesOf) { + for (const [callee, attrs] of callees) { + print(`D ${attrstring(attrs)}${caller} ${callee}`); + } + } + + // Omit tags for now. This really should preserve all tags. The "GC Call" + // tag will already be represented in the graph by having an edge to the + // "(GC)" node. +} + +// Return a worklist of functions with no callers, and also initialize +// functionAttrs to the set of all functions, each mapped to +// [ATTRS_NONE, ATTRS_UNVISITED]. +function gather_simple_roots(functionAttrs, calleesOf, callersOf) { + const roots = []; + for (const callee of callersOf.keys()) + functionAttrs[callee] = [ATTRS_NONE, ATTRS_UNVISITED]; + for (const caller of calleesOf.keys()) { + functionAttrs[caller] = [ATTRS_NONE, ATTRS_UNVISITED]; + if (!callersOf.has(caller)) + roots.push([caller, ATTRS_NONE, 'root']); + } + + return roots; +} + +// Recursively traverse the callgraph from the roots. Recurse through every +// edge that weakens the attrs. (Attrs that entirely disappear, ie go to a zero +// intset, will be removed from functionAttrs.) 
+function propagate_attrs(roots, functionAttrs, calleesOf) { + const worklist = Array.from(roots); + let top = worklist.length; + while (top > 0) { + // Consider caller where (graph) -> caller -> (0 or more callees) + // 'callercaller' is for debugging. + const [caller, edge_attrs, callercaller] = worklist[--top]; + assert(caller in functionAttrs); + const [prev_any, prev_all] = functionAttrs[caller]; + assert(prev_any !== undefined); + assert(prev_all !== undefined); + const [new_any, new_all] = [prev_any | edge_attrs, prev_all & edge_attrs]; + if (prev_any != new_any || prev_all != new_all) { + // Update function attrs, then recurse to the children if anything + // was updated. + functionAttrs[caller] = [new_any, new_all]; + for (const [callee, {any, all}] of (calleesOf.get(caller) || new Map)) + worklist[top++] = [callee, all | edge_attrs, caller]; + } + } +} + +// Mutually-recursive roots and their descendants will not have been visited, +// and will still be set to [0, ATTRS_UNVISITED]. Scan through and gather them. +function gather_recursive_roots(functionAttrs, calleesOf, callersOf, functions) { + const roots = []; + + // Pick any node. Mark everything reachable by adding to a 'seen' set. At + // the end, if there are any incoming edges to that node from an unmarked + // node, then it is not a root. Otherwise, mark the node as a root. (There + // will be at least one back edge coming into the node from a marked node + // in this case, since otherwise it would have already been considered to + // be a root.) + // + // Repeat with remaining unmarked nodes until all nodes are marked. + const seen = new Set(); + for (let [func, [any, all]] of Object.entries(functionAttrs)) { + func = func|0; + if (all != ATTRS_UNVISITED) + continue; + + // We should only be looking at nodes with callers, since otherwise + // they would have been handled in the previous pass! 
+ assert(callersOf.has(func)); + assert(callersOf.get(func).size > 0); + + if (seen.has(func)) + continue; + + const work = [func]; + while (work.length > 0) { + const f = work.pop(); + if (!calleesOf.has(f)) continue; + for (const callee of calleesOf.get(f).keys()) { + if (!seen.has(callee) && + callee != func && + functionAttrs[callee][1] == ATTRS_UNVISITED) + { + work.push(callee); + seen.add(callee); + } + } + } + + assert(!seen.has(func)); + seen.add(func); + if ([...callersOf.get(func).keys()].findIndex(f => !seen.has(f)) == -1) { + // No unmarked incoming edges, including self-edges, so this is a + // (recursive) root. + roots.push([func, ATTRS_NONE, 'recursive-root']); + } + } + + return roots; + + tmp = calleesOf; + calleesOf = {}; + for (const [callerId, callees] of Object.entries(calleesOf)) { + const caller = functionNames[callerId]; + for (const {calleeId, limits} of callees) + calleesOf[caller][functionNames[calleeId]] = limits; + } + + tmp = callersOf; + callersOf = {}; + for (const [calleeId, callers] of Object.entries(callersOf)) { + const callee = functionNames[calleeId]; + callersOf[callee] = {}; + for (const {callerId, limits} of callers) + callersOf[callee][functionNames[caller]] = limits; + } +} diff --git a/js/src/devtools/rootAnalysis/mach_commands.py b/js/src/devtools/rootAnalysis/mach_commands.py new file mode 100644 index 0000000000..c2fc1980c9 --- /dev/null +++ b/js/src/devtools/rootAnalysis/mach_commands.py @@ -0,0 +1,690 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + +import argparse +import html +import json +import logging +import os +import re +import textwrap +import webbrowser + +# Command files like this are listed in build/mach_initialize.py in alphabetical +# order, but we need to access commands earlier in the sorted order to grab +# their arguments. Force them to load now. +import mozbuild.artifact_commands # NOQA: F401 +import mozbuild.build_commands # NOQA: F401 +import mozhttpd +from mach.base import FailedCommandError, MachError +from mach.decorators import Command, CommandArgument, SubCommand +from mach.registrar import Registrar +from mozbuild.base import BuildEnvironmentNotFoundException +from mozbuild.mozconfig import MozconfigLoader + + +# Use a decorator to copy command arguments off of the named command. Instead +# of a decorator, this could be straight code that edits eg +# MachCommands.build_shell._mach_command.arguments, but that looked uglier. +def inherit_command_args(command, subcommand=None): + """Decorator for inheriting all command-line arguments from `mach build`. + + This should come earlier in the source file than @Command or @SubCommand, + because it relies on that decorator having run first.""" + + def inherited(func): + handler = Registrar.command_handlers.get(command) + if handler is not None and subcommand is not None: + handler = handler.subcommand_handlers.get(subcommand) + if handler is None: + raise MachError( + "{} command unknown or not yet loaded".format( + command if subcommand is None else command + " " + subcommand + ) + ) + func._mach_command.arguments.extend(handler.arguments) + return func + + return inherited + + +def state_dir(): + return os.environ.get("MOZBUILD_STATE_PATH", os.path.expanduser("~/.mozbuild")) + + +def tools_dir(): + if os.environ.get("MOZ_FETCHES_DIR"): + # In automation, tools are provided by toolchain dependencies. 
+ return os.path.join(os.environ["HOME"], os.environ["MOZ_FETCHES_DIR"]) + + # In development, `mach hazard bootstrap` installs the tools separately + # to avoid colliding with the "main" compiler versions, which can + # change separately (and the precompiled sixgill and compiler version + # must match exactly). + return os.path.join(state_dir(), "hazard-tools") + + +def sixgill_dir(): + return os.path.join(tools_dir(), "sixgill") + + +def gcc_dir(): + return os.path.join(tools_dir(), "gcc") + + +def script_dir(command_context): + return os.path.join(command_context.topsrcdir, "js/src/devtools/rootAnalysis") + + +def get_work_dir(command_context, project, given): + if given is not None: + return given + return os.path.join(command_context.topsrcdir, "haz-" + project) + + +def get_objdir(command_context, kwargs): + project = kwargs["project"] + objdir = kwargs["haz_objdir"] + if objdir is None: + objdir = os.environ.get("HAZ_OBJDIR") + if objdir is None: + objdir = os.path.join(command_context.topsrcdir, "obj-analyzed-" + project) + return objdir + + +def ensure_dir_exists(dir): + os.makedirs(dir, exist_ok=True) + return dir + + +# Force the use of hazard-compatible installs of tools. 
+def setup_env_for_tools(env): + gccbin = os.path.join(gcc_dir(), "bin") + env["CC"] = os.path.join(gccbin, "gcc") + env["CXX"] = os.path.join(gccbin, "g++") + env["PATH"] = "{sixgill_dir}/usr/bin:{gccbin}:{PATH}".format( + sixgill_dir=sixgill_dir(), gccbin=gccbin, PATH=env["PATH"] + ) + + +def setup_env_for_shell(env, shell): + """Add JS shell directory to dynamic lib search path""" + for var in ("LD_LIBRARY_PATH", "DYLD_LIBRARY_PATH"): + env[var] = ":".join(p for p in (env.get(var), os.path.dirname(shell)) if p) + + +@Command( + "hazards", + category="build", + order="declaration", + description="Commands for running the static analysis for GC rooting hazards", +) +def hazards(command_context): + """Commands related to performing the GC rooting hazard analysis""" + print("See `mach hazards --help` for a list of subcommands") + + +@inherit_command_args("artifact", "toolchain") +@SubCommand( + "hazards", + "bootstrap", + description="Install prerequisites for the hazard analysis", +) +def bootstrap(command_context, **kwargs): + orig_dir = os.getcwd() + os.chdir(ensure_dir_exists(tools_dir())) + try: + kwargs["from_build"] = ("linux64-gcc-sixgill", "linux64-gcc-9") + command_context._mach_context.commands.dispatch( + "artifact", command_context._mach_context, subcommand="toolchain", **kwargs + ) + finally: + os.chdir(orig_dir) + + +CLOBBER_CHOICES = {"objdir", "work", "shell", "all"} + + +@SubCommand("hazards", "clobber", description="Clean up hazard-related files") +@CommandArgument("--project", default="browser", help="Build the given project.") +@CommandArgument("--application", dest="project", help="Build the given project.") +@CommandArgument("--haz-objdir", default=None, help="Hazard analysis objdir.") +@CommandArgument( + "--work-dir", default=None, help="Directory for output and working files." 
+) +@CommandArgument( + "what", + default=["objdir", "work"], + nargs="*", + help="Target to clobber, must be one of {{{}}} (default " + "objdir and work).".format(", ".join(CLOBBER_CHOICES)), +) +def clobber(command_context, what, **kwargs): + from mozbuild.controller.clobber import Clobberer + + what = set(what) + if "all" in what: + what.update(CLOBBER_CHOICES) + invalid = what - CLOBBER_CHOICES + if invalid: + print( + "Unknown clobber target(s): {}. Choose from {{{}}}".format( + ", ".join(invalid), ", ".join(CLOBBER_CHOICES) + ) + ) + return 1 + + try: + substs = command_context.substs + except BuildEnvironmentNotFoundException: + substs = {} + + if "objdir" in what: + objdir = get_objdir(command_context, kwargs) + print(f"removing {objdir}") + Clobberer(command_context.topsrcdir, objdir, substs).remove_objdir(full=True) + if "work" in what: + project = kwargs["project"] + work_dir = get_work_dir(command_context, project, kwargs["work_dir"]) + print(f"removing {work_dir}") + Clobberer(command_context.topsrcdir, work_dir, substs).remove_objdir(full=True) + if "shell" in what: + objdir = os.path.join(command_context.topsrcdir, "obj-haz-shell") + print(f"removing {objdir}") + Clobberer(command_context.topsrcdir, objdir, substs).remove_objdir(full=True) + + +@inherit_command_args("build") +@SubCommand( + "hazards", "build-shell", description="Build a shell for the hazard analysis" +) +@CommandArgument( + "--mozconfig", + default=None, + metavar="FILENAME", + help="Build with the given mozconfig.", +) +def build_shell(command_context, **kwargs): + """Build a JS shell to use to run the rooting hazard analysis.""" + # The JS shell requires some specific configuration settings to execute + # the hazard analysis code, and configuration is done via mozconfig. + # Subprocesses find MOZCONFIG in the environment, so we can't just + # modify the settings in this process's loaded version. Pass it through + # the environment. 
+ + default_mozconfig = "js/src/devtools/rootAnalysis/mozconfig.haz_shell" + mozconfig_path = ( + kwargs.pop("mozconfig", None) + or os.environ.get("MOZCONFIG") + or default_mozconfig + ) + mozconfig_path = os.path.join(command_context.topsrcdir, mozconfig_path) + loader = MozconfigLoader(command_context.topsrcdir) + mozconfig = loader.read_mozconfig(mozconfig_path) + + # Validate the mozconfig settings in case the user overrode the default. + configure_args = mozconfig["configure_args"] + if "--enable-ctypes" not in configure_args: + raise FailedCommandError( + "ctypes required in hazard JS shell, mozconfig=" + mozconfig_path + ) + + # Transmit the mozconfig location to build subprocesses. + os.environ["MOZCONFIG"] = mozconfig_path + + setup_env_for_tools(os.environ) + + # Set a default objdir for the shell, for developer builds. + os.environ.setdefault( + "MOZ_OBJDIR", os.path.join(command_context.topsrcdir, "obj-haz-shell") + ) + + return command_context._mach_context.commands.dispatch( + "build", command_context._mach_context, **kwargs + ) + + +def read_json_file(filename): + with open(filename) as fh: + return json.load(fh) + + +def ensure_shell(command_context, objdir): + if objdir is None: + objdir = os.path.join(command_context.topsrcdir, "obj-haz-shell") + + try: + binaries = read_json_file(os.path.join(objdir, "binaries.json")) + info = [b for b in binaries["programs"] if b["program"] == "js"][0] + return os.path.join(objdir, info["install_target"], "js") + except (OSError, KeyError): + raise FailedCommandError( + """\ +no shell found in %s -- must build the JS shell with `mach hazards build-shell` first""" + % objdir + ) + + +def validate_mozconfig(command_context, kwargs): + app = kwargs.pop("project") + default_mozconfig = "js/src/devtools/rootAnalysis/mozconfig.%s" % app + mozconfig_path = ( + kwargs.pop("mozconfig", None) + or os.environ.get("MOZCONFIG") + or default_mozconfig + ) + mozconfig_path = os.path.join(command_context.topsrcdir, 
mozconfig_path) + + loader = MozconfigLoader(command_context.topsrcdir) + mozconfig = loader.read_mozconfig(mozconfig_path) + configure_args = mozconfig["configure_args"] + + # Require an explicit --enable-project/application=APP (even if you just + # want to build the default browser project.) + if ( + "--enable-project=%s" % app not in configure_args + and "--enable-application=%s" % app not in configure_args + ): + raise FailedCommandError( + textwrap.dedent( + f"""\ + mozconfig {mozconfig_path} builds wrong project. + unset MOZCONFIG to use the default {default_mozconfig}\ + """ + ) + ) + + if not any("--with-compiler-wrapper" in a for a in configure_args): + raise FailedCommandError( + "mozconfig must wrap compiles with --with-compiler-wrapper" + ) + + return mozconfig_path + + +@inherit_command_args("build") +@SubCommand( + "hazards", + "gather", + description="Gather analysis data by compiling the given project", +) +@CommandArgument("--project", default="browser", help="Build the given project.") +@CommandArgument("--application", dest="project", help="Build the given project.") +@CommandArgument( + "--haz-objdir", default=None, help="Write object files to this directory." +) +@CommandArgument( + "--work-dir", default=None, help="Directory for output and working files." 
+) +def gather_hazard_data(command_context, **kwargs): + """Gather analysis information by compiling the tree""" + project = kwargs["project"] + objdir = get_objdir(command_context, kwargs) + + work_dir = get_work_dir(command_context, project, kwargs["work_dir"]) + ensure_dir_exists(work_dir) + with open(os.path.join(work_dir, "defaults.py"), "wt") as fh: + data = textwrap.dedent( + """\ + analysis_scriptdir = "{script_dir}" + objdir = "{objdir}" + source = "{srcdir}" + sixgill = "{sixgill_dir}/usr/libexec/sixgill" + sixgill_bin = "{sixgill_dir}/usr/bin" + """ + ).format( + script_dir=script_dir(command_context), + objdir=objdir, + srcdir=command_context.topsrcdir, + sixgill_dir=sixgill_dir(), + gcc_dir=gcc_dir(), + ) + fh.write(data) + + buildscript = " ".join( + [ + command_context.topsrcdir + "/mach hazards compile", + *kwargs.get("what", []), + "--job-size=3.0", # Conservatively estimate 3GB/process + "--project=" + project, + "--haz-objdir=" + objdir, + ] + ) + args = [ + os.path.join(script_dir(command_context), "run_complete"), + "--foreground", + "--no-logs", + "--build-root=" + objdir, + "--wrap-dir=" + sixgill_dir() + "/usr/libexec/sixgill/scripts/wrap_gcc", + "--work-dir=work", + "-b", + sixgill_dir() + "/usr/bin", + "--buildcommand=" + buildscript, + ".", + ] + + return command_context.run_process(args=args, cwd=work_dir, pass_thru=True) + + +@inherit_command_args("build") +@SubCommand("hazards", "compile", description=argparse.SUPPRESS) +@CommandArgument( + "--mozconfig", + default=None, + metavar="FILENAME", + help="Build with the given mozconfig.", +) +@CommandArgument("--project", default="browser", help="Build the given project.") +@CommandArgument("--application", dest="project", help="Build the given project.") +@CommandArgument( + "--haz-objdir", + default=os.environ.get("HAZ_OBJDIR"), + help="Write object files to this directory.", +) +def inner_compile(command_context, **kwargs): + """Build a source tree and gather analysis information while 
running + under the influence of the analysis collection server.""" + + env = os.environ + + # Check whether we are running underneath the manager (and therefore + # have a server to talk to). + if "XGILL_CONFIG" not in env: + raise FailedCommandError( + "no sixgill manager detected. `mach hazards compile` " + + "should only be run from `mach hazards gather`" + ) + + mozconfig_path = validate_mozconfig(command_context, kwargs) + + # Communicate mozconfig to build subprocesses. + env["MOZCONFIG"] = os.path.join(command_context.topsrcdir, mozconfig_path) + + # hazard mozconfigs need to find binaries in .mozbuild + env["MOZBUILD_STATE_PATH"] = state_dir() + + # Suppress the gathering of sources, to save disk space and memory. + env["XGILL_NO_SOURCE"] = "1" + + setup_env_for_tools(env) + + if "haz_objdir" in kwargs: + env["MOZ_OBJDIR"] = kwargs.pop("haz_objdir") + + return command_context._mach_context.commands.dispatch( + "build", command_context._mach_context, **kwargs + ) + + +@SubCommand( + "hazards", "analyze", description="Analyzed gathered data for rooting hazards" +) +@CommandArgument( + "--project", + default="browser", + help="Analyze the output for the given project.", +) +@CommandArgument("--application", dest="project", help="Build the given project.") +@CommandArgument( + "--shell-objdir", + default=None, + help="objdir containing the optimized JS shell for running the analysis.", +) +@CommandArgument( + "--work-dir", default=None, help="Directory for output and working files." +) +@CommandArgument( + "--jobs", "-j", default=None, type=int, help="Number of parallel analyzers." 
+) +@CommandArgument( + "--verbose", + "-v", + default=False, + action="store_true", + help="Display executed commands.", +) +@CommandArgument( + "--from-stage", + default=None, + help="Stage to begin running at ('list' to see all).", +) +@CommandArgument( + "extra", + nargs=argparse.REMAINDER, + default=(), + help="Remaining non-optional arguments to analyze.py script", +) +def analyze( + command_context, + project, + shell_objdir, + work_dir, + jobs, + verbose, + from_stage, + extra, +): + """Analyzed gathered data for rooting hazards""" + + shell = ensure_shell(command_context, shell_objdir) + args = [ + os.path.join(script_dir(command_context), "analyze.py"), + "--js", + shell, + *extra, + ] + + if from_stage is None: + pass + elif from_stage == "list": + args.append("--list") + else: + args.extend(["--first", from_stage]) + + if jobs is not None: + args.extend(["-j", jobs]) + + if verbose: + args.append("-v") + + setup_env_for_tools(os.environ) + setup_env_for_shell(os.environ, shell) + + work_dir = get_work_dir(command_context, project, work_dir) + return command_context.run_process(args=args, cwd=work_dir, pass_thru=True) + + +@SubCommand( + "hazards", + "self-test", + description="Run a self-test to verify hazards are detected", +) +@CommandArgument( + "--shell-objdir", + default=None, + help="objdir containing the optimized JS shell for running the analysis.", +) +@CommandArgument( + "extra", + nargs=argparse.REMAINDER, + help="Remaining non-optional arguments to pass to run-test.py", +) +def self_test(command_context, shell_objdir, extra): + """Analyzed gathered data for rooting hazards""" + shell = ensure_shell(command_context, shell_objdir) + args = [ + os.path.join(script_dir(command_context), "run-test.py"), + "-v", + "--js", + shell, + "--sixgill", + os.path.join(tools_dir(), "sixgill"), + "--gccdir", + gcc_dir(), + ] + args.extend(extra) + + setup_env_for_tools(os.environ) + setup_env_for_shell(os.environ, shell) + + return 
command_context.run_process(args=args, pass_thru=True) + + +def annotated_source(filename, query): + """The index page has URLs of the format . + The `#m` part will be stripped off and used by the browser to jump to the correct line. + The `?L=m-n` or `?L=m` parameter will be processed here on the server to highlight + the given line range.""" + linequery = query.replace("L=", "") + if "-" in linequery: + line0, line1 = linequery.split("-", 1) + else: + line0, line1 = linequery or "0", linequery or "0" + line0 = int(line0) + line1 = int(line1) + + fh = open(filename, "rt") + + out = "
    "
    +    for lineno, line in enumerate(fh, 1):
    +        processed = f"{lineno} \n"
    +        out += processed
    +
    +    return out
    +
    +
    +@SubCommand(
    +    "hazards", "view", description="Display a web page describing any hazards found"
    +)
    +@CommandArgument(
    +    "--project",
    +    default="browser",
    +    help="Analyze the output for the given project.",
    +)
    +@CommandArgument("--application", dest="project", help="Build the given project.")
    +@CommandArgument(
    +    "--haz-objdir", default=None, help="Write object files to this directory."
    +)
    +@CommandArgument(
    +    "--work-dir", default=None, help="Directory for output and working files."
    +)
    +@CommandArgument("--port", default=6006, help="Port of the web server")
    +@CommandArgument(
    +    "--serve-only",
    +    default=False,
    +    action="store_true",
    +    help="Serve only, do not navigate to page",
    +)
    +def view_hazards(command_context, project, haz_objdir, work_dir, port, serve_only):
    +    work_dir = get_work_dir(command_context, project, work_dir)
    +    haztop = os.path.basename(work_dir)
    +    if haz_objdir is None:
    +        haz_objdir = os.environ.get("HAZ_OBJDIR")
    +    if haz_objdir is None:
    +        haz_objdir = os.path.join(command_context.topsrcdir, "obj-analyzed-" + project)
    +
    +    httpd = None
    +
    +    def serve_source_file(request, path):
    +        info = {"req": path}
    +
    +        def log(fmt, level=logging.INFO):
    +            return command_context.log(level, "view-hazards", info, fmt)
    +
    +        if path in ("", f"{haztop}"):
    +            info["dest"] = f"/{haztop}/hazards.html"
    +            info["code"] = 301
    +            log("serve '{req}' -> {code} {dest}")
    +            return (info["code"], {"Location": info["dest"]}, "")
    +
    +        # Allow files to be served from the source directory or the objdir.
    +        roots = (command_context.topsrcdir, haz_objdir)
    +
    +        try:
    +            # Validate the path. Some source files have weird characters in their paths (eg "+"), but they
    +            # all start with an alphanumeric or underscore.
    +            command_context.log(
    +                logging.DEBUG, "view-hazards", {"path": path}, "Raw path: {path}"
    +            )
    +            path_component = r"\w[\w\-\.\+]*"
    +            if not re.match(f"({path_component}/)*{path_component}$", path):
    +                raise ValueError("invalid path")
    +
    +            # Resolve the path to under one of the roots, and
    +            # ensure that the actual file really is underneath a root directory.
    +            for rootdir in roots:
    +                fullpath = os.path.join(rootdir, path)
    +                info["path"] = fullpath
    +                fullpath = os.path.realpath(fullpath)
    +                if os.path.isfile(fullpath):
    +                    # symlinks between roots are ok, but not symlinks outside of the roots.
    +                    tops = [
    +                        d
    +                        for d in roots
    +                        if fullpath.startswith(os.path.realpath(d) + "/")
    +                    ]
    +                    if len(tops) > 0:
    +                        break  # Found a file underneath a root.
    +            else:
    +                raise IOError("not found")
    +
    +            html = annotated_source(fullpath, request.query)
    +            log("serve '{req}' -> 200 {path}")
    +            return (
    +                200,
    +                {"Content-type": "text/html", "Content-length": len(html)},
    +                html,
    +            )
    +        except (IOError, ValueError):
    +            log("serve '{req}' -> 404 {path}", logging.ERROR)
    +            return (
    +                404,
    +                {"Content-type": "text/plain"},
    +                "We don't have that around here. Don't be asking for it.",
    +            )
    +
    +    httpd = mozhttpd.MozHttpd(
    +        port=port,
    +        docroot=None,
    +        path_mappings={"/" + haztop: work_dir},
    +        urlhandlers=[
    +            # Treat everything not starting with /haz-browser/ (or /haz-js/)
    +            # as a source file to be processed. Everything else is served
    +            # as a plain file.
    +            {
    +                "method": "GET",
    +                "path": "/(?!haz-" + project + "/)(.*)",
    +                "function": serve_source_file,
    +            },
    +        ],
    +        log_requests=True,
    +    )
    +
    +    # The mozhttpd request handler class eats log messages.
    +    httpd.handler_class.log_message = lambda self, format, *args: command_context.log(
    +        logging.INFO, "view-hazards", {}, format % args
    +    )
    +
    +    print("Serving at %s:%s" % (httpd.host, httpd.port))
    +
    +    httpd.start(block=False)
    +    url = httpd.get_url(f"/{haztop}/hazards.html")
    +    display_url = True
    +    if not serve_only:
    +        try:
    +            webbrowser.get().open_new_tab(url)
    +            display_url = False
    +        except Exception:
    +            pass
    +    if display_url:
    +        print("Please open %s in a browser." % url)
    +
    +    print("Hit CTRL+c to stop server.")
    +    httpd.server.join()
    diff --git a/js/src/devtools/rootAnalysis/mergeJSON.js b/js/src/devtools/rootAnalysis/mergeJSON.js
    new file mode 100644
    index 0000000000..2ac5a983db
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/mergeJSON.js
    @@ -0,0 +1,26 @@
    +/* This Source Code Form is subject to the terms of the Mozilla Public
    + * License, v. 2.0. If a copy of the MPL was not distributed with this file,
    + * You can obtain one at http://mozilla.org/MPL/2.0/. */
    +
    +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */
    +
    +var infiles = [...scriptArgs];
    +var outfile = infiles.pop();
    +
    +let output;
    +for (const filename of infiles) {
    +    const data = JSON.parse(os.file.readFile(filename));
    +    if (!output) {
    +        output = data;
    +    } else if (Array.isArray(data) != Array.isArray(output)) {
    +        throw new Error('mismatched types');
    +    } else if (Array.isArray(output)) {
    +        output.push(...data);
    +    } else {
    +        Object.assign(output, data);
    +    }
    +}
    +
    +var origOut = os.file.redirect(outfile);
    +print(JSON.stringify(output, null, 4));
    +os.file.close(os.file.redirect(origOut));
    diff --git a/js/src/devtools/rootAnalysis/mozconfig.browser b/js/src/devtools/rootAnalysis/mozconfig.browser
    new file mode 100644
    index 0000000000..6c3517865b
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/mozconfig.browser
    @@ -0,0 +1,19 @@
    +# This Source Code Form is subject to the terms of the Mozilla Public
    +# License, v. 2.0. If a copy of the MPL was not distributed with this
    +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
    +
    +# This mozconfig is used when analyzing the source code of the Firefox browser
    +# for GC rooting hazards. See
    +# <https://firefox-source-docs.mozilla.org/js/HazardAnalysis/index.html>.
    +
    +ac_add_options --enable-project=browser
    +ac_add_options --enable-js-shell
    +
    +# the sixgill wrapper is not compatible with building wasm objects with clang.
    +export WASM_SANDBOXED_LIBRARIES=
    +
    +# the hazard analysis is not happy with std::filesystem uses in relrhack host
    +# tool.
    +ac_add_options --disable-elf-hack
    +
    +. $topsrcdir/js/src/devtools/rootAnalysis/mozconfig.common
    diff --git a/js/src/devtools/rootAnalysis/mozconfig.common b/js/src/devtools/rootAnalysis/mozconfig.common
    new file mode 100644
    index 0000000000..c68fb6a26c
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/mozconfig.common
    @@ -0,0 +1,37 @@
    +# This Source Code Form is subject to the terms of the Mozilla Public
    +# License, v. 2.0. If a copy of the MPL was not distributed with this
    +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
    +
    +# Configuration shared between browser and shell builds.
    +
    +# The configuration options are chosen to compile the most code
    +# (--enable-debug, --enable-tests) in the trickiest way possible
    +# (--enable-optimize) to maximize the chance of seeing tricky static orderings.
    +ac_add_options --enable-debug
    +ac_add_options --enable-tests
    +ac_add_options --enable-optimize
    +
    +# Wrap all compiler invocations in order to enable the plugin and send
    +# information to a common database.
    +if [ -z "$AUTOMATION" ]; then
    +    # Developer build: `mach hazards bootstrap` puts tools here:
    +    TOOLS_DIR="$MOZBUILD_STATE_PATH/hazard-tools"
    +else
    +    # Automation build: tools are downloaded from upstream tasks.
    +    TOOLS_DIR="$MOZ_FETCHES_DIR"
    +fi
    +ac_add_options --with-compiler-wrapper="${TOOLS_DIR}"/sixgill/usr/libexec/sixgill/scripts/wrap_gcc/basecc
    +
    +# Stuff that gets in the way.
    +ac_add_options --without-ccache
    +ac_add_options --disable-replace-malloc
    +
    +# -Wattributes is very verbose due to attributes being ignored on template
    +# instantiations.
    +#
    +# -Wignored-attributes is very verbose due to attributes being
    +# ignored on template parameters.
    +ANALYSIS_EXTRA_CFLAGS="-Wno-attributes -Wno-ignored-attributes"
    +CFLAGS="$CFLAGS $ANALYSIS_EXTRA_CFLAGS"
    +CPPFLAGS="$CPPFLAGS $ANALYSIS_EXTRA_CFLAGS"
    +CXXFLAGS="$CXXFLAGS $ANALYSIS_EXTRA_CFLAGS"
    diff --git a/js/src/devtools/rootAnalysis/mozconfig.haz_shell b/js/src/devtools/rootAnalysis/mozconfig.haz_shell
    new file mode 100644
    index 0000000000..68741f0454
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/mozconfig.haz_shell
    @@ -0,0 +1,18 @@
    +# This Source Code Form is subject to the terms of the Mozilla Public
    +# License, v. 2.0. If a copy of the MPL was not distributed with this
    +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
    +
    +# This mozconfig is for compiling the JS shell that runs the static rooting
    +# hazard analysis. See
    +# <https://firefox-source-docs.mozilla.org/js/HazardAnalysis/index.html>.
    +
    +ac_add_options --enable-ctypes
    +ac_add_options --enable-optimize
    +ac_add_options --disable-debug
    +ac_add_options --enable-project=js
    +ac_add_options --enable-nspr-build
    +ac_add_options --disable-jemalloc
    +
    +if [ -n "$AUTOMATION" ]; then
    +  mk_add_options MOZ_OBJDIR="${HAZARD_SHELL_OBJDIR}"
    +fi
    diff --git a/js/src/devtools/rootAnalysis/mozconfig.js b/js/src/devtools/rootAnalysis/mozconfig.js
    new file mode 100644
    index 0000000000..07e584c210
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/mozconfig.js
    @@ -0,0 +1,16 @@
    +# This Source Code Form is subject to the terms of the Mozilla Public
    +# License, v. 2.0. If a copy of the MPL was not distributed with this
    +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
    +
    +# This mozconfig is used when analyzing the source code of the js/src tree for
    +# GC rooting hazards. See
    +# <https://firefox-source-docs.mozilla.org/js/HazardAnalysis/index.html>.
    +
    +ac_add_options --enable-project=js
    +
    +# Also compile NSPR to see through its part of the control flow graph (not
    +# currently needed, but also helps with weird problems finding the right
    +# headers.)
    +ac_add_options --enable-nspr-build
    +
    +. $topsrcdir/js/src/devtools/rootAnalysis/mozconfig.common
    diff --git a/js/src/devtools/rootAnalysis/run-analysis.sh b/js/src/devtools/rootAnalysis/run-analysis.sh
    new file mode 100755
    index 0000000000..157821cc92
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/run-analysis.sh
    @@ -0,0 +1,4 @@
    +#!/bin/sh
    +
    +SRCDIR=$(cd $(dirname $0)/../../../..; pwd)
    +GECKO_PATH=$SRCDIR $SRCDIR/taskcluster/scripts/builder/build-haz-linux.sh $(pwd) "$@"
    diff --git a/js/src/devtools/rootAnalysis/run-test.py b/js/src/devtools/rootAnalysis/run-test.py
    new file mode 100755
    index 0000000000..b4835efec5
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/run-test.py
    @@ -0,0 +1,154 @@
    +#!/usr/bin/env python3
    +# This Source Code Form is subject to the terms of the Mozilla Public
    +# License, v. 2.0. If a copy of the MPL was not distributed with this
    +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
    +
    +import argparse
    +import os
    +import site
    +import subprocess
    +import sys
    +from glob import glob
    +
    +scriptdir = os.path.abspath(os.path.dirname(__file__))
    +testdir = os.path.join(scriptdir, "t")
    +
    +site.addsitedir(testdir)
    +from testlib import Test, equal
    +
    +parser = argparse.ArgumentParser(description="run hazard analysis tests")
    +parser.add_argument(
    +    "--js", default=os.environ.get("JS"), help="JS binary to run the tests with"
    +)
    +parser.add_argument(
    +    "--sixgill",
    +    default=os.environ.get("SIXGILL", os.path.join(testdir, "sixgill")),
    +    help="Path to root of sixgill installation",
    +)
    +parser.add_argument(
    +    "--sixgill-bin",
    +    default=os.environ.get("SIXGILL_BIN"),
    +    help="Path to sixgill binary dir",
    +)
    +parser.add_argument(
    +    "--sixgill-plugin",
    +    default=os.environ.get("SIXGILL_PLUGIN"),
    +    help="Full path to sixgill gcc plugin",
    +)
    +parser.add_argument(
    +    "--gccdir", default=os.environ.get("GCCDIR"), help="Path to GCC installation dir"
    +)
    +parser.add_argument("--cc", default=os.environ.get("CC"), help="Path to gcc")
    +parser.add_argument("--cxx", default=os.environ.get("CXX"), help="Path to g++")
    +parser.add_argument(
    +    "--verbose",
    +    "-v",
    +    default=0,
    +    action="count",
    +    help="Display verbose output, including commands executed",
    +)
    +ALL_TESTS = [
    +    "sixgill-tree",
    +    "suppression",
    +    "hazards",
    +    "exceptions",
    +    "virtual",
    +    "graph",
    +    "types",
    +]
    +parser.add_argument(
    +    "tests",
    +    nargs="*",
    +    default=ALL_TESTS,
    +    help="tests to run",
    +)
    +
    +cfg = parser.parse_args()
    +
    +if not cfg.js:
    +    sys.exit("Must specify JS binary through environment variable or --js option")
    +if not cfg.cc:
    +    if cfg.gccdir:
    +        cfg.cc = os.path.join(cfg.gccdir, "bin", "gcc")
    +    else:
    +        cfg.cc = "gcc"
    +if not cfg.cxx:
    +    if cfg.gccdir:
    +        cfg.cxx = os.path.join(cfg.gccdir, "bin", "g++")
    +    else:
    +        cfg.cxx = "g++"
    +if not cfg.sixgill_bin:
    +    cfg.sixgill_bin = os.path.join(cfg.sixgill, "usr", "bin")
    +if not cfg.sixgill_plugin:
    +    cfg.sixgill_plugin = os.path.join(
    +        cfg.sixgill, "usr", "libexec", "sixgill", "gcc", "xgill.so"
    +    )
    +
    +subprocess.check_call(
    +    [cfg.js, "-e", 'if (!getBuildConfiguration("has-ctypes")) quit(1)']
    +)
    +
    +
    +def binpath(prog):
    +    return os.path.join(cfg.sixgill_bin, prog)
    +
    +
    +def make_dir(dirname, exist_ok=True):
    +    """Create dirname; an already-existing path is tolerated if exist_ok."""
    +    try:
    +        os.mkdir(dirname)
    +    except FileExistsError:
    +        # Match on the exception type: strerror text varies with the locale.
    +        if not exist_ok:
    +            raise
    +
    +
    +outroot = os.path.join(testdir, "out")
    +make_dir(outroot)
    +
    +os.environ["HAZARD_RUN_INTERNAL_TESTS"] = "1"
    +
    +exclude = []
    +tests = []
    +for t in cfg.tests:
    +    if t.startswith("!"):
    +        exclude.append(t[1:])
    +    else:
    +        tests.append(t)
    +if len(tests) == 0:
    +    tests = filter(lambda t: t not in exclude, ALL_TESTS)
    +
    +failed = set()
    +passed = set()
    +for path in tests:
    +    name = os.path.basename(path)
    +    indir = os.path.join(testdir, name)
    +    outdir = os.path.join(outroot, name)
    +    make_dir(outdir)
    +
    +    test = Test(indir, outdir, cfg, verbose=cfg.verbose)
    +
    +    os.chdir(outdir)
    +    for xdb in glob("*.xdb"):  # remove any *.xdb files already in outdir
    +        os.unlink(xdb)
    +    print("START TEST {}".format(name), flush=True)
    +    testpath = os.path.join(indir, "test.py")
    +    with open(testpath) as testfile:  # close the handle once it is read
    +        testcode = compile(testfile.read(), testpath, "exec")
    +    try:
    +        exec(testcode, {"test": test, "equal": equal})
    +    except subprocess.CalledProcessError:
    +        print("TEST-FAILED: %s" % name)
    +        failed.add(name)
    +    except AssertionError:
    +        print("TEST-FAILED: %s" % name)
    +        failed.add(name)
    +        raise  # propagates: the remaining tests are not run
    +    else:
    +        print("TEST-PASSED: %s" % name)
    +        passed.add(name)
    +
    +if failed:
    +    raise Exception("Failed tests: " + " ".join(failed))
    +
    +print(f"All {len(passed)} tests passed.")
    diff --git a/js/src/devtools/rootAnalysis/run_complete b/js/src/devtools/rootAnalysis/run_complete
    new file mode 100755
    index 0000000000..c9355267db
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/run_complete
    @@ -0,0 +1,384 @@
    +#!/usr/bin/perl
    +
    +# Sixgill: Static assertion checker for C/C++ programs.
    +# Copyright (C) 2009-2010  Stanford University
    +# Author: Brian Hackett
    +#
    +# This program is free software: you can redistribute it and/or modify
    +# it under the terms of the GNU General Public License as published by
    +# the Free Software Foundation, either version 3 of the License, or
    +# (at your option) any later version.
    +#
    +# This program is distributed in the hope that it will be useful,
    +# but WITHOUT ANY WARRANTY; without even the implied warranty of
    +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    +# GNU General Public License for more details.
    +#
    +# You should have received a copy of the GNU General Public License
    +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
    +
    +# do a complete run of the system from raw source to reports. this requires
    +# various run_monitor processes to be running in the background (maybe on other
    +# machines) and watching a shared poll_file for jobs. if the output directory
    +# for this script already exists then an incremental analysis will be performed
    +# and the reports will only reflect the changes since the earlier run.
    +
    +use strict;
    +use warnings;
    +use IO::Handle;
    +use File::Basename qw(basename dirname);
    +use Getopt::Long;
    +use Cwd;
    +
    +#################################
    +# environment specific settings #
    +#################################
    +
    +my $WORKDIR;
    +my $SIXGILL_BIN;
    +
    +# poll file shared with the run_monitor script.
    +my $poll_file;
    +
    +# root directory of the project.
    +my $build_dir;
    +
    +# directory containing gcc wrapper scripts.
    +my $wrap_dir;
    +
    +# optional file with annotations from the web interface.
    +my $ann_file = "";
    +
    +# optional output directory to do a diff against.
    +my $old_dir = "";
    +
    +# run in the foreground
    +my $foreground;
    +
    +my $builder = "make -j4";
    +
    +my $suppress_logs;
    +# NB: -b is the short form of --sixgill-binaries only; aliases must be unique.
    +GetOptions("build-root=s" => \$build_dir,
    +           "poll-file=s" => \$poll_file,
    +           "no-logs!" => \$suppress_logs,
    +           "work-dir=s" => \$WORKDIR,
    +           "sixgill-binaries|binaries|b=s" => \$SIXGILL_BIN,
    +           "wrap-dir=s" => \$wrap_dir,
    +           "annotations-file|annotations|a=s" => \$ann_file,
    +           "old-dir|old=s" => \$old_dir,
    +           "foreground!" => \$foreground,
    +           "buildcommand=s" => \$builder)
    +    or die;
    +
    +# apply defaults first so the directory checks below see defined paths.
    +$WORKDIR ||= "sixgill-work";
    +mkdir($WORKDIR, 0755) if ! -d $WORKDIR;
    +$poll_file ||= "$WORKDIR/poll.file";
    +$build_dir ||= "$WORKDIR/js-inbound-xgill";
    +if (not -d $build_dir) {
    +    mkdir($build_dir);
    +}
    +if ($old_dir ne "" && not -d $old_dir) {
    +    die "Old directory '$old_dir' does not exist\n";
    +}
    +
    +if (!defined $SIXGILL_BIN) {
    +    chomp(my $path = `which xmanager`);
    +    if ($path) {
    +        use File::Basename qw(dirname);
    +        $SIXGILL_BIN = dirname($path);
    +    } else {
    +        die "Cannot find sixgill binaries. Use the -b option.";
    +    }
    +}
    +
    +$wrap_dir ||= "$WORKDIR/xgill-inbound/wrap_gcc";
    +$wrap_dir = "$SIXGILL_BIN/../scripts/wrap_gcc" if not (-e "$wrap_dir/basecc");
    +die "Bad wrapper directory: $wrap_dir" if not (-e "$wrap_dir/basecc");
    +
    +# code to clean the project from $build_dir.
    +sub clean_project {
    +    system("make clean");
    +}
    +
    +# code to build the project from $build_dir.
    +sub build_project {
    +    return system($builder) >> 8;
    +}
    +
    +our %kill_on_exit;
    +END {
    +    # kill() takes the signal first; a bare kill($pid) signals no process.
    +    # skip non-positive pids so we never signal our own process group.
    +    kill('TERM', grep { $_ > 0 } keys %kill_on_exit);
    +}
    +
    +# commands to start the various xgill binaries. timeouts can be specified
    +# for the backend analyses here, and a memory limit can be specified for
    +# xmanager if desired (and USE_COUNT_ALLOCATOR is defined in util/alloc.h).
    +my $xmanager = "$SIXGILL_BIN/xmanager";
    +my $xsource = "$SIXGILL_BIN/xsource";
    +my $xmemlocal = "$SIXGILL_BIN/xmemlocal -timeout=20";
    +my $xinfer = "$SIXGILL_BIN/xinfer -timeout=60";
    +my $xcheck = "$SIXGILL_BIN/xcheck -timeout=30";
    +
    +# prefix directory to strip off source files.
    +my $prefix_dir = $build_dir;
    +
    +##########################
    +# general purpose script #
    +##########################
    +
    +# Prevent ccache from being used. I don't think this does any good. The problem
    +# I'm struggling with is that if autoconf.mk still has 'ccache gcc' in it, the
    +# builds fail in a mysterious way.
    +$ENV{CCACHE_COMPILERCHECK} = 'date +%s.%N';
    +delete $ENV{CCACHE_PREFIX};
    +
    +my $usage = "USAGE: run_complete result-dir\n";
    +my $result_dir = shift or die $usage;
    +
    +if (not $foreground) {
    +    my $pid = fork();
    +    if ($pid != 0) {
    +        print "Forked, exiting...\n";
    +        exit(0);
    +    }
    +}
    +
    +# if the result directory does not already exist, mark for a clean build.
    +my $do_clean = 0;
    +if (not (-d $result_dir)) {
    +    $do_clean = 1;
    +    mkdir $result_dir;
    +}
    +
    +if (!$suppress_logs) {
    +    my $log_file = "$result_dir/complete.log";
    +    open(OUT, ">>", $log_file) or die "append to $log_file: $!";
    +    OUT->autoflush(1);  # don't buffer writes to the main log.
    +
    +    # redirect stdout and stderr to the log.
    +    STDOUT->fdopen(\*OUT, "w");
    +    STDERR->fdopen(\*OUT, "w");
    +}
    +
    +# pids to wait on before exiting. these are collating worker output.
    +my @waitpids;
    +
    +chdir $result_dir;
    +
    +# to do a partial run, comment out the commands here you don't want to do.
    +
    +my $status = run_build();
    +
    +# end of run commands.
    +
    +for my $pid (@waitpids) {
    +    waitpid($pid, 0);
    +    $status ||= $? >> 8;
    +}
    +
    +print "Exiting run_complete with status $status\n";
    +exit $status;
    +
    +# get the IP address which a freshly created manager is listening on.
    +sub get_manager_address
    +{
    +    my $log_file = shift or die;
    +
    +    # give the manager one second to start, any longer and something's broken.
    +    sleep(1);
    +    open(my $log_fh, "<", $log_file) or die "read $log_file: $!";
    +    my $log_data = do { local $/; <$log_fh> };
    +    # require a non-empty address and hand back the captured value itself.
    +    my ($port) = $log_data =~ /Listening on ([\.\:0-9]+)/ or die "no manager found";
    +    print OUT "Connecting to manager on port $port\n" unless $suppress_logs;
    +    print "Connecting to manager on port $port.\n";
    +    return $port;
    +}
    +
    +sub logging_suffix {
    +    my ($show_logs, $log_file) = @_;
    +    return "2>&1 | tee $log_file" if $show_logs;    # echo and record
    +    return "> $log_file 2>&1";                      # record only
    +}
    +
    +# run the instrumented build in a child process alongside a freshly started
    +# xmanager, and return that child's exit status (nonzero if either failed).
    +sub run_build
    +{
    +    print "build started: " . scalar(localtime()) . "\n";
    +
    +    # fork off a process to run the build.
    +    defined(my $pid = fork) or die;
    +
    +    # log file for the manager.
    +    my $manager_log_file = "$result_dir/build_manager.log";
    +
    +    if (!$pid) {
    +        # this is the child process, fork another process to run a manager.
    +        defined(my $pid = fork) or die;
    +        my $logging = logging_suffix($suppress_logs, $manager_log_file);
    +        exec("$xmanager -terminate-on-assert $logging") or die "exec $xmanager: $!" if (!$pid);
    +        $kill_on_exit{$pid} = 1;
    +
    +        if (!$suppress_logs) {
    +            # open new streams to redirect stdout and stderr.
    +            open(LOGOUT, "> $result_dir/build.log") or die "create build.log: $!";
    +            open(LOGERR, "> $result_dir/build_err.log") or die "create build_err.log: $!";
    +            STDOUT->fdopen(\*LOGOUT, "w");
    +            STDERR->fdopen(\*LOGERR, "w");
    +        }
    +
    +        my $address = get_manager_address($manager_log_file);
    +
    +        # write the configuration file for the wrapper script.
    +        my $config_file = "$WORKDIR/xgill.config";
    +        open(CONFIG, ">", $config_file) or die "create $config_file: $!";
    +        print CONFIG "$prefix_dir\n";
    +        print CONFIG Cwd::abs_path("$result_dir/build_xgill.log")."\n";
    +        print CONFIG "$address\n";
    +        my @extra = ("-fplugin-arg-xgill-mangle=1");
    +        push(@extra, "-fplugin-arg-xgill-annfile=$ann_file")
    +            if ($ann_file ne "" && -e $ann_file);
    +        print CONFIG join(" ", @extra) . "\n";
    +        close(CONFIG);
    +
    +        # Tell the wrapper where to find the config
    +        $ENV{"XGILL_CONFIG"} = Cwd::abs_path($config_file);
    +
    +        # If overriding $CC, use GCCDIR to tell the wrapper scripts where the
    +        # real compiler is. If $CC is not set, then the wrapper script will
    +        # search $PATH anyway.
    +        if (exists $ENV{CC}) {
    +            $ENV{GCCDIR} = dirname($ENV{CC});
    +        }
    +
    +        # Force the wrapper scripts to be run in place of the compiler during
    +        # whatever build process we use.
    +        $ENV{CC} = "$wrap_dir/" . basename($ENV{CC} // "gcc");
    +        $ENV{CXX} = "$wrap_dir/" . basename($ENV{CXX} // "g++");
    +
    +        # do the build, cleaning if necessary.
    +        chdir $build_dir;
    +        clean_project() if ($do_clean);
    +        my $exit_status = build_project();
    +
    +        # signal the manager that it's over.
    +        system("$xsource -remote=$address -end-manager");
    +
    +        # wait for the manager to clean up and terminate.
    +        print "Waiting for manager to finish (build status $exit_status)...\n";
    +        waitpid($pid, 0);
    +        # decode the wait status: exit() keeps only the low 8 bits, so a raw
    +        # $? of 256 (exit code 1) would otherwise be reported as success.
    +        my $manager_status = $? == 0 ? 0 : (($? >> 8) || 1);
    +        delete $kill_on_exit{$pid};
    +
    +        # build is finished, the complete run can resume.
    +        # return value only useful if --foreground
    +        print "Exiting with status " . ($manager_status || $exit_status) . "\n";
    +        exit($manager_status || $exit_status);
    +    }
    +
    +    # this is the complete process, wait for the build to finish.
    +    waitpid($pid, 0);
    +    my $status = $? >> 8;
    +    print "build finished (status $status): " . scalar(localtime()) . "\n";
    +
    +    return $status;
    +}
    +
    +# run one analysis pass: start a manager, publish the job through the poll
    +# file, wait for the manager to exit, then collate worker logs asynchronously.
    +sub run_pass
    +{
    +    my ($name, $command) = @_;
    +    my $log_file = "$result_dir/manager.$name.log";
    +
    +    # extra commands to pass to the manager.
    +    my $manager_extra = "";
    +    $manager_extra .= "-modset-wait=10" if ($name eq "xmemlocal");
    +
    +    # fork off a manager process for the analysis.
    +    defined(my $pid = fork) or die;
    +    my $logging = logging_suffix($suppress_logs, $log_file);
    +    # the child becomes the manager; if exec fails it must die rather than
    +    # fall through and run the rest of this sub a second time.
    +    exec("$xmanager $manager_extra $logging") or die "exec $xmanager: $!" if (!$pid);
    +
    +    my $address = get_manager_address($log_file);
    +
    +    # write the poll file for this pass.
    +    if (! -d dirname($poll_file)) {
    +        system("mkdir", "-p", dirname($poll_file));
    +    }
    +    open(POLL, ">", $poll_file) or die "create $poll_file: $!";
    +    print POLL "$command\n";
    +    print POLL "$result_dir/$name\n";
    +    print POLL "$address\n";
    +    close(POLL);
    +
    +    print "$name started: " . scalar(localtime()) . "\n";
    +
    +    waitpid($pid, 0);
    +    unlink($poll_file);
    +
    +    print "$name finished: " . scalar(localtime()) . "\n";
    +
    +    # collate the worker's output into a single file. make this asynchronous
    +    # so we can wait a bit and make sure we get all worker output.
    +    defined($pid = fork) or die;
    +
    +    if (!$pid) {
    +        sleep(20);
    +        exec("cat $name.*.log > $name.log") or die "exec cat: $!";
    +    }
    +
    +    push(@waitpids, $pid);
    +}
    +
    +# the names of all directories containing reports to archive.
    +my $indexes;
    +
    +# build the report index for $kind, plus a diff index against $old_dir when
    +# one was given, and append the resulting directory names to $indexes.
    +# does nothing when report_$kind.xdb is absent.
    +sub run_index
    +{
    +    my ($name, $kind) = @_;
    +
    +    # no report database for this kind, nothing to index.
    +    return unless -e "report_$kind.xdb";
    +
    +    print "$name started: " . scalar(localtime()) . "\n";
    +
    +    # make an index for the report diff if applicable.
    +    if ($old_dir ne "") {
    +        system("make_index $kind $old_dir > $name.diff.log");
    +        system("mv $kind diff_$kind");
    +        $indexes .= " diff_$kind";
    +    }
    +
    +    # make an index for the full set of reports.
    +    system("make_index $kind > $name.log");
    +    $indexes .= " $kind";
    +
    +    print "$name finished: " . scalar(localtime()) . "\n";
    +}
    +
    +# tar up every directory named in $indexes, then delete the originals.
    +sub archive_indexes
    +{
    +    # nothing was indexed: tar and rm would be run with no operands.
    +    return if not defined $indexes;
    +
    +    print "archive started: " . scalar(localtime()) . "\n";
    +
    +    system("tar -czf reports.tgz $indexes");
    +    system("rm -rf $indexes");
    +
    +    print "archive finished: " . scalar(localtime()) . "\n";
    +}
    diff --git a/js/src/devtools/rootAnalysis/t/exceptions/source.cpp b/js/src/devtools/rootAnalysis/t/exceptions/source.cpp
    new file mode 100644
    index 0000000000..8d38a790a1
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/exceptions/source.cpp
    @@ -0,0 +1,57 @@
    +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
    +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
    +/* This Source Code Form is subject to the terms of the Mozilla Public
    + * License, v. 2.0. If a copy of the MPL was not distributed with this
    + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    +
    +// Simply including <exception> was enough to crash sixgill at one point.
    +#include <exception>
    +
    +#define ANNOTATE(property) __attribute__((annotate(property)))
    +
    +struct Cell {
    +  int f;
    +} ANNOTATE("GC Thing");
    +
    +extern void GC() ANNOTATE("GC Call");
    +
    +void GC() {
    +  // If the implementation is too trivial, the function body won't be emitted at
    +  // all.
    +  asm("");
    +}
    +
    +class RAII_GC {
    + public:
    +  RAII_GC() {}
    +  ~RAII_GC() { GC(); }
    +};
    +
    +// ~AutoSomething calls GC because of the RAII_GC field. The constructor,
    +// though, should *not* GC -- unless it throws an exception. Which is not
    +// possible when compiled with -fno-exceptions. This test will try it both
    +// ways.
    +class AutoSomething {
    +  RAII_GC gc;
    +
    + public:
    +  AutoSomething() : gc() {
    +    asm("");  // Ooh, scary, this might throw an exception
    +  }
    +  ~AutoSomething() { asm(""); }
    +};
    +
    +extern Cell* getcell();
    +
    +extern void usevar(Cell* cell);
    +
    +void f() {
    +  Cell* thing = getcell();  // Live range starts here
    +
    +  // When compiling with -fexceptions, there should be a hazard below. With
    +  // -fno-exceptions, there should not be one. We will check both.
    +  {
    +    AutoSomething smth;  // Constructor can GC only if exceptions are enabled
    +    usevar(thing);       // Live range ends here
    +  }  // In particular, 'thing' is dead at the destructor, so no hazard
    +}
    diff --git a/js/src/devtools/rootAnalysis/t/exceptions/test.py b/js/src/devtools/rootAnalysis/t/exceptions/test.py
    new file mode 100644
    index 0000000000..a40753d87a
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/exceptions/test.py
    @@ -0,0 +1,21 @@
    +# flake8: noqa: F821
    +
    +test.compile("source.cpp", "-fno-exceptions")
    +test.run_analysis_script("gcTypes")
    +
    +hazards = test.load_hazards()
    +assert len(hazards) == 0
    +
    +# If we compile with exceptions, then there *should* be a hazard because
    +# AutoSomething::AutoSomething might throw an exception, which would cause the
    +# partially-constructed value to be torn down, which will call ~RAII_GC.
    +
    +test.compile("source.cpp", "-fexceptions")
    +test.run_analysis_script("gcTypes")
    +
    +hazards = test.load_hazards()
    +assert len(hazards) == 1
    +hazard = hazards[0]
    +assert hazard.function == "void f()"
    +assert hazard.variable == "thing"
    +assert "AutoSomething::AutoSomething" in hazard.GCFunction
    diff --git a/js/src/devtools/rootAnalysis/t/graph/source.cpp b/js/src/devtools/rootAnalysis/t/graph/source.cpp
    new file mode 100644
    index 0000000000..0adff8d532
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/graph/source.cpp
    @@ -0,0 +1,90 @@
    +#define ANNOTATE(property) __attribute__((annotate(property)))
    +
    +extern void GC() ANNOTATE("GC Call");
    +
    +void GC() {
    +  // If the implementation is too trivial, the function body won't be emitted at
    +  // all.
    +  asm("");
    +}
    +
    +extern void g(int x);
    +extern void h(int x);
    +
    +void f(int x) {
    +  if (x % 3) {
    +    GC();
    +    g(x);
    +  }
    +  h(x);
    +}
    +
    +void g(int x) {
    +  if (x % 2) f(x);
    +  h(x);
    +}
    +
    +void h(int x) {
    +  if (x) {
    +    f(x - 1);
    +    g(x - 1);
    +  }
    +}
    +
    +void leaf() { asm(""); }
    +
    +void nonrecursive_root() {
    +  leaf();
    +  leaf();
    +  GC();
    +}
    +
    +void self_recursive(int x) {
    +  if (x) self_recursive(x - 1);
    +}
    +
    +// Set up the graph
    +//
    +//   n1 <--> n2          n4 <--> n5
    +//           \                  /
    +//            --> n3 <---------
    +//                 \
    +//                  ---> n6 --> n7 <---> n8 --> n9
    +//
    +// So recursive roots are one of (n1, n2) plus one of (n4, n5).
    +extern void n1(int x);
    +extern void n2(int x);
    +extern void n3(int x);
    +extern void n4(int x);
    +extern void n5(int x);
    +extern void n6(int x);
    +extern void n7(int x);
    +extern void n8(int x);
    +extern void n9(int x);
    +
    +void n1(int x) { n2(x); }
    +
    +void n2(int x) {
    +  if (x) n1(x - 1);
    +  n3(x);
    +}
    +
    +void n4(int x) { n5(x); }
    +
    +void n5(int x) {
    +  if (x) n4(x - 1);
    +  n3(x);
    +}
    +
    +void n3(int x) { n6(x); }
    +
    +void n6(int x) { n7(x); }
    +
    +void n7(int x) { n8(x); }
    +
    +void n8(int x) {
    +  if (x) n7(x - 1);
    +  n9(x);
    +}
    +
    +void n9(int x) { asm(""); }
    diff --git a/js/src/devtools/rootAnalysis/t/graph/test.py b/js/src/devtools/rootAnalysis/t/graph/test.py
    new file mode 100644
    index 0000000000..f78500f200
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/graph/test.py
    @@ -0,0 +1,54 @@
    +# 'test' is provided by the calling script.
    +# flake8: noqa: F821
    +
    +test.compile("source.cpp")
    +test.run_analysis_script("gcTypes")
    +
    +info = test.load_typeInfo()
    +
    +gcFunctions = test.load_gcFunctions()
    +
    +f = "void f(int32)"
    +g = "void g(int32)"
    +h = "void h(int32)"
    +
    +assert f in gcFunctions
    +assert g in gcFunctions
    +assert h in gcFunctions
    +assert "void leaf()" not in gcFunctions
    +assert "void nonrecursive_root()" in gcFunctions
    +
    +callgraph = test.load_callgraph()
    +assert callgraph.calleeGraph[f][g]
    +assert callgraph.calleeGraph[f][h]
    +assert callgraph.calleeGraph[g][f]
    +assert callgraph.calleeGraph[g][h]
    +
    +node = ["void n{}(int32)".format(i) for i in range(10)]
    +mnode = [callgraph.unmangledToMangled.get(f) for f in node]
    +for src, dst in [
    +    (1, 2),
    +    (2, 1),
    +    (4, 5),
    +    (5, 4),
    +    (2, 3),
    +    (5, 3),
    +    (3, 6),
    +    (6, 7),
    +    (7, 8),
    +    (8, 7),
    +    (8, 9),
    +]:
    +    assert callgraph.calleeGraph[node[src]][node[dst]]
    +
    +funcInfo = test.load_funcInfo()
    +rroots = set(
    +    [
    +        callgraph.mangledToUnmangled[f]
    +        for f in funcInfo
    +        if funcInfo[f].get("recursive_root")
    +    ]
    +)
    +assert len(set([node[1], node[2]]) & rroots) == 1
    +assert len(set([node[4], node[5]]) & rroots) == 1
    +assert len(rroots) == 4, "rroots = {}".format(rroots)  # n1, n4, f, self_recursive
    diff --git a/js/src/devtools/rootAnalysis/t/hazards/source.cpp b/js/src/devtools/rootAnalysis/t/hazards/source.cpp
    new file mode 100644
    index 0000000000..fe991653af
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/hazards/source.cpp
    @@ -0,0 +1,566 @@
    +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
    +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
    +/* This Source Code Form is subject to the terms of the Mozilla Public
    + * License, v. 2.0. If a copy of the MPL was not distributed with this
    + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    +
    +#include <utility>
    +
    +#define ANNOTATE(property) __attribute__((annotate(property)))
    +
    +// MarkVariableAsGCSafe is a magic function name used as an
    +// explicit annotation.
    +
    +namespace JS {
    +namespace detail {
    +template <typename T>
    +static void MarkVariableAsGCSafe(T&) {
    +  asm("");
    +}
    +}  // namespace detail
    +}  // namespace JS
    +
    +#define JS_HAZ_VARIABLE_IS_GC_SAFE(var) JS::detail::MarkVariableAsGCSafe(var)
    +
    +struct Cell {
    +  int f;
    +} ANNOTATE("GC Thing");
    +
    +template <typename T, typename U>
    +struct UntypedContainer {
    +  char data[sizeof(T) + sizeof(U)];
    +} ANNOTATE("moz_inherit_type_annotations_from_template_args");
    +
    +struct RootedCell {
    +  RootedCell(Cell*) {}
    +} ANNOTATE("Rooted Pointer");
    +
    +class AutoSuppressGC_Base {
    + public:
    +  AutoSuppressGC_Base() {}
    +  ~AutoSuppressGC_Base() {}
    +} ANNOTATE("Suppress GC");
    +
    +class AutoSuppressGC_Child : public AutoSuppressGC_Base {
    + public:
    +  AutoSuppressGC_Child() : AutoSuppressGC_Base() {}
    +};
    +
    +class AutoSuppressGC {
    +  AutoSuppressGC_Child helpImBeingSuppressed;
    +
    + public:
    +  AutoSuppressGC() {}
    +};
    +
    +class AutoCheckCannotGC {
    + public:
    +  AutoCheckCannotGC() {}
    +  ~AutoCheckCannotGC() { asm(""); }
    +} ANNOTATE("Invalidated by GC");
    +
    +extern void GC() ANNOTATE("GC Call");
    +extern void invisible();
    +
    +void GC() {
    +  // If the implementation is too trivial, the function body won't be emitted at
    +  // all.
    +  asm("");
    +  invisible();
    +}
    +
    +extern void usecell(Cell*);
    +
    +extern bool flipcoin();
    +
    +void suppressedFunction() {
    +  GC();  // Calls GC, but is always called within AutoSuppressGC
    +}
    +
    +void halfSuppressedFunction() {
    +  GC();  // Calls GC, but is sometimes called within AutoSuppressGC
    +}
    +
    +void unsuppressedFunction() {
    +  GC();  // Calls GC, never within AutoSuppressGC
    +}
    +
    +class IDL_Interface {
    + public:
    +  ANNOTATE("Can run script") virtual void canScriptThis() {}
    +  virtual void cannotScriptThis() {}
    +  ANNOTATE("Can run script") virtual void overridden_canScriptThis() = 0;
    +  virtual void overridden_cannotScriptThis() = 0;
    +};
    +
    +class IDL_Subclass : public IDL_Interface {
    +  ANNOTATE("Can run script") void overridden_canScriptThis() override {}
    +  void overridden_cannotScriptThis() override {}
    +};
    +
    +volatile static int x = 3;
    +volatile static int* xp = &x;
    +struct GCInDestructor {
    +  ~GCInDestructor() {
    +    invisible();
    +    asm("");
    +    *xp = 4;
    +    GC();
    +  }
    +};
    +
    +template <typename T>
    +void usecontainer(T* value) {
    +  if (value) asm("");
    +}
    +
    +Cell* cell() {
    +  static Cell c;
    +  return &c;
    +}
    +
    +Cell* f() {
    +  GCInDestructor kaboom;
    +
    +  Cell* cell1 = cell();
    +  Cell* cell2 = cell();
    +  Cell* cell3 = cell();
    +  Cell* cell4 = cell();
    +  {
    +    AutoSuppressGC nogc;
    +    suppressedFunction();
    +    halfSuppressedFunction();
    +  }
    +  usecell(cell1);
    +  halfSuppressedFunction();
    +  usecell(cell2);
    +  unsuppressedFunction();
    +  {
    +    // Old bug: it would look from the first AutoSuppressGC constructor it
    +    // found to the last destructor. This statement *should* have no effect.
    +    AutoSuppressGC nogc;
    +  }
    +  usecell(cell3);
    +  Cell* cell5 = cell();
    +  usecell(cell5);
    +
    +  {
    +    // Templatized container that inherits attributes from Cell*, should
    +    // report a hazard.
    +    UntypedContainer<int, Cell*> container1;
    +    usecontainer(&container1);
    +    GC();
    +    usecontainer(&container1);
    +  }
    +
    +  {
    +    // As above, but with a non-GC type.
    +    UntypedContainer<int, double> container2;
    +    usecontainer(&container2);
    +    GC();
    +    usecontainer(&container2);
    +  }
    +
    +  // Hazard in return value due to ~GCInDestructor
    +  Cell* cell6 = cell();
    +  return cell6;
    +}
    +
    +Cell* copy_and_gc(Cell* src) {
    +  GC();
    +  return reinterpret_cast<Cell*>(88);
    +}
    +
    +void use(Cell* cell) {
    +  static int x = 0;
    +  if (cell) x++;
    +}
    +
    +struct CellContainer {
    +  Cell* cell;
    +  CellContainer() { asm(""); }
    +};
    +
    +void loopy() {
    +  Cell cell;
    +
    +  // No hazard: haz1 is not live during call to copy_and_gc.
    +  Cell* haz1;
    +  for (int i = 0; i < 10; i++) {
    +    haz1 = copy_and_gc(haz1);
    +  }
    +
    +  // No hazard: haz2 is live up to just before the GC, and starting at the
    +  // next statement after it, but not across the GC.
    +  Cell* haz2 = &cell;
    +  for (int j = 0; j < 10; j++) {
    +    use(haz2);
    +    GC();
    +    haz2 = &cell;
    +  }
    +
    +  // Hazard: haz3 is live from the final statement in one iteration, across
    +  // the GC in the next, to the use in the 2nd statement.
    +  Cell* haz3;
    +  for (int k = 0; k < 10; k++) {
    +    GC();
    +    use(haz3);
    +    haz3 = &cell;
    +  }
    +
    +  // Hazard: haz4 is live across a GC hidden in a loop.
    +  Cell* haz4 = &cell;
    +  for (int i2 = 0; i2 < 10; i2++) {
    +    GC();
    +  }
    +  use(haz4);
    +
    +  // Hazard: haz5 is live from within a loop across a GC.
    +  Cell* haz5;
    +  for (int i3 = 0; i3 < 10; i3++) {
    +    haz5 = &cell;
    +  }
    +  GC();
    +  use(haz5);
    +
    +  // No hazard: similar to the haz3 case, but verifying that we do not get
    +  // into an infinite loop.
    +  Cell* haz6;
    +  for (int i4 = 0; i4 < 10; i4++) {
    +    GC();
    +    haz6 = &cell;
    +  }
    +
    +  // No hazard: haz7 is constructed within the body, so it can't make a
    +  // hazard across iterations. Note that this requires CellContainer to have
    +  // a constructor, because otherwise the analysis doesn't see where
    +  // variables are declared. (With the constructor, it knows that
    +  // construction of haz7 obliterates any previous value it might have had.
    +  // Not that that's possible given its scope, but the analysis doesn't get
    +  // that information.)
    +  for (int i5 = 0; i5 < 10; i5++) {
    +    GC();
    +    CellContainer haz7;
    +    use(haz7.cell);
    +    haz7.cell = &cell;
    +  }
    +
    +  // Hazard: make sure we *can* see hazards across iterations involving
    +  // CellContainer;
    +  CellContainer haz8;
    +  for (int i6 = 0; i6 < 10; i6++) {
    +    GC();
    +    use(haz8.cell);
    +    haz8.cell = &cell;
    +  }
    +}
    +
    +namespace mozilla {
    +template <typename T>
    +class UniquePtr {
    +  T* val;
    +
    + public:
    +  UniquePtr() : val(nullptr) { asm(""); }
    +  UniquePtr(T* p) : val(p) {}
    +  UniquePtr(UniquePtr&& u) : val(u.val) { u.val = nullptr; }
    +  ~UniquePtr() { use(val); }
    +  T* get() { return val; }
    +  void reset() { val = nullptr; }
    +} ANNOTATE("moz_inherit_type_annotations_from_template_args");
    +}  // namespace mozilla
    +
    +extern void consume(mozilla::UniquePtr<Cell> uptr);
    +
    +void safevals() {
    +  Cell cell;
    +
    +  // Simple hazard.
    +  Cell* unsafe1 = &cell;
    +  GC();
    +  use(unsafe1);
    +
    +  // Safe because it's known to be nullptr.
    +  Cell* safe2 = &cell;
    +  safe2 = nullptr;
    +  GC();
    +  use(safe2);
    +
    +  // Unsafe because it may not be nullptr.
    +  Cell* unsafe3 = &cell;
    +  if (reinterpret_cast<long>(&cell) & 0x100) {
    +    unsafe3 = nullptr;
    +  }
    +  GC();
    +  use(unsafe3);
    +
    +  // Unsafe because it's not nullptr anymore.
    +  Cell* unsafe3b = &cell;
    +  unsafe3b = nullptr;
    +  unsafe3b = &cell;
    +  GC();
    +  use(unsafe3b);
    +
    +  // Hazard involving UniquePtr.
    +  {
    +    mozilla::UniquePtr<Cell> unsafe4(&cell);
    +    GC();
    +    // Destructor uses unsafe4.
    +  }
    +
    +  // reset() to safe value before the GC.
    +  {
    +    mozilla::UniquePtr<Cell> safe5(&cell);
    +    safe5.reset();
    +    GC();
    +  }
    +
    +  // reset() to safe value after the GC.
    +  {
    +    mozilla::UniquePtr<Cell> safe6(&cell);
    +    GC();
    +    safe6.reset();
    +  }
    +
    +  // reset() to safe value after the GC -- but we've already used it, so it's
    +  // too late.
    +  {
    +    mozilla::UniquePtr<Cell> unsafe7(&cell);
    +    GC();
    +    use(unsafe7.get());
    +    unsafe7.reset();
    +  }
    +
    +  // initialized to safe value.
    +  {
    +    mozilla::UniquePtr<Cell> safe8;
    +    GC();
    +  }
    +
    +  // passed to a function that takes ownership before GC.
    +  {
    +    mozilla::UniquePtr<Cell> safe9(&cell);
    +    consume(std::move(safe9));
    +    GC();
    +  }
    +
    +  // passed to a function that takes ownership after GC.
    +  {
    +    mozilla::UniquePtr<Cell> unsafe10(&cell);
    +    GC();
    +    consume(std::move(unsafe10));
    +  }
    +
    +  // annotated to be safe before the GC. (This doesn't make
    +  // a lot of sense here; the annotation is for when some
    +  // type is known to only contain safe values, eg it is
    +  // initialized as empty, or it is a union and we know
    +  // that the GC pointer variants are not in use.)
    +  {
    +    mozilla::UniquePtr<Cell> safe11(&cell);
    +    JS_HAZ_VARIABLE_IS_GC_SAFE(safe11);
    +    GC();
    +  }
    +
    +  // annotate as safe value after the GC -- since nothing else
    +  // has touched the variable, that means it was already safe
    +  // during the GC.
    +  {
    +    mozilla::UniquePtr<Cell> safe12(&cell);
    +    GC();
    +    JS_HAZ_VARIABLE_IS_GC_SAFE(safe12);
    +  }
    +
    +  // annotate as safe after the GC -- but we've already used it, so it's
    +  // too late.
    +  {
    +    mozilla::UniquePtr<Cell> unsafe13(&cell);
    +    GC();
    +    use(unsafe13.get());
    +    JS_HAZ_VARIABLE_IS_GC_SAFE(unsafe13);
    +  }
    +
    +  // Check JS_HAZ_CAN_RUN_SCRIPT annotation handling.
    +  IDL_Subclass sub;
    +  IDL_Subclass* subp = &sub;
    +  IDL_Interface* base = &sub;
    +  {
    +    Cell* unsafe14 = &cell;
    +    base->canScriptThis();
    +    use(unsafe14);
    +  }
    +  {
    +    Cell* unsafe15 = &cell;
    +    subp->canScriptThis();
    +    use(unsafe15);
    +  }
    +  {
    +    // Almost the same as the last one, except call using the actual object, not
    +    // a pointer. The type is known, so there is no danger of the actual type
    +    // being a subclass that has overridden the method with an implementation
    +    // that calls script.
    +    Cell* safe16 = &cell;
    +    sub.canScriptThis();
    +    use(safe16);
    +  }
    +  {
    +    Cell* safe17 = &cell;
    +    base->cannotScriptThis();
    +    use(safe17);
    +  }
    +  {
    +    Cell* safe18 = &cell;
    +    subp->cannotScriptThis();
    +    use(safe18);
    +  }
    +  {
    +    // A use after a GC, but not before. (This does not initialize safe19 by
    +    // setting it to a value, because assignment would start its live range, and
    +    // this test is to see if a variable with no known live range start requires
    +    // a use before the GC or not. It should.)
    +    Cell* safe19;
    +    GC();
    +    extern void initCellPtr(Cell**);
    +    initCellPtr(&safe19);
    +  }
    +}
    +
    +// Make sure `this` is live at the beginning of a function.
    +class Subcell : public Cell {
    +  int method() {
    +    GC();
    +    return f;  // this->f
    +  }
    +};
    +
    +template <typename T>
    +struct RefPtr {
    +  ~RefPtr() { GC(); }
    +  bool forget() { return true; }
    +  bool use() { return true; }
    +  void assign_with_AddRef(T* aRawPtr) { asm(""); }
    +};
    +
    +extern bool flipcoin();
    +
    +Cell* refptr_test1() {
    +  static Cell cell;
    +  RefPtr<float> v1;
    +  Cell* ref_unsafe1 = &cell;
    +  return ref_unsafe1;
    +}
    +
    +Cell* refptr_test2() {
    +  static Cell cell;
    +  RefPtr<float> v2;
    +  Cell* ref_safe2 = &cell;
    +  v2.forget();
    +  return ref_safe2;
    +}
    +
    +Cell* refptr_test3() {
    +  static Cell cell;
    +  RefPtr<float> v3;
    +  Cell* ref_unsafe3 = &cell;
    +  if (x) {
    +    v3.forget();
    +  }
    +  return ref_unsafe3;
    +}
    +
    +Cell* refptr_test4() {
    +  static Cell cell;
    +  RefPtr<float> r;
    +  return &cell;  // hazard in return value
    +}
    +
    +Cell* refptr_test5() {
    +  static Cell cell;
    +  RefPtr<float> r;
    +  return nullptr;  // returning immobile value, so no hazard
    +}
    +
    +float somefloat = 1.2;
    +
    +Cell* refptr_test6() {
    +  static Cell cell;
    +  RefPtr<float> v6;
    +  Cell* ref_unsafe6 = &cell;
    +  // v6 can be used without an intervening forget() before the end of the
    +  // function, even though forget() will be called at least once.
    +  v6.forget();
    +  if (x) {
    +    v6.forget();
    +    v6.assign_with_AddRef(&somefloat);
    +  }
    +  return ref_unsafe6;
    +}
    +
    +Cell* refptr_test7() {
    +  static Cell cell;
    +  RefPtr<float> v7;
    +  Cell* ref_unsafe7 = &cell;
    +  // Similar to above, but with a loop.
    +  while (flipcoin()) {
    +    v7.forget();
    +    v7.assign_with_AddRef(&somefloat);
    +  }
    +  return ref_unsafe7;
    +}
    +
    +Cell* refptr_test8() {
    +  static Cell cell;
    +  RefPtr<float> v8;
    +  Cell* ref_unsafe8 = &cell;
    +  // If the loop is traversed, forget() will be called. But that doesn't
    +  // matter, because even on the last iteration v8.use() will have been called
    +  // (and potentially dropped the refcount or whatever.)
    +  while (v8.use()) {
    +    v8.forget();
    +  }
    +  return ref_unsafe8;
    +}
    +
    +Cell* refptr_test9() {
    +  static Cell cell;
    +  RefPtr<float> v9;
    +  Cell* ref_safe9 = &cell;
    +  // Even when not going through the loop, forget() will be called and so the
    +  // dtor will not Release.
    +  while (v9.forget()) {
    +    v9.assign_with_AddRef(&somefloat);
    +  }
    +  return ref_safe9;
    +}
    +
    +Cell* refptr_test10() {
    +  static Cell cell;
    +  RefPtr<float> v10;
    +  Cell* ref_unsafe10 = &cell;
    +  // The destructor has a backwards path that skips the loop body.
    +  v10.assign_with_AddRef(&somefloat);
    +  while (flipcoin()) {
    +    v10.forget();
    +  }
    +  return ref_unsafe10;
    +}
    +
    +std::pair<bool, AutoCheckCannotGC> pair_returning_function() {
    +  return std::make_pair(true, AutoCheckCannotGC());
    +}
    +
    +void aggr_init_unsafe() {
    +  // nogc will be live after the call, so across the GC.
    +  auto [ok, nogc] = pair_returning_function();
    +  GC();
    +}
    +
    +void aggr_init_safe() {
    +  // The analysis should be able to tell that nogc is only live after the call,
    +  // not before. (This is to check for a problem where the return value was
    +  // getting stored into a different temporary than the local nogc variable,
    +  // and so its initialization was never seen and so it was assumed to be live
    +  // throughout the function.)
    +  GC();
    +  auto [ok, nogc] = pair_returning_function();
    +}
    diff --git a/js/src/devtools/rootAnalysis/t/hazards/test.py b/js/src/devtools/rootAnalysis/t/hazards/test.py
    new file mode 100644
    index 0000000000..c4e9549305
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/hazards/test.py
    @@ -0,0 +1,121 @@
    +# flake8: noqa: F821
    +
    +from collections import defaultdict
    +
    +test.compile("source.cpp")
    +test.run_analysis_script("gcTypes")
    +
    +# gcFunctions should be the inverse, but we get to rely on unmangled names here.
    +gcFunctions = test.load_gcFunctions()
    +assert "void GC()" in gcFunctions
    +assert "void suppressedFunction()" not in gcFunctions
    +assert "void halfSuppressedFunction()" in gcFunctions
    +assert "void unsuppressedFunction()" in gcFunctions
    +assert "int32 Subcell::method()" in gcFunctions
    +assert "Cell* f()" in gcFunctions
    +
    +hazards = test.load_hazards()
    +hazmap = {haz.variable: haz for haz in hazards}
    +assert "cell1" not in hazmap
    +assert "cell2" in hazmap
    +assert "cell3" in hazmap
    +assert "cell4" not in hazmap
    +assert "cell5" not in hazmap
    +assert "cell6" not in hazmap
    +assert "<returnvalue>" in hazmap
    +assert "this" in hazmap
    +
    +assert hazmap["cell2"].function == "Cell* f()"
    +
    +# Check that the correct GC call is reported for each hazard. (cell3 has a
    +# hazard from two different GC calls; it doesn't really matter which is
    +# reported.)
    +assert hazmap["cell2"].GCFunction == "void halfSuppressedFunction()"
    +assert hazmap["cell3"].GCFunction in (
    +    "void halfSuppressedFunction()",
    +    "void unsuppressedFunction()",
    +)
    +returnval_hazards = set(
    +    haz.function for haz in hazards if haz.variable == "<returnvalue>"
    +)
    +assert "Cell* f()" in returnval_hazards
    +assert "Cell* refptr_test1()" in returnval_hazards
    +assert "Cell* refptr_test2()" not in returnval_hazards
    +assert "Cell* refptr_test3()" in returnval_hazards
    +assert "Cell* refptr_test4()" in returnval_hazards
    +assert "Cell* refptr_test5()" not in returnval_hazards
    +assert "Cell* refptr_test6()" in returnval_hazards
    +assert "Cell* refptr_test7()" in returnval_hazards
    +assert "Cell* refptr_test8()" in returnval_hazards
    +assert "Cell* refptr_test9()" not in returnval_hazards
    +
    +assert "container1" in hazmap
    +assert "container2" not in hazmap
    +
    +# Type names are handy to have in the report.
    +assert hazmap["cell2"].type == "Cell*"
    +assert hazmap["<returnvalue>"].type == "Cell*"
    +assert hazmap["this"].type == "Subcell*"
    +
    +# loopy hazards. See comments in source.
    +assert "haz1" not in hazmap
    +assert "haz2" not in hazmap
    +assert "haz3" in hazmap
    +assert "haz4" in hazmap
    +assert "haz5" in hazmap
    +assert "haz6" not in hazmap
    +assert "haz7" not in hazmap
    +assert "haz8" in hazmap
    +
    +# safevals hazards. See comments in source.
    +assert "unsafe1" in hazmap
    +assert "safe2" not in hazmap
    +assert "unsafe3" in hazmap
    +assert "unsafe3b" in hazmap
    +assert "unsafe4" in hazmap
    +assert "safe5" not in hazmap
    +assert "safe6" not in hazmap
    +assert "unsafe7" in hazmap
    +assert "safe8" not in hazmap
    +assert "safe9" not in hazmap
    +assert "safe10" not in hazmap
    +assert "safe11" not in hazmap
    +assert "safe12" not in hazmap
    +assert "unsafe13" in hazmap
    +assert "unsafe14" in hazmap
    +assert "unsafe15" in hazmap
    +assert "safe16" not in hazmap
    +assert "safe17" not in hazmap
    +assert "safe18" not in hazmap
    +assert "safe19" not in hazmap
    +
    +# method hazard.
    +
    +byfunc = defaultdict(lambda: defaultdict(dict))
    +for haz in hazards:
    +    byfunc[haz.function][haz.variable] = haz
    +
    +methhaz = byfunc["int32 Subcell::method()"]
    +assert "this" in methhaz
    +assert methhaz["this"].type == "Subcell*"
    +
    +haz_functions = set(haz.function for haz in hazards)
    +
    +# RefPtr tests.
    +
    +haz_functions = set(haz.function for haz in hazards)
    +assert "Cell* refptr_test1()" in haz_functions
    +assert "Cell* refptr_test2()" not in haz_functions
    +assert "Cell* refptr_test3()" in haz_functions
    +assert "Cell* refptr_test4()" in haz_functions
    +assert "Cell* refptr_test5()" not in haz_functions
    +assert "Cell* refptr_test6()" in haz_functions
    +assert "Cell* refptr_test7()" in haz_functions
    +assert "Cell* refptr_test8()" in haz_functions
    +assert "Cell* refptr_test9()" not in haz_functions
    +assert "Cell* refptr_test10()" in haz_functions
    +
    +# aggr_init tests.
    +
    +assert "void aggr_init_safe()" not in haz_functions
    +assert "void aggr_init_unsafe()" in haz_functions
    diff --git a/js/src/devtools/rootAnalysis/t/sixgill-tree/source.cpp b/js/src/devtools/rootAnalysis/t/sixgill-tree/source.cpp
    new file mode 100644
    index 0000000000..149d77b03a
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/sixgill-tree/source.cpp
    @@ -0,0 +1,76 @@
    +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
    +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
    +/* This Source Code Form is subject to the terms of the Mozilla Public
    + * License, v. 2.0. If a copy of the MPL was not distributed with this
    + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    +
    +#define ANNOTATE(property) __attribute__((annotate(property)))
    +
    +namespace js {
    +namespace gc {
    +struct Cell {
    +  int f;
    +} ANNOTATE("GC Thing");
    +}  // namespace gc
    +}  // namespace js
    +
    +struct Bogon {};
    +
    +struct JustACell : public js::gc::Cell {
    +  bool iHaveNoDataMembers() { return true; }
    +};
    +
    +struct JSObject : public js::gc::Cell, public Bogon {
    +  int g;
    +};
    +
    +struct SpecialObject : public JSObject {
    +  int z;
    +};
    +
    +struct ErrorResult {
    +  bool hasObj;
    +  JSObject* obj;
    +  void trace() {}
    +} ANNOTATE("Suppressed GC Pointer");
    +
    +struct OkContainer {
    +  ErrorResult res;
    +  bool happy;
    +};
    +
    +struct UnrootedPointer {
    +  JSObject* obj;
    +};
    +
    +template <typename T>
    +class Rooted {
    +  T data;
    +} ANNOTATE("Rooted Pointer");
    +
    +extern void js_GC() ANNOTATE("GC Call") ANNOTATE("Slow");
    +
    +void js_GC() {}
    +
    +void root_arg(JSObject* obj, JSObject* random) {
    +  // Use all these types so they get included in the output.
    +  SpecialObject so;
    +  UnrootedPointer up;
    +  Bogon b;
    +  OkContainer okc;
    +  Rooted<JSObject*> ro;
    +  Rooted<SpecialObject*> rso;
    +
    +  obj = random;
    +
    +  JSObject* other1 = obj;
    +  js_GC();
    +
    +  float MARKER1 = 0;
    +  JSObject* other2 = obj;
    +  other1->f = 1;
    +  other2->f = -1;
    +
    +  unsigned int u1 = 1;
    +  unsigned int u2 = -1;
    +}
    diff --git a/js/src/devtools/rootAnalysis/t/sixgill-tree/test.py b/js/src/devtools/rootAnalysis/t/sixgill-tree/test.py
    new file mode 100644
    index 0000000000..5e99fff908
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/sixgill-tree/test.py
    @@ -0,0 +1,63 @@
    +# flake8: noqa: F821
    +import re
    +
    +test.compile("source.cpp")
    +test.computeGCTypes()
    +body = test.process_body(test.load_db_entry("src_body", re.compile(r"root_arg"))[0])
    +
    +# Rendering positive and negative integers
    +marker1 = body.assignment_line("MARKER1")
    +equal(body.edge_from_line(marker1 + 2)["Exp"][1]["String"], "1")
    +equal(body.edge_from_line(marker1 + 3)["Exp"][1]["String"], "-1")
    +
    +equal(body.edge_from_point(body.assignment_point("u1"))["Exp"][1]["String"], "1")
    +equal(
    +    body.edge_from_point(body.assignment_point("u2"))["Exp"][1]["String"], "4294967295"
    +)
    +
    +assert "obj" in body["Variables"]
    +assert "random" in body["Variables"]
    +assert "other1" in body["Variables"]
    +assert "other2" in body["Variables"]
    +
    +# Test function annotations
    +js_GC = test.process_body(test.load_db_entry("src_body", re.compile(r"js_GC"))[0])
    +annotations = js_GC["Variables"]["void js_GC()"]["Annotation"]
    +assert annotations
    +found_call_annotate = False
    +for annotation in annotations:
    +    (annType, value) = annotation["Name"]
    +    if annType == "annotate" and value == "GC Call":
    +        found_call_annotate = True
    +assert found_call_annotate
    +
    +# Test type annotations
    +
    +# js::gc::Cell first
    +cell = test.load_db_entry("src_comp", "js::gc::Cell")[0]
    +assert cell["Kind"] == "Struct"
    +annotations = cell["Annotation"]
    +assert len(annotations) == 1
    +(tag, value) = annotations[0]["Name"]
    +assert tag == "annotate"
    +assert value == "GC Thing"
    +
    +# Check JSObject inheritance.
    +JSObject = test.load_db_entry("src_comp", "JSObject")[0]
    +bases = [b["Base"] for b in JSObject["CSUBaseClass"]]
    +assert "js::gc::Cell" in bases
    +assert "Bogon" in bases
    +assert len(bases) == 2
    +
    +# Check type analysis
    +gctypes = test.load_gcTypes()
    +assert "js::gc::Cell" in gctypes["GCThings"]
    +assert "JustACell" in gctypes["GCThings"]
    +assert "JSObject" in gctypes["GCThings"]
    +assert "SpecialObject" in gctypes["GCThings"]
    +assert "UnrootedPointer" in gctypes["GCPointers"]
    +assert "Bogon" not in gctypes["GCThings"]
    +assert "Bogon" not in gctypes["GCPointers"]
    +assert "ErrorResult" not in gctypes["GCPointers"]
    +assert "OkContainer" not in gctypes["GCPointers"]
    +assert "class Rooted<JSObject*>" not in gctypes["GCPointers"]
    diff --git a/js/src/devtools/rootAnalysis/t/sixgill.py b/js/src/devtools/rootAnalysis/t/sixgill.py
    new file mode 100644
    index 0000000000..307f13fae5
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/sixgill.py
    @@ -0,0 +1,70 @@
    +#!/usr/bin/env python
    +# This Source Code Form is subject to the terms of the Mozilla Public
    +# License, v. 2.0. If a copy of the MPL was not distributed with this
    +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
    +
    +from collections import defaultdict
    +
    +# Simplified version of the body info.
    +
    +
    +class Body(dict):
    +    def __init__(self, body):
    +        self["BlockIdKind"] = body["BlockId"]["Kind"]
    +        if "Variable" in body["BlockId"]:
    +            self["BlockName"] = body["BlockId"]["Variable"]["Name"][0].split("$")[-1]
    +        loc = body["Location"]
    +        self["LineRange"] = (loc[0]["Line"], loc[1]["Line"])
    +        self["Filename"] = loc[0]["CacheString"]
    +        self["Edges"] = body.get("PEdge", [])
    +        self["Points"] = {
    +            i: p["Location"]["Line"] for i, p in enumerate(body["PPoint"], 1)
    +        }
    +        self["Index"] = body["Index"]
    +        self["Variables"] = {
    +            x["Variable"]["Name"][0].split("$")[-1]: x["Type"]
    +            for x in body["DefineVariable"]
    +        }
    +
    +        # Indexes
    +        self["Line2Points"] = defaultdict(list)
    +        for point, line in self["Points"].items():
    +            self["Line2Points"][line].append(point)
    +        self["SrcPoint2Edges"] = defaultdict(list)
    +        for edge in self["Edges"]:
    +            src, dst = edge["Index"]
    +            self["SrcPoint2Edges"][src].append(edge)
    +        self["Line2Edges"] = defaultdict(list)
    +        for src, edges in self["SrcPoint2Edges"].items():
    +            line = self["Points"][src]
    +            self["Line2Edges"][line].extend(edges)
    +
    +    def edges_from_line(self, line):
    +        return self["Line2Edges"][line]
    +
    +    def edge_from_line(self, line):
    +        edges = self.edges_from_line(line)
    +        assert len(edges) == 1
    +        return edges[0]
    +
    +    def edges_from_point(self, point):
    +        return self["SrcPoint2Edges"][point]
    +
    +    def edge_from_point(self, point):
    +        edges = self.edges_from_point(point)
    +        assert len(edges) == 1
    +        return edges[0]
    +
    +    def assignment_point(self, varname):
    +        for edge in self["Edges"]:
    +            if edge["Kind"] != "Assign":
    +                continue
    +            dst = edge["Exp"][0]
    +            if dst["Kind"] != "Var":
    +                continue
    +            if dst["Variable"]["Name"][0] == varname:
    +                return edge["Index"][0]
    +        raise Exception("assignment to variable %s not found" % varname)
    +
    +    def assignment_line(self, varname):
    +        return self["Points"][self.assignment_point(varname)]
    diff --git a/js/src/devtools/rootAnalysis/t/suppression/source.cpp b/js/src/devtools/rootAnalysis/t/suppression/source.cpp
    new file mode 100644
    index 0000000000..56e458bdaa
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/suppression/source.cpp
    @@ -0,0 +1,72 @@
    +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
    +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
    +/* This Source Code Form is subject to the terms of the Mozilla Public
    + * License, v. 2.0. If a copy of the MPL was not distributed with this
    + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    +
    +#define ANNOTATE(property) __attribute__((annotate(property)))
    +
    +struct Cell {
    +  int f;
    +} ANNOTATE("GC Thing");
    +
    +class AutoSuppressGC_Base {
    + public:
    +  AutoSuppressGC_Base() {}
    +  ~AutoSuppressGC_Base() {}
    +} ANNOTATE("Suppress GC");
    +
    +class AutoSuppressGC_Child : public AutoSuppressGC_Base {
    + public:
    +  AutoSuppressGC_Child() : AutoSuppressGC_Base() {}
    +};
    +
    +class AutoSuppressGC {
    +  AutoSuppressGC_Child helpImBeingSuppressed;
    +
    + public:
    +  AutoSuppressGC() {}
    +};
    +
    +extern void GC() ANNOTATE("GC Call");
    +
    +void GC() {
    +  // If the implementation is too trivial, the function body won't be emitted at
    +  // all.
    +  asm("");
    +}
    +
    +extern void foo(Cell*);
    +
    +void suppressedFunction() {
    +  GC();  // Calls GC, but is always called within AutoSuppressGC
    +}
    +
    +void halfSuppressedFunction() {
    +  GC();  // Calls GC, but is sometimes called within AutoSuppressGC
    +}
    +
    +void unsuppressedFunction() {
    +  GC();  // Calls GC, never within AutoSuppressGC
    +}
    +
    +void f() {
    +  Cell* cell1 = nullptr;
    +  Cell* cell2 = nullptr;
    +  Cell* cell3 = nullptr;
    +  {
    +    AutoSuppressGC nogc;
    +    suppressedFunction();
    +    halfSuppressedFunction();
    +  }
    +  foo(cell1);
    +  halfSuppressedFunction();
    +  foo(cell2);
    +  unsuppressedFunction();
    +  {
    +    // Old bug: it would look from the first AutoSuppressGC constructor it
    +    // found to the last destructor. This statement *should* have no effect.
    +    AutoSuppressGC nogc;
    +  }
    +  foo(cell3);
    +}
    diff --git a/js/src/devtools/rootAnalysis/t/suppression/test.py b/js/src/devtools/rootAnalysis/t/suppression/test.py
    new file mode 100644
    index 0000000000..118ae422ab
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/suppression/test.py
    @@ -0,0 +1,21 @@
    +# flake8: noqa: F821
    +test.compile("source.cpp")
    +test.run_analysis_script("gcTypes", upto="gcFunctions")
    +
    +# The suppressions file uses mangled names.
    +info = test.load_funcInfo()
    +suppressed = [f for f, v in info.items() if v.get("limits", 0) | 1]
    +
    +# Only one of these is fully suppressed (ie, *always* called within the scope
    +# of an AutoSuppressGC).
    +assert len(list(filter(lambda f: "suppressedFunction" in f, suppressed))) == 1
    +assert len(list(filter(lambda f: "halfSuppressedFunction" in f, suppressed))) == 0
    +assert len(list(filter(lambda f: "unsuppressedFunction" in f, suppressed))) == 0
    +
    +# gcFunctions should be the inverse, but we get to rely on unmangled names here.
    +gcFunctions = test.load_gcFunctions()
    +assert "void GC()" in gcFunctions
    +assert "void suppressedFunction()" not in gcFunctions
    +assert "void halfSuppressedFunction()" in gcFunctions
    +assert "void unsuppressedFunction()" in gcFunctions
    +assert "void f()" in gcFunctions
    diff --git a/js/src/devtools/rootAnalysis/t/testlib.py b/js/src/devtools/rootAnalysis/t/testlib.py
    new file mode 100644
    index 0000000000..e08b236e4f
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/testlib.py
    @@ -0,0 +1,249 @@
    +import json
    +import os
    +import re
    +import subprocess
    +import sys
    +from collections import defaultdict, namedtuple
    +
    +from sixgill import Body
    +
    +scriptdir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    +
    +HazardSummary = namedtuple(
    +    "HazardSummary", ["function", "variable", "type", "GCFunction", "location"]
    +)
    +
    +Callgraph = namedtuple(
    +    "Callgraph",
    +    [
    +        "functionNames",
    +        "nameToId",
    +        "mangledToUnmangled",
    +        "unmangledToMangled",
    +        "calleesOf",
    +        "callersOf",
    +        "tags",
    +        "calleeGraph",
    +        "callerGraph",
    +    ],
    +)
    +
    +
    +def equal(got, expected):
    +    if got != expected:
    +        print("Got '%s', expected '%s'" % (got, expected))
    +
    +
    +def extract_unmangled(func):
    +    return func.split("$")[-1]
    +
    +
    +class Test(object):
    +    def __init__(self, indir, outdir, cfg, verbose=0):
    +        self.indir = indir
    +        self.outdir = outdir
    +        self.cfg = cfg
    +        self.verbose = verbose
    +
    +    def infile(self, path):
    +        return os.path.join(self.indir, path)
    +
    +    def binpath(self, prog):
    +        return os.path.join(self.cfg.sixgill_bin, prog)
    +
    +    def compile(self, source, options=""):
    +        env = os.environ
    +        env["CCACHE_DISABLE"] = "1"
    +        if "-fexceptions" not in options and "-fno-exceptions" not in options:
    +            options += " -fno-exceptions"
    +        cmd = "{CXX} -c {source} -O3 -std=c++17 -fplugin={sixgill} -fplugin-arg-xgill-mangle=1 {options}".format(  # NOQA: E501
    +            source=self.infile(source),
    +            CXX=self.cfg.cxx,
    +            sixgill=self.cfg.sixgill_plugin,
    +            options=options,
    +        )
    +        if self.cfg.verbose > 0:
    +            print("Running %s" % cmd)
    +        subprocess.check_call(["sh", "-c", cmd])
    +
    +    def load_db_entry(self, dbname, pattern):
    +        """Look up an entry from an XDB database file, 'pattern' may be an exact
    +        matching string, or an re pattern object matching a single entry."""
    +
    +        if hasattr(pattern, "match"):
    +            output = subprocess.check_output(
    +                [self.binpath("xdbkeys"), dbname + ".xdb"], universal_newlines=True
    +            )
    +            matches = list(filter(lambda _: re.search(pattern, _), output.splitlines()))
    +            if len(matches) == 0:
    +                raise Exception("entry not found")
    +            if len(matches) > 1:
    +                raise Exception("multiple entries found")
    +            pattern = matches[0]
    +
    +        output = subprocess.check_output(
    +            [self.binpath("xdbfind"), "-json", dbname + ".xdb", pattern],
    +            universal_newlines=True,
    +        )
    +        return json.loads(output)
    +
    +    def run_analysis_script(self, startPhase="gcTypes", upto=None):
    +        open("defaults.py", "w").write(
    +            """\
    +analysis_scriptdir = '{scriptdir}'
    +sixgill_bin = '{bindir}'
    +""".format(
    +                scriptdir=scriptdir, bindir=self.cfg.sixgill_bin
    +            )
    +        )
    +        cmd = [
    +            sys.executable,
    +            os.path.join(scriptdir, "analyze.py"),
    +            ["-q", "", "-v"][min(self.verbose, 2)],
    +        ]
    +        cmd += ["--first", startPhase]
    +        if upto:
    +            cmd += ["--last", upto]
    +        cmd.append("--source=%s" % self.indir)
    +        cmd.append("--js=%s" % self.cfg.js)
    +        if self.cfg.verbose:
    +            print("Running " + " ".join(cmd))
    +        subprocess.check_call(cmd)
    +
    +    def computeGCTypes(self):
    +        self.run_analysis_script("gcTypes", upto="gcTypes")
    +
    +    def computeHazards(self):
    +        self.run_analysis_script("gcTypes")
    +
    +    def load_text_file(self, filename, extract=lambda l: l):
    +        # Return extract(line) for each stripped line, dropping None results.
    +        with open(os.path.join(self.outdir, filename), "r") as fh:
    +            return [v for v in (extract(ln.strip()) for ln in fh) if v is not None]
    +
    +    def load_json_file(self, filename, reviver=None):
    +        fullpath = os.path.join(self.outdir, filename)
    +        with open(fullpath) as fh:
    +            return json.load(fh, object_hook=reviver)
    +
    +    def load_gcTypes(self):
    +        def grab_type(line):
    +            m = re.match(r"^(GC\w+): (.*)", line)
    +            if m:
    +                return (m.group(1) + "s", m.group(2))
    +            return None
    +
    +        gctypes = defaultdict(list)
    +        for collection, typename in self.load_text_file(
    +            "gcTypes.txt", extract=grab_type
    +        ):
    +            gctypes[collection].append(typename)
    +        return gctypes
    +
    +    def load_typeInfo(self, filename="typeInfo.txt"):
    +        return self.load_json_file(filename)
    +
    +    def load_funcInfo(self, filename="limitedFunctions.lst"):
    +        return self.load_json_file(filename)
    +
    +    def load_gcFunctions(self):
    +        return self.load_text_file("gcFunctions.lst", extract=extract_unmangled)
    +
    +    def load_callgraph(self):
    +        data = Callgraph(
    +            functionNames=["dummy"],
    +            nameToId={},
    +            mangledToUnmangled={},
    +            unmangledToMangled={},
    +            calleesOf=defaultdict(list),
    +            callersOf=defaultdict(list),
    +            tags=defaultdict(set),
    +            calleeGraph=defaultdict(dict),
    +            callerGraph=defaultdict(dict),
    +        )
    +
    +        def lookup(id):
    +            mangled = data.functionNames[int(id)]
    +            return data.mangledToUnmangled.get(mangled, mangled)
    +
    +        def add_call(caller, callee, limit):
    +            data.calleesOf[caller].append(callee)
    +            data.callersOf[callee].append(caller)
    +            data.calleeGraph[caller][callee] = True
    +            data.callerGraph[callee][caller] = True
    +
    +        def process(line):
    +            if line.startswith("#"):
    +                name = line.split(" ", 1)[1]
    +                data.nameToId[name] = len(data.functionNames)
    +                data.functionNames.append(name)
    +                return
    +
    +            if line.startswith("="):
    +                m = re.match(r"^= (\d+) (.*)", line)
    +                mangled = data.functionNames[int(m.group(1))]
    +                unmangled = m.group(2)
    +                data.nameToId[unmangled] = int(m.group(1))
    +                data.mangledToUnmangled[mangled] = unmangled
    +                data.unmangledToMangled[unmangled] = mangled
    +                return
    +
    +            # Sample lines:
    +            #   D 10 20
    +            #   D /3 10 20
    +            #   D 3:3 10 20
    +            # All of these mean that there is a direct call from function #10
    +            # to function #20. The latter two mean that the call is made in a
    +            # context where the 0x1 and 0x2 properties (3 == 0x1 | 0x2) are in
    +            # effect. The `/n` syntax was the original, which was then expanded
    +            # to `m:n` to allow multiple calls to be combined together when not
    +            # all calls have the same properties in effect. The `/n` syntax is
    +            # deprecated.
    +            #
    +            # The properties usually refer to "limits", eg "GC is suppressed
    +            # in the scope surrounding this call". For testing purposes, the
    +            # difference between `m` and `n` in `m:n` is currently ignored.
    +            tokens = line.split(" ")
    +            limit = 0
    +            if tokens[1].startswith("/"):
    +                attr_str = tokens.pop(1)
    +                limit = int(attr_str[1:])
    +            elif ":" in tokens[1]:
    +                attr_str = tokens.pop(1)
    +                limit = int(attr_str[0 : attr_str.index(":")])
    +
    +            if tokens[0] in ("D", "R"):
    +                _, caller, callee = tokens
    +                add_call(lookup(caller), lookup(callee), limit)
    +            elif tokens[0] == "T":
    +                data.tags[tokens[1]].add(line.split(" ", 2)[2])
    +            elif tokens[0] in ("F", "V"):
    +                pass
    +
    +            elif tokens[0] == "I":
    +                # "I <id> VARIABLE <name>" records are recognized but not stored.
    +                pass
    +
    +        self.load_text_file("callgraph.txt", extract=process)
    +        return data
    +
    +    def load_hazards(self):
    +        def grab_hazard(line):
    +            m = re.match(
    +                r"Function '(.*?)' has unrooted '(.*?)' of type '(.*?)' live across GC call '(.*?)' at (.*)",  # NOQA: E501
    +                line,
    +            )
    +            if m:
    +                info = list(m.groups())
    +                info[0] = info[0].split("$")[-1]
    +                info[3] = info[3].split("$")[-1]
    +                return HazardSummary(*info)
    +            return None
    +
    +        return self.load_text_file("hazards.txt", extract=grab_hazard)
    +
    +    def process_body(self, body):  # wrap one raw body in a Body object
    +        return Body(body)
    +
    +    def process_bodies(self, bodies):  # apply process_body to each element, returning a list
    +        return [self.process_body(b) for b in bodies]
    diff --git a/js/src/devtools/rootAnalysis/t/types/source.cpp b/js/src/devtools/rootAnalysis/t/types/source.cpp
    new file mode 100644
    index 0000000000..c8a2d4aa73
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/types/source.cpp
    @@ -0,0 +1,167 @@
    +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
    +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
    +/* This Source Code Form is subject to the terms of the Mozilla Public
    + * License, v. 2.0. If a copy of the MPL was not distributed with this
    + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    +
    +#include <memory>
    +#include <utility>
    +
    +#define ANNOTATE(property) __attribute__((annotate(property)))
    +
    +struct Cell {
    +  int f;
    +} ANNOTATE("GC Thing");
    +
    +namespace World {
    +namespace NS {
    +struct Unsafe {
    +  int g;
    +  ~Unsafe() { asm(""); }
    +} ANNOTATE("Invalidated by GC") ANNOTATE("GC Pointer or Reference");
    +}  // namespace NS
    +}  // namespace World
    +
    +extern void GC() ANNOTATE("GC Call");
    +extern void invisible();
    +
    +void GC() {
    +  // If the implementation is too trivial, the function body won't be emitted at
    +  // all.
    +  asm("");
    +  invisible();
    +}
    +
    +struct GCOnDestruction {
    +  ~GCOnDestruction() { GC(); }
    +};
    +
    +struct NoGCOnDestruction {
    +  ~NoGCOnDestruction() { asm(""); }
    +};
    +
    +extern void usecell(Cell*);
    +
    +Cell* cell() {
    +  static Cell c;
    +  return &c;
    +}
    +
    +template 
    +struct SimpleTemplate {
    +  int member;
    +};
    +
    +template <typename T, typename U>
    +class ANNOTATE("moz_inherit_type_annotations_from_template_args") Container {
    + public:
    +  template <typename V, typename W>
    +  void foo(V& v, W& w) {
    +    class InnerClass {};
    +    InnerClass xxx;
    +    return;
    +  }
    +
    +  struct Entry {
    +    T t;
    +    U u;
    +  }* ent;
    +};
    +
    +Cell* f() {
    +  Container c1;
    +  Container, SimpleTemplate> c2;
    +  Container, Container> c3;
    +  Container, float>,
    +            Container>>
    +      c4;
    +
    +  return nullptr;
    +}
    +
    +// Define a set of classes for verifying that there is no infinite loop
    +// when a class contains itself via mozilla::UniquePtr.
    +
    +namespace mozilla {
    +
    +template <typename A>
    +struct JustAField {
    +  A field;
    +
    +  // Hack to allow UniquePtr and SimpleUniquePtr to be swapped.
    +  A& operator->() { return field; }
    +};
    +
    +template <typename T>
    +struct UniquePtr {
    +  JustAField<T*> holder;
    +};
    +
    +// This did not trigger the infinite loop, because the pointer here
    +// caused the UniquePtr special handling to be skipped. It requires
    +// the above definition to be triggered, which matches the actual
    +// implementation (JustAField maps to CompactPair, more or less).
    +// The bugfix for the infinite loop also drops this requirement, so
    +// now this *would* trigger the bug if it weren't fixed in the same
    +// commit.
    +template <typename T>
    +struct SimpleUniquePtr {
    +  T* holder;
    +};
    +
    +}  // namespace mozilla
    +
    +class Recursive {
    + public:
    +  using EntryMap = Container;
    +  mozilla::UniquePtr entries;
    +};
    +
    +void rvalue_ref(World::NS::Unsafe&& arg1) { GC(); }
    +
    +void ref(const World::NS::Unsafe& arg2) {
    +  Recursive* foo;
    +  // Must actually use a type for the compiler to instantiate the
    +  // template specializations.
    +  foo->entries.holder->ent;
    +  GC();
    +  static int use = arg2.g;
    +}
    +
    +// A function that consumes a parameter, but only if passed by rvalue reference.
    +extern void eat(World::NS::Unsafe&&);
    +extern void eat(World::NS::Unsafe&);
    +
    +void rvalue_ref_ok() {
    +  World::NS::Unsafe unsafe1;
    +  eat(std::move(unsafe1));
    +  GC();
    +}
    +
    +void rvalue_ref_not_ok() {
    +  World::NS::Unsafe unsafe2;
    +  eat(unsafe2);
    +  GC();
    +}
    +
    +void rvalue_ref_arg_ok(World::NS::Unsafe&& unsafe3) {
    +  eat(std::move(unsafe3));
    +  GC();
    +}
    +
    +void rvalue_ref_arg_not_ok(World::NS::Unsafe&& unsafe4) {
    +  eat(unsafe4);
    +  GC();
    +}
    +
    +void shared_ptr_hazard() {
    +  Cell* unsafe5 = f();
    +  { auto p = std::make_shared<GCOnDestruction>(); }  // ~GCOnDestruction calls GC()
    +  usecell(unsafe5);
    +}
    +
    +void shared_ptr_no_hazard() {
    +  Cell* safe6 = f();
    +  { auto p = std::make_shared<NoGCOnDestruction>(); }  // ~NoGCOnDestruction does not call GC()
    +  usecell(safe6);
    +}
    diff --git a/js/src/devtools/rootAnalysis/t/types/test.py b/js/src/devtools/rootAnalysis/t/types/test.py
    new file mode 100644
    index 0000000000..4a2b985abf
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/types/test.py
    @@ -0,0 +1,16 @@
    +# flake8: noqa: F821
    +
    +from collections import defaultdict
    +
    +test.compile("source.cpp")
    +test.run_analysis_script()
    +hazards = test.load_hazards()
    +hazmap = {haz.variable: haz for haz in hazards}
    +assert "arg1" in hazmap
    +assert "arg2" in hazmap
    +assert "unsafe1" not in hazmap
    +assert "unsafe2" in hazmap
    +assert "unsafe3" not in hazmap
    +assert "unsafe4" in hazmap
    +assert "unsafe5" in hazmap
    +assert "safe6" not in hazmap
    diff --git a/js/src/devtools/rootAnalysis/t/virtual/source.cpp b/js/src/devtools/rootAnalysis/t/virtual/source.cpp
    new file mode 100644
    index 0000000000..980546f38d
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/virtual/source.cpp
    @@ -0,0 +1,366 @@
    +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
    +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
    +/* This Source Code Form is subject to the terms of the Mozilla Public
    + * License, v. 2.0. If a copy of the MPL was not distributed with this
    + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    +
    +#define ANNOTATE(property) __attribute__((annotate(property)))
    +
    +extern void GC() ANNOTATE("GC Call");
    +
    +void GC() {
    +  // If the implementation is too trivial, the function body won't be emitted at
    +  // all.
    +  asm("");
    +}
    +
    +// Special-cased function -- code that can run JS has an artificial edge to
    +// js::RunScript.
    +namespace js {
    +void RunScript() { GC(); }
    +}  // namespace js
    +
    +struct Cell {
    +  int f;
    +} ANNOTATE("GC Thing");
    +
    +extern void foo();
    +
    +void bar() { GC(); }
    +
    +typedef void (*func_t)();
    +
    +class Base {
    + public:
    +  int ANNOTATE("field annotation") dummy;
    +  virtual void someGC() ANNOTATE("Base pure virtual method") = 0;
    +  virtual void someGC(int) ANNOTATE("overloaded Base pure virtual method") = 0;
    +  virtual void sibGC() = 0;
    +  virtual void onBase() { bar(); }
    +  func_t functionField;
    +
    +  // For now, this is just to verify that the plugin doesn't crash. The
    +  // analysis code does not yet look at this annotation or output it anywhere
    +  // (though it *is* being recorded.)
    +  static float testAnnotations() ANNOTATE("static func");
    +
    +  // Similar, though sixgill currently completely ignores parameter annotations.
    +  static double testParamAnnotations(Cell& ANNOTATE("param annotation")
    +                                         ANNOTATE("second param annot") cell)
    +      ANNOTATE("static func") ANNOTATE("second func");
    +};
    +
    +float Base::testAnnotations() {
    +  asm("");
    +  return 1.1;
    +}
    +
    +double Base::testParamAnnotations(Cell& cell) {
    +  asm("");
    +  return 1.2;
    +}
    +
    +class Super : public Base {
    + public:
    +  virtual void ANNOTATE("Super pure virtual") noneGC() = 0;
    +  virtual void allGC() = 0;
    +  virtual void onSuper() { asm(""); }
    +  void nonVirtualFunc() { asm(""); }
    +};
    +
    +class Sub1 : public Super {
    + public:
    +  void noneGC() override { foo(); }
    +  void someGC() override ANNOTATE("Sub1 override") ANNOTATE("second attr") {
    +    foo();
    +  }
    +  void someGC(int) override ANNOTATE("Sub1 override for int overload") {
    +    foo();
    +  }
    +  void allGC() override {
    +    foo();
    +    bar();
    +  }
    +  void sibGC() override { foo(); }
    +  void onBase() override { foo(); }
    +} ANNOTATE("CSU1") ANNOTATE("CSU2");
    +
    +class Sub2 : public Super {
    + public:
    +  void noneGC() override { foo(); }
    +  void someGC() override {
    +    foo();
    +    bar();
    +  }
    +  void someGC(int) override {
    +    foo();
    +    bar();
    +  }
    +  void allGC() override {
    +    foo();
    +    bar();
    +  }
    +  void sibGC() override { foo(); }
    +};
    +
    +class Sibling : public Base {
    + public:
    +  virtual void noneGC() { foo(); }
    +  void someGC() override {
    +    foo();
    +    bar();
    +  }
    +  void someGC(int) override {
    +    foo();
    +    bar();
    +  }
    +  virtual void allGC() {
    +    foo();
    +    bar();
    +  }
    +  void sibGC() override { bar(); }
    +};
    +
    +class AutoSuppressGC {
    + public:
    +  AutoSuppressGC() {}
    +  ~AutoSuppressGC() {}
    +} ANNOTATE("Suppress GC");
    +
    +void use(Cell*) { asm(""); }
    +
    +class nsISupports {
    + public:
    +  virtual ANNOTATE("Can run script") void danger() { asm(""); }
    +
    +  virtual ~nsISupports() = 0;
    +};
    +
    +class nsIPrincipal : public nsISupports {
    + public:
    +  ~nsIPrincipal() override{};
    +};
    +
    +struct JSPrincipals {
    +  int debugToken;
    +  JSPrincipals() = default;
    +  virtual ~JSPrincipals() { GC(); }
    +};
    +
    +class nsJSPrincipals : public nsIPrincipal, public JSPrincipals {
    + public:
    +  void Release() { delete this; }
    +};
    +
    +class SafePrincipals : public nsIPrincipal {
    + public:
    +  ~SafePrincipals() { foo(); }
    +};
    +
    +void f() {
    +  Sub1 s1;
    +  Sub2 s2;
    +
    +  static Cell cell;
    +  {
    +    Cell* c1 = &cell;
    +    s1.noneGC();
    +    use(c1);
    +  }
    +  {
    +    Cell* c2 = &cell;
    +    s2.someGC();
    +    use(c2);
    +  }
    +  {
    +    Cell* c3 = &cell;
    +    s1.allGC();
    +    use(c3);
    +  }
    +  {
    +    Cell* c4 = &cell;
    +    s2.noneGC();
    +    use(c4);
    +  }
    +  {
    +    Cell* c5 = &cell;
    +    s2.someGC();
    +    use(c5);
    +  }
    +  {
    +    Cell* c6 = &cell;
    +    s2.allGC();
    +    use(c6);
    +  }
    +
    +  Super* super = &s2;
    +  {
    +    Cell* c7 = &cell;
    +    super->noneGC();
    +    use(c7);
    +  }
    +  {
    +    Cell* c8 = &cell;
    +    super->someGC();
    +    use(c8);
    +  }
    +  {
    +    Cell* c9 = &cell;
    +    super->allGC();
    +    use(c9);
    +  }
    +
    +  {
    +    Cell* c10 = &cell;
    +    s1.functionField();
    +    use(c10);
    +  }
    +  {
    +    Cell* c11 = &cell;
    +    super->functionField();
    +    use(c11);
    +  }
    +  {
    +    Cell* c12 = &cell;
    +    super->sibGC();
    +    use(c12);
    +  }
    +
    +  Base* base = &s2;
    +  {
    +    Cell* c13 = &cell;
    +    base->sibGC();
    +    use(c13);
    +  }
    +
    +  nsJSPrincipals pals;
    +  {
    +    Cell* c14 = &cell;
    +    nsISupports* p = &pals;
    +    p->danger();
    +    use(c14);
    +  }
    +
    +  // Base defines, Sub1 overrides, static Super can call either.
    +  {
    +    Cell* c15 = &cell;
    +    super->onBase();
    +    use(c15);
    +  }
    +
    +  {
    +    Cell* c16 = &cell;
    +    s2.someGC(7);
    +    use(c16);
    +  }
    +
    +  {
    +    Cell* c17 = &cell;
    +    super->someGC(7);
    +    use(c17);
    +  }
    +
    +  {
    +    nsJSPrincipals* princ = new nsJSPrincipals();
    +    Cell* c18 = &cell;
    +    delete princ;  // Can GC
    +    use(c18);
    +  }
    +
    +  {
    +    nsJSPrincipals* princ = new nsJSPrincipals();
    +    nsISupports* supp = static_cast<nsISupports*>(princ);
    +    Cell* c19 = &cell;
    +    delete supp;  // Can GC
    +    use(c19);
    +  }
    +
    +  {
    +    auto* safe = new SafePrincipals();
    +    Cell* c20 = &cell;
    +    delete safe;  // Cannot GC
    +    use(c20);
    +  }
    +
    +  {
    +    auto* safe = new SafePrincipals();
    +    nsISupports* supp = static_cast<nsISupports*>(safe);
    +    Cell* c21 = &cell;
    +    delete supp;  // Compiler thinks destructor can GC.
    +    use(c21);
    +  }
    +}
    +
    +template <typename Function>
    +void Call1(Function&& f) {
    +  f();
    +}
    +
    +template <typename Function>
    +void Call2(Function&& f) {
    +  f();
    +}
    +
    +void function_pointers() {
    +  Cell cell;
    +
    +  {
    +    auto* f = GC;
    +    Cell* c22 = &cell;
    +    f();
    +    use(c22);
    +  }
    +
    +  {
    +    auto* f = GC;
    +    auto*& g = f;
    +    Cell* c23 = &cell;
    +    g();
    +    use(c23);
    +  }
    +
    +  {
    +    auto* f = GC;
    +    Call1([&] {
    +      Cell* c24 = &cell;
    +      f();
    +      use(c24);
    +    });
    +  }
    +}
    +
    +// Use a separate function to test `mallocSizeOf` annotations. Bug 1872197:
    +// functions that are specialized on a lambda function and call that function
    +// will have that call get mixed up with other calls of lambdas defined within
    +// the same function.
    +void annotated_function_pointers() {
    +  Cell cell;
    +
    +  // Variables with the specific name "mallocSizeOf" are
    +  // annotated to not GC. (Heh... even though here, they
    +  // *do* GC!)
    +
    +  {
    +    auto* mallocSizeOf = GC;
    +    Cell* c25 = &cell;
    +    mallocSizeOf();
    +    use(c25);
    +  }
    +
    +  {
    +    auto* f = GC;
    +    auto*& mallocSizeOf = f;
    +    Cell* c26 = &cell;
    +    mallocSizeOf();
    +    use(c26);
    +  }
    +
    +  {
    +    auto* mallocSizeOf = GC;
    +    Call2([&] {
    +      Cell* c27 = &cell;
    +      mallocSizeOf();
    +      use(c27);
    +    });
    +  }
    +}
    diff --git a/js/src/devtools/rootAnalysis/t/virtual/test.py b/js/src/devtools/rootAnalysis/t/virtual/test.py
    new file mode 100644
    index 0000000000..26d2e51ed6
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/t/virtual/test.py
    @@ -0,0 +1,99 @@
    +# 'test' is provided by the calling script.
    +# flake8: noqa: F821
    +
    +test.compile("source.cpp")
    +test.run_analysis_script("gcTypes")
    +
    +info = test.load_typeInfo()
    +
    +assert "Sub1" in info["OtherCSUTags"]
    +assert ["CSU1", "CSU2"] == sorted(info["OtherCSUTags"]["Sub1"])
    +assert "Base" in info["OtherFieldTags"]
    +assert "someGC" in info["OtherFieldTags"]["Base"]
    +assert "Sub1" in info["OtherFieldTags"]
    +assert "someGC" in info["OtherFieldTags"]["Sub1"]
    +
    +# For now, fields with the same name (eg overloaded virtual methods) just
    +# accumulate attributes.
    +assert ["Sub1 override", "Sub1 override for int overload", "second attr"] == sorted(
    +    info["OtherFieldTags"]["Sub1"]["someGC"]
    +)
    +
    +gcFunctions = test.load_gcFunctions()
    +
    +assert "void Sub1::noneGC()" not in gcFunctions
    +assert "void Sub1::someGC()" not in gcFunctions
    +assert "void Sub1::someGC(int32)" not in gcFunctions
    +assert "void Sub1::allGC()" in gcFunctions
    +assert "void Sub2::noneGC()" not in gcFunctions
    +assert "void Sub2::someGC()" in gcFunctions
    +assert "void Sub2::someGC(int32)" in gcFunctions
    +assert "void Sub2::allGC()" in gcFunctions
    +
    +callgraph = test.load_callgraph()
    +
    +assert callgraph.calleeGraph["void f()"]["Super.noneGC:0"]
    +assert callgraph.calleeGraph["Super.noneGC:0"]["Sub1.noneGC:0"]
    +assert callgraph.calleeGraph["Super.noneGC:0"]["Sub2.noneGC:0"]
    +assert callgraph.calleeGraph["Sub1.noneGC:0"]["void Sub1::noneGC()"]
    +assert callgraph.calleeGraph["Sub2.noneGC:0"]["void Sub2::noneGC()"]
    +assert "void Sibling::noneGC()" not in callgraph.calleeGraph["Super.noneGC:0"]
    +assert callgraph.calleeGraph["Super.onBase:0"]["Sub1.onBase:0"]
    +assert callgraph.calleeGraph["Sub1.onBase:0"]["void Sub1::onBase()"]
    +assert callgraph.calleeGraph["Super.onBase:0"]["void Base::onBase()"]
    +assert "void Sibling::onBase()" not in callgraph.calleeGraph["Super.onBase:0"]
    +
    +hazards = test.load_hazards()
    +hazmap = {haz.variable: haz for haz in hazards}
    +
    +assert "c1" not in hazmap
    +assert "c2" in hazmap
    +assert "c3" in hazmap
    +assert "c4" not in hazmap
    +assert "c5" in hazmap
    +assert "c6" in hazmap
    +assert "c7" not in hazmap
    +assert "c8" in hazmap
    +assert "c9" in hazmap
    +assert "c10" in hazmap
    +assert "c11" in hazmap
    +
    +# Virtual resolution should take the static type into account: the only method
    +# implementations considered should be those of descendants, even if the
    +# virtual method is inherited and not overridden in the static class. (Base
    +# defines sibGC() as pure virtual, Super inherits it without overriding, and
    +# Sibling, Sub1 and Sub2 all implement it.)
    +
    +# Call Base.sibGC on a Super pointer: can only call Sub1.sibGC() or Sub2.sibGC(), neither of which GCs.
    +# In particular, PEdgeCallInstance.Exp.Field.FieldCSU.Type = {Kind: "CSU", Name="Super"}
    +assert "c12" not in hazmap
    +# Call Base.sibGC on a Base pointer; can call Sibling.sibGC(), which GCs.
    +assert "c13" in hazmap
    +
    +# Call nsISupports.danger() which is annotated to be overridable and hence can GC.
    +assert "c14" in hazmap
    +
    +# someGC(int) overload
    +assert "c16" in hazmap
    +assert "c17" in hazmap
    +
    +# Super.onBase() could call the GC'ing Base::onBase().
    +assert "c15" in hazmap
    +
    +# virtual ~nsJSPrincipals calls ~JSPrincipals calls GC.
    +assert "c18" in hazmap
    +assert "c19" in hazmap
    +
    +# ~SafePrincipals does not GC.
    +assert "c20" not in hazmap
    +
    +# ...but when cast to a nsISupports*, the compiler can't tell that it won't.
    +assert "c21" in hazmap
    +
    +# Function pointers! References to function pointers! Created by reference-capturing lambdas!
    +assert "c22" in hazmap
    +assert "c23" in hazmap
    +assert "c24" in hazmap
    +assert "c25" not in hazmap
    +assert "c26" not in hazmap
    +assert "c27" not in hazmap
    diff --git a/js/src/devtools/rootAnalysis/utility.js b/js/src/devtools/rootAnalysis/utility.js
    new file mode 100644
    index 0000000000..94b5391c02
    --- /dev/null
    +++ b/js/src/devtools/rootAnalysis/utility.js
    @@ -0,0 +1,422 @@
    +/* This Source Code Form is subject to the terms of the Mozilla Public
    + * License, v. 2.0. If a copy of the MPL was not distributed with this file,
    + * You can obtain one at http://mozilla.org/MPL/2.0/. */
    +
    +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */
    +
    +"use strict";
    +
    +loadRelativeToScript('dumpCFG.js');
    +
    +// Attribute bits - each call edge may carry a set of 'attrs' bits, saying,
    +// for example, that the edge takes place within a scope where GC is
    +// suppressed.
    +var ATTR_GC_SUPPRESSED     = 1 << 0;
    +var ATTR_CANSCRIPT_BOUNDED = 1 << 1; // Unimplemented
    +var ATTR_DOM_ITERATING     = 1 << 2; // Unimplemented
    +var ATTR_NONRELEASING      = 1 << 3; // ~RefPtr of value whose refcount will not go to zero
    +var ATTR_REPLACED          = 1 << 4; // Ignore edge, it was replaced by zero or more better edges.
    +var ATTR_SYNTHETIC         = 1 << 5; // Call was manufactured in some way.
    +
    +var ATTR_LAST              = 1 << 5;
    +var ATTRS_NONE             = 0;
    +var ATTRS_ALL              = (ATTR_LAST << 1) - 1; // All possible bits set
    +
    +// The traversal algorithms we run will recurse into children if you change any
    +// attrs bit to zero. Start every node with all bits set (maximally attributed),
    +// including extra bits that just mean "unvisited", so that the first time we
    +// see a node with these attrs we are guaranteed to turn at least one bit off
    +// and thereby keep going.
    +var ATTRS_UNVISITED = 0xffff;
    +
    +// gcc appends this to mangled function names for "not in charge"
    +// constructors/destructors.
    +var internalMarker = " *INTERNAL* ";
    +
    +if (! Set.prototype.hasOwnProperty("update")) {
    +    Object.defineProperty(Set.prototype, "update", {
    +        value: function (collection) {
    +            for (let elt of collection)
    +                this.add(elt);
    +        }
    +    });
    +}
    +
    +function assert(x, msg)
    +{
    +    if (x)
    +        return;
    +    debugger;
    +    if (msg)
    +        throw new Error("assertion failed: " + msg + "\n");
    +    else
    +        throw new Error("assertion failed");
    +}
    +
    +function defined(x) { // true for any value other than `undefined` (so null is "defined")
    +    return x !== undefined;
    +}
    +
    +function xprint(x, padding)
    +{
    +    if (!padding)
    +        padding = "";
    +    if (x instanceof Array) {
    +        print(padding + "[");
    +        for (var elem of x)
    +            xprint(elem, padding + " ");
    +        print(padding + "]");
    +    } else if (x instanceof Object) {
    +        print(padding + "{");
    +        for (var prop in x) {
    +            print(padding + " " + prop + ":");
    +            xprint(x[prop], padding + "  ");
    +        }
    +        print(padding + "}");
    +    } else {
    +        print(padding + x);
    +    }
    +}
    +
    +// Command-line argument parser.
    +//
    +// `parameters` is a dict of parameters specs, each of which is a dict with keys:
    +//
    +//   - name: name of option, prefixed with "--" if it is named (otherwise, it
    +//     is interpreted as a positional parameter.)
    +//   - dest: key to store the result in, defaulting to the parameter name without
    +//     any leading "--"" and with dashes replaced with underscores.
    +//   - default: value of option if no value is given. Positional parameters with
    +//     a default value are optional. If no default is given, the parameter's name
    +//     is not included in the return value.
    +//   - type: `bool` if it takes no argument, otherwise an argument is required.
    +//     Named arguments default to 'bool', positional arguments to 'string'.
    +//   - nargs: the only supported value is `+`, which means to grab all following
    +//     arguments, up to the next named option, and store them as a list.
    +//
    +// The command line is parsed for `--foo=value` and `--bar` arguments.
    +//
    +// Return value is a dict of parameter values, keyed off of `dest` as determined
    +// above. An extra option named "rest" will be set to the list of all remaining
    +// arguments passed in.
    +//
    +function parse_options(parameters, inArgs = scriptArgs) {
    +    const options = {};
    +
    +    const named = {};
    +    const positional = [];
    +    for (const param of parameters) {
    +        if (param.name.startsWith("-")) {
    +            named[param.name] = param;
    +            if (!param.dest) {
    +                if (!param.name.startsWith("--")) {
    +                    throw new Error(`parameter '${param.name}' requires param.dest to be set`);
    +                }
    +                param.dest = param.name.substring(2).replace("-", "_");
    +            }
    +        } else {
    +            if (!('default' in param) && positional.length > 0 && ('default' in positional.at(-1))) {
    +                throw new Error(`required parameter '${param.name}' follows optional parameter`);
    +            }
    +            param.positional = true;
    +            positional.push(param);
    +            param.dest = param.dest || param.name.replace("-", "_");
    +        }
    +
    +        if (!param.type) {
    +            if (param.nargs === "+") {
    +                param.type = "list";
    +            } else if (param.positional) {
    +                param.type = "string";
    +            } else {
    +                param.type = "bool";
    +            }
    +        }
    +
    +        if ('default' in param) {
    +            options[param.dest] = param.default;
    +        }
    +    }
    +
    +    options.rest = [];
    +    const args = [...inArgs];
    +    let grabbing_into = undefined;
    +    while (args.length > 0) {
    +        let arg = args.shift();
    +        let param;
    +        if (arg.startsWith("-") && arg in named) {
    +            param = named[arg];
    +            if (param.type !== 'bool') {
    +                if (args.length == 0) {
    +                    throw(new Error(`${param.name} requires an argument`));
    +                }
    +                arg = args.shift();
    +            }
    +        } else {
    +            const pos = arg.indexOf("=");
    +            if (pos != -1) {
    +                const name = arg.substring(0, pos);
    +                param = named[name];
    +                if (!param) {
    +                    throw(new Error(`Unknown option '${name}'`));
    +                } else if (param.type === 'bool') {
    +                    throw(new Error(`--${param.name} does not take an argument`));
    +                }
    +                arg = arg.substring(pos + 1);
    +            }
    +        }
    +
    +        // If this isn't a --named param, and we're not accumulating into a nargs="+" param, then
    +        // use the next positional.
    +        if (!param && !grabbing_into && positional.length > 0) {
    +            param = positional.shift();
    +        }
    +
    +        // If a parameter was identified, then any old accumulator is done and we might start a new one.
    +        if (param) {
    +            if (param.type === 'list') {
    +                grabbing_into = options[param.dest] = options[param.dest] || [];
    +            } else {
    +                grabbing_into = undefined;
    +            }
    +        }
    +
    +        if (grabbing_into) {
    +            grabbing_into.push(arg);
    +        } else if (param) {
    +            if (param.type === 'bool') {
    +                options[param.dest] = true;
    +            } else {
    +                options[param.dest] = arg;
    +            }
    +        } else {
    +            options.rest.push(arg);
    +        }
    +    }
    +
    +    for (const param of positional) {
    +        if (!('default' in param)) {
    +            throw(new Error(`'${param.name}' option is required`));
    +        }
    +    }
    +
    +    for (const param of parameters) {
    +        if (param.nargs === '+' && options[param.dest].length == 0) {
    +            throw(new Error(`at least one value required for option '${param.name}'`));
    +        }
    +    }
    +
    +    return options;
    +}
    +
    +function sameBlockId(id0, id1)
    +{
    +    if (id0.Kind != id1.Kind)
    +        return false;
    +    if (!sameVariable(id0.Variable, id1.Variable))
    +        return false;
    +    if (id0.Kind == "Loop" && id0.Loop != id1.Loop)
    +        return false;
    +    return true;
    +}
    +
    +function sameVariable(var0, var1)
    +{
    +    assert("Name" in var0 || var0.Kind == "This" || var0.Kind == "Return");
    +    assert("Name" in var1 || var1.Kind == "This" || var1.Kind == "Return");
    +    if ("Name" in var0)
    +        return "Name" in var1 && var0.Name[0] == var1.Name[0];
    +    return var0.Kind == var1.Kind;
    +}
    +
    +function blockIdentifier(body)
    +{
    +    if (body.BlockId.Kind == "Loop")
    +        return body.BlockId.Loop;
    +    assert(body.BlockId.Kind == "Function", "body.Kind should be Function, not " + body.BlockId.Kind);
    +    return body.BlockId.Variable.Name[0];
    +}
    +
    +function collectBodyEdges(body)
    +{
    +    body.predecessors = [];
    +    body.successors = [];
    +    if (!("PEdge" in body))
    +        return;
    +
    +    for (var edge of body.PEdge) {
    +        var [ source, target ] = edge.Index;
    +        if (!(target in body.predecessors))
    +            body.predecessors[target] = [];
    +        body.predecessors[target].push(edge);
    +        if (!(source in body.successors))
    +            body.successors[source] = [];
    +        body.successors[source].push(edge);
    +    }
    +}
    +
    +function getPredecessors(body) // predecessor edge table of `body`, built on first request
    +{
    +    if (!('predecessors' in body)) // not indexed yet
    +        collectBodyEdges(body);
    +    return body.predecessors;
    +}
    +
    +function getSuccessors(body) // successor edge table of `body`, built on first request
    +{
    +    if (!('successors' in body)) // not indexed yet
    +        collectBodyEdges(body);
    +    return body.successors;
    +}
    +
    +// Split apart a function from sixgill into its mangled and unmangled name. If
    +// no mangled name was given, use the unmangled name as its mangled name
    +function splitFunction(func)
    +{
    +    var split = func.indexOf("$");
    +    if (split != -1)
    +        return [ func.substr(0, split), func.substr(split+1) ];
    +    split = func.indexOf("|");
    +    if (split != -1)
    +        return [ func.substr(0, split), func.substr(split+1) ];
    +    return [ func, func ];
    +}
    +
    +function mangled(fullname)
    +{
    +    var [ mangled, unmangled ] = splitFunction(fullname);
    +    return mangled;
    +}
    +
    +function readable(fullname)
    +{
    +    var [ mangled, unmangled ] = splitFunction(fullname);
    +    return unmangled;
    +}
    +
    +function xdbLibrary() // returns a table of ctypes bindings for the xdb_* C functions
    +{
    +    var lib = ctypes.open(os.getenv('XDB')); // library path comes from the XDB environment variable
    +    var api = {
    +        open: lib.declare("xdb_open", ctypes.default_abi, ctypes.void_t, ctypes.char.ptr),
    +        min_data_stream: lib.declare("xdb_min_data_stream", ctypes.default_abi, ctypes.int),
    +        max_data_stream: lib.declare("xdb_max_data_stream", ctypes.default_abi, ctypes.int),
    +        read_key: lib.declare("xdb_read_key", ctypes.default_abi, ctypes.char.ptr, ctypes.int),
    +        read_entry: lib.declare("xdb_read_entry", ctypes.default_abi, ctypes.char.ptr, ctypes.char.ptr),
    +        free_string: lib.declare("xdb_free", ctypes.default_abi, ctypes.void_t, ctypes.char.ptr)
    +    };
    +    try { // xdb_lookup_key is optional; this declare() is allowed to fail
    +        api.lookup_key = lib.declare("xdb_lookup_key", ctypes.default_abi, ctypes.int, ctypes.char.ptr);
    +    } catch (e) {
    +        // lookup_key is for development use only and is not strictly necessary.
    +    }
    +    return api;
    +}
    +
    +function openLibrary(names) {
    +    for (const name of names) {
    +        try {
    +            return ctypes.open(name);
    +        } catch(e) {
    +        }
    +    }
    +    return undefined;
    +}
    +
    +function cLibrary()
    +{
    +    const lib = openLibrary(['libc.so.6', 'libc.so', 'libc.dylib']);
    +    if (!lib) {
    +        throw new Error("Unable to open libc");
    +    }
    +
    +    if (getBuildConfiguration("moz-memory")) {
    +        throw new Error("cannot use libc functions with --enable-jemalloc, since they will be routed " +
    +                        "through jemalloc, but calling libc.free() directly will bypass it and the " +
    +                        "malloc/free will be mismatched");
    +    }
    +
    +    return {
    +        fopen: lib.declare("fopen", ctypes.default_abi, ctypes.void_t.ptr, ctypes.char.ptr, ctypes.char.ptr),
    +        getline: lib.declare("getline", ctypes.default_abi, ctypes.ssize_t, ctypes.char.ptr.ptr, ctypes.size_t.ptr, ctypes.void_t.ptr),
    +        fclose: lib.declare("fclose", ctypes.default_abi, ctypes.int, ctypes.void_t.ptr),
    +        free: lib.declare("free", ctypes.default_abi, ctypes.void_t, ctypes.void_t.ptr),
    +    };
    +}
    +
    +function* readFileLines_gen(filename)
    +{
    +    var libc = cLibrary();
    +    var linebuf = ctypes.char.ptr();
    +    var bufsize = ctypes.size_t(0);
    +    var fp = libc.fopen(filename, "r");
    +    if (fp.isNull())
    +        throw new Error("Unable to open '" + filename + "'");
    +
    +    while (libc.getline(linebuf.address(), bufsize.address(), fp) > 0)
    +        yield linebuf.readString();
    +    libc.fclose(fp);
    +    libc.free(ctypes.void_t.ptr(linebuf));
    +}
    +
    +function addToKeyedList(collection, key, entry)
    +{
    +    if (!(key in collection))
    +        collection[key] = [];
    +    collection[key].push(entry);
    +    return collection[key];
    +}
    +
    +function addToMappedList(map, key, entry)
    +{
    +    if (!map.has(key))
    +        map.set(key, []);
    +    map.get(key).push(entry);
    +    return map.get(key);
    +}
    +
    +function loadTypeInfo(filename)
    +{
    +    return JSON.parse(os.file.readFile(filename));
    +}
    +
    +// Given the range `first` .. `last`, break it down into `count` batches and
    +// return the start of the (1-based) `num` batch.
    +function batchStart(num, count, first, last) {
    +  const N = (last - first) + 1;
    +  return Math.floor((num - 1) / count * N) + first;
    +}
    +
    +// As above, but return the last value in the (1-based) `num` batch.
    +function batchLast(num, count, first, last) {
    +  const N = (last - first) + 1;
    +  return Math.floor(num / count * N) + first - 1;
    +}
    +
    +// Debugging tool. See usage below.
    +function PropertyTracer(traced_prop, check) {
    +    return {
    +        matches(prop, value) {
    +            if (prop != traced_prop)
    +                return false;
    +            if ('value' in check)
    +                return value == check.value;
    +            return true;
    +        },
    +
    +        // Also called when defining a property.
    +        set(obj, prop, value) {
    +            if (this.matches(prop, value))
    +                debugger;
    +            return Reflect.set(...arguments);
    +        },
    +    };
    +}
    +
    +// Usage: var myobj = traced({}, 'name', {value: 'Bob'})
    +//
    +// This will execute a `debugger;` statement when myobj['name'] is defined or
    +// set to 'Bob'.
    +function traced(obj, traced_prop, check) {
    +  return new Proxy(obj, PropertyTracer(traced_prop, check));
    +}
    -- 
    cgit v1.2.3