diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /js/src/devtools/rootAnalysis | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/devtools/rootAnalysis')
46 files changed, 10966 insertions, 0 deletions
// diff --git a/js/src/devtools/rootAnalysis/CFG.js b/js/src/devtools/rootAnalysis/CFG.js
// new file mode 100644, index 0000000000..1b6f714279, hunk @@ -0,0 +1,1178 @@

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */

// Utility code for traversing the JSON data structures produced by sixgill.

"use strict";

var TRACING = false;

// When edge.Kind == "Pointer", these are the meanings of the edge.Reference field.
var PTR_POINTER = 0;
var PTR_REFERENCE = 1;
var PTR_RVALUE_REF = 2;

// Find all points (positions within the code) of the body given by the list of
// bodies and the blockId to match (which will specify an outer function or a
// loop within it), recursing into loops if needed.
//
// Returns an array of [body, ppoint, bits] triples, one per edge source point.
// The array is empty (never undefined) when the matched body has no edges, so
// that callers may always spread the result.
function findAllPoints(bodies, blockId, bits)
{
    let body;
    for (const candidate of bodies) {
        if (sameBlockId(candidate.BlockId, blockId)) {
            // Exactly one body may match the requested block id.
            assert(!body);
            body = candidate;
        }
    }
    assert(body);

    const points = [];
    if (!("PEdge" in body))
        return points;

    for (const edge of body.PEdge) {
        points.push([body, edge.Index[0], bits]);
        if (edge.Kind == "Loop")
            points.push(...findAllPoints(bodies, edge.BlockId, bits));
    }

    return points;
}

// Visitor of a graph of <body, ppoint> vertexes and sixgill-generated edges,
// where the edges represent the actual computation happening.
//
// Uses the syntax `var Visitor = class { ... }` rather than `class Visitor`
// to allow reloading this file with the JS debugger.
var Visitor = class {
    // `bodies` is the list of bodies that may be visited; each one gets its
    // own table mapping ppoint -> value recorded so far.
    constructor(bodies) {
        this.visited_bodies = new Map();
        for (const body of bodies) {
            this.visited_bodies.set(body, new Map());
        }
    }

    // Prepend `edge` to the info stored at the successor node, returning
    // the updated info value. This should be overridden by pretty much any
    // subclass, as a traversal's semantics are largely determined by this method.
    extend_path(edge, body, ppoint, successor_value) { return true; }

    // Default implementation does a basic "only visit nodes once" search.
    // (Whether this is BFS/DFS/other is determined by the caller.)

    // Override if you need to revisit nodes. Valid actions are "continue",
    // "prune", and "done". "continue" means continue with the search. "prune"
    // means do not continue to predecessors of this node, only continue with
    // the remaining entries in the work queue. "done" means the
    // whole search is complete even if unvisited nodes remain.
    next_action(prev, current) { return prev ? "prune" : "continue"; }

    // Update the info at a node. If this is the first time the node has been
    // seen, `prev` will be undefined. `current` will be the info computed by
    // `extend_path`. The node will be updated with the return value.
    merge_info(prev, current) { return true; }

    // Default visit() implementation. Subclasses will usually leave this alone
    // and use the other methods as extension points.
    //
    // Take a body, a point within that body, and the info computed by
    // extend_path() for that point when traversing an edge. Return whether the
    // search should continue ("continue"), the search should be pruned and
    // other paths followed ("prune"), or that the whole search is complete and
    // it is time to return a value ("done", and the value returned by
    // merge_info() will be returned by the overall search).
    //
    // Persistently record the value computed so far at each point, and call
    // (overridable) next_action() and merge_info() methods with the previous
    // and freshly-computed value for each point.
    //
    // Often, extend_path() will decide how/whether to continue the search and
    // will return the search action to take, and next_action() will blindly
    // return it if the point has not yet been visited. (And if it has, it will
    // prune this branch of the search so that no point is visited multiple
    // times.)
    //
    // Returns the pair [action, merged value].
    visit(body, ppoint, info) {
        const visited_value_table = this.visited_bodies.get(body);
        const existing_value_if_visited = visited_value_table.get(ppoint);
        const action = this.next_action(existing_value_if_visited, info);
        const merged = this.merge_info(existing_value_if_visited, info);
        visited_value_table.set(ppoint, merged);
        return [action, merged];
    }
};

// Return the body in `bodies` whose BlockId matches `blockId`. It is an
// assertion failure for no body to match.
function findMatchingBlock(bodies, blockId) {
    for (const body of bodies) {
        if (sameBlockId(body.BlockId, blockId)) {
            return body;
        }
    }
    assert(false, "no body matches the requested BlockId");
}

// For a given function containing a set of bodies, each containing a set of
// ppoints, perform a mostly breadth-first traversal through the complete graph
// of all <body, ppoint> nodes throughout all the bodies of the function.
//
// When traversing, every <body, ppoint> node is associated with a value that
// is assigned or updated whenever it is visited. The overall traversal
// terminates when a given condition is reached, and an arbitrary custom value
// is returned. If the search completes without the termination condition
// being reached, it will return the value associated with the entrypoint
// node, which is initialized to `entrypoint_fallback_value` (and thus serves as
// the fallback return value if all search paths are pruned before reaching
// the entrypoint.)
//
// The traversal is only *mostly* breadth-first because the visitor decides
// whether to stop searching when it sees a node. If a node is visited for a
// second time, the visitor can choose to continue (and thus revisit the node)
// in order to find "better" paths that may include a node more than once.
// The search is done in the "upwards" direction -- as in, it starts at the
// exit point and searches through predecessors.
//
// Override visitor.visit() to return an action and a value. The action
// determines whether the overall search should terminate ('done'), or
// continues looking through the predecessors of the current node ('continue'),
// or whether it should just continue processing the work queue without
// looking at predecessors ('prune').
//
// This allows this function to be used in different ways. If the visitor
// associates a value with each node that chains onto its forward-flow successors
// (predecessors in the "upwards" search order), then a complete path through
// the graph will be returned.
//
// Alternatively, BFS_upwards() can be used to test whether a condition holds
// (eg "the exit point is reachable only after calling SomethingImportant()"),
// in which case no path is needed and the visitor can compute a simple boolean
// every time it encounters a point. Note that `entrypoint_fallback_value` will
// still be returned if the search terminates without ever reaching the
// entrypoint, which is useful for dominator analyses.
//
// See the Visitor base class's implementation of visit(), above, for the
// most commonly used visit logic.
function BFS_upwards(start_body, start_ppoint, bodies, visitor,
                     initial_successor_value = {},
                     entrypoint_fallback_value=null)
{
    let entrypoint_value = entrypoint_fallback_value;

    // Work queue entries are [body, ppoint, edge to prepend (or null), value
    // at the successor node].
    const work = [[start_body, start_ppoint, null, initial_successor_value]];
    if (TRACING) {
        printErr(`BFS start at ${blockIdentifier(start_body)}:${start_ppoint}`);
    }

    while (work.length > 0) {
        const [body, ppoint, edgeToAdd, successor_value] = work.shift();
        if (TRACING) {
            const s = edgeToAdd ? " : " + str(edgeToAdd) : "";
            printErr(`prepending edge from ${ppoint} to state '${successor_value}'${s}`);
        }
        let value = visitor.extend_path(edgeToAdd, body, ppoint, successor_value);

        const [action, merged_value] = visitor.visit(body, ppoint, value);
        if (action === "done") {
            return merged_value;
        }
        if (action === "prune") {
            // Do not push anything else to the work queue, but continue processing
            // other branches.
            continue;
        }
        assert(action == "continue");
        value = merged_value;

        const predecessors = getPredecessors(body);
        for (const edge of (predecessors[ppoint] || [])) {
            if (edge.Kind == "Loop") {
                // Propagate the search into the exit point of the loop body.
                const loopBody = findMatchingBlock(bodies, edge.BlockId);
                const loopEnd = loopBody.Index[1];
                work.push([loopBody, loopEnd, null, value]);
                // Don't continue to predecessors here without going through
                // the loop. (The points in this body that enter the loop will
                // be traversed when we reach the entry point of the loop.)
            } else {
                work.push([body, edge.Index[0], edge, value]);
            }
        }

        // Check for hitting the entry point of a loop body.
        if (ppoint == body.Index[0] && body.BlockId.Kind == "Loop") {
            // Propagate to outer body parents that enter the loop body.
            for (const parent of (body.BlockPPoint || [])) {
                const parentBody = findMatchingBlock(bodies, parent.BlockId);
                work.push([parentBody, parent.Index, null, value]);
            }

            // This point is also preceded by the *end* of this loop, for the
            // previous iteration.
            work.push([body, body.Index[1], null, value]);
        }

        // Check for reaching the entrypoint of the function.
        if (body === start_body && ppoint == body.Index[0]) {
            entrypoint_value = value;
        }
    }

    // The search space was exhausted without finding a 'done' state. That
    // might be because all search paths were pruned before reaching the entry
    // point of the function, in which case entrypoint_value will still be its initial
    // value. (If entrypoint_value has been set, then we may still not have visited the
    // entire graph, if some paths were pruned but at least one made it to the entrypoint.)
    return entrypoint_value;
}

// Given the CFG for the constructor call of some RAII, return whether the
// given edge is the matching destructor call.
function isMatchingDestructor(constructor, edge)
{
    if (edge.Kind != "Call")
        return false;
    const callee = edge.Exp[0];
    if (callee.Kind != "Var")
        return false;
    const variable = callee.Variable;
    assert(variable.Kind == "Func");
    if (variable.Name[1].charAt(0) != '~')
        return false;

    // Note that in some situations, a regular function can begin with '~', so
    // we don't necessarily have a destructor in hand. This is probably a
    // sixgill artifact, but in js::wasm::ModuleGenerator::~ModuleGenerator, a
    // templatized static inline EraseIf is invoked, and it gets named ~EraseIf
    // for some reason.
    if (!("PEdgeCallInstance" in edge))
        return false;

    const constructExp = constructor.PEdgeCallInstance.Exp;
    assert(constructExp.Kind == "Var");

    const destructExp = edge.PEdgeCallInstance.Exp;
    if (destructExp.Kind != "Var")
        return false;

    // Constructor and destructor must operate on the same instance variable.
    return sameVariable(constructExp.Variable, destructExp.Variable);
}

// Return all calls within the RAII scope of any constructor matched by
// isConstructor(). (Note that this would be insufficient if you needed to
// treat each instance separately, such as when different regions of a function
// body were guarded by these constructors and you needed to do something
// different with each.)
function allRAIIGuardedCallPoints(typeInfo, bodies, body, isConstructor)
{
    if (!("PEdge" in body))
        return [];

    const points = [];

    for (const edge of body.PEdge) {
        if (edge.Kind != "Call")
            continue;
        const callee = edge.Exp[0];
        if (callee.Kind != "Var")
            continue;
        const variable = callee.Variable;
        assert(variable.Kind == "Func");
        // A falsy result means "not a constructor of interest"; a truthy one
        // is passed through as the `bits` attached to every guarded point.
        const bits = isConstructor(typeInfo, edge.Type, variable.Name);
        if (!bits)
            continue;
        if (!("PEdgeCallInstance" in edge))
            continue;
        if (edge.PEdgeCallInstance.Exp.Kind != "Var")
            continue;

        points.push(...pointsInRAIIScope(bodies, body, edge, bits));
    }

    return points;
}

// Test whether the given edge is the constructor corresponding to the given
// destructor edge.
function isMatchingConstructor(destructor, edge)
{
    if (edge.Kind != "Call")
        return false;
    const callee = edge.Exp[0];
    if (callee.Kind != "Var")
        return false;
    const variable = callee.Variable;
    if (variable.Kind != "Func")
        return false;
    const name = readable(variable.Name[0]);
    const destructorName = readable(destructor.Exp[0].Variable.Name[0]);
    const match = destructorName.match(/^(.*?::)~(\w+)\(/);
    if (!match) {
        printErr("Unhandled destructor syntax: " + destructorName);
        return false;
    }
    // "Ns::Class::" + "Class" must appear in the candidate's readable name.
    const constructorSubstring = match[1] + match[2];
    if (name.indexOf(constructorSubstring) == -1)
        return false;

    // A name match alone is not enough: a call without an instance (eg a
    // static member whose name merely starts with the class name) cannot be
    // the constructor of the destroyed variable.
    if (!("PEdgeCallInstance" in destructor) || !("PEdgeCallInstance" in edge))
        return false;

    const destructExp = destructor.PEdgeCallInstance.Exp;
    if (destructExp.Kind != "Var")
        return false;

    const constructExp = edge.PEdgeCallInstance.Exp;
    if (constructExp.Kind != "Var")
        return false;

    return sameVariable(constructExp.Variable, destructExp.Variable);
}

// Walk predecessor edges backwards from `destructorEdge` within `body` and
// return the first edge that isMatchingConstructor() accepts, or undefined if
// there is none (optionally printing a warning).
function findMatchingConstructor(destructorEdge, body, warnIfNotFound=true)
{
    const worklist = [destructorEdge];
    const predecessors = getPredecessors(body);
    // Edges already examined. Re-examining one cannot change the outcome (it
    // did not match, and its predecessors are already queued), so skip it
    // rather than re-walking shared paths.
    const examined = new Set();
    while (worklist.length > 0) {
        const edge = worklist.pop();
        if (examined.has(edge))
            continue;
        examined.add(edge);
        if (isMatchingConstructor(destructorEdge, edge))
            return edge;
        if (edge.Index[0] in predecessors) {
            for (const e of predecessors[edge.Index[0]])
                worklist.push(e);
        }
    }
    if (warnIfNotFound)
        printErr("Could not find matching constructor!");
    return undefined;
}

// Collect [body, point, bits] for every point reachable forwards from the
// destination of `constructorEdge` without crossing the matching destructor
// call, including all points of any loop entered along the way.
function pointsInRAIIScope(bodies, body, constructorEdge, bits) {
    const seen = new Set();
    const worklist = [constructorEdge.Index[1]];
    const points = [];
    // Loop-invariant: the successor table depends only on `body`.
    const successors = getSuccessors(body);
    while (worklist.length) {
        const point = worklist.pop();
        if (seen.has(point))
            continue;
        seen.add(point);
        points.push([body, point, bits]);
        if (!(point in successors))
            continue;
        for (const nedge of successors[point]) {
            if (isMatchingDestructor(constructorEdge, nedge))
                continue;
            if (nedge.Kind == "Loop") {
                // Tolerate a missing result for a loop body without edges so
                // the spread cannot throw.
                points.push(...(findAllPoints(bodies, nedge.BlockId, bits) || []));
            }
            worklist.push(nedge.Index[1]);
        }
    }

    return points;
}

// Whether `exp` is the integer literal 0 (eg nullptr).
function isImmobileValue(exp) {
    return exp.Kind == "Int" && exp.String == "0";
}

// Returns whether decl is a body.DefineVariable[] entry for a non-temporary reference.
function isReferenceDecl(decl) {
    const type = decl.Type;
    if (type.Kind != "Pointer")
        return false;
    // PTR_POINTER marks a plain pointer; anything else is a reference kind.
    if (type.Reference == PTR_POINTER)
        return false;
    return decl.Variable.Kind != "Temp";
}

// Whether `exp` is `variable` itself, or a (possibly nested) field of it.
function expressionIsVariableAddress(exp, variable)
{
    let base = exp;
    // Strip field accesses to get at the underlying object.
    while (base.Kind == "Fld")
        base = base.Exp[0];
    if (base.Kind != "Var")
        return false;
    return sameVariable(base.Variable, variable);
}

// Whether `edge` assigns from, or passes as a call argument, an expression
// that expressionIsVariableAddress() accepts for `variable`. Edges that
// ignoreEdgeUse() or ignoreEdgeAddressTaken() reject never count.
function edgeTakesVariableAddress(edge, variable, body)
{
    if (ignoreEdgeUse(edge, variable, body) || ignoreEdgeAddressTaken(edge))
        return false;

    if (edge.Kind == "Assign")
        return expressionIsVariableAddress(edge.Exp[1], variable);

    if (edge.Kind == "Call" && "PEdgeCallArguments" in edge) {
        return edge.PEdgeCallArguments.Exp.some(
            arg => expressionIsVariableAddress(arg, variable));
    }

    return false;
}

// Look at an invocation of a virtual method or function pointer contained in a
// field, and return the static type of the invocant (or the containing struct,
// for a function pointer field.)
function getFieldCallInstanceCSU(edge, field)
{
    if (!("FieldInstanceFunction" in field)) {
        // somevar.foo() where somevar is a field of some CSU.
        return field.FieldCSU.Type.Name;
    }

    // We have a 'this'.
    const instanceExp = edge.PEdgeCallInstance.Exp;
    switch (instanceExp.Kind) {
      case 'Drf':
        // somevar->foo()
        return edge.Type.TypeFunctionCSU.Type.Name;

      case 'Fld':
        // somevar.foo()
        return instanceExp.Field.FieldCSU.Type.Name;

      case 'Index':
        // A strange construct.
        // C++ code: static_cast<JS::CustomAutoRooter*>(this)->trace(trc);
        // CFG: Call(21,30, this*[-1]{JS::CustomAutoRooter}.trace*(trc*))
        return instanceExp.Type.Name;

      case 'Var':
        // C++: reinterpret_cast<SimpleTimeZone*>(gRawGMT)->~SimpleTimeZone();
        // CFG:
        //   # icu_64::SimpleTimeZone::icu_64::SimpleTimeZone.__comp_dtor
        //   [6,7] Call gRawGMT.icu_64::SimpleTimeZone.__comp_dtor ()
        return field.FieldCSU.Type.Name;

      default:
        // Dump both inputs before failing so the new construct can be diagnosed.
        printErr("------------------ edge -------------------");
        printErr(JSON.stringify(edge, null, 4));
        printErr("------------------ field -------------------");
        printErr(JSON.stringify(field, null, 4));
        assert(false, `unrecognized FieldInstanceFunction Kind ${instanceExp.Kind}`);
    }
}

// Whether `variable` appears anywhere within the expression tree `exp`.
function expressionUsesVariable(exp, variable)
{
    if (exp.Kind == "Var" && sameVariable(exp.Variable, variable))
        return true;
    if (!("Exp" in exp))
        return false;
    return exp.Exp.some(child => expressionUsesVariable(child, variable));
}

// Whether `variable` appears underneath a dereference ('Drf') that is a
// strict descendant of `exp`.
function expressionUsesVariableContents(exp, variable)
{
    if (!("Exp" in exp))
        return false;
    return exp.Exp.some(child => {
        // Below a dereference any mention counts; otherwise keep looking for
        // a dereference further down.
        if (child.Kind == 'Drf')
            return Boolean(expressionUsesVariable(child, variable));
        return Boolean(expressionUsesVariableContents(child, variable));
    });
}

// Detect simple |return nullptr;| statements.
function isReturningImmobileValue(edge, variable)
{
    if (variable.Kind != "Return")
        return false;
    const [target, value] = [edge.Exp[0], edge.Exp[1]];
    if (target.Kind != "Var" || !sameVariable(target.Variable, variable))
        return false;
    return isImmobileValue(value) ? true : false;
}

// If the edge uses the given variable's value, return the earliest point at
// which the use is definite.
// Usually, that means the source of the edge
// (anything that reaches that source point will end up using the variable, but
// there may be other ways to reach the destination of the edge.)
//
// Return values are implicitly used at the very last point in the function.
// This makes a difference: if an RAII class GCs in its destructor, we need to
// start looking at the final point in the function, not one point back from
// that, since that would skip over the GCing call.
//
// Certain references may be annotated to be live to the end of the function
// as well (eg AutoCheckCannotGC&& parameters).
//
// Note that this returns a nonzero value only if the variable's incoming value is used.
// So this would return 0 for 'obj':
//
//   obj = someFunction();
//
// but these would return a positive value:
//
//   obj = someFunction(obj);
//   obj->foo = someFunction();
//
function edgeUsesVariable(edge, variable, body, liveToEnd=false)
{
    if (ignoreEdgeUse(edge, variable, body))
        return 0;

    const [src, dst] = [edge.Index[0], edge.Index[1]];

    // Return values are always treated as live to the end of the function.
    const usedAtEnd = liveToEnd || variable.Kind == "Return";
    if (usedAtEnd && body.Index[1] == dst && body.BlockId.Kind == "Function") {
        // The last point in the function body is treated as using the return
        // value. This is the only time the destination point is returned
        // rather than the source point.
        return dst;
    }

    // `...variable... := rhs` uses the incoming value, but `variable := rhs`
    // merely overwrites it.
    const usedAsPartOfTarget = target =>
        expressionUsesVariable(target, variable) && !expressionIsVariable(target, variable);

    if (edge.Kind == "Assign") {
        // Detect `Return := nullptr`.
        if (isReturningImmobileValue(edge, variable))
            return 0;
        const target = edge.Exp[0];
        const source = edge.Exp[1];
        // Detect `lhs := ...variable...`, then `...variable... := rhs`.
        if (expressionUsesVariable(source, variable) || usedAsPartOfTarget(target))
            return src;
        return 0;
    }

    if (edge.Kind == "Assume") {
        if (expressionUsesVariableContents(edge.Exp[0], variable))
            return src;
        return 0;
    }

    if (edge.Kind == "Call") {
        if (expressionUsesVariable(edge.Exp[0], variable))
            return src;

        // If the variable is being constructed, then the incoming value is
        // not used here; it didn't exist before construction. (The analysis
        // doesn't get told where variables are defined, so must infer it
        // from construction. If the variable does not have a constructor,
        // its live range may be larger than it really ought to be if it is
        // defined within a loop body, but that is conservative.)
        if ("PEdgeCallInstance" in edge &&
            expressionUsesVariable(edge.PEdgeCallInstance.Exp, variable) &&
            !edgeStartsValueLiveRange(edge, variable))
        {
            return src;
        }

        if ("PEdgeCallArguments" in edge) {
            for (const argument of edge.PEdgeCallArguments.Exp) {
                if (expressionUsesVariable(argument, variable))
                    return src;
            }
        }

        // No assignment of the call result: nothing more to check.
        if (edge.Exp.length == 1)
            return 0;

        // Assigning call result to a variable.
        return usedAsPartOfTarget(edge.Exp[1]) ? src : 0;
    }

    if (edge.Kind == "Loop" || edge.Kind == "Assembly")
        return 0;

    // Unknown edge kind.
    assert(false);
}

// If `decl` is the body.DefineVariable[] declaration of a reference type, then
// return the expression without the outer dereference. Otherwise, return the
// original expression.
function maybeDereference(exp, decl) {
    if (exp.Kind == "Drf" && exp.Exp[0].Kind == "Var") {
        if (isReferenceDecl(decl)) {
            return exp.Exp[0];
        }
    }
    return exp;
}

// Whether `exp` is exactly a reference to `variable`.
function expressionIsVariable(exp, variable)
{
    return exp.Kind == "Var" && sameVariable(exp.Variable, variable);
}

// Similar to the above, except treat uses of a reference as if they were uses
// of the dereferenced contents. This requires knowing the type of the
// variable, and so takes its declaration rather than the variable itself.
function expressionIsDeclaredVariable(exp, decl)
{
    exp = maybeDereference(exp, decl);
    return expressionIsVariable(exp, decl.Variable);
}

function expressionIsMethodOnVariableDecl(exp, decl)
{
    // This might be calling a method on a base class, in which case exp will
    // be an unnamed field of the variable instead of the variable itself.
    while (exp.Kind == "Fld" && exp.Field.Name[0].startsWith("field:"))
        exp = exp.Exp[0];
    return expressionIsDeclaredVariable(exp, decl);
}

// Return whether the edge starts the live range of a variable's value, by setting
// it to some new value. Examples of starting obj's live range:
//
//   obj = foo;
//   obj = foo();
//   obj = foo(obj); // uses previous value but then sets to new value
//   SomeClass obj(true, 1); // constructor
//
function edgeStartsValueLiveRange(edge, variable)
{
    // Direct assignments start live range of lhs: var = value
    if (edge.Kind == "Assign") {
        const lhs = edge.Exp[0];
        return (expressionIsVariable(lhs, variable) &&
                !isReturningImmobileValue(edge, variable));
    }

    if (edge.Kind != "Call")
        return false;

    // Assignments of call results start live range: var = foo()
    if (1 in edge.Exp) {
        const lhs = edge.Exp[1];
        if (expressionIsVariable(lhs, variable))
            return true;
    }

    // Constructor calls start live range of instance: SomeClass var(...)
    if ("PEdgeCallInstance" in edge) {
        let instance = edge.PEdgeCallInstance.Exp;

        // Kludge around incorrect dereference on some constructor calls.
        if (instance.Kind == "Drf")
            instance = instance.Exp[0];

        if (!expressionIsVariable(instance, variable))
            return false;

        const callee = edge.Exp[0];
        if (callee.Kind != "Var")
            return false;

        assert(callee.Variable.Kind == "Func");
        let calleeName = readable(callee.Variable.Name[0]);

        // Constructor calls include the text 'Name::Name(' or 'Name<...>::Name('.
        const openParen = calleeName.indexOf('(');
        if (openParen < 0)
            return false;
        calleeName = calleeName.substring(0, openParen);

        const lastColon = calleeName.lastIndexOf('::');
        if (lastColon < 0)
            return false;
        // Split 'Scope::Name' into the trailing method name and its scope.
        const constructorName = calleeName.slice(lastColon + 2);
        calleeName = calleeName.slice(0, lastColon);

        // Drop a trailing template argument list from the scope, if any.
        const lastTemplateOpen = calleeName.lastIndexOf('<');
        if (lastTemplateOpen >= 0)
            calleeName = calleeName.slice(0, lastTemplateOpen);

        if (calleeName.endsWith(constructorName))
            return true;
    }

    return false;
}

// Return the result of a `matcher` callback on the call found in the given
// `edge`, if the edge is a direct call to a named function (if not, return false).
// `matcher` is given the name of the callee (actually, a tuple
// [fully qualified name, base name]), an array of expressions containing the
// arguments, and if the result of the call is assigned to a variable,
// the expression representing that variable(the lhs).
//
// https://firefox-source-docs.mozilla.org/js/HazardAnalysis/CFG.html for
// documentation of the data structure used here.
function matchEdgeCall(edge, matcher) {
    if (edge.Kind != "Call") {
        return false;
    }

    const callee = edge.Exp[0];

    if (edge.Type.Kind == 'Function' &&
        callee.Kind == 'Var' &&
        callee.Variable.Kind == 'Func') {
        const calleeName = callee.Variable.Name;
        const args = edge.PEdgeCallArguments;
        const argExprs = args ? args.Exp : [];
        const lhs = edge.Exp[1]; // May be undefined
        return matcher(calleeName, argExprs, lhs);
    }

    return false;
}

// Whether `edge` is a call to JS::detail::MarkVariableAsGCSafe whose single
// argument is exactly `variable`.
function edgeMarksVariableGCSafe(edge, variable) {
    return matchEdgeCall(edge, (calleeName, argExprs, _lhs) => {
        // explicit JS_HAZ_VARIABLE_IS_GC_SAFE annotation
        return (calleeName[1] == 'MarkVariableAsGCSafe' &&
                calleeName[0].includes("JS::detail::MarkVariableAsGCSafe") &&
                argExprs.length == 1 &&
                expressionIsVariable(argExprs[0], variable));
    });
}

// Match an optional <namespace>:: followed by the class name,
// and then an optional template parameter marker.
//
// Example: mozilla::dom::UniquePtr<...
//
// Returns undefined if `typeName` does not start with such a name. Otherwise
// returns { type, namespace, classname, is_specialized } where `type` is the
// qualified name without template arguments, `namespace` is the qualifying
// prefix including its trailing '::' (the empty string when there is none),
// and `is_specialized` is '<' when a template argument list follows and
// undefined otherwise.
function parseTypeName(typeName) {
    const m = typeName.match(/^(((?:\w|::)+::)?(\w+))\b(\<)?/);
    if (!m) {
        return undefined;
    }
    const [, type, raw_namespace, classname, is_specialized] = m;
    // A capture group that did not participate in the match is undefined,
    // not null.
    const namespace = raw_namespace ?? "";
    return { type, namespace, classname, is_specialized };
}

// Return whether an edge "clears out" a variable's value. A simple example
// would be
//
//   var = nullptr;
//
// for analyses for which nullptr is a "safe" value (eg GC rooting hazards; you
// can't get in trouble by holding a nullptr live across a GC.)
A more complex +// example is a Maybe<T> that gets reset: +// +// Maybe<AutoCheckCannotGC> nogc; +// nogc.emplace(cx); +// nogc.reset(); +// gc(); // <-- not a problem; nogc is invalidated by prev line +// nogc.emplace(cx); +// foo(nogc); +// +// Yet another example is a UniquePtr being passed by value, which means the +// receiver takes ownership: +// +// UniquePtr<JSObject*> uobj(obj); +// foo(uobj); +// gc(); +// +function edgeEndsValueLiveRange(edge, variable, body) +{ + // var = nullptr; + if (edge.Kind == "Assign") { + const [lhs, rhs] = edge.Exp; + return expressionIsVariable(lhs, variable) && isImmobileValue(rhs); + } + + if (edge.Kind != "Call") + return false; + + if (edgeMarksVariableGCSafe(edge, variable)) { + // explicit JS_HAZ_VARIABLE_IS_GC_SAFE annotation + return true; + } + + const decl = lookupVariable(body, variable); + + if (matchEdgeCall(edge, (calleeName, argExprs, lhs) => { + return calleeName[1] == 'move' && calleeName[0].includes('std::move(') && + expressionIsDeclaredVariable(argExprs[0], decl) && + lhs && + lhs.Kind == 'Var' && + lhs.Variable.Kind == 'Temp'; + })) { + // temp = std::move(var) + // + // If var is a UniquePtr, and we pass it into something that takes + // ownership, then it should be considered to be invalid. Example: + // + // consume(std::move(var)); + // + // where consume takes a UniquePtr. This will compile to something like + // + // UniquePtr* __temp_1 = &std::move(var); + // UniquePtr&& __temp_2(*temp_1); // move constructor + // consume(__temp_2); + // ~UniquePtr(__temp_2); + // + // The line commented with "// move constructor" is a result of passing + // a UniquePtr as a parameter. If consume() took a UniquePtr&& + // directly, this would just be: + // + // UniquePtr* __temp_1 = &std::move(var); + // consume(__temp_1); + // + // which is not guaranteed to move from the reference. It might just + // ignore the parameter. We can't predict what consume(UniquePtr&&) + // will do. 
We do know that UniquePtr(UniquePtr&& other) moves out of + // `other`. + // + // The std::move() technically is irrelevant, but because we only care + // about bare variables, it has to be used, which is fortunate because + // the UniquePtr&& constructor operates on a temporary, not the + // variable we care about. + + const lhs = edge.Exp[1].Variable; + if (basicBlockEatsVariable(lhs, body, edge.Index[1])) + return true; + } + + const callee = edge.Exp[0]; + + if (edge.Type.Kind == 'Function' && + edge.Type.TypeFunctionCSU && + edge.PEdgeCallInstance && + expressionIsMethodOnVariableDecl(edge.PEdgeCallInstance.Exp, decl)) + { + const typeName = edge.Type.TypeFunctionCSU.Type.Name; + + // Synthesize a zero-arg constructor name like + // mozilla::dom::UniquePtr<T>::UniquePtr(). Note that the `<T>` is + // literal -- the pretty name from sixgill will render the actual + // constructor name as something like + // + // UniquePtr<T>::UniquePtr() [where T = int] + // + const parsed = parseTypeName(typeName); + if (parsed) { + const { type, namespace, classname, is_specialized } = parsed; + + // special-case: the initial constructor that doesn't provide a value. + // Useful for things like Maybe<T>. + const template = is_specialized ? '<T>' : ''; + const ctorName = `${namespace}${classname}${template}::${classname}()`; + if (callee.Kind == 'Var' && + typesWithSafeConstructors.has(type) && + callee.Variable.Name[0].includes(ctorName)) + { + return true; + } + + // special-case: UniquePtr::reset() and similar. + if (callee.Kind == 'Var' && + type in resetterMethods && + resetterMethods[type].has(callee.Variable.Name[1])) + { + return true; + } + } + } + + // special-case: passing UniquePtr<T> by value. 
+ if (edge.Type.Kind == 'Function' && + edge.Type.TypeFunctionArgument && + edge.PEdgeCallArguments) + { + for (const i in edge.Type.TypeFunctionArgument) { + const param = edge.Type.TypeFunctionArgument[i]; + if (param.Type.Kind != 'CSU') + continue; + if (!param.Type.Name.startsWith("mozilla::UniquePtr<")) + continue; + const arg = edge.PEdgeCallArguments.Exp[i]; + if (expressionIsVariable(arg, variable)) { + return true; + } + } + } + + return false; +} + +// Look up a variable in the list of declarations for this body. +function lookupVariable(body, variable) { + for (const decl of (body.DefineVariable || [])) { + if (sameVariable(decl.Variable, variable)) { + return decl; + } + } + return undefined; +} + +function edgeMovesVariable(edge, variable, body) +{ + if (edge.Kind != 'Call') + return false; + const callee = edge.Exp[0]; + if (callee.Kind == 'Var' && + callee.Variable.Kind == 'Func') + { + const { Variable: { Name: [ fullname, shortname ] } } = callee; + + // Match an rvalue parameter. + + if (!edge || !edge.PEdgeCallArguments || !edge.PEdgeCallArguments.Exp) { + return false; + } + + for (const arg of edge.PEdgeCallArguments.Exp) { + if (arg.Kind != 'Drf') continue; + const val = arg.Exp[0]; + if (val.Kind == 'Var' && sameVariable(val.Variable, variable)) { + // This argument is the variable we're looking for. Return true + // if it is passed as an rvalue reference. + const type = lookupVariable(body, variable).Type; + if (type.Kind == "Pointer" && type.Reference == PTR_RVALUE_REF) { + return true; + } + } + } + } + + return false; +} + +// Scan forward through the basic block in 'body' starting at 'startpoint', +// looking for a call that passes 'variable' to a move constructor that +// "consumes" it (eg UniquePtr::UniquePtr(UniquePtr&&)). +function basicBlockEatsVariable(variable, body, startpoint) +{ + const successors = getSuccessors(body); + let point = startpoint; + while (point in successors) { + // Only handle a single basic block. 
If it forks, stop looking. + const edges = successors[point]; + if (edges.length != 1) { + return false; + } + const edge = edges[0]; + + if (edgeMovesVariable(edge, variable, body)) { + return true; + } + + // edgeStartsValueLiveRange will find places where 'variable' is given + // a new value. Never observed in practice, since this function is only + // called with a temporary resulting from std::move(), which is used + // immediately for a call. But just to be robust to future uses: + if (edgeStartsValueLiveRange(edge, variable)) { + return false; + } + + point = edge.Index[1]; + } + + return false; +} + +var PROP_REFCNT = 1 << 0; +var PROP_SHARED_PTR_DTOR = 1 << 1; + +function getCalleeProperties(calleeName) { + let props = 0; + + if (isRefcountedDtor(calleeName)) { + props |= PROP_REFCNT; + } + if (calleeName.includes("~shared_ptr()")) { + props |= PROP_SHARED_PTR_DTOR; + } + return props; +} + +// Basic C++ ABI mangling: prefix an identifier with its length, in decimal. +function mangle(name) { + return name.length + name; +} + +var TriviallyDestructibleTypes = new Set([ + // Single-token types from + // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-builtin + "void", "wchar_t", "bool", "char", "short", "int", "long", "float", "double", + "__int64", "__int128", "__float128", "char32_t", "char16_t", "char8_t", + // Remaining observed cases. These are types T in shared_ptr<T> that have + // been observed, where the types themselves have trivial destructors, and + // the custom deleter doesn't do anything nontrivial that we might care about. 
+ "_IO_FILE" +]); +function synthesizeDestructorName(className) { + if (className.includes("<") || className.includes(" ") || className.includes("{")) { + return; + } + if (TriviallyDestructibleTypes.has(className)) { + return; + } + const parts = className.split("::"); + const mangled_dtor = "_ZN" + parts.map(p => mangle(p)).join("") + "D2Ev"; + const pretty_dtor = `void ${className}::~${parts.at(-1)}()`; + // Note that there will be a later check to verify that the function name + // synthesized here is an actual function, and assert if not (see + // assertFunctionExists() in computeCallgraph.js.) + return mangled_dtor + "$" + pretty_dtor; +} + +function getCallEdgeProperties(body, edge, calleeName, functionBodies) { + let attrs = 0; + let extraCalls = []; + + if (edge.Kind !== "Call") { + return { attrs, extraCalls }; + } + + const props = getCalleeProperties(calleeName); + if (props & PROP_REFCNT) { + // std::swap of two refcounted values thinks it can drop the + // ref count to zero. Or rather, it just calls operator=() in a context + // where the refcount will never drop to zero. + const blockId = blockIdentifier(body); + if (blockId.includes("std::swap") || blockId.includes("mozilla::Swap")) { + // Replace the refcnt release call with nothing. It's not going to happen. + attrs |= ATTR_REPLACED; + } + } + + if (props & PROP_SHARED_PTR_DTOR) { + // Replace shared_ptr<T>::~shared_ptr() calls to T::~T() calls. + // Note that this will only apply to simple cases. + // Any templatized type, in particular, will be ignored and the original + // call tree will be left alone. If this triggers a hazard, then we can + // consider extending the mangling support. + // + // If the call to ~shared_ptr is not replaced, then it might end up calling + // an unknown function pointer. This does not always happen-- in some cases, + // the call tree below ~shared_ptr will invoke the correct destructor without + // going through function pointers. 
+ const m = calleeName.match(/shared_ptr<(.*?)>::~shared_ptr\(\)(?: \[with T = ([\w:]+))?/); + assert(m); + let className = m[1] == "T" ? m[2] : m[1]; + assert(className != ""); + // cv qualification does not apply to destructors. + className = className.replace("const ", ""); + className = className.replace("volatile ", ""); + const dtor = synthesizeDestructorName(className); + if (dtor) { + attrs |= ATTR_REPLACED; + extraCalls.push({ + attrs: ATTR_SYNTHETIC, + name: dtor, + }); + } + } + + if ((props & PROP_REFCNT) == 0) { + return { attrs, extraCalls }; + } + + let callee = edge.Exp[0]; + while (callee.Kind === "Drf") { + callee = callee.Exp[0]; + } + + const instance = edge.PEdgeCallInstance.Exp; + if (instance.Kind !== "Var") { + // TODO: handle field destructors + return { attrs, extraCalls }; + } + + // Test whether the dtor call is dominated by operations on the variable + // that mean it will not go to a zero refcount in the dtor: either because + // it's already dead (eg r.forget() was called) or because it can be proven + // to have a ref count of greater than 1. This is implemented by looking + // for the reverse: find a path scanning backwards from the dtor call where + // the variable is used in any way that does *not* ensure that it is + // trivially destructible. + + const variable = instance.Variable; + + const visitor = new class DominatorVisitor extends Visitor { + // Do not revisit nodes. For new nodes, relay the decision made by + // extend_path. + next_action(seen, current) { return seen ? "prune" : current; } + + // We don't revisit, so always use the new. + merge_info(seen, current) { return current; } + + // Return the action to take from this node. + extend_path(edge, body, ppoint, successor_value) { + if (!edge) { + // Dummy edge to join two points. + return "continue"; + } + + if (!edgeUsesVariable(edge, variable, body)) { + // Nothing of interest on this edge, keep searching. 
+ return "continue"; + } + + if (edgeEndsValueLiveRange(edge, variable, body)) { + // This path is safe! + return "prune"; + } + + // Unsafe. Found a use that might set the variable to a + // nonzero refcount. + return "done"; + } + }(functionBodies); + + // Searching upwards from a destructor call, return the opposite of: is + // there a path to a use or the start of the function that does NOT hit a + // safe assignment like refptr.forget() first? + // + // In graph terms: return whether the destructor call is dominated by forget() calls (or similar). + const edgeIsNonReleasingDtor = !BFS_upwards( + body, edge.Index[0], functionBodies, visitor, "start", + false // Return value if we do not reach the root without finding a non-forget() use. + ); + if (edgeIsNonReleasingDtor) { + attrs |= ATTR_GC_SUPPRESSED | ATTR_NONRELEASING; + } + return { attrs, extraCalls }; +} + +// gcc uses something like "__dt_del " for virtual destructors that it +// generates. +function isSyntheticVirtualDestructor(funcName) { + return funcName.endsWith(" "); +} + +function typedField(field) +{ + if ("FieldInstanceFunction" in field) { + // Virtual call + // + // This makes a minimal attempt at dealing with overloading, by + // incorporating the number of parameters. So far, that is all that has + // been needed. If more is needed, sixgill will need to produce a full + // mangled type. + const {Type, Name: [name]} = field; + + // Virtual destructors don't need a type or argument count, + // and synthetic ones don't have them filled in. + if (isSyntheticVirtualDestructor(name)) { + return name; + } + + var nargs = 0; + if (Type.Kind == "Function" && "TypeFunctionArguments" in Type) + nargs = Type.TypeFunctionArguments.Type.length; + return name + ":" + nargs; + } else { + // Function pointer field + return field.Name[0]; + } +} + +function fieldKey(csuName, field) +{ + return csuName + "." 
+ typedField(field); +} diff --git a/js/src/devtools/rootAnalysis/README.md b/js/src/devtools/rootAnalysis/README.md new file mode 100644 index 0000000000..08a4fcde29 --- /dev/null +++ b/js/src/devtools/rootAnalysis/README.md @@ -0,0 +1,3 @@ +# Spidermonkey JSAPI rooting analysis + +See js/src/docs/HazardAnalysis/index.md diff --git a/js/src/devtools/rootAnalysis/analyze.py b/js/src/devtools/rootAnalysis/analyze.py new file mode 100755 index 0000000000..dd37991d41 --- /dev/null +++ b/js/src/devtools/rootAnalysis/analyze.py @@ -0,0 +1,462 @@ +#!/usr/bin/env python3 + +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Runs the static rooting analysis +""" + +import argparse +import os +import subprocess +import sys +from subprocess import Popen + +try: + from shlex import quote +except ImportError: + from pipes import quote + + +def execfile(thefile, globals): + exec(compile(open(thefile).read(), filename=thefile, mode="exec"), globals) + + +# Label a string as an output. +class Output(str): + pass + + +# Label a string as a pattern for multiple inputs. +class MultiInput(str): + pass + + +# Construct a new environment by merging in some settings needed for running the individual scripts. +def env(config): + # Add config['sixgill_bin'] to $PATH if not already there. 
+ path = os.environ["PATH"].split(":") + if dir := config.get("sixgill_bin"): + if dir not in path: + path.insert(0, dir) + + return dict( + os.environ, + PATH=":".join(path), + XDB=f"{config['sixgill_bin']}/xdb.so", + SOURCE=config["source"], + ) + + +def fill(command, config): + filled = [] + for s in command: + try: + rep = s.format(**config) + except KeyError: + print("Substitution failed: %s" % s) + filled = None + break + + if isinstance(s, Output): + filled.append(Output(rep)) + elif isinstance(s, MultiInput): + N = int(config["jobs"]) + for i in range(1, N + 1): + filled.append(rep.format(i=i, n=N)) + else: + filled.append(rep) + + if filled is None: + raise Exception("substitution failure") + + return tuple(filled) + + +def print_command(job, config, env=None): + # Display a command to run that has roughly the same effect as what was + # actually run. The actual command uses temporary files that get renamed at + # the end, and run some commands in parallel chunks. The printed command + # will substitute in the actual output and run in a single chunk, so that + # it is easier to cut & paste and add a --function flag for debugging. + cfg = dict(config, n=1, i=1, jobs=1) + cmd = job_command_with_final_output_names(job) + cmd = fill(cmd, cfg) + + cmd = [quote(s) for s in cmd] + if outfile := job.get("redirect-output"): + cmd.extend([">", quote(outfile.format(**cfg))]) + if HOME := os.environ.get("HOME"): + cmd = [s.replace(HOME, "~") for s in cmd] + + if env: + # Try to keep the command as short as possible by only displaying + # modified environment variable settings. + e = os.environ + changed = {key: value for key, value in env.items() if value != e.get(key)} + if changed: + settings = [] + for key, value in changed.items(): + if key in e and e[key] in value: + # Display modifications as V=prefix${V}suffix when + # possible. This can make a huge different for $PATH. 
+ start = value.index(e[key]) + end = start + len(e[key]) + setting = '%s="%s${%s}%s"' % (key, value[:start], key, value[end:]) + else: + setting = '%s="%s"' % (key, value) + if HOME: + setting = setting.replace(HOME, "$HOME") + settings.append(setting) + + cmd = settings + cmd + + print(" " + " ".join(cmd)) + + +JOBS = { + "list-dbs": {"command": ["ls", "-l"]}, + "rawcalls": { + "command": [ + "{js}", + "{analysis_scriptdir}/computeCallgraph.js", + "{typeInfo}", + Output("{rawcalls}"), + "{i}", + "{n}", + ], + "multi-output": True, + "outputs": ["rawcalls.{i}.of.{n}"], + }, + "gcFunctions": { + "command": [ + "{js}", + "{analysis_scriptdir}/computeGCFunctions.js", + MultiInput("{rawcalls}"), + "--outputs", + Output("{callgraph}"), + Output("{gcFunctions}"), + Output("{gcFunctions_list}"), + Output("{limitedFunctions_list}"), + ], + "outputs": [ + "callgraph.txt", + "gcFunctions.txt", + "gcFunctions.lst", + "limitedFunctions.lst", + ], + }, + "gcTypes": { + "command": [ + "{js}", + "{analysis_scriptdir}/computeGCTypes.js", + Output("{gcTypes}"), + Output("{typeInfo}"), + ], + "outputs": ["gcTypes.txt", "typeInfo.txt"], + }, + "allFunctions": { + "command": ["{sixgill_bin}/xdbkeys", "src_body.xdb"], + "redirect-output": "allFunctions.txt", + }, + "hazards": { + "command": [ + "{js}", + "{analysis_scriptdir}/analyzeRoots.js", + "{gcFunctions_list}", + "{limitedFunctions_list}", + "{gcTypes}", + "{typeInfo}", + "{i}", + "{n}", + "tmp.{i}.of.{n}", + ], + "multi-output": True, + "redirect-output": "rootingHazards.{i}.of.{n}", + }, + "gather-hazards": { + "command": [ + "{js}", + "{analysis_scriptdir}/mergeJSON.js", + MultiInput("{hazards}"), + Output("{all_hazards}"), + ], + "outputs": ["rootingHazards.json"], + }, + "explain": { + "command": [ + sys.executable, + "{analysis_scriptdir}/explain.py", + "{all_hazards}", + "{gcFunctions}", + Output("{explained_hazards}"), + Output("{unnecessary}"), + Output("{refs}"), + Output("{html}"), + ], + "outputs": ["hazards.txt", 
"unnecessary.txt", "refs.txt", "hazards.html"], + }, + "heapwrites": { + "command": ["{js}", "{analysis_scriptdir}/analyzeHeapWrites.js"], + "redirect-output": "heapWriteHazards.txt", + }, +} + + +# Generator of (i, j, item) tuples corresponding to outputs: +# - i is just the index of the yielded tuple (a la enumerate()) +# - j is the index of the item in the command list +# - item is command[j] +def out_indexes(command): + i = 0 + for j, fragment in enumerate(command): + if isinstance(fragment, Output): + yield (i, j, fragment) + i += 1 + + +def job_command_with_final_output_names(job): + outfiles = job.get("outputs", []) + command = list(job["command"]) + for i, j, name in out_indexes(job["command"]): + command[j] = outfiles[i] + return command + + +def run_job(name, config): + job = JOBS[name] + outs = job.get("outputs") or job.get("redirect-output") + print("Running " + name + " to generate " + str(outs)) + if "function" in job: + job["function"](config, job["redirect-output"]) + return + + N = int(config["jobs"]) if job.get("multi-output") else 1 + config["n"] = N + jobs = {} + for i in range(1, N + 1): + config["i"] = i + cmd = fill(job["command"], config) + info = spawn_command(cmd, job, name, config) + jobs[info["proc"].pid] = info + + if config["verbose"] > 0: + print_command(job, config, env=env(config)) + + final_status = 0 + while jobs: + pid, status = os.wait() + final_status = final_status or status + info = jobs[pid] + del jobs[pid] + if "redirect" in info: + info["redirect"].close() + + # Rename the temporary files to their final names. 
+ for temp, final in info["rename_map"].items(): + try: + if config["verbose"] > 1: + print("Renaming %s -> %s" % (temp, final)) + os.rename(temp, final) + except OSError: + print("Error renaming %s -> %s" % (temp, final)) + raise + + if final_status != 0: + raise Exception("job {} returned status {}".format(name, final_status)) + + +def spawn_command(cmdspec, job, name, config): + rename_map = {} + + if "redirect-output" in job: + stdout_filename = "{}.tmp{}".format(name, config.get("i", "")) + final_outfile = job["redirect-output"].format(**config) + rename_map[stdout_filename] = final_outfile + command = cmdspec + else: + outfiles = fill(job["outputs"], config) + stdout_filename = None + + # Replace the Outputs with temporary filenames, and record a mapping + # from those temp names to their actual final names that will be used + # if the command succeeds. + command = list(cmdspec) + for i, j, raw_name in out_indexes(cmdspec): + [name] = fill([raw_name], config) + command[j] = "{}.tmp{}".format(name, config.get("i", "")) + rename_map[command[j]] = outfiles[i] + + sys.stdout.flush() + info = {"rename_map": rename_map} + if stdout_filename: + info["redirect"] = open(stdout_filename, "w") + info["proc"] = Popen(command, stdout=info["redirect"], env=env(config)) + else: + info["proc"] = Popen(command, env=env(config)) + + if config["verbose"] > 1: + print("Spawned process {}".format(info["proc"].pid)) + + return info + + +# Default to conservatively assuming 4GB/job. 
+def max_parallel_jobs(job_size=4 * 2**30): + """Return the max number of parallel jobs we can run without overfilling + memory, assuming heavyweight jobs.""" + from_cores = int(subprocess.check_output(["nproc", "--ignore=1"]).strip()) + mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") + from_mem = round(mem_bytes / job_size) + return min(from_cores, from_mem) + + +config = {"analysis_scriptdir": os.path.dirname(__file__)} + +defaults = [ + "%s/defaults.py" % config["analysis_scriptdir"], + "%s/defaults.py" % os.getcwd(), +] + +parser = argparse.ArgumentParser( + description="Statically analyze build tree for rooting hazards." +) +parser.add_argument( + "step", metavar="STEP", type=str, nargs="?", help="run only step STEP" +) +parser.add_argument( + "--source", metavar="SOURCE", type=str, nargs="?", help="source code to analyze" +) +parser.add_argument( + "--js", + metavar="JSSHELL", + type=str, + nargs="?", + help="full path to ctypes-capable JS shell", +) +parser.add_argument( + "--first", + metavar="STEP", + type=str, + nargs="?", + help="execute all jobs starting with STEP", +) +parser.add_argument( + "--last", metavar="STEP", type=str, nargs="?", help="stop at step STEP" +) +parser.add_argument( + "--jobs", + "-j", + default=None, + metavar="JOBS", + type=int, + help="number of simultaneous analyzeRoots.js jobs", +) +parser.add_argument( + "--list", const=True, nargs="?", type=bool, help="display available steps" +) +parser.add_argument( + "--expect-file", + type=str, + nargs="?", + help="deprecated option, temporarily still present for backwards " "compatibility", +) +parser.add_argument( + "--verbose", + "-v", + action="count", + default=1, + help="Display cut & paste commands to run individual steps (give twice for more output)", +) +parser.add_argument("--quiet", "-q", action="count", default=0, help="Suppress output") + +args = parser.parse_args() +args.verbose = max(0, args.verbose - args.quiet) + +for default in defaults: + try: + 
execfile(default, config) + if args.verbose > 1: + print("Loaded %s" % default) + except Exception: + pass + +# execfile() used config as the globals for running the +# defaults.py script, and will have set a __builtins__ key as a side effect. +del config["__builtins__"] +data = config.copy() + +for k, v in vars(args).items(): + if v is not None: + data[k] = v + +if args.jobs is not None: + data["jobs"] = args.jobs +if not data.get("jobs"): + data["jobs"] = max_parallel_jobs() + +if "GECKO_PATH" in os.environ: + data["source"] = os.environ["GECKO_PATH"] +if "SOURCE" in os.environ: + data["source"] = os.environ["SOURCE"] + +steps = [ + "gcTypes", + "rawcalls", + "gcFunctions", + "allFunctions", + "hazards", + "gather-hazards", + "explain", + "heapwrites", +] + +if args.list: + for step in steps: + job = JOBS[step] + outfiles = job.get("outputs") or job.get("redirect-output") + if outfiles: + print( + "%s\n ->%s %s" + % (step, "*" if job.get("multi-output") else "", outfiles) + ) + else: + print(step) + sys.exit(0) + +for step in steps: + job = JOBS[step] + if "redirect-output" in job: + data[step] = job["redirect-output"] + elif "outputs" in job and "command" in job: + outfiles = job["outputs"] + num_outputs = 0 + for i, j, name in out_indexes(job["command"]): + # Trim the {curly brackets} off of the output keys. 
+ data[name[1:-1]] = outfiles[i] + num_outputs += 1 + assert ( + len(outfiles) == num_outputs + ), 'step "%s": mismatched number of output files (%d) and params (%d)' % ( + step, + num_outputs, + len(outfiles), + ) # NOQA: E501 + +if args.step: + if args.first or args.last: + raise Exception( + "--first and --last cannot be used when a step argument is given" + ) + steps = [args.step] +else: + if args.first: + steps = steps[steps.index(args.first) :] + if args.last: + steps = steps[: steps.index(args.last) + 1] + +for step in steps: + run_job(step, data) diff --git a/js/src/devtools/rootAnalysis/analyzeHeapWrites.js b/js/src/devtools/rootAnalysis/analyzeHeapWrites.js new file mode 100644 index 0000000000..28679676a5 --- /dev/null +++ b/js/src/devtools/rootAnalysis/analyzeHeapWrites.js @@ -0,0 +1,1396 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); +loadRelativeToScript('callgraph.js'); +loadRelativeToScript('dumpCFG.js'); + +/////////////////////////////////////////////////////////////////////////////// +// Annotations +/////////////////////////////////////////////////////////////////////////////// + +function checkExternalFunction(entry) +{ + var whitelist = [ + "__builtin_clz", + "__builtin_expect", + "isprint", + "ceilf", + "floorf", + /^rusturl/, + "memcmp", + "strcmp", + "fmod", + "floor", + "ceil", + "atof", + /memchr/, + "strlen", + /Servo_DeclarationBlock_GetCssText/, + "Servo_GetArcStringData", + "Servo_IsWorkerThread", + /nsIFrame::AppendOwnedAnonBoxes/, + // Assume that atomic accesses are threadsafe. + /^__atomic_/, + ]; + if (entry.matches(whitelist)) + return; + + // memcpy and memset are safe if the target pointer is threadsafe. 
+ const simpleWrites = [ + "memcpy", + "memset", + "memmove", + ]; + + if (entry.isSafeArgument(1) && simpleWrites.includes(entry.name)) + return; + + dumpError(entry, null, "External function"); +} + +function hasThreadsafeReferenceCounts(entry, regexp) +{ + // regexp should match some nsISupports-operating function and produce the + // name of the nsISupports class via exec(). + + // nsISupports classes which have threadsafe reference counting. + var whitelist = [ + "nsIRunnable", + + // I don't know if these always have threadsafe refcounts. + "nsAtom", + "nsIPermissionManager", + "nsIURI", + ]; + + var match = regexp.exec(entry.name); + return match && nameMatchesArray(match[1], whitelist); +} + +function checkOverridableVirtualCall(entry, location, callee) +{ + // We get here when a virtual call is made on a structure which might be + // overridden by script or by a binary extension. This includes almost + // everything under nsISupports, however, so for the most part we ignore + // this issue. The exception is for nsISupports AddRef/Release, which are + // not in general threadsafe and whose overrides will not be generated by + // the callgraph analysis. + if (callee != "nsISupports.AddRef" && callee != "nsISupports.Release") + return; + + if (hasThreadsafeReferenceCounts(entry, /::~?nsCOMPtr\(.*?\[with T = (.*?)\]$/)) + return; + if (hasThreadsafeReferenceCounts(entry, /RefPtrTraits.*?::Release.*?\[with U = (.*?)\]/)) + return; + if (hasThreadsafeReferenceCounts(entry, /nsCOMPtr<T>::assign_assuming_AddRef.*?\[with T = (.*?)\]/)) + return; + if (hasThreadsafeReferenceCounts(entry, /nsCOMPtr<T>::assign_with_AddRef.*?\[with T = (.*?)\]/)) + return; + + // Watch for raw addref/release. 
+ var whitelist = [ + "Gecko_AddRefAtom", + "Gecko_ReleaseAtom", + /nsPrincipal::Get/, + /CounterStylePtr::Reset/, + ]; + if (entry.matches(whitelist)) + return; + + dumpError(entry, location, "AddRef/Release on nsISupports"); +} + +function checkIndirectCall(entry, location, callee) +{ + var name = entry.name; + + // These hash table callbacks should be threadsafe. + if (/PLDHashTable/.test(name) && (/matchEntry/.test(callee) || /hashKey/.test(callee))) + return; + if (/PL_HashTable/.test(name) && /keyCompare/.test(callee)) + return; + + dumpError(entry, location, "Indirect call " + callee); +} + +function checkVariableAssignment(entry, location, variable) +{ + var name = entry.name; + + dumpError(entry, location, "Variable assignment " + variable); +} + +// Annotations for function parameters, based on function name and parameter +// name + type. +function treatAsSafeArgument(entry, varName, csuName) +{ + var whitelist = [ + // These iterator classes should all be thread local. They are passed + // in to some Servo bindings and are created on the heap by others, so + // just ignore writes to them. + [null, null, /StyleChildrenIterator/], + [null, null, /ExplicitChildIterator/], + + // The use of BeginReading() to instantiate this class confuses the + // analysis. + [null, null, /nsReadingIterator/], + + // These classes are passed to some Servo bindings to fill in. + [/^Gecko_/, null, "nsStyleImageLayers"], + [/^Gecko_/, null, /FontFamilyList/], + + // Various Servo binding out parameters. This is a mess and there needs + // to be a way to indicate which params are out parameters, either using + // an attribute or a naming convention. 
+ ["Gecko_SetCounterStyleToName", "aPtr", null], + ["Gecko_SetCounterStyleToSymbols", "aPtr", null], + ["Gecko_SetCounterStyleToString", "aPtr", null], + ["Gecko_CopyCounterStyle", "aDst", null], + ["Gecko_SetMozBinding", "aDisplay", null], + [/ClassOrClassList/, /aClass/, null], + ["Gecko_GetAtomAsUTF16", "aLength", null], + ["Gecko_CopyMozBindingFrom", "aDest", null], + ["Gecko_SetNullImageValue", "aImage", null], + ["Gecko_SetGradientImageValue", "aImage", null], + ["Gecko_SetImageElement", "aImage", null], + ["Gecko_SetLayerImageImageValue", "aImage", null], + ["Gecko_CopyImageValueFrom", "aImage", null], + ["Gecko_SetCursorArrayLength", "aStyleUI", null], + ["Gecko_CopyCursorArrayFrom", "aDest", null], + ["Gecko_SetCursorImageValue", "aCursor", null], + ["Gecko_SetListStyleImageImageValue", "aList", null], + ["Gecko_SetListStyleImageNone", "aList", null], + ["Gecko_CopyListStyleImageFrom", "aList", null], + ["Gecko_ClearStyleContents", "aContent", null], + ["Gecko_CopyStyleContentsFrom", "aContent", null], + ["Gecko_CopyStyleGridTemplateValues", "aGridTemplate", null], + ["Gecko_ResetStyleCoord", null, null], + ["Gecko_CopyClipPathValueFrom", "aDst", null], + ["Gecko_DestroyClipPath", "aClip", null], + ["Gecko_ResetFilters", "effects", null], + [/Gecko_CSSValue_Set/, "aCSSValue", null], + ["Gecko_CSSValue_Drop", "aCSSValue", null], + ["Gecko_CSSFontFaceRule_GetCssText", "aResult", null], + ["Gecko_EnsureTArrayCapacity", "aArray", null], + ["Gecko_ClearPODTArray", "aArray", null], + ["Gecko_SetStyleGridTemplate", "aGridTemplate", null], + ["Gecko_ResizeTArrayForStrings", "aArray", null], + ["Gecko_ClearAndResizeStyleContents", "aContent", null], + [/Gecko_ClearAndResizeCounter/, "aContent", null], + [/Gecko_CopyCounter.*?From/, "aContent", null], + [/Gecko_SetContentDataImageValue/, "aList", null], + [/Gecko_SetContentData/, "aContent", null], + ["Gecko_SetCounterFunction", "aContent", null], + [/Gecko_EnsureStyle.*?ArrayLength/, "aArray", null], + 
["Gecko_GetOrCreateKeyframeAtStart", "aKeyframes", null], + ["Gecko_GetOrCreateInitialKeyframe", "aKeyframes", null], + ["Gecko_GetOrCreateFinalKeyframe", "aKeyframes", null], + ["Gecko_AppendPropertyValuePair", "aProperties", null], + ["Gecko_SetStyleCoordCalcValue", null, null], + ["Gecko_StyleClipPath_SetURLValue", "aClip", null], + ["Gecko_nsStyleFilter_SetURLValue", "aEffects", null], + ["Gecko_nsStyleSVG_SetDashArrayLength", "aSvg", null], + ["Gecko_nsStyleSVG_CopyDashArray", "aDst", null], + ["Gecko_nsStyleFont_SetLang", "aFont", null], + ["Gecko_nsStyleFont_CopyLangFrom", "aFont", null], + ["Gecko_ClearWillChange", "aDisplay", null], + ["Gecko_AppendWillChange", "aDisplay", null], + ["Gecko_CopyWillChangeFrom", "aDest", null], + ["Gecko_InitializeImageCropRect", "aImage", null], + ["Gecko_CopyShapeSourceFrom", "aDst", null], + ["Gecko_DestroyShapeSource", "aShape", null], + ["Gecko_StyleShapeSource_SetURLValue", "aShape", null], + ["Gecko_NewBasicShape", "aShape", null], + ["Gecko_NewShapeImage", "aShape", null], + ["Gecko_nsFont_InitSystem", "aDest", null], + ["Gecko_nsFont_SetFontFeatureValuesLookup", "aFont", null], + ["Gecko_nsFont_ResetFontFeatureValuesLookup", "aFont", null], + ["Gecko_nsStyleFont_FixupNoneGeneric", "aFont", null], + ["Gecko_StyleTransition_SetUnsupportedProperty", "aTransition", null], + ["Gecko_AddPropertyToSet", "aPropertySet", null], + ["Gecko_CalcStyleDifference", "aAnyStyleChanged", null], + ["Gecko_CalcStyleDifference", "aOnlyResetStructsChanged", null], + ["Gecko_nsStyleSVG_CopyContextProperties", "aDst", null], + ["Gecko_nsStyleFont_PrefillDefaultForGeneric", "aFont", null], + ["Gecko_nsStyleSVG_SetContextPropertiesLength", "aSvg", null], + ["Gecko_ClearAlternateValues", "aFont", null], + ["Gecko_AppendAlternateValues", "aFont", null], + ["Gecko_CopyAlternateValuesFrom", "aDest", null], + ["Gecko_CounterStyle_GetName", "aResult", null], + ["Gecko_CounterStyle_GetSingleString", "aResult", null], + 
["Gecko_nsTArray_FontFamilyName_AppendNamed", "aNames", null], + ["Gecko_nsTArray_FontFamilyName_AppendGeneric", "aNames", null], + ]; + for (var [entryMatch, varMatch, csuMatch] of whitelist) { + assert(entryMatch || varMatch || csuMatch); + if (entryMatch && !nameMatches(entry.name, entryMatch)) + continue; + if (varMatch && !nameMatches(varName, varMatch)) + continue; + if (csuMatch && (!csuName || !nameMatches(csuName, csuMatch))) + continue; + return true; + } + return false; +} + +function isSafeAssignment(entry, edge, variable) +{ + if (edge.Kind != 'Assign') + return false; + + var [mangled, unmangled] = splitFunction(entry.name); + + // The assignment + // + // nsFont* font = fontTypes[eType]; + // + // ends up with 'font' pointing to a member of 'this', so it should inherit + // the safety of 'this'. + if (unmangled.includes("mozilla::LangGroupFontPrefs::Initialize") && + variable == 'font') + { + const [lhs, rhs] = edge.Exp; + const {Kind, Exp: [{Kind: indexKind, Exp: [collection, index]}]} = rhs; + if (Kind == 'Drf' && + indexKind == 'Index' && + collection.Kind == 'Var' && + collection.Variable.Name[0] == 'fontTypes') + { + return entry.isSafeArgument(0); // 'this' + } + } + + return false; +} + +function checkFieldWrite(entry, location, fields) +{ + var name = entry.name; + for (var field of fields) { + // The analysis is having some trouble keeping track of whether + // already_AddRefed and nsCOMPtr structures are safe to access. + // Hopefully these will be thread local, but it would be better to + // improve the analysis to handle these. + if (/already_AddRefed.*?.mRawPtr/.test(field)) + return; + if (/nsCOMPtr<.*?>.mRawPtr/.test(field)) + return; + + if (/\bThreadLocal<\b/.test(field)) + return; + + // Debugging check for string corruption. 
+ if (field == "nsStringBuffer.mCanary") + return; + } + + var str = ""; + for (var field of fields) + str += " " + field; + + dumpError(entry, location, "Field write" + str); +} + +function checkDereferenceWrite(entry, location, variable) +{ + var name = entry.name; + + // Maybe<T> uses placement new on local storage in a way we don't understand. + // Allow this if the Maybe<> value itself is threadsafe. + if (/Maybe.*?::emplace/.test(name) && entry.isSafeArgument(0)) + return; + + // UniquePtr writes through temporaries referring to its internal storage. + // Allow this if the UniquePtr<> is threadsafe. + if (/UniquePtr.*?::reset/.test(name) && entry.isSafeArgument(0)) + return; + + // Operations on nsISupports reference counts. + if (hasThreadsafeReferenceCounts(entry, /nsCOMPtr<T>::swap\(.*?\[with T = (.*?)\]/)) + return; + + // ConvertToLowerCase::write writes through a local pointer into the first + // argument. + if (/ConvertToLowerCase::write/.test(name) && entry.isSafeArgument(0)) + return; + + dumpError(entry, location, "Dereference write " + (variable ? variable : "<unknown>")); +} + +function ignoreCallEdge(entry, callee) +{ + var name = entry.name; + + // nsPropertyTable::GetPropertyInternal has the option of removing data + // from the table, but when it is called by nsPropertyTable::GetProperty + // this will not occur. + if (/nsPropertyTable::GetPropertyInternal/.test(callee) && + /nsPropertyTable::GetProperty/.test(name)) + { + return true; + } + + // Document::PropertyTable calls GetExtraPropertyTable (which has side + // effects) if the input category is non-zero. If a literal zero was passed + // in for the category then we treat it as a safe argument, per + // isEdgeSafeArgument, so just watch for that. 
+ if (/Document::GetExtraPropertyTable/.test(callee) && + /Document::PropertyTable/.test(name) && + entry.isSafeArgument(1)) + { + return true; + } + + // This function has an explicit test for being on the main thread if the + // style has non-threadsafe refcounts, but the analysis isn't smart enough + // to understand what the actual styles that can be involved are. + if (/nsStyleList::SetCounterStyle/.test(callee)) + return true; + + // CachedBorderImageData is exclusively owned by nsStyleImage, but the + // analysis is not smart enough to know this. + if (/CachedBorderImageData::PurgeCachedImages/.test(callee) && + /nsStyleImage::/.test(name) && + entry.isSafeArgument(0)) + { + return true; + } + + // StyleShapeSource exclusively owns its UniquePtr<nsStyleImage>. + if (/nsStyleImage::SetURLValue/.test(callee) && + /StyleShapeSource::SetURL/.test(name) && + entry.isSafeArgument(0)) + { + return true; + } + + // The AddRef through a just-assigned heap pointer here is not handled by + // the analysis. + if (/nsCSSValue::Array::AddRef/.test(callee) && + /nsStyleContentData::SetCounters/.test(name) && + entry.isSafeArgument(2)) + { + return true; + } + + // AllChildrenIterator asks AppendOwnedAnonBoxes to append into an nsTArray + // local variable. + if (/nsIFrame::AppendOwnedAnonBoxes/.test(callee) && + /AllChildrenIterator::AppendNativeAnonymousChildren/.test(name)) + { + return true; + } + + // Runnables are created and named on one thread, then dispatched + // (possibly to another). Writes on the origin thread are ok. 
+ if (/::SetName/.test(callee) && + /::UnlabeledDispatch/.test(name)) + { + return true; + } + + // We manually lock here + if (name == "Gecko_nsFont_InitSystem" || + name == "Gecko_GetFontMetrics" || + name == "Gecko_nsStyleFont_FixupMinFontSize" || + /ThreadSafeGetDefaultFontHelper/.test(name)) + { + return true; + } + + return false; +} + +function ignoreContents(entry) +{ + var whitelist = [ + // We don't care what happens when we're about to crash. + "abort", + /MOZ_ReportAssertionFailure/, + /MOZ_ReportCrash/, + /MOZ_Crash/, + /MOZ_CrashPrintf/, + /AnnotateMozCrashReason/, + /InvalidArrayIndex_CRASH/, + /NS_ABORT_OOM/, + + // These ought to be threadsafe. + "NS_DebugBreak", + /mozalloc_handle_oom/, + /^NS_Log/, /log_print/, /LazyLogModule::operator/, + /SprintfLiteral/, "PR_smprintf", "PR_smprintf_free", + /NS_DispatchToMainThread/, /NS_ReleaseOnMainThread/, + /NS_NewRunnableFunction/, /NS_Atomize/, + /nsCSSValue::BufferFromString/, + /NS_xstrdup/, + /Assert_NoQueryNeeded/, + /AssertCurrentThreadOwnsMe/, + /PlatformThread::CurrentId/, + /imgRequestProxy::GetProgressTracker/, // Uses an AutoLock + /Smprintf/, + "malloc", + "calloc", + "free", + "realloc", + "memalign", + "strdup", + "strndup", + "moz_xmalloc", + "moz_xcalloc", + "moz_xrealloc", + "moz_xmemalign", + "moz_xstrdup", + "moz_xstrndup", + "jemalloc_thread_local_arena", + + // These all create static strings in local storage, which is threadsafe + // to do but not understood by the analysis yet. + / EmptyString\(\)/, + + // These could probably be handled by treating the scope of PSAutoLock + // aka BaseAutoLock<PSMutex> as threadsafe. + /profiler_register_thread/, + /profiler_unregister_thread/, + + // The analysis thinks we'll write to mBits in the DoGetStyleFoo<false> + // call. Maybe the template parameter confuses it? 
+ /ComputedStyle::PeekStyle/, + + // The analysis can't cope with the indirection used for the objects + // being initialized here, from nsCSSValue::Array::Create to the return + // value of the Item(i) getter. + /nsCSSValue::SetCalcValue/, + + // Unable to analyze safety of linked list initialization. + "Gecko_NewCSSValueSharedList", + "Gecko_CSSValue_InitSharedList", + + // Unable to trace through dataflow, but straightforward if inspected. + "Gecko_NewNoneTransform", + + // Need main thread assertions or other fixes. + /EffectCompositor::GetServoAnimationRule/, + ]; + if (entry.matches(whitelist)) + return true; + + if (entry.isSafeArgument(0)) { + var heapWhitelist = [ + // Operations on heap structures pointed to by arrays and strings are + // threadsafe as long as the array/string itself is threadsafe. + /nsTArray_Impl.*?::AppendElement/, + /nsTArray_Impl.*?::RemoveElementsAt/, + /nsTArray_Impl.*?::ReplaceElementsAt/, + /nsTArray_Impl.*?::InsertElementAt/, + /nsTArray_Impl.*?::SetCapacity/, + /nsTArray_Impl.*?::SetLength/, + /nsTArray_base.*?::EnsureCapacity/, + /nsTArray_base.*?::ShiftData/, + /AutoTArray.*?::Init/, + /(nsTSubstring<T>|nsAC?String)::SetCapacity/, + /(nsTSubstring<T>|nsAC?String)::SetLength/, + /(nsTSubstring<T>|nsAC?String)::Assign/, + /(nsTSubstring<T>|nsAC?String)::Append/, + /(nsTSubstring<T>|nsAC?String)::Replace/, + /(nsTSubstring<T>|nsAC?String)::Trim/, + /(nsTSubstring<T>|nsAC?String)::Truncate/, + /(nsTSubstring<T>|nsAC?String)::StripTaggedASCII/, + /(nsTSubstring<T>|nsAC?String)::operator=/, + /nsTAutoStringN<T, N>::nsTAutoStringN/, + + // Similar for some other data structures + /nsCOMArray_base::SetCapacity/, + /nsCOMArray_base::Clear/, + /nsCOMArray_base::AppendElement/, + + // UniquePtr is similar. + /mozilla::UniquePtr/, + + // The use of unique pointers when copying mCropRect here confuses + // the analysis. 
+ /nsStyleImage::DoCopy/, + ]; + if (entry.matches(heapWhitelist)) + return true; + } + + if (entry.isSafeArgument(1)) { + var firstArgWhitelist = [ + /nsTextFormatter::snprintf/, + /nsTextFormatter::ssprintf/, + /_ASCIIToUpperInSitu/, + + // Handle some writes into an array whose safety we don't have a good way + // of tracking currently. + /FillImageLayerList/, + /FillImageLayerPositionCoordList/, + ]; + if (entry.matches(firstArgWhitelist)) + return true; + } + + if (entry.isSafeArgument(2)) { + var secondArgWhitelist = [ + /nsStringBuffer::ToString/, + /AppendUTF\d+toUTF\d+/, + /AppendASCIItoUTF\d+/, + ]; + if (entry.matches(secondArgWhitelist)) + return true; + } + + return false; +} + +/////////////////////////////////////////////////////////////////////////////// +// Sixgill Utilities +/////////////////////////////////////////////////////////////////////////////// + +function variableName(variable) +{ + return (variable && variable.Name) ? variable.Name[0] : null; +} + +function stripFields(exp) +{ + // Fields and index operations do not involve any dereferences. Remove them + // from the expression but remember any encountered fields for use by + // annotations later on. + var fields = []; + while (true) { + if (exp.Kind == "Index") { + exp = exp.Exp[0]; + continue; + } + if (exp.Kind == "Fld") { + var csuName = exp.Field.FieldCSU.Type.Name; + var fieldName = exp.Field.Name[0]; + assert(csuName && fieldName); + fields.push(csuName + "." 
+ fieldName); + exp = exp.Exp[0]; + continue; + } + break; + } + return [exp, fields]; +} + +function isLocalVariable(variable) +{ + switch (variable.Kind) { + case "Return": + case "Temp": + case "Local": + case "Arg": + return true; + } + return false; +} + +function isDirectCall(edge, regexp) +{ + return edge.Kind == "Call" + && edge.Exp[0].Kind == "Var" + && regexp.test(variableName(edge.Exp[0].Variable)); +} + +function isZero(exp) +{ + return exp.Kind == "Int" && exp.String == "0"; +} + +/////////////////////////////////////////////////////////////////////////////// +// Analysis Structures +/////////////////////////////////////////////////////////////////////////////// + +// Safe arguments are those which may be written through (directly, not through +// pointer fields etc.) without concerns about thread safety. This includes +// pointers to stack data, null pointers, and other data we know is thread +// local, such as certain arguments to the root functions. +// +// Entries in the worklist keep track of the pointer arguments to the function +// which are safe using a sorted array, so that this can be propagated down the +// stack. Zero is |this|, and arguments are indexed starting at one. 
// One unit of work for the analysis: a function name together with the
// indices of its arguments known to be safe to write through, the call stack
// (array of CallSite, innermost first) that reached it, and a map from
// argument index to parameter name used when printing.
function WorklistEntry(name, safeArguments, stack, parameterNames)
{
    this.name = name;
    this.safeArguments = safeArguments;
    this.stack = stack;
    this.parameterNames = parameterNames;
}

// Return the second component of splitFunction(this.name).
WorklistEntry.prototype.readable = function()
{
    const [ mangled, readable ] = splitFunction(this.name);
    return readable;
}

// Return the name followed by " SAFE <index>" for every safe argument, in
// array order. processRoot() uses this string as its "already visited" key,
// so the order of safeArguments is part of the key.
WorklistEntry.prototype.mangledName = function()
{
    var str = this.name;
    for (var safe of this.safeArguments)
        str += " SAFE " + safe;
    return str;
}

// Whether `index` (0 is |this|, arguments start at 1) is in the safe list.
WorklistEntry.prototype.isSafeArgument = function(index)
{
    for (var safe of this.safeArguments) {
        if (index == safe)
            return true;
    }
    return false;
}

// Record the printable name of the parameter at `index`.
WorklistEntry.prototype.setParameterName = function(index, name)
{
    this.parameterNames[index] = name;
}

// Add `index` to the safe argument list if it is not already present.
WorklistEntry.prototype.addSafeArgument = function(index)
{
    if (this.isSafeArgument(index))
        return;
    this.safeArguments.push(index);

    // Keep the indices in ascending numeric order. This makes printed stack
    // info tidier and keeps mangledName() consistent with the lists built by
    // getEdgeSafeArguments(), which are produced in numeric order. A bare
    // sort() compares elements as strings and would place 10 before 2.
    this.safeArguments.sort((a, b) => a - b);
}

// Map a sixgill variable to its safe-argument index: 0 for |this|,
// Index + 1 for an argument, -1 for anything else.
function safeArgumentIndex(variable)
{
    if (variable.Kind == "This")
        return 0;
    if (variable.Kind == "Arg")
        return variable.Index + 1;
    return -1;
}

// Test `name` against `match`, which is either a string (compared for
// equality) or a RegExp (tested against the name).
function nameMatches(name, match)
{
    if (typeof match == "string") {
        if (name == match)
            return true;
    } else {
        assert(match instanceof RegExp);
        if (match.test(name))
            return true;
    }
    return false;
}

// Whether `name` matches any string/RegExp in `matchArray`.
function nameMatchesArray(name, matchArray)
{
    for (var match of matchArray) {
        if (nameMatches(name, match))
            return true;
    }
    return false;
}

// Whether this entry's name matches any string/RegExp in `matchArray`.
WorklistEntry.prototype.matches = function(matchArray)
{
    return nameMatchesArray(this.name, matchArray);
}

// A call to `callee` made at `location`, with the argument indices that are
// safe at that call and the parameter names known for them.
function CallSite(callee, safeArguments, location, parameterNames)
{
    this.callee = callee;
    this.safeArguments = safeArguments;
    this.location = location;
    this.parameterNames = parameterNames;
}

// Return a printable suffix listing the safe arguments of this call site, or
// the empty string when there are none. An argument without a recorded
// parameter name is shown as <this> or <argN>.
CallSite.prototype.safeString = function()
{
    if (this.safeArguments.length) {
        var str = "";
        for (var i = 0; i < this.safeArguments.length; i++) {
            var arg = this.safeArguments[i];
            if (arg in this.parameterNames)
                str += " " + this.parameterNames[arg];
            else
                str += " <" + ((arg == 0) ? "this" : "arg" + (arg - 1)) + ">";
        }
        return " ### SafeArguments:" + str;
    }
    return "";
}

///////////////////////////////////////////////////////////////////////////////
// Analysis Core
///////////////////////////////////////////////////////////////////////////////

var errorCount = 0;
var errorLimit = 100;

// We want to suppress output for functions that ended up not having any
// hazards, for brevity of the final output. So each new toplevel function will
// initialize this to a string, which should be printed only if an error is
// seen.
var errorHeader;

var startTime = new Date;

// Return the time since script start formatted as "[N.NNs] ".
function elapsedTime()
{
    var seconds = (new Date - startTime) / 1000;
    return "[" + seconds.toFixed(2) + "s] ";
}

var options = parse_options([
    {
        name: '--strip-prefix',
        default: os.getenv('SOURCE') || '',
        type: 'string'
    },
    {
        name: '--add-prefix',
        default: os.getenv('URLPREFIX') || '',
        type: 'string'
    },
    {
        name: '--verbose',
        type: 'bool'
    },
]);

// Return `str` with a trailing "/" appended unless it is empty or already
// ends with one.
function add_trailing_slash(str) {
    if (str == '')
        return str;
    return str.endsWith("/") ? str : str + "/";
}

var removePrefix = add_trailing_slash(options.strip_prefix);
var addPrefix = add_trailing_slash(options.add_prefix);

if (options.verbose) {
    printErr(`Removing prefix ${removePrefix} from paths`);
    printErr(`Prepending ${addPrefix} to paths`);
}

print(elapsedTime() + "Loading types...");
if (os.getenv("TYPECACHE"))
    loadTypesWithCache('src_comp.xdb', os.getenv("TYPECACHE"));
else
    loadTypes('src_comp.xdb');
print(elapsedTime() + "Starting analysis...");

var xdb = xdbLibrary();
xdb.open("src_body.xdb");

var minStream = xdb.min_data_stream();
var maxStream = xdb.max_data_stream();
var roots = [];

// Either analyze the single function named on the command line, or collect
// every Gecko_* function whose database entry mentions ServoBindings.cpp.
var [flag, arg] = scriptArgs;
if (flag && (flag == '-f' || flag == '--function')) {
    roots = [arg];
} else {
    for (var bodyIndex = minStream; bodyIndex <= maxStream; bodyIndex++) {
        var key = xdb.read_key(bodyIndex);
        var name = key.readString();
        if (/^Gecko_/.test(name)) {
            var data = xdb.read_entry(key);
            if (/ServoBindings.cpp/.test(data.readString()))
                roots.push(name);
            xdb.free_string(data);
        }
        xdb.free_string(key);
    }
}

print(elapsedTime() + "Found " + roots.length + " roots.");
for (var i = 0; i < roots.length; i++) {
    var root = roots[i];
    errorHeader = elapsedTime() + "#" + (i + 1) + " Analyzing " + root + " ...";
    try {
        processRoot(root);
    } catch (e) {
        // dumpError() throws the string "Error!" after reporting; anything
        // else is a genuine failure and is propagated.
        if (e != "Error!")
            throw e;
    }
}

print(`${elapsedTime()}Completed analysis, found ${errorCount}/${errorLimit} allowed errors`);

var currentBody;

// All local variable assignments we have seen in either the outer or inner
// function. This crosses loop boundaries, and currently has an unsoundness
// where later assignments in a loop are not taken into account.
var assignments;

// All loops in the current function which are reachable off main thread.
var reachableLoops;

// Functions that are reachable from the current root.
var reachable = {};

// Report a hazard found while analyzing `entry`: print the pending header (if
// any), the error text, the location and the call stack, then throw the
// string "Error!". Exits the shell once errorLimit errors have been reported.
function dumpError(entry, location, text)
{
    if (errorHeader) {
        print(errorHeader);
        errorHeader = undefined;
    }

    var stack = entry.stack;
    print("Error: " + text);
    print("Location: " + entry.name + (location ? " @ " + location : "") + stack[0].safeString());
    print("Stack Trace:");
    // Include the callers in the stack trace instead of the callees. Make sure
    // the dummy stack entry we added for the original roots is in place.
    assert(stack[stack.length - 1].location == null);
    for (var i = 0; i < stack.length - 1; i++)
        print(stack[i + 1].callee + " @ " + stack[i].location + stack[i + 1].safeString());
    print("\n");

    if (++errorCount == errorLimit) {
        print("Maximum number of errors encountered, exiting...");
        quit();
    }

    // The top-level driver loop recognizes this exact string.
    throw "Error!";
}

// If edge is an assignment from a local variable, return the rhs variable.
function variableAssignRhs(edge)
{
    if (edge.Kind == "Assign" && edge.Exp[1].Kind == "Drf" && edge.Exp[1].Exp[0].Kind == "Var") {
        var variable = edge.Exp[1].Exp[0].Variable;
        if (isLocalVariable(variable))
            return variable;
    }
    return null;
}

// Check a write to `lhs` performed by `edge`. Writes to local variables are
// recorded in `assignments`; other writes are either accepted or reported
// through the check*/dumpError helpers.
function processAssign(body, entry, location, lhs, edge)
{
    var fields;
    [lhs, fields] = stripFields(lhs);

    switch (lhs.Kind) {
      case "Var":
        var name = variableName(lhs.Variable);
        if (isLocalVariable(lhs.Variable)) {
            // Remember any assignments to local variables in this function.
            // Note that we ignore any points where the variable's address is
            // taken and indirect assignments might occur. This is an
            // unsoundness in the analysis.

            let assign = [body, edge];

            // Chain assignments if the RHS has only been assigned once.
            var rhsVariable = variableAssignRhs(edge);
            if (rhsVariable) {
                var rhsAssign = singleAssignment(variableName(rhsVariable));
                if (rhsAssign)
                    assign = rhsAssign;
            }

            if (!(name in assignments))
                assignments[name] = [];
            assignments[name].push(assign);
        } else {
            checkVariableAssignment(entry, location, name);
        }
        return;
      case "Drf":
        var variable = null;
        if (lhs.Exp[0].Kind == "Var") {
            variable = lhs.Exp[0].Variable;
            if (isSafeVariable(entry, variable))
                return;
        } else if (lhs.Exp[0].Kind == "Fld") {
            const {
                Name: [ fieldName ],
                Type: {Kind, Type: fieldType},
                FieldCSU: {Type: {Kind: containerTypeKind,
                                  Name: containerTypeName}}
            } = lhs.Exp[0].Field;
            const [containerExpr] = lhs.Exp[0].Exp;

            if (containerTypeKind == 'CSU' &&
                Kind == 'Pointer' &&
                isEdgeSafeArgument(entry, containerExpr) &&
                isSafeMemberPointer(containerTypeName, fieldName, fieldType))
            {
                return;
            }
        }
        if (fields.length)
            checkFieldWrite(entry, location, fields);
        else
            checkDereferenceWrite(entry, location, variableName(variable));
        return;
      case "Int":
        if (isZero(lhs)) {
            // This shows up under MOZ_ASSERT, to crash the process.
            return;
        }
    }
    dumpError(entry, location, "Unknown assignment " + JSON.stringify(lhs));
}

// Format a sixgill location as "<addPrefix><file>#<line>", with the first
// occurrence of removePrefix stripped from the file name.
function get_location(rawLocation) {
    const filename = rawLocation.CacheString.replace(removePrefix, '');
    return addPrefix + filename + "#" + rawLocation.Line;
}

// Analyze one body (outer function or loop) of `entry`. Every direct callee
// found on an edge reachable off the main thread is passed to `addCallee` as
// a CallSite; writes and indirect calls are checked as they are encountered.
function process(entry, body, addCallee)
{
    if (!("PEdge" in body))
        return;

    // Add any arguments which are safe due to annotations.
    if ("DefineVariable" in body) {
        for (var defvar of body.DefineVariable) {
            var index = safeArgumentIndex(defvar.Variable);
            if (index >= 0) {
                var varName = index ? variableName(defvar.Variable) : "this";
                assert(varName);
                entry.setParameterName(index, varName);
                var csuName = null;
                var type = defvar.Type;
                if (type.Kind == "Pointer" && type.Type.Kind == "CSU")
                    csuName = type.Type.Name;
                if (treatAsSafeArgument(entry, varName, csuName))
                    entry.addSafeArgument(index);
            }
        }
    }

    // Points in the body which are reachable if we are not on the main thread.
    var nonMainThreadPoints = [];
    nonMainThreadPoints[body.Index[0]] = true;

    for (var edge of body.PEdge) {
        // Ignore code that only executes on the main thread.
        if (!(edge.Index[0] in nonMainThreadPoints))
            continue;

        var location = get_location(body.PPoint[edge.Index[0] - 1].Location);

        var callees = getCallees(edge);
        for (var callee of callees) {
            switch (callee.kind) {
              case "direct":
                var safeArguments = getEdgeSafeArguments(entry, edge, callee.name);
                addCallee(new CallSite(callee.name, safeArguments, location, {}));
                break;
              case "resolved-field":
                break;
              case "field":
                var field = callee.csu + "." + callee.field;
                if (callee.isVirtual)
                    checkOverridableVirtualCall(entry, location, field);
                else
                    checkIndirectCall(entry, location, field);
                break;
              case "indirect":
                checkIndirectCall(entry, location, callee.variable);
                break;
              default:
                dumpError(entry, location, "Unknown call " + callee.kind);
                break;
            }
        }

        var fallthrough = true;

        if (edge.Kind == "Assign") {
            assert(edge.Exp.length == 2);
            processAssign(body, entry, location, edge.Exp[0], edge);
        } else if (edge.Kind == "Call") {
            assert(edge.Exp.length <= 2);
            if (edge.Exp.length == 2)
                processAssign(body, entry, location, edge.Exp[1], edge);

            // Treat assertion failures as if they don't return, so that
            // asserting NS_IsMainThread() is sufficient to prevent the
            // analysis from considering a block of code.
            if (isDirectCall(edge, /MOZ_ReportAssertionFailure/))
                fallthrough = false;
        } else if (edge.Kind == "Loop") {
            reachableLoops[edge.BlockId.Loop] = true;
        } else if (edge.Kind == "Assume") {
            if (testFailsOffMainThread(edge.Exp[0], edge.PEdgeAssumeNonZero))
                fallthrough = false;
        }

        if (fallthrough)
            nonMainThreadPoints[edge.Index[1]] = true;
    }
}

// Called when the database has no body for `entry`. If the name matches one
// of the known patterns below, queue the substitute name through `addCallee`
// and return true; otherwise return false.
function maybeProcessMissingFunction(entry, addCallee)
{
    var name = entry.name;

    // Queue `callee` for analysis in place of the missing function, carrying
    // over the entry's safe arguments, call location and parameter names.
    // The substitute name must be passed here: re-queueing `name` itself
    // would be discarded by processRoot() as already visited.
    const redirectTo = (callee) => {
        addCallee(new CallSite(callee, entry.safeArguments, entry.stack[0].location, entry.parameterNames));
        return true;
    };

    // If a function is missing it might be because a destructor Foo::~Foo() is
    // being called but GCC only gave us an implementation for
    // Foo::~Foo(int32). See computeCallgraph.js for a little more info.
    if (name.indexOf("::~") > 0 && name.indexOf("()") > 0)
        return redirectTo(name.replace("()", "(int32)"));

    // Similarly, a call to a C1 constructor might invoke the C4 constructor. A
    // mangled constructor will be something like _ZN<length><name>C1E... or in
    // the case of a templatized constructor, _ZN<length><name>C1I...EE... so
    // we hack it and look for "C1E" or "C1I" and replace them with their C4
    // variants. This will have rare false matches, but so far we haven't hit
    // any external function calls of that sort.
    if (entry.mangledName().includes("C1E") || entry.mangledName().includes("C1I"))
        return redirectTo(name.replace("C1E", "C4E").replace("C1I", "C4I"));

    // Hack to manually follow some typedefs that show up on some functions.
    // This is a bug in the sixgill GCC plugin I think, since sixgill is
    // supposed to follow any typedefs itself.
    if (/mozilla::dom::Element/.test(name))
        return redirectTo(name.replace("mozilla::dom::Element", "Document::Element"));

    // Hack for covariant return types. When overriding a virtual method
    // with a method that returns a different return type (a subtype of the
    // original return type), we are getting the right mangled name but the
    // wrong return type in the unmangled name.
    if (/\$nsTextFrame/.test(name))
        return redirectTo(name.replace("nsTextFrame", "nsIFrame"));

    return false;
}

// Analyze everything reachable from the root function `name`, using a
// worklist of WorklistEntry objects. Resets `reachable` for the new root.
function processRoot(name)
{
    var safeArguments = [];
    var parameterNames = {};
    var worklist = [new WorklistEntry(name, safeArguments, [new CallSite(name, safeArguments, null, parameterNames)], parameterNames)];

    reachable = {};

    while (worklist.length > 0) {
        var entry = worklist.pop();

        // In principle we would be better off doing a meet-over-paths here to get
        // the common subset of arguments which are safe to write through. However,
        // analyzing functions separately for each subset is simpler, ensures that
        // the stack traces we produce accurately characterize the stack arguments,
        // and should be fast enough for now.

        if (entry.mangledName() in reachable)
            continue;
        reachable[entry.mangledName()] = true;

        if (ignoreContents(entry))
            continue;

        var data = xdb.read_entry(entry.name);
        var callees = [];
        try {
            var dataString = data.readString();
            if (dataString.length) {
                // Reverse the order of the bodies we process so that we visit the
                // outer function and see its assignments before the inner loops.
                assignments = {};
                reachableLoops = {};
                var bodies = JSON.parse(dataString).reverse();
                for (var body of bodies) {
                    if (!body.BlockId.Loop || body.BlockId.Loop in reachableLoops) {
                        currentBody = body;
                        process(entry, body, Array.prototype.push.bind(callees));
                    }
                }
            } else {
                if (!maybeProcessMissingFunction(entry, Array.prototype.push.bind(callees)))
                    checkExternalFunction(entry);
            }
        } finally {
            // dumpError() reports by throwing, so release the database string
            // on that path as well.
            xdb.free_string(data);
        }

        for (var callee of callees) {
            if (!ignoreCallEdge(entry, callee.callee)) {
                var nstack = [callee, ...entry.stack];
                worklist.push(new WorklistEntry(callee.callee, callee.safeArguments, nstack, callee.parameterNames));
            }
        }
    }
}

// Whether `exp`, once field and index accesses are stripped, is a local
// variable, a dereference of a safe variable, or a literal zero.
function isEdgeSafeArgument(entry, exp)
{
    var fields;
    [exp, fields] = stripFields(exp);

    if (exp.Kind == "Var" && isLocalVariable(exp.Variable))
        return true;
    if (exp.Kind == "Drf" && exp.Exp[0].Kind == "Var") {
        var variable = exp.Exp[0].Variable;
        return isSafeVariable(entry, variable);
    }
    if (isZero(exp))
        return true;
    return false;
}

// Return the safe-argument indices for the call on `edge`, in ascending
// order: 0 if the instance expression is safe, and i + 1 for each safe
// argument i. The `callee` parameter is not used.
function getEdgeSafeArguments(entry, edge, callee)
{
    assert(edge.Kind == "Call");
    var res = [];
    if ("PEdgeCallInstance" in edge) {
        if (isEdgeSafeArgument(entry, edge.PEdgeCallInstance.Exp))
            res.push(0);
    }
    if ("PEdgeCallArguments" in edge) {
        var args = edge.PEdgeCallArguments.Exp;
        for (var i = 0; i < args.length; i++) {
            if (isEdgeSafeArgument(entry, args[i]))
                res.push(i + 1);
        }
    }
    return res;
}

// Return the single recorded [body, edge] assignment to the local `name`, or
// null if there are zero or several.
function singleAssignment(name)
{
    if (name in assignments) {
        var edges = assignments[name];
        if (edges.length == 1)
            return edges[0];
    }
    return null;
}

// If `exp` is a temporary with exactly one recorded assignment, return the
// edge that assigned it; otherwise return null.
function expressionValueEdge(exp) {
    if (!(exp.Kind == "Var" && exp.Variable.Kind == "Temp"))
        return null;
    const assign = singleAssignment(variableName(exp.Variable));
    if (!assign)
        return null;
    const [body, edge] = assign;
    return edge;
}

// Examples:
//
//   void foo(type* aSafe) {
//     type* safeBecauseNew = new type(...);
//     type* unsafeBecauseMultipleAssignments = new type(...);
//     if (rand())
//       unsafeBecauseMultipleAssignments = bar();
//     type* safeBecauseSingleAssignmentOfSafe = aSafe;
//   }
//
function isSafeVariable(entry, variable)
{
    var index = safeArgumentIndex(variable);
    if (index >= 0)
        return entry.isSafeArgument(index);

    if (variable.Kind != "Temp" && variable.Kind != "Local")
        return false;
    var name = variableName(variable);

    // Results for temporaries and locals are cached on the entry.
    if (!entry.safeLocals)
        entry.safeLocals = new Map;
    if (entry.safeLocals.has(name))
        return entry.safeLocals.get(name);

    const safe = isSafeLocalVariable(entry, name);
    entry.safeLocals.set(name, safe);
    return safe;
}

// Decide whether the local or temporary called `name` may be written through,
// based on its single recorded assignment (if any) or on its declared type in
// the current body.
function isSafeLocalVariable(entry, name)
{
    // If there is a single place where this variable has been assigned on
    // edges we are considering, look at that edge.
    var assign = singleAssignment(name);
    if (assign) {
        const [body, edge] = assign;

        // Treat temporary pointers to DebugOnly contents as thread local.
        if (isDirectCall(edge, /DebugOnly.*?::operator/))
            return true;

        // Treat heap allocated pointers as thread local during construction.
        // Hopefully the construction code doesn't leak pointers to the object
        // to places where other threads might access it.
        if (isDirectCall(edge, /operator new/) ||
            isDirectCall(edge, /nsCSSValue::Array::Create/))
        {
            return true;
        }

        if ("PEdgeCallInstance" in edge) {
            // References to the contents of an array are threadsafe if the array
            // itself is threadsafe.
            if ((isDirectCall(edge, /operator\[\]/) ||
                 isDirectCall(edge, /nsTArray.*?::InsertElementAt\b/) ||
                 isDirectCall(edge, /nsStyleContent::ContentAt/) ||
                 isDirectCall(edge, /nsTArray_base.*?::GetAutoArrayBuffer\b/)) &&
                isEdgeSafeArgument(entry, edge.PEdgeCallInstance.Exp))
            {
                return true;
            }

            // Watch for the coerced result of a getter_AddRefs or getter_Copies call.
            if (isDirectCall(edge, /operator /)) {
                var otherEdge = expressionValueEdge(edge.PEdgeCallInstance.Exp);
                if (otherEdge &&
                    isDirectCall(otherEdge, /getter_(?:AddRefs|Copies)/) &&
                    isEdgeSafeArgument(entry, otherEdge.PEdgeCallArguments.Exp[0]))
                {
                    return true;
                }
            }

            // RefPtr::operator->() and operator* transmit the safety of the
            // RefPtr to the return value.
            if (isDirectCall(edge, /RefPtr<.*?>::operator(->|\*)\(\)/) &&
                isEdgeSafeArgument(entry, edge.PEdgeCallInstance.Exp))
            {
                return true;
            }

            // Placement-new returns a pointer that is as safe as the pointer
            // passed to it. Exp[0] is the size, Exp[1] is the pointer/address.
            // Note that the invocation of the constructor is a separate call,
            // and so need not be considered here.
            //
            // NOTE(review): any direct call matching /operator new/ has
            // already returned true above, so this branch cannot be reached
            // as written — confirm the intended ordering before relying on it.
            if (isDirectCall(edge, /operator new/) &&
                edge.PEdgeCallInstance.Exp.length == 2 &&
                isEdgeSafeArgument(entry, edge.PEdgeCallInstance.Exp[1]))
            {
                return true;
            }

            // Coercion via AsAString preserves safety.
            if (isDirectCall(edge, /AsAString/) &&
                isEdgeSafeArgument(entry, edge.PEdgeCallInstance.Exp))
            {
                return true;
            }

            // Special case:
            //
            //   keyframe->mTimingFunction.emplace()
            //   keyframe->mTimingFunction->Init()
            //
            // The object calling Init should be considered safe here because
            // we just emplaced it, though in general keyframe::operator->
            // could do something crazy.
            if (isDirectCall(edge, /operator->/)) do {
                const predges = getPredecessors(body)[edge.Index[0]];
                if (!predges || predges.length != 1)
                    break;
                const predge = predges[0];
                if (!isDirectCall(predge, /\bemplace\b/))
                    break;
                const instance = predge.PEdgeCallInstance;
                if (JSON.stringify(instance) == JSON.stringify(edge.PEdgeCallInstance))
                    return true;
            } while (false);
        }

        if (isSafeAssignment(entry, edge, name))
            return true;

        // Watch out for variables which were assigned arguments.
        var rhsVariable = variableAssignRhs(edge);
        if (rhsVariable)
            return isSafeVariable(entry, rhsVariable);
    }

    // When temporary stack structures are created (either to return or to call
    // methods on without assigning them a name), the generated sixgill JSON is
    // rather strange. The temporary has structure type and is never assigned
    // to, but is dereferenced. GCC is probably not showing us everything it is
    // doing to compile this code. Pattern match for this case here.

    // The variable should have structure type. A body is not required to carry
    // a DefineVariable list (process() checks for it too), so treat a missing
    // list as "no declaration found".
    var type = null;
    for (var defvar of currentBody.DefineVariable || []) {
        if (variableName(defvar.Variable) == name) {
            type = defvar.Type;
            break;
        }
    }
    if (!type || type.Kind != "CSU")
        return false;

    // The variable should not have been written to anywhere up to this point.
    // If it is initialized at this point we should have seen *some* write
    // already, since the CFG edges are visited in reverse post order.
    if (name in assignments)
        return false;

    return true;
}

// Whether the pointer member `memberName` of `containerType` may be written
// through when the container itself is safe.
function isSafeMemberPointer(containerType, memberName, memberType)
{
    // nsTArray owns its header.
    if (containerType.includes("nsTArray_base") && memberName == "mHdr")
        return true;

    if (memberType.Kind != 'Pointer')
        return false;

    // Special-cases go here :)
    return false;
}

// Return whether 'exp == value' holds only when execution is on the main thread.
+function testFailsOffMainThread(exp, value) { + switch (exp.Kind) { + case "Drf": + var edge = expressionValueEdge(exp.Exp[0]); + if (edge) { + if (isDirectCall(edge, /NS_IsMainThread/) && value) + return true; + if (isDirectCall(edge, /IsInServoTraversal/) && !value) + return true; + if (isDirectCall(edge, /IsCurrentThreadInServoTraversal/) && !value) + return true; + if (isDirectCall(edge, /__builtin_expect/)) + return testFailsOffMainThread(edge.PEdgeCallArguments.Exp[0], value); + if (edge.Kind == "Assign") + return testFailsOffMainThread(edge.Exp[1], value); + } + break; + case "Unop": + if (exp.OpCode == "LogicalNot") + return testFailsOffMainThread(exp.Exp[0], !value); + break; + case "Binop": + if (exp.OpCode == "NotEqual" || exp.OpCode == "Equal") { + var cmpExp = isZero(exp.Exp[0]) + ? exp.Exp[1] + : (isZero(exp.Exp[1]) ? exp.Exp[0] : null); + if (cmpExp) + return testFailsOffMainThread(cmpExp, exp.OpCode == "NotEqual" ? value : !value); + } + break; + case "Int": + if (exp.String == "0" && value) + return true; + if (exp.String == "1" && !value) + return true; + break; + } + return false; +} diff --git a/js/src/devtools/rootAnalysis/analyzeRoots.js b/js/src/devtools/rootAnalysis/analyzeRoots.js new file mode 100644 index 0000000000..46bc7ea1fb --- /dev/null +++ b/js/src/devtools/rootAnalysis/analyzeRoots.js @@ -0,0 +1,963 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); +loadRelativeToScript('callgraph.js'); +loadRelativeToScript('CFG.js'); +loadRelativeToScript('dumpCFG.js'); + +var sourceRoot = (os.getenv('SOURCE') || '') + '/'; + +var functionName; +var functionBodies; + +try { + var options = parse_options([ + { + name: "--function", + type: 'string', + }, + { + name: "-f", + type: "string", + dest: "function", + }, + { + name: "gcFunctions", + default: "gcFunctions.lst" + }, + { + name: "limitedFunctions", + default: "limitedFunctions.lst" + }, + { + name: "gcTypes", + default: "gcTypes.txt" + }, + { + name: "typeInfo", + default: "typeInfo.txt" + }, + { + name: "batch", + type: "number", + default: 1 + }, + { + name: "numBatches", + type: "number", + default: 1 + }, + { + name: "tmpfile", + default: "tmp.txt" + }, + ]); +} catch (e) { + printErr(e); + printErr("Usage: analyzeRoots.js [-f function_name] <gcFunctions.lst> <limitedFunctions.lst> <gcTypes.txt> <typeInfo.txt> [start end [tmpfile]]"); + quit(1); +} +var gcFunctions = {}; +var text = snarf(options.gcFunctions).split("\n"); +assert(text.pop().length == 0); +for (const line of text) + gcFunctions[mangled(line)] = readable(line); + +var limitedFunctions = JSON.parse(snarf(options.limitedFunctions)); +text = null; + +var typeInfo = loadTypeInfo(options.typeInfo); + +var match; +var gcThings = new Set(); +var gcPointers = new Set(); +var gcRefs = new Set(typeInfo.GCRefs); + +text = snarf(options.gcTypes).split("\n"); +for (var line of text) { + if (match = /^GCThing: (.*)/.exec(line)) + gcThings.add(match[1]); + if (match = /^GCPointer: (.*)/.exec(line)) + gcPointers.add(match[1]); +} +text = null; + +function isGCRef(type) +{ + if (type.Kind == "CSU") + return gcRefs.has(type.Name); + return false; +} + +function isGCType(type) +{ + if (type.Kind == "CSU") + return gcThings.has(type.Name); + else if (type.Kind == 
"Array") + return isGCType(type.Type); + return false; +} + +function isUnrootedPointerDeclType(decl) +{ + // Treat non-temporary T& references as if they were the underlying type T. + // For now, restrict this to only the types specifically annotated with JS_HAZ_GC_REF + // to avoid lots of false positives with other types. + let type = isReferenceDecl(decl) && isGCRef(decl.Type.Type) ? decl.Type.Type : decl.Type; + + while (type.Kind == "Array") { + type = type.Type; + } + + if (type.Kind == "Pointer") { + return isGCType(type.Type); + } else if (type.Kind == "CSU") { + return gcPointers.has(type.Name); + } else { + return false; + } +} + +function edgeCanGC(functionName, body, edge, scopeAttrs, functionBodies) +{ + if (edge.Kind != "Call") { + return false; + } + + for (const { callee, attrs } of getCallees(body, edge, scopeAttrs, functionBodies)) { + if (attrs & (ATTR_GC_SUPPRESSED | ATTR_REPLACED)) { + continue; + } + + if (callee.kind == "direct") { + const func = mangled(callee.name); + if ((func in gcFunctions) || ((func + internalMarker) in gcFunctions)) + return `'${func}$${gcFunctions[func]}'`; + return false; + } else if (callee.kind == "indirect") { + if (!indirectCallCannotGC(functionName, callee.variable)) { + return "'*" + callee.variable + "'"; + } + } else if (callee.kind == "field") { + if (fieldCallCannotGC(callee.staticCSU, callee.field)) { + continue; + } + const fieldkey = callee.fieldKey; + if (fieldkey in gcFunctions) { + return `'${fieldkey}'`; + } + } else { + return "<unknown>"; + } + } + + return false; +} + +// Search upwards through a function's control flow graph (CFG) to find a path containing: +// +// - a use of a variable, preceded by +// +// - a function call that can GC, preceded by +// +// - a use of the variable that shows that the live range starts at least that +// far back, preceded by +// +// - an informative use of the variable (which might be the same use), one that +// assigns to it a value that might contain a GC 
pointer (or is the start of +// the function for parameters or 'this'.) This is not necessary for +// correctness, it just makes it easier to understand why something might be +// a hazard. The output of the analysis will include the whole path from the +// informative use to the post-GC use, to make the problem as understandable +// as possible. +// +// A canonical example might be: +// +// void foo() { +// JS::Value* val = lookupValue(); <-- informative use +// if (!val.isUndefined()) { <-- any use +// GC(); <-- GC call +// } +// putValue(val); <-- a use after a GC +// } +// +// The search is performed on an underlying CFG that we traverse in +// breadth-first order (to find the shortest path). We build a path starting +// from an empty path and conditionally lengthening and improving it according +// to the computation occurring on each incoming edge. (If that path so far +// does not have a GC call and we traverse an edge with a GC call, then we +// lengthen the path by that edge and record it as including a GC call.) The +// resulting path may include a point or edge more than once! For example, in: +// +// void foo(JS::Value val) { +// for (int i = 0; i < N; i++) { +// GC(); +// val = processValue(val); +// } +// } +// +// the path would start at the point after processValue(), go through the GC(), +// then back to the processValue() (for the call in the previous loop +// iteration). +// +// While searching, each point is annotated with a path node corresponding to +// the best path found to that node so far. When a later search ends up at the +// same point, the best path node is kept. (But the path that it heads may +// include an earlier path node for the same point, as in the case above.) +// +// What info we want depends on whether the variable turns out to be live +// across a GC call. We are looking for both hazards (unrooted variables live +// across GC calls) and unnecessary roots (rooted variables that have no GC +// calls in their live ranges.) 
//
// If not:
//
//  - 'minimumUse': the earliest point in each body that uses the variable, for
//    reporting on unnecessary roots.
//
// If so:
//
//  - 'successor': a path from the GC call to a use of the variable after the GC
//    call, chained through 'successor' field in the returned edge descriptor
//
//  - 'gcInfo': a direct pointer to the GC call edge
//
// Parameters:
//
//  - start_body, start_point: the body and point of the variable use from
//    which the upward search begins.
//
//  - funcAttrs: attribute bits OR'ed into every edge's attributes before
//    checking for ATTR_GC_SUPPRESSED / ATTR_REPLACED.
//
//  - variable: the variable whose live range is being examined.
//
// Returns the Path produced by BFS_upwards if it has both 'gcInfo' and
// 'anyUse' set; otherwise the first path recorded with any use after a GC,
// which is null if there was none.
//
function findGCBeforeValueUse(start_body, start_point, funcAttrs, variable)
{
    // Scan through all edges preceding an unrooted variable use, using an
    // explicit worklist, looking for a GC call and a preceding point where the
    // variable is known to be live. A worklist contains an incoming edge
    // together with a description of where it or one of its successors GC'd
    // (if any).

    // One node of a path: a (body, ppoint) pair linked to the node that
    // follows it in execution order via 'successor'. The "progress"
    // properties (informativeUse, anyUse, gcInfo) are copied from the
    // successor when present, so each node summarizes what has been found on
    // the path so far.
    class Path {
        get ProgressProperties() { return ["informativeUse", "anyUse", "gcInfo"]; }

        constructor(successor_path, body, ppoint) {
            Object.assign(this, {body, ppoint});
            if (successor_path !== undefined) {
                this.successor = successor_path;
                for (const prop of this.ProgressProperties) {
                    if (prop in successor_path) {
                        this[prop] = successor_path[prop];
                    }
                }
            }
        }

        // Comma-separated list of the points on this path, following
        // 'successor' links until a node without a (truthy) ppoint is reached.
        toString() {
            const trail = [];
            for (let path = this; path.ppoint; path = path.successor) {
                trail.push(path.ppoint);
            }
            return trail.join();
        }

        // Return -1, 0, or 1 to indicate how complete this Path is compared
        // to another one. Properties are checked in ProgressProperties order;
        // the first one present on only one of the two paths decides.
        compare(other) {
            for (const prop of this.ProgressProperties) {
                const a = this.hasOwnProperty(prop);
                const b = other.hasOwnProperty(prop);
                if (a != b) {
                    return a - b;
                }
            }
            return 0;
        }
    };

    // In case we never find an informative use, keep track of the best path
    // found with any use.
    let bestPathWithAnyUse = null;

    const visitor = new class extends Visitor {
        constructor() {
            super(functionBodies);
        }

        // Do a BFS upwards through the CFG, starting from a use of the
        // variable and searching for a path containing a GC followed by an
        // initializing use of the variable (or, in forward direction, a start
        // of the variable's live range, a GC within that live range, and then
        // a use showing that the live range extends past the GC call.)
        // Actually, possibly two uses: any use at all, and then if available
        // an "informative" use that is more convincing (they may be the same).
        //
        // The CFG is a graph (a 'body' here is acyclic, but they can contain
        // loop nodes that bridge to additional bodies for the loop, so the
        // overall graph can be cyclic.) That means there may be multiple paths
        // from point A to point B, and we want paths with a GC on them. This
        // can be thought of as searching for a "maximal GCing" path from a use
        // A to an initialization B.
        //
        // This is implemented as a BFS search that when it reaches a point
        // that has been visited before, stops if and only if the current path
        // being advanced is a less GC-ful path. The traversal pushes a
        // `gcInfo` token, initially empty, up through the graph and stores the
        // maximal one visited so far at every point.
        //
        // Note that this means we may traverse through the same point more
        // than once, and so in theory this scan is superlinear -- if you visit
        // every point twice, once for a non GC path and once for a GC path, it
        // would be 2^n. But that is unlikely to matter, since you'd need lots
        // of split/join pairs that GC on one side and not the other, and you'd
        // have to visit them in an unlucky order. This could be fixed by
        // updating the gcInfo for past points in a path when a GC is found,
        // but it hasn't been found to matter in practice yet.

        // Decide what to do at a point, given the path info previously stored
        // there (`prev`, undefined on first visit) and the newly computed one
        // (`current`, null if extend_path terminated the search). Returns one
        // of the strings "prune", "done", or "continue".
        next_action(prev, current) {
            // Continue if first visit, or the new path is more complete than
            // the old path. This could be enhanced at some point to choose
            // paths with 'better' examples of GC (eg a call that invokes GC
            // through concrete functions rather than going through a function
            // pointer that is conservatively assumed to GC.)

            if (!current) {
                // This search path has been terminated.
                return "prune";
            }

            if (current.informativeUse) {
                // We have a path with an informative use leading to a GC
                // leading to the starting point.
                assert(current.gcInfo);
                return "done";
            }

            if (prev === undefined) {
                // first visit
                return "continue";
            }

            if (!prev.gcInfo && current.gcInfo) {
                // More GC.
                return "continue";
            } else {
                return "prune";
            }
        }

        // Choose which of two path infos to keep for a point.
        merge_info(prev, current) {
            // Keep the most complete path.

            if (!prev || !current) {
                return prev || current;
            }

            // Tie goes to the first found, since it will be shorter when doing a BFS-like search.
            return prev.compare(current) >= 0 ? prev : current;
        }

        // Compute the path info for the point `ppoint` in `body`, reached by
        // walking backwards over `edge` from the node `successor_path`.
        // Returns the new Path, or null to stop searching through this point.
        // Also updates body.minimumUse as a side effect.
        extend_path(edge, body, ppoint, successor_path) {
            // Clone the successor path node and then tack on the new point. Other values
            // will be updated during the rest of this function, according to what is
            // happening on the edge.
            const path = new Path(successor_path, body, ppoint);
            if (edge === null) {
                // Artificial edge to connect loops to their surrounding nodes in the outer body.
                // Does not influence "completeness" of path.
                return path;
            }

            assert(ppoint == edge.Index[0]);

            if (edgeEndsValueLiveRange(edge, variable, body)) {
                // Terminate the search through this point.
                return null;
            }

            const edge_starts = edgeStartsValueLiveRange(edge, variable);
            const edge_uses = edgeUsesVariable(edge, variable, body);

            // Track the lowest-numbered point in this body that starts or
            // uses the variable (0 / unset means "none yet").
            if (edge_starts || edge_uses) {
                if (!body.minimumUse || ppoint < body.minimumUse)
                    body.minimumUse = ppoint;
            }

            if (edge_starts) {
                // This is a beginning of the variable's live range. If we can
                // reach a GC call from here, then we're done -- we have a path
                // from the beginning of the live range, through the GC call, to a
                // use after the GC call that proves its live range extends at
                // least that far.
                if (path.gcInfo) {
                    path.anyUse = path.anyUse || edge;
                    path.informativeUse = path.informativeUse || edge;
                    return path;
                }

                // Otherwise, truncate this particular branch of the search at this
                // edge -- there is no GC after this use, and traversing the edge
                // would lead to a different live range.
                return null;
            }

            // The value is live across this edge. Check whether this edge can
            // GC (if we don't have a GC yet on this path.)
            const had_gcInfo = Boolean(path.gcInfo);
            const edgeAttrs = body.attrs[ppoint] | funcAttrs;
            if (!path.gcInfo && !(edgeAttrs & (ATTR_GC_SUPPRESSED | ATTR_REPLACED))) {
                var gcName = edgeCanGC(functionName, body, edge, edgeAttrs, functionBodies);
                if (gcName) {
                    path.gcInfo = {name:gcName, body, ppoint, edge: edge.Index};
                }
            }

            // Beginning of function?
            if (ppoint == body.Index[0] && body.BlockId.Kind != "Loop") {
                if (path.gcInfo && (variable.Kind == "Arg" || variable.Kind == "This")) {
                    // The scope of arguments starts at the beginning of the
                    // function.
                    path.anyUse = path.informativeUse = true;
                }

                if (path.anyUse) {
                    // We know the variable was live across the GC. We may or
                    // may not have found an "informative" explanation
                    // beginning of the live range. (This can happen if the
                    // live range started when a variable is used as a
                    // retparam.)
                    return path;
                }
            }

            if (!path.gcInfo) {
                // We haven't reached a GC yet, so don't start looking for uses.
                return path;
            }

            if (!edge_uses) {
                // We have a GC. If this edge doesn't use the value, then there
                // is no change to the completeness of the path.
                return path;
            }

            // The live range starts at least this far back, so we're done for
            // the same reason as with edge_starts. The only difference is that
            // a GC on this edge indicates a hazard, whereas if we're killing a
            // live range in the GC call then it's not live *across* the call.
            //
            // However, we may want to generate a longer usage chain for the
            // variable than is minimally necessary. For example, consider:
            //
            //   Value v = f();
            //   if (v.isUndefined())
            //     return false;
            //   gc();
            //   return v;
            //
            // The call to .isUndefined() is considered to be a use and
            // therefore indicates that v must be live at that point. But it's
            // more helpful to the user to continue the 'successor' path to
            // include the ancestor where the value was generated. So we will
            // only stop here if edge.Kind is Assign; otherwise, we'll pass a
            // "preGCLive" value up through the worklist to remember that the
            // variable *is* alive before the GC and so this function should be
            // returning a true value even if we don't find an assignment.
            //
            // NOTE(review): no "preGCLive" value appears in this function; the
            // code below records this state in path.anyUse and
            // bestPathWithAnyUse instead.

            // One special case: if the use of the variable is on the
            // destination part of the edge (which currently only happens for
            // the return value and a terminal edge in the body), and this edge
            // is also GCing, then that usage happens *after* the GC and so
            // should not be used for anyUse or informativeUse. This matters
            // for a hazard involving a destructor GC'ing after an immobile
            // return value has been assigned:
            //
            //   GCInDestructor guard(cx);
            //   if (cond()) {
            //     return nullptr;
            //   }
            //
            // which boils down to
            //
            //   p1 --(construct guard)-->
            //   p2 --(call cond)-->
            //   p3 --(returnval := nullptr) -->
            //   p4 --(destruct guard, possibly GCing)-->
            //   p5
            //
            // The return value is considered to be live at p5. The live range
            // of the return value would ordinarily be from p3->p4->p5, except
            // that the nullptr assignment means it needn't be considered live
            // back that far, and so the live range is *just* p5. The GC on the
            // 4->5 edge happens just before that range, so the value was not
            // live across the GC.
            //
            if (!had_gcInfo && edge_uses == edge.Index[1]) {
                return path; // New GC does not cross this variable use.
            }

            path.anyUse = path.anyUse || edge;
            bestPathWithAnyUse = bestPathWithAnyUse || path;
            if (edge.Kind == 'Assign') {
                path.informativeUse = edge; // Done! Setting this terminates the search.
            }

            return path;
        };
    };

    const result = BFS_upwards(start_body, start_point, functionBodies, visitor, new Path());
    if (result && result.gcInfo && result.anyUse) {
        return result;
    } else {
        return bestPathWithAnyUse;
    }
}

// Check every edge of every body in the current function for a use of
// `variable` that has a GC call before it within the variable's live range.
//
// Returns the path found by findGCBeforeValueUse for the first such use, with
// its 'afterGCUse' property set to the point of that use, or null if there is
// none. As a side effect, resets body.minimumUse to 0 on every body before
// searching (findGCBeforeValueUse then updates it).
//
// `liveToEnd` is passed through to edgeUsesVariable.
function variableLiveAcrossGC(funcAttrs, variable, liveToEnd=false)
{
    // A variable is live across a GC if (1) it is used by an edge (as in, it
    // was at least initialized), and (2) it is used after a GC in a successor
    // edge.

    for (var body of functionBodies)
        body.minimumUse = 0;

    for (var body of functionBodies) {
        if (!("PEdge" in body))
            continue;
        for (var edge of body.PEdge) {
            // Examples:
            //
            //   JSObject* obj = NewObject();
            //   cangc();
            //   obj = NewObject();     <-- mentions 'obj' but kills previous value
            //
            // This is not a hazard. Contrast this with:
            //
            //   JSObject* obj = NewObject();
            //   cangc();
            //   obj = LookAt(obj);  <-- uses 'obj' and kills previous value
            //
            // This is a hazard; the initial value of obj is live across
            // cangc(). And a third possibility:
            //
            //   JSObject* obj = NewObject();
            //   obj = CopyObject(obj);
            //
            // This is not a hazard, because even though CopyObject can GC, obj
            // is not live across it. (obj is live before CopyObject, and
            // probably after, but not across.) There may be a hazard within
            // CopyObject, of course.
            //

            // Ignore uses that are just invalidating the previous value.
            if (edgeEndsValueLiveRange(edge, variable, body))
                continue;

            var usePoint = edgeUsesVariable(edge, variable, body, liveToEnd);
            if (usePoint) {
                var call = findGCBeforeValueUse(body, usePoint, funcAttrs, variable);
                if (!call)
                    continue;

                call.afterGCUse = usePoint;
                return call;
            }
        }
    }
    return null;
}

// An unrooted variable has its address stored in another variable via
// assignment, or passed into a function that can GC. If the address is
// assigned into some other variable, we can't track it to see if it is held
// live across a GC. If it is passed into a function that can GC, then it's
// sort of like a Handle to an unrooted location, and the callee could GC
// before overwriting it or rooting it.
//
// Returns {body, ppoint} for the first such edge found, or null if there is
// none. Nothing is reported when funcAttrs includes ATTR_GC_SUPPRESSED or
// ATTR_REPLACED.
function unsafeVariableAddressTaken(funcAttrs, variable)
{
    for (var body of functionBodies) {
        if (!("PEdge" in body))
            continue;
        for (var edge of body.PEdge) {
            if (edgeTakesVariableAddress(edge, variable, body)) {
                if (funcAttrs & (ATTR_GC_SUPPRESSED | ATTR_REPLACED)) {
                    continue;
                }
                if (edge.Kind == "Assign" || edgeCanGC(functionName, body, edge, funcAttrs, functionBodies)) {
                    return {body:body, ppoint:edge.Index[0]};
                }
            }
        }
    }
    return null;
}

// Read out the brief (non-JSON, semi-human-readable) CFG description for the
+function loadPrintedLines(functionName) +{ + assert(!os.system("xdbfind src_body.xdb '" + functionName + "' > " + options.tmpfile)); + var lines = snarf(options.tmpfile).split('\n'); + + for (var body of functionBodies) + body.lines = []; + + // Distribute lines of output to the block they originate from. + var currentBody = null; + for (var line of lines) { + if (/^block:/.test(line)) { + if (match = /:(loop#[\d#]+)/.exec(line)) { + var loop = match[1]; + var found = false; + for (var body of functionBodies) { + if (body.BlockId.Kind == "Loop" && body.BlockId.Loop == loop) { + assert(!found); + found = true; + currentBody = body; + } + } + assert(found); + } else { + for (var body of functionBodies) { + if (body.BlockId.Kind == "Function") + currentBody = body; + } + } + } + if (currentBody) + currentBody.lines.push(line); + } +} + +function findLocation(body, ppoint, opts={brief: false}) +{ + var location = body.PPoint[ppoint ? ppoint - 1 : 0].Location; + var file = location.CacheString; + + if (file.indexOf(sourceRoot) == 0) + file = file.substring(sourceRoot.length); + + if (opts.brief) { + var m = /.*\/(.*)/.exec(file); + if (m) + file = m[1]; + } + + return file + ":" + location.Line; +} + +function locationLine(text) +{ + if (match = /:(\d+)$/.exec(text)) + return match[1]; + return 0; +} + +function getEntryTrace(functionName, entry) +{ + const trace = []; + + var gcPoint = entry.gcInfo ? entry.gcInfo.ppoint : 0; + + if (!functionBodies[0].lines) + loadPrintedLines(functionName); + + while (entry.successor) { + var ppoint = entry.ppoint; + var lineText = findLocation(entry.body, ppoint, {"brief": true}); + + var edgeText = ""; + if (entry.successor && entry.successor.body == entry.body) { + // If the next point in the trace is in the same block, look for an + // edge between them. 
+ var next = entry.successor.ppoint; + + if (!entry.body.edgeTable) { + var table = {}; + entry.body.edgeTable = table; + for (var line of entry.body.lines) { + if (match = /^\w+\((\d+,\d+),/.exec(line)) + table[match[1]] = line; // May be multiple? + } + if (entry.body.BlockId.Kind == 'Loop') { + const [startPoint, endPoint] = entry.body.Index; + table[`${endPoint},${startPoint}`] = '(loop to next iteration)'; + } + } + + edgeText = entry.body.edgeTable[ppoint + "," + next]; + assert(edgeText); + if (ppoint == gcPoint) + edgeText += " [[GC call]]"; + } else { + // Look for any outgoing edge from the chosen point. + for (var line of entry.body.lines) { + if (match = /\((\d+),/.exec(line)) { + if (match[1] == ppoint) { + edgeText = line; + break; + } + } + } + if (ppoint == entry.body.Index[1] && entry.body.BlockId.Kind == "Function") + edgeText += " [[end of function]]"; + } + + // TODO: Store this in a more structured form for better markup, and perhaps + // linking to line numbers. + trace.push({lineText, edgeText}); + entry = entry.successor; + } + + return trace; +} + +function isRootedDeclType(decl) +{ + // Treat non-temporary T& references as if they were the underlying type T. + const type = isReferenceDecl(decl) ? decl.Type.Type : decl.Type; + return type.Kind == "CSU" && ((type.Name in typeInfo.RootedPointers) || + (type.Name in typeInfo.RootedGCThings)); +} + +function printRecord(record) { + print(JSON.stringify(record)); +} + +function processBodies(functionName, wholeBodyAttrs) +{ + if (!("DefineVariable" in functionBodies[0])) + return; + const funcInfo = limitedFunctions[mangled(functionName)] || { attributes: 0 }; + const funcAttrs = funcInfo.attributes | wholeBodyAttrs; + + // Look for the JS_EXPECT_HAZARDS annotation, so as to output a different + // message in that case that won't be counted as a hazard. 
+ var annotations = new Set(); + for (const variable of functionBodies[0].DefineVariable) { + if (variable.Variable.Kind == "Func" && variable.Variable.Name[0] == functionName) { + for (const { Name: [tag, value] } of (variable.Type.Annotation || [])) { + if (tag == 'annotate') + annotations.add(value); + } + } + } + + let missingExpectedHazard = annotations.has("Expect Hazards"); + + // Awful special case, hopefully temporary: + // + // The DOM bindings code generator uses "holders" to externally root + // variables. So for example: + // + // StringObjectRecordOrLong arg0; + // StringObjectRecordOrLongArgument arg0_holder(arg0); + // arg0_holder.TrySetToStringObjectRecord(cx, args[0]); + // GC(); + // self->PassUnion22(cx, arg0); + // + // This appears to be a rooting hazard on arg0, but it is rooted by + // arg0_holder if you set it to any of its union types that requires + // rooting. + // + // Additionally, the holder may be reported as a hazard because it's not + // itself a Rooted or a subclass of AutoRooter; it contains a + // Maybe<RecordRooter<T>> that will get emplaced if rooting is required. + // + // Hopefully these will be simplified at some point (see bug 1517829), but + // for now we special-case functions in the mozilla::dom namespace that + // contain locals with types ending in "Argument". Or + // Maybe<SomethingArgument>. Or Maybe<SpiderMonkeyInterfaceRooter<T>>. It's + // a harsh world. + const ignoreVars = new Set(); + if (functionName.match(/mozilla::dom::/)) { + const vars = functionBodies[0].DefineVariable.filter( + v => v.Type.Kind == 'CSU' && v.Variable.Kind == 'Local' + ).map( + v => [ v.Variable.Name[0], v.Type.Name ] + ); + + const holders = vars.filter( + ([n, t]) => n.match(/^arg\d+_holder$/) && + (t.includes("Argument") || t.includes("Rooter"))); + for (const [holder,] of holders) { + ignoreVars.add(holder); // Ignore the holder. + ignoreVars.add(holder.replace("_holder", "")); // Ignore the "managed" arg. 
+ } + } + + const [mangledSymbol, readable] = splitFunction(functionName); + + for (let decl of functionBodies[0].DefineVariable) { + var name; + if (decl.Variable.Kind == "This") + name = "this"; + else if (decl.Variable.Kind == "Return") + name = "<returnvalue>"; + else + name = decl.Variable.Name[0]; + + if (ignoreVars.has(name)) + continue; + + let liveToEnd = false; + if (decl.Variable.Kind == "Arg" && isReferenceDecl(decl) && decl.Type.Reference == 2) { + // References won't run destructors, so they would normally not be + // considered live at the end of the function. In order to handle + // the pattern of moving a GC-unsafe value into a function (eg an + // AutoCheckCannotGC&&), assume all argument rvalue references live to the + // end of the function unless their liveness is terminated by + // calling reset() or moving them into another function call. + liveToEnd = true; + } + + if (isRootedDeclType(decl)) { + if (!variableLiveAcrossGC(funcAttrs, decl.Variable)) { + // The earliest use of the variable should be its constructor. 
+ var lineText; + for (var body of functionBodies) { + if (body.minimumUse) { + var text = findLocation(body, body.minimumUse); + if (!lineText || locationLine(lineText) > locationLine(text)) + lineText = text; + } + } + const record = { + record: "unnecessary", + functionName, + mangled: mangledSymbol, + readable, + variable: name, + type: str_Type(decl.Type), + loc: lineText || "???", + } + print(","); + printRecord(record); + } + } else if (isUnrootedPointerDeclType(decl)) { + var result = variableLiveAcrossGC(funcAttrs, decl.Variable, liveToEnd); + if (result) { + assert(result.gcInfo); + const edge = result.gcInfo.edge; + const body = result.gcInfo.body; + const lineText = findLocation(body, result.gcInfo.ppoint); + const makeLoc = l => [l.Location.CacheString, l.Location.Line]; + const range = [makeLoc(body.PPoint[edge[0] - 1]), makeLoc(body.PPoint[edge[1] - 1])]; + const record = { + record: "unrooted", + expected: annotations.has("Expect Hazards"), + functionName, + mangled: mangledSymbol, + readable, + variable: name, + type: str_Type(decl.Type), + gccall: result.gcInfo.name.replaceAll("'", ""), + gcrange: range, + loc: lineText, + trace: getEntryTrace(functionName, result), + }; + missingExpectedHazard = false; + print(","); + printRecord(record); + } + result = unsafeVariableAddressTaken(funcAttrs, decl.Variable); + if (result) { + var lineText = findLocation(result.body, result.ppoint); + const record = { + record: "address", + functionName, + mangled: mangledSymbol, + readable, + variable: name, + loc: lineText, + trace: getEntryTrace(functionName, {body:result.body, ppoint:result.ppoint}), + }; + print(","); + printRecord(record); + } + } + } + + if (missingExpectedHazard) { + const { + Location: [ + { CacheString: startfile, Line: startline }, + { CacheString: endfile, Line: endline } + ] + } = functionBodies[0]; + + const loc = (startfile == endfile) ? 
`${startfile}:${startline}-${endline}` + : `${startfile}:${startline}`; + + const record = { + record: "missing", + functionName, + mangled: mangledSymbol, + readable, + loc, + } + print(","); + printRecord(record); + } +} + +print("[\n"); +var now = new Date(); +printRecord({record: "time", iso: "" + now, t: now.getTime()}); + +var xdb = xdbLibrary(); +xdb.open("src_body.xdb"); + +var minStream = xdb.min_data_stream()|0; +var maxStream = xdb.max_data_stream()|0; + +var start = batchStart(options.batch, options.numBatches, minStream, maxStream); +var end = batchLast(options.batch, options.numBatches, minStream, maxStream); + +function process(name, json) { + functionName = name; + functionBodies = JSON.parse(json); + + // Annotate body with a table of all points within the body that may be in + // a limited scope (eg within the scope of a GC suppression RAII class.) + // body.attrs is a plain object indexed by point, with the value being a + // bit set stored in an integer. + for (var body of functionBodies) + body.attrs = []; + + for (var body of functionBodies) { + for (var [pbody, id, attrs] of allRAIIGuardedCallPoints(typeInfo, functionBodies, body, isLimitConstructor)) + { + if (attrs) + pbody.attrs[id] = attrs; + } + } + + processBodies(functionName); +} + +if (options.function) { + var data = xdb.read_entry(options.function); + var json = data.readString(); + debugger; + process(options.function, json); + xdb.free_string(data); + print("\n]\n"); + quit(0); +} + +for (var nameIndex = start; nameIndex <= end; nameIndex++) { + var name = xdb.read_key(nameIndex); + var functionName = name.readString(); + var data = xdb.read_entry(name); + xdb.free_string(name); + var json = data.readString(); + try { + process(functionName, json); + } catch (e) { + printErr("Exception caught while handling " + functionName); + throw(e); + } + xdb.free_string(data); +} + +print("\n]\n"); diff --git a/js/src/devtools/rootAnalysis/annotations.js 
// b/js/src/devtools/rootAnalysis/annotations.js new file mode 100644 index 0000000000..7aedc7edac
// --- /dev/null
// +++ b/js/src/devtools/rootAnalysis/annotations.js
// @@ -0,0 +1,489 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */

"use strict";

// Ignore calls made through these function pointers
var ignoreIndirectCalls = {
    "mallocSizeOf" : true,
    "aMallocSizeOf" : true,
    "__conv" : true,
    "__convf" : true,
    "callback_newtable" : true,
};

// Types that when constructed with no arguments, are "safe" values (they do
// not contain GC pointers, or values with nontrivial destructors.)
var typesWithSafeConstructors = new Set([
    "mozilla::Maybe",
    "mozilla::dom::Nullable",
    "mozilla::dom::Optional",
    "mozilla::UniquePtr",
    "js::UniquePtr"
]);

// Map from type name to the set of method names listed as resetters for
// that type.
var resetterMethods = {
    'mozilla::Maybe': new Set(["reset"]),
    'mozilla::UniquePtr': new Set(["reset"]),
    'js::UniquePtr': new Set(["reset"]),
    'mozilla::dom::Nullable': new Set(["SetNull"]),
    'mozilla::dom::TypedArray_base': new Set(["Reset"]),
    'RefPtr': new Set(["forget"]),
    'nsCOMPtr': new Set(["forget"]),
    'JS::AutoAssertNoGC': new Set(["reset"]),
};

// Return whether the function name `name` contains a RefPtr or nsCOMPtr
// destructor call ("::~RefPtr(" or "::~nsCOMPtr(").
function isRefcountedDtor(name) {
    return name.includes("::~RefPtr(") || name.includes("::~nsCOMPtr(");
}

// Return true if a call through the function pointer `fullVariable`, made
// from within `fullCaller`, is known not to GC. Both arguments are passed
// through readable() before being matched against the exceptions below.
function indirectCallCannotGC(fullCaller, fullVariable)
{
    var caller = readable(fullCaller);

    // This is usually a simple variable name, but sometimes a full name gets
    // passed through. And sometimes that name is truncated. Examples:
    //   _ZL13gAbortHandler$mozalloc_oom.cpp:void (* gAbortHandler)(size_t)
    //   _ZL14pMutexUnlockFn$umutex.cpp:void (* pMutexUnlockFn)(const void*
    var name = readable(fullVariable);

    // Own-property check: `name in ignoreIndirectCalls` would also match
    // names inherited from Object.prototype (eg "toString", "constructor"),
    // silently treating function pointers with those names as unable to GC.
    if (Object.hasOwn(ignoreIndirectCalls, name))
        return true;

    if (name == "mapper" && caller == "ptio.c:pt_MapError")
        return true;

    if (name == "params" && caller == "PR_ExplodeTime")
        return true;

    // hook called during script finalization which cannot GC.
    if (/CallDestroyScriptHook/.test(caller))
        return true;

    // Call through a 'callback' function pointer, in a place where we're going
    // to be throwing a JS exception.
    if (name == "callback" && caller.includes("js::ErrorToException"))
        return true;

    // The math cache only gets called with non-GC math functions.
    if (name == "f" && caller.includes("js::MathCache::lookup"))
        return true;

    // It would probably be better to somehow rewrite PR_CallOnce(foo) into a
    // call of foo, but for now just assume that nobody is crazy enough to use
    // PR_CallOnce with a function that can GC.
    if (name == "func" && caller == "PR_CallOnce")
        return true;

    return false;
}

// Ignore calls through functions pointers with these types
var ignoreClasses = {
    "JSStringFinalizer" : true,
    "SprintfState" : true,
    "SprintfStateStr" : true,
    "JSLocaleCallbacks" : true,
    "JSC::ExecutableAllocator" : true,
    "PRIOMethods": true,
    "_MD_IOVector" : true,
    "malloc_table_t": true, // replace_malloc
    "malloc_hook_table_t": true, // replace_malloc
    "mozilla::MallocSizeOf": true,
    "MozMallocSizeOf": true,
};

// Ignore calls through TYPE.FIELD, where TYPE is the class or struct name containing
// a function pointer field named FIELD.
+var ignoreCallees = { + "js::Class.trace" : true, + "js::Class.finalize" : true, + "JSClassOps.trace" : true, + "JSClassOps.finalize" : true, + "JSRuntime.destroyPrincipals" : true, + "icu_50::UObject.__deleting_dtor" : true, // destructors in ICU code can't cause GC + "mozilla::CycleCollectedJSRuntime.DescribeCustomObjects" : true, // During tracing, cannot GC. + "mozilla::CycleCollectedJSRuntime.NoteCustomGCThingXPCOMChildren" : true, // During tracing, cannot GC. + "PLDHashTableOps.hashKey" : true, + "PLDHashTableOps.clearEntry" : true, + "z_stream_s.zfree" : true, + "z_stream_s.zalloc" : true, + "GrGLInterface.fCallback" : true, + "std::strstreambuf._M_alloc_fun" : true, + "std::strstreambuf._M_free_fun" : true, + "struct js::gc::Callback<void (*)(JSContext*, void*)>.op" : true, + "mozilla::ThreadSharedFloatArrayBufferList::Storage.mFree" : true, + "mozilla::SizeOfState.mMallocSizeOf": true, + "mozilla::gfx::SourceSurfaceRawData.mDeallocator": true, +}; + +function fieldCallCannotGC(csu, fullfield) +{ + if (csu in ignoreClasses) + return true; + if (fullfield in ignoreCallees) + return true; + return false; +} + +function ignoreEdgeUse(edge, variable, body) +{ + // Horrible special case for ignoring a false positive in xptcstubs: there + // is a local variable 'paramBuffer' holding an array of nsXPTCMiniVariant + // on the stack, which appears to be live across a GC call because its + // constructor is called when the array is initialized, even though the + // constructor is a no-op. So we'll do a very narrow exclusion for the use + // that incorrectly started the live range, which was basically "__temp_1 = + // paramBuffer". + // + // By scoping it so narrowly, we can detect most hazards that would be + // caused by modifications in the PrepareAndDispatch code. It just barely + // avoids having a hazard already. 
+ if (('Name' in variable) && (variable.Name[0] == 'paramBuffer')) { + if (body.BlockId.Kind == 'Function' && body.BlockId.Variable.Name[0] == 'PrepareAndDispatch') + if (edge.Kind == 'Assign' && edge.Type.Kind == 'Pointer') + if (edge.Exp[0].Kind == 'Var' && edge.Exp[1].Kind == 'Var') + if (edge.Exp[1].Variable.Kind == 'Local' && edge.Exp[1].Variable.Name[0] == 'paramBuffer') + return true; + } + + // Functions which should not be treated as using variable. + if (edge.Kind == "Call") { + var callee = edge.Exp[0]; + if (callee.Kind == "Var") { + var name = callee.Variable.Name[0]; + if (/~DebugOnly/.test(name)) + return true; + if (/~ScopedThreadSafeStringInspector/.test(name)) + return true; + } + } + + return false; +} + +function ignoreEdgeAddressTaken(edge) +{ + // Functions which may take indirect pointers to unrooted GC things, + // but will copy them into rooted locations before calling anything + // that can GC. These parameters should usually be replaced with + // handles or mutable handles. + if (edge.Kind == "Call") { + var callee = edge.Exp[0]; + if (callee.Kind == "Var") { + var name = callee.Variable.Name[0]; + if (/js::Invoke\(/.test(name)) + return true; + } + } + + return false; +} + +// Ignore calls of these functions (so ignore any stack containing these) +var ignoreFunctions = { + "ptio.c:pt_MapError" : true, + "je_malloc_printf" : true, + "malloc_usable_size" : true, + "vprintf_stderr" : true, + "PR_ExplodeTime" : true, + "PR_ErrorInstallTable" : true, + "PR_SetThreadPrivate" : true, + "uint8 NS_IsMainThread()" : true, + + // Has an indirect call under it by the name "__f", which seemed too + // generic to ignore by itself. 
+ "void* std::_Locale_impl::~_Locale_impl(int32)" : true, + + // Bug 1056410 - devirtualization prevents the standard nsISupports::Release heuristic from working + "uint32 nsXPConnect::Release()" : true, + "uint32 nsAtom::Release()" : true, + + // Allocation API + "malloc": true, + "calloc": true, + "realloc": true, + "free": true, + + // FIXME! + "NS_LogInit": true, + "NS_LogTerm": true, + "NS_LogAddRef": true, + "NS_LogRelease": true, + "NS_LogCtor": true, + "NS_LogDtor": true, + "NS_LogCOMPtrAddRef": true, + "NS_LogCOMPtrRelease": true, + + // FIXME! + "NS_DebugBreak": true, + + // Similar to heap snapshot mock classes, and GTests below. This posts a + // synchronous runnable when a GTest fails, and we are pretty sure that the + // particular runnable it posts can't even GC, but the analysis isn't + // currently smart enough to determine that. In either case, this is (a) + // only in GTests, and (b) only when the Gtest has already failed. We have + // static and dynamic checks for no GC in the non-test code, and in the test + // code we fall back to only the dynamic checks. + "void test::RingbufferDumper::OnTestPartResult(testing::TestPartResult*)" : true, + + "float64 JS_GetCurrentEmbedderTime()" : true, + + // This calls any JSObjectMovedOp for the tenured object via an indirect call. + "JSObject* js::TenuringTracer::moveToTenuredSlow(JSObject*)" : true, + + "void js::Nursery::freeMallocedBuffers()" : true, + + "void js::AutoEnterOOMUnsafeRegion::crash(uint64, int8*)" : true, + "void js::AutoEnterOOMUnsafeRegion::crash_impl(uint64, int8*)" : true, + + "void mozilla::dom::WorkerPrivate::AssertIsOnWorkerThread() const" : true, + + // It would be cool to somehow annotate that nsTHashtable<T> will use + // nsTHashtable<T>::s_MatchEntry for its matchEntry function pointer, but + // there is no mechanism for that. So we will just annotate a particularly + // troublesome logging-related usage. 
+ "EntryType* nsTHashtable<EntryType>::PutEntry(nsTHashtable<EntryType>::KeyType, const fallible_t&) [with EntryType = nsBaseHashtableET<nsCharPtrHashKey, nsAutoPtr<mozilla::LogModule> >; nsTHashtable<EntryType>::KeyType = const char*; nsTHashtable<EntryType>::fallible_t = mozilla::fallible_t]" : true, + "EntryType* nsTHashtable<EntryType>::GetEntry(nsTHashtable<EntryType>::KeyType) const [with EntryType = nsBaseHashtableET<nsCharPtrHashKey, nsAutoPtr<mozilla::LogModule> >; nsTHashtable<EntryType>::KeyType = const char*]" : true, + "EntryType* nsTHashtable<EntryType>::PutEntry(nsTHashtable<EntryType>::KeyType) [with EntryType = nsBaseHashtableET<nsPtrHashKey<const mozilla::BlockingResourceBase>, nsAutoPtr<mozilla::DeadlockDetector<mozilla::BlockingResourceBase>::OrderingEntry> >; nsTHashtable<EntryType>::KeyType = const mozilla::BlockingResourceBase*]" : true, + "EntryType* nsTHashtable<EntryType>::GetEntry(nsTHashtable<EntryType>::KeyType) const [with EntryType = nsBaseHashtableET<nsPtrHashKey<const mozilla::BlockingResourceBase>, nsAutoPtr<mozilla::DeadlockDetector<mozilla::BlockingResourceBase>::OrderingEntry> >; nsTHashtable<EntryType>::KeyType = const mozilla::BlockingResourceBase*]" : true, + + // VTune internals that lazy-load a shared library and make IndirectCalls. + "iJIT_IsProfilingActive" : true, + "iJIT_NotifyEvent": true, + + // The big hammers. + "PR_GetCurrentThread" : true, + "calloc" : true, + + // This will happen early enough in initialization to not matter. + "_PR_UnixInit" : true, + + "uint8 nsContentUtils::IsExpandedPrincipal(nsIPrincipal*)" : true, + + "void mozilla::AutoProfilerLabel::~AutoProfilerLabel(int32)" : true, + + // Stores a function pointer in an AutoProfilerLabelData struct and calls it. + // And it's in mozglue, which doesn't have access to the attributes yet. 
+ "void mozilla::ProfilerLabelEnd(std::tuple<void*, unsigned int>*)" : true, + + // This gets into PLDHashTable function pointer territory, and should get + // set up early enough to not do anything when it matters anyway. + "mozilla::LogModule* mozilla::LogModule::Get(int8*)": true, + + // This annotation is correct, but the reasoning is still being hashed out + // in bug 1582326 comment 8 and on. + "nsCycleCollector.cpp:nsISupports* CanonicalizeXPCOMParticipant(nsISupports*)": true, + + // PLDHashTable again + "void mozilla::DeadlockDetector<T>::Add(const T*) [with T = mozilla::BlockingResourceBase]": true, + + // OOM handling during logging + "void mozilla::detail::log_print(mozilla::LogModule*, int32, int8*)": true, + + // This would need to know that the nsCOMPtr refcount will not go to zero. + "uint8 XPCJSRuntime::DescribeCustomObjects(JSObject*, JSClass*, int8[72]*)[72]) const": true, + + // As the comment says "Refcount isn't zero, so Suspect won't delete anything." + "uint64 nsCycleCollectingAutoRefCnt::incr(void*, nsCycleCollectionParticipant*) [with void (* suspect)(void*, nsCycleCollectionParticipant*, nsCycleCollectingAutoRefCnt*, bool*) = NS_CycleCollectorSuspect3; uintptr_t = long unsigned int]": true, + + // Calls MergeSort + "uint8 v8::internal::RegExpDisjunction::SortConsecutiveAtoms(v8::internal::RegExpCompiler*)": true, + + // nsIEventTarget.IsOnCurrentThreadInfallible does not get resolved, and + // this is called on non-JS threads so cannot use AutoSuppressGCAnalysis. + "uint8 nsAutoOwningEventTarget::IsCurrentThread() const": true, + + // ~JSStreamConsumer calls 2 ~RefCnt/~nsCOMPtr destructors for its fields, + // but the body of the destructor is written so that all Releases + // are proxied, and the members will all be empty at destruction time. 
+ "void mozilla::dom::JSStreamConsumer::~JSStreamConsumer() [[base_dtor]]": true, +}; + +function extraGCFunctions(readableNames) { + return ["ffi_call"].filter(f => f in readableNames); +} + +function isProtobuf(name) +{ + return name.match(/\bgoogle::protobuf\b/) || + name.match(/\bmozilla::devtools::protobuf\b/); +} + +function isHeapSnapshotMockClass(name) +{ + return name.match(/\bMockWriter\b/) || + name.match(/\bMockDeserializedNode\b/); +} + +function isGTest(name) +{ + return name.match(/\btesting::/); +} + +function isICU(name) +{ + return name.match(/\bicu_\d+::/) || + name.match(/u(prv_malloc|prv_realloc|prv_free|case_toFullLower)_\d+/) +} + +function ignoreGCFunction(mangled, readableNames) +{ + // Field calls will not be in readableNames + if (!(mangled in readableNames)) + return false; + + const fun = readableNames[mangled][0]; + + if (fun in ignoreFunctions) + return true; + + // The protobuf library, and [de]serialization code generated by the + // protobuf compiler, uses a _ton_ of function pointers but they are all + // internal. The same is true for ICU. Easiest to just ignore that mess + // here. + if (isProtobuf(fun) || isICU(fun)) + return true; + + // Ignore anything that goes through heap snapshot GTests or mocked classes + // used in heap snapshot GTests. GTest and GMock expose a lot of virtual + // methods and function pointers that could potentially GC after an + // assertion has already failed (depending on user-provided code), but don't + // exhibit that behavior currently. For non-test code, we have dynamic and + // static checks that ensure we don't GC. However, for test code we opt out + // of static checks here, because of the above stated GMock/GTest issues, + // and rely on only the dynamic checks provided by AutoAssertCannotGC. 
+ if (isHeapSnapshotMockClass(fun) || isGTest(fun)) + return true; + + // Templatized function + if (fun.includes("void nsCOMPtr<T>::Assert_NoQueryNeeded()")) + return true; + + // Bug 1577915 - Sixgill is ignoring a template param that makes its CFG + // impossible. + if (fun.includes("UnwrapObjectInternal") && fun.includes("mayBeWrapper = false")) + return true; + + // These call through an 'op' function pointer. + if (fun.includes("js::WeakMap<Key, Value, HashPolicy>::getDelegate(")) + return true; + + // TODO: modify refillFreeList<NoGC> to not need data flow analysis to + // understand it cannot GC. As of gcc 6, the same problem occurs with + // tryNewTenuredThing, tryNewNurseryObject, and others. + if (/refillFreeList|tryNew/.test(fun) && /= js::NoGC/.test(fun)) + return true; + + return false; +} + +function stripUCSAndNamespace(name) +{ + name = name.replace(/(struct|class|union|const) /g, ""); + name = name.replace(/(js::ctypes::|js::|JS::|mozilla::dom::|mozilla::)/g, ""); + return name; +} + +function extraRootedGCThings() +{ + return [ 'JSAddonId' ]; +} + +function extraRootedPointers() +{ + return [ + ]; +} + +function isRootedGCPointerTypeName(name) +{ + name = stripUCSAndNamespace(name); + + if (name.startsWith('MaybeRooted<')) + return /\(js::AllowGC\)1u>::RootType/.test(name); + + return false; +} + +function isUnsafeStorage(typeName) +{ + typeName = stripUCSAndNamespace(typeName); + return typeName.startsWith('UniquePtr<'); +} + +// If edgeType is a constructor type, return whatever bits it implies for its +// scope (or zero if not matching). +function isLimitConstructor(typeInfo, edgeType, varName) +{ + // Check whether this could be a constructor + if (edgeType.Kind != 'Function') + return 0; + if (!('TypeFunctionCSU' in edgeType)) + return 0; + if (edgeType.Type.Kind != 'Void') + return 0; + + // Check whether the type is a known suppression type. 
+ var type = edgeType.TypeFunctionCSU.Type.Name; + let attrs = 0; + if (type in typeInfo.GCSuppressors) + attrs = attrs | ATTR_GC_SUPPRESSED; + + // And now make sure this is the constructor, not some other method on a + // suppression type. varName[0] contains the qualified name. + var [ mangled, unmangled ] = splitFunction(varName[0]); + if (mangled.search(/C\d[EI]/) == -1) + return 0; // Mangled names of constructors have C<num>E or C<num>I + var m = unmangled.match(/([~\w]+)(?:<.*>)?\(/); + if (!m) + return 0; + var type_stem = type.replace(/\w+::/g, '').replace(/\<.*\>/g, ''); + if (m[1] != type_stem) + return 0; + + return attrs; +} + +// XPIDL-generated methods may invoke JS code, depending on the IDL +// attributes. This is not visible in the static callgraph since it +// goes through generated asm code. We can use the JS_HAZ_CAN_RUN_SCRIPT +// annotation to tell whether this is possible, which is set programmatically +// by the code generator when needed (bug 1347999): +// https://searchfox.org/mozilla-central/rev/81c52abeec336685330af5956c37b4bcf8926476/xpcom/idl-parser/xpidl/header.py#213-219 +// +// Note that WebIDL callbacks can also invoke JS code, but our code generator +// produces regular C++ code and so does not need any annotations. (There will +// be a call to JS::Call() or similar.) +function virtualCanRunJS(csu, field) +{ + const tags = typeInfo.OtherFieldTags; + const iface = tags[csu] + if (!iface) { + return false; + } + const virtual_method_tags = iface[field]; + return virtual_method_tags && virtual_method_tags.includes("Can run script"); +} + +function listNonGCPointers() { + return [ + // Safe only because jsids are currently only made from pinned strings. 
+ 'NPIdentifier', + ]; +} + +function isJSNative(mangled) +{ + // _Z...E = function + // 9JSContext = JSContext* + // j = uint32 + // PN2JS5Value = JS::Value* + // P = pointer + // N2JS = JS:: + // 5Value = Value + return mangled.endsWith("P9JSContextjPN2JS5ValueE") && mangled.startsWith("_Z"); +} diff --git a/js/src/devtools/rootAnalysis/build.js b/js/src/devtools/rootAnalysis/build.js new file mode 100644 index 0000000000..78ef04fea1 --- /dev/null +++ b/js/src/devtools/rootAnalysis/build.js @@ -0,0 +1,15 @@ +#!/bin/sh +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + + +set -e + +cd $SOURCE +./mach configure +./mach build export +./mach build -X nsprpub mfbt memory memory/mozalloc modules/zlib mozglue js/src xpcom/glue js/xpconnect/loader js/xpconnect/wrappers js/xpconnect/src +status=$? +echo "[[[[ build.js complete, exit code $status ]]]]" +exit $status diff --git a/js/src/devtools/rootAnalysis/build/sixgill-b2g.manifest b/js/src/devtools/rootAnalysis/build/sixgill-b2g.manifest new file mode 100644 index 0000000000..1ecb5d0665 --- /dev/null +++ b/js/src/devtools/rootAnalysis/build/sixgill-b2g.manifest @@ -0,0 +1,10 @@ +[ +{ +"hg_id" : "ec7b7d2442e8", +"algorithm" : "sha512", +"digest" : "49627d734df52cb9e7319733da5a6be1812b9373355dc300ee5600b431122570e00d380d50c7c5b5003c462c2c2cb022494b42c4ad00f8eba01c2259cbe6e502", +"filename" : "sixgill.tar.xz", +"size" : 2628868, +"unpack" : true +} +] diff --git a/js/src/devtools/rootAnalysis/build/sixgill.manifest b/js/src/devtools/rootAnalysis/build/sixgill.manifest new file mode 100644 index 0000000000..49ccdcbd3f --- /dev/null +++ b/js/src/devtools/rootAnalysis/build/sixgill.manifest @@ -0,0 +1,10 @@ +[ +{ +"digest" : "2e56a3cf84764b8e63720e5f961cff7ba8ba5cf2f353dac55c69486489bcd89f53a757e09469a07700b80cd09f09666c2db4ce375b67060ac3be967714597231", +"size" : 
2629600, +"hg_id" : "221d0d2eead9", +"unpack" : true, +"filename" : "sixgill.tar.xz", +"algorithm" : "sha512" +} +] diff --git a/js/src/devtools/rootAnalysis/callgraph.js b/js/src/devtools/rootAnalysis/callgraph.js new file mode 100644 index 0000000000..750324f0ed --- /dev/null +++ b/js/src/devtools/rootAnalysis/callgraph.js @@ -0,0 +1,233 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); +loadRelativeToScript('CFG.js'); + +// Map from csu => set of immediate subclasses +var subclasses = new Map(); + +// Map from csu => set of immediate superclasses +var superclasses = new Map(); + +// Map from "csu.name:nargs" => set of full method name +var virtualDefinitions = new Map(); + +// Every virtual method declaration, anywhere. +// +// Map from csu => Set of function-info. +// function-info: { +// name : simple string +// typedfield : "name:nargs" ("mangled" field name) +// field: full Field datastructure +// annotations : Set of [annotation-name, annotation-value] 2-element arrays +// inherited : whether the method is inherited from a base class +// pureVirtual : whether the method is pure virtual on this CSU +// dtor : if this is a virtual destructor with a definition in this class or +// a superclass, then the full name of the definition as if it were defined +// in this class. This is weird, but it's how gcc emits it. We will add a +// synthetic call from this function to its immediate base classes' dtors, +// so even if the function does not actually exist and is inherited from a +// base class, we will get a path to the inherited function. (Regular +// virtual methods are *not* claimed to exist when they don't.) 
// }
var virtualDeclarations = new Map();

var virtualResolutionsSeen = new Set();

// Reserved IDs for the special pseudo-functions of the callgraph.
var ID = {
    jscode: 1,
    anyfunc: 2,
    nogcfunc: 3,
    gc: 4,
};

// `map` maps names to Sets of entries. Add `entry` to the Set stored under
// `name`, creating that Set first if the name is not present yet, and return
// the Set.
function addToNamedSet(map, name, entry)
{
    const alreadyKnown = map.has(name);
    if (!alreadyKnown)
        map.set(name, new Set());
    const bucket = map.get(name);
    bucket.add(entry);
    return bucket;
}

// Record the class hierarchy and virtual method information of one CSU
// ("Class/Struct/Union") into the global tables. CSUs without a
// "FunctionField" entry are ignored entirely.
function processCSU(csuName, csu)
{
    if (!("FunctionField" in csu))
        return;

    // Inheritance links, in both directions.
    const baseRecords = csu.CSUBaseClass || [];
    for (const baseRecord of baseRecords) {
        const baseName = baseRecord.Base;
        addToNamedSet(subclasses, baseName, csuName);
        addToNamedSet(superclasses, csuName, baseName);
    }

    for (const functionField of csu.FunctionField) {
        const fieldList = functionField.Field;
        const definition = functionField.Variable;

        // Virtual method
        const info = fieldList[0];
        const name = info.Name[0];
        const annotations = new Set();
        const funcInfo = {
            name: name,
            typedfield: typedField(info),
            field: info,
            annotations: annotations,
            inherited: (info.FieldCSU.Type.Name != csuName), // Always false for virtual dtors
            // NOTE(review): this is true when a definition *is* present,
            // which reads as inverted for a "pure virtual" flag - confirm
            // against the consumers of this field.
            pureVirtual: Boolean(definition),
            dtor: false,
        };

        if (definition && isSyntheticVirtualDestructor(name)) {
            // This is one of gcc's artificial dtors.
            funcInfo.dtor = definition.Name[0];
            funcInfo.pureVirtual = false;
        }

        addToNamedSet(virtualDeclarations, csuName, funcInfo);
        if ('Annotation' in info) {
            for (const annotation of info.Annotation) {
                const [annType, annValue] = annotation.Name;
                annotations.add([annType, annValue]);
            }
        }

        if (definition) {
            // Note: not dealing with overloading correctly.
            const definitionName = definition.Name[0];
            addToNamedSet(virtualDefinitions, fieldKey(csuName, fieldList[0]), definitionName);
        }
    }
}

// Return a list of all callees that the given edge might be a call to. Each
// one is represented by an object with a 'kind' field that is one of
// ('direct', 'field', 'resolved-field', 'indirect', 'unknown'), though note
// that 'resolved-field' is really a global record of virtual method
// resolutions, independent of this particular edge.
+function translateCallees(edge) +{ + if (edge.Kind != "Call") + return []; + + const callee = edge.Exp[0]; + if (callee.Kind == "Var") { + assert(callee.Variable.Kind == "Func"); + return [{'kind': 'direct', 'name': callee.Variable.Name[0]}]; + } + + // At some point, we were intentionally invoking invalid function pointers + // (as in, a small integer cast to a function pointer type) to convey a + // small amount of information in the crash address. + if (callee.Kind == "Int") + return []; // Intentional crash + + assert(callee.Kind == "Drf"); + let called = callee.Exp[0]; + let indirection = 1; + if (called.Kind == "Drf") { + // This is probably a reference to a function pointer (`func*&`). It + // would be possible to determine that for certain by looking up the + // variable's type, which is doable but unnecessary. Indirect calls + // are assumed to call anything (any function in the codebase) unless they + // are annotated otherwise, and the `funkyName` annotation applies to + // `(**funkyName)(args)` as well as `(*funkyName)(args)`, it's ok. + called = called.Exp[0]; + indirection += 1; + } + + if (called.Kind == "Var") { + // indirect call through a variable. Note that the `indirection` field is + // currently unused by the later analysis. It is the number of dereferences + // applied to the variable before invoking the resulting function. + // + // The variable name passed through is the simplified one, since that is + // what annotations.js uses and we don't want the annotation to be missed + // if eg there is another variable of the same name in a sibling scope such + // that the fully decorated name no longer matches. + const [decorated, bare] = called.Variable.Name; + return [{'kind': "indirect", 'variable': bare, indirection}]; + } + + if (called.Kind != "Fld") { + // unknown call target. 
+ return [{'kind': "unknown"}]; + } + + // Return one 'field' callee record giving the full description of what's + // happening here (which is either a virtual method call, or a call through + // a function pointer stored in a field), and then boil the call down to a + // synthetic function that incorporates both the name of the field and the + // static type of whatever you're calling the method on. Both refer to the + // same call; they're just different ways of describing it. + const callees = []; + const field = called.Field; + const staticCSU = getFieldCallInstanceCSU(edge, field); + callees.push({'kind': "field", 'csu': field.FieldCSU.Type.Name, staticCSU, + 'field': field.Name[0], 'fieldKey': fieldKey(staticCSU, field), + 'isVirtual': ("FieldInstanceFunction" in field)}); + callees.push({'kind': "direct", 'name': fieldKey(staticCSU, field)}); + + return callees; +} + +function getCallees(body, edge, scopeAttrs, functionBodies) { + const calls = []; + + // getCallEdgeProperties can set the ATTR_REPLACED attribute, which + // means that the call in the edge has been replaced by zero or + // more edges to other functions. This is used when the original + // edge will end up calling through a function pointer or something + // (eg ~shared_ptr<T> calls a function pointer that can only be + // T::~T()). The original call edges are left in the graph in case + // they are useful for other purposes. 
+ for (const callee of translateCallees(edge)) { + if (callee.kind != "direct") { + calls.push({ callee, attrs: scopeAttrs }); + } else { + const edgeInfo = getCallEdgeProperties(body, edge, callee.name, functionBodies); + for (const extra of (edgeInfo.extraCalls || [])) { + calls.push({ attrs: scopeAttrs | extra.attrs, callee: { name: extra.name, 'kind': "direct", } }); + } + calls.push({ callee, attrs: scopeAttrs | edgeInfo.attrs}); + } + } + + return calls; +} + +function loadTypes(type_xdb_filename) { + const xdb = xdbLibrary(); + xdb.open(type_xdb_filename); + + const minStream = xdb.min_data_stream(); + const maxStream = xdb.max_data_stream(); + + for (var csuIndex = minStream; csuIndex <= maxStream; csuIndex++) { + const csu = xdb.read_key(csuIndex); + const data = xdb.read_entry(csu); + const json = JSON.parse(data.readString()); + processCSU(csu.readString(), json[0]); + + xdb.free_string(csu); + xdb.free_string(data); + } +} + +function loadTypesWithCache(type_xdb_filename, cache_filename) { + try { + const cacheAB = os.file.readFile(cache_filename, "binary"); + const cb = serialize(); + cb.clonebuffer = cacheAB.buffer; + const cacheData = deserialize(cb); + subclasses = cacheData.subclasses; + superclasses = cacheData.superclasses; + virtualDefinitions = cacheData.virtualDefinitions; + } catch (e) { + loadTypes(type_xdb_filename); + const cb = serialize({subclasses, superclasses, virtualDefinitions}); + os.file.writeTypedArrayToFile(cache_filename, + new Uint8Array(cb.arraybuffer)); + } +} diff --git a/js/src/devtools/rootAnalysis/computeCallgraph.js b/js/src/devtools/rootAnalysis/computeCallgraph.js new file mode 100644 index 0000000000..d847465678 --- /dev/null +++ b/js/src/devtools/rootAnalysis/computeCallgraph.js @@ -0,0 +1,434 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('callgraph.js'); + +var options = parse_options([ + { + name: '--verbose', + type: 'bool' + }, + { + name: '--function', + type: 'string' + }, + { + name: 'typeInfo_filename', + type: 'string', + default: "typeInfo.txt" + }, + { + name: 'callgraphOut_filename', + type: 'string', + default: "rawcalls.txt" + }, + { + name: 'batch', + default: 1, + type: 'number' + }, + { + name: 'numBatches', + default: 1, + type: 'number' + }, +]); + +var origOut = os.file.redirect(options.callgraphOut_filename); + +var memoized = new Map(); + +var unmangled2id = new Set(); + +// Insert a string into the name table and return the ID. Do not use for +// functions, which must be handled specially. +function getId(name) +{ + let id = memoized.get(name); + if (id !== undefined) + return id; + + id = memoized.size + 1; + memoized.set(name, id); + print(`#${id} ${name}`); + + return id; +} + +// Split a function into mangled and unmangled parts and return the ID for the +// function. +function functionId(name) +{ + const [mangled, unmangled] = splitFunction(name); + const id = getId(mangled); + + // Only produce a mangled -> unmangled mapping once, unless there are + // multiple unmangled names for the same mangled name. + if (unmangled2id.has(unmangled)) + return id; + + print(`= ${id} ${unmangled}`); + unmangled2id.add(unmangled); + return id; +} + +var lastline; +function printOnce(line) +{ + if (line != lastline) { + print(line); + lastline = line; + } +} + +// Returns a table mapping function name to lists of +// [annotation-name, annotation-value] pairs: +// { function-name => [ [annotation-name, annotation-value] ] } +// +// Note that sixgill will only store certain attributes (annotation-names), so +// this won't be *all* the attributes in the source, just the ones that sixgill +// watches for. 
function getAllAttributes(body)
{
    const all_annotations = {};
    const definitions = body.DefineVariable || [];
    for (const definition of definitions) {
        if (definition.Variable.Kind != 'Func')
            continue;

        const annotations = [];
        all_annotations[definition.Variable.Name[0]] = annotations;

        for (const ann of (definition.Type.Annotation || [])) {
            annotations.push(ann.Name);
        }
    }

    return all_annotations;
}

// Get just the annotations understood by the hazard analysis.
function getAnnotations(functionName, body) {
    const tags = new Set();
    const attributes = getAllAttributes(body);
    if (!(functionName in attributes))
        return tags;

    // Only the values of 'annotate' attributes are of interest.
    for (const [ annName, annValue ] of attributes[functionName]) {
        if (annName == 'annotate')
            tags.add(annValue);
    }
    return tags;
}

// Scan through a function body, pulling out all annotations and calls and
// recording them in callgraph.txt.
function processBody(functionName, body, functionBodies)
{
    if (!('PEdge' in body))
        return;

    for (const tag of getAnnotations(functionName, body).values()) {
        const id = functionId(functionName);
        print(`T ${id} ${tag}`);
        if (tag == "Calls JSNatives")
            printOnce(`D ${id} ${functionId("(js-code)")}`);
    }

    // Set of all callees that have been output so far, in order to suppress
    // repeated callgraph edges from being recorded. This uses a Map from
    // callees to limit sets, because we don't want a limited edge to prevent
    // an unlimited edge from being recorded later. (So an edge will be skipped
    // if it exists and is at least as limited as the previously seen edge.)
    //
    // Limit sets are implemented as integers interpreted as bitfields.
    //
    const seen = new Map();

    lastline = null;
    for (const edge of body.PEdge) {
        if (edge.Kind != "Call")
            continue;

        // The attrs (eg ATTR_GC_SUPPRESSED) are determined by whatever RAII
        // scopes might be active, which have been computed previously for all
        // points in the body.
        const scopeAttrs = body.attrs[edge.Index[0]] | 0;

        for (const { callee, attrs } of getCallees(body, edge, scopeAttrs, functionBodies)) {
            // Some function names will be synthesized by manually constructing
            // their names. Verify that we managed to synthesize an existing function.
            // This cannot be done later with either the callees or callers tables,
            // because the function may be an otherwise uncalled leaf.
            if (attrs & ATTR_SYNTHETIC) {
                assertFunctionExists(callee.name);
            }

            // Individual callees may have additional attrs. The only such
            // bit currently is that nsISupports.{AddRef,Release} are assumed
            // to never GC.
            let prologue = attrs ? `/${attrs} ` : "";
            prologue += functionId(functionName) + " ";

            if (callee.kind == 'direct') {
                let prev_attrs = ATTRS_UNVISITED;
                if (seen.has(callee.name))
                    prev_attrs = seen.get(callee.name);
                if (prev_attrs & ~attrs) {
                    // Only output an edge if it loosens a limit.
                    seen.set(callee.name, prev_attrs & attrs);
                    printOnce(`D ${prologue}${functionId(callee.name)}`);
                }
            } else if (callee.kind == 'field') {
                const { csu, field, isVirtual } = callee;
                const tag = isVirtual ? 'V' : 'F';
                const fullfield = `${csu}.${field}`;
                printOnce(`${tag} ${prologue}${getId(fullfield)} CLASS ${csu} FIELD ${field}`);
            } else if (callee.kind == 'resolved-field') {
                // Fully-resolved field (virtual method) call. Record the
                // callgraph edges. Do not consider attrs, since they are local
                // to this callsite and we are writing out a global record
                // here.
                //
                // Any field call that does *not* have an R entry must be
                // assumed to call anything.
                const { csu, field, callees } = callee;
                const fullFieldName = `${csu}.${field}`;
                if (!virtualResolutionsSeen.has(fullFieldName)) {
                    virtualResolutionsSeen.add(fullFieldName);
                    for (const target of callees)
                        printOnce(`R ${getId(fullFieldName)} ${functionId(target.name)}`);
                }
            } else if (callee.kind == 'indirect') {
                printOnce(`I ${prologue}VARIABLE ${callee.variable}`);
            } else if (callee.kind == 'unknown') {
                printOnce(`I ${prologue}VARIABLE UNKNOWN`);
            } else {
                printErr(`invalid ${callee.kind} callee`);
                debugger;
            }
        }
    }
}

// Reserve IDs for special function names.

// represents anything that can run JS
assert(ID.jscode == functionId("(js-code)"));

// function pointers will get an edge to this in loadCallgraph.js; only the ID
// reservation is present in callgraph.txt
assert(ID.anyfunc == functionId("(any-function)"));

// same as above, but for fields annotated to never GC
assert(ID.nogcfunc == functionId("(nogc-function)"));

// garbage collection
assert(ID.gc == functionId("(GC)"));

var typeInfo = loadTypeInfo(options.typeInfo_filename);

loadTypes("src_comp.xdb");

// Arbitrary JS code must always be assumed to GC. In real code, there would
// always be a path anyway through some arbitrary JSNative, but this route will be shorter.
print(`D ${ID.jscode} ${ID.gc}`);

// An unknown function is assumed to GC.
print(`D ${ID.anyfunc} ${ID.gc}`);

// Output call edges for all virtual methods defined anywhere, from
// Class.methodname to what a (dynamic) instance of Class would run when
// methodname was called (either Class::methodname() if defined, or some
// Base::methodname() for inherited method definitions).
for (const [fieldkey, methods] of virtualDefinitions) {
    const caller = getId(fieldkey);
    for (const name of methods) {
        const callee = functionId(name);
        printOnce(`D ${caller} ${callee}`);
    }
}

// Output call edges from C.methodname -> S.methodname for all subclasses S of
// class C.
This is for when you are calling methodname on a pointer/ref of +// dynamic type C, so that the callgraph contains calls to all descendant +// subclasses' implementations. +for (const [csu, methods] of virtualDeclarations) { + for (const {field, dtor} of methods) { + const caller = getId(fieldKey(csu, field)); + if (virtualCanRunJS(csu, field.Name[0])) + printOnce(`D ${caller} ${functionId("(js-code)")}`); + if (dtor) + printOnce(`D ${caller} ${functionId(dtor)}`); + if (!subclasses.has(csu)) + continue; + for (const sub of subclasses.get(csu)) { + printOnce(`D ${caller} ${getId(fieldKey(sub, field))}`); + } + } +} + +var xdb = xdbLibrary(); +xdb.open("src_body.xdb"); + +if (options.verbose) { + printErr("Finished loading data structures"); +} + +var minStream = xdb.min_data_stream(); +var maxStream = xdb.max_data_stream(); + +if (options.function) { + var index = xdb.lookup_key(options.function); + if (!index) { + printErr("Function not found"); + quit(1); + } + minStream = maxStream = index; +} + +function assertFunctionExists(name) { + var data = xdb.read_entry(name); + assert(data.contents != 0, `synthetic function '${name}' not found!`); +} + +function process(functionName, functionBodies) +{ + for (var body of functionBodies) + body.attrs = []; + + for (var body of functionBodies) { + for (var [pbody, id, attrs] of allRAIIGuardedCallPoints(typeInfo, functionBodies, body, isLimitConstructor)) { + pbody.attrs[id] = attrs; + } + } + + if (options.function) { + debugger; + } + for (var body of functionBodies) { + processBody(functionName, body, functionBodies); + } + + // Not strictly necessary, but add an edge from the synthetic "(js-code)" + // to RunScript to allow better stacks than just randomly selecting a + // JSNative to blame things on. 
+ if (functionName.includes("js::RunScript")) + print(`D ${functionId("(js-code)")} ${functionId(functionName)}`); + + // GCC generates multiple constructors and destructors ("in-charge" and + // "not-in-charge") to handle virtual base classes. They are normally + // identical, and it appears that GCC does some magic to alias them to the + // same thing. But this aliasing is not visible to the analysis. So we'll + // add a dummy call edge from "foo" -> "foo *INTERNAL* ", since only "foo" + // will show up as called but only "foo *INTERNAL* " will be emitted in the + // case where the constructors are identical. + // + // This is slightly conservative in the case where they are *not* + // identical, but that should be rare enough that we don't care. + var markerPos = functionName.indexOf(internalMarker); + if (markerPos > 0) { + var inChargeXTor = functionName.replace(internalMarker, ""); + printOnce("D " + functionId(inChargeXTor) + " " + functionId(functionName)); + } + + const [ mangled, unmangled ] = splitFunction(functionName); + + // Further note: from https://itanium-cxx-abi.github.io/cxx-abi/abi.html the + // different kinds of constructors/destructors are: + // C1 # complete object constructor + // C2 # base object constructor + // C3 # complete object allocating constructor + // D0 # deleting destructor + // D1 # complete object destructor + // D2 # base object destructor + // + // In actual practice, I have observed C4 and D4 xtors generated by gcc + // 4.9.3 (but not 4.7.3). The gcc source code says: + // + // /* This is the old-style "[unified]" constructor. + // In some cases, we may emit this function and call + // it from the clones in order to share code and save space. */ + // + // Unfortunately, that "call... from the clones" does not seem to appear in + // the CFG we get from GCC. So if we see a C4 constructor or D4 destructor, + // inject an edge to it from C1, C2, and C3 (or D1, D2, and D3). 
(Note that + // C3 isn't even used in current GCC, but add the edge anyway just in + // case.) + // + // from gcc/cp/mangle.c: + // + // <special-name> ::= D0 # deleting (in-charge) destructor + // ::= D1 # complete object (in-charge) destructor + // ::= D2 # base object (not-in-charge) destructor + // <special-name> ::= C1 # complete object constructor + // ::= C2 # base object constructor + // ::= C3 # complete object allocating constructor + // + // Currently, allocating constructors are never used. + // + if (functionName.indexOf("C4") != -1) { + // E terminates the method name (and precedes the method parameters). + // If eg "C4E" shows up in the mangled name for another reason, this + // will create bogus edges in the callgraph. But it will affect little + // and is somewhat difficult to avoid, so we will live with it. + // + // Another possibility! A templatized constructor will contain C4I...E + // for template arguments. + // + for (let [synthetic, variant, desc] of [ + ['C4E', 'C1E', 'complete_ctor'], + ['C4E', 'C2E', 'base_ctor'], + ['C4E', 'C3E', 'complete_alloc_ctor'], + ['C4I', 'C1I', 'complete_ctor'], + ['C4I', 'C2I', 'base_ctor'], + ['C4I', 'C3I', 'complete_alloc_ctor']]) + { + if (mangled.indexOf(synthetic) == -1) + continue; + + let variant_mangled = mangled.replace(synthetic, variant); + let variant_full = `${variant_mangled}$${unmangled} [[${desc}]]`; + printOnce("D " + functionId(variant_full) + " " + functionId(functionName)); + } + } + + // For destructors: + // + // I've never seen D4Ev() + D4Ev(int32), only one or the other. So + // for a D4Ev of any sort, create: + // + // D0() -> D1() # deleting destructor calls complete destructor, then deletes + // D1() -> D2() # complete destructor calls base destructor, then destroys virtual bases + // D2() -> D4(?) # base destructor might be aliased to unified destructor + // # use whichever one is defined, in-charge or not. + // # ('?') means either () or (int32). 
+ // + // Note that this doesn't actually make sense -- D0 and D1 should be + // in-charge, but gcc doesn't seem to give them the in-charge parameter?! + // + if (functionName.indexOf("D4Ev") != -1 && functionName.indexOf("::~") != -1) { + const not_in_charge_dtor = functionName.replace("(int32)", "()"); + const D0 = not_in_charge_dtor.replace("D4Ev", "D0Ev") + " [[deleting_dtor]]"; + const D1 = not_in_charge_dtor.replace("D4Ev", "D1Ev") + " [[complete_dtor]]"; + const D2 = not_in_charge_dtor.replace("D4Ev", "D2Ev") + " [[base_dtor]]"; + printOnce("D " + functionId(D0) + " " + functionId(D1)); + printOnce("D " + functionId(D1) + " " + functionId(D2)); + printOnce("D " + functionId(D2) + " " + functionId(functionName)); + } + + if (isJSNative(mangled)) + printOnce(`D ${functionId("(js-code)")} ${functionId(functionName)}`); +} + +var start = batchStart(options.batch, options.numBatches, minStream, maxStream); +var end = batchLast(options.batch, options.numBatches, minStream, maxStream); + +for (var nameIndex = start; nameIndex <= end; nameIndex++) { + var name = xdb.read_key(nameIndex); + var data = xdb.read_entry(name); + process(name.readString(), JSON.parse(data.readString())); + xdb.free_string(name); + xdb.free_string(data); +} + +os.file.close(os.file.redirect(origOut)); diff --git a/js/src/devtools/rootAnalysis/computeGCFunctions.js b/js/src/devtools/rootAnalysis/computeGCFunctions.js new file mode 100644 index 0000000000..99410efdf8 --- /dev/null +++ b/js/src/devtools/rootAnalysis/computeGCFunctions.js @@ -0,0 +1,113 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); +loadRelativeToScript('loadCallgraph.js'); + +function usage() { + throw "Usage: computeGCFunctions.js <rawcalls1.txt> <rawcalls2.txt>... --outputs <out:callgraph.txt> <out:gcFunctions.txt> <out:gcFunctions.lst> <out:gcEdges.txt> <out:limitedFunctions.lst>"; +} + +if (typeof scriptArgs[0] != 'string') + usage(); + +var start = "Time: " + new Date; + +try { + var options = parse_options([ + { + name: '--verbose', + type: 'bool' + }, + { + name: 'inputs', + dest: 'rawcalls_filenames', + nargs: '+' + }, + { + name: '--outputs', + type: 'bool' + }, + { + name: 'callgraph', + type: 'string', + default: 'callgraph.txt' + }, + { + name: 'gcFunctions', + type: 'string', + default: 'gcFunctions.txt' + }, + { + name: 'gcFunctionsList', + type: 'string', + default: 'gcFunctions.lst' + }, + { + name: 'limitedFunctions', + type: 'string', + default: 'limitedFunctions.lst' + }, + ]); +} catch { + printErr("Usage: computeGCFunctions.js [--verbose] <rawcalls1.txt> <rawcalls2.txt>... --outputs <out:callgraph.txt> <out:gcFunctions.txt> <out:gcFunctions.lst> <out:gcEdges.txt> <out:limitedFunctions.lst>"); + quit(1); +}; + +function info(message) { + if (options.verbose) { + printErr(message); + } +} + +var { + gcFunctions, + functions, + calleesOf, + limitedFunctions +} = loadCallgraph(options.rawcalls_filenames, options.verbose); + +info("Writing " + options.gcFunctions); +redirect(options.gcFunctions); + +for (var name in gcFunctions) { + for (let readable of (functions.readableName[name] || [name])) { + print(""); + const fullname = (name == readable) ? 
name : name + "$" + readable; + print("GC Function: " + fullname); + let current = name; + do { + current = gcFunctions[current]; + if (current === 'internal') + ; // Hit the end + else if (current in functions.readableName) + print(" " + functions.readableName[current][0]); + else + print(" " + current); + } while (current in gcFunctions); + } +} + +info("Writing " + options.gcFunctionsList); +redirect(options.gcFunctionsList); +for (var name in gcFunctions) { + if (name in functions.readableName) { + for (var readable of functions.readableName[name]) + print(name + "$" + readable); + } else { + print(name); + } +} + +info("Writing " + options.limitedFunctions); +redirect(options.limitedFunctions); +print(JSON.stringify(limitedFunctions, null, 4)); + +info("Writing " + options.callgraph); +redirect(options.callgraph); +saveCallgraph(functions, calleesOf); diff --git a/js/src/devtools/rootAnalysis/computeGCTypes.js b/js/src/devtools/rootAnalysis/computeGCTypes.js new file mode 100644 index 0000000000..c38a13dabb --- /dev/null +++ b/js/src/devtools/rootAnalysis/computeGCTypes.js @@ -0,0 +1,550 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); + +var options = parse_options([ + { name: '--verbose', type: 'bool' }, + { name: "gcTypes", default: "gcTypes.txt" }, + { name: "typeInfo", default: "typeInfo.txt" } +]); + +var typeInfo = { + 'GCPointers': [], + 'GCThings': [], + 'GCInvalidated': [], + 'GCRefs': [], + 'NonGCTypes': {}, // unused + 'NonGCPointers': {}, + 'RootedGCThings': {}, + 'RootedPointers': {}, + 'RootedBases': {'JS::AutoGCRooter': true}, + 'InheritFromTemplateArgs': {}, + 'OtherCSUTags': {}, + 'OtherFieldTags': {}, + + // RAII types within which we should assume GC is suppressed, eg + // AutoSuppressGC. + 'GCSuppressors': {}, +}; + +var gDescriptors = new Map; // Map from descriptor string => Set of typeName + +var structureParents = {}; // Map from field => list of <parent, fieldName> +var pointerParents = {}; // Map from field => list of <parent, fieldName> +var baseClasses = {}; // Map from struct name => list of base class name strings +var subClasses = {}; // Map from struct name => list of subclass name strings + +var gcTypes = {}; // map from parent struct => Set of GC typed children +var gcPointers = {}; // map from parent struct => Set of GC typed children +var gcFields = new Map; + +var rootedPointers = {}; + +// Accumulate the base GC types before propagating info through the type graph, +// so that we can include edges from types processed later +// (eg MOZ_INHERIT_TYPE_ANNOTATIONS_FROM_TEMPLATE_ARGS). 
// Each entry is an argument list for markGCType():
//   [typeName, child, childType, typePtrLevel, fieldPtrLevel]
var pendingGCTypes = [];

// Record everything the analysis needs from one CSU (class/struct/union)
// record read out of src_comp.xdb.
//
//   csu  - name of the CSU (string).
//   body - the parsed JSON record for that CSU. The optional arrays
//          Annotation, CSUBaseClass, DataField and FunctionField are read;
//          any of them may be absent.
//
// Side effects: fills in the global typeInfo tables, and (through
// addBaseClass / addNestedStructure / addNestedPointer) the baseClasses,
// subClasses, structureParents and pointerParents maps.
function processCSU(csu, body)
{
    // CSU-level annotations. Only 'annotate' annotations are considered; the
    // well-known tags go to dedicated typeInfo tables, everything else is
    // collected in OtherCSUTags.
    for (let { 'Name': [ annType, tag ] } of (body.Annotation || [])) {
        if (annType != 'annotate')
            continue;

        if (tag == 'GC Pointer')
            typeInfo.GCPointers.push(csu);
        else if (tag == 'Invalidated by GC')
            typeInfo.GCInvalidated.push(csu);
        else if (tag == 'GC Pointer or Reference')
            typeInfo.GCRefs.push(csu);
        else if (tag == 'GC Thing')
            typeInfo.GCThings.push(csu);
        else if (tag == 'Suppressed GC Pointer')
            typeInfo.NonGCPointers[csu] = true;
        else if (tag == 'Rooted Pointer')
            typeInfo.RootedPointers[csu] = true;
        else if (tag == 'Rooted Base')
            typeInfo.RootedBases[csu] = true;
        else if (tag == 'Suppress GC')
            typeInfo.GCSuppressors[csu] = true;
        else if (tag == 'moz_inherit_type_annotations_from_template_args')
            typeInfo.InheritFromTemplateArgs[csu] = true;
        else
            addToKeyedList(typeInfo.OtherCSUTags, csu, tag);
    }

    for (let { 'Base': base } of (body.CSUBaseClass || []))
        addBaseClass(csu, base);

    for (const field of (body.DataField || [])) {
        const type = field.Field.Type;
        const fieldName = field.Field.Name[0];
        if (type.Kind == "Pointer") {
            const target = type.Type;
            if (target.Kind == "CSU")
                addNestedPointer(csu, target.Name, fieldName);
        }
        if (type.Kind == "Array") {
            // An array of CSUs is treated like a directly embedded CSU.
            const target = type.Type;
            if (target.Kind == "CSU")
                addNestedStructure(csu, target.Name, fieldName);
        }
        if (type.Kind == "CSU")
            addNestedStructure(csu, type.Name, fieldName);

        for (const { 'Name': [ , tag ] } of (field.Annotation || [])) {
            // OtherFieldTags[csu] is keyed by field name, so it must be a
            // plain object: typeInfo is written out with JSON.stringify,
            // which drops named (non-index) properties of arrays. This also
            // matches the FunctionField loop below.
            if (!(csu in typeInfo.OtherFieldTags))
                typeInfo.OtherFieldTags[csu] = {};
            addToKeyedList(typeInfo.OtherFieldTags[csu], fieldName, tag);
        }
    }

    for (const funcfield of (body.FunctionField || [])) {
        // Pure virtual functions will not have field.Variable; others will.
        for (const field of funcfield.Field) {
            for (const { 'Name': [ , tag ] } of (field.Annotation || [])) {
                if (!(csu in typeInfo.OtherFieldTags))
                    typeInfo.OtherFieldTags[csu] = {};
                addToKeyedList(typeInfo.OtherFieldTags[csu], field.Name[0], tag);
            }
        }
    }
}

// csu.field is of type inner
//
// Records [csu, field] as a parent of `inner` in structureParents. The entry
// for `inner` is created even when the field itself ends up being skipped.
function addNestedStructure(csu, inner, field)
{
    if (!(inner in structureParents))
        structureParents[inner] = [];

    // Skip fields that are really base classes, to avoid duplicating the base
    // fields; addBaseClass already added a "base-N" name.
    if (field.match(/^field:\d+$/) && (csu in baseClasses) && (baseClasses[csu].indexOf(inner) != -1))
        return;

    structureParents[inner].push([ csu, field ]);
}

// Record that `csu` derives from `base` in both directions (baseClasses and
// subClasses), and also register the base as a nested structure of `csu`
// under the synthetic field name "<base-N>", where N is the 1-based position
// of this base among the bases recorded so far for `csu`.
function addBaseClass(csu, base) {
    if (!(csu in baseClasses))
        baseClasses[csu] = [];
    baseClasses[csu].push(base);
    if (!(base in subClasses))
        subClasses[base] = [];
    subClasses[base].push(csu);
    var k = baseClasses[csu].length;
    addNestedStructure(csu, base, `<base-${k}>`);
}

// csu.field is a pointer to inner: record [csu, field] in pointerParents.
function addNestedPointer(csu, inner, field)
{
    if (!(inner in pointerParents))
        pointerParents[inner] = [];
    pointerParents[inner].push([ csu, field ]);
}

// Feed every CSU record in src_comp.xdb through processCSU.
var xdb = xdbLibrary();
xdb.open("src_comp.xdb");

var minStream = xdb.min_data_stream();
var maxStream = xdb.max_data_stream();

for (var csuIndex = minStream; csuIndex <= maxStream; csuIndex++) {
    var csu = xdb.read_key(csuIndex);
    var data = xdb.read_entry(csu);
    var json = JSON.parse(data.readString());
    assert(json.length == 1);
    processCSU(csu.readString(), json[0]);

    xdb.free_string(csu);
    xdb.free_string(data);
}

for (const typename of extraRootedGCThings())
    typeInfo.RootedGCThings[typename] = true;

for (const typename of extraRootedPointers())
    typeInfo.RootedPointers[typename] = true;

// Everything that inherits from a "Rooted Base" is considered to be rooted.
// This is for things like CustomAutoRooter and its subclasses.
// Worklist traversal: start from every annotated rooted base and walk down
// through subClasses, marking each visited type as a rooted pointer.
var basework = Object.keys(typeInfo.RootedBases);
while (basework.length) {
    const base = basework.pop();
    typeInfo.RootedPointers[base] = true;
    if (base in subClasses)
        basework.push(...subClasses[base]);
}

// Now that we have the whole hierarchy set up, add all the types and propagate
// info.
for (const csu of typeInfo.GCThings)
    addGCType(csu);
for (const csu of typeInfo.GCPointers)
    addGCPointer(csu);
// Types invalidated by GC are queued the same way as GC pointers.
for (const csu of typeInfo.GCInvalidated)
    addGCPointer(csu);

// Split a templatized type name into its template name and argument list.
//
// Returns [templateName, [arg, ...]] (arguments trimmed) when typeName ends
// in '>', and [typeName, undefined] otherwise. Only the template argument
// list that is closed by the final '>' is split; nested argument lists are
// left intact inside the individual args.
//
// With validate=true the whole string is scanned so that unbalanced angle
// brackets anywhere in typeName trip an assertion; with validate=false the
// result is returned as soon as the matching '<' is found.
function parseTemplateType(typeName, validate=false) {
    // We only want templatized types. `Foo<U, T>::Member` doesn't count.
    // Foo<U, T>::Bar<X, Y> does count. Which turns out to be a simple rule:
    // check whether the type ends in '>'.
    if (!typeName.endsWith(">")) {
        return [typeName, undefined];
    }

    // "Tokenize" into angle brackets, commas, and everything else. We store
    // match objects as tokens because we'll need the string offset after we
    // finish grabbing the template parameters.
    const tokens = [];
    const tokenizer = /[<>,]|[^<>,]+/g;
    let match;
    while ((match = tokenizer.exec(typeName)) !== null) {
        tokens.push(match);
    }

    // Walk backwards through the tokens, stopping when we find the matching
    // open bracket.
    const args = [];
    let depth = 0;
    let arg;
    let first_result;
    for (const match of tokens.reverse()) {
        const token = match[0];
        if (depth == 1 && (token == ',' || token == '<')) {
            // We've walked back to the beginning of a template parameter,
            // where we will see either a comma or open bracket.
            args.unshift(arg);
            arg = '';
        } else if (depth == 0 && token == '>') {
            arg = ''; // We just started.
        } else {
            // Prepend, since the tokens are being visited right to left.
            arg = token + arg;
        }

        // Maintain the depth.
        if (token == '<') {
            // This could be bug 1728151.
            assert(depth > 0, `Invalid type: too many '<' signs in '${typeName}'`);
            depth--;
        } else if (token == '>') {
            depth++;
        }

        if (depth == 0) {
            // We've walked out of the template parameter list.
            // Record the results.
            assert(args.length > 0);
            // match.index is the offset of the current token, so everything
            // before it is the template name.
            const templateName = typeName.substr(0, match.index);
            const result = [templateName, args.map(arg => arg.trim())];
            if (!validate) {
                // Normal processing is to return the result the first time we
                // get to the '<' that matches the terminal '>', without validating
                // that the rest of the type name is balanced.
                return result;
            } else if (!first_result) {
                // If we are validating, remember the result when we hit the
                // first matching '<', but then keep processing the rest of
                // the input string to count brackets.
                first_result = result;
            }
        }
    }

    // This could be bug 1728151.
    assert(depth == 0, `Invalid type: too many '>' signs in '${typeName}'`);
    return first_result;
}

// Self-tests for parseTemplateType, run only when the
// HAZARD_RUN_INTERNAL_TESTS environment variable is set.
if (os.getenv("HAZARD_RUN_INTERNAL_TESTS")) {
    // Compare via JSON so that nested arrays are compared structurally.
    function check_parse(typeName, result) {
        assertEq(JSON.stringify(parseTemplateType(typeName)), JSON.stringify(result));
    }

    check_parse("int", ["int", undefined]);
    check_parse("Type<int>", ["Type", ["int"]]);
    check_parse("Container<int, double>", ["Container", ["int", "double"]]);
    check_parse("Container<Container<void, void>, double>", ["Container", ["Container<void, void>", "double"]]);
    check_parse("Foo<Bar<a,b>,Bar<a,b>>::Container<Container<void, void>, double>", ["Foo<Bar<a,b>,Bar<a,b>>::Container", ["Container<void, void>", "double"]]);
    check_parse("AlignedStorage2<TypedArray<foo>>", ["AlignedStorage2", ["TypedArray<foo>"]]);
    check_parse("mozilla::AlignedStorage2<mozilla::dom::TypedArray<unsigned char, JS::UnwrapArrayBufferMaybeShared, JS::GetArrayBufferMaybeSharedData, JS::GetArrayBufferMaybeSharedLengthAndData, JS::NewArrayBuffer> >",
        [
            "mozilla::AlignedStorage2",
            [
                "mozilla::dom::TypedArray<unsigned char, JS::UnwrapArrayBufferMaybeShared, JS::GetArrayBufferMaybeSharedData, JS::GetArrayBufferMaybeSharedLengthAndData, JS::NewArrayBuffer>"
            ]
        ]
    );
    check_parse(
        "mozilla::ArrayIterator<const mozilla::dom::binding_detail::RecordEntry<nsTString<char16_t>, mozilla::dom::Nullable<mozilla::dom::TypedArray<unsigned char, JS::UnwrapArrayBufferMaybeShared, JS::GetArrayBufferMaybeSharedData, JS::GetArrayBufferMaybeSharedLengthAndData, JS::NewArrayBuffer> > >&, nsTArray_Impl<mozilla::dom::binding_detail::RecordEntry<nsTString<char16_t>, mozilla::dom::Nullable<mozilla::dom::TypedArray<unsigned char, JS::UnwrapArrayBufferMaybeShared, JS::GetArrayBufferMaybeSharedData, JS::GetArrayBufferMaybeSharedLengthAndData, JS::NewArrayBuffer> > >, nsTArrayInfallibleAllocator> >",
        [
            "mozilla::ArrayIterator",
            [
                "const mozilla::dom::binding_detail::RecordEntry<nsTString<char16_t>, mozilla::dom::Nullable<mozilla::dom::TypedArray<unsigned char, JS::UnwrapArrayBufferMaybeShared, JS::GetArrayBufferMaybeSharedData, JS::GetArrayBufferMaybeSharedLengthAndData, JS::NewArrayBuffer> > >&",
                "nsTArray_Impl<mozilla::dom::binding_detail::RecordEntry<nsTString<char16_t>, mozilla::dom::Nullable<mozilla::dom::TypedArray<unsigned char, JS::UnwrapArrayBufferMaybeShared, JS::GetArrayBufferMaybeSharedData, JS::GetArrayBufferMaybeSharedLengthAndData, JS::NewArrayBuffer> > >, nsTArrayInfallibleAllocator>"
            ]
        ]
    );

    // Run f and require that it throws an exception whose message contains
    // `exc`. If f returns normally, the final assertEq compares undefined
    // against `exc` so that the check fails.
    function check_throws(f, exc) {
        try {
            f();
        } catch (e) {
            assertEq(e.message.includes(exc), true, "incorrect exception: " + e.message);
            return;
        }
        assertEq(undefined, exc);
    }
    // Note that these need to end in '>' or the whole thing will be ignored.
    check_throws(() => parseTemplateType("foo>", true), "too many '>' signs");
    check_throws(() => parseTemplateType("foo<<>", true), "too many '<' signs");
    check_throws(() => parseTemplateType("foo<a::bar<a,b>", true), "too many '<' signs");
    check_throws(() => parseTemplateType("foo<a>*>::bar<a,b>", true), "too many '>' signs");
}

// GC Thing and GC Pointer annotations can be inherited from template args if
// this annotation is used. Think of Maybe<T> for example: Maybe<JSObject*> has
// the same GC rules as JSObject*.

// Shortest type names are processed first.
var inheritors = Object.keys(typeInfo.InheritFromTemplateArgs).sort((a, b) => a.length - b.length);
for (const csu of inheritors) {
    const [templateName, templateArgs] = parseTemplateType(csu);
    for (const param of (templateArgs || [])) {
        // The regex matches the (possibly empty) run of '*' at the end of
        // the parameter, so `pos` is where that run starts and `ptrdness` is
        // the number of trailing '*' characters.
        const pos = param.search(/\**$/);
        const ptrdness = param.length - pos;
        const core_type = param.substr(0, pos);
        // Parameters with two or more trailing '*' are not recorded.
        if (ptrdness == 0) {
            addToKeyedList(structureParents, core_type, [csu, "template-param-" + param]);
        } else if (ptrdness == 1) {
            addToKeyedList(pointerParents, core_type, [csu, "template-param-" + param]);
        }
    }
}

// Render a pointer level for verbose output: `level` copies of "*" for a
// non-negative level, or -level copies of "&" for a negative one.
function Ptr(level) {
    if (level < 0)
        return Array(-level).fill("&").join("");
    else
        return Array(level).fill("*").join("");
}

// "typeName is a (pointer to a)^'typePtrLevel' GC type because it contains a field
// named 'child' of type 'childType' (or pointer to 'childType' if fieldPtrLevel == 1),
// which is itself a GCThing or GCPointer."
function markGCType(typeName, child, childType, typePtrLevel, fieldPtrLevel, indent = "") {
    // Some types, like UniquePtr, do not mark/trace/relocate their contained
    // pointers and so should not hold them live across a GC. UniquePtr in
    // particular should be the only thing pointing to a structure containing a
    // GCPointer, so nothing else can possibly trace it and it'll die when the
    // UniquePtr goes out of scope. So we say that memory pointed to by a
    // UniquePtr is just as unsafe as the stack for storing GC pointers.
    if (isUnsafeStorage(typeName)) {
        // If a UniquePtr<T> itself is on the stack, then there's a problem if
        // T contains a Cell*. But the UniquePtr itself stores a T*, not a T,
        // so set fieldPtrLevel=-1 to "undo" the pointer. When the type T is
        // scanned for pointers and a Cell* is found, then when unwrapping the
        // types, UniquePtr<T> will be seen as a T*=Cell** that should be
        // treated as a Cell*.
        //
        // However, that creates the possibility of an infinite loop, if you
        // have a type T that contains a UniquePtr<T> (which is allowed, because
        // it's storing a T* not a T.)
        const ptrLevel = typePtrLevel + fieldPtrLevel - 1;
        if (options.verbose) {
            printErr(`.${child} : (${childType} : "Cell${Ptr(typePtrLevel)}")${Ptr(fieldPtrLevel)} is-field-of ${typeName} : "Cell${Ptr(ptrLevel)}" [unsafe]`);
        }
        markGCTypeImpl(typeName, child, childType, ptrLevel, indent);

        // Also treat UniquePtr<T> as if it were any other struct.
    }

    // Example: with:
    //   struct Pair { JSObject* foo; int bar; };
    //   struct { Pair** info }***
    // make a call to:
    //   child='info' typePtrLevel=3 fieldPtrLevel=2
    // for a final ptrLevel of 5, used to later call:
    //   child='foo' typePtrLevel=5 fieldPtrLevel=1
    //
    const ptrLevel = typePtrLevel + fieldPtrLevel;
    if (options.verbose) {
        printErr(`.${child} : (${childType} : "Cell${Ptr(typePtrLevel)}")${Ptr(fieldPtrLevel)} is-field-of ${typeName} : "Cell${Ptr(ptrLevel)}"`);
    }
    markGCTypeImpl(typeName, child, childType, ptrLevel, indent);
}

// Record that typeName is `ptrLevel` levels of pointer away from a GC thing
// via its member `child` (of type childType), then propagate that fact to
// every type that embeds or points to typeName.
function markGCTypeImpl(typeName, child, childType, ptrLevel, indent) {
    // ...except when > 2 levels of pointers away from an actual GC thing, stop
    // searching the graph. (This would just be > 1, except that a UniquePtr
    // field might still have a GC pointer.)
    if (ptrLevel > 2)
        return;

    if (isRootedGCPointerTypeName(typeName) && !(typeName in typeInfo.RootedPointers))
        printErr("FIXME: use in-source annotation for " + typeName);

    // Rooted types do not count as GC things/pointers, and nothing is
    // propagated through them.
    if (ptrLevel == 0 && (typeName in typeInfo.RootedGCThings))
        return;
    if (ptrLevel == 1 && (isRootedGCPointerTypeName(typeName) || (typeName in typeInfo.RootedPointers)))
        return;

    if (ptrLevel == 0) {
        if (typeName in typeInfo.NonGCTypes)
            return;
        if (!(typeName in gcTypes))
            gcTypes[typeName] = new Set();
        gcTypes[typeName].add(childType);
    } else if (ptrLevel == 1) {
        if (typeName in typeInfo.NonGCPointers)
            return;
        if (!(typeName in gcPointers))
            gcPointers[typeName] = new Set();
        gcPointers[typeName].add(childType);
    }

    if (ptrLevel < 2) {
        // gcFields: typeName => Map of child => [childType, ptrLevel]. Only
        // the smallest ptrLevel seen so far for a given child is kept.
        if (!gcFields.has(typeName))
            gcFields.set(typeName, new Map());
        const fields = gcFields.get(typeName);
        if (fields.has(child)) {
            const [orig_childType, orig_ptrLevel] = fields.get(child);
            if (ptrLevel >= orig_ptrLevel) {
                // Do not recurse for things more levels of pointers away from Cell.
                // This will prevent infinite loops when types are defined recursively
                // (eg a struct containing a UniquePtr of itself).
                return;
            }
        }
        fields.set(child, [childType, ptrLevel]);
    }

    // Types embedding typeName directly stay at the same pointer level
    // (fieldPtrLevel 0); types holding a pointer to it are one level further
    // away (fieldPtrLevel 1).
    if (typeName in structureParents) {
        for (var field of structureParents[typeName]) {
            var [ holderType, fieldName ] = field;
            markGCType(holderType, fieldName, typeName, ptrLevel, 0, indent + " ");
        }
    }
    if (typeName in pointerParents) {
        for (var field of pointerParents[typeName]) {
            var [ holderType, fieldName ] = field;
            markGCType(holderType, fieldName, typeName, ptrLevel, 1, indent + " ");
        }
    }
}

// Queue typeName as an annotated GC thing. The queued array is the argument
// list later spread into markGCType (typePtrLevel 0).
function addGCType(typeName)
{
    pendingGCTypes.push([typeName, '<annotation>', '(annotation)', 0, 0]);
}

// Queue typeName as an annotated GC pointer (typePtrLevel 1).
function addGCPointer(typeName)
{
    pendingGCTypes.push([typeName, '<pointer-annotation>', '(annotation)', 1, 0]);
}

// Propagate all queued base GC types through the type graph.
for (const pending of pendingGCTypes) {
    markGCType(...pending);
}

// Call a function for a type and every type that contains the type in a field
// or as a base class (which internally is pretty much the same thing --
// subclasses are structs beginning with the base class and adding on their
// local fields.)
// func(how, name) is invoked once per reachable type, where `how` describes
// the link that led to `name` ('<annotation>' for the starting type). Types
// already present in `seen` are skipped, so a shared `seen` set can be passed
// in to deduplicate across several calls.
function foreachContainingStruct(typeName, func, seen = new Set())
{
    const visit = (how, name) => {
        if (seen.has(name))
            return;
        seen.add(name);

        func(how, name);

        // Subclasses first, then the structs that embed this type as a field.
        if (name in subClasses) {
            for (const derived of subClasses[name])
                visit("subclass of " + name, derived);
        }
        if (name in structureParents) {
            for (const [holder, field] of structureParents[name])
                visit(field + " : " + name, holder);
        }
    };

    visit('<annotation>', typeName);
}

for (var type of listNonGCPointers()) {
    typeInfo.NonGCPointers[type] = true;
}

// Print, one line per link and indented one step further per level, the
// chain of fields that makes `csu` a GC type according to gcFields. `seen`
// holds the types already visited in this explanation so that recursive type
// definitions terminate; callers normally omit it.
function explain(csu, indent, seen) {
    const visited = seen || new Set();
    visited.add(csu);

    if (!gcFields.has(csu))
        return;
    const members = gcFields.get(csu);

    // Directly annotated types end the chain.
    if (members.has('<annotation>')) {
        print(indent + "which is annotated as a GCThing");
        return;
    }
    if (members.has('<pointer-annotation>')) {
        print(indent + "which is annotated as a GCPointer");
        return;
    }

    for (const [ member, [ child, ptrdness ] ] of members) {
        let description;
        if (member[0] == '<') {
            // Synthetic "<...>" member names denote base classes.
            description = "inherits from ";
        } else {
            const source = member.startsWith("template-param-")
                ? "inherits annotations from template parameter '" + member.substr(15) + "' "
                : "contains field '" + member + "' ";

            let relation;
            if (ptrdness == -1)
                relation = "(with a pointer to unsafe storage) holding a ";
            else if (ptrdness == 0)
                relation = "of type ";
            else
                relation = "pointing to type ";

            description = source + relation;
        }

        print(indent + description + child);

        if (!visited.has(child))
            explain(child, indent + " ", visited);
    }
}

var origOut = os.file.redirect(options.gcTypes);

// Emit the GCThing section followed by the GCPointer section.
for (const [label, table] of [["GCThing", gcTypes], ["GCPointer", gcPointers]]) {
    for (var csu in table) {
        print(label + ": " + csu);
        explain(csu, " ");
    }
}

// Redirect output to the typeInfo file and close the gcTypes file.
+os.file.close(os.file.redirect(options.typeInfo)); + +// Compute the set of types that suppress GC within their RAII scopes (eg +// AutoSuppressGC, AutoSuppressGCForAnalysis). +var seen = new Set(); +for (let csu in typeInfo.GCSuppressors) + foreachContainingStruct(csu, + (holder, typeName) => { typeInfo.GCSuppressors[typeName] = holder }, + seen); + +print(JSON.stringify(typeInfo, null, 4)); + +os.file.close(os.file.redirect(origOut)); diff --git a/js/src/devtools/rootAnalysis/dumpCFG.js b/js/src/devtools/rootAnalysis/dumpCFG.js new file mode 100644 index 0000000000..0ac220840c --- /dev/null +++ b/js/src/devtools/rootAnalysis/dumpCFG.js @@ -0,0 +1,273 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// const cfg = loadCFG(scriptArgs[0]); +// dump_CFG(cfg); + +function loadCFG(filename) { + const data = os.file.readFile(filename); + return JSON.parse(data); +} + +function dump_CFG(cfg) { + for (const body of cfg) + dump_body(body); +} + +function dump_body(body, src, dst) { + const {BlockId,Command,DefineVariable,Index,Location,PEdge,PPoint,Version} = body; + + const [mangled, unmangled] = splitFunction(BlockId.Variable.Name[0]); + print(`${unmangled} at ${Location[0].CacheString}:${Location[0].Line}`); + + if (src === undefined) { + for (const def of DefineVariable) + print(str_definition(def)); + print(""); + } + + for (const edge of PEdge) { + if (src === undefined || edge.Index[0] == src) { + if (dst == undefined || edge.Index[1] == dst) + print(str_edge(edge, body)); + } + } +} + +function str_definition(def) { + const {Type, Variable} = def; + return `define ${str_Variable(Variable)} : ${str_Type(Type)}`; +} + +function badFormat(what, val) { + printErr("Bad format of " + what + ": " + JSON.stringify(val, null, 4)); + printErr((new Error).stack); +} + +function str_Variable(variable) { + if 
(variable.Kind == 'Return') + return '<returnval>'; + else if (variable.Kind == 'This') + return 'this'; + + try { + return variable.Name[1]; + } catch(e) { + badFormat("variable", variable); + } +} + +function str_Type(type) { + try { + const {Kind, Type, Name, TypeFunctionArguments} = type; + if (Kind == 'Pointer') + return str_Type(Type) + ["*", "&", "&&"][type.Reference]; + else if (Kind == 'CSU') + return Name; + else if (Kind == 'Array') + return str_Type(Type) + "[]"; + else if (Kind == 'Function') + return str_Type(Type) + "()"; + + return Kind; + } catch(e) { + badFormat("type", type); + } +} + +var OpCodeNames = { + 'LessEqual': ['<=', '>'], + 'LessThan': ['<', '>='], + 'GreaterEqual': ['>=', '<'], + 'Greater': ['>', '<='], + 'Plus': '+', + 'Minus': '-', +}; + +function opcode_name(opcode, invert) { + if (opcode in OpCodeNames) { + const name = OpCodeNames[opcode]; + if (invert === undefined) + return name; + return name[invert ? 1 : 0]; + } else { + if (invert === undefined) + return opcode; + return (invert ? '!' : '') + opcode; + } +} + +function str_value(val, env, options) { + const {Kind, Variable, String, Exp} = val; + if (Kind == 'Var') + return str_Variable(Variable); + else if (Kind == 'Drf') { + // Suppress the vtable lookup dereference + if (Exp[0].Kind == 'Fld' && "FieldInstanceFunction" in Exp[0].Field) + return str_value(Exp[0], env); + const exp = str_value(Exp[0], env); + if (options && options.noderef) + return exp; + return "*" + exp; + } else if (Kind == 'Fld') { + const {Exp, Field} = val; + const name = Field.Name[0]; + if ("FieldInstanceFunction" in Field) { + return Field.FieldCSU.Type.Name + "." + name; + } + const container = str_value(Exp[0]); + if (container.startsWith("*")) + return container.substring(1) + "->" + name; + return container + "." 
+ name; + } else if (Kind == 'Empty') { + return '<unknown>'; + } else if (Kind == 'Binop') { + const {OpCode} = val; + const op = opcode_name(OpCode); + return `${str_value(Exp[0], env)} ${op} ${str_value(Exp[1], env)}`; + } else if (Kind == 'Unop') { + const exp = str_value(Exp[0], env); + const {OpCode} = val; + if (OpCode == 'LogicalNot') + return `not ${exp}`; + return `${OpCode}(${exp})`; + } else if (Kind == 'Index') { + const index = str_value(Exp[1], env); + if (Exp[0].Kind == 'Drf') + return `${str_value(Exp[0], env, {noderef:true})}[${index}]`; + else + return `&${str_value(Exp[0], env)}[${index}]`; + } else if (Kind == 'NullTest') { + return `nullptr == ${str_value(Exp[0], env)}`; + } else if (Kind == "String") { + return '"' + String + '"'; + } else if (String !== undefined) { + return String; + } + badFormat("value", val); +} + +function str_thiscall_Exp(exp) { + return exp.Kind == 'Drf' ? str_value(exp.Exp[0]) + "->" : str_value(exp) + "."; +} + +function stripcsu(s) { + return s.replace("class ", "").replace("struct ", "").replace("union "); +} + +function str_call(prefix, edge, env) { + const {Exp, Type, PEdgeCallArguments, PEdgeCallInstance} = edge; + const {Kind, Type:cType, TypeFunctionArguments, TypeFunctionCSU} = Type; + + if (Kind == 'Function') { + const params = PEdgeCallArguments ? 
PEdgeCallArguments.Exp : []; + const strParams = params.map(str_value); + + let func; + let comment = ""; + let assign_exp; + if (PEdgeCallInstance) { + const csu = TypeFunctionCSU.Type.Name; + const method = str_value(Exp[0], env); + + // Heuristic to only display the csu for constructors + if (csu.includes(method)) { + func = stripcsu(csu) + "::" + method; + } else { + func = method; + comment = "# " + csu + "::" + method + "\n"; + } + + const {Exp: thisExp} = PEdgeCallInstance; + func = str_thiscall_Exp(thisExp) + func; + } else { + func = str_value(Exp[0]); + } + assign_exp = Exp[1]; + + let assign = ""; + if (assign_exp) { + assign = str_value(assign_exp) + " := "; + } + return `${comment}${prefix} Call ${assign}${func}(${strParams.join(", ")})`; + } + + print(JSON.stringify(edge, null, 4)); + throw new Error("unhandled format error"); +} + +function str_assign(prefix, edge) { + const {Exp} = edge; + const [lhs, rhs] = Exp; + return `${prefix} Assign ${str_value(lhs)} := ${str_value(rhs)}`; +} + +function str_loop(prefix, edge) { + const {BlockId: {Loop}} = edge; + return `${prefix} Loop ${Loop}`; +} + +function str_assume(prefix, edge) { + const {Exp, PEdgeAssumeNonZero} = edge; + const cmp = PEdgeAssumeNonZero ? 
"" : "!"; + + const {Exp: aExp, Kind, OpCode} = Exp[0]; + if (Kind == 'Binop') { + const [lhs, rhs] = aExp; + const op = opcode_name(OpCode, !PEdgeAssumeNonZero); + return `${prefix} Assume ${str_value(lhs)} ${op} ${str_value(rhs)}`; + } else if (Kind == 'Unop') { + return `${prefix} Assume ${cmp}${OpCode} ${str_value(aExp[0])}`; + } else if (Kind == 'NullTest') { + return `${prefix} Assume nullptr ${cmp}== ${str_value(aExp[0])}`; + } else if (Kind == 'Drf') { + return `${prefix} Assume ${cmp}${str_value(Exp[0])}`; + } + + print(JSON.stringify(edge, null, 4)); + throw new Error("unhandled format error"); +} + +function str_edge(edge, env) { + const {Index, Kind} = edge; + const [src, dst] = Index; + const prefix = `[${src},${dst}]`; + + if (Kind == "Call") + return str_call(prefix, edge, env); + if (Kind == 'Assign') + return str_assign(prefix, edge); + if (Kind == 'Assume') + return str_assume(prefix, edge); + if (Kind == 'Loop') + return str_loop(prefix, edge); + + print(JSON.stringify(edge, null, 4)); + throw "unhandled edge type"; +} + +function str(unknown) { + if ("Name" in unknown) { + return str_Variable(unknown); + } else if ("Index" in unknown) { + // Note: Variable also has .Index, with a different meaning. 
+ return str_edge(unknown); + } else if ("Type" in unknown) { + if ("Variable" in unknown) { + return str_definition(unknown); + } else { + return str_Type(unknown); + } + } else if ("Kind" in unknown) { + if ("BlockId" in unknown) + return str_Variable(unknown); + return str_value(unknown); + } + return "unknown"; +} + +function jdump(x) { + print(JSON.stringify(x, null, 4)); + quit(0); +} diff --git a/js/src/devtools/rootAnalysis/expect.b2g.json b/js/src/devtools/rootAnalysis/expect.b2g.json new file mode 100644 index 0000000000..06f2beb36f --- /dev/null +++ b/js/src/devtools/rootAnalysis/expect.b2g.json @@ -0,0 +1,3 @@ +{ + "expect-hazards": 0 +} diff --git a/js/src/devtools/rootAnalysis/expect.browser.json b/js/src/devtools/rootAnalysis/expect.browser.json new file mode 100644 index 0000000000..06f2beb36f --- /dev/null +++ b/js/src/devtools/rootAnalysis/expect.browser.json @@ -0,0 +1,3 @@ +{ + "expect-hazards": 0 +} diff --git a/js/src/devtools/rootAnalysis/expect.shell.json b/js/src/devtools/rootAnalysis/expect.shell.json new file mode 100644 index 0000000000..06f2beb36f --- /dev/null +++ b/js/src/devtools/rootAnalysis/expect.shell.json @@ -0,0 +1,3 @@ +{ + "expect-hazards": 0 +} diff --git a/js/src/devtools/rootAnalysis/explain.py b/js/src/devtools/rootAnalysis/explain.py new file mode 100755 index 0000000000..2fb45e07f9 --- /dev/null +++ b/js/src/devtools/rootAnalysis/explain.py @@ -0,0 +1,345 @@ +#!/usr/bin/python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + +import argparse +import json +import pathlib +import re +from html import escape + +SRCDIR = pathlib.Path(__file__).parent.parent.parent.absolute() + +parser = argparse.ArgumentParser( + description="Convert the JSON output of the hazard analysis into various text files describing the results.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, +) +parser.add_argument("--verbose", type=bool, default=False, help="verbose output") + +inputs = parser.add_argument_group("Input") +inputs.add_argument( + "rootingHazards", + nargs="?", + default="rootingHazards.json", + help="JSON input file describing the output of the hazard analysis", +) + +outputs = parser.add_argument_group("Output") +outputs.add_argument( + "gcFunctions", + nargs="?", + default="gcFunctions.txt", + help="file containing a list of functions that can GC", +) +outputs.add_argument( + "hazards", + nargs="?", + default="hazards.txt", + help="file containing the rooting hazards found", +) +outputs.add_argument( + "extra", + nargs="?", + default="unnecessary.txt", + help="file containing unnecessary roots", +) +outputs.add_argument( + "refs", + nargs="?", + default="refs.txt", + help="file containing a list of unsafe references to unrooted values", +) +outputs.add_argument( + "html", + nargs="?", + default="hazards.html", + help="HTML-formatted file with the hazards found", +) + +args = parser.parse_args() + + +# Imitate splitFunction from utility.js. 
+def splitfunc(full): + idx = full.find("$") + if idx == -1: + return (full, full) + return (full[0:idx], full[idx + 1 :]) + + +def print_header(outfh): + print( + """\ +<!DOCTYPE html> +<head> +<meta charset="utf-8"> +<style> +input { + position: absolute; + opacity: 0; + z-index: -1; +} +tt { + background: #eee; +} +.tab-label { + cursor: s-resize; +} +.tab-label a { + color: #222; +} +.tab-label:hover { + background: #eee; +} +.tab-label::after { + content: " \\25B6"; + width: 1em; + height: 1em; + color: #75f; + text-align: center; + transition: all 0.35s; +} +.accorntent { + max-height: 0; + padding: 0 1em; + color: #2c3e50; + overflow: hidden; + background: white; + transition: all 0.35s; +} + +input:checked + .tab-label::after { + transform: rotate(90deg); + content: " \\25BC"; +} +input:checked + .tab-label { + cursor: n-resize; +} +input:checked ~ .accorntent { + max-height: 100vh; +} +</style> +</head> +<body>""", + file=outfh, + ) + + +def print_footer(outfh): + print("</ol></body>", file=outfh) + + +def sourcelink(symbol=None, loc=None, range=None): + if symbol: + return f"https://searchfox.org/mozilla-central/search?q=symbol:{symbol}" + elif range: + filename, lineno = loc.split(":") + [f0, l0] = range[0] + [f1, l1] = range[1] + if f0 == f1 and l1 > l0: + return f"../{filename}?L={l0}-{l1 - 1}#{l0}" + else: + return f"../{filename}?L={l0}#{l0}" + elif loc: + filename, lineno = loc.split(":") + return f"../{filename}?L={lineno}#{lineno}" + else: + raise Exception("missing argument to sourcelink()") + + +def quoted_dict(d): + return {k: escape(v) for k, v in d.items() if type(v) == str} + + +num_hazards = 0 +num_refs = 0 +num_missing = 0 + +try: + with open(args.rootingHazards) as rootingHazards, open( + args.hazards, "w" + ) as hazards, open(args.extra, "w") as extra, open(args.refs, "w") as refs, open( + args.html, "w" + ) as html: + current_gcFunction = None + + hazardousGCFunctions = set() + + results = json.load(rootingHazards) + print_header(html) 
+ + when = min((r for r in results if r["record"] == "time"), key=lambda r: r["t"])[ + "iso" + ] + line = f"Time: {when}" + print(line, file=hazards) + print(line, file=extra) + print(line, file=refs) + + checkboxCounter = 0 + hazard_results = [] + seen_time = False + for result in results: + if result["record"] == "unrooted": + hazard_results.append(result) + gccall_mangled, _ = splitfunc(result["gccall"]) + hazardousGCFunctions.add(gccall_mangled) + if not result.get("expected"): + num_hazards += 1 + + elif result["record"] == "unnecessary": + print( + "\nFunction '{mangled}' has unnecessary root '{variable}' of type {type} at {loc}".format( + **result + ), + file=extra, + ) + + elif result["record"] == "address": + print( + ( + "\nFunction '{functionName}'" + " takes unsafe address of unrooted '{variable}'" + " at {loc}" + ).format(**result), + file=refs, + ) + num_refs += 1 + + elif result["record"] == "missing": + print( + "\nFunction '{functionName}' expected hazard(s) but none were found at {loc}".format( + **result + ), + file=hazards, + ) + num_missing += 1 + + readable2mangled = {} + with open(args.gcFunctions) as gcFunctions: + gcExplanations = {} # gcFunction => stack showing why it can GC + + current_func = None + explanation = [] + for line in gcFunctions: + if m := re.match(r"^GC Function: (.*)", line): + if current_func: + gcExplanations[splitfunc(current_func)[0]] = explanation + functionName = m.group(1) + mangled, readable = splitfunc(functionName) + if mangled not in hazardousGCFunctions: + current_func = None + continue + current_func = functionName + if readable != mangled: + readable2mangled[readable] = mangled + # TODO: store the mangled name here, and change + # gcFunctions.txt -> gcFunctions.json and key off of the mangled name. 
+ explanation = [readable] + elif current_func: + explanation.append(line.strip()) + if current_func: + gcExplanations[splitfunc(current_func)[0]] = explanation + + print( + "Found %d hazards, %d unsafe references, %d missing." + % (num_hazards, num_refs, num_missing), + file=html, + ) + print("<ol>", file=html) + + for result in hazard_results: + (result["gccall_mangled"], result["gccall_readable"]) = splitfunc( + result["gccall"] + ) + # Attempt to extract out the function name. Won't handle `Foo<int, Bar<int>>::Foo()`. + if m := re.search(r"((?:\w|:|<[^>]*?>)+)\(", result["gccall_readable"]): + result["gccall_short"] = m.group(1) + "()" + else: + result["gccall_short"] = result["gccall_readable"] + if result.get("expected"): + print("\nThis is expected, but ", end="", file=hazards) + else: + print("\nFunction ", end="", file=hazards) + print( + "'{readable}' has unrooted '{variable}'" + " of type '{type}' live across GC call '{gccall_readable}' at {loc}".format( + **result + ), + file=hazards, + ) + for edge in result["trace"]: + print(" {lineText}: {edgeText}".format(**edge), file=hazards) + explanation = gcExplanations.get(result["gccall_mangled"]) + explanation = explanation or gcExplanations.get( + readable2mangled.get( + result["gccall_readable"], result["gccall_readable"] + ), + [], + ) + if explanation: + print("GC Function: " + explanation[0], file=hazards) + for func in explanation[1:]: + print(" " + func, file=hazards) + print(file=hazards) + + if result.get("expected"): + continue + + cfgid = f"CFG_{checkboxCounter}" + gcid = f"GC_{checkboxCounter}" + checkboxCounter += 1 + print( + ( + "<li><ul>\n" + "<li>Function <a href='{symbol_url}'>{readable}</a>\n" + "<li>has unrooted <tt>{variable}</tt> of type '<tt>{type}</tt>'\n" + "<li><input type='checkbox' id='{cfgid}'><label class='tab-label' for='{cfgid}'>" + "live across GC call to" + "</label>\n" + "<div class='accorntent'>\n" + ).format( + **quoted_dict(result), + 
symbol_url=sourcelink(symbol=result["mangled"]), + cfgid=cfgid, + ), + file=html, + ) + for edge in result["trace"]: + print( + "<pre> {lineText}: {edgeText}</pre>".format(**quoted_dict(edge)), + file=html, + ) + print("</div>", file=html) + print( + "<li><input type='checkbox' id='{gcid}'><label class='tab-label' for='{gcid}'>" + "<a href='{loc_url}'><tt>{gccall_short}</tt></a> at {loc}" + "</label>\n" + "<div class='accorntent'>".format( + **quoted_dict(result), + loc_url=sourcelink(range=result["gcrange"], loc=result["loc"]), + gcid=gcid, + ), + file=html, + ) + for func in explanation: + print(f"<pre>{escape(func)}</pre>", file=html) + print("</div><hr></ul>", file=html) + + print_footer(html) + +except IOError as e: + print("Failed: %s" % str(e)) + +if args.verbose: + print("Wrote %s" % args.hazards) + print("Wrote %s" % args.extra) + print("Wrote %s" % args.refs) + print("Wrote %s" % args.html) + +print( + "Found %d hazards %d unsafe references %d missing" + % (num_hazards, num_refs, num_missing) +) diff --git a/js/src/devtools/rootAnalysis/gen-hazards.sh b/js/src/devtools/rootAnalysis/gen-hazards.sh new file mode 100755 index 0000000000..7007969a14 --- /dev/null +++ b/js/src/devtools/rootAnalysis/gen-hazards.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +set -e + +JOBS="$1" + +for j in $(seq $JOBS); do + env PATH=$PATH:$SIXGILL/bin XDB=$SIXGILL/bin/xdb.so $JS $ANALYZE gcFunctions.lst suppressedFunctions.lst gcTypes.txt $j $JOBS tmp.$j > rootingHazards.$j & +done + +wait + +for j in $(seq $JOBS); do + cat rootingHazards.$j +done diff --git a/js/src/devtools/rootAnalysis/loadCallgraph.js b/js/src/devtools/rootAnalysis/loadCallgraph.js new file mode 100644 index 0000000000..0a388f4de1 --- /dev/null +++ b/js/src/devtools/rootAnalysis/loadCallgraph.js @@ -0,0 +1,590 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('callgraph.js'); + +// Functions come out of sixgill in the form "mangled$readable". The mangled +// name is Truth. One mangled name might correspond to multiple readable names, +// for multiple reasons, including (1) sixgill/gcc doesn't always qualify types +// the same way or de-typedef the same amount; (2) sixgill's output treats +// references and pointers the same, and so doesn't distinguish them, but C++ +// treats them as separate for overloading and linking; (3) (identical) +// destructors sometimes have an int32 parameter, sometimes not. +// +// The readable names are useful because they're far more meaningful to the +// user, and are what should show up in reports and questions to mrgiggles. At +// least in most cases, it's fine to have the extra mangled name tacked onto +// the beginning for these. +// +// The strategy used is to separate out the pieces whenever they are read in, +// create a table mapping mangled names to all readable names, and use the +// mangled names in all computation -- except for limited circumstances when +// the readable name is used in annotations. +// +// Note that callgraph.txt uses a compressed representation -- each name is +// mapped to an integer, and those integers are what is recorded in the edges. +// But the integers depend on the full name, whereas the true edge should only +// consider the mangled name. And some of the names encoded in callgraph.txt +// are FieldCalls, not just function names. + +var gcEdges = {}; + +// Returns whether the function was added. (It will be refused if it was +// already there, or if attrs or annotations say it shouldn't be added.) 
function addGCFunction(caller, reason, gcFunctions, functionAttrs, functions)
{
    // functionAttrs[caller] is an [any, all] pair; index 1 is the 'all' set.
    if (functionAttrs[caller] && functionAttrs[caller][1] & ATTR_GC_SUPPRESSED)
        return false;

    if (ignoreGCFunction(functions.name[caller], functions.readableName))
        return false;

    if (!(caller in gcFunctions)) {
        gcFunctions[caller] = reason;
        return true;
    }

    return false;
}

// Every caller->callee callsite is associated with attrs saying what is
// allowed at that callsite (eg if it's in a GC suppression zone, it would have
// ATTR_GC_SUPPRESSED set.) A given caller might call the same callee multiple
// times, with different attributes. Associate the <caller,callee> edge with
// the intersection (AND) and disjunction (OR) of all of the callsites' attrs.
// The AND ('all') says what attributes are present for all callers; the OR
// ('any') says what attributes are present on any caller. Preserve the
// original order.
//
// During the same scan, build callersOf from calleesOf.
//
// rawCallees: Map from caller => list of {callee, any, all}.
// Returns {callersOf, calleesOf}, each a Map of Maps whose leaf values are
// {any, all} objects.
//
// Note: the per-caller lists inside rawCallees are emptied as a side effect.
function generate_callgraph(rawCallees) {
    const callersOf = new Map();
    const calleesOf = new Map();

    for (const [caller, callee_attrs] of rawCallees) {
        const ordered_callees = [];

        // callee_attrs is a list of {callee,any,all} objects.
        const callee2any = new Map();
        const callee2all = new Map();
        for (const {callee, any, all} of callee_attrs) {
            const prev_any = callee2any.get(callee);
            if (prev_any === undefined) {
                assert(!callee2all.has(callee));
                callee2any.set(callee, any);
                callee2all.set(callee, all);
                ordered_callees.push(callee);
            } else {
                const prev_all = callee2all.get(callee);
                callee2any.set(callee, prev_any | any);
                callee2all.set(callee, prev_all & all);
            }
        }

        // Discard the raw per-callsite entries for this caller, and record a
        // single merged {any, all} entry per callee in calleesOf (and the
        // mirrored entry in callersOf), in first-seen order.
        callee_attrs.length = 0;
        for (const callee of ordered_callees) {
            const any = callee2any.get(callee);
            const all = callee2all.get(callee);
            if (!calleesOf.has(caller))
                calleesOf.set(caller, new Map());
            calleesOf.get(caller).set(callee, {any, all});
            if (!callersOf.has(callee))
                callersOf.set(callee, new Map());
            callersOf.get(callee).set(caller, {any, all});
        }
    }

    return {callersOf, calleesOf};
}

// Parse one raw callgraph file. Returns an object holding the raw tables read
// from it: {fieldCallAttrs, fieldCallCSU, gcCalls, indirectCalls, rawCallees,
// functions}. All ids in the result are the ids used within that file.
function loadRawCallgraphFile(file, verbose)
{
    const functions = {
        // "Map" from identifier to mangled name, or sometimes to a Class.Field name.
        name: [""],

        // map from mangled name => list of readable names
        readableName: {},

        mangledToId: {}
    };

    const fieldCallAttrs = {};
    const fieldCallCSU = new Map(); // map from full field name id => csu name

    const gcCalls = [];
    const indirectCalls = [];

    // map from mangled => list of tuples of {'callee':mangled, 'any':intset, 'all':intset}
    const rawCallees = new Map();

    for (let line of readFileLines_gen(file)) {
        line = line.replace(/\n/, "");

        let match;
        // "#<id> <mangled>" introduces a new id; ids must appear in order.
        if (match = line.charAt(0) == "#" && /^\#(\d+) (.*)/.exec(line)) {
            const [ _, id, mangled ] = match;
            assert(functions.name.length == id);
            functions.name.push(mangled);
            functions.mangledToId[mangled] = id|0;
            continue;
        }
        // "= <id> <readable>" adds a readable name for an existing id.
        if (match = line.charAt(0) == "=" && /^= (\d+) (.*)/.exec(line)) {
            const [ _, id, readable ] = match;
            const mangled = functions.name[id];
            if (mangled in functions.readableName)
                functions.readableName[mangled].push(readable);
            else
                functions.readableName[mangled] = [ readable ];
            continue;
        }

        let attrs = 0;
        // Example line: D /17 6 7
        //
        // This means a direct call from 6 -> 7, but within a scope that
        // applies attrs 0x1 and 0x10 to the callee.
        //
        // Look for a bit specifier and remove it from the line if found.
        if (line.indexOf("/") != -1) {
            match = /^(..)\/(\d+) (.*)/.exec(line);
            line = match[1] + match[3];
            attrs = match[2]|0;
        }
        const tag = line.charAt(0);
        if (match = tag == 'I' && /^I (\d+) VARIABLE ([^\,]*)/.exec(line)) {
            const caller = match[1]|0;
            const name = match[2];
            if (indirectCallCannotGC(functions.name[caller], name))
                attrs |= ATTR_GC_SUPPRESSED;
            indirectCalls.push([caller, "IndirectCall: " + name, attrs]);
        } else if (match = tag == 'F' && /^F (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) {
            const caller = match[1]|0;
            const fullfield = match[2]|0;
            const csu = match[3];
            const fullfield_str = csu + "." + match[4];
            assert(functions.name[fullfield] == fullfield_str);
            if (attrs)
                fieldCallAttrs[fullfield] = attrs;
            addToMappedList(rawCallees, caller, {callee:fullfield, any:attrs, all:attrs});
            fieldCallCSU.set(fullfield, csu);

            if (fieldCallCannotGC(csu, fullfield_str))
                addToMappedList(rawCallees, fullfield, {callee:ID.nogcfunc, any:0, all:0});
            else
                addToMappedList(rawCallees, fullfield, {callee:ID.anyfunc, any:0, all:0});
        } else if (match = tag == 'V' && /^V (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) {
            // V tag is no longer used, but we are still emitting it because it
            // can be helpful to understand what's going on.
        } else if (match = tag == 'D' && /^D (\d+) (\d+)/.exec(line)) {
            const caller = match[1]|0;
            const callee = match[2]|0;
            addToMappedList(rawCallees, caller, {callee, any:attrs, all:attrs});
        } else if (match = tag == 'R' && /^R (\d+) (\d+)/.exec(line)) {
            assert(false, "R tag is no longer used");
        } else if (match = tag == 'T' && /^T (\d+) (.*)/.exec(line)) {
            const id = match[1]|0;
            const tagText = match[2];
            if (tagText == 'GC Call')
                gcCalls.push(id);
        } else {
            assert(false, "Invalid format in callgraph line: " + line);
        }
    }

    if (verbose) {
        printErr("Loaded[verbose=" + verbose + "] " + file);
    }

    return {
        fieldCallAttrs,
        fieldCallCSU,
        gcCalls,
        indirectCalls,
        rawCallees,
        functions
    };
}

// Take a set of rawcalls filenames (as in, the raw callgraph data output by
// computeCallgraph.js) and combine them into a global callgraph, renumbering
// the IDs as needed.
//
// The data from the first file is used as the accumulator and is mutated;
// returns undefined if `filenames` is empty.
function mergeRawCallgraphs(filenames, verbose) {
    let d;
    for (const filename of filenames) {
        const raw = loadRawCallgraphFile(filename, verbose);
        if (!d) {
            d = raw;
            continue;
        }

        const {
            fieldCallAttrs,
            fieldCallCSU,
            gcCalls,
            indirectCalls,
            rawCallees,
            functions
        } = raw;

        // Compute the ID mapping. Incoming functions that already have an ID
        // will be mapped to that ID; new ones will allocate a fresh ID.
        // (Index 0 is the placeholder entry and is never remapped.)
        const remap = new Array(functions.name.length);
        for (let i = 1; i < functions.name.length; i++) {
            const mangled = functions.name[i];
            const old_id = d.functions.mangledToId[mangled];
            if (old_id) {
                remap[i] = old_id;
            } else {
                const newid = d.functions.name.length;
                d.functions.mangledToId[mangled] = newid;
                d.functions.name.push(mangled);
                remap[i] = newid;
                assert(!(mangled in d.functions.readableName), mangled + " readable name is already found");
                const readables = functions.readableName[mangled];
                if (readables !== undefined)
                    d.functions.readableName[mangled] = readables;
            }
        }

        for (const [fullfield, attrs] of Object.entries(fieldCallAttrs))
            d.fieldCallAttrs[remap[fullfield]] = attrs;
        for (const [fullfield, csu] of fieldCallCSU.entries())
            d.fieldCallCSU.set(remap[fullfield], csu);
        for (const call of gcCalls)
            d.gcCalls.push(remap[call]);
        for (const [caller, name, attrs] of indirectCalls)
            d.indirectCalls.push([remap[caller], name, attrs]);
        for (const [caller, callees] of rawCallees) {
            for (const {callee, any, all} of callees) {
                addToMappedList(d.rawCallees, remap[caller]|0, {callee:remap[callee], any, all});
            }
        }
    }

    return d;
}

// Load and merge the given raw callgraph files, then compute which functions
// can GC and which are only ever called in an attributed (limited) context.
//
// Returns {gcFunctions, functions, calleesOf, callersOf, limitedFunctions},
// where gcFunctions and limitedFunctions are keyed by name rather than id.
function loadCallgraph(files, verbose)
{
    const {
        fieldCallAttrs,
        fieldCallCSU,
        gcCalls,
        indirectCalls,
        rawCallees,
        functions
    } = mergeRawCallgraphs(files, verbose);

    assert(ID.jscode == functions.mangledToId["(js-code)"]);
    assert(ID.anyfunc == functions.mangledToId["(any-function)"]);
    assert(ID.nogcfunc == functions.mangledToId["(nogc-function)"]);
    assert(ID.gc == functions.mangledToId["(GC)"]);

    addToMappedList(rawCallees, functions.mangledToId["(any-function)"], {callee:ID.gc, any:0, all:0});

    // Compute functionAttrs: it should contain the set of functions that
    // are *always* called within some sort of limited context (eg GC
    // suppression).

    // map from function id => [any,all]
    const functionAttrs = {};

    // Initialize to field calls with attrs set. (addGCFunction consults this
    // below, before the full attrs propagation has run.)
    for (const [id, attrs] of Object.entries(fieldCallAttrs))
        functionAttrs[id] = [attrs, attrs];

    // map from ID => reason
    const gcFunctions = { [ID.gc]: 'internal' };

    // Add in any extra functions at the end. (If we did this early, it would
    // mess up the id <-> name correspondence. Also, we need to know if the
    // functions even exist in the first place.)
    for (const func of extraGCFunctions(functions.readableName)) {
        addGCFunction(functions.mangledToId[func], "annotation", gcFunctions, functionAttrs, functions);
    }

    for (const func of gcCalls)
        addToMappedList(rawCallees, func, {callee:ID.gc, any:0, all:0});

    // Give each indirect call its own fresh node that calls (any-function).
    for (const [caller, indirect, attrs] of indirectCalls) {
        const id = functions.name.length;
        functions.name.push(indirect);
        functions.mangledToId[indirect] = id;
        addToMappedList(rawCallees, caller, {callee:id, any:attrs, all:attrs});
        addToMappedList(rawCallees, id, {callee:ID.anyfunc, any:0, all:0});
    }

    // Callers have a list of callees, with duplicates (if the same function is
    // called more than once.) Merge the repeated calls, only keeping attrs
    // that are in force for *every* callsite of that callee. Also, generate
    // the callersOf table at the same time.
    //
    // calleesOf : map from mangled => {mangled callee => {'any':intset, 'all':intset}}
    // callersOf : map from mangled => {mangled caller => {'any':intset, 'all':intset}}
    const {callersOf, calleesOf} = generate_callgraph(rawCallees);

    // Initialize functionAttrs to the set of all functions, where each one is
    // maximally attributed, and return a worklist containing all simple roots
    // (nodes with no callers).
    const simple_roots = gather_simple_roots(functionAttrs, calleesOf, callersOf);

    // Traverse the graph, spreading the attrs down from the roots.
    propagate_attrs(simple_roots, functionAttrs, calleesOf);

    // There are a surprising number of "recursive roots", where there is a
    // cycle of functions calling each other but not called by anything else,
    // and these roots may also have descendants. Now that the above traversal
    // has eliminated everything reachable from simple roots, traverse the
    // remaining graph to gather up a representative function from each root
    // cycle.
    //
    // Simple example: in the JS shell build, moz_xstrdup calls itself, but
    // there are no calls to it from within js/src.
    const recursive_roots = gather_recursive_roots(functionAttrs, calleesOf, callersOf, functions);

    // And do a final traversal starting with the recursive roots.
    propagate_attrs(recursive_roots, functionAttrs, calleesOf);

    for (const [f, [any, all]] of Object.entries(functionAttrs)) {
        // Throw out all functions with no attrs set, to reduce the size of the
        // output. From now on, "not in functionAttrs" means [any=0, all=0].
        if (any == 0 && all == 0)
            delete functionAttrs[f];

        // Remove GC-suppressed functions from the set of functions known to GC.
        // Also remove functions only reachable through calls that have been
        // replaced. (The key must be this loop's function id `f`.)
        if (all & (ATTR_GC_SUPPRESSED | ATTR_REPLACED))
            delete gcFunctions[f];
    }

    // functionAttrs now contains all functions that are ever called in an
    // attributed context, based on the known callgraph (i.e., calls through
    // function pointers are not taken into consideration.)

    // Sanity check to make sure the callgraph has some functions annotated as
    // GC Calls. This is mostly a check to be sure the earlier processing
    // succeeded (as opposed to, say, running on empty xdb files because you
    // didn't actually compile anything interesting.)
    assert(gcCalls.length > 0, "No GC functions found!");

    // Initialize the worklist to all known gcFunctions.
    const worklist = [ID.gc];

    // Include all field calls (but not virtual method calls).
    for (const [name, csuName] of fieldCallCSU) {
        const fullFieldName = functions.name[name];
        if (!fieldCallCannotGC(csuName, fullFieldName)) {
            gcFunctions[name] = 'arbitrary function pointer ' + fullFieldName;
            worklist.push(name);
        }
    }

    // Recursively find all callers not always called in a GC suppression
    // context, and add them to the set of gcFunctions.
    while (worklist.length) {
        const name = worklist.shift();
        assert(name in gcFunctions, "gcFunctions does not contain " + name);
        if (!callersOf.has(name))
            continue;
        for (const [caller, {any, all}] of callersOf.get(name)) {
            if ((all & (ATTR_GC_SUPPRESSED | ATTR_REPLACED)) == 0) {
                if (addGCFunction(caller, name, gcFunctions, functionAttrs, functions))
                    worklist.push(caller);
            }
        }
    }

    // Convert functionAttrs to limitedFunctions (using mangled names instead
    // of ids.)

    // set of mangled names (map from mangled name => {attributes, recursive_root:bool})
    const limitedFunctions = {};

    for (const [id, [any, all]] of Object.entries(functionAttrs)) {
        if (all) {
            limitedFunctions[functions.name[id]] = { attributes: all };
        }
    }

    for (const [id, limits, label] of recursive_roots) {
        const name = functions.name[id];
        const s = limitedFunctions[name] || (limitedFunctions[name] = {});
        s.recursive_root = true;
    }

    // Remap ids to mangled names. A reason that is itself a function id is
    // translated to that function's name; other reasons are kept as-is.
    const namedGCFunctions = {};
    for (const [caller, reason] of Object.entries(gcFunctions)) {
        namedGCFunctions[functions.name[caller]] = functions.name[reason] || reason;
    }

    return {
        gcFunctions: namedGCFunctions,
        functions,
        calleesOf,
        callersOf,
        limitedFunctions
    };
}

// Print the function table and the direct-call edges of a callgraph using
// the shell's print().
function saveCallgraph(functions, calleesOf) {
    // Write out all the ids and their readable names.
    let id = -1;
    for (const name of functions.name) {
        id += 1;
        if (id == 0) continue;
        print(`#${id} ${name}`);
        for (const readable of (functions.readableName[name] || [])) {
            if (readable != name)
                print(`= ${id} ${readable}`);
        }
    }

    // Omit field calls for now; let them appear as if they were functions.

    const attrstring = range => range.any || range.all ? `${range.all}:${range.any} ` : '';
    for (const [caller, callees] of calleesOf) {
        for (const [callee, attrs] of callees) {
            print(`D ${attrstring(attrs)}${caller} ${callee}`);
        }
    }

    // Omit tags for now. This really should preserve all tags. The "GC Call"
    // tag will already be represented in the graph by having an edge to the
    // "(GC)" node.
}

// Return a worklist of functions with no callers, and also initialize
// functionAttrs to the set of all functions, each mapped to
// [ATTRS_NONE, ATTRS_UNVISITED].
function gather_simple_roots(functionAttrs, calleesOf, callersOf) {
    const roots = [];
    for (const callee of callersOf.keys())
        functionAttrs[callee] = [ATTRS_NONE, ATTRS_UNVISITED];
    for (const caller of calleesOf.keys()) {
        functionAttrs[caller] = [ATTRS_NONE, ATTRS_UNVISITED];
        if (!callersOf.has(caller))
            roots.push([caller, ATTRS_NONE, 'root']);
    }

    return roots;
}

// Recursively traverse the callgraph from the roots. Recurse through every
// edge that weakens the attrs. (Attrs that entirely disappear, ie go to a zero
// intset, will be removed from functionAttrs.)
// roots: list of [function, edge_attrs, label] worklist entries.
// functionAttrs: object mapping function => [any, all]; updated in place.
// calleesOf: Map from function => Map of callee => {any, all}.
function propagate_attrs(roots, functionAttrs, calleesOf) {
    // The worklist is used as a stack: `top` is the number of live entries,
    // and popped slots are overwritten by later pushes.
    const worklist = Array.from(roots);
    let top = worklist.length;
    while (top > 0) {
        // Consider caller where (graph) -> caller -> (0 or more callees)
        // 'callercaller' is for debugging.
        const [caller, edge_attrs, callercaller] = worklist[--top];
        assert(caller in functionAttrs);
        const [prev_any, prev_all] = functionAttrs[caller];
        assert(prev_any !== undefined);
        assert(prev_all !== undefined);
        const [new_any, new_all] = [prev_any | edge_attrs, prev_all & edge_attrs];
        if (prev_any != new_any || prev_all != new_all) {
            // Update function attrs, then recurse to the children if anything
            // was updated.
            functionAttrs[caller] = [new_any, new_all];
            for (const [callee, {any, all}] of (calleesOf.get(caller) || new Map()))
                worklist[top++] = [callee, all | edge_attrs, caller];
        }
    }
}

// Mutually-recursive roots and their descendants will not have been visited,
// and will still be set to [0, ATTRS_UNVISITED]. Scan through and gather them.
//
// Returns a list of [function id, ATTRS_NONE, 'recursive-root'] entries.
// The `functions` parameter is accepted but not used.
function gather_recursive_roots(functionAttrs, calleesOf, callersOf, functions) {
    const roots = [];

    // Pick any node. Mark everything reachable by adding to a 'seen' set. At
    // the end, if there are any incoming edges to that node from an unmarked
    // node, then it is not a root. Otherwise, mark the node as a root. (There
    // will be at least one back edge coming into the node from a marked node
    // in this case, since otherwise it would have already been considered to
    // be a root.)
    //
    // Repeat with remaining unmarked nodes until all nodes are marked.
    const seen = new Set();
    for (let [func, [any, all]] of Object.entries(functionAttrs)) {
        // Object keys are strings; the graph Maps are keyed by integer ids.
        func = func|0;
        if (all != ATTRS_UNVISITED)
            continue;

        // We should only be looking at nodes with callers, since otherwise
        // they would have been handled in the previous pass!
        assert(callersOf.has(func));
        assert(callersOf.get(func).size > 0);

        if (seen.has(func))
            continue;

        const work = [func];
        while (work.length > 0) {
            const f = work.pop();
            if (!calleesOf.has(f)) continue;
            for (const callee of calleesOf.get(f).keys()) {
                if (!seen.has(callee) &&
                    callee != func &&
                    functionAttrs[callee][1] == ATTRS_UNVISITED)
                {
                    work.push(callee);
                    seen.add(callee);
                }
            }
        }

        assert(!seen.has(func));
        seen.add(func);
        if ([...callersOf.get(func).keys()].every(f => seen.has(f))) {
            // No unmarked incoming edges, including self-edges, so this is a
            // (recursive) root.
            roots.push([func, ATTRS_NONE, 'recursive-root']);
        }
    }

    return roots;
}

// ---- Diff metadata and license header of the next file (mach_commands.py),
// ---- which shared this line in the scraped page; kept verbatim below.
// diff --git a/js/src/devtools/rootAnalysis/mach_commands.py b/js/src/devtools/rootAnalysis/mach_commands.py
// new file mode 100644
// index 0000000000..c2fc1980c9
// --- /dev/null
// +++ b/js/src/devtools/rootAnalysis/mach_commands.py
// @@ -0,0 +1,690 @@
// +# -*- coding: utf-8 -*-
// +
// +# This Source Code Form is subject to the terms of the Mozilla Public
// +# License, v. 2.0. If a copy of the MPL was not distributed with this
// +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
def tools_dir():
    """Return the directory holding the hazard analysis toolchain.

    When MOZ_FETCHES_DIR is set to a non-empty value, the result is that
    value joined onto $HOME (raising KeyError if HOME is unset). Otherwise it
    is the "hazard-tools" directory under state_dir().
    """
    fetches_dir = os.environ.get("MOZ_FETCHES_DIR")
    if not fetches_dir:
        # In development, `mach hazard bootstrap` installs the tools
        # separately to avoid colliding with the "main" compiler versions,
        # which can change separately (and the precompiled sixgill and
        # compiler version must match exactly).
        return os.path.join(state_dir(), "hazard-tools")

    # In automation, tools are provided by toolchain dependencies.
    return os.path.join(os.environ["HOME"], fetches_dir)
def setup_env_for_tools(env):
    """Point CC, CXX and PATH in `env` at the hazard-compatible tools.

    CC and CXX become the gcc and g++ found under gcc_dir()/bin. PATH gets
    the sixgill `usr/bin` directory and the gcc bin directory prepended, in
    that order. `env` is modified in place and must already contain PATH.
    """
    gcc_bin = os.path.join(gcc_dir(), "bin")
    for variable, compiler in (("CC", "gcc"), ("CXX", "g++")):
        env[variable] = os.path.join(gcc_bin, compiler)
    env["PATH"] = f"{sixgill_dir()}/usr/bin:{gcc_bin}:{env['PATH']}"


def setup_env_for_shell(env, shell):
    """Add JS shell directory to dynamic lib search path"""
    shell_dir = os.path.dirname(shell)
    for variable in ("LD_LIBRARY_PATH", "DYLD_LIBRARY_PATH"):
        # Keep any existing value first; drop unset or empty entries so no
        # stray ":" separators appear.
        entries = [env.get(variable), shell_dir]
        env[variable] = ":".join(entry for entry in entries if entry)
+) +@CommandArgument( + "what", + default=["objdir", "work"], + nargs="*", + help="Target to clobber, must be one of {{{}}} (default " + "objdir and work).".format(", ".join(CLOBBER_CHOICES)), +) +def clobber(command_context, what, **kwargs): + from mozbuild.controller.clobber import Clobberer + + what = set(what) + if "all" in what: + what.update(CLOBBER_CHOICES) + invalid = what - CLOBBER_CHOICES + if invalid: + print( + "Unknown clobber target(s): {}. Choose from {{{}}}".format( + ", ".join(invalid), ", ".join(CLOBBER_CHOICES) + ) + ) + return 1 + + try: + substs = command_context.substs + except BuildEnvironmentNotFoundException: + substs = {} + + if "objdir" in what: + objdir = get_objdir(command_context, kwargs) + print(f"removing {objdir}") + Clobberer(command_context.topsrcdir, objdir, substs).remove_objdir(full=True) + if "work" in what: + project = kwargs["project"] + work_dir = get_work_dir(command_context, project, kwargs["work_dir"]) + print(f"removing {work_dir}") + Clobberer(command_context.topsrcdir, work_dir, substs).remove_objdir(full=True) + if "shell" in what: + objdir = os.path.join(command_context.topsrcdir, "obj-haz-shell") + print(f"removing {objdir}") + Clobberer(command_context.topsrcdir, objdir, substs).remove_objdir(full=True) + + +@inherit_command_args("build") +@SubCommand( + "hazards", "build-shell", description="Build a shell for the hazard analysis" +) +@CommandArgument( + "--mozconfig", + default=None, + metavar="FILENAME", + help="Build with the given mozconfig.", +) +def build_shell(command_context, **kwargs): + """Build a JS shell to use to run the rooting hazard analysis.""" + # The JS shell requires some specific configuration settings to execute + # the hazard analysis code, and configuration is done via mozconfig. + # Subprocesses find MOZCONFIG in the environment, so we can't just + # modify the settings in this process's loaded version. Pass it through + # the environment. 
def read_json_file(filename):
    """Parse `filename` as JSON and return the decoded value."""
    with open(filename) as fh:
        return json.load(fh)


def ensure_shell(command_context, objdir):
    """Return the path of the `js` binary recorded in objdir/binaries.json.

    `objdir` defaults to <topsrcdir>/obj-haz-shell when None.

    Raises FailedCommandError when binaries.json is missing, unreadable, is
    not valid JSON, lacks the expected keys, or lists no program named "js".
    """
    if objdir is None:
        objdir = os.path.join(command_context.topsrcdir, "obj-haz-shell")

    try:
        binaries = read_json_file(os.path.join(objdir, "binaries.json"))
        info = [b for b in binaries["programs"] if b["program"] == "js"][0]
        return os.path.join(objdir, info["install_target"], "js")
    except (OSError, KeyError, IndexError, ValueError):
        # IndexError: binaries.json exists but has no "js" entry.
        # ValueError: binaries.json is not valid JSON (json.JSONDecodeError
        # is a ValueError subclass).
        raise FailedCommandError(
            """\
no shell found in %s -- must build the JS shell with `mach hazards build-shell` first"""
            % objdir
        )
mozconfig_path) + + loader = MozconfigLoader(command_context.topsrcdir) + mozconfig = loader.read_mozconfig(mozconfig_path) + configure_args = mozconfig["configure_args"] + + # Require an explicit --enable-project/application=APP (even if you just + # want to build the default browser project.) + if ( + "--enable-project=%s" % app not in configure_args + and "--enable-application=%s" % app not in configure_args + ): + raise FailedCommandError( + textwrap.dedent( + f"""\ + mozconfig {mozconfig_path} builds wrong project. + unset MOZCONFIG to use the default {default_mozconfig}\ + """ + ) + ) + + if not any("--with-compiler-wrapper" in a for a in configure_args): + raise FailedCommandError( + "mozconfig must wrap compiles with --with-compiler-wrapper" + ) + + return mozconfig_path + + +@inherit_command_args("build") +@SubCommand( + "hazards", + "gather", + description="Gather analysis data by compiling the given project", +) +@CommandArgument("--project", default="browser", help="Build the given project.") +@CommandArgument("--application", dest="project", help="Build the given project.") +@CommandArgument( + "--haz-objdir", default=None, help="Write object files to this directory." +) +@CommandArgument( + "--work-dir", default=None, help="Directory for output and working files." 
+) +def gather_hazard_data(command_context, **kwargs): + """Gather analysis information by compiling the tree""" + project = kwargs["project"] + objdir = get_objdir(command_context, kwargs) + + work_dir = get_work_dir(command_context, project, kwargs["work_dir"]) + ensure_dir_exists(work_dir) + with open(os.path.join(work_dir, "defaults.py"), "wt") as fh: + data = textwrap.dedent( + """\ + analysis_scriptdir = "{script_dir}" + objdir = "{objdir}" + source = "{srcdir}" + sixgill = "{sixgill_dir}/usr/libexec/sixgill" + sixgill_bin = "{sixgill_dir}/usr/bin" + """ + ).format( + script_dir=script_dir(command_context), + objdir=objdir, + srcdir=command_context.topsrcdir, + sixgill_dir=sixgill_dir(), + gcc_dir=gcc_dir(), + ) + fh.write(data) + + buildscript = " ".join( + [ + command_context.topsrcdir + "/mach hazards compile", + *kwargs.get("what", []), + "--job-size=3.0", # Conservatively estimate 3GB/process + "--project=" + project, + "--haz-objdir=" + objdir, + ] + ) + args = [ + os.path.join(script_dir(command_context), "run_complete"), + "--foreground", + "--no-logs", + "--build-root=" + objdir, + "--wrap-dir=" + sixgill_dir() + "/usr/libexec/sixgill/scripts/wrap_gcc", + "--work-dir=work", + "-b", + sixgill_dir() + "/usr/bin", + "--buildcommand=" + buildscript, + ".", + ] + + return command_context.run_process(args=args, cwd=work_dir, pass_thru=True) + + +@inherit_command_args("build") +@SubCommand("hazards", "compile", description=argparse.SUPPRESS) +@CommandArgument( + "--mozconfig", + default=None, + metavar="FILENAME", + help="Build with the given mozconfig.", +) +@CommandArgument("--project", default="browser", help="Build the given project.") +@CommandArgument("--application", dest="project", help="Build the given project.") +@CommandArgument( + "--haz-objdir", + default=os.environ.get("HAZ_OBJDIR"), + help="Write object files to this directory.", +) +def inner_compile(command_context, **kwargs): + """Build a source tree and gather analysis information while 
@SubCommand(
    "hazards", "analyze", description="Analyzed gathered data for rooting hazards"
)
@CommandArgument(
    "--project",
    default="browser",
    help="Analyze the output for the given project.",
)
@CommandArgument("--application", dest="project", help="Build the given project.")
@CommandArgument(
    "--shell-objdir",
    default=None,
    help="objdir containing the optimized JS shell for running the analysis.",
)
@CommandArgument(
    "--work-dir", default=None, help="Directory for output and working files."
)
@CommandArgument(
    "--jobs", "-j", default=None, type=int, help="Number of parallel analyzers."
)
@CommandArgument(
    "--verbose",
    "-v",
    default=False,
    action="store_true",
    help="Display executed commands.",
)
@CommandArgument(
    "--from-stage",
    default=None,
    help="Stage to begin running at ('list' to see all).",
)
@CommandArgument(
    "extra",
    nargs=argparse.REMAINDER,
    default=(),
    help="Remaining non-optional arguments to analyze.py script",
)
def analyze(
    command_context,
    project,
    shell_objdir,
    work_dir,
    jobs,
    verbose,
    from_stage,
    extra,
):
    """Analyzed gathered data for rooting hazards

    Builds an analyze.py command line from the options and runs it with the
    project's work directory as the current directory. Returns whatever
    command_context.run_process returns.
    """

    shell = ensure_shell(command_context, shell_objdir)
    args = [
        os.path.join(script_dir(command_context), "analyze.py"),
        "--js",
        shell,
        *extra,
    ]

    if from_stage is None:
        pass
    elif from_stage == "list":
        args.append("--list")
    else:
        args.extend(["--first", from_stage])

    if jobs is not None:
        # --jobs is parsed with type=int, but every element of a process
        # argument list must be a string.
        args.extend(["-j", str(jobs)])

    if verbose:
        args.append("-v")

    setup_env_for_tools(os.environ)
    setup_env_for_shell(os.environ, shell)

    work_dir = get_work_dir(command_context, project, work_dir)
    return command_context.run_process(args=args, cwd=work_dir, pass_thru=True)
def annotated_source(filename, query):
    """Return `filename` rendered as HTML with one numbered <span> per line.

    The index page has URLs of the format <http://.../path/to/source.cpp?L=m-n#m>.
    The `#m` part will be stripped off and used by the browser to jump to the correct line.
    The `?L=m-n` or `?L=m` parameter will be processed here on the server to highlight
    the given line range.

    `query` is the raw query string; an empty query highlights nothing.
    Raises ValueError if the line numbers in `query` are not integers, and
    OSError if the file cannot be opened.
    """
    linequery = query.replace("L=", "")
    if "-" in linequery:
        line0, line1 = linequery.split("-", 1)
    else:
        line0, line1 = linequery or "0", linequery or "0"
    line0 = int(line0)
    line1 = int(line1)

    # Collect the pieces and join once instead of repeatedly growing a string.
    pieces = ["<pre>"]
    # Use a context manager so the file is closed even if reading fails.
    with open(filename, "rt") as fh:
        for lineno, line in enumerate(fh, 1):
            highlight = " style='background: yellow'" if line0 <= lineno <= line1 else ""
            pieces.append(
                f"{lineno} <span id='{lineno}'{highlight}>"
                + html.escape(line.rstrip())
                + "</span>\n"
            )
    # Close the element that was opened above.
    pieces.append("</pre>")

    return "".join(pieces)
+) +@CommandArgument("--port", default=6006, help="Port of the web server") +@CommandArgument( + "--serve-only", + default=False, + action="store_true", + help="Serve only, do not navigate to page", +) +def view_hazards(command_context, project, haz_objdir, work_dir, port, serve_only): + work_dir = get_work_dir(command_context, project, work_dir) + haztop = os.path.basename(work_dir) + if haz_objdir is None: + haz_objdir = os.environ.get("HAZ_OBJDIR") + if haz_objdir is None: + haz_objdir = os.path.join(command_context.topsrcdir, "obj-analyzed-" + project) + + httpd = None + + def serve_source_file(request, path): + info = {"req": path} + + def log(fmt, level=logging.INFO): + return command_context.log(level, "view-hazards", info, fmt) + + if path in ("", f"{haztop}"): + info["dest"] = f"/{haztop}/hazards.html" + info["code"] = 301 + log("serve '{req}' -> {code} {dest}") + return (info["code"], {"Location": info["dest"]}, "") + + # Allow files to be served from the source directory or the objdir. + roots = (command_context.topsrcdir, haz_objdir) + + try: + # Validate the path. Some source files have weird characters in their paths (eg "+"), but they + # all start with an alphanumeric or underscore. + command_context.log( + logging.DEBUG, "view-hazards", {"path": path}, "Raw path: {path}" + ) + path_component = r"\w[\w\-\.\+]*" + if not re.match(f"({path_component}/)*{path_component}$", path): + raise ValueError("invalid path") + + # Resolve the path to under one of the roots, and + # ensure that the actual file really is underneath a root directory. + for rootdir in roots: + fullpath = os.path.join(rootdir, path) + info["path"] = fullpath + fullpath = os.path.realpath(fullpath) + if os.path.isfile(fullpath): + # symlinks between roots are ok, but not symlinks outside of the roots. + tops = [ + d + for d in roots + if fullpath.startswith(os.path.realpath(d) + "/") + ] + if len(tops) > 0: + break # Found a file underneath a root. 
+ else: + raise IOError("not found") + + html = annotated_source(fullpath, request.query) + log("serve '{req}' -> 200 {path}") + return ( + 200, + {"Content-type": "text/html", "Content-length": len(html)}, + html, + ) + except (IOError, ValueError): + log("serve '{req}' -> 404 {path}", logging.ERROR) + return ( + 404, + {"Content-type": "text/plain"}, + "We don't have that around here. Don't be asking for it.", + ) + + httpd = mozhttpd.MozHttpd( + port=port, + docroot=None, + path_mappings={"/" + haztop: work_dir}, + urlhandlers=[ + # Treat everything not starting with /haz-browser/ (or /haz-js/) + # as a source file to be processed. Everything else is served + # as a plain file. + { + "method": "GET", + "path": "/(?!haz-" + project + "/)(.*)", + "function": serve_source_file, + }, + ], + log_requests=True, + ) + + # The mozhttpd request handler class eats log messages. + httpd.handler_class.log_message = lambda self, format, *args: command_context.log( + logging.INFO, "view-hazards", {}, format % args + ) + + print("Serving at %s:%s" % (httpd.host, httpd.port)) + + httpd.start(block=False) + url = httpd.get_url(f"/{haztop}/hazards.html") + display_url = True + if not serve_only: + try: + webbrowser.get().open_new_tab(url) + display_url = False + except Exception: + pass + if display_url: + print("Please open %s in a browser." % url) + + print("Hit CTRL+c to stop server.") + httpd.server.join() diff --git a/js/src/devtools/rootAnalysis/mergeJSON.js b/js/src/devtools/rootAnalysis/mergeJSON.js new file mode 100644 index 0000000000..2ac5a983db --- /dev/null +++ b/js/src/devtools/rootAnalysis/mergeJSON.js @@ -0,0 +1,26 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +var infiles = [...scriptArgs]; +var outfile = infiles.pop(); + +let output; +for (const filename of infiles) { + const data = JSON.parse(os.file.readFile(filename)); + if (!output) { + output = data; + } else if (Array.isArray(data) != Array.isArray(output)) { + throw new Error('mismatched types'); + } else if (Array.isArray(output)) { + output.push(...data); + } else { + Object.assign(output, data); + } +} + +var origOut = os.file.redirect(outfile); +print(JSON.stringify(output, null, 4)); +os.file.close(os.file.redirect(origOut)); diff --git a/js/src/devtools/rootAnalysis/mozconfig.browser b/js/src/devtools/rootAnalysis/mozconfig.browser new file mode 100644 index 0000000000..6c3517865b --- /dev/null +++ b/js/src/devtools/rootAnalysis/mozconfig.browser @@ -0,0 +1,19 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This mozconfig is used when analyzing the source code of the Firefox browser +# for GC rooting hazards. See +# <https://wiki.mozilla.org/Javascript:SpiderMonkey:ExactStackRooting>. + +ac_add_options --enable-project=browser +ac_add_options --enable-js-shell + +# the sixgill wrapper is not compatible with building wasm objects with clang. +export WASM_SANDBOXED_LIBRARIES= + +# the hazard analysis is not happy with std::filesystem uses in relrhack host +# tool. +ac_add_options --disable-elf-hack + +. $topsrcdir/js/src/devtools/rootAnalysis/mozconfig.common diff --git a/js/src/devtools/rootAnalysis/mozconfig.common b/js/src/devtools/rootAnalysis/mozconfig.common new file mode 100644 index 0000000000..c68fb6a26c --- /dev/null +++ b/js/src/devtools/rootAnalysis/mozconfig.common @@ -0,0 +1,37 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Configuration shared between browser and shell builds. + +# The configuration options are chosen to compile the most code +# (--enable-debug, --enable-tests) in the trickiest way possible +# (--enable-optimize) to maximize the chance of seeing tricky static orderings. +ac_add_options --enable-debug +ac_add_options --enable-tests +ac_add_options --enable-optimize + +# Wrap all compiler invocations in order to enable the plugin and send +# information to a common database. +if [ -z "$AUTOMATION" ]; then + # Developer build: `mach hazards bootstrap` puts tools here: + TOOLS_DIR="$MOZBUILD_STATE_PATH/hazard-tools" +else + # Automation build: tools are downloaded from upstream tasks. + TOOLS_DIR="$MOZ_FETCHES_DIR" +fi +ac_add_options --with-compiler-wrapper="${TOOLS_DIR}"/sixgill/usr/libexec/sixgill/scripts/wrap_gcc/basecc + +# Stuff that gets in the way. +ac_add_options --without-ccache +ac_add_options --disable-replace-malloc + +# -Wattributes is very verbose due to attributes being ignored on template +# instantiations. +# +# -Wignored-attributes is very verbose due to attributes being +# ignored on template parameters. +ANALYSIS_EXTRA_CFLAGS="-Wno-attributes -Wno-ignored-attributes" +CFLAGS="$CFLAGS $ANALYSIS_EXTRA_CFLAGS" +CPPFLAGS="$CPPFLAGS $ANALYSIS_EXTRA_CFLAGS" +CXXFLAGS="$CXXFLAGS $ANALYSIS_EXTRA_CFLAGS" diff --git a/js/src/devtools/rootAnalysis/mozconfig.haz_shell b/js/src/devtools/rootAnalysis/mozconfig.haz_shell new file mode 100644 index 0000000000..68741f0454 --- /dev/null +++ b/js/src/devtools/rootAnalysis/mozconfig.haz_shell @@ -0,0 +1,18 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This mozconfig is for compiling the JS shell that runs the static rooting +# hazard analysis. 
See +# <https://wiki.mozilla.org/Javascript:SpiderMonkey:ExactStackRooting>. + +ac_add_options --enable-ctypes +ac_add_options --enable-optimize +ac_add_options --disable-debug +ac_add_options --enable-project=js +ac_add_options --enable-nspr-build +ac_add_options --disable-jemalloc + +if [ -n "$AUTOMATION" ]; then + mk_add_options MOZ_OBJDIR="${HAZARD_SHELL_OBJDIR}" +fi diff --git a/js/src/devtools/rootAnalysis/mozconfig.js b/js/src/devtools/rootAnalysis/mozconfig.js new file mode 100644 index 0000000000..07e584c210 --- /dev/null +++ b/js/src/devtools/rootAnalysis/mozconfig.js @@ -0,0 +1,16 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This mozconfig is used when analyzing the source code of the js/src tree for +# GC rooting hazards. See +# <https://wiki.mozilla.org/Javascript:SpiderMonkey:ExactStackRooting>. + +ac_add_options --enable-project=js + +# Also compile NSPR to see through its part of the control flow graph (not +# currently needed, but also helps with weird problems finding the right +# headers.) +ac_add_options --enable-nspr-build + +. 
$topsrcdir/js/src/devtools/rootAnalysis/mozconfig.common diff --git a/js/src/devtools/rootAnalysis/run-analysis.sh b/js/src/devtools/rootAnalysis/run-analysis.sh new file mode 100755 index 0000000000..157821cc92 --- /dev/null +++ b/js/src/devtools/rootAnalysis/run-analysis.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +SRCDIR=$(cd $(dirname $0)/../../../..; pwd) +GECKO_PATH=$SRCDIR $SRCDIR/taskcluster/scripts/builder/build-haz-linux.sh $(pwd) "$@" diff --git a/js/src/devtools/rootAnalysis/run-test.py b/js/src/devtools/rootAnalysis/run-test.py new file mode 100755 index 0000000000..b4835efec5 --- /dev/null +++ b/js/src/devtools/rootAnalysis/run-test.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import argparse +import os +import site +import subprocess +import sys +from glob import glob + +scriptdir = os.path.abspath(os.path.dirname(__file__)) +testdir = os.path.join(scriptdir, "t") + +site.addsitedir(testdir) +from testlib import Test, equal + +parser = argparse.ArgumentParser(description="run hazard analysis tests") +parser.add_argument( + "--js", default=os.environ.get("JS"), help="JS binary to run the tests with" +) +parser.add_argument( + "--sixgill", + default=os.environ.get("SIXGILL", os.path.join(testdir, "sixgill")), + help="Path to root of sixgill installation", +) +parser.add_argument( + "--sixgill-bin", + default=os.environ.get("SIXGILL_BIN"), + help="Path to sixgill binary dir", +) +parser.add_argument( + "--sixgill-plugin", + default=os.environ.get("SIXGILL_PLUGIN"), + help="Full path to sixgill gcc plugin", +) +parser.add_argument( + "--gccdir", default=os.environ.get("GCCDIR"), help="Path to GCC installation dir" +) +parser.add_argument("--cc", default=os.environ.get("CC"), help="Path to gcc") +parser.add_argument("--cxx", default=os.environ.get("CXX"), help="Path 
def make_dir(dirname, exist_ok=True):
    """Create the single directory `dirname`.

    If something already exists at that path, the error is ignored when
    `exist_ok` is true and re-raised otherwise. Any other OSError (missing
    parent, permission denied, ...) always propagates.
    """
    try:
        os.mkdir(dirname)
    except FileExistsError:
        # Match on the exception type (errno EEXIST) rather than on the
        # strerror text, which varies with locale and platform.
        if not exist_ok:
            raise
open(testpath).read() + testcode = compile(testscript, testpath, "exec") + try: + exec(testcode, {"test": test, "equal": equal}) + except subprocess.CalledProcessError: + print("TEST-FAILED: %s" % name) + failed.add(name) + except AssertionError: + print("TEST-FAILED: %s" % name) + failed.add(name) + raise + else: + print("TEST-PASSED: %s" % name) + passed.add(name) + +if failed: + raise Exception("Failed tests: " + " ".join(failed)) + +print(f"All {len(passed)} tests passed.") diff --git a/js/src/devtools/rootAnalysis/run_complete b/js/src/devtools/rootAnalysis/run_complete new file mode 100755 index 0000000000..c9355267db --- /dev/null +++ b/js/src/devtools/rootAnalysis/run_complete @@ -0,0 +1,384 @@ +#!/usr/bin/perl + +# Sixgill: Static assertion checker for C/C++ programs. +# Copyright (C) 2009-2010 Stanford University +# Author: Brian Hackett +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# do a complete run of the system from raw source to reports. this requires +# various run_monitor processes to be running in the background (maybe on other +# machines) and watching a shared poll_file for jobs. if the output directory +# for this script already exists then an incremental analysis will be performed +# and the reports will only reflect the changes since the earlier run. 
#################################
# environment specific settings #
#################################

my $WORKDIR;
my $SIXGILL_BIN;

# poll file shared with the run_monitor script.
my $poll_file;

# root directory of the project.
my $build_dir;

# directory containing gcc wrapper scripts.
my $wrap_dir;

# optional file with annotations from the web interface.
my $ann_file = "";

# optional output directory to do a diff against.
my $old_dir = "";

# run in the foreground
my $foreground;

my $builder = "make -j4";

my $suppress_logs;

# The short alias -b belongs to --sixgill-binaries only, matching the
# "Use the -b option" message below. It used to be declared on --build-root
# as well, which made the two specifications clash.
# NOTE(review): Getopt::Long appears to let the later of two clashing
# specifications win, so -b presumably already selected the binaries
# directory -- confirm before relying on it.
GetOptions("build-root=s" => \$build_dir,
           "poll-file=s" => \$poll_file,
           "no-logs!" => \$suppress_logs,
           "work-dir=s" => \$WORKDIR,
           "sixgill-binaries|binaries|b=s" => \$SIXGILL_BIN,
           "wrap-dir=s" => \$wrap_dir,
           "annotations-file|annotations|a=s" => \$ann_file,
           "old-dir|old=s" => \$old_dir,
           "foreground!" => \$foreground,
           "buildcommand=s" => \$builder,
           )
    or die;

if ($old_dir ne "" && not -d $old_dir) {
    die "Old directory '$old_dir' does not exist\n";
}

# Fill in defaults first: the default poll file and build directory live
# underneath the work directory.
$WORKDIR ||= "sixgill-work";
mkdir($WORKDIR, 0755) if ! -d $WORKDIR;
$poll_file ||= "$WORKDIR/poll.file";
$build_dir ||= "$WORKDIR/js-inbound-xgill";

# Create the build directory only once its final value is known. Doing this
# before the default was assigned meant that, without --build-root, an
# undefined value was handed to -d and mkdir and the default directory was
# never created.
if (not -d $build_dir) {
    mkdir($build_dir);
}

if (!defined $SIXGILL_BIN) {
    # Fall back to whichever directory holds the xmanager found on $PATH.
    chomp(my $path = `which xmanager`);
    if ($path) {
        $SIXGILL_BIN = dirname($path);
    } else {
        die "Cannot find sixgill binaries. Use the -b option.";
    }
}

# Prefer a wrapper directory inside the work directory; if it has no basecc
# script, fall back to the one that sits next to the sixgill binaries.
$wrap_dir ||= "$WORKDIR/xgill-inbound/wrap_gcc";
$wrap_dir = "$SIXGILL_BIN/../scripts/wrap_gcc" if not (-e "$wrap_dir/basecc");
die "Bad wrapper directory: $wrap_dir" if not (-e "$wrap_dir/basecc");
# Pids of helper processes that must not outlive this script. Entries are
# added when a helper is started and deleted once it has been reaped.
our %kill_on_exit;
END {
    for my $pid (keys %kill_on_exit) {
        # kill() takes the signal first and the process list after it.
        # kill($pid) on its own treats the pid as a signal number and is
        # given no processes, so it never terminated anything.
        kill('TERM', $pid);
    }
}
# Run the instrumented build.
#
# Forks a child that (1) starts an xmanager process, (2) writes the
# configuration file read by the compiler wrapper scripts, (3) points
# $CC/$CXX at the wrappers and runs the build command in $build_dir, and
# (4) tells the manager to finish and waits for it. The parent waits for
# that child and returns its exit code (0 on success).
sub run_build
{
    print "build started: ";
    print scalar(localtime());
    print "\n";

    # fork off a process to run the build.
    defined(my $pid = fork) or die;

    # log file for the manager.
    my $manager_log_file = "$result_dir/build_manager.log";

    if (!$pid) {
        # this is the child process, fork another process to run a manager.
        defined(my $pid = fork) or die;
        my $logging = logging_suffix($suppress_logs, $manager_log_file);
        exec("$xmanager -terminate-on-assert $logging") if (!$pid);
        $kill_on_exit{$pid} = 1;

        if (!$suppress_logs) {
            # open new streams to redirect stdout and stderr.
            open(LOGOUT, "> $result_dir/build.log");
            open(LOGERR, "> $result_dir/build_err.log");
            STDOUT->fdopen(\*LOGOUT, "w");
            STDERR->fdopen(\*LOGERR, "w");
        }

        my $address = get_manager_address($manager_log_file);

        # write the configuration file for the wrapper script.
        my $config_file = "$WORKDIR/xgill.config";
        open(CONFIG, ">", $config_file) or die "create $config_file: $!";
        print CONFIG "$prefix_dir\n";
        print CONFIG Cwd::abs_path("$result_dir/build_xgill.log")."\n";
        print CONFIG "$address\n";
        my @extra = ("-fplugin-arg-xgill-mangle=1");
        push(@extra, "-fplugin-arg-xgill-annfile=$ann_file")
            if ($ann_file ne "" && -e $ann_file);
        print CONFIG join(" ", @extra) . "\n";
        close(CONFIG);

        # Tell the wrapper where to find the config
        $ENV{"XGILL_CONFIG"} = Cwd::abs_path($config_file);

        # If overriding $CC, use GCCDIR to tell the wrapper scripts where the
        # real compiler is. If $CC is not set, then the wrapper script will
        # search $PATH anyway.
        if (exists $ENV{CC}) {
            $ENV{GCCDIR} = dirname($ENV{CC});
        }

        # Force the wrapper scripts to be run in place of the compiler during
        # whatever build process we use.
        $ENV{CC} = "$wrap_dir/" . basename($ENV{CC} // "gcc");
        $ENV{CXX} = "$wrap_dir/" . basename($ENV{CXX} // "g++");

        # do the build, cleaning if necessary.
        chdir $build_dir;
        clean_project() if ($do_clean);
        my $exit_status = build_project();

        # signal the manager that it's over.
        system("$xsource -remote=$address -end-manager");

        # wait for the manager to clean up and terminate.
        print "Waiting for manager to finish (build status $exit_status)...\n";
        waitpid($pid, 0);

        # $? is a raw wait status: the exit code sits in the high byte and a
        # terminating signal in the low bits. Handing it to exit() unchanged
        # truncates e.g. 256 (exit code 1) to 0, which reported a failed
        # manager as success. Decode it, and count death by signal as failure.
        my $manager_status = $? >> 8;
        $manager_status = 1 if $manager_status == 0 && ($? & 127);
        delete $kill_on_exit{$pid};

        # build is finished, the complete run can resume.
        # return value only useful if --foreground
        print "Exiting with status " . ($manager_status || $exit_status) . "\n";
        exit($manager_status || $exit_status);
    }

    # this is the complete process, wait for the build to finish.
    waitpid($pid, 0);
    my $status = $? >> 8;
    print "build finished (status $status): ";
    print scalar(localtime());
    print "\n";

    return $status;
}
# Pack every report index directory recorded in $indexes into reports.tgz
# and then delete the directories. $indexes is a space-separated list that
# run_index appends to; it stays undefined when no index was generated.
sub archive_indexes
{
    print "archive started: ";
    print scalar(localtime());
    print "\n";

    if (defined $indexes && $indexes =~ /\S/) {
        system("tar -czf reports.tgz $indexes");
        system("rm -rf $indexes");
    } else {
        # Nothing was indexed: interpolating the undefined list would warn
        # and would run tar without any member to archive.
        print "no report indexes to archive\n";
    }

    print "archive finished: ";
    print scalar(localtime());
    print "\n";
}
// Fixture class for the exceptions test. Its only member is an RAII_GC, whose
// destructor calls GC(), so destroying the member -- whether from
// ~AutoSomething or while unwinding a constructor that threw -- can GC.
class AutoSomething {
  RAII_GC gc;

 public:
  AutoSomething() : gc() {
    // The member is already constructed at this point. The test expects a
    // hazard through this constructor only when built with -fexceptions.
    asm("");  // Ooh, scary, this might throw an exception
  }
  // Presumably the asm("") keeps this body from being treated as trivial, as
  // the comment on GC() describes -- TODO confirm.
  ~AutoSomething() { asm(""); }
};
// Call-graph fixture: f calls GC() directly and is mutually recursive with
// g and h (f -> g, f -> h here; g and h call back into f). test.py expects
// f, g and h to all be GC functions and checks the f -> g and f -> h edges.
void f(int x) {
  if (x % 3) {
    GC();
    g(x);
  }
  h(x);
}
# The harness that runs this script injects the global 'test' object.
# flake8: noqa: F821

test.compile("source.cpp")
test.run_analysis_script("gcTypes")

info = test.load_typeInfo()
gcFunctions = test.load_gcFunctions()

f = "void f(int32)"
g = "void g(int32)"
h = "void h(int32)"

# f calls GC() itself and the three functions call each other, so all of
# them must be classified as GC functions.
for gc_name in (f, g, h):
    assert gc_name in gcFunctions
assert "void leaf()" not in gcFunctions
assert "void nonrecursive_root()" in gcFunctions

# Direct call edges among f, g and h.
callgraph = test.load_callgraph()
for caller, callee in ((f, g), (f, h), (g, f), (g, h)):
    assert callgraph.calleeGraph[caller][callee]

# node[i] is the unmangled name of n<i>; index 0 is never used.
node = ["void n{}(int32)".format(i) for i in range(10)]
mnode = [callgraph.unmangledToMangled.get(name) for name in node]

# Every edge of the n1..n9 graph drawn in source.cpp.
expected_edges = [
    (1, 2),
    (2, 1),
    (4, 5),
    (5, 4),
    (2, 3),
    (5, 3),
    (3, 6),
    (6, 7),
    (7, 8),
    (8, 7),
    (8, 9),
]
for src, dst in expected_edges:
    assert callgraph.calleeGraph[node[src]][node[dst]]

# Collect the unmangled names of all functions flagged as recursive roots.
funcInfo = test.load_funcInfo()
rroots = {
    callgraph.mangledToUnmangled[mangled]
    for mangled in funcInfo
    if funcInfo[mangled].get("recursive_root")
}

# Exactly one member of each two-function cycle is picked as its root.
assert len({node[1], node[2]} & rroots) == 1
assert len({node[4], node[5]} & rroots) == 1
assert len(rroots) == 4, "rroots = {}".format(rroots)  # n1, n4, f, self_recursive
// Fixture type whose destructor ends in a direct GC() call. f() declares a
// local of this type, so the destructor runs while f's return value is
// pending (the "hazard in return value" case noted at the end of f()).
struct GCInDestructor {
  ~GCInDestructor() {
    invisible();  // extern function; no body is visible in this file
    asm("");
    *xp = 4;  // store through the file-level volatile pointer to x
    GC();
  }
};
// Fixture helper for loopy(). Despite the name nothing is copied: `src` is
// never read. The function calls GC() and returns a fixed, non-null dummy
// pointer value (88), which callers only assign and never dereference here.
Cell* copy_and_gc(Cell* src) {
  GC();
  return reinterpret_cast<Cell*>(88);
}
+ for (int i5 = 0; i5 < 10; i5++) { + GC(); + CellContainer haz7; + use(haz7.cell); + haz7.cell = &cell; + } + + // Hazard: make sure we *can* see hazards across iterations involving + // CellContainer; + CellContainer haz8; + for (int i6 = 0; i6 < 10; i6++) { + GC(); + use(haz8.cell); + haz8.cell = &cell; + } +} + +namespace mozilla { +template <typename T> +class UniquePtr { + T* val; + + public: + UniquePtr() : val(nullptr) { asm(""); } + UniquePtr(T* p) : val(p) {} + UniquePtr(UniquePtr<T>&& u) : val(u.val) { u.val = nullptr; } + ~UniquePtr() { use(val); } + T* get() { return val; } + void reset() { val = nullptr; } +} ANNOTATE("moz_inherit_type_annotations_from_template_args"); +} // namespace mozilla + +extern void consume(mozilla::UniquePtr<Cell> uptr); + +void safevals() { + Cell cell; + + // Simple hazard. + Cell* unsafe1 = &cell; + GC(); + use(unsafe1); + + // Safe because it's known to be nullptr. + Cell* safe2 = &cell; + safe2 = nullptr; + GC(); + use(safe2); + + // Unsafe because it may not be nullptr. + Cell* unsafe3 = &cell; + if (reinterpret_cast<long>(&cell) & 0x100) { + unsafe3 = nullptr; + } + GC(); + use(unsafe3); + + // Unsafe because it's not nullptr anymore. + Cell* unsafe3b = &cell; + unsafe3b = nullptr; + unsafe3b = &cell; + GC(); + use(unsafe3b); + + // Hazard involving UniquePtr. + { + mozilla::UniquePtr<Cell> unsafe4(&cell); + GC(); + // Destructor uses unsafe4. + } + + // reset() to safe value before the GC. + { + mozilla::UniquePtr<Cell> safe5(&cell); + safe5.reset(); + GC(); + } + + // reset() to safe value after the GC. + { + mozilla::UniquePtr<Cell> safe6(&cell); + GC(); + safe6.reset(); + } + + // reset() to safe value after the GC -- but we've already used it, so it's + // too late. + { + mozilla::UniquePtr<Cell> unsafe7(&cell); + GC(); + use(unsafe7.get()); + unsafe7.reset(); + } + + // initialized to safe value. + { + mozilla::UniquePtr<Cell> safe8; + GC(); + } + + // passed to a function that takes ownership before GC. 
+ { + mozilla::UniquePtr<Cell> safe9(&cell); + consume(std::move(safe9)); + GC(); + } + + // passed to a function that takes ownership after GC. + { + mozilla::UniquePtr<Cell> unsafe10(&cell); + GC(); + consume(std::move(unsafe10)); + } + + // annotated to be safe before the GC. (This doesn't make + // a lot of sense here; the annotation is for when some + // type is known to only contain safe values, eg it is + // initialized as empty, or it is a union and we know + // that the GC pointer variants are not in use.) + { + mozilla::UniquePtr<Cell> safe11(&cell); + JS_HAZ_VARIABLE_IS_GC_SAFE(safe11); + GC(); + } + + // annotate as safe value after the GC -- since nothing else + // has touched the variable, that means it was already safe + // during the GC. + { + mozilla::UniquePtr<Cell> safe12(&cell); + GC(); + JS_HAZ_VARIABLE_IS_GC_SAFE(safe12); + } + + // annotate as safe after the GC -- but we've already used it, so it's + // too late. + { + mozilla::UniquePtr<Cell> unsafe13(&cell); + GC(); + use(unsafe13.get()); + JS_HAZ_VARIABLE_IS_GC_SAFE(unsafe13); + } + + // Check JS_HAZ_CAN_RUN_SCRIPT annotation handling. + IDL_Subclass sub; + IDL_Subclass* subp = ⊂ + IDL_Interface* base = ⊂ + { + Cell* unsafe14 = &cell; + base->canScriptThis(); + use(unsafe14); + } + { + Cell* unsafe15 = &cell; + subp->canScriptThis(); + use(unsafe15); + } + { + // Almost the same as the last one, except call using the actual object, not + // a pointer. The type is known, so there is no danger of the actual type + // being a subclass that has overridden the method with an implementation + // that calls script. + Cell* safe16 = &cell; + sub.canScriptThis(); + use(safe16); + } + { + Cell* safe17 = &cell; + base->cannotScriptThis(); + use(safe17); + } + { + Cell* safe18 = &cell; + subp->cannotScriptThis(); + use(safe18); + } + { + // A use after a GC, but not before. 
(This does not initialize safe19 by + // setting it to a value, because assignment would start its live range, and + // this test is to see if a variable with no known live range start requires + // a use before the GC or not. It should.) + Cell* safe19; + GC(); + extern void initCellPtr(Cell**); + initCellPtr(&safe19); + } +} + +// Make sure `this` is live at the beginning of a function. +class Subcell : public Cell { + int method() { + GC(); + return f; // this->f + } +}; + +template <typename T> +struct RefPtr { + ~RefPtr() { GC(); } + bool forget() { return true; } + bool use() { return true; } + void assign_with_AddRef(T* aRawPtr) { asm(""); } +}; + +extern bool flipcoin(); + +Cell* refptr_test1() { + static Cell cell; + RefPtr<float> v1; + Cell* ref_unsafe1 = &cell; + return ref_unsafe1; +} + +Cell* refptr_test2() { + static Cell cell; + RefPtr<float> v2; + Cell* ref_safe2 = &cell; + v2.forget(); + return ref_safe2; +} + +Cell* refptr_test3() { + static Cell cell; + RefPtr<float> v3; + Cell* ref_unsafe3 = &cell; + if (x) { + v3.forget(); + } + return ref_unsafe3; +} + +Cell* refptr_test4() { + static Cell cell; + RefPtr<int> r; + return &cell; // hazard in return value +} + +Cell* refptr_test5() { + static Cell cell; + RefPtr<int> r; + return nullptr; // returning immobile value, so no hazard +} + +float somefloat = 1.2; + +Cell* refptr_test6() { + static Cell cell; + RefPtr<float> v6; + Cell* ref_unsafe6 = &cell; + // v6 can be used without an intervening forget() before the end of the + // function, even though forget() will be called at least once. + v6.forget(); + if (x) { + v6.forget(); + v6.assign_with_AddRef(&somefloat); + } + return ref_unsafe6; +} + +Cell* refptr_test7() { + static Cell cell; + RefPtr<float> v7; + Cell* ref_unsafe7 = &cell; + // Similar to above, but with a loop. 
// RefPtr fixture. ~RefPtr() calls GC(), and it runs at function exit while
// the returned Cell* is pending. forget() is only reached inside the loop,
// so when flipcoin() is false on the first check the destructor runs without
// any forget() after assign_with_AddRef(). test.py expects this function to
// appear among the functions with hazards.
Cell* refptr_test10() {
  static Cell cell;
  RefPtr<float> v10;
  Cell* ref_unsafe10 = &cell;
  // The destructor has a backwards path that skips the loop body.
  v10.assign_with_AddRef(&somefloat);
  while (flipcoin()) {
    v10.forget();
  }
  return ref_unsafe10;
}
# flake8: noqa: F821
# 'test' is injected by the script that executes this file.

from collections import defaultdict

test.compile("source.cpp")
test.run_analysis_script("gcTypes")

# gcFunctions should be the inverse, but we get to rely on unmangled names here.
gcFunctions = test.load_gcFunctions()
assert "void GC()" in gcFunctions
assert "void suppressedFunction()" not in gcFunctions
assert "void halfSuppressedFunction()" in gcFunctions
assert "void unsuppressedFunction()" in gcFunctions
assert "int32 Subcell::method()" in gcFunctions
assert "Cell* f()" in gcFunctions

# Index hazards by variable name. Names that occur in several functions
# (e.g. "<returnvalue>") keep only the last hazard seen.
hazards = test.load_hazards()
hazmap = {haz.variable: haz for haz in hazards}
assert "cell1" not in hazmap
assert "cell2" in hazmap
assert "cell3" in hazmap
assert "cell4" not in hazmap
assert "cell5" not in hazmap
assert "cell6" not in hazmap
assert "<returnvalue>" in hazmap
assert "this" in hazmap

assert hazmap["cell2"].function == "Cell* f()"

# Check that the correct GC call is reported for each hazard. (cell3 has a
# hazard from two different GC calls; it doesn't really matter which is
# reported.)
assert hazmap["cell2"].GCFunction == "void halfSuppressedFunction()"
assert hazmap["cell3"].GCFunction in (
    "void halfSuppressedFunction()",
    "void unsuppressedFunction()",
)
returnval_hazards = set(
    haz.function for haz in hazards if haz.variable == "<returnvalue>"
)
assert "Cell* f()" in returnval_hazards
assert "Cell* refptr_test1()" in returnval_hazards
assert "Cell* refptr_test2()" not in returnval_hazards
assert "Cell* refptr_test3()" in returnval_hazards
assert "Cell* refptr_test4()" in returnval_hazards
assert "Cell* refptr_test5()" not in returnval_hazards
assert "Cell* refptr_test6()" in returnval_hazards
assert "Cell* refptr_test7()" in returnval_hazards
assert "Cell* refptr_test8()" in returnval_hazards
assert "Cell* refptr_test9()" not in returnval_hazards

assert "container1" in hazmap
assert "container2" not in hazmap

# Type names are handy to have in the report.
assert hazmap["cell2"].type == "Cell*"
assert hazmap["<returnvalue>"].type == "Cell*"
assert hazmap["this"].type == "Subcell*"

# loopy hazards. See comments in source.
assert "haz1" not in hazmap
assert "haz2" not in hazmap
assert "haz3" in hazmap
assert "haz4" in hazmap
assert "haz5" in hazmap
assert "haz6" not in hazmap
assert "haz7" not in hazmap
assert "haz8" in hazmap

# safevals hazards. See comments in source.
assert "unsafe1" in hazmap
assert "safe2" not in hazmap
assert "unsafe3" in hazmap
assert "unsafe3b" in hazmap
assert "unsafe4" in hazmap
assert "safe5" not in hazmap
assert "safe6" not in hazmap
assert "unsafe7" in hazmap
assert "safe8" not in hazmap
assert "safe9" not in hazmap
# NOTE(review): source.cpp names this variable `unsafe10`, not `safe10`, so
# the assertion below can never fail and the unsafe10 case is effectively
# unchecked. Presumably `assert "unsafe10" in hazmap` was intended -- confirm
# against the analysis output before changing it.
assert "safe10" not in hazmap
assert "safe11" not in hazmap
assert "safe12" not in hazmap
assert "unsafe13" in hazmap
assert "unsafe14" in hazmap
assert "unsafe15" in hazmap
assert "safe16" not in hazmap
assert "safe17" not in hazmap
assert "safe18" not in hazmap
assert "safe19" not in hazmap

# method hazard.

# Two-level index: function name -> variable name -> hazard.
byfunc = defaultdict(lambda: defaultdict(dict))
for haz in hazards:
    byfunc[haz.function][haz.variable] = haz

methhaz = byfunc["int32 Subcell::method()"]
assert "this" in methhaz
assert methhaz["this"].type == "Subcell*"

# Names of all functions that have at least one hazard. (This set used to be
# built twice in a row; once is enough.)
haz_functions = set(haz.function for haz in hazards)

# RefPtr<T> tests.

assert "Cell* refptr_test1()" in haz_functions
assert "Cell* refptr_test2()" not in haz_functions
assert "Cell* refptr_test3()" in haz_functions
assert "Cell* refptr_test4()" in haz_functions
assert "Cell* refptr_test5()" not in haz_functions
assert "Cell* refptr_test6()" in haz_functions
assert "Cell* refptr_test7()" in haz_functions
assert "Cell* refptr_test8()" in haz_functions
assert "Cell* refptr_test9()" not in haz_functions
assert "Cell* refptr_test10()" in haz_functions

# aggr_init tests.

assert "void aggr_init_safe()" not in haz_functions
assert "void aggr_init_unsafe()" in haz_functions
*/ + +#define ANNOTATE(property) __attribute__((annotate(property))) + +namespace js { +namespace gc { +struct Cell { + int f; +} ANNOTATE("GC Thing"); +} // namespace gc +} // namespace js + +struct Bogon {}; + +struct JustACell : public js::gc::Cell { + bool iHaveNoDataMembers() { return true; } +}; + +struct JSObject : public js::gc::Cell, public Bogon { + int g; +}; + +struct SpecialObject : public JSObject { + int z; +}; + +struct ErrorResult { + bool hasObj; + JSObject* obj; + void trace() {} +} ANNOTATE("Suppressed GC Pointer"); + +struct OkContainer { + ErrorResult res; + bool happy; +}; + +struct UnrootedPointer { + JSObject* obj; +}; + +template <typename T> +class Rooted { + T data; +} ANNOTATE("Rooted Pointer"); + +extern void js_GC() ANNOTATE("GC Call") ANNOTATE("Slow"); + +void js_GC() {} + +void root_arg(JSObject* obj, JSObject* random) { + // Use all these types so they get included in the output. + SpecialObject so; + UnrootedPointer up; + Bogon b; + OkContainer okc; + Rooted<JSObject*> ro; + Rooted<SpecialObject*> rso; + + obj = random; + + JSObject* other1 = obj; + js_GC(); + + float MARKER1 = 0; + JSObject* other2 = obj; + other1->f = 1; + other2->f = -1; + + unsigned int u1 = 1; + unsigned int u2 = -1; +} diff --git a/js/src/devtools/rootAnalysis/t/sixgill-tree/test.py b/js/src/devtools/rootAnalysis/t/sixgill-tree/test.py new file mode 100644 index 0000000000..5e99fff908 --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/sixgill-tree/test.py @@ -0,0 +1,63 @@ +# flake8: noqa: F821 +import re + +test.compile("source.cpp") +test.computeGCTypes() +body = test.process_body(test.load_db_entry("src_body", re.compile(r"root_arg"))[0]) + +# Rendering positive and negative integers +marker1 = body.assignment_line("MARKER1") +equal(body.edge_from_line(marker1 + 2)["Exp"][1]["String"], "1") +equal(body.edge_from_line(marker1 + 3)["Exp"][1]["String"], "-1") + +equal(body.edge_from_point(body.assignment_point("u1"))["Exp"][1]["String"], "1") +equal( + 
body.edge_from_point(body.assignment_point("u2"))["Exp"][1]["String"], "4294967295" +) + +assert "obj" in body["Variables"] +assert "random" in body["Variables"] +assert "other1" in body["Variables"] +assert "other2" in body["Variables"] + +# Test function annotations +js_GC = test.process_body(test.load_db_entry("src_body", re.compile(r"js_GC"))[0]) +annotations = js_GC["Variables"]["void js_GC()"]["Annotation"] +assert annotations +found_call_annotate = False +for annotation in annotations: + (annType, value) = annotation["Name"] + if annType == "annotate" and value == "GC Call": + found_call_annotate = True +assert found_call_annotate + +# Test type annotations + +# js::gc::Cell first +cell = test.load_db_entry("src_comp", "js::gc::Cell")[0] +assert cell["Kind"] == "Struct" +annotations = cell["Annotation"] +assert len(annotations) == 1 +(tag, value) = annotations[0]["Name"] +assert tag == "annotate" +assert value == "GC Thing" + +# Check JSObject inheritance. +JSObject = test.load_db_entry("src_comp", "JSObject")[0] +bases = [b["Base"] for b in JSObject["CSUBaseClass"]] +assert "js::gc::Cell" in bases +assert "Bogon" in bases +assert len(bases) == 2 + +# Check type analysis +gctypes = test.load_gcTypes() +assert "js::gc::Cell" in gctypes["GCThings"] +assert "JustACell" in gctypes["GCThings"] +assert "JSObject" in gctypes["GCThings"] +assert "SpecialObject" in gctypes["GCThings"] +assert "UnrootedPointer" in gctypes["GCPointers"] +assert "Bogon" not in gctypes["GCThings"] +assert "Bogon" not in gctypes["GCPointers"] +assert "ErrorResult" not in gctypes["GCPointers"] +assert "OkContainer" not in gctypes["GCPointers"] +assert "class Rooted<JSObject*>" not in gctypes["GCPointers"] diff --git a/js/src/devtools/rootAnalysis/t/sixgill.py b/js/src/devtools/rootAnalysis/t/sixgill.py new file mode 100644 index 0000000000..307f13fae5 --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/sixgill.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# This Source Code Form is subject to 
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from collections import defaultdict


def _unmangled(name):
    """Return the text after the last `$` of a `mangled$unmangled` name."""
    return name.split("$")[-1]


class Body(dict):
    """Simplified, indexed view of one sixgill body record.

    The constructor copies a handful of fields out of the raw JSON `body`
    and builds three lookup tables:

    * ``Line2Points``    -- source line -> list of point numbers
    * ``SrcPoint2Edges`` -- source point -> edges leaving that point
    * ``Line2Edges``     -- source line -> edges leaving any point on it

    Points are numbered from 1, in the order they appear in ``PPoint``.
    """

    def __init__(self, body):
        self["BlockIdKind"] = body["BlockId"]["Kind"]
        if "Variable" in body["BlockId"]:
            self["BlockName"] = _unmangled(body["BlockId"]["Variable"]["Name"][0])
        loc = body["Location"]
        self["LineRange"] = (loc[0]["Line"], loc[1]["Line"])
        self["Filename"] = loc[0]["CacheString"]
        self["Edges"] = body.get("PEdge", [])
        self["Points"] = {
            i: p["Location"]["Line"] for i, p in enumerate(body["PPoint"], 1)
        }
        self["Index"] = body["Index"]
        self["Variables"] = {
            _unmangled(x["Variable"]["Name"][0]): x["Type"]
            for x in body["DefineVariable"]
        }

        # Indexes
        self["Line2Points"] = defaultdict(list)
        for point, line in self["Points"].items():
            self["Line2Points"][line].append(point)
        self["SrcPoint2Edges"] = defaultdict(list)
        for edge in self["Edges"]:
            src, dst = edge["Index"]
            self["SrcPoint2Edges"][src].append(edge)
        self["Line2Edges"] = defaultdict(list)
        for src, edges in self["SrcPoint2Edges"].items():
            line = self["Points"][src]
            self["Line2Edges"][line].extend(edges)

    def edges_from_line(self, line):
        """Return the edges leaving any point on `line` (empty if none)."""
        # Use .get(): indexing the defaultdict would insert an empty entry
        # for every line that is merely asked about.
        return self["Line2Edges"].get(line, [])

    def edge_from_line(self, line):
        """Return the single edge leaving `line`; assert there is exactly one."""
        edges = self.edges_from_line(line)
        assert len(edges) == 1, "expected exactly one edge from line %s, found %d" % (
            line,
            len(edges),
        )
        return edges[0]

    def edges_from_point(self, point):
        """Return the edges whose source is `point` (empty if none)."""
        return self["SrcPoint2Edges"].get(point, [])

    def edge_from_point(self, point):
        """Return the single edge leaving `point`; assert there is exactly one."""
        edges = self.edges_from_point(point)
        assert len(edges) == 1, "expected exactly one edge from point %s, found %d" % (
            point,
            len(edges),
        )
        return edges[0]

    def assignment_point(self, varname):
        """Return the source point of the first Assign edge that writes `varname`.

        `varname` may be given either exactly as recorded or in the
        unmangled form that the ``Variables`` table is keyed by.
        Raises Exception if no such assignment exists.
        """
        for edge in self["Edges"]:
            if edge["Kind"] != "Assign":
                continue
            dst = edge["Exp"][0]
            if dst["Kind"] != "Var":
                continue
            name = dst["Variable"]["Name"][0]
            if varname in (name, _unmangled(name)):
                return edge["Index"][0]
        raise Exception("assignment to variable %s not found" % varname)

    def assignment_line(self, varname):
        """Return the source line of the first assignment to `varname`."""
        return self["Points"][self.assignment_point(varname)]
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Fixture for the suppression test. Three functions call GC(); they differ
// only in whether their call sites in f() sit inside an AutoSuppressGC scope.

// Attaches a string annotation to a declaration for the analysis plugin.
#define ANNOTATE(property) __attribute__((annotate(property)))

struct Cell {
  int f;
} ANNOTATE("GC Thing");

// Only this class carries the "Suppress GC" annotation. The two classes below
// reach it indirectly: one by inheritance, one through a data member.
class AutoSuppressGC_Base {
 public:
  AutoSuppressGC_Base() {}
  ~AutoSuppressGC_Base() {}
} ANNOTATE("Suppress GC");

class AutoSuppressGC_Child : public AutoSuppressGC_Base {
 public:
  AutoSuppressGC_Child() : AutoSuppressGC_Base() {}
};

class AutoSuppressGC {
  AutoSuppressGC_Child helpImBeingSuppressed;

 public:
  AutoSuppressGC() {}
};

extern void GC() ANNOTATE("GC Call");

void GC() {
  // If the implementation is too trivial, the function body won't be emitted at
  // all.
  asm("");
}

// Declared but not defined here; used below to consume each cell pointer.
extern void foo(Cell*);

void suppressedFunction() {
  GC();  // Calls GC, but is always called within AutoSuppressGC
}

void halfSuppressedFunction() {
  GC();  // Calls GC, but is sometimes called within AutoSuppressGC
}

void unsuppressedFunction() {
  GC();  // Calls GC, never within AutoSuppressGC
}

void f() {
  Cell* cell1 = nullptr;
  Cell* cell2 = nullptr;
  Cell* cell3 = nullptr;
  {
    // Both calls in this scope happen while `nogc` is alive.
    AutoSuppressGC nogc;
    suppressedFunction();
    halfSuppressedFunction();
  }
  foo(cell1);
  // Second call to halfSuppressedFunction(), this time outside any
  // AutoSuppressGC scope.
  halfSuppressedFunction();
  foo(cell2);
  unsuppressedFunction();
  {
    // Old bug: it would look from the first AutoSuppressGC constructor it
    // found to the last destructor. This statement *should* have no effect.
    AutoSuppressGC nogc;
  }
  foo(cell3);
}
# flake8: noqa: F821
# 'test' is provided by the calling script.
test.compile("source.cpp")
test.run_analysis_script("gcTypes", upto="gcFunctions")

# The suppressions file uses mangled names.
info = test.load_funcInfo()

# Bit 0 of the per-function bits means "GC is suppressed". The mask has to be
# tested with `&`: the previous `| 1` was truthy for every entry, so nothing
# was ever filtered out.
# NOTE(review): this assumes the per-function record still stores its bits
# under the key "limits" -- confirm against the script that writes
# limitedFunctions.lst.
GC_SUPPRESSED = 1
suppressed = [f for f, v in info.items() if v.get("limits", 0) & GC_SUPPRESSED]

# Only one of these is fully suppressed (ie, *always* called within the scope
# of an AutoSuppressGC).
assert len(list(filter(lambda f: "suppressedFunction" in f, suppressed))) == 1
assert len(list(filter(lambda f: "halfSuppressedFunction" in f, suppressed))) == 0
assert len(list(filter(lambda f: "unsuppressedFunction" in f, suppressed))) == 0

# gcFunctions should be the inverse, but we get to rely on unmangled names here.
gcFunctions = test.load_gcFunctions()
assert "void GC()" in gcFunctions
assert "void suppressedFunction()" not in gcFunctions
assert "void halfSuppressedFunction()" in gcFunctions
assert "void unsuppressedFunction()" in gcFunctions
assert "void f()" in gcFunctions
import json
import os
import re
import subprocess
import sys
from collections import defaultdict, namedtuple

# Directory holding analyze.py: the parent of the directory this file is in.
scriptdir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

# One parsed line of hazards.txt.
HazardSummary = namedtuple(
    "HazardSummary", ["function", "variable", "type", "GCFunction", "location"]
)

# Parsed form of callgraph.txt, as built by Test.load_callgraph().
Callgraph = namedtuple(
    "Callgraph",
    [
        "functionNames",
        "nameToId",
        "mangledToUnmangled",
        "unmangledToMangled",
        "calleesOf",
        "callersOf",
        "tags",
        "calleeGraph",
        "callerGraph",
    ],
)


def equal(got, expected):
    """Print a message when `got` differs from `expected`.

    This only reports the mismatch; it does not raise.
    """
    if got != expected:
        print("Got '%s', expected '%s'" % (got, expected))


def extract_unmangled(func):
    """Return the text after the last `$` of a `mangled$unmangled` name."""
    return func.split("$")[-1]


class Test(object):
    """Helper object handed to each test script as `test`.

    indir   -- directory containing the test's source files
    outdir  -- directory the analysis output files are read from
    cfg     -- configuration object; the attributes read here are
               `cxx`, `sixgill_plugin`, `sixgill_bin`, `js` and `verbose`
    verbose -- verbosity level used when invoking analyze.py
    """

    def __init__(self, indir, outdir, cfg, verbose=0):
        self.indir = indir
        self.outdir = outdir
        self.cfg = cfg
        self.verbose = verbose

    def infile(self, path):
        """Return `path` resolved against the test's input directory."""
        return os.path.join(self.indir, path)

    def binpath(self, prog):
        """Return the path of `prog` inside the sixgill bin directory."""
        return os.path.join(self.cfg.sixgill_bin, prog)

    def compile(self, source, options=""):
        """Compile `source` with the sixgill plugin loaded.

        `-fno-exceptions` is appended unless `options` already picks an
        exceptions mode. Raises CalledProcessError if the compiler fails.
        """
        # Set in this process's own environment, which the compiler (and every
        # later child process) inherits.
        os.environ["CCACHE_DISABLE"] = "1"
        if "-fexceptions" not in options and "-fno-exceptions" not in options:
            options += " -fno-exceptions"
        cmd = "{CXX} -c {source} -O3 -std=c++17 -fplugin={sixgill} -fplugin-arg-xgill-mangle=1 {options}".format(  # NOQA: E501
            source=self.infile(source),
            CXX=self.cfg.cxx,
            sixgill=self.cfg.sixgill_plugin,
            options=options,
        )
        if self.cfg.verbose > 0:
            print("Running %s" % cmd)
        subprocess.check_call(["sh", "-c", cmd])

    def load_db_entry(self, dbname, pattern):
        """Look up an entry from an XDB database file, 'pattern' may be an exact
        matching string, or an re pattern object matching a single entry."""

        if hasattr(pattern, "match"):
            output = subprocess.check_output(
                [self.binpath("xdbkeys"), dbname + ".xdb"], universal_newlines=True
            )
            matches = list(filter(lambda _: re.search(pattern, _), output.splitlines()))
            if len(matches) == 0:
                raise Exception("entry not found")
            if len(matches) > 1:
                raise Exception("multiple entries found")
            pattern = matches[0]

        output = subprocess.check_output(
            [self.binpath("xdbfind"), "-json", dbname + ".xdb", pattern],
            universal_newlines=True,
        )
        return json.loads(output)

    def run_analysis_script(self, startPhase="gcTypes", upto=None):
        """Write defaults.py into the current directory and run analyze.py.

        The analysis runs from `startPhase` through `upto` (or to the end
        when `upto` is not given). Raises CalledProcessError on failure.
        """
        # `with` makes sure the file is flushed and closed before analyze.py
        # is started.
        with open("defaults.py", "w") as fh:
            fh.write(
                """\
analysis_scriptdir = '{scriptdir}'
sixgill_bin = '{bindir}'
""".format(
                    scriptdir=scriptdir, bindir=self.cfg.sixgill_bin
                )
            )
        cmd = [
            sys.executable,
            os.path.join(scriptdir, "analyze.py"),
            ["-q", "", "-v"][min(self.verbose, 2)],
        ]
        cmd += ["--first", startPhase]
        if upto:
            cmd += ["--last", upto]
        cmd.append("--source=%s" % self.indir)
        cmd.append("--js=%s" % self.cfg.js)
        if self.cfg.verbose:
            print("Running " + " ".join(cmd))
        subprocess.check_call(cmd)

    def computeGCTypes(self):
        """Run only the gcTypes phase."""
        self.run_analysis_script("gcTypes", upto="gcTypes")

    def computeHazards(self):
        """Run the analysis from gcTypes to the end."""
        self.run_analysis_script("gcTypes")

    def load_text_file(self, filename, extract=lambda l: l):
        """Return `extract(line)` for each stripped line of an output file.

        Lines for which `extract` returns None are dropped.
        """
        fullpath = os.path.join(self.outdir, filename)
        with open(fullpath, "r") as fh:
            values = [extract(line.strip()) for line in fh]
        return [value for value in values if value is not None]

    def load_json_file(self, filename, reviver=None):
        """Parse an output file as JSON; `reviver` is passed as object_hook."""
        fullpath = os.path.join(self.outdir, filename)
        with open(fullpath) as fh:
            return json.load(fh, object_hook=reviver)

    def load_gcTypes(self):
        """Parse gcTypes.txt into {"<Kind>s": [typename, ...]}.

        A line `GCFoo: T` is stored under the key "GCFoos"; lines that do not
        start with `GC<word>: ` are ignored.
        """

        def grab_type(line):
            m = re.match(r"^(GC\w+): (.*)", line)
            if m:
                return (m.group(1) + "s", m.group(2))
            return None

        gctypes = defaultdict(list)
        for collection, typename in self.load_text_file(
            "gcTypes.txt", extract=grab_type
        ):
            gctypes[collection].append(typename)
        return gctypes

    def load_typeInfo(self, filename="typeInfo.txt"):
        return self.load_json_file(filename)

    def load_funcInfo(self, filename="limitedFunctions.lst"):
        return self.load_json_file(filename)

    def load_gcFunctions(self):
        """Return the unmangled names listed in gcFunctions.lst."""
        return self.load_text_file("gcFunctions.lst", extract=extract_unmangled)

    def load_callgraph(self):
        """Parse callgraph.txt into a Callgraph tuple.

        Caller/callee names in the result are unmangled when a `=` line
        supplied an unmangled form, and mangled otherwise.
        """
        data = Callgraph(
            # Function ids start at 1; slot 0 is a placeholder.
            functionNames=["dummy"],
            nameToId={},
            mangledToUnmangled={},
            unmangledToMangled={},
            calleesOf=defaultdict(list),
            callersOf=defaultdict(list),
            tags=defaultdict(set),
            calleeGraph=defaultdict(dict),
            callerGraph=defaultdict(dict),
        )

        def lookup(func_id):
            mangled = data.functionNames[int(func_id)]
            return data.mangledToUnmangled.get(mangled, mangled)

        def add_call(caller, callee, limit):
            # `limit` is parsed but not recorded anywhere yet.
            data.calleesOf[caller].append(callee)
            data.callersOf[callee].append(caller)
            data.calleeGraph[caller][callee] = True
            data.callerGraph[callee][caller] = True

        def process(line):
            if not line:
                return

            if line.startswith("#"):
                name = line.split(" ", 1)[1]
                data.nameToId[name] = len(data.functionNames)
                data.functionNames.append(name)
                return

            if line.startswith("="):
                m = re.match(r"^= (\d+) (.*)", line)
                func_id = int(m.group(1))
                mangled = data.functionNames[func_id]
                unmangled = m.group(2)
                # Record the numeric id. This used to store the builtin
                # function `id`, since no local of that name existed here.
                data.nameToId[unmangled] = func_id
                data.mangledToUnmangled[mangled] = unmangled
                data.unmangledToMangled[unmangled] = mangled
                return

            # Sample lines:
            #   D 10 20
            #   D /3 10 20
            #   D 3:3 10 20
            # All of these mean that there is a direct call from function #10
            # to function #20. The latter two mean that the call is made in a
            # context where the 0x1 and 0x2 properties (3 == 0x1 | 0x2) are in
            # effect. The `/n` syntax was the original, which was then expanded
            # to `m:n` to allow multiple calls to be combined together when not
            # all calls have the same properties in effect. The `/n` syntax is
            # deprecated.
            #
            # The properties usually refer to "limits", eg "GC is suppressed
            # in the scope surrounding this call". For testing purposes, the
            # difference between `m` and `n` in `m:n` is currently ignored.
            tokens = line.split(" ")
            limit = 0
            if tokens[1].startswith("/"):
                attr_str = tokens.pop(1)
                limit = int(attr_str[1:])
            elif ":" in tokens[1]:
                attr_str = tokens.pop(1)
                limit = int(attr_str[0 : attr_str.index(":")])

            if tokens[0] in ("D", "R"):
                _, caller, callee = tokens
                add_call(lookup(caller), lookup(callee), limit)
            elif tokens[0] == "T":
                data.tags[tokens[1]].add(line.split(" ", 2)[2])
            elif tokens[0] in ("F", "V", "I"):
                # Recognized, but nothing is recorded for these line types.
                pass

        self.load_text_file("callgraph.txt", extract=process)
        return data

    def load_hazards(self):
        """Return a HazardSummary for every hazard line in hazards.txt.

        Function names are reduced to their unmangled form.
        """

        def grab_hazard(line):
            m = re.match(
                r"Function '(.*?)' has unrooted '(.*?)' of type '(.*?)' live across GC call '(.*?)' at (.*)",  # NOQA: E501
                line,
            )
            if m:
                info = list(m.groups())
                info[0] = info[0].split("$")[-1]
                info[3] = info[3].split("$")[-1]
                return HazardSummary(*info)
            return None

        return self.load_text_file("hazards.txt", extract=grab_hazard)

    def process_body(self, body):
        """Wrap a raw sixgill body record in a sixgill.Body."""
        # Imported here rather than at module scope so the file loaders above
        # can be used without the sixgill module being importable.
        from sixgill import Body

        return Body(body)

    def process_bodies(self, bodies):
        return [self.process_body(b) for b in bodies]
Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <memory> +#include <utility> + +#define ANNOTATE(property) __attribute__((annotate(property))) + +struct Cell { + int f; +} ANNOTATE("GC Thing"); + +namespace World { +namespace NS { +struct Unsafe { + int g; + ~Unsafe() { asm(""); } +} ANNOTATE("Invalidated by GC") ANNOTATE("GC Pointer or Reference"); +} // namespace NS +} // namespace World + +extern void GC() ANNOTATE("GC Call"); +extern void invisible(); + +void GC() { + // If the implementation is too trivial, the function body won't be emitted at + // all. + asm(""); + invisible(); +} + +struct GCOnDestruction { + ~GCOnDestruction() { GC(); } +}; + +struct NoGCOnDestruction { + ~NoGCOnDestruction() { asm(""); } +}; + +extern void usecell(Cell*); + +Cell* cell() { + static Cell c; + return &c; +} + +template <typename T, typename U> +struct SimpleTemplate { + int member; +}; + +template <typename T, typename U> +class ANNOTATE("moz_inherit_type_annotations_from_template_args") Container { + public: + template <typename V, typename W> + void foo(V& v, W& w) { + class InnerClass {}; + InnerClass xxx; + return; + } + + struct Entry { + T t; + U u; + }* ent; +}; + +Cell* f() { + Container<int, double> c1; + Container<SimpleTemplate<int, int>, SimpleTemplate<double, double>> c2; + Container<Container<int, double>, Container<float, float>> c3; + Container<Container<SimpleTemplate<int, int>, float>, + Container<float, SimpleTemplate<char, char>>> + c4; + + return nullptr; +} + +// Define a set of classes for verifying that there is no infinite loop +// when a class contains itself via mozilla::UniquePtr. + +namespace mozilla { + +template <typename A> +struct JustAField { + A field; + + // Hack to allow UniquePtr and SimpleUniquePtr to be swapped. 
+ A& operator->() { return field; } +}; + +template <typename T> +struct UniquePtr { + JustAField<T*> holder; +}; + +// This did not trigger the infinite loop, because the pointer here +// caused the UniquePtr special handling to be skipped. It requires +// the above definition to be triggered, which matches the actual +// implementation (JustAField maps to CompactPair, more or less). +// The bugfix for the infinite loop also drops this requirement, so +// now this *would* trigger the bug if it weren't fixed in the same +// commit. +template <typename T> +struct SimpleUniquePtr { + T* holder; +}; + +} // namespace mozilla + +class Recursive { + public: + using EntryMap = Container<Cell*, Recursive>; + mozilla::UniquePtr<EntryMap> entries; +}; + +void rvalue_ref(World::NS::Unsafe&& arg1) { GC(); } + +void ref(const World::NS::Unsafe& arg2) { + Recursive* foo; + // Must actually use a type for the compiler to instantiate the + // template specializations. + foo->entries.holder->ent; + GC(); + static int use = arg2.g; +} + +// A function that consumes a parameter, but only if passed by rvalue reference. 
+extern void eat(World::NS::Unsafe&&); +extern void eat(World::NS::Unsafe&); + +void rvalue_ref_ok() { + World::NS::Unsafe unsafe1; + eat(std::move(unsafe1)); + GC(); +} + +void rvalue_ref_not_ok() { + World::NS::Unsafe unsafe2; + eat(unsafe2); + GC(); +} + +void rvalue_ref_arg_ok(World::NS::Unsafe&& unsafe3) { + eat(std::move(unsafe3)); + GC(); +} + +void rvalue_ref_arg_not_ok(World::NS::Unsafe&& unsafe4) { + eat(unsafe4); + GC(); +} + +void shared_ptr_hazard() { + Cell* unsafe5 = f(); + { auto p = std::make_shared<GCOnDestruction>(); } + usecell(unsafe5); +} + +void shared_ptr_no_hazard() { + Cell* safe6 = f(); + { auto p = std::make_shared<NoGCOnDestruction>(); } + usecell(safe6); +} diff --git a/js/src/devtools/rootAnalysis/t/types/test.py b/js/src/devtools/rootAnalysis/t/types/test.py new file mode 100644 index 0000000000..4a2b985abf --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/types/test.py @@ -0,0 +1,16 @@ +# flake8: noqa: F821 + +from collections import defaultdict + +test.compile("source.cpp") +test.run_analysis_script() +hazards = test.load_hazards() +hazmap = {haz.variable: haz for haz in hazards} +assert "arg1" in hazmap +assert "arg2" in hazmap +assert "unsafe1" not in hazmap +assert "unsafe2" in hazmap +assert "unsafe3" not in hazmap +assert "unsafe4" in hazmap +assert "unsafe5" in hazmap +assert "safe6" not in hazmap diff --git a/js/src/devtools/rootAnalysis/t/virtual/source.cpp b/js/src/devtools/rootAnalysis/t/virtual/source.cpp new file mode 100644 index 0000000000..980546f38d --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/virtual/source.cpp @@ -0,0 +1,366 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#define ANNOTATE(property) __attribute__((annotate(property))) + +extern void GC() ANNOTATE("GC Call"); + +void GC() { + // If the implementation is too trivial, the function body won't be emitted at + // all. + asm(""); +} + +// Special-cased function -- code that can run JS has an artificial edge to +// js::RunScript. +namespace js { +void RunScript() { GC(); } +} // namespace js + +struct Cell { + int f; +} ANNOTATE("GC Thing"); + +extern void foo(); + +void bar() { GC(); } + +typedef void (*func_t)(); + +class Base { + public: + int ANNOTATE("field annotation") dummy; + virtual void someGC() ANNOTATE("Base pure virtual method") = 0; + virtual void someGC(int) ANNOTATE("overloaded Base pure virtual method") = 0; + virtual void sibGC() = 0; + virtual void onBase() { bar(); } + func_t functionField; + + // For now, this is just to verify that the plugin doesn't crash. The + // analysis code does not yet look at this annotation or output it anywhere + // (though it *is* being recorded.) + static float testAnnotations() ANNOTATE("static func"); + + // Similar, though sixgill currently completely ignores parameter annotations. 
+ static double testParamAnnotations(Cell& ANNOTATE("param annotation") + ANNOTATE("second param annot") cell) + ANNOTATE("static func") ANNOTATE("second func"); +}; + +float Base::testAnnotations() { + asm(""); + return 1.1; +} + +double Base::testParamAnnotations(Cell& cell) { + asm(""); + return 1.2; +} + +class Super : public Base { + public: + virtual void ANNOTATE("Super pure virtual") noneGC() = 0; + virtual void allGC() = 0; + virtual void onSuper() { asm(""); } + void nonVirtualFunc() { asm(""); } +}; + +class Sub1 : public Super { + public: + void noneGC() override { foo(); } + void someGC() override ANNOTATE("Sub1 override") ANNOTATE("second attr") { + foo(); + } + void someGC(int) override ANNOTATE("Sub1 override for int overload") { + foo(); + } + void allGC() override { + foo(); + bar(); + } + void sibGC() override { foo(); } + void onBase() override { foo(); } +} ANNOTATE("CSU1") ANNOTATE("CSU2"); + +class Sub2 : public Super { + public: + void noneGC() override { foo(); } + void someGC() override { + foo(); + bar(); + } + void someGC(int) override { + foo(); + bar(); + } + void allGC() override { + foo(); + bar(); + } + void sibGC() override { foo(); } +}; + +class Sibling : public Base { + public: + virtual void noneGC() { foo(); } + void someGC() override { + foo(); + bar(); + } + void someGC(int) override { + foo(); + bar(); + } + virtual void allGC() { + foo(); + bar(); + } + void sibGC() override { bar(); } +}; + +class AutoSuppressGC { + public: + AutoSuppressGC() {} + ~AutoSuppressGC() {} +} ANNOTATE("Suppress GC"); + +void use(Cell*) { asm(""); } + +class nsISupports { + public: + virtual ANNOTATE("Can run script") void danger() { asm(""); } + + virtual ~nsISupports() = 0; +}; + +class nsIPrincipal : public nsISupports { + public: + ~nsIPrincipal() override{}; +}; + +struct JSPrincipals { + int debugToken; + JSPrincipals() = default; + virtual ~JSPrincipals() { GC(); } +}; + +class nsJSPrincipals : public nsIPrincipal, public JSPrincipals { 
+ public: + void Release() { delete this; } +}; + +class SafePrincipals : public nsIPrincipal { + public: + ~SafePrincipals() { foo(); } +}; + +void f() { + Sub1 s1; + Sub2 s2; + + static Cell cell; + { + Cell* c1 = &cell; + s1.noneGC(); + use(c1); + } + { + Cell* c2 = &cell; + s2.someGC(); + use(c2); + } + { + Cell* c3 = &cell; + s1.allGC(); + use(c3); + } + { + Cell* c4 = &cell; + s2.noneGC(); + use(c4); + } + { + Cell* c5 = &cell; + s2.someGC(); + use(c5); + } + { + Cell* c6 = &cell; + s2.allGC(); + use(c6); + } + + Super* super = &s2; + { + Cell* c7 = &cell; + super->noneGC(); + use(c7); + } + { + Cell* c8 = &cell; + super->someGC(); + use(c8); + } + { + Cell* c9 = &cell; + super->allGC(); + use(c9); + } + + { + Cell* c10 = &cell; + s1.functionField(); + use(c10); + } + { + Cell* c11 = &cell; + super->functionField(); + use(c11); + } + { + Cell* c12 = &cell; + super->sibGC(); + use(c12); + } + + Base* base = &s2; + { + Cell* c13 = &cell; + base->sibGC(); + use(c13); + } + + nsJSPrincipals pals; + { + Cell* c14 = &cell; + nsISupports* p = &pals; + p->danger(); + use(c14); + } + + // Base defines, Sub1 overrides, static Super can call either. + { + Cell* c15 = &cell; + super->onBase(); + use(c15); + } + + { + Cell* c16 = &cell; + s2.someGC(7); + use(c16); + } + + { + Cell* c17 = &cell; + super->someGC(7); + use(c17); + } + + { + nsJSPrincipals* princ = new nsJSPrincipals(); + Cell* c18 = &cell; + delete princ; // Can GC + use(c18); + } + + { + nsJSPrincipals* princ = new nsJSPrincipals(); + nsISupports* supp = static_cast<nsISupports*>(princ); + Cell* c19 = &cell; + delete supp; // Can GC + use(c19); + } + + { + auto* safe = new SafePrincipals(); + Cell* c20 = &cell; + delete safe; // Cannot GC + use(c20); + } + + { + auto* safe = new SafePrincipals(); + nsISupports* supp = static_cast<nsISupports*>(safe); + Cell* c21 = &cell; + delete supp; // Compiler thinks destructor can GC. 
+ use(c21); + } +} + +template <typename Function> +void Call1(Function&& f) { + f(); +} + +template <typename Function> +void Call2(Function&& f) { + f(); +} + +void function_pointers() { + Cell cell; + + { + auto* f = GC; + Cell* c22 = &cell; + f(); + use(c22); + } + + { + auto* f = GC; + auto*& g = f; + Cell* c23 = &cell; + g(); + use(c23); + } + + { + auto* f = GC; + Call1([&] { + Cell* c24 = &cell; + f(); + use(c24); + }); + } +} + +// Use a separate function to test `mallocSizeOf` annotations. Bug 1872197: +// functions that are specialized on a lambda function and call that function +// will have that call get mixed up with other calls of lambdas defined within +// the same function. +void annotated_function_pointers() { + Cell cell; + + // Variables with the specific name "mallocSizeOf" are + // annotated to not GC. (Heh... even though here, they + // *do* GC!) + + { + auto* mallocSizeOf = GC; + Cell* c25 = &cell; + mallocSizeOf(); + use(c25); + } + + { + auto* f = GC; + auto*& mallocSizeOf = f; + Cell* c26 = &cell; + mallocSizeOf(); + use(c26); + } + + { + auto* mallocSizeOf = GC; + Call2([&] { + Cell* c27 = &cell; + mallocSizeOf(); + use(c27); + }); + } +} diff --git a/js/src/devtools/rootAnalysis/t/virtual/test.py b/js/src/devtools/rootAnalysis/t/virtual/test.py new file mode 100644 index 0000000000..26d2e51ed6 --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/virtual/test.py @@ -0,0 +1,99 @@ +# 'test' is provided by the calling script. +# flake8: noqa: F821 + +test.compile("source.cpp") +test.run_analysis_script("gcTypes") + +info = test.load_typeInfo() + +assert "Sub1" in info["OtherCSUTags"] +assert ["CSU1", "CSU2"] == sorted(info["OtherCSUTags"]["Sub1"]) +assert "Base" in info["OtherFieldTags"] +assert "someGC" in info["OtherFieldTags"]["Base"] +assert "Sub1" in info["OtherFieldTags"] +assert "someGC" in info["OtherFieldTags"]["Sub1"] + +# For now, fields with the same name (eg overloaded virtual methods) just +# accumulate attributes. 
+assert ["Sub1 override", "Sub1 override for int overload", "second attr"] == sorted( + info["OtherFieldTags"]["Sub1"]["someGC"] +) + +gcFunctions = test.load_gcFunctions() + +assert "void Sub1::noneGC()" not in gcFunctions +assert "void Sub1::someGC()" not in gcFunctions +assert "void Sub1::someGC(int32)" not in gcFunctions +assert "void Sub1::allGC()" in gcFunctions +assert "void Sub2::noneGC()" not in gcFunctions +assert "void Sub2::someGC()" in gcFunctions +assert "void Sub2::someGC(int32)" in gcFunctions +assert "void Sub2::allGC()" in gcFunctions + +callgraph = test.load_callgraph() + +assert callgraph.calleeGraph["void f()"]["Super.noneGC:0"] +assert callgraph.calleeGraph["Super.noneGC:0"]["Sub1.noneGC:0"] +assert callgraph.calleeGraph["Super.noneGC:0"]["Sub2.noneGC:0"] +assert callgraph.calleeGraph["Sub1.noneGC:0"]["void Sub1::noneGC()"] +assert callgraph.calleeGraph["Sub2.noneGC:0"]["void Sub2::noneGC()"] +assert "void Sibling::noneGC()" not in callgraph.calleeGraph["Super.noneGC:0"] +assert callgraph.calleeGraph["Super.onBase:0"]["Sub1.onBase:0"] +assert callgraph.calleeGraph["Sub1.onBase:0"]["void Sub1::onBase()"] +assert callgraph.calleeGraph["Super.onBase:0"]["void Base::onBase()"] +assert "void Sibling::onBase()" not in callgraph.calleeGraph["Super.onBase:0"] + +hazards = test.load_hazards() +hazmap = {haz.variable: haz for haz in hazards} + +assert "c1" not in hazmap +assert "c2" in hazmap +assert "c3" in hazmap +assert "c4" not in hazmap +assert "c5" in hazmap +assert "c6" in hazmap +assert "c7" not in hazmap +assert "c8" in hazmap +assert "c9" in hazmap +assert "c10" in hazmap +assert "c11" in hazmap + +# Virtual resolution should take the static type into account: the only method +# implementations considered should be those of descendants, even if the +# virtual method is inherited and not overridden in the static class. (Base +# defines sibGC() as pure virtual, Super inherits it without overriding, +# Sibling and Sub2 both implement it.) 
+ +# Call Base.sibGC on a Super pointer: can only call Sub2.sibGC(), which does not GC. +# In particular, PEdgeCallInstance.Exp.Field.FieldCSU.Type = {Kind: "CSU", Name="Super"} +assert "c12" not in hazmap +# Call Base.sibGC on a Base pointer; can call Sibling.sibGC(), which GCs. +assert "c13" in hazmap + +# Call nsISupports.danger() which is annotated to be overridable and hence can GC. +assert "c14" in hazmap + +# someGC(int) overload +assert "c16" in hazmap +assert "c17" in hazmap + +# Super.onBase() could call the GC'ing Base::onBase(). +assert "c15" in hazmap + +# virtual ~nsJSPrincipals calls ~JSPrincipals calls GC. +assert "c18" in hazmap +assert "c19" in hazmap + +# ~SafePrincipals does not GC. +assert "c20" not in hazmap + +# ...but when cast to a nsISupports*, the compiler can't tell that it won't. +assert "c21" in hazmap + +# Function pointers! References to function pointers! Created by reference-capturing lambdas! +assert "c22" in hazmap +assert "c23" in hazmap +assert "c24" in hazmap +assert "c25" not in hazmap +assert "c26" not in hazmap +assert "c27" not in hazmap diff --git a/js/src/devtools/rootAnalysis/utility.js b/js/src/devtools/rootAnalysis/utility.js new file mode 100644 index 0000000000..94b5391c02 --- /dev/null +++ b/js/src/devtools/rootAnalysis/utility.js @@ -0,0 +1,422 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('dumpCFG.js'); + +// Attribute bits - each call edge may carry a set of 'attrs' bits, saying eg +// that the edge takes place within a scope where GC is suppressed, for +// example. 
// These stay `var` (not `const`/`let`) so that the script can be loaded again
// into the same global without redeclaration errors.
var ATTR_GC_SUPPRESSED = 1 << 0;
var ATTR_CANSCRIPT_BOUNDED = 1 << 1;  // Unimplemented
var ATTR_DOM_ITERATING = 1 << 2;      // Unimplemented
var ATTR_NONRELEASING = 1 << 3;       // ~RefPtr of value whose refcount will not go to zero
var ATTR_REPLACED = 1 << 4;           // Ignore edge, it was replaced by zero or more better edges.
var ATTR_SYNTHETIC = 1 << 5;          // Call was manufactured in some way.

// Highest attribute bit defined above; ATTRS_ALL is derived from it.
var ATTR_LAST = 1 << 5;
var ATTRS_NONE = 0;
var ATTRS_ALL = (ATTR_LAST << 1) - 1; // All possible bits set

// The traversal algorithms we run will recurse into children if you change any
// attrs bit to zero. Use all bits set to maximally attributed, including
// additional bits that all just mean "unvisited", so that the first time we
// see a node with this attrs, we're guaranteed to turn at least one bit off
// and thereby keep going.
var ATTRS_UNVISITED = 0xffff;

// gcc appends this to mangled function names for "not in charge"
// constructors/destructors.
var internalMarker = " *INTERNAL* ";

// Give Set an `update(collection)` method that adds every element of an
// iterable, unless one is already present on the prototype.
if (!Set.prototype.hasOwnProperty("update")) {
    Object.defineProperty(Set.prototype, "update", {
        value: function (collection) {
            for (const item of collection) {
                this.add(item);
            }
        }
    });
}

// Throw an Error if `x` is falsy, breaking into the debugger first (when one
// is attached). `msg`, if given, is included in the error text.
function assert(x, msg)
{
    if (!x) {
        debugger;
        if (msg) {
            throw new Error("assertion failed: " + msg + "\n");
        }
        throw new Error("assertion failed");
    }
}

// True for anything other than `undefined` (so `null` counts as defined).
function defined(x) {
    return x !== undefined;
}

// Recursively pretty-print `x` using the shell's print(): arrays are wrapped
// in [ ], other objects in { } with one "key:" line per enumerable property,
// and everything else is printed as-is. `padding` is the indentation prefix
// for the current nesting level.
function xprint(x, padding)
{
    const indent = padding ? padding : "";

    if (x instanceof Array) {
        print(indent + "[");
        for (const item of x) {
            xprint(item, indent + " ");
        }
        print(indent + "]");
        return;
    }

    if (x instanceof Object) {
        print(indent + "{");
        for (const key in x) {
            print(indent + " " + key + ":");
            xprint(x[key], indent + " ");
        }
        print(indent + "}");
        return;
    }

    print(indent + x);
}

// Command-line argument parser.
//
// `parameters` is a list of parameter specs, each of which is a dict with keys:
//
//   - name: name of option, prefixed with "--" if it is named (otherwise, it
//     is interpreted as a positional parameter.)
//   - dest: key to store the result in, defaulting to the parameter name without
//     any leading "--" and with dashes replaced with underscores.
//   - default: value of option if no value is given. Positional parameters with
//     a default value are optional. If no default is given, the parameter's name
//     is not included in the return value.
//   - type: `bool` if it takes no argument, otherwise an argument is required.
//     Named arguments default to 'bool', positional arguments to 'string'.
//   - nargs: the only supported value is `+`, which means to grab all following
//     arguments, up to the next named option, and store them as a list.
//
// The command line is parsed for `--foo=value` and `--bar` arguments.
//
// Return value is a dict of parameter values, keyed off of `dest` as determined
// above. An extra option named "rest" will be set to the list of all remaining
// arguments passed in.
//
// Note that the spec objects are filled in as a side effect: `dest`, `type`
// and (for positionals) `positional` are written back onto each spec.
//
// Throws an Error for a malformed spec list, an unknown `--name=value` option,
// a missing option argument, or a missing required positional.
//
function parse_options(parameters, inArgs = scriptArgs) {
    const options = {};

    // Prototype-less, so argument text can never collide with an inherited
    // Object.prototype property name.
    const named = Object.create(null);
    const positional = [];
    for (const param of parameters) {
        if (param.name.startsWith("-")) {
            named[param.name] = param;
            if (!param.dest) {
                if (!param.name.startsWith("--")) {
                    throw new Error(`parameter '${param.name}' requires param.dest to be set`);
                }
                // Regex with /g: a plain-string pattern would replace only
                // the first dash.
                param.dest = param.name.substring(2).replace(/-/g, "_");
            }
        } else {
            if (!('default' in param) && positional.length > 0 && ('default' in positional.at(-1))) {
                throw new Error(`required parameter '${param.name}' follows optional parameter`);
            }
            param.positional = true;
            positional.push(param);
            param.dest = param.dest || param.name.replace(/-/g, "_");
        }

        if (!param.type) {
            if (param.nargs === "+") {
                param.type = "list";
            } else if (param.positional) {
                param.type = "string";
            } else {
                param.type = "bool";
            }
        }

        if ('default' in param) {
            // List values are appended to the default; work on a copy so the
            // caller's spec is not modified by parsing.
            if (param.type === 'list' && Array.isArray(param.default)) {
                options[param.dest] = [...param.default];
            } else {
                options[param.dest] = param.default;
            }
        }
    }

    options.rest = [];
    const args = [...inArgs];
    let grabbing_into = undefined;
    while (args.length > 0) {
        let arg = args.shift();
        let param;
        // Only dash-prefixed arguments can be options; a positional value
        // that merely contains "=" is left alone.
        if (arg.startsWith("-")) {
            if (arg in named) {
                param = named[arg];
                if (param.type !== 'bool') {
                    if (args.length == 0) {
                        throw(new Error(`${param.name} requires an argument`));
                    }
                    arg = args.shift();
                }
            } else {
                const pos = arg.indexOf("=");
                if (pos != -1) {
                    const name = arg.substring(0, pos);
                    param = named[name];
                    if (!param) {
                        throw(new Error(`Unknown option '${name}'`));
                    } else if (param.type === 'bool') {
                        // param.name already carries its leading dashes.
                        throw(new Error(`${param.name} does not take an argument`));
                    }
                    arg = arg.substring(pos + 1);
                }
            }
        }

        // If this isn't a --named param, and we're not accumulating into a nargs="+" param, then
        // use the next positional.
        if (!param && !grabbing_into && positional.length > 0) {
            param = positional.shift();
        }

        // If a parameter was identified, then any old accumulator is done and we might start a new one.
        if (param) {
            if (param.type === 'list') {
                grabbing_into = options[param.dest] = options[param.dest] || [];
            } else {
                grabbing_into = undefined;
            }
        }

        if (grabbing_into) {
            grabbing_into.push(arg);
        } else if (param) {
            if (param.type === 'bool') {
                options[param.dest] = true;
            } else {
                options[param.dest] = arg;
            }
        } else {
            options.rest.push(arg);
        }
    }

    // Whatever is still queued in `positional` was never supplied.
    for (const param of positional) {
        if (!('default' in param)) {
            throw(new Error(`'${param.name}' option is required`));
        }
    }

    for (const param of parameters) {
        // A named list option that was never given has no entry at all; that
        // is "absent", not "given with zero values".
        const values = options[param.dest];
        if (param.nargs === '+' && values !== undefined && values.length == 0) {
            throw(new Error(`at least one value required for option '${param.name}'`));
        }
    }

    return options;
}

// Whether two sixgill BlockIds name the same block: same Kind, same Variable,
// and (for loops) the same Loop identifier.
function sameBlockId(id0, id1)
{
    if (id0.Kind != id1.Kind)
        return false;
    if (!sameVariable(id0.Variable, id1.Variable))
        return false;
    if (id0.Kind == "Loop" && id0.Loop != id1.Loop)
        return false;
    return true;
}

// Whether two sixgill Variable records refer to the same variable. Named
// variables are compared by their first Name entry; unnamed ones (which must
// be Kind "This" or "Return") are compared by Kind.
function sameVariable(var0, var1)
{
    assert("Name" in var0 || var0.Kind == "This" || var0.Kind == "Return");
    assert("Name" in var1 || var1.Kind == "This" || var1.Kind == "Return");
    if ("Name" in var0)
        return "Name" in var1 && var0.Name[0] == var1.Name[0];
    return var0.Kind == var1.Kind;
}

// Identifier for a body: the Loop id for loop bodies, otherwise the first
// Name entry of the function's Variable.
function blockIdentifier(body)
{
    if (body.BlockId.Kind == "Loop")
        return body.BlockId.Loop;
    assert(body.BlockId.Kind == "Function", "body.Kind should be Function, not " + body.BlockId.Kind);
    return body.BlockId.Variable.Name[0];
}

// Build body.predecessors and body.successors: arrays indexed by point
// number, each entry being the list of edges arriving at / leaving that point.
// Points with no such edges have no entry.
function collectBodyEdges(body)
{
    body.predecessors = [];
    body.successors = [];
    if (!("PEdge" in body))
        return;

    for (var edge of body.PEdge) {
        var [ source, target ] = edge.Index;
        if (!(target in body.predecessors))
            body.predecessors[target] = [];
        body.predecessors[target].push(edge);
        if (!(source in body.successors))
            body.successors[source] = [];
        body.successors[source].push(edge);
    }
}

// Lazily computed predecessor table for `body` (see collectBodyEdges).
function getPredecessors(body)
{
    if (!('predecessors' in body))
        collectBodyEdges(body);
    return body.predecessors;
}

// Lazily computed successor table for `body` (see collectBodyEdges).
function getSuccessors(body)
{
    if (!('successors' in body))
        collectBodyEdges(body);
    return body.successors;
}

// Split apart a function from sixgill into its mangled and unmangled name. If
// no mangled name was given, use the unmangled name as its mangled name.
// The separator is the first "$" if there is one, otherwise the first "|".
function splitFunction(func)
{
    var split = func.indexOf("$");
    if (split != -1)
        return [ func.substring(0, split), func.substring(split + 1) ];
    split = func.indexOf("|");
    if (split != -1)
        return [ func.substring(0, split), func.substring(split + 1) ];
    return [ func, func ];
}

// Mangled half of a sixgill function name.
function mangled(fullname)
{
    return splitFunction(fullname)[0];
}

// Unmangled (human-readable) half of a sixgill function name.
function readable(fullname)
{
    return splitFunction(fullname)[1];
}

// Open the library named by the XDB environment variable via ctypes and
// return an object of bound xdb_* entry points.
function xdbLibrary()
{
    var lib = ctypes.open(os.getenv('XDB'));
    var api = {
        open: lib.declare("xdb_open", ctypes.default_abi, ctypes.void_t, ctypes.char.ptr),
        min_data_stream: lib.declare("xdb_min_data_stream", ctypes.default_abi, ctypes.int),
        max_data_stream: lib.declare("xdb_max_data_stream", ctypes.default_abi, ctypes.int),
        read_key: lib.declare("xdb_read_key", ctypes.default_abi, ctypes.char.ptr, ctypes.int),
        read_entry: lib.declare("xdb_read_entry", ctypes.default_abi, ctypes.char.ptr, ctypes.char.ptr),
        free_string: lib.declare("xdb_free", ctypes.default_abi, ctypes.void_t, ctypes.char.ptr)
    };
    try {
        api.lookup_key = lib.declare("xdb_lookup_key", ctypes.default_abi, ctypes.int, ctypes.char.ptr);
    } catch (e) {
        // lookup_key is for development use only and is not strictly necessary.
    }
    return api;
}

// Return the first library in `names` that ctypes can open, or undefined if
// none of them can be opened.
function openLibrary(names) {
    for (const name of names) {
        try {
            return ctypes.open(name);
        } catch(e) {
            // Deliberately ignored: fall through to the next candidate name.
        }
    }
    return undefined;
}

// Bind the handful of libc functions needed to read a file line by line.
// Throws if libc cannot be opened or if the build uses jemalloc.
function cLibrary()
{
    const lib = openLibrary(['libc.so.6', 'libc.so', 'libc.dylib']);
    if (!lib) {
        throw new Error("Unable to open libc");
    }

    if (getBuildConfiguration("moz-memory")) {
        throw new Error("cannot use libc functions with --enable-jemalloc, since they will be routed " +
                        "through jemalloc, but calling libc.free() directly will bypass it and the " +
                        "malloc/free will be mismatched");
    }

    return {
        fopen: lib.declare("fopen", ctypes.default_abi, ctypes.void_t.ptr, ctypes.char.ptr, ctypes.char.ptr),
        getline: lib.declare("getline", ctypes.default_abi, ctypes.ssize_t, ctypes.char.ptr.ptr, ctypes.size_t.ptr, ctypes.void_t.ptr),
        fclose: lib.declare("fclose", ctypes.default_abi, ctypes.int, ctypes.void_t.ptr),
        free: lib.declare("free", ctypes.default_abi, ctypes.void_t, ctypes.void_t.ptr),
    };
}

// Generator yielding each line of `filename` as read by libc getline().
// Throws if the file cannot be opened.
function* readFileLines_gen(filename)
{
    var libc = cLibrary();
    var linebuf = ctypes.char.ptr();
    var bufsize = ctypes.size_t(0);
    var fp = libc.fopen(filename, "r");
    if (fp.isNull())
        throw new Error("Unable to open '" + filename + "'");

    // The finally clause also runs when the consumer stops iterating early
    // (generator return()), so the file and line buffer are always released.
    try {
        while (libc.getline(linebuf.address(), bufsize.address(), fp) > 0)
            yield linebuf.readString();
    } finally {
        libc.fclose(fp);
        libc.free(ctypes.void_t.ptr(linebuf));
    }
}

// Append `entry` to the array stored at collection[key], creating the array
// on first use. Returns that array.
function addToKeyedList(collection, key, entry)
{
    if (!(key in collection))
        collection[key] = [];
    collection[key].push(entry);
    return collection[key];
}

// Map flavored version of addToKeyedList.
function addToMappedList(map, key, entry)
{
    if (!map.has(key))
        map.set(key, []);
    map.get(key).push(entry);
    return map.get(key);
}

// Read `filename` and parse its contents as JSON.
function loadTypeInfo(filename)
{
    return JSON.parse(os.file.readFile(filename));
}

// Given the range `first` .. `last`, break it down into `count` batches and
// return the start of the (1-based) `num` batch.
function batchStart(num, count, first, last) {
    const N = (last - first) + 1;
    // Multiply before dividing so the numerator is an exact integer. Dividing
    // first can land a hair under a whole number (57 / 100 * 100 evaluates to
    // 56.99999999999999), which Math.floor() would then round to the wrong
    // batch boundary.
    return Math.floor((num - 1) * N / count) + first;
}

// As above, but return the last value in the (1-based) `num` batch.
function batchLast(num, count, first, last) {
    const N = (last - first) + 1;
    // Same ordering as batchStart, so that batchLast(k) + 1 == batchStart(k + 1).
    return Math.floor(num * N / count) + first - 1;
}

// Debugging tool. See usage below.
//
// Builds a Proxy handler that executes a `debugger;` statement whenever the
// property `traced_prop` is set. If `check` has a 'value' key, only sets of
// that value (compared with ==) trigger it; with no `check`, every set does.
function PropertyTracer(traced_prop, check = {}) {
    return {
        matches(prop, value) {
            if (prop != traced_prop)
                return false;
            if ('value' in check)
                return value == check.value;
            return true;
        },

        // Also called when defining a property.
        set(obj, prop, value, receiver) {
            if (this.matches(prop, value))
                debugger;
            return Reflect.set(obj, prop, value, receiver);
        },
    };
}

// Usage: var myobj = traced({}, 'name', {value: 'Bob'})
//
// This will execute a `debugger;` statement when myobj['name'] is defined or
// set to 'Bob'.
function traced(obj, traced_prop, check = {}) {
    return new Proxy(obj, PropertyTracer(traced_prop, check));
}