/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at http://mozilla.org/MPL/2.0/. */ /* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ "use strict"; loadRelativeToScript('callgraph.js'); var options = parse_options([ { name: '--function', type: 'string' }, { name: 'typeInfo_filename', type: 'string', default: "typeInfo.txt" }, { name: 'callgraphOut_filename', type: 'string', default: "rawcalls.txt" }, { name: 'gcEdgesOut_filename', type: 'string', default: "gcEdges.json" }, { name: 'batch', default: 1, type: 'number' }, { name: 'numBatches', default: 1, type: 'number' }, ]); var origOut = os.file.redirect(options.callgraphOut_filename); var memoized = new Map(); var unmangled2id = new Set(); var gcEdges = {}; // Insert a string into the name table and return the ID. Do not use for // functions, which must be handled specially. function getId(name) { let id = memoized.get(name); if (id !== undefined) return id; id = memoized.size + 1; memoized.set(name, id); print(`#${id} ${name}`); return id; } // Split a function into mangled and unmangled parts and return the ID for the // function. function functionId(name) { const [mangled, unmangled] = splitFunction(name); const id = getId(mangled); // Only produce a mangled -> unmangled mapping once, unless there are // multiple unmangled names for the same mangled name. if (unmangled2id.has(unmangled)) return id; print(`= ${id} ${unmangled}`); unmangled2id.add(unmangled); return id; } var lastline; function printOnce(line) { if (line != lastline) { print(line); lastline = line; } } // Returns a table mapping function name to lists of // [annotation-name, annotation-value] pairs: // { function-name => [ [annotation-name, annotation-value] ] } // // Note that sixgill will only store certain attributes (annotation-names), so // this won't be *all* the attributes in the source, just the ones that sixgill // watches for. function getAllAttributes(body) { var all_annotations = {}; for (var v of (body.DefineVariable || [])) { if (v.Variable.Kind != 'Func') continue; var name = v.Variable.Name[0]; var annotations = all_annotations[name] = []; for (var ann of (v.Type.Annotation || [])) { annotations.push(ann.Name); } } return all_annotations; } // Get just the annotations understood by the hazard analysis. function getAnnotations(functionName, body) { var tags = new Set(); var attributes = getAllAttributes(body); if (functionName in attributes) { for (var [ annName, annValue ] of attributes[functionName]) { if (annName == 'annotate') tags.add(annValue); } } return tags; } // Scan through a function body, pulling out all annotations and calls and // recording them in callgraph.txt. function processBody(functionName, body, functionBodies) { if (!('PEdge' in body)) return; for (var tag of getAnnotations(functionName, body).values()) { const id = functionId(functionName); print(`T ${id} ${tag}`); if (tag == "Calls JSNatives") printOnce(`D ${id} ${functionId("(js-code)")}`); } // Set of all callees that have been output so far, in order to suppress // repeated callgraph edges from being recorded. This uses a Map from // callees to limit sets, because we don't want a limited edge to prevent // an unlimited edge from being recorded later. (So an edge will be skipped // if it exists and is at least as limited as the previously seen edge.) // // Limit sets are implemented as integers interpreted as bitfields. // var seen = new Map(); lastline = null; for (var edge of body.PEdge) { if (edge.Kind != "Call") continue; // The attrs (eg ATTR_GC_SUPPRESSED) are determined by whatever RAII // scopes might be active, which have been computed previously for all // points in the body. var edgeAttrs = body.attrs[edge.Index[0]] | 0; for (var callee of getCallees(edge)) { // Special-case some calls when we can derive more information about them, eg // that they are a destructor that won't do anything. if (callee.kind === "direct" && edgeIsNonReleasingDtor(body, edge, callee.name, functionBodies)) { const block = blockIdentifier(body); addToKeyedList(gcEdges, block, { Index: edge.Index, attrs: ATTR_GC_SUPPRESSED | ATTR_NONRELEASING }); } // Individual callees may have additional attrs. The only such // bit currently is that nsISupports.{AddRef,Release} are assumed // to never GC. const attrs = edgeAttrs | callee.attrs; let prologue = attrs ? `/${attrs} ` : ""; prologue += functionId(functionName) + " "; if (callee.kind == 'direct') { const prev_attrs = seen.has(callee.name) ? seen.get(callee.name) : ATTRS_UNVISITED; if (prev_attrs & ~attrs) { // Only output an edge if it loosens a limit. seen.set(callee.name, prev_attrs & attrs); printOnce("D " + prologue + functionId(callee.name)); } } else if (callee.kind == 'field') { var { csu, field, isVirtual } = callee; const tag = isVirtual ? 'V' : 'F'; const fullfield = `${csu}.${field}`; printOnce(`${tag} ${prologue}${getId(fullfield)} CLASS ${csu} FIELD ${field}`); } else if (callee.kind == 'resolved-field') { // Fully-resolved field (virtual method) call. Record the // callgraph edges. Do not consider attrs, since they are local // to this callsite and we are writing out a global record // here. // // Any field call that does *not* have an R entry must be // assumed to call anything. var { csu, field, callees } = callee; var fullFieldName = csu + "." + field; if (!virtualResolutionsSeen.has(fullFieldName)) { virtualResolutionsSeen.add(fullFieldName); for (var target of callees) printOnce("R " + getId(fullFieldName) + " " + functionId(target.name)); } } else if (callee.kind == 'indirect') { printOnce("I " + prologue + "VARIABLE " + callee.variable); } else if (callee.kind == 'unknown') { printOnce("I " + prologue + "VARIABLE UNKNOWN"); } else { printErr("invalid " + callee.kind + " callee"); debugger; } } } } // Reserve IDs for special function names. // represents anything that can run JS assert(ID.jscode == functionId("(js-code)")); // function pointers will get an edge to this in loadCallgraph.js; only the ID // reservation is present in callgraph.txt assert(ID.anyfunc == functionId("(any-function)")); // same as above, but for fields annotated to never GC assert(ID.nogcfunc == functionId("(nogc-function)")); // garbage collection assert(ID.gc == functionId("(GC)")); var typeInfo = loadTypeInfo(options.typeInfo_filename); loadTypes("src_comp.xdb"); // Arbitrary JS code must always be assumed to GC. In real code, there would // always be a path anyway through some arbitrary JSNative, but this route will be shorter. print(`D ${ID.jscode} ${ID.gc}`); // An unknown function is assumed to GC. print(`D ${ID.anyfunc} ${ID.gc}`); // Output call edges for all virtual methods defined anywhere, from // Class.methodname to what a (dynamic) instance of Class would run when // methodname was called (either Class::methodname() if defined, or some // Base::methodname() for inherited method definitions). for (const [fieldkey, methods] of virtualDefinitions) { const caller = getId(fieldkey); for (const name of methods) { const callee = functionId(name); printOnce(`D ${caller} ${callee}`); } } // Output call edges from C.methodname -> S.methodname for all subclasses S of // class C. This is for when you are calling methodname on a pointer/ref of // dynamic type C, so that the callgraph contains calls to all descendant // subclasses' implementations. for (const [csu, methods] of virtualDeclarations) { for (const {field, dtor} of methods) { const caller = getId(fieldKey(csu, field)); if (virtualCanRunJS(csu, field.Name[0])) printOnce(`D ${caller} ${functionId("(js-code)")}`); if (dtor) printOnce(`D ${caller} ${functionId(dtor)}`); if (!subclasses.has(csu)) continue; for (const sub of subclasses.get(csu)) { printOnce(`D ${caller} ${getId(fieldKey(sub, field))}`); } } } var xdb = xdbLibrary(); xdb.open("src_body.xdb"); printErr("Finished loading data structures"); var minStream = xdb.min_data_stream(); var maxStream = xdb.max_data_stream(); if (options.function) { var index = xdb.lookup_key(options.function); if (!index) { printErr("Function not found"); quit(1); } minStream = maxStream = index; } function process(functionName, functionBodies) { for (var body of functionBodies) body.attrs = []; for (var body of functionBodies) { for (var [pbody, id, attrs] of allRAIIGuardedCallPoints(typeInfo, functionBodies, body, isLimitConstructor)) { pbody.attrs[id] = attrs; } } for (var body of functionBodies) processBody(functionName, body, functionBodies); // Not strictly necessary, but add an edge from the synthetic "(js-code)" // to RunScript to allow better stacks than just randomly selecting a // JSNative to blame things on. if (functionName.includes("js::RunScript")) print(`D ${functionId("(js-code)")} ${functionId(functionName)}`); // GCC generates multiple constructors and destructors ("in-charge" and // "not-in-charge") to handle virtual base classes. They are normally // identical, and it appears that GCC does some magic to alias them to the // same thing. But this aliasing is not visible to the analysis. So we'll // add a dummy call edge from "foo" -> "foo *INTERNAL* ", since only "foo" // will show up as called but only "foo *INTERNAL* " will be emitted in the // case where the constructors are identical. // // This is slightly conservative in the case where they are *not* // identical, but that should be rare enough that we don't care. var markerPos = functionName.indexOf(internalMarker); if (markerPos > 0) { var inChargeXTor = functionName.replace(internalMarker, ""); printOnce("D " + functionId(inChargeXTor) + " " + functionId(functionName)); } const [ mangled, unmangled ] = splitFunction(functionName); // Further note: from https://itanium-cxx-abi.github.io/cxx-abi/abi.html the // different kinds of constructors/destructors are: // C1 # complete object constructor // C2 # base object constructor // C3 # complete object allocating constructor // D0 # deleting destructor // D1 # complete object destructor // D2 # base object destructor // // In actual practice, I have observed C4 and D4 xtors generated by gcc // 4.9.3 (but not 4.7.3). The gcc source code says: // // /* This is the old-style "[unified]" constructor. // In some cases, we may emit this function and call // it from the clones in order to share code and save space. */ // // Unfortunately, that "call... from the clones" does not seem to appear in // the CFG we get from GCC. So if we see a C4 constructor or D4 destructor, // inject an edge to it from C1, C2, and C3 (or D1, D2, and D3). (Note that // C3 isn't even used in current GCC, but add the edge anyway just in // case.) // // from gcc/cp/mangle.c: // // ::= D0 # deleting (in-charge) destructor // ::= D1 # complete object (in-charge) destructor // ::= D2 # base object (not-in-charge) destructor // ::= C1 # complete object constructor // ::= C2 # base object constructor // ::= C3 # complete object allocating constructor // // Currently, allocating constructors are never used. // if (functionName.indexOf("C4") != -1) { // E terminates the method name (and precedes the method parameters). // If eg "C4E" shows up in the mangled name for another reason, this // will create bogus edges in the callgraph. But it will affect little // and is somewhat difficult to avoid, so we will live with it. // // Another possibility! A templatized constructor will contain C4I...E // for template arguments. // for (let [synthetic, variant, desc] of [ ['C4E', 'C1E', 'complete_ctor'], ['C4E', 'C2E', 'base_ctor'], ['C4E', 'C3E', 'complete_alloc_ctor'], ['C4I', 'C1I', 'complete_ctor'], ['C4I', 'C2I', 'base_ctor'], ['C4I', 'C3I', 'complete_alloc_ctor']]) { if (mangled.indexOf(synthetic) == -1) continue; let variant_mangled = mangled.replace(synthetic, variant); let variant_full = `${variant_mangled}$${unmangled} [[${desc}]]`; printOnce("D " + functionId(variant_full) + " " + functionId(functionName)); } } // For destructors: // // I've never seen D4Ev() + D4Ev(int32), only one or the other. So // for a D4Ev of any sort, create: // // D0() -> D1() # deleting destructor calls complete destructor, then deletes // D1() -> D2() # complete destructor calls base destructor, then destroys virtual bases // D2() -> D4(?) # base destructor might be aliased to unified destructor // # use whichever one is defined, in-charge or not. // # ('?') means either () or (int32). // // Note that this doesn't actually make sense -- D0 and D1 should be // in-charge, but gcc doesn't seem to give them the in-charge parameter?! // if (functionName.indexOf("D4Ev") != -1 && functionName.indexOf("::~") != -1) { const not_in_charge_dtor = functionName.replace("(int32)", "()"); const D0 = not_in_charge_dtor.replace("D4Ev", "D0Ev") + " [[deleting_dtor]]"; const D1 = not_in_charge_dtor.replace("D4Ev", "D1Ev") + " [[complete_dtor]]"; const D2 = not_in_charge_dtor.replace("D4Ev", "D2Ev") + " [[base_dtor]]"; printOnce("D " + functionId(D0) + " " + functionId(D1)); printOnce("D " + functionId(D1) + " " + functionId(D2)); printOnce("D " + functionId(D2) + " " + functionId(functionName)); } if (isJSNative(mangled)) printOnce(`D ${functionId("(js-code)")} ${functionId(functionName)}`); } var start = batchStart(options.batch, options.numBatches, minStream, maxStream); var end = batchLast(options.batch, options.numBatches, minStream, maxStream); for (var nameIndex = start; nameIndex <= end; nameIndex++) { var name = xdb.read_key(nameIndex); var data = xdb.read_entry(name); process(name.readString(), JSON.parse(data.readString())); xdb.free_string(name); xdb.free_string(data); } os.file.close(os.file.redirect(options.gcEdgesOut_filename)); print(JSON.stringify(gcEdges, null, 4)); os.file.close(os.file.redirect(origOut));