Diffstat (limited to 'src/cmd/compile/internal/inline')
37 files changed, 8380 insertions, 0 deletions
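The effect of the pass added below can be observed with the -m flag documented at the top of inl.go. A minimal, hypothetical example (file and function names are illustrative; exact diagnostics vary by compiler version):

// inl_demo.go: add is far under the 80-node budget described below, so
// "go build -gcflags=-m" is expected to report a "can inline add" line
// (see noteInlinableFunc); the pragma routes addPragma through
// InlineImpossible, so no such line is printed for it.
package main

//go:noinline
func addPragma(a, b int) int { return a + b }

func add(a, b int) int { return a + b }

func main() {
	println(add(1, 2), addPragma(3, 4))
}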
diff --git a/src/cmd/compile/internal/inline/inl.go b/src/cmd/compile/internal/inline/inl.go
new file mode 100644
index 0000000..b365008
--- /dev/null
+++ b/src/cmd/compile/internal/inline/inl.go
@@ -0,0 +1,1217 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// The inlining facility makes 2 passes: first CanInline determines which
+// functions are suitable for inlining, and for those that are it
+// saves a copy of the body. Then InlineCalls walks each function body to
+// expand calls to inlinable functions.
+//
+// The Debug.l flag controls the aggressiveness. Note that main() swaps level 0 and 1,
+// making 1 the default and -l disable it. Additional levels (beyond -l) may be buggy and
+// are not supported.
+//	0: disabled
+//	1: 80-nodes leaf functions, oneliners, panic, lazy typechecking (default)
+//	2: (unassigned)
+//	3: (unassigned)
+//	4: allow non-leaf functions
+//
+// At some point this may get another default and become switch-offable with -N.
+//
+// The -d typecheckinl flag enables early typechecking of all imported bodies,
+// which is useful to flush out bugs.
+//
+// The Debug.m flag enables diagnostic output. A single -m is useful for verifying
+// which calls get inlined or not; more is for debugging, and may go away at any point.

+package inline
+
+import (
+	"fmt"
+	"go/constant"
+	"internal/buildcfg"
+	"strconv"
+
+	"cmd/compile/internal/base"
+	"cmd/compile/internal/inline/inlheur"
+	"cmd/compile/internal/ir"
+	"cmd/compile/internal/logopt"
+	"cmd/compile/internal/pgo"
+	"cmd/compile/internal/typecheck"
+	"cmd/compile/internal/types"
+	"cmd/internal/obj"
+)
+
+// Inlining budget parameters, gathered in one place.
+const (
+	inlineMaxBudget       = 80
+	inlineExtraAppendCost = 0
+	// default is to inline if there's at most one call. -l=4 overrides this by using 1 instead.
+	inlineExtraCallCost  = 57              // 57 was benchmarked to provide the most benefit with no bad surprises; see https://github.com/golang/go/issues/19348#issuecomment-439370742
+	inlineExtraPanicCost = 1               // do not penalize inlining panics.
+	inlineExtraThrowCost = inlineMaxBudget // with current (2018-05/1.11) code, inlining runtime.throw does not help.
+
+	inlineBigFunctionNodes   = 5000 // Functions with this many nodes are considered "big".
+	inlineBigFunctionMaxCost = 20   // Max cost of inlinee when inlining into a "big" function.
+)
+
+var (
+	// List of all hot callee nodes.
+	// TODO(prattmic): Make this non-global.
+	candHotCalleeMap = make(map[*pgo.IRNode]struct{})
+
+	// List of all hot call sites. CallSiteInfo.Callee is always nil.
+	// TODO(prattmic): Make this non-global.
+	candHotEdgeMap = make(map[pgo.CallSiteInfo]struct{})
+
+	// Threshold in percentage for hot callsite inlining.
+	inlineHotCallSiteThresholdPercent float64
+
+	// Threshold in CDF percentage for hot callsite inlining,
+	// that is, for a threshold of X the hottest callsites that
+	// make up the top X% of total edge weight will be
+	// considered hot for inlining candidates.
+	inlineCDFHotCallSiteThresholdPercent = float64(99)
+
+	// Budget increased due to hotness.
+	inlineHotMaxBudget int32 = 2000
+)
+
+// PGOInlinePrologue records the hot callsites from ir-graph.
+func PGOInlinePrologue(p *pgo.Profile, funcs []*ir.Func) {
+	if base.Debug.PGOInlineCDFThreshold != "" {
+		if s, err := strconv.ParseFloat(base.Debug.PGOInlineCDFThreshold, 64); err == nil && s >= 0 && s <= 100 {
+			inlineCDFHotCallSiteThresholdPercent = s
+		} else {
+			base.Fatalf("invalid PGOInlineCDFThreshold, must be between 0 and 100")
+		}
+	}
+	var hotCallsites []pgo.NamedCallEdge
+	inlineHotCallSiteThresholdPercent, hotCallsites = hotNodesFromCDF(p)
+	if base.Debug.PGODebug > 0 {
+		fmt.Printf("hot-callsite-thres-from-CDF=%v\n", inlineHotCallSiteThresholdPercent)
+	}
+
+	if x := base.Debug.PGOInlineBudget; x != 0 {
+		inlineHotMaxBudget = int32(x)
+	}
+
+	for _, n := range hotCallsites {
+		// mark inlineable callees from hot edges
+		if callee := p.WeightedCG.IRNodes[n.CalleeName]; callee != nil {
+			candHotCalleeMap[callee] = struct{}{}
+		}
+		// mark hot call sites
+		if caller := p.WeightedCG.IRNodes[n.CallerName]; caller != nil && caller.AST != nil {
+			csi := pgo.CallSiteInfo{LineOffset: n.CallSiteOffset, Caller: caller.AST}
+			candHotEdgeMap[csi] = struct{}{}
+		}
+	}
+
+	if base.Debug.PGODebug >= 3 {
+		fmt.Printf("hot-cg before inline in dot format:")
+		p.PrintWeightedCallGraphDOT(inlineHotCallSiteThresholdPercent)
+	}
+}
+
+// hotNodesFromCDF computes an edge weight threshold and the list of hot
+// nodes that make up the given percentage of the CDF. The threshold, as
+// a percent, is the lower bound of weight for nodes to be considered hot
+// (currently only used in debug prints; in case of equal weights,
+// comparing with the threshold may not accurately reflect which nodes are
+// considered hot).
+func hotNodesFromCDF(p *pgo.Profile) (float64, []pgo.NamedCallEdge) {
+	cum := int64(0)
+	for i, n := range p.NamedEdgeMap.ByWeight {
+		w := p.NamedEdgeMap.Weight[n]
+		cum += w
+		if pgo.WeightInPercentage(cum, p.TotalWeight) > inlineCDFHotCallSiteThresholdPercent {
+			// nodes[:i+1] to include the very last node that makes it go over the threshold.
+			// (Say, if the CDF threshold is 50% and one hot node takes 60% of weight, we want to
+			// include that node instead of excluding it.)
+			return pgo.WeightInPercentage(w, p.TotalWeight), p.NamedEdgeMap.ByWeight[:i+1]
+		}
+	}
+	return 0, p.NamedEdgeMap.ByWeight
+}
+
+// CanInlineFuncs computes whether a batch of functions are inlinable.
+func CanInlineFuncs(funcs []*ir.Func, profile *pgo.Profile) {
+	if profile != nil {
+		PGOInlinePrologue(profile, funcs)
+	}
+
+	ir.VisitFuncsBottomUp(funcs, func(list []*ir.Func, recursive bool) {
+		CanInlineSCC(list, recursive, profile)
+	})
+}
+
+// CanInlineSCC computes the inlinability of functions within an SCC
+// (strongly connected component).
+//
+// CanInlineSCC is designed to be used by ir.VisitFuncsBottomUp
+// callbacks.
+func CanInlineSCC(funcs []*ir.Func, recursive bool, profile *pgo.Profile) {
+	if base.Flag.LowerL == 0 {
+		return
+	}
+
+	numfns := numNonClosures(funcs)
+
+	for _, fn := range funcs {
+		if !recursive || numfns > 1 {
+			// We allow inlining if there is no
+			// recursion, or the recursion cycle is
+			// across more than one function.
+ CanInline(fn, profile) + } else { + if base.Flag.LowerM > 1 && fn.OClosure == nil { + fmt.Printf("%v: cannot inline %v: recursive\n", ir.Line(fn), fn.Nname) + } + } + if inlheur.Enabled() { + analyzeFuncProps(fn, profile) + } + } +} + +// GarbageCollectUnreferencedHiddenClosures makes a pass over all the +// top-level (non-hidden-closure) functions looking for nested closure +// functions that are reachable, then sweeps through the Target.Decls +// list and marks any non-reachable hidden closure function as dead. +// See issues #59404 and #59638 for more context. +func GarbageCollectUnreferencedHiddenClosures() { + + liveFuncs := make(map[*ir.Func]bool) + + var markLiveFuncs func(fn *ir.Func) + markLiveFuncs = func(fn *ir.Func) { + if liveFuncs[fn] { + return + } + liveFuncs[fn] = true + ir.Visit(fn, func(n ir.Node) { + if clo, ok := n.(*ir.ClosureExpr); ok { + markLiveFuncs(clo.Func) + } + }) + } + + for i := 0; i < len(typecheck.Target.Funcs); i++ { + fn := typecheck.Target.Funcs[i] + if fn.IsHiddenClosure() { + continue + } + markLiveFuncs(fn) + } + + for i := 0; i < len(typecheck.Target.Funcs); i++ { + fn := typecheck.Target.Funcs[i] + if !fn.IsHiddenClosure() { + continue + } + if fn.IsDeadcodeClosure() { + continue + } + if liveFuncs[fn] { + continue + } + fn.SetIsDeadcodeClosure(true) + if base.Flag.LowerM > 2 { + fmt.Printf("%v: unreferenced closure %v marked as dead\n", ir.Line(fn), fn) + } + if fn.Inl != nil && fn.LSym == nil { + ir.InitLSym(fn, true) + } + } +} + +// inlineBudget determines the max budget for function 'fn' prior to +// analyzing the hairyness of the body of 'fn'. We pass in the pgo +// profile if available (which can change the budget), also a +// 'relaxed' flag, which expands the budget slightly to allow for the +// possibility that a call to the function might have its score +// adjusted downwards. If 'verbose' is set, then print a remark where +// we boost the budget due to PGO. +func inlineBudget(fn *ir.Func, profile *pgo.Profile, relaxed bool, verbose bool) int32 { + // Update the budget for profile-guided inlining. + budget := int32(inlineMaxBudget) + if profile != nil { + if n, ok := profile.WeightedCG.IRNodes[ir.LinkFuncName(fn)]; ok { + if _, ok := candHotCalleeMap[n]; ok { + budget = int32(inlineHotMaxBudget) + if verbose { + fmt.Printf("hot-node enabled increased budget=%v for func=%v\n", budget, ir.PkgFuncName(fn)) + } + } + } + } + if relaxed { + budget += inlheur.BudgetExpansion(inlineMaxBudget) + } + return budget +} + +// CanInline determines whether fn is inlineable. +// If so, CanInline saves copies of fn.Body and fn.Dcl in fn.Inl. +// fn and fn.Body will already have been typechecked. 
+func CanInline(fn *ir.Func, profile *pgo.Profile) {
+	if fn.Nname == nil {
+		base.Fatalf("CanInline no nname %+v", fn)
+	}
+
+	var reason string // reason, if any, that the function was not inlined
+	if base.Flag.LowerM > 1 || logopt.Enabled() {
+		defer func() {
+			if reason != "" {
+				if base.Flag.LowerM > 1 {
+					fmt.Printf("%v: cannot inline %v: %s\n", ir.Line(fn), fn.Nname, reason)
+				}
+				if logopt.Enabled() {
+					logopt.LogOpt(fn.Pos(), "cannotInlineFunction", "inline", ir.FuncName(fn), reason)
+				}
+			}
+		}()
+	}
+
+	reason = InlineImpossible(fn)
+	if reason != "" {
+		return
+	}
+	if fn.Typecheck() == 0 {
+		base.Fatalf("CanInline on non-typechecked function %v", fn)
+	}
+
+	n := fn.Nname
+	if n.Func.InlinabilityChecked() {
+		return
+	}
+	defer n.Func.SetInlinabilityChecked(true)
+
+	cc := int32(inlineExtraCallCost)
+	if base.Flag.LowerL == 4 {
+		cc = 1 // this appears to yield better performance than 0.
+	}
+
+	// Use a "relaxed" inline budget if the new inliner is enabled.
+	relaxed := inlheur.Enabled()
+
+	// Compute the inline budget for this func.
+	budget := inlineBudget(fn, profile, relaxed, base.Debug.PGODebug > 0)
+
+	// At this point in the game the function we're looking at may
+	// have "stale" autos, vars that still appear in the Dcl list, but
+	// which no longer have any uses in the function body (due to
+	// elimination by deadcode). We'd like to exclude these dead vars
+	// when creating the "Inline.Dcl" field below; to accomplish this,
+	// the hairyVisitor below builds up a map of used/referenced
+	// locals, and we use this map to produce a pruned Inline.Dcl
+	// list. See issue 25459 for more context.
+
+	visitor := hairyVisitor{
+		curFunc:       fn,
+		isBigFunc:     IsBigFunc(fn),
+		budget:        budget,
+		maxBudget:     budget,
+		extraCallCost: cc,
+		profile:       profile,
+	}
+	if visitor.tooHairy(fn) {
+		reason = visitor.reason
+		return
+	}
+
+	n.Func.Inl = &ir.Inline{
+		Cost:    budget - visitor.budget,
+		Dcl:     pruneUnusedAutos(n.Func.Dcl, &visitor),
+		HaveDcl: true,
+
+		CanDelayResults: canDelayResults(fn),
+	}
+	if base.Flag.LowerM != 0 || logopt.Enabled() {
+		noteInlinableFunc(n, fn, budget-visitor.budget)
+	}
+}
+
+// noteInlinableFunc issues a message to the user that the specified
+// function is inlinable.
+func noteInlinableFunc(n *ir.Name, fn *ir.Func, cost int32) {
+	if base.Flag.LowerM > 1 {
+		fmt.Printf("%v: can inline %v with cost %d as: %v { %v }\n", ir.Line(fn), n, cost, fn.Type(), ir.Nodes(fn.Body))
+	} else if base.Flag.LowerM != 0 {
+		fmt.Printf("%v: can inline %v\n", ir.Line(fn), n)
+	}
+	// JSON optimization log output.
+	if logopt.Enabled() {
+		logopt.LogOpt(fn.Pos(), "canInlineFunction", "inline", ir.FuncName(fn), fmt.Sprintf("cost: %d", cost))
+	}
+}
+
+// InlineImpossible returns a non-empty reason string if fn is impossible to
+// inline regardless of cost or contents.
+func InlineImpossible(fn *ir.Func) string {
+	var reason string // reason, if any, that the function cannot be inlined.
+	if fn.Nname == nil {
+		reason = "no name"
+		return reason
+	}
+
+	// If marked "go:noinline", don't inline.
+	if fn.Pragma&ir.Noinline != 0 {
+		reason = "marked go:noinline"
+		return reason
+	}
+
+	// If marked "go:norace" and -race compilation, don't inline.
+	if base.Flag.Race && fn.Pragma&ir.Norace != 0 {
+		reason = "marked go:norace with -race compilation"
+		return reason
+	}
+
+	// If marked "go:nocheckptr" and -d checkptr compilation, don't inline.
+ if base.Debug.Checkptr != 0 && fn.Pragma&ir.NoCheckPtr != 0 { + reason = "marked go:nocheckptr" + return reason + } + + // If marked "go:cgo_unsafe_args", don't inline, since the function + // makes assumptions about its argument frame layout. + if fn.Pragma&ir.CgoUnsafeArgs != 0 { + reason = "marked go:cgo_unsafe_args" + return reason + } + + // If marked as "go:uintptrkeepalive", don't inline, since the keep + // alive information is lost during inlining. + // + // TODO(prattmic): This is handled on calls during escape analysis, + // which is after inlining. Move prior to inlining so the keep-alive is + // maintained after inlining. + if fn.Pragma&ir.UintptrKeepAlive != 0 { + reason = "marked as having a keep-alive uintptr argument" + return reason + } + + // If marked as "go:uintptrescapes", don't inline, since the escape + // information is lost during inlining. + if fn.Pragma&ir.UintptrEscapes != 0 { + reason = "marked as having an escaping uintptr argument" + return reason + } + + // The nowritebarrierrec checker currently works at function + // granularity, so inlining yeswritebarrierrec functions can confuse it + // (#22342). As a workaround, disallow inlining them for now. + if fn.Pragma&ir.Yeswritebarrierrec != 0 { + reason = "marked go:yeswritebarrierrec" + return reason + } + + // If a local function has no fn.Body (is defined outside of Go), cannot inline it. + // Imported functions don't have fn.Body but might have inline body in fn.Inl. + if len(fn.Body) == 0 && !typecheck.HaveInlineBody(fn) { + reason = "no function body" + return reason + } + + return "" +} + +// canDelayResults reports whether inlined calls to fn can delay +// declaring the result parameter until the "return" statement. +func canDelayResults(fn *ir.Func) bool { + // We can delay declaring+initializing result parameters if: + // (1) there's exactly one "return" statement in the inlined function; + // (2) it's not an empty return statement (#44355); and + // (3) the result parameters aren't named. + + nreturns := 0 + ir.VisitList(fn.Body, func(n ir.Node) { + if n, ok := n.(*ir.ReturnStmt); ok { + nreturns++ + if len(n.Results) == 0 { + nreturns++ // empty return statement (case 2) + } + } + }) + + if nreturns != 1 { + return false // not exactly one return statement (case 1) + } + + // temporaries for return values. + for _, param := range fn.Type().Results() { + if sym := param.Sym; sym != nil && !sym.IsBlank() { + return false // found a named result parameter (case 3) + } + } + + return true +} + +// hairyVisitor visits a function body to determine its inlining +// hairiness and whether or not it can be inlined. +type hairyVisitor struct { + // This is needed to access the current caller in the doNode function. + curFunc *ir.Func + isBigFunc bool + budget int32 + maxBudget int32 + reason string + extraCallCost int32 + usedLocals ir.NameSet + do func(ir.Node) bool + profile *pgo.Profile +} + +func (v *hairyVisitor) tooHairy(fn *ir.Func) bool { + v.do = v.doNode // cache closure + if ir.DoChildren(fn, v.do) { + return true + } + if v.budget < 0 { + v.reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", v.maxBudget-v.budget, v.maxBudget) + return true + } + return false +} + +// doNode visits n and its children, updates the state in v, and returns true if +// n makes the current function too hairy for inlining. 
+func (v *hairyVisitor) doNode(n ir.Node) bool { + if n == nil { + return false + } +opSwitch: + switch n.Op() { + // Call is okay if inlinable and we have the budget for the body. + case ir.OCALLFUNC: + n := n.(*ir.CallExpr) + // Functions that call runtime.getcaller{pc,sp} can not be inlined + // because getcaller{pc,sp} expect a pointer to the caller's first argument. + // + // runtime.throw is a "cheap call" like panic in normal code. + var cheap bool + if n.Fun.Op() == ir.ONAME { + name := n.Fun.(*ir.Name) + if name.Class == ir.PFUNC { + switch fn := types.RuntimeSymName(name.Sym()); fn { + case "getcallerpc", "getcallersp": + v.reason = "call to " + fn + return true + case "throw": + v.budget -= inlineExtraThrowCost + break opSwitch + case "panicrangeexit": + cheap = true + } + // Special case for reflect.noescape. It does just type + // conversions to appease the escape analysis, and doesn't + // generate code. + if types.ReflectSymName(name.Sym()) == "noescape" { + cheap = true + } + } + // Special case for coverage counter updates; although + // these correspond to real operations, we treat them as + // zero cost for the moment. This is due to the existence + // of tests that are sensitive to inlining-- if the + // insertion of coverage instrumentation happens to tip a + // given function over the threshold and move it from + // "inlinable" to "not-inlinable", this can cause changes + // in allocation behavior, which can then result in test + // failures (a good example is the TestAllocations in + // crypto/ed25519). + if isAtomicCoverageCounterUpdate(n) { + return false + } + } + if n.Fun.Op() == ir.OMETHEXPR { + if meth := ir.MethodExprName(n.Fun); meth != nil { + if fn := meth.Func; fn != nil { + s := fn.Sym() + if types.RuntimeSymName(s) == "heapBits.nextArena" { + // Special case: explicitly allow mid-stack inlining of + // runtime.heapBits.next even though it calls slow-path + // runtime.heapBits.nextArena. + cheap = true + } + // Special case: on architectures that can do unaligned loads, + // explicitly mark encoding/binary methods as cheap, + // because in practice they are, even though our inlining + // budgeting system does not see that. See issue 42958. + if base.Ctxt.Arch.CanMergeLoads && s.Pkg.Path == "encoding/binary" { + switch s.Name { + case "littleEndian.Uint64", "littleEndian.Uint32", "littleEndian.Uint16", + "bigEndian.Uint64", "bigEndian.Uint32", "bigEndian.Uint16", + "littleEndian.PutUint64", "littleEndian.PutUint32", "littleEndian.PutUint16", + "bigEndian.PutUint64", "bigEndian.PutUint32", "bigEndian.PutUint16", + "littleEndian.AppendUint64", "littleEndian.AppendUint32", "littleEndian.AppendUint16", + "bigEndian.AppendUint64", "bigEndian.AppendUint32", "bigEndian.AppendUint16": + cheap = true + } + } + } + } + } + if cheap { + break // treat like any other node, that is, cost of 1 + } + + if ir.IsIntrinsicCall(n) { + // Treat like any other node. + break + } + + if callee := inlCallee(v.curFunc, n.Fun, v.profile); callee != nil && typecheck.HaveInlineBody(callee) { + // Check whether we'd actually inline this call. Set + // log == false since we aren't actually doing inlining + // yet. + if ok, _ := canInlineCallExpr(v.curFunc, n, callee, v.isBigFunc, false); ok { + // mkinlcall would inline this call [1], so use + // the cost of the inline body as the cost of + // the call, as that is what will actually + // appear in the code. 
+ // + // [1] This is almost a perfect match to the + // mkinlcall logic, except that + // canInlineCallExpr considers inlining cycles + // by looking at what has already been inlined. + // Since we haven't done any inlining yet we + // will miss those. + v.budget -= callee.Inl.Cost + break + } + } + + // Call cost for non-leaf inlining. + v.budget -= v.extraCallCost + + case ir.OCALLMETH: + base.FatalfAt(n.Pos(), "OCALLMETH missed by typecheck") + + // Things that are too hairy, irrespective of the budget + case ir.OCALL, ir.OCALLINTER: + // Call cost for non-leaf inlining. + v.budget -= v.extraCallCost + + case ir.OPANIC: + n := n.(*ir.UnaryExpr) + if n.X.Op() == ir.OCONVIFACE && n.X.(*ir.ConvExpr).Implicit() { + // Hack to keep reflect.flag.mustBe inlinable for TestIntendedInlining. + // Before CL 284412, these conversions were introduced later in the + // compiler, so they didn't count against inlining budget. + v.budget++ + } + v.budget -= inlineExtraPanicCost + + case ir.ORECOVER: + base.FatalfAt(n.Pos(), "ORECOVER missed typecheck") + case ir.ORECOVERFP: + // recover matches the argument frame pointer to find + // the right panic value, so it needs an argument frame. + v.reason = "call to recover" + return true + + case ir.OCLOSURE: + if base.Debug.InlFuncsWithClosures == 0 { + v.reason = "not inlining functions with closures" + return true + } + + // TODO(danscales): Maybe make budget proportional to number of closure + // variables, e.g.: + //v.budget -= int32(len(n.(*ir.ClosureExpr).Func.ClosureVars) * 3) + // TODO(austin): However, if we're able to inline this closure into + // v.curFunc, then we actually pay nothing for the closure captures. We + // should try to account for that if we're going to account for captures. + v.budget -= 15 + + case ir.OGO, ir.ODEFER, ir.OTAILCALL: + v.reason = "unhandled op " + n.Op().String() + return true + + case ir.OAPPEND: + v.budget -= inlineExtraAppendCost + + case ir.OADDR: + n := n.(*ir.AddrExpr) + // Make "&s.f" cost 0 when f's offset is zero. + if dot, ok := n.X.(*ir.SelectorExpr); ok && (dot.Op() == ir.ODOT || dot.Op() == ir.ODOTPTR) { + if _, ok := dot.X.(*ir.Name); ok && dot.Selection.Offset == 0 { + v.budget += 2 // undo ir.OADDR+ir.ODOT/ir.ODOTPTR + } + } + + case ir.ODEREF: + // *(*X)(unsafe.Pointer(&x)) is low-cost + n := n.(*ir.StarExpr) + + ptr := n.X + for ptr.Op() == ir.OCONVNOP { + ptr = ptr.(*ir.ConvExpr).X + } + if ptr.Op() == ir.OADDR { + v.budget += 1 // undo half of default cost of ir.ODEREF+ir.OADDR + } + + case ir.OCONVNOP: + // This doesn't produce code, but the children might. + v.budget++ // undo default cost + + case ir.OFALL, ir.OTYPE: + // These nodes don't produce code; omit from inlining budget. + return false + + case ir.OIF: + n := n.(*ir.IfStmt) + if ir.IsConst(n.Cond, constant.Bool) { + // This if and the condition cost nothing. + if doList(n.Init(), v.do) { + return true + } + if ir.BoolVal(n.Cond) { + return doList(n.Body, v.do) + } else { + return doList(n.Else, v.do) + } + } + + case ir.ONAME: + n := n.(*ir.Name) + if n.Class == ir.PAUTO { + v.usedLocals.Add(n) + } + + case ir.OBLOCK: + // The only OBLOCK we should see at this point is an empty one. + // In any event, let the visitList(n.List()) below take care of the statements, + // and don't charge for the OBLOCK itself. The ++ undoes the -- below. + v.budget++ + + case ir.OMETHVALUE, ir.OSLICELIT: + v.budget-- // Hack for toolstash -cmp. + + case ir.OMETHEXPR: + v.budget++ // Hack for toolstash -cmp. 
+ + case ir.OAS2: + n := n.(*ir.AssignListStmt) + + // Unified IR unconditionally rewrites: + // + // a, b = f() + // + // into: + // + // DCL tmp1 + // DCL tmp2 + // tmp1, tmp2 = f() + // a, b = tmp1, tmp2 + // + // so that it can insert implicit conversions as necessary. To + // minimize impact to the existing inlining heuristics (in + // particular, to avoid breaking the existing inlinability regress + // tests), we need to compensate for this here. + // + // See also identical logic in IsBigFunc. + if len(n.Rhs) > 0 { + if init := n.Rhs[0].Init(); len(init) == 1 { + if _, ok := init[0].(*ir.AssignListStmt); ok { + // 4 for each value, because each temporary variable now + // appears 3 times (DCL, LHS, RHS), plus an extra DCL node. + // + // 1 for the extra "tmp1, tmp2 = f()" assignment statement. + v.budget += 4*int32(len(n.Lhs)) + 1 + } + } + } + + case ir.OAS: + // Special case for coverage counter updates and coverage + // function registrations. Although these correspond to real + // operations, we treat them as zero cost for the moment. This + // is primarily due to the existence of tests that are + // sensitive to inlining-- if the insertion of coverage + // instrumentation happens to tip a given function over the + // threshold and move it from "inlinable" to "not-inlinable", + // this can cause changes in allocation behavior, which can + // then result in test failures (a good example is the + // TestAllocations in crypto/ed25519). + n := n.(*ir.AssignStmt) + if n.X.Op() == ir.OINDEX && isIndexingCoverageCounter(n.X) { + return false + } + } + + v.budget-- + + // When debugging, don't stop early, to get full cost of inlining this function + if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() { + v.reason = "too expensive" + return true + } + + return ir.DoChildren(n, v.do) +} + +// IsBigFunc reports whether fn is a "big" function. +// +// Note: The criteria for "big" is heuristic and subject to change. +func IsBigFunc(fn *ir.Func) bool { + budget := inlineBigFunctionNodes + return ir.Any(fn, func(n ir.Node) bool { + // See logic in hairyVisitor.doNode, explaining unified IR's + // handling of "a, b = f()" assignments. + if n, ok := n.(*ir.AssignListStmt); ok && n.Op() == ir.OAS2 && len(n.Rhs) > 0 { + if init := n.Rhs[0].Init(); len(init) == 1 { + if _, ok := init[0].(*ir.AssignListStmt); ok { + budget += 4*len(n.Lhs) + 1 + } + } + } + + budget-- + return budget <= 0 + }) +} + +// TryInlineCall returns an inlined call expression for call, or nil +// if inlining is not possible. +func TryInlineCall(callerfn *ir.Func, call *ir.CallExpr, bigCaller bool, profile *pgo.Profile) *ir.InlinedCallExpr { + if base.Flag.LowerL == 0 { + return nil + } + if call.Op() != ir.OCALLFUNC { + return nil + } + if call.GoDefer || call.NoInline { + return nil + } + + // Prevent inlining some reflect.Value methods when using checkptr, + // even when package reflect was compiled without it (#35073). 
+ if base.Debug.Checkptr != 0 && call.Fun.Op() == ir.OMETHEXPR { + if method := ir.MethodExprName(call.Fun); method != nil { + switch types.ReflectSymName(method.Sym()) { + case "Value.UnsafeAddr", "Value.Pointer": + return nil + } + } + } + + if base.Flag.LowerM > 3 { + fmt.Printf("%v:call to func %+v\n", ir.Line(call), call.Fun) + } + if ir.IsIntrinsicCall(call) { + return nil + } + if fn := inlCallee(callerfn, call.Fun, profile); fn != nil && typecheck.HaveInlineBody(fn) { + return mkinlcall(callerfn, call, fn, bigCaller) + } + return nil +} + +// inlCallee takes a function-typed expression and returns the underlying function ONAME +// that it refers to if statically known. Otherwise, it returns nil. +func inlCallee(caller *ir.Func, fn ir.Node, profile *pgo.Profile) (res *ir.Func) { + fn = ir.StaticValue(fn) + switch fn.Op() { + case ir.OMETHEXPR: + fn := fn.(*ir.SelectorExpr) + n := ir.MethodExprName(fn) + // Check that receiver type matches fn.X. + // TODO(mdempsky): Handle implicit dereference + // of pointer receiver argument? + if n == nil || !types.Identical(n.Type().Recv().Type, fn.X.Type()) { + return nil + } + return n.Func + case ir.ONAME: + fn := fn.(*ir.Name) + if fn.Class == ir.PFUNC { + return fn.Func + } + case ir.OCLOSURE: + fn := fn.(*ir.ClosureExpr) + c := fn.Func + if len(c.ClosureVars) != 0 && c.ClosureVars[0].Outer.Curfn != caller { + return nil // inliner doesn't support inlining across closure frames + } + CanInline(c, profile) + return c + } + return nil +} + +var inlgen int + +// SSADumpInline gives the SSA back end a chance to dump the function +// when producing output for debugging the compiler itself. +var SSADumpInline = func(*ir.Func) {} + +// InlineCall allows the inliner implementation to be overridden. +// If it returns nil, the function will not be inlined. +var InlineCall = func(callerfn *ir.Func, call *ir.CallExpr, fn *ir.Func, inlIndex int) *ir.InlinedCallExpr { + base.Fatalf("inline.InlineCall not overridden") + panic("unreachable") +} + +// inlineCostOK returns true if call n from caller to callee is cheap enough to +// inline. bigCaller indicates that caller is a big function. +// +// In addition to the "cost OK" boolean, it also returns the "max +// cost" limit used to make the decision (which may differ depending +// on func size), and the score assigned to this specific callsite. +func inlineCostOK(n *ir.CallExpr, caller, callee *ir.Func, bigCaller bool) (bool, int32, int32) { + maxCost := int32(inlineMaxBudget) + if bigCaller { + // We use this to restrict inlining into very big functions. + // See issue 26546 and 17566. + maxCost = inlineBigFunctionMaxCost + } + + metric := callee.Inl.Cost + if inlheur.Enabled() { + score, ok := inlheur.GetCallSiteScore(caller, n) + if ok { + metric = int32(score) + } + } + + if metric <= maxCost { + // Simple case. Function is already cheap enough. + return true, 0, metric + } + + // We'll also allow inlining of hot functions below inlineHotMaxBudget, + // but only in small functions. 
+
+	lineOffset := pgo.NodeLineOffset(n, caller)
+	csi := pgo.CallSiteInfo{LineOffset: lineOffset, Caller: caller}
+	if _, ok := candHotEdgeMap[csi]; !ok {
+		// Cold
+		return false, maxCost, metric
+	}
+
+	// Hot
+
+	if bigCaller {
+		if base.Debug.PGODebug > 0 {
+			fmt.Printf("hot-big check disallows inlining for call %s (cost %d) at %v in big function %s\n", ir.PkgFuncName(callee), callee.Inl.Cost, ir.Line(n), ir.PkgFuncName(caller))
+		}
+		return false, maxCost, metric
+	}
+
+	if metric > inlineHotMaxBudget {
+		return false, inlineHotMaxBudget, metric
+	}
+
+	if !base.PGOHash.MatchPosWithInfo(n.Pos(), "inline", nil) {
+		// De-selected by PGO Hash.
+		return false, maxCost, metric
+	}
+
+	if base.Debug.PGODebug > 0 {
+		fmt.Printf("hot-budget check allows inlining for call %s (cost %d) at %v in function %s\n", ir.PkgFuncName(callee), callee.Inl.Cost, ir.Line(n), ir.PkgFuncName(caller))
+	}
+
+	return true, 0, metric
+}
+
+// canInlineCallExpr returns true if the call n from caller to callee
+// can be inlined, plus the score computed for the call expr in
+// question. bigCaller indicates that caller is a big function. log
+// indicates that the 'cannot inline' reason should be logged.
+//
+// Preconditions: CanInline(callee) has already been called.
+func canInlineCallExpr(callerfn *ir.Func, n *ir.CallExpr, callee *ir.Func, bigCaller bool, log bool) (bool, int32) {
+	if callee.Inl == nil {
+		// callee is never inlinable.
+		if log && logopt.Enabled() {
+			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
+				fmt.Sprintf("%s cannot be inlined", ir.PkgFuncName(callee)))
+		}
+		return false, 0
+	}
+
+	ok, maxCost, callSiteScore := inlineCostOK(n, callerfn, callee, bigCaller)
+	if !ok {
+		// callee cost too high for this call site.
+		if log && logopt.Enabled() {
+			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
+				fmt.Sprintf("cost %d of %s exceeds max caller cost %d", callee.Inl.Cost, ir.PkgFuncName(callee), maxCost))
+		}
+		return false, 0
+	}
+
+	if callee == callerfn {
+		// Can't recursively inline a function into itself.
+		if log && logopt.Enabled() {
+			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", fmt.Sprintf("recursive call to %s", ir.FuncName(callerfn)))
+		}
+		return false, 0
+	}
+
+	if base.Flag.Cfg.Instrumenting && types.IsNoInstrumentPkg(callee.Sym().Pkg) {
+		// Runtime package must not be instrumented.
+		// Instrument skips runtime package. However, some runtime code can be
+		// inlined into other packages and instrumented there. To avoid this,
+		// we disable inlining of runtime functions when instrumenting.
+		// The example that we observed is inlining of LockOSThread,
+		// which led to false race reports on m contents.
+		if log && logopt.Enabled() {
+			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
+				fmt.Sprintf("call to runtime function %s in instrumented build", ir.PkgFuncName(callee)))
+		}
+		return false, 0
+	}
+
+	if base.Flag.Race && types.IsNoRacePkg(callee.Sym().Pkg) {
+		if log && logopt.Enabled() {
+			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
+				fmt.Sprintf(`call into "no-race" package function %s in race build`, ir.PkgFuncName(callee)))
+		}
+		return false, 0
+	}
+
+	// Check if we've already inlined this function at this particular
+	// call site, in order to stop inlining when we reach the beginning
+	// of a recursion cycle again. We don't inline immediately recursive
+	// functions, but allow inlining if there is a recursion cycle of
+	// many functions.
+	// Most likely, the inlining will stop before we even hit the
+	// beginning of the cycle again, but this catches the unusual case.
+	parent := base.Ctxt.PosTable.Pos(n.Pos()).Base().InliningIndex()
+	sym := callee.Linksym()
+	for inlIndex := parent; inlIndex >= 0; inlIndex = base.Ctxt.InlTree.Parent(inlIndex) {
+		if base.Ctxt.InlTree.InlinedFunction(inlIndex) == sym {
+			if log {
+				if base.Flag.LowerM > 1 {
+					fmt.Printf("%v: cannot inline %v into %v: repeated recursive cycle\n", ir.Line(n), callee, ir.FuncName(callerfn))
+				}
+				if logopt.Enabled() {
+					logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
+						fmt.Sprintf("repeated recursive cycle to %s", ir.PkgFuncName(callee)))
+				}
+			}
+			return false, 0
+		}
+	}
+
+	return true, callSiteScore
+}
+
+// mkinlcall returns an OINLCALL node that can replace OCALLFUNC n, or
+// nil if it cannot be inlined. callerfn is the function that contains
+// n, and fn is the function being called.
+//
+// The result of mkinlcall MUST be assigned back to n, e.g.
+//
+//	n.Left = mkinlcall(n.Left, fn, isddd)
+func mkinlcall(callerfn *ir.Func, n *ir.CallExpr, fn *ir.Func, bigCaller bool) *ir.InlinedCallExpr {
+	ok, score := canInlineCallExpr(callerfn, n, fn, bigCaller, true)
+	if !ok {
+		return nil
+	}
+	typecheck.AssertFixedCall(n)
+
+	parent := base.Ctxt.PosTable.Pos(n.Pos()).Base().InliningIndex()
+	sym := fn.Linksym()
+	inlIndex := base.Ctxt.InlTree.Add(parent, n.Pos(), sym, ir.FuncName(fn))
+
+	closureInitLSym := func(n *ir.CallExpr, fn *ir.Func) {
+		// The linker needs FuncInfo metadata for all inlined
+		// functions. This is typically handled by gc.enqueueFunc
+		// calling ir.InitLSym for all function declarations in
+		// typecheck.Target.Decls (ir.UseClosure adds all closures to
+		// Decls).
+		//
+		// However, non-trivial closures in Decls are ignored, and are
+		// instead enqueued when walk of the calling function
+		// discovers them.
+		//
+		// This presents a problem for direct calls to closures.
+		// Inlining will replace the entire closure definition with its
+		// body, which hides the closure from walk and thus suppresses
+		// symbol creation.
+		//
+		// Explicitly create a symbol early in this edge case to ensure
+		// we keep this metadata.
+		//
+		// TODO: Refactor to keep a reference so this can all be done
+		// by enqueueFunc.
+
+		if n.Op() != ir.OCALLFUNC {
+			// Not a standard call.
+			return
+		}
+		if n.Fun.Op() != ir.OCLOSURE {
+			// Not a direct closure call.
+			return
+		}
+
+		clo := n.Fun.(*ir.ClosureExpr)
+		if ir.IsTrivialClosure(clo) {
+			// enqueueFunc will handle trivial closures anyway.
+			return
+		}
+
+		ir.InitLSym(fn, true)
+	}
+
+	closureInitLSym(n, fn)
+
+	if base.Flag.GenDwarfInl > 0 {
+		if !sym.WasInlined() {
+			base.Ctxt.DwFixups.SetPrecursorFunc(sym, fn)
+			sym.Set(obj.AttrWasInlined, true)
+		}
+	}
+
+	if base.Flag.LowerM != 0 {
+		if buildcfg.Experiment.NewInliner {
+			fmt.Printf("%v: inlining call to %v with score %d\n",
+				ir.Line(n), fn, score)
+		} else {
+			fmt.Printf("%v: inlining call to %v\n", ir.Line(n), fn)
+		}
+	}
+	if base.Flag.LowerM > 2 {
+		fmt.Printf("%v: Before inlining: %+v\n", ir.Line(n), n)
+	}
+
+	res := InlineCall(callerfn, n, fn, inlIndex)
+
+	if res == nil {
+		base.FatalfAt(n.Pos(), "inlining call to %v failed", fn)
+	}
+
+	if base.Flag.LowerM > 2 {
+		fmt.Printf("%v: After inlining %+v\n\n", ir.Line(res), res)
+	}
+
+	if inlheur.Enabled() {
+		inlheur.UpdateCallsiteTable(callerfn, n, res)
+	}
+
+	return res
+}
+
+// CalleeEffects appends any side effects from evaluating callee to init.
+func CalleeEffects(init *ir.Nodes, callee ir.Node) { + for { + init.Append(ir.TakeInit(callee)...) + + switch callee.Op() { + case ir.ONAME, ir.OCLOSURE, ir.OMETHEXPR: + return // done + + case ir.OCONVNOP: + conv := callee.(*ir.ConvExpr) + callee = conv.X + + case ir.OINLCALL: + ic := callee.(*ir.InlinedCallExpr) + init.Append(ic.Body.Take()...) + callee = ic.SingleResult() + + default: + base.FatalfAt(callee.Pos(), "unexpected callee expression: %v", callee) + } + } +} + +func pruneUnusedAutos(ll []*ir.Name, vis *hairyVisitor) []*ir.Name { + s := make([]*ir.Name, 0, len(ll)) + for _, n := range ll { + if n.Class == ir.PAUTO { + if !vis.usedLocals.Has(n) { + // TODO(mdempsky): Simplify code after confident that this + // never happens anymore. + base.FatalfAt(n.Pos(), "unused auto: %v", n) + continue + } + } + s = append(s, n) + } + return s +} + +// numNonClosures returns the number of functions in list which are not closures. +func numNonClosures(list []*ir.Func) int { + count := 0 + for _, fn := range list { + if fn.OClosure == nil { + count++ + } + } + return count +} + +func doList(list []ir.Node, do func(ir.Node) bool) bool { + for _, x := range list { + if x != nil { + if do(x) { + return true + } + } + } + return false +} + +// isIndexingCoverageCounter returns true if the specified node 'n' is indexing +// into a coverage counter array. +func isIndexingCoverageCounter(n ir.Node) bool { + if n.Op() != ir.OINDEX { + return false + } + ixn := n.(*ir.IndexExpr) + if ixn.X.Op() != ir.ONAME || !ixn.X.Type().IsArray() { + return false + } + nn := ixn.X.(*ir.Name) + return nn.CoverageCounter() +} + +// isAtomicCoverageCounterUpdate examines the specified node to +// determine whether it represents a call to sync/atomic.AddUint32 to +// increment a coverage counter. +func isAtomicCoverageCounterUpdate(cn *ir.CallExpr) bool { + if cn.Fun.Op() != ir.ONAME { + return false + } + name := cn.Fun.(*ir.Name) + if name.Class != ir.PFUNC { + return false + } + fn := name.Sym().Name + if name.Sym().Pkg.Path != "sync/atomic" || + (fn != "AddUint32" && fn != "StoreUint32") { + return false + } + if len(cn.Args) != 2 || cn.Args[0].Op() != ir.OADDR { + return false + } + adn := cn.Args[0].(*ir.AddrExpr) + v := isIndexingCoverageCounter(adn.X) + return v +} + +func PostProcessCallSites(profile *pgo.Profile) { + if base.Debug.DumpInlCallSiteScores != 0 { + budgetCallback := func(fn *ir.Func, prof *pgo.Profile) (int32, bool) { + v := inlineBudget(fn, prof, false, false) + return v, v == inlineHotMaxBudget + } + inlheur.DumpInlCallSiteScores(profile, budgetCallback) + } +} + +func analyzeFuncProps(fn *ir.Func, p *pgo.Profile) { + canInline := func(fn *ir.Func) { CanInline(fn, p) } + budgetForFunc := func(fn *ir.Func) int32 { + return inlineBudget(fn, p, true, false) + } + inlheur.AnalyzeFunc(fn, canInline, budgetForFunc, inlineMaxBudget) +} diff --git a/src/cmd/compile/internal/inline/inlheur/actualexprpropbits_string.go b/src/cmd/compile/internal/inline/inlheur/actualexprpropbits_string.go new file mode 100644 index 0000000..2faf76f --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/actualexprpropbits_string.go @@ -0,0 +1,58 @@ +// Code generated by "stringer -bitset -type ActualExprPropBits"; DO NOT EDIT. + +package inlheur + +import "strconv" +import "bytes" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[ActualExprConstant-1] + _ = x[ActualExprIsConcreteConvIface-2] + _ = x[ActualExprIsFunc-4] + _ = x[ActualExprIsInlinableFunc-8] +} + +var _ActualExprPropBits_value = [...]uint64{ + 0x1, /* ActualExprConstant */ + 0x2, /* ActualExprIsConcreteConvIface */ + 0x4, /* ActualExprIsFunc */ + 0x8, /* ActualExprIsInlinableFunc */ +} + +const _ActualExprPropBits_name = "ActualExprConstantActualExprIsConcreteConvIfaceActualExprIsFuncActualExprIsInlinableFunc" + +var _ActualExprPropBits_index = [...]uint8{0, 18, 47, 63, 88} + +func (i ActualExprPropBits) String() string { + var b bytes.Buffer + + remain := uint64(i) + seen := false + + for k, v := range _ActualExprPropBits_value { + x := _ActualExprPropBits_name[_ActualExprPropBits_index[k]:_ActualExprPropBits_index[k+1]] + if v == 0 { + if i == 0 { + b.WriteString(x) + return b.String() + } + continue + } + if (v & remain) == v { + remain &^= v + x := _ActualExprPropBits_name[_ActualExprPropBits_index[k]:_ActualExprPropBits_index[k+1]] + if seen { + b.WriteString("|") + } + seen = true + b.WriteString(x) + } + } + if remain == 0 { + return b.String() + } + return "ActualExprPropBits(0x" + strconv.FormatInt(int64(i), 16) + ")" +} diff --git a/src/cmd/compile/internal/inline/inlheur/analyze.go b/src/cmd/compile/internal/inline/inlheur/analyze.go new file mode 100644 index 0000000..a1b6f35 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/analyze.go @@ -0,0 +1,370 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package inlheur + +import ( + "cmd/compile/internal/base" + "cmd/compile/internal/ir" + "cmd/compile/internal/types" + "encoding/json" + "fmt" + "internal/buildcfg" + "io" + "os" + "path/filepath" + "sort" + "strings" +) + +const ( + debugTraceFuncs = 1 << iota + debugTraceFuncFlags + debugTraceResults + debugTraceParams + debugTraceExprClassify + debugTraceCalls + debugTraceScoring +) + +// propAnalyzer interface is used for defining one or more analyzer +// helper objects, each tasked with computing some specific subset of +// the properties we're interested in. The assumption is that +// properties are independent, so each new analyzer that implements +// this interface can operate entirely on its own. For a given analyzer +// there will be a sequence of calls to nodeVisitPre and nodeVisitPost +// as the nodes within a function are visited, then a followup call to +// setResults so that the analyzer can transfer its results into the +// final properties object. +type propAnalyzer interface { + nodeVisitPre(n ir.Node) + nodeVisitPost(n ir.Node) + setResults(funcProps *FuncProps) +} + +// fnInlHeur contains inline heuristics state information about a +// specific Go function being analyzed/considered by the inliner. Note +// that in addition to constructing a fnInlHeur object by analyzing a +// specific *ir.Func, there is also code in the test harness +// (funcprops_test.go) that builds up fnInlHeur's by reading in and +// parsing a dump. This is the reason why we have file/fname/line +// fields below instead of just an *ir.Func field. +type fnInlHeur struct { + props *FuncProps + cstab CallSiteTab + fname string + file string + line uint +} + +var fpmap = map[*ir.Func]fnInlHeur{} + +// AnalyzeFunc computes function properties for fn and its contained +// closures, updating the global 'fpmap' table. 
It is assumed that
+// "CanInline" has been run on fn and on the closures that feed
+// directly into calls; other closures not directly called will also
+// be checked for inlinability here in case they are
+// returned as a result.
+func AnalyzeFunc(fn *ir.Func, canInline func(*ir.Func), budgetForFunc func(*ir.Func) int32, inlineMaxBudget int) {
+	if fpmap == nil {
+		// If fpmap is nil this indicates that the main inliner pass is
+		// complete and we're doing inlining of wrappers (no heuristics
+		// used here).
+		return
+	}
+	if fn.OClosure != nil {
+		// closures will be processed along with their outer enclosing func.
+		return
+	}
+	enableDebugTraceIfEnv()
+	if debugTrace&debugTraceFuncs != 0 {
+		fmt.Fprintf(os.Stderr, "=-= AnalyzeFunc(%v)\n", fn)
+	}
+	// Build up a list containing 'fn' and any closures it contains. Along
+	// the way, test to see whether each closure is inlinable in case
+	// we might be returning it.
+	funcs := []*ir.Func{fn}
+	ir.VisitFuncAndClosures(fn, func(n ir.Node) {
+		if clo, ok := n.(*ir.ClosureExpr); ok {
+			funcs = append(funcs, clo.Func)
+		}
+	})
+
+	// Analyze the list of functions. We want to visit a given func
+	// only after the closures it contains have been processed, so
+	// iterate through the list in reverse order. Once a function has
+	// been analyzed, revisit the question of whether it should be
+	// inlinable; if it is over the default hairyness limit and it
+	// doesn't have any interesting properties, then we don't want
+	// the overhead of writing out its inline body.
+	nameFinder := newNameFinder(fn)
+	for i := len(funcs) - 1; i >= 0; i-- {
+		f := funcs[i]
+		if f.OClosure != nil && !f.InlinabilityChecked() {
+			canInline(f)
+		}
+		funcProps := analyzeFunc(f, inlineMaxBudget, nameFinder)
+		revisitInlinability(f, funcProps, budgetForFunc)
+		if f.Inl != nil {
+			f.Inl.Properties = funcProps.SerializeToString()
+		}
+	}
+	disableDebugTrace()
+}
+
+// TearDown is invoked at the end of the main inlining pass; doing
+// function analysis and call site scoring is unlikely to help a lot
+// after this point, so nil out fpmap and other globals to reclaim
+// storage.
+func TearDown() {
+	fpmap = nil
+	scoreCallsCache.tab = nil
+	scoreCallsCache.csl = nil
+}
+
+func analyzeFunc(fn *ir.Func, inlineMaxBudget int, nf *nameFinder) *FuncProps {
+	if funcInlHeur, ok := fpmap[fn]; ok {
+		return funcInlHeur.props
+	}
+	funcProps, fcstab := computeFuncProps(fn, inlineMaxBudget, nf)
+	file, line := fnFileLine(fn)
+	entry := fnInlHeur{
+		fname: fn.Sym().Name,
+		file:  file,
+		line:  line,
+		props: funcProps,
+		cstab: fcstab,
+	}
+	fn.SetNeverReturns(entry.props.Flags&FuncPropNeverReturns != 0)
+	fpmap[fn] = entry
+	if fn.Inl != nil && fn.Inl.Properties == "" {
+		fn.Inl.Properties = entry.props.SerializeToString()
+	}
+	return funcProps
+}
+
+// revisitInlinability revisits the question of whether to continue to
+// treat function 'fn' as an inline candidate based on the set of
+// properties we've computed for it. If (for example) it has an
+// initial size score of 150 and no interesting properties to speak
+// of, then there isn't really any point to moving ahead with it as an
+// inline candidate.
+func revisitInlinability(fn *ir.Func, funcProps *FuncProps, budgetForFunc func(*ir.Func) int32) { + if fn.Inl == nil { + return + } + maxAdj := int32(LargestNegativeScoreAdjustment(fn, funcProps)) + budget := budgetForFunc(fn) + if fn.Inl.Cost+maxAdj > budget { + fn.Inl = nil + } +} + +// computeFuncProps examines the Go function 'fn' and computes for it +// a function "properties" object, to be used to drive inlining +// heuristics. See comments on the FuncProps type for more info. +func computeFuncProps(fn *ir.Func, inlineMaxBudget int, nf *nameFinder) (*FuncProps, CallSiteTab) { + if debugTrace&debugTraceFuncs != 0 { + fmt.Fprintf(os.Stderr, "=-= starting analysis of func %v:\n%+v\n", + fn, fn) + } + funcProps := new(FuncProps) + ffa := makeFuncFlagsAnalyzer(fn) + analyzers := []propAnalyzer{ffa} + analyzers = addResultsAnalyzer(fn, analyzers, funcProps, inlineMaxBudget, nf) + analyzers = addParamsAnalyzer(fn, analyzers, funcProps, nf) + runAnalyzersOnFunction(fn, analyzers) + for _, a := range analyzers { + a.setResults(funcProps) + } + cstab := computeCallSiteTable(fn, fn.Body, nil, ffa.panicPathTable(), 0, nf) + return funcProps, cstab +} + +func runAnalyzersOnFunction(fn *ir.Func, analyzers []propAnalyzer) { + var doNode func(ir.Node) bool + doNode = func(n ir.Node) bool { + for _, a := range analyzers { + a.nodeVisitPre(n) + } + ir.DoChildren(n, doNode) + for _, a := range analyzers { + a.nodeVisitPost(n) + } + return false + } + doNode(fn) +} + +func propsForFunc(fn *ir.Func) *FuncProps { + if funcInlHeur, ok := fpmap[fn]; ok { + return funcInlHeur.props + } else if fn.Inl != nil && fn.Inl.Properties != "" { + // FIXME: considering adding some sort of cache or table + // for deserialized properties of imported functions. + return DeserializeFromString(fn.Inl.Properties) + } + return nil +} + +func fnFileLine(fn *ir.Func) (string, uint) { + p := base.Ctxt.InnermostPos(fn.Pos()) + return filepath.Base(p.Filename()), p.Line() +} + +func Enabled() bool { + return buildcfg.Experiment.NewInliner || UnitTesting() +} + +func UnitTesting() bool { + return base.Debug.DumpInlFuncProps != "" || + base.Debug.DumpInlCallSiteScores != 0 +} + +// DumpFuncProps computes and caches function properties for the func +// 'fn', writing out a description of the previously computed set of +// properties to the file given in 'dumpfile'. Used for the +// "-d=dumpinlfuncprops=..." command line flag, intended for use +// primarily in unit testing. +func DumpFuncProps(fn *ir.Func, dumpfile string) { + if fn != nil { + if fn.OClosure != nil { + // closures will be processed along with their outer enclosing func. + return + } + captureFuncDumpEntry(fn) + ir.VisitFuncAndClosures(fn, func(n ir.Node) { + if clo, ok := n.(*ir.ClosureExpr); ok { + captureFuncDumpEntry(clo.Func) + } + }) + } else { + emitDumpToFile(dumpfile) + } +} + +// emitDumpToFile writes out the buffer function property dump entries +// to a file, for unit testing. Dump entries need to be sorted by +// definition line, and due to generics we need to account for the +// possibility that several ir.Func's will have the same def line. +func emitDumpToFile(dumpfile string) { + mode := os.O_WRONLY | os.O_CREATE | os.O_TRUNC + if dumpfile[0] == '+' { + dumpfile = dumpfile[1:] + mode = os.O_WRONLY | os.O_APPEND | os.O_CREATE + } + if dumpfile[0] == '%' { + dumpfile = dumpfile[1:] + d, b := filepath.Dir(dumpfile), filepath.Base(dumpfile) + ptag := strings.ReplaceAll(types.LocalPkg.Path, "/", ":") + dumpfile = d + "/" + ptag + "." 
+ b
+	}
+	outf, err := os.OpenFile(dumpfile, mode, 0644)
+	if err != nil {
+		base.Fatalf("opening function props dump file %q: %v\n", dumpfile, err)
+	}
+	defer outf.Close()
+	dumpFilePreamble(outf)
+
+	atline := map[uint]uint{}
+	sl := make([]fnInlHeur, 0, len(dumpBuffer))
+	for _, e := range dumpBuffer {
+		sl = append(sl, e)
+		atline[e.line] = atline[e.line] + 1
+	}
+	sl = sortFnInlHeurSlice(sl)
+
+	prevline := uint(0)
+	for _, entry := range sl {
+		idx := uint(0)
+		if prevline == entry.line {
+			idx++
+		}
+		prevline = entry.line
+		atl := atline[entry.line]
+		if err := dumpFnPreamble(outf, &entry, nil, idx, atl); err != nil {
+			base.Fatalf("function props dump: %v\n", err)
+		}
+	}
+	dumpBuffer = nil
+}
+
+// captureFuncDumpEntry grabs the function properties object for 'fn'
+// and enqueues it for later dumping. Used for the
+// "-d=dumpinlfuncprops=..." command line flag, intended for use
+// primarily in unit testing.
+func captureFuncDumpEntry(fn *ir.Func) {
+	// avoid capturing compiler-generated equality funcs.
+	if strings.HasPrefix(fn.Sym().Name, ".eq.") {
+		return
+	}
+	funcInlHeur, ok := fpmap[fn]
+	if !ok {
+		// Missing entry is expected for functions that are too large
+		// to inline. We still want to write out call site scores in
+		// this case however.
+		funcInlHeur = fnInlHeur{cstab: callSiteTab}
+	}
+	if dumpBuffer == nil {
+		dumpBuffer = make(map[*ir.Func]fnInlHeur)
+	}
+	if _, ok := dumpBuffer[fn]; ok {
+		return
+	}
+	if debugTrace&debugTraceFuncs != 0 {
+		fmt.Fprintf(os.Stderr, "=-= capturing dump for %v:\n", fn)
+	}
+	dumpBuffer[fn] = funcInlHeur
+}
+
+// dumpFilePreamble writes out a file-level preamble for a given
+// Go function as part of a function properties dump.
+func dumpFilePreamble(w io.Writer) {
+	fmt.Fprintf(w, "// DO NOT EDIT (use 'go test -v -update-expected' instead.)\n")
+	fmt.Fprintf(w, "// See cmd/compile/internal/inline/inlheur/testdata/props/README.txt\n")
+	fmt.Fprintf(w, "// for more information on the format of this file.\n")
+	fmt.Fprintf(w, "// %s\n", preambleDelimiter)
+}
+
+// dumpFnPreamble writes out a function-level preamble for a given
+// Go function as part of a function properties dump. See the
+// README.txt file in testdata/props for more on the format of
+// this preamble.
+func dumpFnPreamble(w io.Writer, funcInlHeur *fnInlHeur, ecst encodedCallSiteTab, idx, atl uint) error {
+	fmt.Fprintf(w, "// %s %s %d %d %d\n",
+		funcInlHeur.file, funcInlHeur.fname, funcInlHeur.line, idx, atl)
+	// emit props as comments, followed by delimiter
+	fmt.Fprintf(w, "%s// %s\n", funcInlHeur.props.ToString("// "), comDelimiter)
+	data, err := json.Marshal(funcInlHeur.props)
+	if err != nil {
+		return fmt.Errorf("marshal error %v\n", err)
+	}
+	fmt.Fprintf(w, "// %s\n", string(data))
+	dumpCallSiteComments(w, funcInlHeur.cstab, ecst)
+	fmt.Fprintf(w, "// %s\n", fnDelimiter)
+	return nil
+}
+
+// sortFnInlHeurSlice sorts a slice of fnInlHeur based on
+// the starting line of the function definition, then by name.
+func sortFnInlHeurSlice(sl []fnInlHeur) []fnInlHeur {
+	sort.SliceStable(sl, func(i, j int) bool {
+		if sl[i].line != sl[j].line {
+			return sl[i].line < sl[j].line
+		}
+		return sl[i].fname < sl[j].fname
+	})
+	return sl
+}
+
+// delimiters written to various preambles to make parsing of
+// dumps easier.
+const preambleDelimiter = "<endfilepreamble>" +const fnDelimiter = "<endfuncpreamble>" +const comDelimiter = "<endpropsdump>" +const csDelimiter = "<endcallsites>" + +// dumpBuffer stores up function properties dumps when +// "-d=dumpinlfuncprops=..." is in effect. +var dumpBuffer map[*ir.Func]fnInlHeur diff --git a/src/cmd/compile/internal/inline/inlheur/analyze_func_callsites.go b/src/cmd/compile/internal/inline/inlheur/analyze_func_callsites.go new file mode 100644 index 0000000..36ebe18 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/analyze_func_callsites.go @@ -0,0 +1,413 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package inlheur + +import ( + "cmd/compile/internal/ir" + "cmd/compile/internal/pgo" + "cmd/compile/internal/typecheck" + "fmt" + "os" + "strings" +) + +type callSiteAnalyzer struct { + fn *ir.Func + *nameFinder +} + +type callSiteTableBuilder struct { + fn *ir.Func + *nameFinder + cstab CallSiteTab + ptab map[ir.Node]pstate + nstack []ir.Node + loopNest int + isInit bool +} + +func makeCallSiteAnalyzer(fn *ir.Func) *callSiteAnalyzer { + return &callSiteAnalyzer{ + fn: fn, + nameFinder: newNameFinder(fn), + } +} + +func makeCallSiteTableBuilder(fn *ir.Func, cstab CallSiteTab, ptab map[ir.Node]pstate, loopNestingLevel int, nf *nameFinder) *callSiteTableBuilder { + isInit := fn.IsPackageInit() || strings.HasPrefix(fn.Sym().Name, "init.") + return &callSiteTableBuilder{ + fn: fn, + cstab: cstab, + ptab: ptab, + isInit: isInit, + loopNest: loopNestingLevel, + nstack: []ir.Node{fn}, + nameFinder: nf, + } +} + +// computeCallSiteTable builds and returns a table of call sites for +// the specified region in function fn. A region here corresponds to a +// specific subtree within the AST for a function. The main intended +// use cases are for 'region' to be either A) an entire function body, +// or B) an inlined call expression. +func computeCallSiteTable(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, ptab map[ir.Node]pstate, loopNestingLevel int, nf *nameFinder) CallSiteTab { + cstb := makeCallSiteTableBuilder(fn, cstab, ptab, loopNestingLevel, nf) + var doNode func(ir.Node) bool + doNode = func(n ir.Node) bool { + cstb.nodeVisitPre(n) + ir.DoChildren(n, doNode) + cstb.nodeVisitPost(n) + return false + } + for _, n := range region { + doNode(n) + } + return cstb.cstab +} + +func (cstb *callSiteTableBuilder) flagsForNode(call *ir.CallExpr) CSPropBits { + var r CSPropBits + + if debugTrace&debugTraceCalls != 0 { + fmt.Fprintf(os.Stderr, "=-= analyzing call at %s\n", + fmtFullPos(call.Pos())) + } + + // Set a bit if this call is within a loop. + if cstb.loopNest > 0 { + r |= CallSiteInLoop + } + + // Set a bit if the call is within an init function (either + // compiler-generated or user-written). + if cstb.isInit { + r |= CallSiteInInitFunc + } + + // Decide whether to apply the panic path heuristic. Hack: don't + // apply this heuristic in the function "main.main" (mostly just + // to avoid annoying users). + if !isMainMain(cstb.fn) { + r = cstb.determinePanicPathBits(call, r) + } + + return r +} + +// determinePanicPathBits updates the CallSiteOnPanicPath bit within +// "r" if we think this call is on an unconditional path to +// panic/exit. Do this by walking back up the node stack to see if we +// can find either A) an enclosing panic, or B) a statement node that +// we've determined leads to a panic/exit. 
+func (cstb *callSiteTableBuilder) determinePanicPathBits(call ir.Node, r CSPropBits) CSPropBits { + cstb.nstack = append(cstb.nstack, call) + defer func() { + cstb.nstack = cstb.nstack[:len(cstb.nstack)-1] + }() + + for ri := range cstb.nstack[:len(cstb.nstack)-1] { + i := len(cstb.nstack) - ri - 1 + n := cstb.nstack[i] + _, isCallExpr := n.(*ir.CallExpr) + _, isStmt := n.(ir.Stmt) + if isCallExpr { + isStmt = false + } + + if debugTrace&debugTraceCalls != 0 { + ps, inps := cstb.ptab[n] + fmt.Fprintf(os.Stderr, "=-= callpar %d op=%s ps=%s inptab=%v stmt=%v\n", i, n.Op().String(), ps.String(), inps, isStmt) + } + + if n.Op() == ir.OPANIC { + r |= CallSiteOnPanicPath + break + } + if v, ok := cstb.ptab[n]; ok { + if v == psCallsPanic { + r |= CallSiteOnPanicPath + break + } + if isStmt { + break + } + } + } + return r +} + +// propsForArg returns property bits for a given call argument expression arg. +func (cstb *callSiteTableBuilder) propsForArg(arg ir.Node) ActualExprPropBits { + if cval := cstb.constValue(arg); cval != nil { + return ActualExprConstant + } + if cstb.isConcreteConvIface(arg) { + return ActualExprIsConcreteConvIface + } + fname := cstb.funcName(arg) + if fname != nil { + if fn := fname.Func; fn != nil && typecheck.HaveInlineBody(fn) { + return ActualExprIsInlinableFunc + } + return ActualExprIsFunc + } + return 0 +} + +// argPropsForCall returns a slice of argument properties for the +// expressions being passed to the callee in the specific call +// expression; these will be stored in the CallSite object for a given +// call and then consulted when scoring. If no arg has any interesting +// properties we try to save some space and return a nil slice. +func (cstb *callSiteTableBuilder) argPropsForCall(ce *ir.CallExpr) []ActualExprPropBits { + rv := make([]ActualExprPropBits, len(ce.Args)) + somethingInteresting := false + for idx := range ce.Args { + argProp := cstb.propsForArg(ce.Args[idx]) + somethingInteresting = somethingInteresting || (argProp != 0) + rv[idx] = argProp + } + if !somethingInteresting { + return nil + } + return rv +} + +func (cstb *callSiteTableBuilder) addCallSite(callee *ir.Func, call *ir.CallExpr) { + flags := cstb.flagsForNode(call) + argProps := cstb.argPropsForCall(call) + if debugTrace&debugTraceCalls != 0 { + fmt.Fprintf(os.Stderr, "=-= props %+v for call %v\n", argProps, call) + } + // FIXME: maybe bulk-allocate these? + cs := &CallSite{ + Call: call, + Callee: callee, + Assign: cstb.containingAssignment(call), + ArgProps: argProps, + Flags: flags, + ID: uint(len(cstb.cstab)), + } + if _, ok := cstb.cstab[call]; ok { + fmt.Fprintf(os.Stderr, "*** cstab duplicate entry at: %s\n", + fmtFullPos(call.Pos())) + fmt.Fprintf(os.Stderr, "*** call: %+v\n", call) + panic("bad") + } + // Set initial score for callsite to the cost computed + // by CanInline; this score will be refined later based + // on heuristics. 
+	cs.Score = int(callee.Inl.Cost)
+
+	if cstb.cstab == nil {
+		cstb.cstab = make(CallSiteTab)
+	}
+	cstb.cstab[call] = cs
+	if debugTrace&debugTraceCalls != 0 {
+		fmt.Fprintf(os.Stderr, "=-= added callsite: caller=%v callee=%v n=%s\n",
+			cstb.fn, callee, fmtFullPos(call.Pos()))
+	}
+}
+
+func (cstb *callSiteTableBuilder) nodeVisitPre(n ir.Node) {
+	switch n.Op() {
+	case ir.ORANGE, ir.OFOR:
+		if !hasTopLevelLoopBodyReturnOrBreak(loopBody(n)) {
+			cstb.loopNest++
+		}
+	case ir.OCALLFUNC:
+		ce := n.(*ir.CallExpr)
+		callee := pgo.DirectCallee(ce.Fun)
+		if callee != nil && callee.Inl != nil {
+			cstb.addCallSite(callee, ce)
+		}
+	}
+	cstb.nstack = append(cstb.nstack, n)
+}
+
+func (cstb *callSiteTableBuilder) nodeVisitPost(n ir.Node) {
+	cstb.nstack = cstb.nstack[:len(cstb.nstack)-1]
+	switch n.Op() {
+	case ir.ORANGE, ir.OFOR:
+		if !hasTopLevelLoopBodyReturnOrBreak(loopBody(n)) {
+			cstb.loopNest--
+		}
+	}
+}
+
+func loopBody(n ir.Node) ir.Nodes {
+	if forst, ok := n.(*ir.ForStmt); ok {
+		return forst.Body
+	}
+	if rst, ok := n.(*ir.RangeStmt); ok {
+		return rst.Body
+	}
+	return nil
+}
+
+// hasTopLevelLoopBodyReturnOrBreak examines the body of a "for" or
+// "range" loop to try to verify that it is a real loop, as opposed to
+// a construct that is syntactically loopy but doesn't actually iterate
+// multiple times, like:
+//
+//	for {
+//		blah()
+//		return 1
+//	}
+//
+// [Remark: the pattern above crops up quite a bit in the source code
+// for the compiler itself, e.g. the auto-generated rewrite code]
+//
+// Note that we don't look for GOTO statements here, so it's possible
+// we'll get the wrong result for a loop with complicated control
+// jumps via gotos.
+func hasTopLevelLoopBodyReturnOrBreak(loopBody ir.Nodes) bool {
+	for _, n := range loopBody {
+		if n.Op() == ir.ORETURN || n.Op() == ir.OBREAK {
+			return true
+		}
+	}
+	return false
+}
+
+// containingAssignment returns the top-level assignment statement
+// for a statement-level function call "n". Examples:
+//
+//	x := foo()
+//	x, y := bar(z, baz())
+//	if blah() { ...
+//
+// Here the top-level assignment statement for the foo() call is the
+// statement assigning to "x"; the top-level assignment for the "bar()"
+// call is the assignment to x,y. For the baz() and blah() calls,
+// there is no top-level assignment statement.
+//
+// The unstated goal here is that we want to use the containing
+// assignment to establish a connection between a given call and the
+// variables to which its results/returns are being assigned.
+//
+// Note that for the "bar" call above, the front end sometimes
+// decomposes this into two assignments, the first one assigning the
+// call to a pair of auto-temps, then the second one assigning the
+// auto-temps to the user-visible vars. This helper will return the
+// second (outer) of these two.
+func (cstb *callSiteTableBuilder) containingAssignment(n ir.Node) ir.Node {
+	parent := cstb.nstack[len(cstb.nstack)-1]
+
+	// assignsOnlyAutoTemps returns TRUE if the specified OAS2FUNC
+	// node assigns only auto-temps.
+	assignsOnlyAutoTemps := func(x ir.Node) bool {
+		alst := x.(*ir.AssignListStmt)
+		oa2init := alst.Init()
+		if len(oa2init) == 0 {
+			return false
+		}
+		for _, v := range oa2init {
+			d := v.(*ir.Decl)
+			if !ir.IsAutoTmp(d.X) {
+				return false
+			}
+		}
+		return true
+	}
+
+	// Simple case: x := foo()
+	if parent.Op() == ir.OAS {
+		return parent
+	}
+
+	// Multi-return case: x, y := bar()
+	if parent.Op() == ir.OAS2FUNC {
+		// Hack city: if the result vars are auto-temps, try looking
+		// for an outer assignment in the tree. The code shape we're
+		// looking for here is:
+		//
+		//	OAS1({x,y},OCONVNOP(OAS2FUNC({auto1,auto2},OCALLFUNC(bar))))
+		//
+		if assignsOnlyAutoTemps(parent) {
+			par2 := cstb.nstack[len(cstb.nstack)-2]
+			if par2.Op() == ir.OAS2 {
+				return par2
+			}
+			if par2.Op() == ir.OCONVNOP {
+				par3 := cstb.nstack[len(cstb.nstack)-3]
+				if par3.Op() == ir.OAS2 {
+					return par3
+				}
+			}
+		}
+	}
+
+	return nil
+}
+
+// UpdateCallsiteTable handles updating of callerfn's call site table
+// after an inlining has been carried out, e.g. the call at 'n' has been
+// turned into the inlined call expression 'ic' within function
+// callerfn. The chief thing of interest here is to make sure that any
+// call nodes within 'ic' are added to the call site table for
+// 'callerfn' and scored appropriately.
+func UpdateCallsiteTable(callerfn *ir.Func, n *ir.CallExpr, ic *ir.InlinedCallExpr) {
+	enableDebugTraceIfEnv()
+	defer disableDebugTrace()
+
+	funcInlHeur, ok := fpmap[callerfn]
+	if !ok {
+		// This can happen for compiler-generated wrappers.
+		if debugTrace&debugTraceCalls != 0 {
+			fmt.Fprintf(os.Stderr, "=-= early exit, no entry for caller fn %v\n", callerfn)
+		}
+		return
+	}
+
+	if debugTrace&debugTraceCalls != 0 {
+		fmt.Fprintf(os.Stderr, "=-= UpdateCallsiteTable(caller=%v, cs=%s)\n",
+			callerfn, fmtFullPos(n.Pos()))
+	}
+
+	// Mark the call in question as inlined.
+	oldcs, ok := funcInlHeur.cstab[n]
+	if !ok {
+		// This can happen for compiler-generated wrappers.
+		return
+	}
+	oldcs.aux |= csAuxInlined
+
+	if debugTrace&debugTraceCalls != 0 {
+		fmt.Fprintf(os.Stderr, "=-= marked as inlined: callee=%v %s\n",
+			oldcs.Callee, EncodeCallSiteKey(oldcs))
+	}
+
+	// Walk the inlined call region to collect new callsites.
+	var icp pstate
+	if oldcs.Flags&CallSiteOnPanicPath != 0 {
+		icp = psCallsPanic
+	}
+	var loopNestLevel int
+	if oldcs.Flags&CallSiteInLoop != 0 {
+		loopNestLevel = 1
+	}
+	ptab := map[ir.Node]pstate{ic: icp}
+	nf := newNameFinder(nil)
+	icstab := computeCallSiteTable(callerfn, ic.Body, nil, ptab, loopNestLevel, nf)
+
+	// Record parent callsite. This is primarily for debug output.
+	for _, cs := range icstab {
+		cs.parent = oldcs
+	}
+
+	// Score the calls in the inlined body. Note the setting of
+	// "doCallResults" to false here: at the moment there isn't any
+	// easy way to localize or region-ize the work done by
+	// "rescoreBasedOnCallResultUses", which currently does a walk
+	// over the entire function to look for uses of a given set of
+	// results. Similarly we're passing nil to makeCallSiteAnalyzer,
+	// so as to run name finding without the use of static value &
+	// friends.
+ csa := makeCallSiteAnalyzer(nil) + const doCallResults = false + csa.scoreCallsRegion(callerfn, ic.Body, icstab, doCallResults, ic) +} diff --git a/src/cmd/compile/internal/inline/inlheur/analyze_func_flags.go b/src/cmd/compile/internal/inline/inlheur/analyze_func_flags.go new file mode 100644 index 0000000..b7403a4 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/analyze_func_flags.go @@ -0,0 +1,356 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package inlheur + +import ( + "cmd/compile/internal/base" + "cmd/compile/internal/ir" + "cmd/compile/internal/types" + "fmt" + "os" +) + +// funcFlagsAnalyzer computes the "Flags" value for the FuncProps +// object we're computing. The main item of interest here is "nstate", +// which stores the disposition of a given ir Node with respect to the +// flags/properties we're trying to compute. +type funcFlagsAnalyzer struct { + fn *ir.Func + nstate map[ir.Node]pstate + noInfo bool // set if we see something inscrutable/un-analyzable +} + +// pstate keeps track of the disposition of a given node and its +// children with respect to panic/exit calls. +type pstate int + +const ( + psNoInfo pstate = iota // nothing interesting about this node + psCallsPanic // node causes call to panic or os.Exit + psMayReturn // executing node may trigger a "return" stmt + psTop // dataflow lattice "top" element +) + +func makeFuncFlagsAnalyzer(fn *ir.Func) *funcFlagsAnalyzer { + return &funcFlagsAnalyzer{ + fn: fn, + nstate: make(map[ir.Node]pstate), + } +} + +// setResults transfers func flag results to 'funcProps'. +func (ffa *funcFlagsAnalyzer) setResults(funcProps *FuncProps) { + var rv FuncPropBits + if !ffa.noInfo && ffa.stateForList(ffa.fn.Body) == psCallsPanic { + rv = FuncPropNeverReturns + } + // This is slightly hacky and not at all required, but include a + // special case for main.main, which often ends in a call to + // os.Exit. People who write code like this (very common I + // imagine) + // + // func main() { + // rc = perform() + // ... + // foo() + // os.Exit(rc) + // } + // + // will be constantly surprised when foo() is inlined in many + // other spots in the program but not in main(). + if isMainMain(ffa.fn) { + rv &^= FuncPropNeverReturns + } + funcProps.Flags = rv +} + +func (ffa *funcFlagsAnalyzer) getState(n ir.Node) pstate { + return ffa.nstate[n] +} + +func (ffa *funcFlagsAnalyzer) setState(n ir.Node, st pstate) { + if st != psNoInfo { + ffa.nstate[n] = st + } +} + +func (ffa *funcFlagsAnalyzer) updateState(n ir.Node, st pstate) { + if st == psNoInfo { + delete(ffa.nstate, n) + } else { + ffa.nstate[n] = st + } +} + +func (ffa *funcFlagsAnalyzer) panicPathTable() map[ir.Node]pstate { + return ffa.nstate +} + +// blockCombine merges together states as part of a linear sequence of +// statements, where 'pred' and 'succ' are analysis results for a pair +// of consecutive statements. Examples: +// +// case 1: case 2: +// panic("foo") if q { return x } <-pred +// return x panic("boo") <-succ +// +// In case 1, since the pred state is "always panic" it doesn't matter +// what the succ state is, hence the state for the combination of the +// two blocks is "always panics". In case 2, because there is a path +// to return that avoids the panic in succ, the state for the +// combination of the two statements is "may return". 
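+//
+// A few sample combinations, matching the cases above (illustrative
+// only, written in terms of the pstate constants):
+//
+//	blockCombine(psCallsPanic, psMayReturn) == psCallsPanic // case 1
+//	blockCombine(psMayReturn, psCallsPanic) == psMayReturn  // case 2
+//	blockCombine(psNoInfo, psCallsPanic)    == psCallsPanic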
+func blockCombine(pred, succ pstate) pstate {
+	switch succ {
+	case psTop:
+		return pred
+	case psMayReturn:
+		if pred == psCallsPanic {
+			return psCallsPanic
+		}
+		return psMayReturn
+	case psNoInfo:
+		return pred
+	case psCallsPanic:
+		if pred == psMayReturn {
+			return psMayReturn
+		}
+		return psCallsPanic
+	}
+	panic("should never execute")
+}
+
+// branchCombine combines two states at a control flow branch point where
+// either p1 or p2 executes (as in an "if" statement).
+func branchCombine(p1, p2 pstate) pstate {
+	if p1 == psCallsPanic && p2 == psCallsPanic {
+		return psCallsPanic
+	}
+	if p1 == psMayReturn || p2 == psMayReturn {
+		return psMayReturn
+	}
+	return psNoInfo
+}
+
+// stateForList walks through a list of statements and computes the
+// state/disposition for the entire list as a whole, as well as
+// updating the disposition of intermediate nodes.
+func (ffa *funcFlagsAnalyzer) stateForList(list ir.Nodes) pstate {
+	st := psTop
+	// Walk the list backwards so that we can update the state for
+	// earlier list elements based on what we find out about their
+	// successors. Example:
+	//
+	//	if ... {
+	//	  L10: foo()
+	//	  L11: <stmt>
+	//	  L12: panic(...)
+	//	}
+	//
+	// After combining the dispositions for lines 11 and 12, we want to
+	// update the state for the call at line 10 based on that combined
+	// disposition (if L11 has no path to "return", then the call at
+	// line 10 will be on a panic path).
+	for i := len(list) - 1; i >= 0; i-- {
+		n := list[i]
+		psi := ffa.getState(n)
+		if debugTrace&debugTraceFuncFlags != 0 {
+			fmt.Fprintf(os.Stderr, "=-= %v: stateForList n=%s ps=%s\n",
+				ir.Line(n), n.Op().String(), psi.String())
+		}
+		st = blockCombine(psi, st)
+		ffa.updateState(n, st)
+	}
+	if st == psTop {
+		st = psNoInfo
+	}
+	return st
+}
+
+func isMainMain(fn *ir.Func) bool {
+	s := fn.Sym()
+	return (s.Pkg.Name == "main" && s.Name == "main")
+}
+
+func isWellKnownFunc(s *types.Sym, pkg, name string) bool {
+	return s.Pkg.Path == pkg && s.Name == name
+}
+
+// isExitCall reports TRUE if the node itself is an unconditional
+// call to os.Exit(), a panic, or a function that does likewise.
+func isExitCall(n ir.Node) bool {
+	if n.Op() != ir.OCALLFUNC {
+		return false
+	}
+	cx := n.(*ir.CallExpr)
+	name := ir.StaticCalleeName(cx.Fun)
+	if name == nil {
+		return false
+	}
+	s := name.Sym()
+	if isWellKnownFunc(s, "os", "Exit") ||
+		isWellKnownFunc(s, "runtime", "throw") {
+		return true
+	}
+	if funcProps := propsForFunc(name.Func); funcProps != nil {
+		if funcProps.Flags&FuncPropNeverReturns != 0 {
+			return true
+		}
+	}
+	return name.Func.NeverReturns()
+}
+
+// pessimize is called to record the fact that we saw something in the
+// function that renders it entirely impossible to analyze.
+func (ffa *funcFlagsAnalyzer) pessimize() {
+	ffa.noInfo = true
+}
+
+// shouldVisit reports TRUE if this is an interesting node from the
+// perspective of computing function flags. NB: due to the fact that
+// ir.CallExpr implements the Stmt interface, we wind up visiting
+// a lot of nodes that we don't really need to, but these can
+// simply be screened out as part of the visit.
+func shouldVisit(n ir.Node) bool {
+	_, isStmt := n.(ir.Stmt)
+	return n.Op() != ir.ODCL &&
+		(isStmt || n.Op() == ir.OCALLFUNC || n.Op() == ir.OPANIC)
+}
+
+// nodeVisitPost helps implement the propAnalyzer interface; when
+// called on a given node, it decides the disposition of that node
+// based on the state(s) of the node's children.
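+//
+// For example, given the (hypothetical) function
+//
+//	func f(q bool) int {
+//		if q {
+//			return 1
+//		}
+//		panic("boom")
+//	}
+//
+// the ORETURN is psMayReturn and the OPANIC is psCallsPanic; the OIF
+// combines its branches via branchCombine into psMayReturn, so the
+// body as a whole is psMayReturn and f is not flagged as
+// FuncPropNeverReturns.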
+func (ffa *funcFlagsAnalyzer) nodeVisitPost(n ir.Node) {
+	if debugTrace&debugTraceFuncFlags != 0 {
+		fmt.Fprintf(os.Stderr, "=+= nodevis %v %s should=%v\n",
+			ir.Line(n), n.Op().String(), shouldVisit(n))
+	}
+	if !shouldVisit(n) {
+		return
+	}
+	var st pstate
+	switch n.Op() {
+	case ir.OCALLFUNC:
+		if isExitCall(n) {
+			st = psCallsPanic
+		}
+	case ir.OPANIC:
+		st = psCallsPanic
+	case ir.ORETURN:
+		st = psMayReturn
+	case ir.OBREAK, ir.OCONTINUE:
+		// FIXME: this handling of break/continue is sub-optimal; we
+		// have them as "mayReturn" in order to help with this case:
+		//
+		//	for {
+		//		if q() { break }
+		//		panic(...)
+		//	}
+		//
+		// where the effect of the 'break' is to cause the subsequent
+		// panic to be skipped. One possible improvement would be to
+		// track whether the currently enclosing loop is a "for {" or
+		// a for/range with condition, then use mayReturn only for the
+		// former. Note also that "break X" or "continue X" is treated
+		// the same as "goto", since we don't have a good way to track
+		// the target of the branch.
+		st = psMayReturn
+		n := n.(*ir.BranchStmt)
+		if n.Label != nil {
+			ffa.pessimize()
+		}
+	case ir.OBLOCK:
+		n := n.(*ir.BlockStmt)
+		st = ffa.stateForList(n.List)
+	case ir.OCASE:
+		if ccst, ok := n.(*ir.CaseClause); ok {
+			st = ffa.stateForList(ccst.Body)
+		} else if ccst, ok := n.(*ir.CommClause); ok {
+			st = ffa.stateForList(ccst.Body)
+		} else {
+			panic("unexpected")
+		}
+	case ir.OIF:
+		n := n.(*ir.IfStmt)
+		st = branchCombine(ffa.stateForList(n.Body), ffa.stateForList(n.Else))
+	case ir.OFOR:
+		// Treat for { XXX } like a block.
+		// Treat for <cond> { XXX } like an if statement with no else.
+		n := n.(*ir.ForStmt)
+		bst := ffa.stateForList(n.Body)
+		if n.Cond == nil {
+			st = bst
+		} else {
+			if bst == psMayReturn {
+				st = psMayReturn
+			}
+		}
+	case ir.ORANGE:
+		// Treat for range { XXX } like an if statement with no else.
+		n := n.(*ir.RangeStmt)
+		if ffa.stateForList(n.Body) == psMayReturn {
+			st = psMayReturn
+		}
+	case ir.OGOTO:
+		// Punt if we see even one goto. If we built a control
+		// flow graph we could do more, but this is just a tree walk.
+		ffa.pessimize()
+	case ir.OSELECT:
+		// Process selects for "may return" but not "always panics";
+		// the latter case seems very improbable.
+		n := n.(*ir.SelectStmt)
+		if len(n.Cases) != 0 {
+			st = psTop
+			for _, c := range n.Cases {
+				st = branchCombine(ffa.stateForList(c.Body), st)
+			}
+		}
+	case ir.OSWITCH:
+		n := n.(*ir.SwitchStmt)
+		if len(n.Cases) != 0 {
+			st = psTop
+			for _, c := range n.Cases {
+				st = branchCombine(ffa.stateForList(c.Body), st)
+			}
+		}
+	case ir.OFALL:
+		// Not important.
+	case ir.ODCLFUNC, ir.ORECOVER, ir.OAS, ir.OAS2, ir.OAS2FUNC, ir.OASOP,
+		ir.OPRINTLN, ir.OPRINT, ir.OLABEL, ir.OCALLINTER, ir.ODEFER,
+		ir.OSEND, ir.ORECV, ir.OSELRECV2, ir.OGO, ir.OAPPEND, ir.OAS2DOTTYPE,
+		ir.OAS2MAPR, ir.OGETG, ir.ODELETE, ir.OINLMARK, ir.OAS2RECV,
+		ir.OMIN, ir.OMAX, ir.OMAKE, ir.ORECOVERFP, ir.OGETCALLERSP:
+		// These should all be benign/uninteresting.
+	case ir.OTAILCALL, ir.OJUMPTABLE, ir.OTYPESW:
+		// Don't expect to see these at all.
+ base.Fatalf("unexpected op %s in func %s", + n.Op().String(), ir.FuncName(ffa.fn)) + default: + base.Fatalf("%v: unhandled op %s in func %v", + ir.Line(n), n.Op().String(), ir.FuncName(ffa.fn)) + } + if debugTrace&debugTraceFuncFlags != 0 { + fmt.Fprintf(os.Stderr, "=-= %v: visit n=%s returns %s\n", + ir.Line(n), n.Op().String(), st.String()) + } + ffa.setState(n, st) +} + +func (ffa *funcFlagsAnalyzer) nodeVisitPre(n ir.Node) { +} diff --git a/src/cmd/compile/internal/inline/inlheur/analyze_func_params.go b/src/cmd/compile/internal/inline/inlheur/analyze_func_params.go new file mode 100644 index 0000000..d85d73b --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/analyze_func_params.go @@ -0,0 +1,355 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package inlheur + +import ( + "cmd/compile/internal/ir" + "fmt" + "os" +) + +// paramsAnalyzer holds state information for the phase that computes +// flags for a Go functions parameters, for use in inline heuristics. +// Note that the params slice below includes entries for blanks. +type paramsAnalyzer struct { + fname string + values []ParamPropBits + params []*ir.Name + top []bool + *condLevelTracker + *nameFinder +} + +// getParams returns an *ir.Name slice containing all params for the +// function (plus rcvr as well if applicable). +func getParams(fn *ir.Func) []*ir.Name { + sig := fn.Type() + numParams := sig.NumRecvs() + sig.NumParams() + return fn.Dcl[:numParams] +} + +// addParamsAnalyzer creates a new paramsAnalyzer helper object for +// the function fn, appends it to the analyzers list, and returns the +// new list. If the function in question doesn't have any interesting +// parameters then the analyzer list is returned unchanged, and the +// params flags in "fp" are updated accordingly. +func addParamsAnalyzer(fn *ir.Func, analyzers []propAnalyzer, fp *FuncProps, nf *nameFinder) []propAnalyzer { + pa, props := makeParamsAnalyzer(fn, nf) + if pa != nil { + analyzers = append(analyzers, pa) + } else { + fp.ParamFlags = props + } + return analyzers +} + +// makeParamAnalyzer creates a new helper object to analyze parameters +// of function fn. If the function doesn't have any interesting +// params, a nil helper is returned along with a set of default param +// flags for the func. +func makeParamsAnalyzer(fn *ir.Func, nf *nameFinder) (*paramsAnalyzer, []ParamPropBits) { + params := getParams(fn) // includes receiver if applicable + if len(params) == 0 { + return nil, nil + } + vals := make([]ParamPropBits, len(params)) + if fn.Inl == nil { + return nil, vals + } + top := make([]bool, len(params)) + interestingToAnalyze := false + for i, pn := range params { + if pn == nil { + continue + } + pt := pn.Type() + if !pt.IsScalar() && !pt.HasNil() { + // existing properties not applicable here (for things + // like structs, arrays, slices, etc). + continue + } + // If param is reassigned, skip it. 
+ if ir.Reassigned(pn) { + continue + } + top[i] = true + interestingToAnalyze = true + } + if !interestingToAnalyze { + return nil, vals + } + + if debugTrace&debugTraceParams != 0 { + fmt.Fprintf(os.Stderr, "=-= param analysis of func %v:\n", + fn.Sym().Name) + for i := range vals { + n := "_" + if params[i] != nil { + n = params[i].Sym().String() + } + fmt.Fprintf(os.Stderr, "=-= %d: %q %s top=%v\n", + i, n, vals[i].String(), top[i]) + } + } + pa := ¶msAnalyzer{ + fname: fn.Sym().Name, + values: vals, + params: params, + top: top, + condLevelTracker: new(condLevelTracker), + nameFinder: nf, + } + return pa, nil +} + +func (pa *paramsAnalyzer) setResults(funcProps *FuncProps) { + funcProps.ParamFlags = pa.values +} + +func (pa *paramsAnalyzer) findParamIdx(n *ir.Name) int { + if n == nil { + panic("bad") + } + for i := range pa.params { + if pa.params[i] == n { + return i + } + } + return -1 +} + +type testfType func(x ir.Node, param *ir.Name, idx int) (bool, bool) + +// paramsAnalyzer invokes function 'testf' on the specified expression +// 'x' for each parameter, and if the result is TRUE, or's 'flag' into +// the flags for that param. +func (pa *paramsAnalyzer) checkParams(x ir.Node, flag ParamPropBits, mayflag ParamPropBits, testf testfType) { + for idx, p := range pa.params { + if !pa.top[idx] && pa.values[idx] == ParamNoInfo { + continue + } + result, may := testf(x, p, idx) + if debugTrace&debugTraceParams != 0 { + fmt.Fprintf(os.Stderr, "=-= test expr %v param %s result=%v flag=%s\n", x, p.Sym().Name, result, flag.String()) + } + if result { + v := flag + if pa.condLevel != 0 || may { + v = mayflag + } + pa.values[idx] |= v + pa.top[idx] = false + } + } +} + +// foldCheckParams checks expression 'x' (an 'if' condition or +// 'switch' stmt expr) to see if the expr would fold away if a +// specific parameter had a constant value. +func (pa *paramsAnalyzer) foldCheckParams(x ir.Node) { + pa.checkParams(x, ParamFeedsIfOrSwitch, ParamMayFeedIfOrSwitch, + func(x ir.Node, p *ir.Name, idx int) (bool, bool) { + return ShouldFoldIfNameConstant(x, []*ir.Name{p}), false + }) +} + +// callCheckParams examines the target of call expression 'ce' to see +// if it is making a call to the value passed in for some parameter. +func (pa *paramsAnalyzer) callCheckParams(ce *ir.CallExpr) { + switch ce.Op() { + case ir.OCALLINTER: + if ce.Op() != ir.OCALLINTER { + return + } + sel := ce.Fun.(*ir.SelectorExpr) + r := pa.staticValue(sel.X) + if r.Op() != ir.ONAME { + return + } + name := r.(*ir.Name) + if name.Class != ir.PPARAM { + return + } + pa.checkParams(r, ParamFeedsInterfaceMethodCall, + ParamMayFeedInterfaceMethodCall, + func(x ir.Node, p *ir.Name, idx int) (bool, bool) { + name := x.(*ir.Name) + return name == p, false + }) + case ir.OCALLFUNC: + if ce.Fun.Op() != ir.ONAME { + return + } + called := ir.StaticValue(ce.Fun) + if called.Op() != ir.ONAME { + return + } + name := called.(*ir.Name) + if name.Class == ir.PPARAM { + pa.checkParams(called, ParamFeedsIndirectCall, + ParamMayFeedIndirectCall, + func(x ir.Node, p *ir.Name, idx int) (bool, bool) { + name := x.(*ir.Name) + return name == p, false + }) + } else { + cname := pa.funcName(called) + if cname != nil { + pa.deriveFlagsFromCallee(ce, cname.Func) + } + } + } +} + +// deriveFlagsFromCallee tries to derive flags for the current +// function based on a call this function makes to some other +// function. 
Example:
+//
+//	/* Simple */              /* Derived from callee */
+//	func foo(f func(int)) {   func foo(f func(int)) {
+//	  f(2)                      bar(32, f)
+//	}                         }
+//	func bar(x int, f func()) {
+//	  f(x)
+//	}
+//
+// Here we can set the "param feeds indirect call" flag for
+// foo's param 'f' since we know that bar has that flag set for
+// its second param, and we're passing that param a function.
+func (pa *paramsAnalyzer) deriveFlagsFromCallee(ce *ir.CallExpr, callee *ir.Func) {
+	calleeProps := propsForFunc(callee)
+	if calleeProps == nil {
+		return
+	}
+	if debugTrace&debugTraceParams != 0 {
+		fmt.Fprintf(os.Stderr, "=-= callee props for %v:\n%s",
+			callee.Sym().Name, calleeProps.String())
+	}
+
+	must := []ParamPropBits{ParamFeedsInterfaceMethodCall, ParamFeedsIndirectCall, ParamFeedsIfOrSwitch}
+	may := []ParamPropBits{ParamMayFeedInterfaceMethodCall, ParamMayFeedIndirectCall, ParamMayFeedIfOrSwitch}
+
+	for pidx, arg := range ce.Args {
+		// Does the callee param have any interesting properties?
+		// If not we can skip this one.
+		pflag := calleeProps.ParamFlags[pidx]
+		if pflag == 0 {
+			continue
+		}
+		// See if one of the caller's parameters is flowing unmodified
+		// into this actual expression.
+		r := pa.staticValue(arg)
+		if r.Op() != ir.ONAME {
+			return
+		}
+		name := r.(*ir.Name)
+		if name.Class != ir.PPARAM {
+			return
+		}
+		callerParamIdx := pa.findParamIdx(name)
+		// Note that findParamIdx can return -1 in the case where
+		// the param belongs not to the current closure func we're
+		// analyzing but to an outer enclosing func.
+		if callerParamIdx == -1 {
+			return
+		}
+		if pa.params[callerParamIdx] == nil {
+			panic("something went wrong")
+		}
+		if !pa.top[callerParamIdx] &&
+			pa.values[callerParamIdx] == ParamNoInfo {
+			continue
+		}
+		if debugTrace&debugTraceParams != 0 {
+			fmt.Fprintf(os.Stderr, "=-= pflag for arg %d is %s\n",
+				pidx, pflag.String())
+		}
+		for i := range must {
+			mayv := may[i]
+			mustv := must[i]
+			if pflag&mustv != 0 && pa.condLevel == 0 {
+				pa.values[callerParamIdx] |= mustv
+			} else if pflag&(mustv|mayv) != 0 {
+				pa.values[callerParamIdx] |= mayv
+			}
+		}
+		pa.top[callerParamIdx] = false
+	}
+}
+
+func (pa *paramsAnalyzer) nodeVisitPost(n ir.Node) {
+	if len(pa.values) == 0 {
+		return
+	}
+	pa.condLevelTracker.post(n)
+	switch n.Op() {
+	case ir.OCALLFUNC:
+		ce := n.(*ir.CallExpr)
+		pa.callCheckParams(ce)
+	case ir.OCALLINTER:
+		ce := n.(*ir.CallExpr)
+		pa.callCheckParams(ce)
+	case ir.OIF:
+		ifst := n.(*ir.IfStmt)
+		pa.foldCheckParams(ifst.Cond)
+	case ir.OSWITCH:
+		swst := n.(*ir.SwitchStmt)
+		if swst.Tag != nil {
+			pa.foldCheckParams(swst.Tag)
+		}
+	}
+}
+
+func (pa *paramsAnalyzer) nodeVisitPre(n ir.Node) {
+	if len(pa.values) == 0 {
+		return
+	}
+	pa.condLevelTracker.pre(n)
+}
+
+// condLevelTracker helps keep track, very roughly, of the "level of
+// conditional nesting", e.g. how many "if" statements you have to go
+// through to get to the point where a given stmt executes. Example:
+//
+//	                      cond nesting level
+//	func foo() {
+//	  G = 1               0
+//	  if x < 10 {         0
+//	    if y < 10 {       1
+//	      G = 0           2
+//	    }
+//	  }
+//	}
+//
+// The intent here is to provide some sort of very abstract relative
+// hotness metric, e.g. "G = 1" above is expected to be executed more
+// often than "G = 0" (in the aggregate, across large numbers of
+// functions).
+type condLevelTracker struct {
+	condLevel int
+}
+
+func (c *condLevelTracker) pre(n ir.Node) {
+	// Increment level of "conditional testing" if we see
+	// an "if" or switch statement, and decrement if in
+	// a loop.
+	switch n.Op() {
+	case ir.OIF, ir.OSWITCH:
+		c.condLevel++
+	case ir.OFOR, ir.ORANGE:
+		c.condLevel--
+	}
+}
+
+func (c *condLevelTracker) post(n ir.Node) {
+	switch n.Op() {
+	case ir.OFOR, ir.ORANGE:
+		c.condLevel++
+	case ir.OIF:
+		c.condLevel--
+	case ir.OSWITCH:
+		c.condLevel--
+	}
+}
diff --git a/src/cmd/compile/internal/inline/inlheur/analyze_func_returns.go b/src/cmd/compile/internal/inline/inlheur/analyze_func_returns.go
new file mode 100644
index 0000000..2aaa68d
--- /dev/null
+++ b/src/cmd/compile/internal/inline/inlheur/analyze_func_returns.go
@@ -0,0 +1,277 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package inlheur
+
+import (
+	"cmd/compile/internal/ir"
+	"fmt"
+	"go/constant"
+	"go/token"
+	"os"
+)
+
+// resultsAnalyzer stores state information for the process of
+// computing flags/properties for the return values of a specific Go
+// function, as part of inline heuristics synthesis.
+type resultsAnalyzer struct {
+	fname           string
+	props           []ResultPropBits
+	values          []resultVal
+	inlineMaxBudget int
+	*nameFinder
+}
+
+// resultVal captures information about a specific result returned from
+// the function we're analyzing; we are interested in cases where
+// the func always returns the same constant, or always returns
+// the same function, etc. This container stores info on the specific
+// scenarios we're looking for.
+type resultVal struct {
+	cval    constant.Value
+	fn      *ir.Name
+	fnClo   bool
+	top     bool
+	derived bool // see deriveReturnFlagsFromCallee below
+}
+
+// addResultsAnalyzer creates a new resultsAnalyzer helper object for
+// the function fn, appends it to the analyzers list, and returns the
+// new list. If the function in question doesn't have any returns (or
+// any interesting returns) then the analyzer list is left as is, and
+// the result flags in "fp" are updated accordingly.
+func addResultsAnalyzer(fn *ir.Func, analyzers []propAnalyzer, fp *FuncProps, inlineMaxBudget int, nf *nameFinder) []propAnalyzer {
+	ra, props := makeResultsAnalyzer(fn, inlineMaxBudget, nf)
+	if ra != nil {
+		analyzers = append(analyzers, ra)
+	} else {
+		fp.ResultFlags = props
+	}
+	return analyzers
+}
+
+// makeResultsAnalyzer creates a new helper object to analyze results
+// in function fn. If the function doesn't have any interesting
+// results, a nil helper is returned along with a set of default
+// result flags for the func.
+func makeResultsAnalyzer(fn *ir.Func, inlineMaxBudget int, nf *nameFinder) (*resultsAnalyzer, []ResultPropBits) {
+	results := fn.Type().Results()
+	if len(results) == 0 {
+		return nil, nil
+	}
+	props := make([]ResultPropBits, len(results))
+	if fn.Inl == nil {
+		return nil, props
+	}
+	vals := make([]resultVal, len(results))
+	interestingToAnalyze := false
+	for i := range results {
+		rt := results[i].Type
+		if !rt.IsScalar() && !rt.HasNil() {
+			// Existing properties not applicable here (for things
+			// like structs, arrays, slices, etc).
+			continue
+		}
+		// Set the "top" flag (as in "top element of data flow lattice"),
+		// meaning "we have no info yet, but we might later on".
+		vals[i].top = true
+		interestingToAnalyze = true
+	}
+	if !interestingToAnalyze {
+		return nil, props
+	}
+	ra := &resultsAnalyzer{
+		props:           props,
+		values:          vals,
+		inlineMaxBudget: inlineMaxBudget,
+		nameFinder:      nf,
+	}
+	return ra, nil
+}
+
+// setResults transfers the calculated result properties for this
+// function to 'funcProps'.
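+//
+// For instance (hypothetical): if every return statement in the
+// function being analyzed returns the same inexpensive function, as in
+//
+//	func pick() func() int { return small }
+//
+// then the result slot carries ResultAlwaysSameFunc, and setResults
+// can promote it to ResultAlwaysSameInlinableFunc provided small's
+// cost fits within the inline budget (see below).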
+func (ra *resultsAnalyzer) setResults(funcProps *FuncProps) {
+	// Promote ResultAlwaysSameFunc to ResultAlwaysSameInlinableFunc.
+	for i := range ra.values {
+		if ra.props[i] == ResultAlwaysSameFunc && !ra.values[i].derived {
+			f := ra.values[i].fn.Func
+			// HACK: in order to allow for call site score
+			// adjustments, we used a relaxed inline budget in
+			// determining inlinability. For the check below, however,
+			// what we want to know is whether the func in question is
+			// likely to be inlined, as opposed to whether it might
+			// possibly be inlined if all the right score adjustments
+			// happened, so do a simple check based on the cost.
+			if f.Inl != nil && f.Inl.Cost <= int32(ra.inlineMaxBudget) {
+				ra.props[i] = ResultAlwaysSameInlinableFunc
+			}
+		}
+	}
+	funcProps.ResultFlags = ra.props
+}
+
+func (ra *resultsAnalyzer) pessimize() {
+	for i := range ra.props {
+		ra.props[i] = ResultNoInfo
+	}
+}
+
+func (ra *resultsAnalyzer) nodeVisitPre(n ir.Node) {
+}
+
+func (ra *resultsAnalyzer) nodeVisitPost(n ir.Node) {
+	if len(ra.values) == 0 {
+		return
+	}
+	if n.Op() != ir.ORETURN {
+		return
+	}
+	if debugTrace&debugTraceResults != 0 {
+		fmt.Fprintf(os.Stderr, "=+= returns nodevis %v %s\n",
+			ir.Line(n), n.Op().String())
+	}
+
+	// No support currently for named results, so if we see an empty
+	// "return" stmt, be conservative.
+	rs := n.(*ir.ReturnStmt)
+	if len(rs.Results) != len(ra.values) {
+		ra.pessimize()
+		return
+	}
+	for i, r := range rs.Results {
+		ra.analyzeResult(i, r)
+	}
+}
+
+// analyzeResult examines the expression 'n' being returned as the
+// 'ii'th argument in some return statement to see whether it has
+// interesting characteristics (for example, returns a constant), then
+// applies a dataflow "meet" operation to combine this result with any
+// previous result (for the given return slot) that we've already
+// processed.
+func (ra *resultsAnalyzer) analyzeResult(ii int, n ir.Node) {
+	isAllocMem := ra.isAllocatedMem(n)
+	isConcConvItf := ra.isConcreteConvIface(n)
+	constVal := ra.constValue(n)
+	isConst := (constVal != nil)
+	isNil := ra.isNil(n)
+	rfunc := ra.funcName(n)
+	isFunc := (rfunc != nil)
+	isClo := (rfunc != nil && rfunc.Func.OClosure != nil)
+	curp := ra.props[ii]
+	dprops, isDerivedFromCall := ra.deriveReturnFlagsFromCallee(n)
+	newp := ResultNoInfo
+	var newcval constant.Value
+	var newfunc *ir.Name
+
+	if debugTrace&debugTraceResults != 0 {
+		fmt.Fprintf(os.Stderr, "=-= %v: analyzeResult n=%s ismem=%v isconcconv=%v isconst=%v isnil=%v isfunc=%v isclo=%v\n", ir.Line(n), n.Op().String(), isAllocMem, isConcConvItf, isConst, isNil, isFunc, isClo)
+	}
+
+	if ra.values[ii].top {
+		ra.values[ii].top = false
+		// This is the first return we've seen; record
+		// whatever properties it has.
+		switch {
+		case isAllocMem:
+			newp = ResultIsAllocatedMem
+		case isConcConvItf:
+			newp = ResultIsConcreteTypeConvertedToInterface
+		case isFunc:
+			newp = ResultAlwaysSameFunc
+			newfunc = rfunc
+		case isConst:
+			newp = ResultAlwaysSameConstant
+			newcval = constVal
+		case isNil:
+			newp = ResultAlwaysSameConstant
+			newcval = nil
+		case isDerivedFromCall:
+			newp = dprops
+			ra.values[ii].derived = true
+		}
+	} else {
+		if !ra.values[ii].derived {
+			// This is not the first return we've seen; apply
+			// what amounts to a "meet" operator to combine
+			// the properties we see here with what we saw on
+			// the previous returns.
+ switch curp { + case ResultIsAllocatedMem: + if isAllocMem { + newp = ResultIsAllocatedMem + } + case ResultIsConcreteTypeConvertedToInterface: + if isConcConvItf { + newp = ResultIsConcreteTypeConvertedToInterface + } + case ResultAlwaysSameConstant: + if isNil && ra.values[ii].cval == nil { + newp = ResultAlwaysSameConstant + newcval = nil + } else if isConst && constant.Compare(constVal, token.EQL, ra.values[ii].cval) { + newp = ResultAlwaysSameConstant + newcval = constVal + } + case ResultAlwaysSameFunc: + if isFunc && isSameFuncName(rfunc, ra.values[ii].fn) { + newp = ResultAlwaysSameFunc + newfunc = rfunc + } + } + } + } + ra.values[ii].fn = newfunc + ra.values[ii].fnClo = isClo + ra.values[ii].cval = newcval + ra.props[ii] = newp + + if debugTrace&debugTraceResults != 0 { + fmt.Fprintf(os.Stderr, "=-= %v: analyzeResult newp=%s\n", + ir.Line(n), newp) + } +} + +// deriveReturnFlagsFromCallee tries to set properties for a given +// return result where we're returning call expression; return value +// is a return property value and a boolean indicating whether the +// prop is valid. Examples: +// +// func foo() int { return bar() } +// func bar() int { return 42 } +// func blix() int { return 43 } +// func two(y int) int { +// if y < 0 { return bar() } else { return blix() } +// } +// +// Since "foo" always returns the result of a call to "bar", we can +// set foo's return property to that of bar. In the case of "two", however, +// even though each return path returns a constant, we don't know +// whether the constants are identical, hence we need to be conservative. +func (ra *resultsAnalyzer) deriveReturnFlagsFromCallee(n ir.Node) (ResultPropBits, bool) { + if n.Op() != ir.OCALLFUNC { + return 0, false + } + ce := n.(*ir.CallExpr) + if ce.Fun.Op() != ir.ONAME { + return 0, false + } + called := ir.StaticValue(ce.Fun) + if called.Op() != ir.ONAME { + return 0, false + } + cname := ra.funcName(called) + if cname == nil { + return 0, false + } + calleeProps := propsForFunc(cname.Func) + if calleeProps == nil { + return 0, false + } + if len(calleeProps.ResultFlags) != 1 { + return 0, false + } + return calleeProps.ResultFlags[0], true +} diff --git a/src/cmd/compile/internal/inline/inlheur/callsite.go b/src/cmd/compile/internal/inline/inlheur/callsite.go new file mode 100644 index 0000000..f457dd4 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/callsite.go @@ -0,0 +1,149 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package inlheur + +import ( + "cmd/compile/internal/base" + "cmd/compile/internal/ir" + "cmd/internal/src" + "fmt" + "io" + "path/filepath" + "sort" + "strings" +) + +// CallSite records useful information about a potentially inlinable +// (direct) function call. "Callee" is the target of the call, "Call" +// is the ir node corresponding to the call itself, "Assign" is +// the top-level assignment statement containing the call (if the call +// appears in the form of a top-level statement, e.g. "x := foo()"), +// "Flags" contains properties of the call that might be useful for +// making inlining decisions, "Score" is the final score assigned to +// the site, and "ID" is a numeric ID for the site within its +// containing function. 
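+//
+// For example (hypothetical): for a caller containing
+//
+//	x := foo()
+//
+// the CallSite for the foo() call would record the OCALLFUNC node in
+// Call, the enclosing assignment to "x" in Assign, and ID 0 if it is
+// the first site registered for the caller.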
+type CallSite struct { + Callee *ir.Func + Call *ir.CallExpr + parent *CallSite + Assign ir.Node + Flags CSPropBits + + ArgProps []ActualExprPropBits + Score int + ScoreMask scoreAdjustTyp + ID uint + aux uint8 +} + +// CallSiteTab is a table of call sites, keyed by call expr. +// Ideally it would be nice to key the table by src.XPos, but +// this results in collisions for calls on very long lines (the +// front end saturates column numbers at 255). We also wind up +// with many calls that share the same auto-generated pos. +type CallSiteTab map[*ir.CallExpr]*CallSite + +// ActualExprPropBits describes a property of an actual expression (value +// passed to some specific func argument at a call site). +type ActualExprPropBits uint8 + +const ( + ActualExprConstant ActualExprPropBits = 1 << iota + ActualExprIsConcreteConvIface + ActualExprIsFunc + ActualExprIsInlinableFunc +) + +type CSPropBits uint32 + +const ( + CallSiteInLoop CSPropBits = 1 << iota + CallSiteOnPanicPath + CallSiteInInitFunc +) + +type csAuxBits uint8 + +const ( + csAuxInlined = 1 << iota +) + +// encodedCallSiteTab is a table keyed by "encoded" callsite +// (stringified src.XPos plus call site ID) mapping to a value of call +// property bits and score. +type encodedCallSiteTab map[string]propsAndScore + +type propsAndScore struct { + props CSPropBits + score int + mask scoreAdjustTyp +} + +func (pas propsAndScore) String() string { + return fmt.Sprintf("P=%s|S=%d|M=%s", pas.props.String(), + pas.score, pas.mask.String()) +} + +func (cst CallSiteTab) merge(other CallSiteTab) error { + for k, v := range other { + if prev, ok := cst[k]; ok { + return fmt.Errorf("internal error: collision during call site table merge, fn=%s callsite=%s", prev.Callee.Sym().Name, fmtFullPos(prev.Call.Pos())) + } + cst[k] = v + } + return nil +} + +func fmtFullPos(p src.XPos) string { + var sb strings.Builder + sep := "" + base.Ctxt.AllPos(p, func(pos src.Pos) { + fmt.Fprintf(&sb, sep) + sep = "|" + file := filepath.Base(pos.Filename()) + fmt.Fprintf(&sb, "%s:%d:%d", file, pos.Line(), pos.Col()) + }) + return sb.String() +} + +func EncodeCallSiteKey(cs *CallSite) string { + var sb strings.Builder + // FIXME: maybe rewrite line offsets relative to function start? + sb.WriteString(fmtFullPos(cs.Call.Pos())) + fmt.Fprintf(&sb, "|%d", cs.ID) + return sb.String() +} + +func buildEncodedCallSiteTab(tab CallSiteTab) encodedCallSiteTab { + r := make(encodedCallSiteTab) + for _, cs := range tab { + k := EncodeCallSiteKey(cs) + r[k] = propsAndScore{ + props: cs.Flags, + score: cs.Score, + mask: cs.ScoreMask, + } + } + return r +} + +// dumpCallSiteComments emits comments into the dump file for the +// callsites in the function of interest. If "ecst" is non-nil, we use +// that, otherwise generated a fresh encodedCallSiteTab from "tab". 
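+//
+// Each output line follows the Fprintf format used below; a call in
+// a loop might dump as (illustrative values):
+//
+//	// callsite: foo.go:10:8|0 flagstr "CallSiteInLoop" flagval 1 score 12 mask 0 maskstr ""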
+func dumpCallSiteComments(w io.Writer, tab CallSiteTab, ecst encodedCallSiteTab) { + if ecst == nil { + ecst = buildEncodedCallSiteTab(tab) + } + tags := make([]string, 0, len(ecst)) + for k := range ecst { + tags = append(tags, k) + } + sort.Strings(tags) + for _, s := range tags { + v := ecst[s] + fmt.Fprintf(w, "// callsite: %s flagstr %q flagval %d score %d mask %d maskstr %q\n", s, v.props.String(), v.props, v.score, v.mask, v.mask.String()) + } + fmt.Fprintf(w, "// %s\n", csDelimiter) +} diff --git a/src/cmd/compile/internal/inline/inlheur/cspropbits_string.go b/src/cmd/compile/internal/inline/inlheur/cspropbits_string.go new file mode 100644 index 0000000..216f510 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/cspropbits_string.go @@ -0,0 +1,56 @@ +// Code generated by "stringer -bitset -type CSPropBits"; DO NOT EDIT. + +package inlheur + +import "strconv" +import "bytes" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[CallSiteInLoop-1] + _ = x[CallSiteOnPanicPath-2] + _ = x[CallSiteInInitFunc-4] +} + +var _CSPropBits_value = [...]uint64{ + 0x1, /* CallSiteInLoop */ + 0x2, /* CallSiteOnPanicPath */ + 0x4, /* CallSiteInInitFunc */ +} + +const _CSPropBits_name = "CallSiteInLoopCallSiteOnPanicPathCallSiteInInitFunc" + +var _CSPropBits_index = [...]uint8{0, 14, 33, 51} + +func (i CSPropBits) String() string { + var b bytes.Buffer + + remain := uint64(i) + seen := false + + for k, v := range _CSPropBits_value { + x := _CSPropBits_name[_CSPropBits_index[k]:_CSPropBits_index[k+1]] + if v == 0 { + if i == 0 { + b.WriteString(x) + return b.String() + } + continue + } + if (v & remain) == v { + remain &^= v + x := _CSPropBits_name[_CSPropBits_index[k]:_CSPropBits_index[k+1]] + if seen { + b.WriteString("|") + } + seen = true + b.WriteString(x) + } + } + if remain == 0 { + return b.String() + } + return "CSPropBits(0x" + strconv.FormatInt(int64(i), 16) + ")" +} diff --git a/src/cmd/compile/internal/inline/inlheur/debugflags_test.go b/src/cmd/compile/internal/inline/inlheur/debugflags_test.go new file mode 100644 index 0000000..abf4910 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/debugflags_test.go @@ -0,0 +1,65 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package inlheur + +import ( + "testing" +) + +func TestInlScoreAdjFlagParse(t *testing.T) { + scenarios := []struct { + value string + expok bool + }{ + { + value: "returnFeedsConcreteToInterfaceCallAdj:9", + expok: true, + }, + { + value: "panicPathAdj:-1/initFuncAdj:9", + expok: true, + }, + { + value: "", + expok: false, + }, + { + value: "nonsenseAdj:10", + expok: false, + }, + { + value: "inLoopAdj:", + expok: false, + }, + { + value: "inLoopAdj:10:10", + expok: false, + }, + { + value: "inLoopAdj:blah", + expok: false, + }, + { + value: "/", + expok: false, + }, + } + + for _, scenario := range scenarios { + err := parseScoreAdj(scenario.value) + t.Logf("for value=%q err is %v\n", scenario.value, err) + if scenario.expok { + if err != nil { + t.Errorf("expected parseScoreAdj(%s) ok, got err %v", + scenario.value, err) + } + } else { + if err == nil { + t.Errorf("expected parseScoreAdj(%s) failure, got success", + scenario.value) + } + } + } +} diff --git a/src/cmd/compile/internal/inline/inlheur/dumpscores_test.go b/src/cmd/compile/internal/inline/inlheur/dumpscores_test.go new file mode 100644 index 0000000..438b700 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/dumpscores_test.go @@ -0,0 +1,109 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package inlheur + +import ( + "internal/testenv" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestDumpCallSiteScoreDump(t *testing.T) { + td := t.TempDir() + testenv.MustHaveGoBuild(t) + + scenarios := []struct { + name string + promoted int + indirectlyPromoted int + demoted int + unchanged int + }{ + { + name: "dumpscores", + promoted: 1, + indirectlyPromoted: 1, + demoted: 1, + unchanged: 5, + }, + } + + for _, scen := range scenarios { + dumpfile, err := gatherInlCallSitesScoresForFile(t, scen.name, td) + if err != nil { + t.Fatalf("dumping callsite scores for %q: error %v", scen.name, err) + } + var lines []string + if content, err := os.ReadFile(dumpfile); err != nil { + t.Fatalf("reading dump %q: error %v", dumpfile, err) + } else { + lines = strings.Split(string(content), "\n") + } + prom, indprom, dem, unch := 0, 0, 0, 0 + for _, line := range lines { + switch { + case strings.TrimSpace(line) == "": + case !strings.Contains(line, "|"): + case strings.HasPrefix(line, "#"): + case strings.Contains(line, "PROMOTED"): + prom++ + case strings.Contains(line, "INDPROM"): + indprom++ + case strings.Contains(line, "DEMOTED"): + dem++ + default: + unch++ + } + } + showout := false + if prom != scen.promoted { + t.Errorf("testcase %q, got %d promoted want %d promoted", + scen.name, prom, scen.promoted) + showout = true + } + if indprom != scen.indirectlyPromoted { + t.Errorf("testcase %q, got %d indirectly promoted want %d", + scen.name, indprom, scen.indirectlyPromoted) + showout = true + } + if dem != scen.demoted { + t.Errorf("testcase %q, got %d demoted want %d demoted", + scen.name, dem, scen.demoted) + showout = true + } + if unch != scen.unchanged { + t.Errorf("testcase %q, got %d unchanged want %d unchanged", + scen.name, unch, scen.unchanged) + showout = true + } + if showout { + t.Logf(">> dump output: %s", strings.Join(lines, "\n")) + } + } +} + +// gatherInlCallSitesScoresForFile builds the specified testcase 'testcase' +// from testdata/props passing the "-d=dumpinlcallsitescores=1" +// compiler option, to produce a dump, then returns the path of the +// newly created file. 
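+//
+// Roughly equivalent to running (illustrative; <td> abbreviates the
+// temp dir):
+//
+//	go build -gcflags=-d=dumpinlcallsitescores=1 -o <td>/dumpscores.a testdata/dumpscores.go
+//
+// and then saving the combined compiler output as
+// <td>/dumpscores.callsites.txt.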
+func gatherInlCallSitesScoresForFile(t *testing.T, testcase string, td string) (string, error) { + t.Helper() + gopath := "testdata/" + testcase + ".go" + outpath := filepath.Join(td, testcase+".a") + dumpfile := filepath.Join(td, testcase+".callsites.txt") + run := []string{testenv.GoToolPath(t), "build", + "-gcflags=-d=dumpinlcallsitescores=1", "-o", outpath, gopath} + out, err := testenv.Command(t, run[0], run[1:]...).CombinedOutput() + t.Logf("run: %+v\n", run) + if err != nil { + return "", err + } + if err := os.WriteFile(dumpfile, out, 0666); err != nil { + return "", err + } + return dumpfile, err +} diff --git a/src/cmd/compile/internal/inline/inlheur/eclassify.go b/src/cmd/compile/internal/inline/inlheur/eclassify.go new file mode 100644 index 0000000..1e6d1b9 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/eclassify.go @@ -0,0 +1,247 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package inlheur + +import ( + "cmd/compile/internal/ir" + "fmt" + "os" +) + +// ShouldFoldIfNameConstant analyzes expression tree 'e' to see +// whether it contains only combinations of simple references to all +// of the names in 'names' with selected constants + operators. The +// intent is to identify expression that could be folded away to a +// constant if the value of 'n' were available. Return value is TRUE +// if 'e' does look foldable given the value of 'n', and given that +// 'e' actually makes reference to 'n'. Some examples where the type +// of "n" is int64, type of "s" is string, and type of "p" is *byte: +// +// Simple? Expr +// yes n<10 +// yes n*n-100 +// yes (n < 10 || n > 100) && (n >= 12 || n <= 99 || n != 101) +// yes s == "foo" +// yes p == nil +// no n<foo() +// no n<1 || n>m +// no float32(n)<1.0 +// no *p == 1 +// no 1 + 100 +// no 1 / n +// no 1 + unsafe.Sizeof(n) +// +// To avoid complexities (e.g. nan, inf) we stay way from folding and +// floating point or complex operations (integers, bools, and strings +// only). We also try to be conservative about avoiding any operation +// that might result in a panic at runtime, e.g. for "n" with type +// int64: +// +// 1<<(n-9) < 100/(n<<9999) +// +// we would return FALSE due to the negative shift count and/or +// potential divide by zero. +func ShouldFoldIfNameConstant(n ir.Node, names []*ir.Name) bool { + cl := makeExprClassifier(names) + var doNode func(ir.Node) bool + doNode = func(n ir.Node) bool { + ir.DoChildren(n, doNode) + cl.Visit(n) + return false + } + doNode(n) + if cl.getdisp(n) != exprSimple { + return false + } + for _, v := range cl.names { + if !v { + return false + } + } + return true +} + +// exprClassifier holds intermediate state about nodes within an +// expression tree being analyzed by ShouldFoldIfNameConstant. Here +// "name" is the name node passed in, and "disposition" stores the +// result of classifying a given IR node. 
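+//
+// For example, with names = {n} (hypothetical expressions):
+//
+//	n + 1 => exprSimple   (name combined with a literal)
+//	2 * 3 => exprLiterals (literals only)
+//	n + m => exprNoInfo   (m is not in 'names')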
+type exprClassifier struct { + names map[*ir.Name]bool + disposition map[ir.Node]disp +} + +type disp int + +const ( + // no info on this expr + exprNoInfo disp = iota + + // expr contains only literals + exprLiterals + + // expr is legal combination of literals and specified names + exprSimple +) + +func (d disp) String() string { + switch d { + case exprNoInfo: + return "noinfo" + case exprSimple: + return "simple" + case exprLiterals: + return "literals" + default: + return fmt.Sprintf("unknown<%d>", d) + } +} + +func makeExprClassifier(names []*ir.Name) *exprClassifier { + m := make(map[*ir.Name]bool, len(names)) + for _, n := range names { + m[n] = false + } + return &exprClassifier{ + names: m, + disposition: make(map[ir.Node]disp), + } +} + +// Visit sets the classification for 'n' based on the previously +// calculated classifications for n's children, as part of a bottom-up +// walk over an expression tree. +func (ec *exprClassifier) Visit(n ir.Node) { + + ndisp := exprNoInfo + + binparts := func(n ir.Node) (ir.Node, ir.Node) { + if lex, ok := n.(*ir.LogicalExpr); ok { + return lex.X, lex.Y + } else if bex, ok := n.(*ir.BinaryExpr); ok { + return bex.X, bex.Y + } else { + panic("bad") + } + } + + t := n.Type() + if t == nil { + if debugTrace&debugTraceExprClassify != 0 { + fmt.Fprintf(os.Stderr, "=-= *** untyped op=%s\n", + n.Op().String()) + } + } else if t.IsInteger() || t.IsString() || t.IsBoolean() || t.HasNil() { + switch n.Op() { + // FIXME: maybe add support for OADDSTR? + case ir.ONIL: + ndisp = exprLiterals + + case ir.OLITERAL: + if _, ok := n.(*ir.BasicLit); ok { + } else { + panic("unexpected") + } + ndisp = exprLiterals + + case ir.ONAME: + nn := n.(*ir.Name) + if _, ok := ec.names[nn]; ok { + ndisp = exprSimple + ec.names[nn] = true + } else { + sv := ir.StaticValue(n) + if sv.Op() == ir.ONAME { + nn = sv.(*ir.Name) + } + if _, ok := ec.names[nn]; ok { + ndisp = exprSimple + ec.names[nn] = true + } + } + + case ir.ONOT, + ir.OPLUS, + ir.ONEG: + uex := n.(*ir.UnaryExpr) + ndisp = ec.getdisp(uex.X) + + case ir.OEQ, + ir.ONE, + ir.OLT, + ir.OGT, + ir.OGE, + ir.OLE: + // compare ops + x, y := binparts(n) + ndisp = ec.dispmeet(x, y) + if debugTrace&debugTraceExprClassify != 0 { + fmt.Fprintf(os.Stderr, "=-= meet(%s,%s) = %s for op=%s\n", + ec.getdisp(x), ec.getdisp(y), ec.dispmeet(x, y), + n.Op().String()) + } + case ir.OLSH, + ir.ORSH, + ir.ODIV, + ir.OMOD: + x, y := binparts(n) + if ec.getdisp(y) == exprLiterals { + ndisp = ec.dispmeet(x, y) + } + + case ir.OADD, + ir.OSUB, + ir.OOR, + ir.OXOR, + ir.OMUL, + ir.OAND, + ir.OANDNOT, + ir.OANDAND, + ir.OOROR: + x, y := binparts(n) + if debugTrace&debugTraceExprClassify != 0 { + fmt.Fprintf(os.Stderr, "=-= meet(%s,%s) = %s for op=%s\n", + ec.getdisp(x), ec.getdisp(y), ec.dispmeet(x, y), + n.Op().String()) + } + ndisp = ec.dispmeet(x, y) + } + } + + if debugTrace&debugTraceExprClassify != 0 { + fmt.Fprintf(os.Stderr, "=-= op=%s disp=%v\n", n.Op().String(), + ndisp.String()) + } + + ec.disposition[n] = ndisp +} + +func (ec *exprClassifier) getdisp(x ir.Node) disp { + if d, ok := ec.disposition[x]; ok { + return d + } else { + panic("missing node from disp table") + } +} + +// dispmeet performs a "meet" operation on the data flow states of +// node x and y (where the term "meet" is being drawn from traditional +// lattice-theoretical data flow analysis terminology). 
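+//
+// Writing the operation directly on dispositions for brevity, the
+// resulting algebra looks like (exprNoInfo absorbs, exprSimple
+// dominates exprLiterals):
+//
+//	meet(exprNoInfo, _)              == exprNoInfo
+//	meet(exprSimple, exprLiterals)   == exprSimple
+//	meet(exprLiterals, exprLiterals) == exprLiterals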
+func (ec *exprClassifier) dispmeet(x, y ir.Node) disp { + xd := ec.getdisp(x) + if xd == exprNoInfo { + return exprNoInfo + } + yd := ec.getdisp(y) + if yd == exprNoInfo { + return exprNoInfo + } + if xd == exprSimple || yd == exprSimple { + return exprSimple + } + if xd != exprLiterals || yd != exprLiterals { + panic("unexpected") + } + return exprLiterals +} diff --git a/src/cmd/compile/internal/inline/inlheur/funcprop_string.go b/src/cmd/compile/internal/inline/inlheur/funcprop_string.go new file mode 100644 index 0000000..d16e4d3 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/funcprop_string.go @@ -0,0 +1,44 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package inlheur + +import ( + "fmt" + "strings" +) + +func (fp *FuncProps) String() string { + return fp.ToString("") +} + +func (fp *FuncProps) ToString(prefix string) string { + var sb strings.Builder + if fp.Flags != 0 { + fmt.Fprintf(&sb, "%sFlags %s\n", prefix, fp.Flags) + } + flagSliceToSB[ParamPropBits](&sb, fp.ParamFlags, + prefix, "ParamFlags") + flagSliceToSB[ResultPropBits](&sb, fp.ResultFlags, + prefix, "ResultFlags") + return sb.String() +} + +func flagSliceToSB[T interface { + ~uint32 + String() string +}](sb *strings.Builder, sl []T, prefix string, tag string) { + var sb2 strings.Builder + foundnz := false + fmt.Fprintf(&sb2, "%s%s\n", prefix, tag) + for i, e := range sl { + if e != 0 { + foundnz = true + } + fmt.Fprintf(&sb2, "%s %d %s\n", prefix, i, e.String()) + } + if foundnz { + sb.WriteString(sb2.String()) + } +} diff --git a/src/cmd/compile/internal/inline/inlheur/funcpropbits_string.go b/src/cmd/compile/internal/inline/inlheur/funcpropbits_string.go new file mode 100644 index 0000000..28de4a9 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/funcpropbits_string.go @@ -0,0 +1,58 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by "stringer -bitset -type FuncPropBits"; DO NOT EDIT. + +package inlheur + +import ( + "bytes" + "strconv" +) + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[FuncPropNeverReturns-1] +} + +var _FuncPropBits_value = [...]uint64{ + 0x1, /* FuncPropNeverReturns */ +} + +const _FuncPropBits_name = "FuncPropNeverReturns" + +var _FuncPropBits_index = [...]uint8{0, 20} + +func (i FuncPropBits) String() string { + var b bytes.Buffer + + remain := uint64(i) + seen := false + + for k, v := range _FuncPropBits_value { + x := _FuncPropBits_name[_FuncPropBits_index[k]:_FuncPropBits_index[k+1]] + if v == 0 { + if i == 0 { + b.WriteString(x) + return b.String() + } + continue + } + if (v & remain) == v { + remain &^= v + x := _FuncPropBits_name[_FuncPropBits_index[k]:_FuncPropBits_index[k+1]] + if seen { + b.WriteString("|") + } + seen = true + b.WriteString(x) + } + } + if remain == 0 { + return b.String() + } + return "FuncPropBits(0x" + strconv.FormatInt(int64(i), 16) + ")" +} diff --git a/src/cmd/compile/internal/inline/inlheur/funcprops_test.go b/src/cmd/compile/internal/inline/inlheur/funcprops_test.go new file mode 100644 index 0000000..c04e604 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/funcprops_test.go @@ -0,0 +1,530 @@ +// Copyright 2023 The Go Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package inlheur + +import ( + "bufio" + "encoding/json" + "flag" + "fmt" + "internal/testenv" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "testing" + "time" +) + +var remasterflag = flag.Bool("update-expected", false, "if true, generate updated golden results in testcases for all props tests") + +func TestFuncProperties(t *testing.T) { + td := t.TempDir() + // td = "/tmp/qqq" + // os.RemoveAll(td) + // os.Mkdir(td, 0777) + testenv.MustHaveGoBuild(t) + + // NOTE: this testpoint has the unfortunate characteristic that it + // relies on the installed compiler, meaning that if you make + // changes to the inline heuristics code in your working copy and + // then run the test, it will test the installed compiler and not + // your local modifications. TODO: decide whether to convert this + // to building a fresh compiler on the fly, or using some other + // scheme. + + testcases := []string{"funcflags", "returns", "params", + "acrosscall", "calls", "returns2"} + for _, tc := range testcases { + dumpfile, err := gatherPropsDumpForFile(t, tc, td) + if err != nil { + t.Fatalf("dumping func props for %q: error %v", tc, err) + } + // Read in the newly generated dump. + dentries, dcsites, derr := readDump(t, dumpfile) + if derr != nil { + t.Fatalf("reading func prop dump: %v", derr) + } + if *remasterflag { + updateExpected(t, tc, dentries, dcsites) + continue + } + // Generate expected dump. + epath, egerr := genExpected(td, tc) + if egerr != nil { + t.Fatalf("generating expected func prop dump: %v", egerr) + } + // Read in the expected result entries. + eentries, ecsites, eerr := readDump(t, epath) + if eerr != nil { + t.Fatalf("reading expected func prop dump: %v", eerr) + } + // Compare new vs expected. + n := len(dentries) + eidx := 0 + for i := 0; i < n; i++ { + dentry := dentries[i] + dcst := dcsites[i] + if !interestingToCompare(dentry.fname) { + continue + } + if eidx >= len(eentries) { + t.Errorf("testcase %s missing expected entry for %s, skipping", tc, dentry.fname) + continue + } + eentry := eentries[eidx] + ecst := ecsites[eidx] + eidx++ + if dentry.fname != eentry.fname { + t.Errorf("got fn %q wanted %q, skipping checks", + dentry.fname, eentry.fname) + continue + } + compareEntries(t, tc, &dentry, dcst, &eentry, ecst) + } + } +} + +func propBitsToString[T interface{ String() string }](sl []T) string { + var sb strings.Builder + for i, f := range sl { + fmt.Fprintf(&sb, "%d: %s\n", i, f.String()) + } + return sb.String() +} + +func compareEntries(t *testing.T, tc string, dentry *fnInlHeur, dcsites encodedCallSiteTab, eentry *fnInlHeur, ecsites encodedCallSiteTab) { + dfp := dentry.props + efp := eentry.props + dfn := dentry.fname + + // Compare function flags. + if dfp.Flags != efp.Flags { + t.Errorf("testcase %q: Flags mismatch for %q: got %s, wanted %s", + tc, dfn, dfp.Flags.String(), efp.Flags.String()) + } + // Compare returns + rgot := propBitsToString[ResultPropBits](dfp.ResultFlags) + rwant := propBitsToString[ResultPropBits](efp.ResultFlags) + if rgot != rwant { + t.Errorf("testcase %q: Results mismatch for %q: got:\n%swant:\n%s", + tc, dfn, rgot, rwant) + } + // Compare receiver + params. 
+ pgot := propBitsToString[ParamPropBits](dfp.ParamFlags) + pwant := propBitsToString[ParamPropBits](efp.ParamFlags) + if pgot != pwant { + t.Errorf("testcase %q: Params mismatch for %q: got:\n%swant:\n%s", + tc, dfn, pgot, pwant) + } + // Compare call sites. + for k, ve := range ecsites { + if vd, ok := dcsites[k]; !ok { + t.Errorf("testcase %q missing expected callsite %q in func %q", tc, k, dfn) + continue + } else { + if vd != ve { + t.Errorf("testcase %q callsite %q in func %q: got %+v want %+v", + tc, k, dfn, vd.String(), ve.String()) + } + } + } + for k := range dcsites { + if _, ok := ecsites[k]; !ok { + t.Errorf("testcase %q unexpected extra callsite %q in func %q", tc, k, dfn) + } + } +} + +type dumpReader struct { + s *bufio.Scanner + t *testing.T + p string + ln int +} + +// readDump reads in the contents of a dump file produced +// by the "-d=dumpinlfuncprops=..." command line flag by the Go +// compiler. It breaks the dump down into separate sections +// by function, then deserializes each func section into a +// fnInlHeur object and returns a slice of those objects. +func readDump(t *testing.T, path string) ([]fnInlHeur, []encodedCallSiteTab, error) { + content, err := os.ReadFile(path) + if err != nil { + return nil, nil, err + } + dr := &dumpReader{ + s: bufio.NewScanner(strings.NewReader(string(content))), + t: t, + p: path, + ln: 1, + } + // consume header comment until preamble delimiter. + found := false + for dr.scan() { + if dr.curLine() == preambleDelimiter { + found = true + break + } + } + if !found { + return nil, nil, fmt.Errorf("malformed testcase file %s, missing preamble delimiter", path) + } + res := []fnInlHeur{} + csres := []encodedCallSiteTab{} + for { + dentry, dcst, err := dr.readEntry() + if err != nil { + t.Fatalf("reading func prop dump: %v", err) + } + if dentry.fname == "" { + break + } + res = append(res, dentry) + csres = append(csres, dcst) + } + return res, csres, nil +} + +func (dr *dumpReader) scan() bool { + v := dr.s.Scan() + if v { + dr.ln++ + } + return v +} + +func (dr *dumpReader) curLine() string { + res := strings.TrimSpace(dr.s.Text()) + if !strings.HasPrefix(res, "// ") { + dr.t.Fatalf("malformed line %s:%d, no comment: %s", dr.p, dr.ln, res) + } + return res[3:] +} + +// readObjBlob reads in a series of commented lines until +// it hits a delimiter, then returns the contents of the comments. +func (dr *dumpReader) readObjBlob(delim string) (string, error) { + var sb strings.Builder + foundDelim := false + for dr.scan() { + line := dr.curLine() + if delim == line { + foundDelim = true + break + } + sb.WriteString(line + "\n") + } + if err := dr.s.Err(); err != nil { + return "", err + } + if !foundDelim { + return "", fmt.Errorf("malformed input %s, missing delimiter %q", + dr.p, delim) + } + return sb.String(), nil +} + +// readEntry reads a single function's worth of material from +// a file produced by the "-d=dumpinlfuncprops=..." command line +// flag. It deserializes the json for the func properties and +// returns the resulting properties and function name. 
EOF is +// signaled by a nil FuncProps return (with no error). +func (dr *dumpReader) readEntry() (fnInlHeur, encodedCallSiteTab, error) { + var funcInlHeur fnInlHeur + var callsites encodedCallSiteTab + if !dr.scan() { + return funcInlHeur, callsites, nil + } + // first line contains info about function: file/name/line + info := dr.curLine() + chunks := strings.Fields(info) + funcInlHeur.file = chunks[0] + funcInlHeur.fname = chunks[1] + if _, err := fmt.Sscanf(chunks[2], "%d", &funcInlHeur.line); err != nil { + return funcInlHeur, callsites, fmt.Errorf("scanning line %q: %v", info, err) + } + // consume comments until and including delimiter + for { + if !dr.scan() { + break + } + if dr.curLine() == comDelimiter { + break + } + } + + // Consume JSON for encoded props. + dr.scan() + line := dr.curLine() + fp := &FuncProps{} + if err := json.Unmarshal([]byte(line), fp); err != nil { + return funcInlHeur, callsites, err + } + funcInlHeur.props = fp + + // Consume callsites. + callsites = make(encodedCallSiteTab) + for dr.scan() { + line := dr.curLine() + if line == csDelimiter { + break + } + // expected format: "// callsite: <expanded pos> flagstr <desc> flagval <flags> score <score> mask <scoremask> maskstr <scoremaskstring>" + fields := strings.Fields(line) + if len(fields) != 12 { + return funcInlHeur, nil, fmt.Errorf("malformed callsite (nf=%d) %s line %d: %s", len(fields), dr.p, dr.ln, line) + } + if fields[2] != "flagstr" || fields[4] != "flagval" || fields[6] != "score" || fields[8] != "mask" || fields[10] != "maskstr" { + return funcInlHeur, nil, fmt.Errorf("malformed callsite %s line %d: %s", + dr.p, dr.ln, line) + } + tag := fields[1] + flagstr := fields[5] + flags, err := strconv.Atoi(flagstr) + if err != nil { + return funcInlHeur, nil, fmt.Errorf("bad flags val %s line %d: %q err=%v", + dr.p, dr.ln, line, err) + } + scorestr := fields[7] + score, err2 := strconv.Atoi(scorestr) + if err2 != nil { + return funcInlHeur, nil, fmt.Errorf("bad score val %s line %d: %q err=%v", + dr.p, dr.ln, line, err2) + } + maskstr := fields[9] + mask, err3 := strconv.Atoi(maskstr) + if err3 != nil { + return funcInlHeur, nil, fmt.Errorf("bad mask val %s line %d: %q err=%v", + dr.p, dr.ln, line, err3) + } + callsites[tag] = propsAndScore{ + props: CSPropBits(flags), + score: score, + mask: scoreAdjustTyp(mask), + } + } + + // Consume function delimiter. + dr.scan() + line = dr.curLine() + if line != fnDelimiter { + return funcInlHeur, nil, fmt.Errorf("malformed testcase file %q, missing delimiter %q", dr.p, fnDelimiter) + } + + return funcInlHeur, callsites, nil +} + +// gatherPropsDumpForFile builds the specified testcase 'testcase' from +// testdata/props passing the "-d=dumpinlfuncprops=..." compiler option, +// to produce a properties dump, then returns the path of the newly +// created file. NB: we can't use "go tool compile" here, since +// some of the test cases import stdlib packages (such as "os"). +// This means using "go build", which is problematic since the +// Go command can potentially cache the results of the compile step, +// causing the test to fail when being run interactively. E.g. +// +// $ rm -f dump.txt +// $ go build -o foo.a -gcflags=-d=dumpinlfuncprops=dump.txt foo.go +// $ rm -f dump.txt foo.a +// $ go build -o foo.a -gcflags=-d=dumpinlfuncprops=dump.txt foo.go +// $ ls foo.a dump.txt > /dev/null +// ls: cannot access 'dump.txt': No such file or directory +// $ +// +// For this reason, pick a unique filename for the dump, so as to +// defeat the caching.
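+// +// A rough usage sketch (hypothetical invocation; the testcase name +// is illustrative and the error handling is abbreviated): +// +// dump, err := gatherPropsDumpForFile(t, "funcflags", t.TempDir()) +// if err != nil { +// t.Fatalf("gathering props: %v", err) +// } +// entries, csites, derr := readDump(t, dump)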
+func gatherPropsDumpForFile(t *testing.T, testcase string, td string) (string, error) { + t.Helper() + gopath := "testdata/props/" + testcase + ".go" + outpath := filepath.Join(td, testcase+".a") + salt := fmt.Sprintf(".p%dt%d", os.Getpid(), time.Now().UnixNano()) + dumpfile := filepath.Join(td, testcase+salt+".dump.txt") + run := []string{testenv.GoToolPath(t), "build", + "-gcflags=-d=dumpinlfuncprops=" + dumpfile, "-o", outpath, gopath} + out, err := testenv.Command(t, run[0], run[1:]...).CombinedOutput() + if err != nil { + t.Logf("compile command: %+v", run) + } + if strings.TrimSpace(string(out)) != "" { + t.Logf("%s", out) + } + return dumpfile, err +} + +// genExpected reads in a given Go testcase file, strips out all the +// unindented (column 0) comments, writes them out to a new file, and +// returns the path of that new file. By picking out just the comments +// from the Go file we wind up with something that resembles the +// output from a "-d=dumpinlfuncprops=..." compilation. +func genExpected(td string, testcase string) (string, error) { + epath := filepath.Join(td, testcase+".expected") + outf, err := os.OpenFile(epath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return "", err + } + gopath := "testdata/props/" + testcase + ".go" + content, err := os.ReadFile(gopath) + if err != nil { + return "", err + } + lines := strings.Split(string(content), "\n") + for _, line := range lines[3:] { + if !strings.HasPrefix(line, "// ") { + continue + } + fmt.Fprintf(outf, "%s\n", line) + } + if err := outf.Close(); err != nil { + return "", err + } + return epath, nil +} + +type upexState struct { + dentries []fnInlHeur + newgolines []string + atline map[uint]uint +} + +func mkUpexState(dentries []fnInlHeur) *upexState { + atline := make(map[uint]uint) + for _, e := range dentries { + atline[e.line] = atline[e.line] + 1 + } + return &upexState{ + dentries: dentries, + atline: atline, + } +} + +// updateExpected takes a given Go testcase file X.go and writes out a +// new/updated version of the file to X.go.new, where the column-0 +// "expected" comments have been updated using fresh data from +// "dentries". +// +// Writing of expected results is complicated by closures and by +// generics, where you can have multiple functions that all share the +// same starting line. Currently we combine all the dups and +// closures into the single pre-func comment. +func updateExpected(t *testing.T, testcase string, dentries []fnInlHeur, dcsites []encodedCallSiteTab) { + nd := len(dentries) + + ues := mkUpexState(dentries) + + gopath := "testdata/props/" + testcase + ".go" + newgopath := "testdata/props/" + testcase + ".go.new" + + // Read the existing Go file. + content, err := os.ReadFile(gopath) + if err != nil { + t.Fatalf("opening %s: %v", gopath, err) + } + golines := strings.Split(string(content), "\n") + + // Preserve copyright. + ues.newgolines = append(ues.newgolines, golines[:4]...) + if !strings.HasPrefix(golines[0], "// Copyright") { + t.Fatalf("missing copyright from existing testcase") + } + golines = golines[4:] + + clore := regexp.MustCompile(`.+\.func\d+[\.\d]*$`) + + emitFunc := func(e *fnInlHeur, dcsites encodedCallSiteTab, + instance, atl uint) { + var sb strings.Builder + dumpFnPreamble(&sb, e, dcsites, instance, atl) + ues.newgolines = append(ues.newgolines, + strings.Split(strings.TrimSpace(sb.String()), "\n")...) + } + + // Write file preamble with "DO NOT EDIT" message and such.
+ var sb strings.Builder + dumpFilePreamble(&sb) + ues.newgolines = append(ues.newgolines, + strings.Split(strings.TrimSpace(sb.String()), "\n")...) + + // Helper to add a clump of functions to the output file. + processClump := func(idx int, emit bool) int { + // Process func itself, plus anything else defined + // on the same line + atl := ues.atline[dentries[idx].line] + for k := uint(0); k < atl; k++ { + if emit { + emitFunc(&dentries[idx], dcsites[idx], k, atl) + } + idx++ + } + // now process any closures it contains + ncl := 0 + for idx < nd { + nfn := dentries[idx].fname + if !clore.MatchString(nfn) { + break + } + ncl++ + if emit { + emitFunc(&dentries[idx], dcsites[idx], 0, 1) + } + idx++ + } + return idx + } + + didx := 0 + for _, line := range golines { + if strings.HasPrefix(line, "func ") { + + // We have a function definition. + // Pick out the corresponding entry or entries in the dump + // and emit if interesting (or skip if not). + dentry := dentries[didx] + emit := interestingToCompare(dentry.fname) + didx = processClump(didx, emit) + } + + // Consume all existing comments. + if strings.HasPrefix(line, "//") { + continue + } + ues.newgolines = append(ues.newgolines, line) + } + + if didx != nd { + t.Logf("didx=%d wanted %d", didx, nd) + } + + // Open new Go file and write contents. + of, err := os.OpenFile(newgopath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + t.Fatalf("opening %s: %v", newgopath, err) + } + fmt.Fprintf(of, "%s", strings.Join(ues.newgolines, "\n")) + if err := of.Close(); err != nil { + t.Fatalf("closing %s: %v", newgopath, err) + } + + t.Logf("update-expected: emitted updated file %s", newgopath) + t.Logf("please compare the two files, then overwrite %s with %s\n", + gopath, newgopath) +} + +// interestingToCompare returns TRUE if we want to compare results +// for function 'fname'. +func interestingToCompare(fname string) bool { + if strings.HasPrefix(fname, "init.") { + return true + } + if strings.HasPrefix(fname, "T_") { + return true + } + f := strings.Split(fname, ".") + if len(f) == 2 && strings.HasPrefix(f[1], "T_") { + return true + } + return false +} diff --git a/src/cmd/compile/internal/inline/inlheur/function_properties.go b/src/cmd/compile/internal/inline/inlheur/function_properties.go new file mode 100644 index 0000000..b90abf9 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/function_properties.go @@ -0,0 +1,98 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package inlheur + +// This file defines a set of Go function "properties" intended to +// guide inlining heuristics; these properties may apply to the +// function as a whole, or to one or more function return values or +// parameters. +// +// IMPORTANT: function properties are produced on a "best effort" +// basis, meaning that the code that computes them doesn't verify that +// the properties are guaranteed to be true in 100% of cases. For this +// reason, properties should only be used to drive always-safe +// optimization decisions (e.g. "should I inline this call", or +// "should I unroll this loop") as opposed to potentially unsafe IR +// alterations that could change program semantics (e.g. "can I delete +// this variable" or "can I move this statement to a new location"). 
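+// +// As a hedged illustration of the distinction (the penalty name below +// is hypothetical, not a definition from this package): +// +// if props.Flags&FuncPropNeverReturns != 0 { +// score += somePanicPathPenalty // fine: only biases a score +// } +// // not fine: deleting statements after the call based on the same +// // best-effort property could change program semantics.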
+// +//---------------------------------------------------------------- + +// FuncProps describes a set of function or method properties that may +// be useful for inlining heuristics. Here 'Flags' are properties that +// we think apply to the entire function; 'ParamFlags' are +// properties of specific function params (or the receiver), and +// 'ResultFlags' are properties we think will apply to values +// of specific results. Note that 'ParamFlags' includes an entry for +// the receiver if applicable, and does include entries for blank +// params; for a function such as "func foo(_ int, b byte, _ float32)" +// the length of ParamFlags will be 3. +type FuncProps struct { + Flags FuncPropBits + ParamFlags []ParamPropBits // slot 0 receiver if applicable + ResultFlags []ResultPropBits +} + +type FuncPropBits uint32 + +const ( + // Function always panics or invokes os.Exit() or a func that does + // likewise. + FuncPropNeverReturns FuncPropBits = 1 << iota +) + +type ParamPropBits uint32 + +const ( + // No info about this param + ParamNoInfo ParamPropBits = 0 + + // Parameter value feeds unmodified into a top-level interface + // call (this assumes the parameter is of interface type). + ParamFeedsInterfaceMethodCall ParamPropBits = 1 << iota + + // Parameter value feeds unmodified into an interface call that + // may be conditional/nested and not always executed (this assumes + // the parameter is of interface type). + ParamMayFeedInterfaceMethodCall ParamPropBits = 1 << iota + + // Parameter value feeds unmodified into a top level indirect + // function call (assumes parameter is of function type). + ParamFeedsIndirectCall + + // Parameter value feeds unmodified into an indirect function call + // that is conditional/nested (not guaranteed to execute). Assumes + // parameter is of function type. + ParamMayFeedIndirectCall + + // Parameter value feeds unmodified into a top level "switch" + // statement or "if" statement simple expressions (see more on + // "simple" expression classification below). + ParamFeedsIfOrSwitch + + // Parameter value feeds unmodified into a "switch" or "if" + // statement simple expressions (see more on "simple" expression + // classification below), where the if/switch is + // conditional/nested. + ParamMayFeedIfOrSwitch +) + +type ResultPropBits uint32 + +const ( + // No info about this result + ResultNoInfo ResultPropBits = 0 + // This result always contains allocated memory. + ResultIsAllocatedMem ResultPropBits = 1 << iota + // This result is always a single concrete type that is + // implicitly converted to interface. + ResultIsConcreteTypeConvertedToInterface + // Result is always the same non-composite compile time constant. + ResultAlwaysSameConstant + // Result is always the same function or closure. + ResultAlwaysSameFunc + // Result is always the same (potentially) inlinable function or closure. + ResultAlwaysSameInlinableFunc +) diff --git a/src/cmd/compile/internal/inline/inlheur/names.go b/src/cmd/compile/internal/inline/inlheur/names.go new file mode 100644 index 0000000..0223850 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/names.go @@ -0,0 +1,129 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.
+ +package inlheur + +import ( + "cmd/compile/internal/ir" + "go/constant" +) + +// nameFinder provides a set of "isXXX" query methods for clients to +// ask whether a given AST node corresponds to a function, a constant +// value, and so on. These methods use an underlying ir.ReassignOracle +// to return more precise results in cases where an "interesting" +// value is assigned to a singly-defined local temp. Example: +// +// const q = 101 +// fq := func() int { return q } +// copyOfConstant := q +// copyOfFunc := fq +// interestingCall(copyOfConstant, copyOfFunc) +// +// A name finder query method invoked on the arguments being passed to +// "interestingCall" will be able to detect that 'copyOfConstant' always +// evaluates to a constant (even though it is in fact a PAUTO local +// variable). A given nameFinder can also operate without using +// ir.ReassignOracle (in cases where it is not practical to look +// at the entire function); in such cases queries will still work +// for explicit constant values and functions. +type nameFinder struct { + ro *ir.ReassignOracle +} + +// newNameFinder returns a new nameFinder object with a reassignment +// oracle initialized based on the function fn, or if fn is nil, +// without an underlying ReassignOracle. +func newNameFinder(fn *ir.Func) *nameFinder { + var ro *ir.ReassignOracle + if fn != nil { + ro = &ir.ReassignOracle{} + ro.Init(fn) + } + return &nameFinder{ro: ro} +} + +// funcName returns the *ir.Name for the func or method +// corresponding to node 'n', or nil if n can't be proven +// to contain a function value. +func (nf *nameFinder) funcName(n ir.Node) *ir.Name { + sv := n + if nf.ro != nil { + sv = nf.ro.StaticValue(n) + } + if name := ir.StaticCalleeName(sv); name != nil { + return name + } + return nil +} + +// isAllocatedMem returns true if node n corresponds to a memory +// allocation expression (make, new, or equivalent). +func (nf *nameFinder) isAllocatedMem(n ir.Node) bool { + sv := n + if nf.ro != nil { + sv = nf.ro.StaticValue(n) + } + switch sv.Op() { + case ir.OMAKESLICE, ir.ONEW, ir.OPTRLIT, ir.OSLICELIT: + return true + } + return false +} + +// constValue returns the underlying constant.Value for an AST node n +// if n is itself a constant value/expr, or if n is a singly assigned +// local containing a constant expr/value (or nil if not constant). +func (nf *nameFinder) constValue(n ir.Node) constant.Value { + sv := n + if nf.ro != nil { + sv = nf.ro.StaticValue(n) + } + if sv.Op() == ir.OLITERAL { + return sv.Val() + } + return nil +} + +// isNil returns whether n is nil (or a singly +// assigned local containing nil). +func (nf *nameFinder) isNil(n ir.Node) bool { + sv := n + if nf.ro != nil { + sv = nf.ro.StaticValue(n) + } + return sv.Op() == ir.ONIL +} + +func (nf *nameFinder) staticValue(n ir.Node) ir.Node { + if nf.ro == nil { + return n + } + return nf.ro.StaticValue(n) +} + +func (nf *nameFinder) reassigned(n *ir.Name) bool { + if nf.ro == nil { + return true + } + return nf.ro.Reassigned(n) +} + +func (nf *nameFinder) isConcreteConvIface(n ir.Node) bool { + sv := n + if nf.ro != nil { + sv = nf.ro.StaticValue(n) + } + if sv.Op() != ir.OCONVIFACE { + return false + } + return !sv.(*ir.ConvExpr).X.Type().IsInterface() +} + +func isSameFuncName(v1, v2 *ir.Name) bool { + // NB: there are a few corner cases where pointer equality + // doesn't work here, but this should be good enough for + // our purposes here.
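+ // (One hypothetical instance of such a corner case: two distinct + // ir.Name nodes that denote the same function, e.g. created during + // import or inlining; those would compare unequal here.)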
+ return v1 == v2 +} diff --git a/src/cmd/compile/internal/inline/inlheur/parampropbits_string.go b/src/cmd/compile/internal/inline/inlheur/parampropbits_string.go new file mode 100644 index 0000000..bf4d3ca --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/parampropbits_string.go @@ -0,0 +1,70 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by "stringer -bitset -type ParamPropBits"; DO NOT EDIT. + +package inlheur + +import ( + "bytes" + "strconv" +) + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[ParamNoInfo-0] + _ = x[ParamFeedsInterfaceMethodCall-2] + _ = x[ParamMayFeedInterfaceMethodCall-4] + _ = x[ParamFeedsIndirectCall-8] + _ = x[ParamMayFeedIndirectCall-16] + _ = x[ParamFeedsIfOrSwitch-32] + _ = x[ParamMayFeedIfOrSwitch-64] +} + +var _ParamPropBits_value = [...]uint64{ + 0x0, /* ParamNoInfo */ + 0x2, /* ParamFeedsInterfaceMethodCall */ + 0x4, /* ParamMayFeedInterfaceMethodCall */ + 0x8, /* ParamFeedsIndirectCall */ + 0x10, /* ParamMayFeedIndirectCall */ + 0x20, /* ParamFeedsIfOrSwitch */ + 0x40, /* ParamMayFeedIfOrSwitch */ +} + +const _ParamPropBits_name = "ParamNoInfoParamFeedsInterfaceMethodCallParamMayFeedInterfaceMethodCallParamFeedsIndirectCallParamMayFeedIndirectCallParamFeedsIfOrSwitchParamMayFeedIfOrSwitch" + +var _ParamPropBits_index = [...]uint8{0, 11, 40, 71, 93, 117, 137, 159} + +func (i ParamPropBits) String() string { + var b bytes.Buffer + + remain := uint64(i) + seen := false + + for k, v := range _ParamPropBits_value { + x := _ParamPropBits_name[_ParamPropBits_index[k]:_ParamPropBits_index[k+1]] + if v == 0 { + if i == 0 { + b.WriteString(x) + return b.String() + } + continue + } + if (v & remain) == v { + remain &^= v + x := _ParamPropBits_name[_ParamPropBits_index[k]:_ParamPropBits_index[k+1]] + if seen { + b.WriteString("|") + } + seen = true + b.WriteString(x) + } + } + if remain == 0 { + return b.String() + } + return "ParamPropBits(0x" + strconv.FormatInt(int64(i), 16) + ")" +} diff --git a/src/cmd/compile/internal/inline/inlheur/pstate_string.go b/src/cmd/compile/internal/inline/inlheur/pstate_string.go new file mode 100644 index 0000000..e6108d1 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/pstate_string.go @@ -0,0 +1,30 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by "stringer -type pstate"; DO NOT EDIT. + +package inlheur + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[psNoInfo-0] + _ = x[psCallsPanic-1] + _ = x[psMayReturn-2] + _ = x[psTop-3] +} + +const _pstate_name = "psNoInfopsCallsPanicpsMayReturnpsTop" + +var _pstate_index = [...]uint8{0, 8, 20, 31, 36} + +func (i pstate) String() string { + if i < 0 || i >= pstate(len(_pstate_index)-1) { + return "pstate(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _pstate_name[_pstate_index[i]:_pstate_index[i+1]] +} diff --git a/src/cmd/compile/internal/inline/inlheur/resultpropbits_string.go b/src/cmd/compile/internal/inline/inlheur/resultpropbits_string.go new file mode 100644 index 0000000..888af98 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/resultpropbits_string.go @@ -0,0 +1,68 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by "stringer -bitset -type ResultPropBits"; DO NOT EDIT. + +package inlheur + +import ( + "bytes" + "strconv" +) + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[ResultNoInfo-0] + _ = x[ResultIsAllocatedMem-2] + _ = x[ResultIsConcreteTypeConvertedToInterface-4] + _ = x[ResultAlwaysSameConstant-8] + _ = x[ResultAlwaysSameFunc-16] + _ = x[ResultAlwaysSameInlinableFunc-32] +} + +var _ResultPropBits_value = [...]uint64{ + 0x0, /* ResultNoInfo */ + 0x2, /* ResultIsAllocatedMem */ + 0x4, /* ResultIsConcreteTypeConvertedToInterface */ + 0x8, /* ResultAlwaysSameConstant */ + 0x10, /* ResultAlwaysSameFunc */ + 0x20, /* ResultAlwaysSameInlinableFunc */ +} + +const _ResultPropBits_name = "ResultNoInfoResultIsAllocatedMemResultIsConcreteTypeConvertedToInterfaceResultAlwaysSameConstantResultAlwaysSameFuncResultAlwaysSameInlinableFunc" + +var _ResultPropBits_index = [...]uint8{0, 12, 32, 72, 96, 116, 145} + +func (i ResultPropBits) String() string { + var b bytes.Buffer + + remain := uint64(i) + seen := false + + for k, v := range _ResultPropBits_value { + x := _ResultPropBits_name[_ResultPropBits_index[k]:_ResultPropBits_index[k+1]] + if v == 0 { + if i == 0 { + b.WriteString(x) + return b.String() + } + continue + } + if (v & remain) == v { + remain &^= v + x := _ResultPropBits_name[_ResultPropBits_index[k]:_ResultPropBits_index[k+1]] + if seen { + b.WriteString("|") + } + seen = true + b.WriteString(x) + } + } + if remain == 0 { + return b.String() + } + return "ResultPropBits(0x" + strconv.FormatInt(int64(i), 16) + ")" +} diff --git a/src/cmd/compile/internal/inline/inlheur/score_callresult_uses.go b/src/cmd/compile/internal/inline/inlheur/score_callresult_uses.go new file mode 100644 index 0000000..b95ea37 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/score_callresult_uses.go @@ -0,0 +1,413 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package inlheur + +import ( + "cmd/compile/internal/ir" + "fmt" + "os" +) + +// This file contains code to re-score callsites based on how the +// results of the call were used. 
Example: +// +// func foo() { +// x, fptr := bar() +// switch x { +// case 10: fptr = baz() +// default: blix() +// } +// fptr(100) +// } +// +// The initial scoring pass will assign a score to "bar()" based on +// various criteria, however once the first pass of scoring is done, +// we look at the flags on the result from bar, and check to see +// how those results are used. If bar() always returns the same constant +// for its first result, and if the variable receiving that result +// isn't redefined, and if that variable feeds into an if/switch +// condition, then we will try to adjust the score for "bar" (on the +// theory that if we inlined, we can constant fold / deadcode). + +type resultPropAndCS struct { + defcs *CallSite + props ResultPropBits +} + +type resultUseAnalyzer struct { + resultNameTab map[*ir.Name]resultPropAndCS + fn *ir.Func + cstab CallSiteTab + *condLevelTracker +} + +// rescoreBasedOnCallResultUses examines how call results are used, +// and tries to update the scores of calls based on how their results +// are used in the function. +func (csa *callSiteAnalyzer) rescoreBasedOnCallResultUses(fn *ir.Func, resultNameTab map[*ir.Name]resultPropAndCS, cstab CallSiteTab) { + enableDebugTraceIfEnv() + rua := &resultUseAnalyzer{ + resultNameTab: resultNameTab, + fn: fn, + cstab: cstab, + condLevelTracker: new(condLevelTracker), + } + var doNode func(ir.Node) bool + doNode = func(n ir.Node) bool { + rua.nodeVisitPre(n) + ir.DoChildren(n, doNode) + rua.nodeVisitPost(n) + return false + } + doNode(fn) + disableDebugTrace() +} + +func (csa *callSiteAnalyzer) examineCallResults(cs *CallSite, resultNameTab map[*ir.Name]resultPropAndCS) map[*ir.Name]resultPropAndCS { + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= examining call results for %q\n", + EncodeCallSiteKey(cs)) + } + + // Invoke a helper to pick out the specific ir.Name's the results + // from this call are assigned into, e.g. "x, y := fooBar()". If + // the call is not part of an assignment statement, or if the + // variables in question are not newly defined, then we'll receive + // an empty list here. + // + names, autoTemps, props := namesDefined(cs) + if len(names) == 0 { + return resultNameTab + } + + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= %d names defined\n", len(names)) + } + + // For each returned value, if the value has interesting + // properties (ex: always returns the same constant), and the name + // in question is never redefined, then make an entry in the + // result table for it. 
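+ // For instance (a hypothetical sketch): given "x := alwaysTen()" + // where the callee's result props include ResultAlwaysSameConstant + // and x is never reassigned, x gets a resultNameTab entry pointing + // back at this callsite, so later uses of x can trigger rescoring.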
+ const interesting = (ResultIsConcreteTypeConvertedToInterface | + ResultAlwaysSameConstant | ResultAlwaysSameInlinableFunc | ResultAlwaysSameFunc) + for idx, n := range names { + rprop := props.ResultFlags[idx] + + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= props for ret %d %q: %s\n", + idx, n.Sym().Name, rprop.String()) + } + + if rprop&interesting == 0 { + continue + } + if csa.nameFinder.reassigned(n) { + continue + } + if resultNameTab == nil { + resultNameTab = make(map[*ir.Name]resultPropAndCS) + } else if _, ok := resultNameTab[n]; ok { + panic("should never happen") + } + entry := resultPropAndCS{ + defcs: cs, + props: rprop, + } + resultNameTab[n] = entry + if autoTemps[idx] != nil { + resultNameTab[autoTemps[idx]] = entry + } + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= add resultNameTab table entry n=%v autotemp=%v props=%s\n", n, autoTemps[idx], rprop.String()) + } + } + return resultNameTab +} + +// namesDefined returns a list of ir.Name's corresponding to locals +// that receive the results from the call at site 'cs', plus the +// properties object for the called function. If a given result +// isn't cleanly assigned to a newly defined local, the +// slot for that result in the returned list will be nil. Example: +// +// call returned name list +// +// x := foo() [ x ] +// z, y := bar() [ nil, nil ] +// _, q := baz() [ nil, q ] +// +// In the case of a multi-return call, such as "x, y := foo()", +// the pattern we see from the front end will be a call op +// assigning to auto-temps, and then an assignment of the auto-temps +// to the user-level variables. In such cases we return +// first the user-level variable (in the first func result) +// and then the auto-temp name in the second result. +func namesDefined(cs *CallSite) ([]*ir.Name, []*ir.Name, *FuncProps) { + // If this call doesn't feed into an assignment (and of course not + // all calls do), then we don't have anything to work with here. + if cs.Assign == nil { + return nil, nil, nil + } + funcInlHeur, ok := fpmap[cs.Callee] + if !ok { + // TODO: add an assert/panic here. + return nil, nil, nil + } + if len(funcInlHeur.props.ResultFlags) == 0 { + return nil, nil, nil + } + + // Single return case. 
+ if len(funcInlHeur.props.ResultFlags) == 1 { + asgn, ok := cs.Assign.(*ir.AssignStmt) + if !ok { + return nil, nil, nil + } + // locate name being assigned + aname, ok := asgn.X.(*ir.Name) + if !ok { + return nil, nil, nil + } + return []*ir.Name{aname}, []*ir.Name{nil}, funcInlHeur.props + } + + // Multi-return case + asgn, ok := cs.Assign.(*ir.AssignListStmt) + if !ok || !asgn.Def { + return nil, nil, nil + } + userVars := make([]*ir.Name, len(funcInlHeur.props.ResultFlags)) + autoTemps := make([]*ir.Name, len(funcInlHeur.props.ResultFlags)) + for idx, x := range asgn.Lhs { + if n, ok := x.(*ir.Name); ok { + userVars[idx] = n + r := asgn.Rhs[idx] + if r.Op() == ir.OCONVNOP { + r = r.(*ir.ConvExpr).X + } + if ir.IsAutoTmp(r) { + autoTemps[idx] = r.(*ir.Name) + } + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= multi-ret namedef uv=%v at=%v\n", + x, autoTemps[idx]) + } + } else { + return nil, nil, nil + } + } + return userVars, autoTemps, funcInlHeur.props +} + +func (rua *resultUseAnalyzer) nodeVisitPost(n ir.Node) { + rua.condLevelTracker.post(n) +} + +func (rua *resultUseAnalyzer) nodeVisitPre(n ir.Node) { + rua.condLevelTracker.pre(n) + switch n.Op() { + case ir.OCALLINTER: + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= rescore examine iface call %v:\n", n) + } + rua.callTargetCheckResults(n) + case ir.OCALLFUNC: + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= rescore examine call %v:\n", n) + } + rua.callTargetCheckResults(n) + case ir.OIF: + ifst := n.(*ir.IfStmt) + rua.foldCheckResults(ifst.Cond) + case ir.OSWITCH: + swst := n.(*ir.SwitchStmt) + if swst.Tag != nil { + rua.foldCheckResults(swst.Tag) + } + + } +} + +// callTargetCheckResults examines a given call to see whether the +// callee expression is potentially an inlinable function returned +// from a potentially inlinable call. Examples: +// +// Scenario 1: named intermediate +// +// fn1 := foo() conc := bar() +// fn1("blah") conc.MyMethod() +// +// Scenario 2: returned func or concrete object feeds directly to call +// +// foo()("blah") bar().MyMethod() +// +// In the second case although at the source level the result of the +// direct call feeds right into the method call or indirect call, +// we're relying on the front end having inserted an auto-temp to +// capture the value. 
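+// +// A plausible sketch of that lowering (the temp names are +// illustrative; the exact shape is up to the front end): +// +// .autotmp_1 := foo() .autotmp_2 := bar() +// .autotmp_1("blah") .autotmp_2.MyMethod()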
+func (rua *resultUseAnalyzer) callTargetCheckResults(call ir.Node) { + ce := call.(*ir.CallExpr) + rname := rua.getCallResultName(ce) + if rname == nil { + return + } + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= staticvalue returns %v:\n", + rname) + } + if rname.Class != ir.PAUTO { + return + } + switch call.Op() { + case ir.OCALLINTER: + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= in %s checking %v for cci prop:\n", + rua.fn.Sym().Name, rname) + } + if cs := rua.returnHasProp(rname, ResultIsConcreteTypeConvertedToInterface); cs != nil { + + adj := returnFeedsConcreteToInterfaceCallAdj + cs.Score, cs.ScoreMask = adjustScore(adj, cs.Score, cs.ScoreMask) + } + case ir.OCALLFUNC: + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= in %s checking %v for samefunc props:\n", + rua.fn.Sym().Name, rname) + v, ok := rua.resultNameTab[rname] + if !ok { + fmt.Fprintf(os.Stderr, "=-= no entry for %v in rt\n", rname) + } else { + fmt.Fprintf(os.Stderr, "=-= props for %v: %q\n", rname, v.props.String()) + } + } + if cs := rua.returnHasProp(rname, ResultAlwaysSameInlinableFunc); cs != nil { + adj := returnFeedsInlinableFuncToIndCallAdj + cs.Score, cs.ScoreMask = adjustScore(adj, cs.Score, cs.ScoreMask) + } else if cs := rua.returnHasProp(rname, ResultAlwaysSameFunc); cs != nil { + adj := returnFeedsFuncToIndCallAdj + cs.Score, cs.ScoreMask = adjustScore(adj, cs.Score, cs.ScoreMask) + + } + } +} + +// foldCheckResults examines the specified if/switch condition 'cond' +// to see if it refers to locals defined by a (potentially inlinable) +// function call at call site C, and if so, whether 'cond' contains +// only combinations of simple references to all of the names in +// 'names' with selected constants + operators. If these criteria are +// met, then we adjust the score for call site C to reflect the +// fact that inlining will enable deadcode and/or constant propagation. +// Note: for this heuristic to kick in, the names in question have to +// be all from the same callsite. Examples: +// +// q, r := baz() x, y := foo() +// switch q+r { a, b, c := bar() +// ... if x && y && a && b && c { +// } ... +// } +// +// For the call to "baz" above we apply a score adjustment, but not +// for the calls to "foo" or "bar". 
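+// +// One more hedged contrast (hypothetical code): with "q, r := baz()" +// as above, a condition such as "q+r > 10" can qualify (only simple +// references to q and r plus constants and operators), whereas +// "q+g() > 10" cannot, since it contains a call.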
+func (rua *resultUseAnalyzer) foldCheckResults(cond ir.Node) { + namesUsed := collectNamesUsed(cond) + if len(namesUsed) == 0 { + return + } + var cs *CallSite + for _, n := range namesUsed { + rpcs, found := rua.resultNameTab[n] + if !found { + return + } + if cs != nil && rpcs.defcs != cs { + return + } + cs = rpcs.defcs + if rpcs.props&ResultAlwaysSameConstant == 0 { + return + } + } + if debugTrace&debugTraceScoring != 0 { + nls := func(nl []*ir.Name) string { + r := "" + for _, n := range nl { + r += " " + n.Sym().Name + } + return r + } + fmt.Fprintf(os.Stderr, "=-= calling ShouldFoldIfNameConstant on names={%s} cond=%v\n", nls(namesUsed), cond) + } + + if !ShouldFoldIfNameConstant(cond, namesUsed) { + return + } + adj := returnFeedsConstToIfAdj + cs.Score, cs.ScoreMask = adjustScore(adj, cs.Score, cs.ScoreMask) +} + +func collectNamesUsed(expr ir.Node) []*ir.Name { + res := []*ir.Name{} + ir.Visit(expr, func(n ir.Node) { + if n.Op() != ir.ONAME { + return + } + nn := n.(*ir.Name) + if nn.Class != ir.PAUTO { + return + } + res = append(res, nn) + }) + return res +} + +func (rua *resultUseAnalyzer) returnHasProp(name *ir.Name, prop ResultPropBits) *CallSite { + v, ok := rua.resultNameTab[name] + if !ok { + return nil + } + if v.props&prop == 0 { + return nil + } + return v.defcs +} + +func (rua *resultUseAnalyzer) getCallResultName(ce *ir.CallExpr) *ir.Name { + var callTarg ir.Node + if sel, ok := ce.Fun.(*ir.SelectorExpr); ok { + // method call + callTarg = sel.X + } else if ctarg, ok := ce.Fun.(*ir.Name); ok { + // regular call + callTarg = ctarg + } else { + return nil + } + r := ir.StaticValue(callTarg) + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= staticname on %v returns %v:\n", + callTarg, r) + } + if r.Op() == ir.OCALLFUNC { + // This corresponds to the "x := foo()" case; here + // ir.StaticValue has brought us all the way back to + // the call expression itself. We need to back off to + // the name defined by the call; do this by looking up + // the callsite. + ce := r.(*ir.CallExpr) + cs, ok := rua.cstab[ce] + if !ok { + return nil + } + names, _, _ := namesDefined(cs) + if len(names) == 0 { + return nil + } + return names[0] + } else if r.Op() == ir.ONAME { + return r.(*ir.Name) + } + return nil +} diff --git a/src/cmd/compile/internal/inline/inlheur/scoreadjusttyp_string.go b/src/cmd/compile/internal/inline/inlheur/scoreadjusttyp_string.go new file mode 100644 index 0000000..f5b8bf6 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/scoreadjusttyp_string.go @@ -0,0 +1,80 @@ +// Code generated by "stringer -bitset -type scoreAdjustTyp"; DO NOT EDIT. + +package inlheur + +import "strconv" +import "bytes" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[panicPathAdj-1] + _ = x[initFuncAdj-2] + _ = x[inLoopAdj-4] + _ = x[passConstToIfAdj-8] + _ = x[passConstToNestedIfAdj-16] + _ = x[passConcreteToItfCallAdj-32] + _ = x[passConcreteToNestedItfCallAdj-64] + _ = x[passFuncToIndCallAdj-128] + _ = x[passFuncToNestedIndCallAdj-256] + _ = x[passInlinableFuncToIndCallAdj-512] + _ = x[passInlinableFuncToNestedIndCallAdj-1024] + _ = x[returnFeedsConstToIfAdj-2048] + _ = x[returnFeedsFuncToIndCallAdj-4096] + _ = x[returnFeedsInlinableFuncToIndCallAdj-8192] + _ = x[returnFeedsConcreteToInterfaceCallAdj-16384] +} + +var _scoreAdjustTyp_value = [...]uint64{ + 0x1, /* panicPathAdj */ + 0x2, /* initFuncAdj */ + 0x4, /* inLoopAdj */ + 0x8, /* passConstToIfAdj */ + 0x10, /* passConstToNestedIfAdj */ + 0x20, /* passConcreteToItfCallAdj */ + 0x40, /* passConcreteToNestedItfCallAdj */ + 0x80, /* passFuncToIndCallAdj */ + 0x100, /* passFuncToNestedIndCallAdj */ + 0x200, /* passInlinableFuncToIndCallAdj */ + 0x400, /* passInlinableFuncToNestedIndCallAdj */ + 0x800, /* returnFeedsConstToIfAdj */ + 0x1000, /* returnFeedsFuncToIndCallAdj */ + 0x2000, /* returnFeedsInlinableFuncToIndCallAdj */ + 0x4000, /* returnFeedsConcreteToInterfaceCallAdj */ +} + +const _scoreAdjustTyp_name = "panicPathAdjinitFuncAdjinLoopAdjpassConstToIfAdjpassConstToNestedIfAdjpassConcreteToItfCallAdjpassConcreteToNestedItfCallAdjpassFuncToIndCallAdjpassFuncToNestedIndCallAdjpassInlinableFuncToIndCallAdjpassInlinableFuncToNestedIndCallAdjreturnFeedsConstToIfAdjreturnFeedsFuncToIndCallAdjreturnFeedsInlinableFuncToIndCallAdjreturnFeedsConcreteToInterfaceCallAdj" + +var _scoreAdjustTyp_index = [...]uint16{0, 12, 23, 32, 48, 70, 94, 124, 144, 170, 199, 234, 257, 284, 320, 357} + +func (i scoreAdjustTyp) String() string { + var b bytes.Buffer + + remain := uint64(i) + seen := false + + for k, v := range _scoreAdjustTyp_value { + x := _scoreAdjustTyp_name[_scoreAdjustTyp_index[k]:_scoreAdjustTyp_index[k+1]] + if v == 0 { + if i == 0 { + b.WriteString(x) + return b.String() + } + continue + } + if (v & remain) == v { + remain &^= v + x := _scoreAdjustTyp_name[_scoreAdjustTyp_index[k]:_scoreAdjustTyp_index[k+1]] + if seen { + b.WriteString("|") + } + seen = true + b.WriteString(x) + } + } + if remain == 0 { + return b.String() + } + return "scoreAdjustTyp(0x" + strconv.FormatInt(int64(i), 16) + ")" +} diff --git a/src/cmd/compile/internal/inline/inlheur/scoring.go b/src/cmd/compile/internal/inline/inlheur/scoring.go new file mode 100644 index 0000000..623ba8a --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/scoring.go @@ -0,0 +1,751 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package inlheur + +import ( + "cmd/compile/internal/base" + "cmd/compile/internal/ir" + "cmd/compile/internal/pgo" + "cmd/compile/internal/types" + "fmt" + "os" + "sort" + "strconv" + "strings" +) + +// These constants enumerate the set of possible ways/scenarios +// in which we'll adjust the score of a given callsite. +type scoreAdjustTyp uint + +// These constants capture the various ways in which the inliner's +// scoring phase can adjust a callsite score based on heuristics. 
They +// fall broadly into three categories: +// +// 1) adjustments based solely on the callsite context (ex: call +// appears on panic path) +// +// 2) adjustments that take into account specific interesting values +// passed at a call site (ex: passing a constant that could result in +// cprop/deadcode in the caller) +// +// 3) adjustments that take into account values returned from the call +// at a callsite (ex: call always returns the same inlinable function, +// and return value flows unmodified into an indirect call) +// +// For categories 2 and 3 above, each adjustment can have either a +// "must" version or a "may" version (but not both). Here the idea is +// that in the "must" version the value flow is unconditional: if the +// callsite executes, then the condition we're interested in (ex: +// param feeding call) is guaranteed to happen. For the "may" version, +// there may be control flow that could cause the benefit to be +// bypassed. +const ( + // Category 1 adjustments (see above) + panicPathAdj scoreAdjustTyp = (1 << iota) + initFuncAdj + inLoopAdj + + // Category 2 adjustments (see above). + passConstToIfAdj + passConstToNestedIfAdj + passConcreteToItfCallAdj + passConcreteToNestedItfCallAdj + passFuncToIndCallAdj + passFuncToNestedIndCallAdj + passInlinableFuncToIndCallAdj + passInlinableFuncToNestedIndCallAdj + + // Category 3 adjustments. + returnFeedsConstToIfAdj + returnFeedsFuncToIndCallAdj + returnFeedsInlinableFuncToIndCallAdj + returnFeedsConcreteToInterfaceCallAdj + + sentinelScoreAdj // sentinel; not a real adjustment +) + +// This table records the specific values we use to adjust call +// site scores in a given scenario. +// NOTE: these numbers are chosen very arbitrarily; ideally +// we will go through some sort of tuning process to decide +// what value for each one produces the best performance. + +var adjValues = map[scoreAdjustTyp]int{ + panicPathAdj: 40, + initFuncAdj: 20, + inLoopAdj: -5, + passConstToIfAdj: -20, + passConstToNestedIfAdj: -15, + passConcreteToItfCallAdj: -30, + passConcreteToNestedItfCallAdj: -25, + passFuncToIndCallAdj: -25, + passFuncToNestedIndCallAdj: -20, + passInlinableFuncToIndCallAdj: -45, + passInlinableFuncToNestedIndCallAdj: -40, + returnFeedsConstToIfAdj: -15, + returnFeedsFuncToIndCallAdj: -25, + returnFeedsInlinableFuncToIndCallAdj: -40, + returnFeedsConcreteToInterfaceCallAdj: -25, +} + +// SetupScoreAdjustments interprets the value of the -d=inlscoreadj +// debugging option, if set. The value of this flag is expected to be +// a series of "/"-separated clauses of the form adj1:value1.
Example: +// -d=inlscoreadj=inLoopAdj:0/passConstToIfAdj:-99 +func SetupScoreAdjustments() { + if base.Debug.InlScoreAdj == "" { + return + } + if err := parseScoreAdj(base.Debug.InlScoreAdj); err != nil { + base.Fatalf("malformed -d=inlscoreadj argument %q: %v", + base.Debug.InlScoreAdj, err) + } +} + +func adjStringToVal(s string) (scoreAdjustTyp, bool) { + for adj := scoreAdjustTyp(1); adj < sentinelScoreAdj; adj <<= 1 { + if adj.String() == s { + return adj, true + } + } + return 0, false +} + +func parseScoreAdj(val string) error { + clauses := strings.Split(val, "/") + if len(clauses) == 0 { + return fmt.Errorf("no clauses") + } + for _, clause := range clauses { + elems := strings.Split(clause, ":") + if len(elems) < 2 { + return fmt.Errorf("clause %q: expected colon", clause) + } + if len(elems) != 2 { + return fmt.Errorf("clause %q has %d elements, wanted 2", clause, + len(elems)) + } + adj, ok := adjStringToVal(elems[0]) + if !ok { + return fmt.Errorf("clause %q: unknown adjustment", clause) + } + val, err := strconv.Atoi(elems[1]) + if err != nil { + return fmt.Errorf("clause %q: malformed value: %v", clause, err) + } + adjValues[adj] = val + } + return nil +} + +func adjValue(x scoreAdjustTyp) int { + if val, ok := adjValues[x]; ok { + return val + } else { + panic("internal error: unregistered adjustment type") + } +} + +var mayMustAdj = [...]struct{ may, must scoreAdjustTyp }{ + {may: passConstToNestedIfAdj, must: passConstToIfAdj}, + {may: passConcreteToNestedItfCallAdj, must: passConcreteToItfCallAdj}, + {may: passFuncToNestedIndCallAdj, must: passFuncToIndCallAdj}, + {may: passInlinableFuncToNestedIndCallAdj, must: passInlinableFuncToIndCallAdj}, +} + +func isMay(x scoreAdjustTyp) bool { + return mayToMust(x) != 0 +} + +func isMust(x scoreAdjustTyp) bool { + return mustToMay(x) != 0 +} + +func mayToMust(x scoreAdjustTyp) scoreAdjustTyp { + for _, v := range mayMustAdj { + if x == v.may { + return v.must + } + } + return 0 +} + +func mustToMay(x scoreAdjustTyp) scoreAdjustTyp { + for _, v := range mayMustAdj { + if x == v.must { + return v.may + } + } + return 0 +} + +// computeCallSiteScore takes a given call site whose ir node is +// 'call' and callee function is 'callee' and with previously computed +// call site properties 'csflags', then computes a score for the +// callsite that combines the size cost of the callee with heuristics +// based on previously computed argument and function properties, +// then stores the score and the adjustment mask in the appropriate +// fields in 'cs'. +func (cs *CallSite) computeCallSiteScore(csa *callSiteAnalyzer, calleeProps *FuncProps) { + callee := cs.Callee + csflags := cs.Flags + call := cs.Call + + // Start with the size-based score for the callee. + score := int(callee.Inl.Cost) + var tmask scoreAdjustTyp + + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= scoring call to %s at %s , initial=%d\n", + callee.Sym().Name, fmtFullPos(call.Pos()), score) + } + + // First some score adjustments to discourage inlining in selected cases. + if csflags&CallSiteOnPanicPath != 0 { + score, tmask = adjustScore(panicPathAdj, score, tmask) + } + if csflags&CallSiteInInitFunc != 0 { + score, tmask = adjustScore(initFuncAdj, score, tmask) + } + + // Then adjustments to encourage inlining in selected cases. + if csflags&CallSiteInLoop != 0 { + score, tmask = adjustScore(inLoopAdj, score, tmask) + } + + // Stop here if no callee props.
+ if calleeProps == nil { + cs.Score, cs.ScoreMask = score, tmask + return + } + + // Walk through the actual expressions being passed at the call. + calleeRecvrParms := callee.Type().RecvParams() + for idx := range call.Args { + // ignore blanks + if calleeRecvrParms[idx].Sym == nil || + calleeRecvrParms[idx].Sym.IsBlank() { + continue + } + arg := call.Args[idx] + pflag := calleeProps.ParamFlags[idx] + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= arg %d of %d: val %v flags=%s\n", + idx, len(call.Args), arg, pflag.String()) + } + + if len(cs.ArgProps) == 0 { + continue + } + argProps := cs.ArgProps[idx] + + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= arg %d props %s value %v\n", + idx, argProps.String(), arg) + } + + if argProps&ActualExprConstant != 0 { + if pflag&ParamMayFeedIfOrSwitch != 0 { + score, tmask = adjustScore(passConstToNestedIfAdj, score, tmask) + } + if pflag&ParamFeedsIfOrSwitch != 0 { + score, tmask = adjustScore(passConstToIfAdj, score, tmask) + } + } + + if argProps&ActualExprIsConcreteConvIface != 0 { + // FIXME: ideally here it would be nice to make a + // distinction between the inlinable case and the + // non-inlinable case, but this is hard to do. Example: + // + // type I interface { Tiny() int; Giant() } + // type Conc struct { x int } + // func (c *Conc) Tiny() int { return 42 } + // func (c *Conc) Giant() { <huge amounts of code> } + // + // func passConcToItf(c *Conc) { + // makesItfMethodCall(c) + // } + // + // In the code above, function properties will only tell + // us that 'makesItfMethodCall' invokes a method on its + // interface parameter, but we don't know whether it calls + // "Tiny" or "Giant". If we knew it called "Tiny", then in + // theory in addition to converting the interface call to + // a direct call, we could also inline (in which case + // we'd want to decrease the score even more). + // + // One thing we could do (not yet implemented) is iterate + // through all of the methods of "*Conc" that allow it to + // satisfy I, and if all are inlinable, then exploit that.
+ if pflag&ParamMayFeedInterfaceMethodCall != 0 { + score, tmask = adjustScore(passConcreteToNestedItfCallAdj, score, tmask) + } + if pflag&ParamFeedsInterfaceMethodCall != 0 { + score, tmask = adjustScore(passConcreteToItfCallAdj, score, tmask) + } + } + + if argProps&(ActualExprIsFunc|ActualExprIsInlinableFunc) != 0 { + mayadj := passFuncToNestedIndCallAdj + mustadj := passFuncToIndCallAdj + if argProps&ActualExprIsInlinableFunc != 0 { + mayadj = passInlinableFuncToNestedIndCallAdj + mustadj = passInlinableFuncToIndCallAdj + } + if pflag&ParamMayFeedIndirectCall != 0 { + score, tmask = adjustScore(mayadj, score, tmask) + } + if pflag&ParamFeedsIndirectCall != 0 { + score, tmask = adjustScore(mustadj, score, tmask) + } + } + } + + cs.Score, cs.ScoreMask = score, tmask +} + +func adjustScore(typ scoreAdjustTyp, score int, mask scoreAdjustTyp) (int, scoreAdjustTyp) { + + if isMust(typ) { + if mask&typ != 0 { + return score, mask + } + may := mustToMay(typ) + if mask&may != 0 { + // promote may to must, so undo may + score -= adjValue(may) + mask &^= may + } + } else if isMay(typ) { + must := mayToMust(typ) + if mask&(must|typ) != 0 { + return score, mask + } + } + if mask&typ == 0 { + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= applying adj %d for %s\n", + adjValue(typ), typ.String()) + } + score += adjValue(typ) + mask |= typ + } + return score, mask +} + +var resultFlagToPositiveAdj map[ResultPropBits]scoreAdjustTyp +var paramFlagToPositiveAdj map[ParamPropBits]scoreAdjustTyp + +func setupFlagToAdjMaps() { + resultFlagToPositiveAdj = map[ResultPropBits]scoreAdjustTyp{ + ResultIsAllocatedMem: returnFeedsConcreteToInterfaceCallAdj, + ResultAlwaysSameFunc: returnFeedsFuncToIndCallAdj, + ResultAlwaysSameConstant: returnFeedsConstToIfAdj, + } + paramFlagToPositiveAdj = map[ParamPropBits]scoreAdjustTyp{ + ParamMayFeedInterfaceMethodCall: passConcreteToNestedItfCallAdj, + ParamFeedsInterfaceMethodCall: passConcreteToItfCallAdj, + ParamMayFeedIndirectCall: passInlinableFuncToNestedIndCallAdj, + ParamFeedsIndirectCall: passInlinableFuncToIndCallAdj, + } +} + +// LargestNegativeScoreAdjustment tries to estimate the largest possible +// negative score adjustment that could be applied to a call of the +// function with the specified props. Example: +// +// func foo() { func bar(x int, p *int) int { +// ... if x < 0 { *p = x } +// } return 99 +// } +// +// Function 'foo' above on the left has no interesting properties, +// thus the most we'll adjust any call to it is the value for +// "call in loop". If the calculated cost of the function is 150, and +// the in-loop adjustment is 5 (for example), then there is not much +// point treating it as inlinable. On the other hand "bar" has a param +// property (parameter "x" feeds unmodified to an "if" statement) and +// a return property (always returns same constant) meaning that a +// given call _could_ be rescored down as much as -35 points; thus if +// the size of "bar" is 100 (for example) then there is at least a +// chance that scoring will enable inlining.
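+// +// For the record, with the default adjValues above the -35 figure for +// "bar" works out to passConstToIfAdj (-20) plus +// returnFeedsConstToIfAdj (-15).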
+func LargestNegativeScoreAdjustment(fn *ir.Func, props *FuncProps) int { + if resultFlagToPositiveAdj == nil { + setupFlagToAdjMaps() + } + var tmask scoreAdjustTyp + score := adjValues[inLoopAdj] // any call can be in a loop + for _, pf := range props.ParamFlags { + if adj, ok := paramFlagToPositiveAdj[pf]; ok { + score, tmask = adjustScore(adj, score, tmask) + } + } + for _, rf := range props.ResultFlags { + if adj, ok := resultFlagToPositiveAdj[rf]; ok { + score, tmask = adjustScore(adj, score, tmask) + } + } + + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= largestScore(%v) is %d\n", + fn, score) + } + + return score +} + +// LargestPositiveScoreAdjustment tries to estimate the largest possible +// positive score adjustment that could be applied to a given callsite. +// At the moment we don't have very many positive score adjustments, so +// this is just hard-coded, not table-driven. +func LargestPositiveScoreAdjustment(fn *ir.Func) int { + return adjValues[panicPathAdj] + adjValues[initFuncAdj] +} + +// callSiteTab contains entries for each call in the function +// currently being processed by InlineCalls; this variable will either +// be set to the 'scoreCallsCache' table below (for non-inlinable +// routines) or to the local 'cstab' entry in the fnInlHeur object for +// inlinable routines. +// +// NOTE: this assumes that inlining operations are happening in a serial, +// single-threaded fashion, which is true today but probably won't hold +// in the future (for example, we might want to score the callsites +// in multiple functions in parallel); if the inliner evolves in this +// direction we'll need to come up with a different approach here. +var callSiteTab CallSiteTab + +// scoreCallsCache caches a call site table and call site list between +// invocations of ScoreCalls so that we can reuse previously allocated +// storage. +var scoreCallsCache scoreCallsCacheType + +type scoreCallsCacheType struct { + tab CallSiteTab + csl []*CallSite +} + +// ScoreCalls assigns numeric scores to each of the callsites in +// function 'fn'; the lower the score, the more helpful we think it +// will be to inline. +// +// Unlike a lot of the other inline heuristics machinery, callsite +// scoring can't be done as part of the CanInline call for a function, +// due to the fact that we may be working on a non-trivial SCC. So for +// example with this SCC: +// +// func foo(x int) { func bar(x int, f func()) { +// if x != 0 { f() +// bar(x, func(){}) foo(x-1) +// } } +// } +// +// We don't want to perform scoring for the 'foo' call in "bar" until +// after foo has been analyzed, but it's conceivable that CanInline +// might visit bar before foo for this SCC. +func ScoreCalls(fn *ir.Func) { + if len(fn.Body) == 0 { + return + } + enableDebugTraceIfEnv() + + nameFinder := newNameFinder(fn) + + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= ScoreCalls(%v)\n", ir.FuncName(fn)) + } + + // If this is an inlinable function, use the precomputed + // call site table for it. If the function wasn't an inline + // candidate, collect a callsite table for it now.
+ var cstab CallSiteTab + if funcInlHeur, ok := fpmap[fn]; ok { + cstab = funcInlHeur.cstab + } else { + if len(scoreCallsCache.tab) != 0 { + panic("missing call to ScoreCallsCleanup") + } + if scoreCallsCache.tab == nil { + scoreCallsCache.tab = make(CallSiteTab) + } + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= building cstab for non-inl func %s\n", + ir.FuncName(fn)) + } + cstab = computeCallSiteTable(fn, fn.Body, scoreCallsCache.tab, nil, 0, + nameFinder) + } + + csa := makeCallSiteAnalyzer(fn) + const doCallResults = true + csa.scoreCallsRegion(fn, fn.Body, cstab, doCallResults, nil) + + disableDebugTrace() +} + +// scoreCallsRegion assigns numeric scores to each of the callsites in +// region 'region' within function 'fn'. This can be called on +// an entire function, or with 'region' set to a chunk of +// code corresponding to an inlined call. +func (csa *callSiteAnalyzer) scoreCallsRegion(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, doCallResults bool, ic *ir.InlinedCallExpr) { + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= scoreCallsRegion(%v, %s) len(cstab)=%d\n", + ir.FuncName(fn), region[0].Op().String(), len(cstab)) + } + + // Sort callsites to avoid any surprises with non deterministic + // map iteration order (this is probably not needed, but here just + // in case). + csl := scoreCallsCache.csl[:0] + for _, cs := range cstab { + csl = append(csl, cs) + } + scoreCallsCache.csl = csl[:0] + sort.Slice(csl, func(i, j int) bool { + return csl[i].ID < csl[j].ID + }) + + // Score each call site. + var resultNameTab map[*ir.Name]resultPropAndCS + for _, cs := range csl { + var cprops *FuncProps + fihcprops := false + desercprops := false + if funcInlHeur, ok := fpmap[cs.Callee]; ok { + cprops = funcInlHeur.props + fihcprops = true + } else if cs.Callee.Inl != nil { + cprops = DeserializeFromString(cs.Callee.Inl.Properties) + desercprops = true + } else { + if base.Debug.DumpInlFuncProps != "" { + fmt.Fprintf(os.Stderr, "=-= *** unable to score call to %s from %s\n", cs.Callee.Sym().Name, fmtFullPos(cs.Call.Pos())) + panic("should never happen") + } else { + continue + } + } + cs.computeCallSiteScore(csa, cprops) + + if doCallResults { + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= examineCallResults at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops) + } + resultNameTab = csa.examineCallResults(cs, resultNameTab) + } + + if debugTrace&debugTraceScoring != 0 { + fmt.Fprintf(os.Stderr, "=-= scoring call at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops) + } + } + + if resultNameTab != nil { + csa.rescoreBasedOnCallResultUses(fn, resultNameTab, cstab) + } + + disableDebugTrace() + + if ic != nil && callSiteTab != nil { + // Integrate the calls from this cstab into the table for the caller. + if err := callSiteTab.merge(cstab); err != nil { + base.FatalfAt(ic.Pos(), "%v", err) + } + } else { + callSiteTab = cstab + } +} + +// ScoreCallsCleanup resets the state of the callsite cache +// once ScoreCalls is done with a function. 
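+// +// A sketch of the intended pairing (assuming a single function being +// processed; the middle step is illustrative): +// +// ScoreCalls(fn) +// // ... consult scores, e.g. via GetCallSiteScore(fn, call) ... +// ScoreCallsCleanup()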
+func ScoreCallsCleanup() { + if base.Debug.DumpInlCallSiteScores != 0 { + if allCallSites == nil { + allCallSites = make(CallSiteTab) + } + for call, cs := range callSiteTab { + allCallSites[call] = cs + } + } + for k := range scoreCallsCache.tab { + delete(scoreCallsCache.tab, k) + } +} + +// GetCallSiteScore returns the previously calculated score for call +// within fn. +func GetCallSiteScore(fn *ir.Func, call *ir.CallExpr) (int, bool) { + if funcInlHeur, ok := fpmap[fn]; ok { + if cs, ok := funcInlHeur.cstab[call]; ok { + return cs.Score, true + } + } + if cs, ok := callSiteTab[call]; ok { + return cs.Score, true + } + return 0, false +} + +// BudgetExpansion returns the amount to relax/expand the base +// inlining budget when the new inliner is turned on; the inliner +// will add the returned value to the hairyness budget. +// +// Background: with the new inliner, the score for a given callsite +// can be adjusted down by some amount due to heuristics, however we +// won't know whether this is going to happen until much later after +// the CanInline call. This function returns the amount to relax the +// budget initially (to allow for a large score adjustment); later on +// in RevisitInlinability we'll look at each individual function to +// demote it if needed. +func BudgetExpansion(maxBudget int32) int32 { + if base.Debug.InlBudgetSlack != 0 { + return int32(base.Debug.InlBudgetSlack) + } + // In the default case, return maxBudget, which will effectively + // double the budget from 80 to 160; this should be good enough + // for most cases. + return maxBudget +} + +var allCallSites CallSiteTab + +// DumpInlCallSiteScores is invoked by the inliner if the debug flag +// "-d=dumpinlcallsitescores" is set; it dumps out a human-readable +// summary of all (potentially) inlinable callsites in the package, +// along with info on call site scoring and the adjustments made to a +// given score. Here profile is the PGO profile in use (may be +// nil), budgetCallback is a callback that can be invoked to find out +// the original pre-adjustment hairyness limit for the function, and +// inlineHotMaxBudget is the constant of the same name used in the +// inliner. Sample output lines: +// +// Score Adjustment Status Callee CallerPos ScoreFlags +// 115 40 DEMOTED cmd/compile/internal/abi.(*ABIParamAssignment).Offset expand_calls.go:1679:14|6 panicPathAdj +// 76 -5n PROMOTED runtime.persistentalloc mcheckmark.go:48:45|3 inLoopAdj +// 201 0 --- PGO unicode.DecodeRuneInString utf8.go:312:30|1 +// 7 -5 --- PGO internal/abi.Name.DataChecked type.go:625:22|0 inLoopAdj +// +// In the dump above, "Score" is the final score calculated for the +// callsite, "Adjustment" is the amount added to or subtracted from +// the original hairyness estimate to form the score. "Status" shows +// whether anything changed with the site -- did the adjustment bump +// it down just below the threshold ("PROMOTED") or instead bump it +// above the threshold ("DEMOTED"); this will be blank ("---") if no +// threshold was crossed as a result of the heuristics. Note that +// "Status" also shows whether PGO was involved. "Callee" is the name +// of the function called, "CallerPos" is the position of the +// callsite, and "ScoreFlags" is a digest of the specific properties +// we used to make adjustments to callsite score via heuristics. 
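+//
+// Since lower scores favor inlining, a negative "Adjustment" moves a
+// callsite toward being inlined and a positive one moves it away;
+// "Score" is simply the original cost ("hairyness") estimate plus
+// the adjustment.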
+func DumpInlCallSiteScores(profile *pgo.Profile, budgetCallback func(fn *ir.Func, profile *pgo.Profile) (int32, bool)) { + + var indirectlyDueToPromotion func(cs *CallSite) bool + indirectlyDueToPromotion = func(cs *CallSite) bool { + bud, _ := budgetCallback(cs.Callee, profile) + hairyval := cs.Callee.Inl.Cost + score := int32(cs.Score) + if hairyval > bud && score <= bud { + return true + } + if cs.parent != nil { + return indirectlyDueToPromotion(cs.parent) + } + return false + } + + genstatus := func(cs *CallSite) string { + hairyval := cs.Callee.Inl.Cost + bud, isPGO := budgetCallback(cs.Callee, profile) + score := int32(cs.Score) + st := "---" + expinl := false + switch { + case hairyval <= bud && score <= bud: + // "Normal" inlined case: hairy val sufficiently low that + // it would have been inlined anyway without heuristics. + expinl = true + case hairyval > bud && score > bud: + // "Normal" not inlined case: hairy val sufficiently high + // and scoring didn't lower it. + case hairyval > bud && score <= bud: + // Promoted: we would not have inlined it before, but + // after score adjustment we decided to inline. + st = "PROMOTED" + expinl = true + case hairyval <= bud && score > bud: + // Demoted: we would have inlined it before, but after + // score adjustment we decided not to inline. + st = "DEMOTED" + } + inlined := cs.aux&csAuxInlined != 0 + indprom := false + if cs.parent != nil { + indprom = indirectlyDueToPromotion(cs.parent) + } + if inlined && indprom { + st += "|INDPROM" + } + if inlined && !expinl { + st += "|[NI?]" + } else if !inlined && expinl { + st += "|[IN?]" + } + if isPGO { + st += "|PGO" + } + return st + } + + if base.Debug.DumpInlCallSiteScores != 0 { + var sl []*CallSite + for _, cs := range allCallSites { + sl = append(sl, cs) + } + sort.Slice(sl, func(i, j int) bool { + if sl[i].Score != sl[j].Score { + return sl[i].Score < sl[j].Score + } + fni := ir.PkgFuncName(sl[i].Callee) + fnj := ir.PkgFuncName(sl[j].Callee) + if fni != fnj { + return fni < fnj + } + ecsi := EncodeCallSiteKey(sl[i]) + ecsj := EncodeCallSiteKey(sl[j]) + return ecsi < ecsj + }) + + mkname := func(fn *ir.Func) string { + var n string + if fn == nil || fn.Nname == nil { + return "<nil>" + } + if fn.Sym().Pkg == types.LocalPkg { + n = "·" + fn.Sym().Name + } else { + n = ir.PkgFuncName(fn) + } + // don't try to print super-long names + if len(n) <= 64 { + return n + } + return n[:32] + "..." + n[len(n)-32:len(n)] + } + + if len(sl) != 0 { + fmt.Fprintf(os.Stdout, "# scores for package %s\n", types.LocalPkg.Path) + fmt.Fprintf(os.Stdout, "# Score Adjustment Status Callee CallerPos Flags ScoreFlags\n") + } + for _, cs := range sl { + hairyval := cs.Callee.Inl.Cost + adj := int32(cs.Score) - hairyval + nm := mkname(cs.Callee) + ecc := EncodeCallSiteKey(cs) + fmt.Fprintf(os.Stdout, "%d %d\t%s\t%s\t%s\t%s\n", + cs.Score, adj, genstatus(cs), + nm, ecc, + cs.ScoreMask.String()) + } + } +} diff --git a/src/cmd/compile/internal/inline/inlheur/serialize.go b/src/cmd/compile/internal/inline/inlheur/serialize.go new file mode 100644 index 0000000..d650626 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/serialize.go @@ -0,0 +1,80 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
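+
+// Serialization notes: FuncProps are encoded as a compact string of
+// ULEB128 values, with the Flags word first, then the ParamFlags
+// slice and the ResultFlags slice, each preceded by its length. In
+// ULEB128 a value is written seven bits at a time, low bits first,
+// with the high bit of a byte set when more bytes follow; for
+// example, 300 (binary 100101100) encodes as the two bytes 0xAC 0x02.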
+ +package inlheur + +import "strings" + +func (funcProps *FuncProps) SerializeToString() string { + if funcProps == nil { + return "" + } + var sb strings.Builder + writeUleb128(&sb, uint64(funcProps.Flags)) + writeUleb128(&sb, uint64(len(funcProps.ParamFlags))) + for _, pf := range funcProps.ParamFlags { + writeUleb128(&sb, uint64(pf)) + } + writeUleb128(&sb, uint64(len(funcProps.ResultFlags))) + for _, rf := range funcProps.ResultFlags { + writeUleb128(&sb, uint64(rf)) + } + return sb.String() +} + +func DeserializeFromString(s string) *FuncProps { + if len(s) == 0 { + return nil + } + var funcProps FuncProps + var v uint64 + sl := []byte(s) + v, sl = readULEB128(sl) + funcProps.Flags = FuncPropBits(v) + v, sl = readULEB128(sl) + funcProps.ParamFlags = make([]ParamPropBits, v) + for i := range funcProps.ParamFlags { + v, sl = readULEB128(sl) + funcProps.ParamFlags[i] = ParamPropBits(v) + } + v, sl = readULEB128(sl) + funcProps.ResultFlags = make([]ResultPropBits, v) + for i := range funcProps.ResultFlags { + v, sl = readULEB128(sl) + funcProps.ResultFlags[i] = ResultPropBits(v) + } + return &funcProps +} + +func readULEB128(sl []byte) (value uint64, rsl []byte) { + var shift uint + + for { + b := sl[0] + sl = sl[1:] + value |= (uint64(b&0x7F) << shift) + if b&0x80 == 0 { + break + } + shift += 7 + } + return value, sl +} + +func writeUleb128(sb *strings.Builder, v uint64) { + if v < 128 { + sb.WriteByte(uint8(v)) + return + } + more := true + for more { + c := uint8(v & 0x7f) + v >>= 7 + more = v != 0 + if more { + c |= 0x80 + } + sb.WriteByte(c) + } +} diff --git a/src/cmd/compile/internal/inline/inlheur/testdata/dumpscores.go b/src/cmd/compile/internal/inline/inlheur/testdata/dumpscores.go new file mode 100644 index 0000000..6f2f760 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/testdata/dumpscores.go @@ -0,0 +1,45 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package dumpscores + +var G int + +func inlinable(x int, f func(int) int) int { + if x != 0 { + return 1 + } + G += noninl(x) + return f(x) +} + +func inlinable2(x int) int { + return noninl(-x) +} + +//go:noinline +func noninl(x int) int { + return x + 1 +} + +func tooLargeToInline(x int) int { + if x > 101 { + // Drive up the cost of inlining this func over the + // regular threshold. + return big(big(big(big(big(G + x))))) + } + if x < 100 { + // make sure this callsite is scored properly + G += inlinable(101, inlinable2) + if G == 101 { + return 0 + } + panic(inlinable2(3)) + } + return G +} + +func big(q int) int { + return noninl(q) + noninl(-q) +} diff --git a/src/cmd/compile/internal/inline/inlheur/testdata/props/README.txt b/src/cmd/compile/internal/inline/inlheur/testdata/props/README.txt new file mode 100644 index 0000000..af5ebec --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/testdata/props/README.txt @@ -0,0 +1,77 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +Notes on the format of the testcase files in +cmd/compile/internal/inline/inlheur/testdata/props: + +- each (compilable) file contains input Go code and expected results + in the form of column-0 comments. + +- functions or methods that begin with "T_" are targeted for testing, + as well as "init" functions; all other functions are ignored. 
+
+- function header comments begin with a line containing
+ the file name, function name, definition line, then index
+ and a count of the number of funcs that share that same
+ definition line (needed to support generics). Example:
+
+ // foo.go T_mumble 35 1 4
+
+ Here "T_mumble" is defined at line 35, and it is func 1
+ out of the 4 funcs that share that same line.
+
+- function property expected results appear as comments immediately
+ prior to the function. For example, here we have first the function
+ name ("T_feeds_if_simple"), then a human-readable dump of the function
+ properties, as well as the JSON for the properties object, each
+ section separated by a "<>" delimiter.
+
+ // params.go T_feeds_if_simple 35 0 1
+ // RecvrParamFlags:
+ // 0: ParamFeedsIfOrSwitch
+ // <endpropsdump>
+ // {"Flags":0,"RecvrParamFlags":[8],"ReturnFlags":[]}
+ // callsite: params.go:34:10|0 "CallSiteOnPanicPath" 2
+ // <endcallsites>
+ // <endfuncpreamble>
+ func T_feeds_if_simple(x int) {
+ if x < 100 {
+ os.Exit(1)
+ }
+ println(x)
+ }
+
+- when the test runs, it will compile the Go source file with an
+ option to dump out function properties, then compare the new dump
+ for each function with the JSON appearing in the header comment for
+ the function (in the example above, the JSON appears between
+ "<endpropsdump>" and "<endfuncpreamble>"). The material prior to the
+ dump is simply there for human consumption, so that a developer can
+ easily see that "RecvrParamFlags":[8] means that the first parameter
+ has flag ParamFeedsIfOrSwitch.
+
+- when making changes to the compiler (which can alter the expected
+ results) or edits/additions to the go code in the testcase files,
+ you can remaster the results by running
+
+ go test -v -count=1 .
+
+ In the trace output of this run, you'll see messages of the form
+
+ === RUN TestFuncProperties
+ funcprops_test.go:NNN: update-expected: emitted updated file
+ testdata/props/XYZ.go.new
+ funcprops_test.go:MMM: please compare the two files, then overwrite
+ testdata/props/XYZ.go with testdata/props/XYZ.go.new
+
+ at which point you can compare the old and new files by hand, then
+ overwrite the *.go file with the *.go.new file if you are happy with
+ the diffs.
+
+- note that the remastering process will strip out any existing
+ column-0 (unindented) comments; if you write comments that you
+ want to see preserved, use "/* */" or indent them.
+
+
+
diff --git a/src/cmd/compile/internal/inline/inlheur/testdata/props/acrosscall.go b/src/cmd/compile/internal/inline/inlheur/testdata/props/acrosscall.go new file mode 100644 index 0000000..a8166fd --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/testdata/props/acrosscall.go @@ -0,0 +1,214 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +
+// DO NOT EDIT (use 'go test -v -update-expected' instead.)
+// See cmd/compile/internal/inline/inlheur/testdata/props/README.txt
+// for more information on the format of this file.
+// <endfilepreamble> +package params + +// acrosscall.go T_feeds_indirect_call_via_call_toplevel 19 0 1 +// ParamFlags +// 0 ParamFeedsIndirectCall +// <endpropsdump> +// {"Flags":0,"ParamFlags":[8],"ResultFlags":null} +// callsite: acrosscall.go:20:12|0 flagstr "" flagval 0 score 60 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_feeds_indirect_call_via_call_toplevel(f func(int)) { + callsparam(f) +} + +// acrosscall.go T_feeds_indirect_call_via_call_conditional 31 0 1 +// ParamFlags +// 0 ParamMayFeedIndirectCall +// <endpropsdump> +// {"Flags":0,"ParamFlags":[16],"ResultFlags":null} +// callsite: acrosscall.go:33:13|0 flagstr "" flagval 0 score 60 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_feeds_indirect_call_via_call_conditional(f func(int)) { + if G != 101 { + callsparam(f) + } +} + +// acrosscall.go T_feeds_conditional_indirect_call_via_call_toplevel 45 0 1 +// ParamFlags +// 0 ParamMayFeedIndirectCall +// <endpropsdump> +// {"Flags":0,"ParamFlags":[16],"ResultFlags":null} +// callsite: acrosscall.go:46:23|0 flagstr "" flagval 0 score 64 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_feeds_conditional_indirect_call_via_call_toplevel(f func(int)) { + callsparamconditional(f) +} + +// acrosscall.go T_feeds_if_via_call 57 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":null} +// callsite: acrosscall.go:58:9|0 flagstr "" flagval 0 score 8 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_via_call(x int) { + feedsif(x) +} + +// acrosscall.go T_feeds_if_via_call_conditional 69 0 1 +// ParamFlags +// 0 ParamMayFeedIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[64],"ResultFlags":null} +// callsite: acrosscall.go:71:10|0 flagstr "" flagval 0 score 8 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_via_call_conditional(x int) { + if G != 101 { + feedsif(x) + } +} + +// acrosscall.go T_feeds_conditional_if_via_call 83 0 1 +// ParamFlags +// 0 ParamMayFeedIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[64],"ResultFlags":null} +// callsite: acrosscall.go:84:20|0 flagstr "" flagval 0 score 12 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_feeds_conditional_if_via_call(x int) { + feedsifconditional(x) +} + +// acrosscall.go T_multifeeds1 97 0 1 +// ParamFlags +// 0 ParamFeedsIndirectCall|ParamMayFeedIndirectCall +// 1 ParamNoInfo +// <endpropsdump> +// {"Flags":0,"ParamFlags":[24,0],"ResultFlags":null} +// callsite: acrosscall.go:98:12|0 flagstr "" flagval 0 score 60 mask 0 maskstr "" +// callsite: acrosscall.go:99:23|1 flagstr "" flagval 0 score 64 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_multifeeds1(f1, f2 func(int)) { + callsparam(f1) + callsparamconditional(f1) +} + +// acrosscall.go T_acrosscall_returnsconstant 110 0 1 +// ResultFlags +// 0 ResultAlwaysSameConstant +// <endpropsdump> +// {"Flags":0,"ParamFlags":null,"ResultFlags":[8]} +// callsite: acrosscall.go:111:24|0 flagstr "" flagval 0 score 2 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_acrosscall_returnsconstant() int { + return returnsconstant() +} + +// acrosscall.go T_acrosscall_returnsmem 122 0 1 +// ResultFlags +// 0 ResultIsAllocatedMem +// <endpropsdump> +// {"Flags":0,"ParamFlags":null,"ResultFlags":[2]} +// callsite: acrosscall.go:123:19|0 flagstr "" flagval 0 score 2 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_acrosscall_returnsmem() 
*int { + return returnsmem() +} + +// acrosscall.go T_acrosscall_returnscci 134 0 1 +// ResultFlags +// 0 ResultIsConcreteTypeConvertedToInterface +// <endpropsdump> +// {"Flags":0,"ParamFlags":null,"ResultFlags":[4]} +// callsite: acrosscall.go:135:19|0 flagstr "" flagval 0 score 7 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_acrosscall_returnscci() I { + return returnscci() +} + +// acrosscall.go T_acrosscall_multiret 144 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":[0]} +// callsite: acrosscall.go:146:25|0 flagstr "" flagval 0 score 2 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_acrosscall_multiret(q int) int { + if q != G { + return returnsconstant() + } + return 0 +} + +// acrosscall.go T_acrosscall_multiret2 158 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":[0]} +// callsite: acrosscall.go:160:25|0 flagstr "" flagval 0 score 2 mask 0 maskstr "" +// callsite: acrosscall.go:162:25|1 flagstr "" flagval 0 score 2 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_acrosscall_multiret2(q int) int { + if q == G { + return returnsconstant() + } else { + return returnsconstant() + } +} + +func callsparam(f func(int)) { + f(2) +} + +func callsparamconditional(f func(int)) { + if G != 101 { + f(2) + } +} + +func feedsif(x int) int { + if x != 101 { + return 42 + } + return 43 +} + +func feedsifconditional(x int) int { + if G != 101 { + if x != 101 { + return 42 + } + } + return 43 +} + +func returnsconstant() int { + return 42 +} + +func returnsmem() *int { + return new(int) +} + +func returnscci() I { + var q Q + return q +} + +type I interface { + Foo() +} + +type Q int + +func (q Q) Foo() { +} + +var G int diff --git a/src/cmd/compile/internal/inline/inlheur/testdata/props/calls.go b/src/cmd/compile/internal/inline/inlheur/testdata/props/calls.go new file mode 100644 index 0000000..5cc217b --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/testdata/props/calls.go @@ -0,0 +1,240 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// DO NOT EDIT (use 'go test -v -update-expected' instead.) +// See cmd/compile/internal/inline/inlheur/testdata/props/README.txt +// for more information on the format of this file. 
+// <endfilepreamble>
+package calls
+
+import "os"
+
+// calls.go T_call_in_panic_arg 19 0 1
+// <endpropsdump>
+// {"Flags":0,"ParamFlags":[0],"ResultFlags":null}
+// callsite: calls.go:21:15|0 flagstr "CallSiteOnPanicPath" flagval 2 score 42 mask 1 maskstr "panicPathAdj"
+// <endcallsites>
+// <endfuncpreamble>
+func T_call_in_panic_arg(x int) {
+ if x < G {
+ panic(callee(x))
+ }
+}
+
+// calls.go T_calls_in_loops 32 0 1
+// <endpropsdump>
+// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":null}
+// callsite: calls.go:34:9|0 flagstr "CallSiteInLoop" flagval 1 score -3 mask 4 maskstr "inLoopAdj"
+// callsite: calls.go:37:9|1 flagstr "CallSiteInLoop" flagval 1 score -3 mask 4 maskstr "inLoopAdj"
+// <endcallsites>
+// <endfuncpreamble>
+func T_calls_in_loops(x int, q []string) {
+ for i := 0; i < x; i++ {
+ callee(i)
+ }
+ for _, s := range q {
+ callee(len(s))
+ }
+}
+
+// calls.go T_calls_in_pseudo_loop 48 0 1
+// <endpropsdump>
+// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":null}
+// callsite: calls.go:50:9|0 flagstr "" flagval 0 score 2 mask 0 maskstr ""
+// callsite: calls.go:54:9|1 flagstr "" flagval 0 score 2 mask 0 maskstr ""
+// <endcallsites>
+// <endfuncpreamble>
+func T_calls_in_pseudo_loop(x int, q []string) {
+ for i := 0; i < x; i++ {
+ callee(i)
+ return
+ }
+ for _, s := range q {
+ callee(len(s))
+ break
+ }
+}
+
+// calls.go T_calls_on_panic_paths 67 0 1
+// <endpropsdump>
+// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":null}
+// callsite: calls.go:69:9|0 flagstr "CallSiteOnPanicPath" flagval 2 score 42 mask 1 maskstr "panicPathAdj"
+// callsite: calls.go:73:9|1 flagstr "CallSiteOnPanicPath" flagval 2 score 42 mask 1 maskstr "panicPathAdj"
+// callsite: calls.go:77:12|2 flagstr "CallSiteOnPanicPath" flagval 2 score 102 mask 1 maskstr "panicPathAdj"
+// <endcallsites>
+// <endfuncpreamble>
+func T_calls_on_panic_paths(x int, q []string) {
+ if x+G == 101 {
+ callee(x)
+ panic("ouch")
+ }
+ if x < G-101 {
+ callee(x)
+ if len(q) == 0 {
+ G++
+ }
+ callsexit(x)
+ }
+}
+
+// calls.go T_calls_not_on_panic_paths 93 0 1
+// ParamFlags
+// 0 ParamFeedsIfOrSwitch|ParamMayFeedIfOrSwitch
+// 1 ParamNoInfo
+// <endpropsdump>
+// {"Flags":0,"ParamFlags":[96,0],"ResultFlags":null}
+// callsite: calls.go:103:9|0 flagstr "" flagval 0 score 2 mask 0 maskstr ""
+// callsite: calls.go:112:9|1 flagstr "" flagval 0 score 2 mask 0 maskstr ""
+// callsite: calls.go:115:9|2 flagstr "" flagval 0 score 2 mask 0 maskstr ""
+// callsite: calls.go:119:12|3 flagstr "CallSiteOnPanicPath" flagval 2 score 102 mask 1 maskstr "panicPathAdj"
+// <endcallsites>
+// <endfuncpreamble>
+func T_calls_not_on_panic_paths(x int, q []string) {
+ if x != G {
+ panic("ouch")
+ /* Notes: */
+ /* - we only look for post-dominating panic/exit, so */
+ /* this site will in fact not have a panicpath flag */
+ /* - vet will complain about this site as unreachable */
+ callee(x)
+ }
+ if x != G {
+ callee(x)
+ if x < 100 {
+ panic("ouch")
+ }
+ }
+ if x+G == 101 {
+ if x < 100 {
+ panic("ouch")
+ }
+ callee(x)
+ }
+ if x < -101 {
+ callee(x)
+ if len(q) == 0 {
+ return
+ }
+ callsexit(x)
+ }
+}
+
+// calls.go init.0 129 0 1
+// <endpropsdump>
+// {"Flags":0,"ParamFlags":null,"ResultFlags":null}
+// callsite: calls.go:130:16|0 flagstr "CallSiteInInitFunc" flagval 4 score 22 mask 2 maskstr "initFuncAdj"
+// <endcallsites>
+// <endfuncpreamble>
+func init() {
+ println(callee(5))
+}
+
+// calls.go T_pass_inlinable_func_to_param_feeding_indirect_call 140 0 1
+// <endpropsdump>
+// {"Flags":0,"ParamFlags":[0],"ResultFlags":[0]}
+// callsite: calls.go:141:19|0 flagstr "" flagval 0 score 16 mask 512 maskstr "passInlinableFuncToIndCallAdj"
+// callsite: calls.go:141:19|calls.go:232:10|0 flagstr "" flagval 0 score 2 mask 0 maskstr ""
+// <endcallsites>
+// <endfuncpreamble>
+func T_pass_inlinable_func_to_param_feeding_indirect_call(x int) int {
+ return callsParam(x, callee)
+}
+
+// calls.go T_pass_noninlinable_func_to_param_feeding_indirect_call 150 0 1
+// <endpropsdump>
+// {"Flags":0,"ParamFlags":[0],"ResultFlags":[0]}
+// callsite: calls.go:153:19|0 flagstr "" flagval 0 score 36 mask 128 maskstr "passFuncToIndCallAdj"
+// <endcallsites>
+// <endfuncpreamble>
+func T_pass_noninlinable_func_to_param_feeding_indirect_call(x int) int {
+ // if we inline callsParam we can convert the indirect call
+ // to a direct call, but we can't inline it.
+ return callsParam(x, calleeNoInline)
+}
+
+// calls.go T_pass_inlinable_func_to_param_feeding_nested_indirect_call 165 0 1
+// ParamFlags
+// 0 ParamFeedsIfOrSwitch
+// <endpropsdump>
+// {"Flags":0,"ParamFlags":[32],"ResultFlags":[0]}
+// callsite: calls.go:166:25|0 flagstr "" flagval 0 score 27 mask 1024 maskstr "passInlinableFuncToNestedIndCallAdj"
+// callsite: calls.go:166:25|calls.go:237:11|0 flagstr "" flagval 0 score 2 mask 0 maskstr ""
+// <endcallsites>
+// <endfuncpreamble>
+func T_pass_inlinable_func_to_param_feeding_nested_indirect_call(x int) int {
+ return callsParamNested(x, callee)
+}
+
+// calls.go T_pass_noninlinable_func_to_param_feeding_nested_indirect_call 177 0 1
+// ParamFlags
+// 0 ParamFeedsIfOrSwitch
+// <endpropsdump>
+// {"Flags":0,"ParamFlags":[32],"ResultFlags":[0]}
+// callsite: calls.go:178:25|0 flagstr "" flagval 0 score 47 mask 256 maskstr "passFuncToNestedIndCallAdj"
+// <endcallsites>
+// <endfuncpreamble>
+func T_pass_noninlinable_func_to_param_feeding_nested_indirect_call(x int) int {
+ return callsParamNested(x, calleeNoInline)
+}
+
+// calls.go T_call_scoring_in_noninlinable_func 195 0 1
+// <endpropsdump>
+// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":[0]}
+// callsite: calls.go:209:14|0 flagstr "CallSiteOnPanicPath" flagval 2 score 42 mask 1 maskstr "panicPathAdj"
+// callsite: calls.go:210:15|1 flagstr "CallSiteOnPanicPath" flagval 2 score 42 mask 1 maskstr "panicPathAdj"
+// callsite: calls.go:212:19|2 flagstr "" flagval 0 score 16 mask 512 maskstr "passInlinableFuncToIndCallAdj"
+// callsite: calls.go:212:19|calls.go:232:10|0 flagstr "" flagval 0 score 4 mask 0 maskstr ""
+// <endcallsites>
+// <endfuncpreamble>
+// calls.go T_call_scoring_in_noninlinable_func.func1 212 0 1
+// <endpropsdump>
+// {"Flags":0,"ParamFlags":[0],"ResultFlags":[0]}
+// <endcallsites>
+// <endfuncpreamble>
+func T_call_scoring_in_noninlinable_func(x int, sl []int) int {
+ if x == 101 {
+ // Drive up the cost of inlining this func over the
+ // regular threshold.
+ for i := 0; i < 10; i++ {
+ for j := 0; j < i; j++ {
+ sl = append(sl, append(sl, append(sl, append(sl, x)...)...)...)
+ sl = append(sl, sl[0], sl[1], sl[2]) + x += calleeNoInline(x) + } + } + } + if x < 100 { + // make sure this callsite is scored properly + G += callee(101) + panic(callee(x)) + } + return callsParam(x, func(y int) int { return y + x }) +} + +var G int + +func callee(x int) int { + return x +} + +func calleeNoInline(x int) int { + defer func() { G++ }() + return x +} + +func callsexit(x int) { + println(x) + os.Exit(x) +} + +func callsParam(x int, f func(int) int) int { + return f(x) +} + +func callsParamNested(x int, f func(int) int) int { + if x < 0 { + return f(x) + } + return 0 +} diff --git a/src/cmd/compile/internal/inline/inlheur/testdata/props/funcflags.go b/src/cmd/compile/internal/inline/inlheur/testdata/props/funcflags.go new file mode 100644 index 0000000..f3d7424 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/testdata/props/funcflags.go @@ -0,0 +1,341 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// DO NOT EDIT (use 'go test -v -update-expected' instead.) +// See cmd/compile/internal/inline/inlheur/testdata/props/README.txt +// for more information on the format of this file. +// <endfilepreamble> + +package funcflags + +import "os" + +// funcflags.go T_simple 20 0 1 +// Flags FuncPropNeverReturns +// <endpropsdump> +// {"Flags":1,"ParamFlags":null,"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_simple() { + panic("bad") +} + +// funcflags.go T_nested 32 0 1 +// Flags FuncPropNeverReturns +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":1,"ParamFlags":[32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_nested(x int) { + if x < 10 { + panic("bad") + } else { + panic("good") + } +} + +// funcflags.go T_block1 46 0 1 +// Flags FuncPropNeverReturns +// <endpropsdump> +// {"Flags":1,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_block1(x int) { + panic("bad") + if x < 10 { + return + } +} + +// funcflags.go T_block2 60 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_block2(x int) { + if x < 10 { + return + } + panic("bad") +} + +// funcflags.go T_switches1 75 0 1 +// Flags FuncPropNeverReturns +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":1,"ParamFlags":[32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_switches1(x int) { + switch x { + case 1: + panic("one") + case 2: + panic("two") + } + panic("whatev") +} + +// funcflags.go T_switches1a 92 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_switches1a(x int) { + switch x { + case 2: + panic("two") + } +} + +// funcflags.go T_switches2 106 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_switches2(x int) { + switch x { + case 1: + panic("one") + case 2: + panic("two") + default: + return + } + panic("whatev") +} + +// funcflags.go T_switches3 123 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_switches3(x interface{}) { + switch x.(type) { + case bool: + panic("one") + case float32: + panic("two") + } +} + +// 
funcflags.go T_switches4 138 0 1 +// Flags FuncPropNeverReturns +// <endpropsdump> +// {"Flags":1,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_switches4(x int) { + switch x { + case 1: + x++ + fallthrough + case 2: + panic("two") + fallthrough + default: + panic("bad") + } + panic("whatev") +} + +// funcflags.go T_recov 157 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_recov(x int) { + if x := recover(); x != nil { + panic(x) + } +} + +// funcflags.go T_forloops1 169 0 1 +// Flags FuncPropNeverReturns +// <endpropsdump> +// {"Flags":1,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_forloops1(x int) { + for { + panic("wokketa") + } +} + +// funcflags.go T_forloops2 180 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_forloops2(x int) { + for { + println("blah") + if true { + break + } + panic("warg") + } +} + +// funcflags.go T_forloops3 195 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_forloops3(x int) { + for i := 0; i < 101; i++ { + println("blah") + if true { + continue + } + panic("plark") + } + for i := range [10]int{} { + println(i) + panic("plark") + } + panic("whatev") +} + +// funcflags.go T_hasgotos 215 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_hasgotos(x int, y int) { + { + xx := x + panic("bad") + lab1: + goto lab2 + lab2: + if false { + goto lab1 + } else { + goto lab4 + } + lab4: + if xx < y { + lab3: + if false { + goto lab3 + } + } + println(9) + } +} + +// funcflags.go T_break_with_label 246 0 1 +// ParamFlags +// 0 ParamMayFeedIfOrSwitch +// 1 ParamNoInfo +// <endpropsdump> +// {"Flags":0,"ParamFlags":[64,0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_break_with_label(x int, y int) { + // presence of break with label should pessimize this func + // (similar to goto). + panic("bad") +lab1: + for { + println("blah") + if x < 0 { + break lab1 + } + panic("hubba") + } +} + +// funcflags.go T_callsexit 268 0 1 +// Flags FuncPropNeverReturns +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":1,"ParamFlags":[32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_callsexit(x int) { + if x < 0 { + os.Exit(1) + } + os.Exit(2) +} + +// funcflags.go T_exitinexpr 281 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// callsite: funcflags.go:286:18|0 flagstr "CallSiteOnPanicPath" flagval 2 score 102 mask 1 maskstr "panicPathAdj" +// <endcallsites> +// <endfuncpreamble> +func T_exitinexpr(x int) { + // This function does indeed unconditionally call exit, since the + // first thing it does is invoke exprcallsexit, however from the + // perspective of this function, the call is not at the statement + // level, so we'll wind up missing it. 
+ if exprcallsexit(x) < 0 { + println("foo") + } +} + +// funcflags.go T_select_noreturn 297 0 1 +// Flags FuncPropNeverReturns +// <endpropsdump> +// {"Flags":1,"ParamFlags":[0,0,0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_select_noreturn(chi chan int, chf chan float32, p *int) { + rv := 0 + select { + case i := <-chi: + rv = i + case f := <-chf: + rv = int(f) + } + *p = rv + panic("bad") +} + +// funcflags.go T_select_mayreturn 314 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,0,0],"ResultFlags":[0]} +// <endcallsites> +// <endfuncpreamble> +func T_select_mayreturn(chi chan int, chf chan float32, p *int) int { + rv := 0 + select { + case i := <-chi: + rv = i + return i + case f := <-chf: + rv = int(f) + } + *p = rv + panic("bad") +} + +// funcflags.go T_calls_callsexit 334 0 1 +// Flags FuncPropNeverReturns +// <endpropsdump> +// {"Flags":1,"ParamFlags":[0],"ResultFlags":null} +// callsite: funcflags.go:335:15|0 flagstr "CallSiteOnPanicPath" flagval 2 score 102 mask 1 maskstr "panicPathAdj" +// <endcallsites> +// <endfuncpreamble> +func T_calls_callsexit(x int) { + exprcallsexit(x) +} + +func exprcallsexit(x int) int { + os.Exit(x) + return x +} diff --git a/src/cmd/compile/internal/inline/inlheur/testdata/props/params.go b/src/cmd/compile/internal/inline/inlheur/testdata/props/params.go new file mode 100644 index 0000000..1a3073c --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/testdata/props/params.go @@ -0,0 +1,367 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// DO NOT EDIT (use 'go test -v -update-expected' instead.) +// See cmd/compile/internal/inline/inlheur/testdata/props/README.txt +// for more information on the format of this file. 
+// <endfilepreamble> +package params + +import "os" + +// params.go T_feeds_if_simple 20 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_simple(x int) { + if x < 100 { + os.Exit(1) + } + println(x) +} + +// params.go T_feeds_if_nested 35 0 1 +// ParamFlags +// 0 ParamMayFeedIfOrSwitch +// 1 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[64,32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_nested(x, y int) { + if y != 0 { + if x < 100 { + os.Exit(1) + } + } + println(x) +} + +// params.go T_feeds_if_pointer 51 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_pointer(xp *int) { + if xp != nil { + os.Exit(1) + } + println(xp) +} + +// params.go T.T_feeds_if_simple_method 66 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// 1 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32,32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func (r T) T_feeds_if_simple_method(x int) { + if x < 100 { + os.Exit(1) + } + if r != 99 { + os.Exit(2) + } + println(x) +} + +// params.go T_feeds_if_blanks 86 0 1 +// ParamFlags +// 0 ParamNoInfo +// 1 ParamFeedsIfOrSwitch +// 2 ParamNoInfo +// 3 ParamNoInfo +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,32,0,0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_blanks(_ string, x int, _ bool, _ bool) { + // blanks ignored; from a props perspective "x" is param 0 + if x < 100 { + os.Exit(1) + } + println(x) +} + +// params.go T_feeds_if_with_copy 101 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_with_copy(x int) { + // simple copy here -- we get this case + xx := x + if xx < 100 { + os.Exit(1) + } + println(x) +} + +// params.go T_feeds_if_with_copy_expr 115 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_with_copy_expr(x int) { + // this case (copy of expression) currently not handled. + xx := x < 100 + if xx { + os.Exit(1) + } + println(x) +} + +// params.go T_feeds_switch 131 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_switch(x int) { + switch x { + case 101: + println(101) + case 202: + panic("bad") + } + println(x) +} + +// params.go T_feeds_if_toocomplex 146 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_toocomplex(x int, y int) { + // not handled at the moment; we only look for cases where + // an "if" or "switch" can be simplified based on a single + // constant param, not a combination of constant params. 
+ if x < y { + panic("bad") + } + println(x + y) +} + +// params.go T_feeds_if_redefined 161 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_redefined(x int) { + if x < G { + x++ + } + if x == 101 { + panic("bad") + } +} + +// params.go T_feeds_if_redefined2 175 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_redefined2(x int) { + // this currently classifies "x" as "no info", since the analysis we + // use to check for reassignments/redefinitions is not flow-sensitive, + // but we could probably catch this case with better analysis or + // high-level SSA. + if x == 101 { + panic("bad") + } + if x < G { + x++ + } +} + +// params.go T_feeds_multi_if 196 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// 1 ParamNoInfo +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32,0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_multi_if(x int, y int) { + // Here we have one "if" that is too complex (x < y) but one that is + // simple enough. Currently we enable the heuristic for this. It's + // possible to imagine this being a bad thing if the function in + // question is sufficiently large, but if it's too large we probably + // can't inline it anyhow. + if x < y { + panic("bad") + } + if x < 10 { + panic("whatev") + } + println(x + y) +} + +// params.go T_feeds_if_redefined_indirectwrite 216 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_redefined_indirectwrite(x int) { + ax := &x + if G != 2 { + *ax = G + } + if x == 101 { + panic("bad") + } +} + +// params.go T_feeds_if_redefined_indirectwrite_copy 231 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_redefined_indirectwrite_copy(x int) { + // we don't catch this case, "x" is marked as no info, + // since we're conservative about redefinitions. + ax := &x + cx := x + if G != 2 { + *ax = G + } + if cx == 101 { + panic("bad") + } +} + +// params.go T_feeds_if_expr1 251 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_expr1(x int) { + if x == 101 || x == 102 || x&0xf == 0 { + panic("bad") + } +} + +// params.go T_feeds_if_expr2 262 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_expr2(x int) { + if (x*x)-(x+x)%x == 101 || x&0xf == 0 { + panic("bad") + } +} + +// params.go T_feeds_if_expr3 273 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_expr3(x int) { + if x-(x&0x1)^378 > (1 - G) { + panic("bad") + } +} + +// params.go T_feeds_if_shift_may_panic 284 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":[0]} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_shift_may_panic(x int) *int { + // here if "x" is a constant like 2, we could simplify the "if", + // but if we were to pass in a negative value for "x" we can't + // fold the condition due to the need to panic on negative shift. 
+ if 1<<x > 1024 { + return nil + } + return &G +} + +// params.go T_feeds_if_maybe_divide_by_zero 299 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_if_maybe_divide_by_zero(x int) { + if 99/x == 3 { + return + } + println("blarg") +} + +// params.go T_feeds_indcall 313 0 1 +// ParamFlags +// 0 ParamMayFeedIndirectCall +// <endpropsdump> +// {"Flags":0,"ParamFlags":[16],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_indcall(x func()) { + if G != 20 { + x() + } +} + +// params.go T_feeds_indcall_and_if 326 0 1 +// ParamFlags +// 0 ParamMayFeedIndirectCall|ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[48],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_indcall_and_if(x func()) { + if x != nil { + x() + } +} + +// params.go T_feeds_indcall_with_copy 339 0 1 +// ParamFlags +// 0 ParamFeedsIndirectCall +// <endpropsdump> +// {"Flags":0,"ParamFlags":[8],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_indcall_with_copy(x func()) { + xx := x + if G < 10 { + G-- + } + xx() +} + +// params.go T_feeds_interface_method_call 354 0 1 +// ParamFlags +// 0 ParamFeedsInterfaceMethodCall +// <endpropsdump> +// {"Flags":0,"ParamFlags":[2],"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_feeds_interface_method_call(i I) { + i.Blarg() +} + +var G int + +type T int + +type I interface { + Blarg() +} + +func (r T) Blarg() { +} diff --git a/src/cmd/compile/internal/inline/inlheur/testdata/props/returns.go b/src/cmd/compile/internal/inline/inlheur/testdata/props/returns.go new file mode 100644 index 0000000..51f2bc7 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/testdata/props/returns.go @@ -0,0 +1,370 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// DO NOT EDIT (use 'go test -v -update-expected' instead.) +// See cmd/compile/internal/inline/inlheur/testdata/props/README.txt +// for more information on the format of this file. 
+// <endfilepreamble> + +package returns1 + +import "unsafe" + +// returns.go T_simple_allocmem 21 0 1 +// ResultFlags +// 0 ResultIsAllocatedMem +// <endpropsdump> +// {"Flags":0,"ParamFlags":null,"ResultFlags":[2]} +// <endcallsites> +// <endfuncpreamble> +func T_simple_allocmem() *Bar { + return &Bar{} +} + +// returns.go T_allocmem_two_returns 34 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// ResultFlags +// 0 ResultIsAllocatedMem +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":[2]} +// <endcallsites> +// <endfuncpreamble> +func T_allocmem_two_returns(x int) *Bar { + // multiple returns + if x < 0 { + return new(Bar) + } else { + return &Bar{x: 2} + } +} + +// returns.go T_allocmem_three_returns 52 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// ResultFlags +// 0 ResultIsAllocatedMem +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":[2]} +// <endcallsites> +// <endfuncpreamble> +func T_allocmem_three_returns(x int) []*Bar { + // more multiple returns + switch x { + case 10, 11, 12: + return make([]*Bar, 10) + case 13: + fallthrough + case 15: + return []*Bar{&Bar{x: 15}} + } + return make([]*Bar, 0, 10) +} + +// returns.go T_return_nil 72 0 1 +// ResultFlags +// 0 ResultAlwaysSameConstant +// <endpropsdump> +// {"Flags":0,"ParamFlags":null,"ResultFlags":[8]} +// <endcallsites> +// <endfuncpreamble> +func T_return_nil() *Bar { + // simple case: no alloc + return nil +} + +// returns.go T_multi_return_nil 84 0 1 +// ResultFlags +// 0 ResultAlwaysSameConstant +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":[8]} +// <endcallsites> +// <endfuncpreamble> +func T_multi_return_nil(x, y bool) *Bar { + if x && y { + return nil + } + return nil +} + +// returns.go T_multi_return_nil_anomoly 98 0 1 +// ResultFlags +// 0 ResultIsConcreteTypeConvertedToInterface +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":[4]} +// <endcallsites> +// <endfuncpreamble> +func T_multi_return_nil_anomoly(x, y bool) Itf { + if x && y { + var qnil *Q + return qnil + } + var barnil *Bar + return barnil +} + +// returns.go T_multi_return_some_nil 112 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":[0]} +// <endcallsites> +// <endfuncpreamble> +func T_multi_return_some_nil(x, y bool) *Bar { + if x && y { + return nil + } else { + return &GB + } +} + +// returns.go T_mixed_returns 127 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":[0]} +// <endcallsites> +// <endfuncpreamble> +func T_mixed_returns(x int) *Bar { + // mix of alloc and non-alloc + if x < 0 { + return new(Bar) + } else { + return &GB + } +} + +// returns.go T_mixed_returns_slice 143 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":[0]} +// <endcallsites> +// <endfuncpreamble> +func T_mixed_returns_slice(x int) []*Bar { + // mix of alloc and non-alloc + switch x { + case 10, 11, 12: + return make([]*Bar, 10) + case 13: + fallthrough + case 15: + return []*Bar{&Bar{x: 15}} + } + ba := [...]*Bar{&GB, &GB} + return ba[:] +} + +// returns.go T_maps_and_channels 167 0 1 +// ResultFlags +// 0 ResultNoInfo +// 1 ResultNoInfo +// 2 ResultNoInfo +// 3 ResultAlwaysSameConstant +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":[0,0,0,8]} +// <endcallsites> +// <endfuncpreamble> +func T_maps_and_channels(x int, b bool) (bool, map[int]int, chan bool, unsafe.Pointer) { + // maps and channels + return b, make(map[int]int), make(chan 
bool), nil +} + +// returns.go T_assignment_to_named_returns 179 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":[0,0]} +// <endcallsites> +// <endfuncpreamble> +func T_assignment_to_named_returns(x int) (r1 *uint64, r2 *uint64) { + // assignments to named returns and then "return" not supported + r1 = new(uint64) + if x < 1 { + *r1 = 2 + } + r2 = new(uint64) + return +} + +// returns.go T_named_returns_but_return_explicit_values 199 0 1 +// ParamFlags +// 0 ParamFeedsIfOrSwitch +// ResultFlags +// 0 ResultIsAllocatedMem +// 1 ResultIsAllocatedMem +// <endpropsdump> +// {"Flags":0,"ParamFlags":[32],"ResultFlags":[2,2]} +// <endcallsites> +// <endfuncpreamble> +func T_named_returns_but_return_explicit_values(x int) (r1 *uint64, r2 *uint64) { + // named returns ok if all returns are non-empty + rx1 := new(uint64) + if x < 1 { + *rx1 = 2 + } + rx2 := new(uint64) + return rx1, rx2 +} + +// returns.go T_return_concrete_type_to_itf 216 0 1 +// ResultFlags +// 0 ResultIsConcreteTypeConvertedToInterface +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":[4]} +// <endcallsites> +// <endfuncpreamble> +func T_return_concrete_type_to_itf(x, y int) Itf { + return &Bar{} +} + +// returns.go T_return_concrete_type_to_itfwith_copy 227 0 1 +// ResultFlags +// 0 ResultIsConcreteTypeConvertedToInterface +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":[4]} +// <endcallsites> +// <endfuncpreamble> +func T_return_concrete_type_to_itfwith_copy(x, y int) Itf { + b := &Bar{} + println("whee") + return b +} + +// returns.go T_return_concrete_type_to_itf_mixed 238 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":[0]} +// <endcallsites> +// <endfuncpreamble> +func T_return_concrete_type_to_itf_mixed(x, y int) Itf { + if x < y { + b := &Bar{} + return b + } + return nil +} + +// returns.go T_return_same_func 253 0 1 +// ResultFlags +// 0 ResultAlwaysSameInlinableFunc +// <endpropsdump> +// {"Flags":0,"ParamFlags":null,"ResultFlags":[32]} +// <endcallsites> +// <endfuncpreamble> +func T_return_same_func() func(int) int { + if G < 10 { + return foo + } else { + return foo + } +} + +// returns.go T_return_different_funcs 266 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":null,"ResultFlags":[0]} +// <endcallsites> +// <endfuncpreamble> +func T_return_different_funcs() func(int) int { + if G != 10 { + return foo + } else { + return bar + } +} + +// returns.go T_return_same_closure 286 0 1 +// ResultFlags +// 0 ResultAlwaysSameInlinableFunc +// <endpropsdump> +// {"Flags":0,"ParamFlags":null,"ResultFlags":[32]} +// <endcallsites> +// <endfuncpreamble> +// returns.go T_return_same_closure.func1 287 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":[0]} +// <endcallsites> +// <endfuncpreamble> +func T_return_same_closure() func(int) int { + p := func(q int) int { return q } + if G < 10 { + return p + } else { + return p + } +} + +// returns.go T_return_different_closures 312 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":null,"ResultFlags":[0]} +// <endcallsites> +// <endfuncpreamble> +// returns.go T_return_different_closures.func1 313 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":[0]} +// <endcallsites> +// <endfuncpreamble> +// returns.go T_return_different_closures.func2 317 0 1 +// ResultFlags +// 0 ResultAlwaysSameConstant +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":[8]} +// <endcallsites> +// <endfuncpreamble> +func 
T_return_different_closures() func(int) int { + p := func(q int) int { return q } + if G < 10 { + return p + } else { + return func(q int) int { return 101 } + } +} + +// returns.go T_return_noninlinable 339 0 1 +// ResultFlags +// 0 ResultAlwaysSameFunc +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":[16]} +// <endcallsites> +// <endfuncpreamble> +// returns.go T_return_noninlinable.func1 340 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":[0]} +// callsite: returns.go:343:4|0 flagstr "" flagval 0 score 4 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +// returns.go T_return_noninlinable.func1.1 341 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":null,"ResultFlags":null} +// <endcallsites> +// <endfuncpreamble> +func T_return_noninlinable(x int) func(int) int { + noti := func(q int) int { + defer func() { + println(q + x) + }() + return q + } + return noti +} + +type Bar struct { + x int + y string +} + +func (b *Bar) Plark() { +} + +type Q int + +func (q *Q) Plark() { +} + +func foo(x int) int { return x } +func bar(x int) int { return -x } + +var G int +var GB Bar + +type Itf interface { + Plark() +} diff --git a/src/cmd/compile/internal/inline/inlheur/testdata/props/returns2.go b/src/cmd/compile/internal/inline/inlheur/testdata/props/returns2.go new file mode 100644 index 0000000..7200926 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/testdata/props/returns2.go @@ -0,0 +1,231 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// DO NOT EDIT (use 'go test -v -update-expected' instead.) +// See cmd/compile/internal/inline/inlheur/testdata/props/README.txt +// for more information on the format of this file. 
+// <endfilepreamble> + +package returns2 + +// returns2.go T_return_feeds_iface_call 18 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":null,"ResultFlags":null} +// callsite: returns2.go:19:13|0 flagstr "" flagval 0 score 1 mask 16384 maskstr "returnFeedsConcreteToInterfaceCallAdj" +// <endcallsites> +// <endfuncpreamble> +func T_return_feeds_iface_call() { + b := newBar(10) + b.Plark() +} + +// returns2.go T_multi_return_feeds_iface_call 29 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":null,"ResultFlags":null} +// callsite: returns2.go:30:20|0 flagstr "" flagval 0 score 3 mask 16384 maskstr "returnFeedsConcreteToInterfaceCallAdj" +// <endcallsites> +// <endfuncpreamble> +func T_multi_return_feeds_iface_call() { + _, b, _ := newBar2(10) + b.Plark() +} + +// returns2.go T_returned_inlinable_func_feeds_indirect_call 41 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// callsite: returns2.go:42:18|0 flagstr "" flagval 0 score -51 mask 8200 maskstr "passConstToIfAdj|returnFeedsInlinableFuncToIndCallAdj" +// callsite: returns2.go:44:20|1 flagstr "" flagval 0 score -23 mask 8192 maskstr "returnFeedsInlinableFuncToIndCallAdj" +// <endcallsites> +// <endfuncpreamble> +func T_returned_inlinable_func_feeds_indirect_call(q int) { + f := returnsFunc(10) + f(q) + f2 := returnsFunc2() + f2(q) +} + +// returns2.go T_returned_noninlineable_func_feeds_indirect_call 54 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// callsite: returns2.go:55:30|0 flagstr "" flagval 0 score -23 mask 4096 maskstr "returnFeedsFuncToIndCallAdj" +// <endcallsites> +// <endfuncpreamble> +func T_returned_noninlineable_func_feeds_indirect_call(q int) { + f := returnsNonInlinableFunc() + f(q) +} + +// returns2.go T_multi_return_feeds_indirect_call 65 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":null} +// callsite: returns2.go:66:29|0 flagstr "" flagval 0 score -21 mask 8192 maskstr "returnFeedsInlinableFuncToIndCallAdj" +// <endcallsites> +// <endfuncpreamble> +func T_multi_return_feeds_indirect_call(q int) { + _, f, _ := multiReturnsFunc() + f(q) +} + +// returns2.go T_return_feeds_ifswitch 76 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":[0]} +// callsite: returns2.go:77:14|0 flagstr "" flagval 0 score 10 mask 2048 maskstr "returnFeedsConstToIfAdj" +// <endcallsites> +// <endfuncpreamble> +func T_return_feeds_ifswitch(q int) int { + x := meaning(q) + if x < 42 { + switch x { + case 42: + return 1 + } + } + return 0 +} + +// returns2.go T_multi_return_feeds_ifswitch 93 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":[0]} +// callsite: returns2.go:94:21|0 flagstr "" flagval 0 score 9 mask 2048 maskstr "returnFeedsConstToIfAdj" +// <endcallsites> +// <endfuncpreamble> +func T_multi_return_feeds_ifswitch(q int) int { + x, y, z := meanings(q) + if x < y { + switch x { + case 42: + return z + } + } + return 0 +} + +// returns2.go T_two_calls_feed_ifswitch 111 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0],"ResultFlags":[0]} +// callsite: returns2.go:115:14|0 flagstr "" flagval 0 score 25 mask 0 maskstr "" +// callsite: returns2.go:116:14|1 flagstr "" flagval 0 score 25 mask 0 maskstr "" +// <endcallsites> +// <endfuncpreamble> +func T_two_calls_feed_ifswitch(q int) int { + // This case we don't handle; for the heuristic to kick in, + // all names in a given if/switch cond have to come from the + // same callsite + x := meaning(q) + y := meaning(-q) + if x < y { + switch x + y { + case 42: + 
return 1 + } + } + return 0 +} + +// returns2.go T_chained_indirect_call 132 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":null} +// callsite: returns2.go:135:18|0 flagstr "" flagval 0 score -31 mask 8192 maskstr "returnFeedsInlinableFuncToIndCallAdj" +// <endcallsites> +// <endfuncpreamble> +func T_chained_indirect_call(x, y int) { + // Here 'returnsFunc' returns an inlinable func that feeds + // directly into a call (no named intermediate). + G += returnsFunc(x - y)(x + y) +} + +// returns2.go T_chained_conc_iface_call 144 0 1 +// <endpropsdump> +// {"Flags":0,"ParamFlags":[0,0],"ResultFlags":null} +// callsite: returns2.go:148:8|0 flagstr "" flagval 0 score 1 mask 16384 maskstr "returnFeedsConcreteToInterfaceCallAdj" +// <endcallsites> +// <endfuncpreamble> +func T_chained_conc_iface_call(x, y int) { + // Similar to the case above, return from call returning concrete type + // feeds directly into interface call. Note that only the first + // iface call is interesting here. + newBar(10).Plark().Plark() +} + +func returnsFunc(x int) func(int) int { + if x < 0 { + G++ + } + return adder +} + +func returnsFunc2() func(int) int { + return func(x int) int { + return adder(x) + } +} + +func returnsNonInlinableFunc() func(int) int { + return adderNoInline +} + +func multiReturnsFunc() (int, func(int) int, int) { + return 42, func(x int) int { G++; return 1 }, -42 +} + +func adder(x int) int { + G += 1 + return G +} + +func adderNoInline(x int) int { + defer func() { G += x }() + G += 1 + return G +} + +func meaning(q int) int { + r := 0 + for i := 0; i < 42; i++ { + r += q + } + G += r + return 42 +} + +func meanings(q int) (int, int, int) { + r := 0 + for i := 0; i < 42; i++ { + r += q + } + return 42, 43, r +} + +type Bar struct { + x int + y string +} + +func (b *Bar) Plark() Itf { + return b +} + +type Itf interface { + Plark() Itf +} + +func newBar(x int) Itf { + s := 0 + for i := 0; i < x; i++ { + s += i + } + return &Bar{ + x: s, + } +} + +func newBar2(x int) (int, Itf, bool) { + s := 0 + for i := 0; i < x; i++ { + s += i + } + return 0, &Bar{x: s}, false +} + +var G int diff --git a/src/cmd/compile/internal/inline/inlheur/texpr_classify_test.go b/src/cmd/compile/internal/inline/inlheur/texpr_classify_test.go new file mode 100644 index 0000000..587eab0 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/texpr_classify_test.go @@ -0,0 +1,217 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+package inlheur
+
+import (
+	"cmd/compile/internal/ir"
+	"cmd/compile/internal/typecheck"
+	"cmd/compile/internal/types"
+	"cmd/internal/src"
+	"go/constant"
+	"testing"
+)
+
+var pos src.XPos
+var local *types.Pkg
+var f *ir.Func
+
+func init() {
+	types.PtrSize = 8
+	types.RegSize = 8
+	types.MaxWidth = 1 << 50
+	typecheck.InitUniverse()
+	local = types.NewPkg("", "")
+	fsym := &types.Sym{
+		Pkg:  types.NewPkg("my/import/path", "path"),
+		Name: "function",
+	}
+	f = ir.NewFunc(src.NoXPos, src.NoXPos, fsym, nil)
+}
+
+type state struct {
+	ntab map[string]*ir.Name
+}
+
+func mkstate() *state {
+	return &state{
+		ntab: make(map[string]*ir.Name),
+	}
+}
+
+func bin(x ir.Node, op ir.Op, y ir.Node) ir.Node {
+	return ir.NewBinaryExpr(pos, op, x, y)
+}
+
+func conv(x ir.Node, t *types.Type) ir.Node {
+	return ir.NewConvExpr(pos, ir.OCONV, t, x)
+}
+
+func logical(x ir.Node, op ir.Op, y ir.Node) ir.Node {
+	return ir.NewLogicalExpr(pos, op, x, y)
+}
+
+func un(op ir.Op, x ir.Node) ir.Node {
+	return ir.NewUnaryExpr(pos, op, x)
+}
+
+func liti(i int64) ir.Node {
+	return ir.NewBasicLit(pos, types.Types[types.TINT64], constant.MakeInt64(i))
+}
+
+func lits(s string) ir.Node {
+	return ir.NewBasicLit(pos, types.Types[types.TSTRING], constant.MakeString(s))
+}
+
+func (s *state) nm(name string, t *types.Type) *ir.Name {
+	if n, ok := s.ntab[name]; ok {
+		if n.Type() != t {
+			panic("bad")
+		}
+		return n
+	}
+	sym := local.Lookup(name)
+	nn := ir.NewNameAt(pos, sym, t)
+	s.ntab[name] = nn
+	return nn
+}
+
+func (s *state) nmi64(name string) *ir.Name {
+	return s.nm(name, types.Types[types.TINT64])
+}
+
+func (s *state) nms(name string) *ir.Name {
+	return s.nm(name, types.Types[types.TSTRING])
+}
+
+func TestClassifyIntegerCompare(t *testing.T) {
+
+	// (n < 10 || n > 100) && (n >= 12 || n <= 99 || n != 101)
+	s := mkstate()
+	nn := s.nmi64("n")
+	nlt10 := bin(nn, ir.OLT, liti(10))         // n < 10
+	ngt100 := bin(nn, ir.OGT, liti(100))       // n > 100
+	nge12 := bin(nn, ir.OGE, liti(12))         // n >= 12
+	nle99 := bin(nn, ir.OLE, liti(99))         // n <= 99
+	nne101 := bin(nn, ir.ONE, liti(101))       // n != 101
+	noror1 := logical(nlt10, ir.OOROR, ngt100) // n < 10 || n > 100
+	noror2 := logical(nge12, ir.OOROR, nle99)  // n >= 12 || n <= 99
+	noror3 := logical(noror2, ir.OOROR, nne101)
+	nandand := typecheck.Expr(logical(noror1, ir.OANDAND, noror3))
+
+	wantv := true
+	v := ShouldFoldIfNameConstant(nandand, []*ir.Name{nn})
+	if v != wantv {
+		t.Errorf("wanted shouldfold(%v) %v, got %v", nandand, wantv, v)
+	}
+}
+
+func TestClassifyStringCompare(t *testing.T) {
+
+	// s != "foo" && s < "ooblek" && s > "plarkish"
+	s := mkstate()
+	nn := s.nms("s")
+	snefoo := bin(nn, ir.ONE, lits("foo"))     // s != "foo"
+	sltoob := bin(nn, ir.OLT, lits("ooblek"))  // s < "ooblek"
+	sgtpk := bin(nn, ir.OGT, lits("plarkish")) // s > "plarkish"
+	nandand := logical(snefoo, ir.OANDAND, sltoob)
+	top := typecheck.Expr(logical(nandand, ir.OANDAND, sgtpk))
+
+	wantv := true
+	v := ShouldFoldIfNameConstant(top, []*ir.Name{nn})
+	if v != wantv {
+		t.Errorf("wanted shouldfold(%v) %v, got %v", top, wantv, v)
+	}
+}
+
+func TestClassifyIntegerArith(t *testing.T) {
+	// n+1 ^ n-3 * n/2 + n<<9 + n>>2 - n&^7
+
+	s := mkstate()
+	nn := s.nmi64("n")
+	np1 := bin(nn, ir.OADD, liti(1))     // n+1
+	nm3 := bin(nn, ir.OSUB, liti(3))     // n-3
+	nd2 := bin(nn, ir.ODIV, liti(2))     // n/2
+	nls9 := bin(nn, ir.OLSH, liti(9))    // n<<9
+	nrs2 := bin(nn, ir.ORSH, liti(2))    // n>>2
+	nan7 := bin(nn, ir.OANDNOT, liti(7)) // n&^7
+	c1xor := bin(np1, ir.OXOR, nm3)
+	c2mul := bin(c1xor, ir.OMUL, nd2)
+	c3add := bin(c2mul, ir.OADD, nls9)
+	c4add := bin(c3add, ir.OADD, nrs2)
+	c5sub := bin(c4add, ir.OSUB, nan7)
+	top := typecheck.Expr(c5sub)
+
+	wantv := true
+	v := ShouldFoldIfNameConstant(top, []*ir.Name{nn})
+	if v != wantv {
+		t.Errorf("wanted shouldfold(%v) %v, got %v", top, wantv, v)
+	}
+}
+
+func TestClassifyAssortedShifts(t *testing.T) {
+
+	s := mkstate()
+	nn := s.nmi64("n")
+	badcases := []ir.Node{
+		bin(liti(3), ir.OLSH, nn), // 3<<n
+		bin(liti(7), ir.ORSH, nn), // 7>>n
+	}
+	for _, bc := range badcases {
+		wantv := false
+		v := ShouldFoldIfNameConstant(typecheck.Expr(bc), []*ir.Name{nn})
+		if v != wantv {
+			t.Errorf("wanted shouldfold(%v) %v, got %v", bc, wantv, v)
+		}
+	}
+}
+
+func TestClassifyFloat(t *testing.T) {
+	// float32(n) + float32(10)
+	s := mkstate()
+	nn := s.nm("n", types.Types[types.TUINT32])
+	f1 := conv(nn, types.Types[types.TFLOAT32])
+	f2 := conv(liti(10), types.Types[types.TFLOAT32])
+	add := bin(f1, ir.OADD, f2)
+
+	wantv := false
+	v := ShouldFoldIfNameConstant(typecheck.Expr(add), []*ir.Name{nn})
+	if v != wantv {
+		t.Errorf("wanted shouldfold(%v) %v, got %v", add, wantv, v)
+	}
+}
+
+func TestMultipleNamesAllUsed(t *testing.T) {
+	// n != 101 && m < 2
+	s := mkstate()
+	nn := s.nmi64("n")
+	nm := s.nmi64("m")
+	nne101 := bin(nn, ir.ONE, liti(101)) // n != 101
+	mlt2 := bin(nm, ir.OLT, liti(2))     // m < 2
+	nandand := typecheck.Expr(logical(nne101, ir.OANDAND, mlt2))
+
+	// all names used
+	wantv := true
+	v := ShouldFoldIfNameConstant(nandand, []*ir.Name{nn, nm})
+	if v != wantv {
+		t.Errorf("wanted shouldfold(%v) %v, got %v", nandand, wantv, v)
+	}
+
+	// not all names used
+	wantv = false
+	v = ShouldFoldIfNameConstant(nne101, []*ir.Name{nn, nm})
+	if v != wantv {
+		t.Errorf("wanted shouldfold(%v) %v, got %v", nne101, wantv, v)
+	}
+
+	// other names used.
+	np := s.nmi64("p")
+	pne0 := bin(np, ir.ONE, liti(101)) // p != 101
+	noror := logical(nandand, ir.OOROR, pne0)
+	wantv = false
+	v = ShouldFoldIfNameConstant(noror, []*ir.Name{nn, nm})
+	if v != wantv {
+		t.Errorf("wanted shouldfold(%v) %v, got %v", noror, wantv, v)
+	}
+}
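Editor's note: ShouldFoldIfNameConstant, exercised throughout this test file, reports whether an expression would reduce to a constant if the listed names were bound to constants. In plain Go terms (hypothetical function, not from the tests), this is the situation the "passConstToIfAdj" and "returnFeedsConstToIfAdj" adjustments seen in the dumps earlier are looking for:

	// If fold is inlined at a call site such as fold(7), n is a known
	// constant, the condition collapses at compile time, and the dead
	// branch disappears, which is why such call sites get a score bonus.
	func fold(n int64) int64 {
		if n < 10 || n > 100 {
			return 1
		}
		return 2
	}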
diff --git a/src/cmd/compile/internal/inline/inlheur/trace_off.go b/src/cmd/compile/internal/inline/inlheur/trace_off.go
new file mode 100644
index 0000000..9eea7fa
--- /dev/null
+++ b/src/cmd/compile/internal/inline/inlheur/trace_off.go
@@ -0,0 +1,18 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !debugtrace
+
+package inlheur
+
+const debugTrace = 0
+
+func enableDebugTrace(x int) {
+}
+
+func enableDebugTraceIfEnv() {
+}
+
+func disableDebugTrace() {
+}
diff --git a/src/cmd/compile/internal/inline/inlheur/trace_on.go b/src/cmd/compile/internal/inline/inlheur/trace_on.go
new file mode 100644
index 0000000..1608429
--- /dev/null
+++ b/src/cmd/compile/internal/inline/inlheur/trace_on.go
@@ -0,0 +1,40 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build debugtrace
+
+package inlheur
+
+import (
+	"os"
+	"strconv"
+)
+
+var debugTrace = 0
+
+func enableDebugTrace(x int) {
+	debugTrace = x
+}
+
+func enableDebugTraceIfEnv() {
+	v := os.Getenv("DEBUG_TRACE_INLHEUR")
+	if v == "" {
+		return
+	}
+	if v[0] == '*' {
+		if !UnitTesting() {
+			return
+		}
+		v = v[1:]
+	}
+	i, err := strconv.Atoi(v)
+	if err != nil {
+		return
+	}
+	debugTrace = i
+}
+
+func disableDebugTrace() {
+	debugTrace = 0
+}
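Editor's note: trace_off.go and trace_on.go are build-tag alternates of the same tiny API. The deliberate asymmetry is that the default build declares debugTrace as a constant 0, so guards on it are compile-time dead code and cost nothing, while a build with -tags debugtrace makes it a variable settable through the DEBUG_TRACE_INLHEUR environment variable (a leading '*' restricts the setting to unit-test runs). A hedged sketch of the intended consumer pattern inside the inlheur package (the guard shape is an assumption; callers are not shown in this diff):

	// Under !debugtrace, debugTrace is the constant 0, so this branch is
	// eliminated at compile time; under debugtrace it is live and tunable.
	func traceExample(msg string) {
		if debugTrace != 0 {
			println("inlheur trace:", msg)
		}
	}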
diff --git a/src/cmd/compile/internal/inline/inlheur/tserial_test.go b/src/cmd/compile/internal/inline/inlheur/tserial_test.go
new file mode 100644
index 0000000..def12f5
--- /dev/null
+++ b/src/cmd/compile/internal/inline/inlheur/tserial_test.go
@@ -0,0 +1,65 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package inlheur
+
+import "testing"
+
+func fpeq(fp1, fp2 FuncProps) bool {
+	if fp1.Flags != fp2.Flags {
+		return false
+	}
+	if len(fp1.ParamFlags) != len(fp2.ParamFlags) {
+		return false
+	}
+	for i := range fp1.ParamFlags {
+		if fp1.ParamFlags[i] != fp2.ParamFlags[i] {
+			return false
+		}
+	}
+	if len(fp1.ResultFlags) != len(fp2.ResultFlags) {
+		return false
+	}
+	for i := range fp1.ResultFlags {
+		if fp1.ResultFlags[i] != fp2.ResultFlags[i] {
+			return false
+		}
+	}
+	return true
+}
+
+func TestSerDeser(t *testing.T) {
+	testcases := []FuncProps{
+		FuncProps{},
+		FuncProps{
+			Flags: 0xfffff,
+		},
+		FuncProps{
+			Flags:       1,
+			ResultFlags: []ResultPropBits{ResultAlwaysSameConstant},
+		},
+		FuncProps{
+			Flags:       1,
+			ParamFlags:  []ParamPropBits{0x99, 0xaa, 0xfffff},
+			ResultFlags: []ResultPropBits{0xfeedface},
+		},
+	}
+
+	for k, tc := range testcases {
+		s := tc.SerializeToString()
+		fp := DeserializeFromString(s)
+		got := fp.String()
+		want := tc.String()
+		if !fpeq(*fp, tc) {
+			t.Errorf("eq check failed for test %d: got:\n%s\nwant:\n%s\n", k, got, want)
+		}
+	}
+
+	var nilt *FuncProps
+	ns := nilt.SerializeToString()
+	nfp := DeserializeFromString(ns)
+	if len(ns) != 0 || nfp != nil {
+		t.Errorf("nil serialize/deserialize failed")
+	}
+}
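Editor's note: fpeq is hand-written because FuncProps contains slices and therefore cannot be compared with ==. It is also deliberately looser than reflection-based comparison: checking lengths and elements treats a nil slice and an empty slice as equal. For contrast, a stricter variant (hypothetical, not part of the test file) would be:

	// fpeqStrict uses reflect.DeepEqual (import "reflect"), which
	// distinguishes a nil slice from an empty one; the hand-written fpeq
	// above treats them as equal, which is the safer choice if the
	// serializer normalizes empty flag slices to nil.
	func fpeqStrict(fp1, fp2 FuncProps) bool {
		return reflect.DeepEqual(fp1, fp2)
	}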
diff --git a/src/cmd/compile/internal/inline/interleaved/interleaved.go b/src/cmd/compile/internal/inline/interleaved/interleaved.go
new file mode 100644
index 0000000..a6f19d4
--- /dev/null
+++ b/src/cmd/compile/internal/inline/interleaved/interleaved.go
@@ -0,0 +1,132 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package interleaved implements the interleaved devirtualization and
+// inlining pass.
+package interleaved
+
+import (
+	"cmd/compile/internal/base"
+	"cmd/compile/internal/devirtualize"
+	"cmd/compile/internal/inline"
+	"cmd/compile/internal/inline/inlheur"
+	"cmd/compile/internal/ir"
+	"cmd/compile/internal/pgo"
+	"cmd/compile/internal/typecheck"
+	"fmt"
+)
+
+// DevirtualizeAndInlinePackage interleaves devirtualization and inlining on
+// all functions within pkg.
+func DevirtualizeAndInlinePackage(pkg *ir.Package, profile *pgo.Profile) {
+	if profile != nil && base.Debug.PGODevirtualize > 0 {
+		// TODO(mdempsky): Integrate into DevirtualizeAndInlineFunc below.
+		ir.VisitFuncsBottomUp(typecheck.Target.Funcs, func(list []*ir.Func, recursive bool) {
+			for _, fn := range list {
+				devirtualize.ProfileGuided(fn, profile)
+			}
+		})
+		ir.CurFunc = nil
+	}
+
+	if base.Flag.LowerL != 0 {
+		inlheur.SetupScoreAdjustments()
+	}
+
+	var inlProfile *pgo.Profile // copy of profile for inlining
+	if base.Debug.PGOInline != 0 {
+		inlProfile = profile
+	}
+	if inlProfile != nil {
+		inline.PGOInlinePrologue(inlProfile, pkg.Funcs)
+	}
+
+	ir.VisitFuncsBottomUp(pkg.Funcs, func(funcs []*ir.Func, recursive bool) {
+		// We visit functions within an SCC in fairly arbitrary order,
+		// so by computing inlinability for all functions in the SCC
+		// before performing any inlining, the results are less
+		// sensitive to the order within the SCC (see #58905 for an
+		// example).
+
+		// First compute inlinability for all functions in the SCC ...
+		inline.CanInlineSCC(funcs, recursive, inlProfile)
+
+		// ... then make a second pass to do devirtualization and inlining
+		// of calls.
+		for _, fn := range funcs {
+			DevirtualizeAndInlineFunc(fn, inlProfile)
+		}
+	})
+
+	if base.Flag.LowerL != 0 {
+		// Perform a garbage collection of hidden closure functions that
+		// are no longer reachable from top-level functions following
+		// inlining. See #59404 and #59638 for more context.
+		inline.GarbageCollectUnreferencedHiddenClosures()
+
+		if base.Debug.DumpInlFuncProps != "" {
+			inlheur.DumpFuncProps(nil, base.Debug.DumpInlFuncProps)
+		}
+		if inlheur.Enabled() {
+			inline.PostProcessCallSites(inlProfile)
+			inlheur.TearDown()
+		}
+	}
+}
+
+// DevirtualizeAndInlineFunc interleaves devirtualization and inlining
+// on a single function.
+func DevirtualizeAndInlineFunc(fn *ir.Func, profile *pgo.Profile) {
+	ir.WithFunc(fn, func() {
+		if base.Flag.LowerL != 0 {
+			if inlheur.Enabled() && !fn.Wrapper() {
+				inlheur.ScoreCalls(fn)
+				defer inlheur.ScoreCallsCleanup()
+			}
+			if base.Debug.DumpInlFuncProps != "" && !fn.Wrapper() {
+				inlheur.DumpFuncProps(fn, base.Debug.DumpInlFuncProps)
+			}
+		}
+
+		bigCaller := base.Flag.LowerL != 0 && inline.IsBigFunc(fn)
+		if bigCaller && base.Flag.LowerM > 1 {
+			fmt.Printf("%v: function %v considered 'big'; reducing max cost of inlinees\n", ir.Line(fn), fn)
+		}
+
+		// Walk fn's body and apply devirtualization and inlining.
+		var inlCalls []*ir.InlinedCallExpr
+		var edit func(ir.Node) ir.Node
+		edit = func(n ir.Node) ir.Node {
+			switch n := n.(type) {
+			case *ir.TailCallStmt:
+				n.Call.NoInline = true // can't inline yet
+			}
+
+			ir.EditChildren(n, edit)
+
+			if call, ok := n.(*ir.CallExpr); ok {
+				devirtualize.StaticCall(call)
+
+				if inlCall := inline.TryInlineCall(fn, call, bigCaller, profile); inlCall != nil {
+					inlCalls = append(inlCalls, inlCall)
+					n = inlCall
+				}
+			}
+
+			return n
+		}
+		ir.EditChildren(fn, edit)
+
+		// If we inlined any calls, we want to recursively visit their
+		// bodies for further devirtualization and inlining. However, we
+		// need to wait until *after* the original function body has been
+		// expanded, or else inlCallee can have false positives (e.g.,
+		// #54632).
+		for len(inlCalls) > 0 {
+			call := inlCalls[0]
+			inlCalls = inlCalls[1:]
+			ir.EditChildren(call, edit)
+		}
+	})
+}
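Editor's note: the inlCalls worklist at the end of DevirtualizeAndInlineFunc is the subtle piece: subtrees produced by inlining are queued and revisited only after the original body has been fully expanded, which avoids the inlCallee false positives the comment references. A self-contained, generic sketch of that worklist shape (the node type and helpers are invented for illustration):

	package main

	// node stands in for an IR node; expandable marks a call that can be
	// "inlined" by attaching a fresh subtree.
	type node struct {
		kids       []*node
		expandable bool
	}

	// edit rewrites n's children first, then expands n itself, queueing
	// the new subtree for a later pass instead of descending into it
	// immediately.
	func edit(n *node, queue *[]*node) {
		for _, k := range n.kids {
			edit(k, queue)
		}
		if n.expandable {
			n.expandable = false
			inlined := &node{} // stands in for the inlined body
			n.kids = append(n.kids, inlined)
			*queue = append(*queue, inlined)
		}
	}

	// expandAll drains the worklist: each queued subtree is edited only
	// after the pass that created it has finished with the original body.
	func expandAll(root *node) {
		var queue []*node
		edit(root, &queue)
		for len(queue) > 0 {
			n := queue[0]
			queue = queue[1:]
			edit(n, &queue)
		}
	}

	func main() {
		root := &node{kids: []*node{{expandable: true}}}
		expandAll(root)
		println("children of expanded call:", len(root.kids[0].kids))
	}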