author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:25:22 +0000
---|---|---
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:25:22 +0000
commit | f6ad4dcef54c5ce997a4bad5a6d86de229015700 (patch) |
tree | 7cfa4e31ace5c2bd95c72b154d15af494b2bcbef /src/cmd/compile/internal/ssa/tighten.go |
parent | Initial commit. (diff) |
download | golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.tar.xz golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.zip |
Adding upstream version 1.22.1. (tag: upstream/1.22.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/tighten.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/tighten.go | 269
1 file changed, 269 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/tighten.go b/src/cmd/compile/internal/ssa/tighten.go
new file mode 100644
index 0000000..85b6a84
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/tighten.go
@@ -0,0 +1,269 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import "cmd/compile/internal/base"
+
+// tighten moves Values closer to the Blocks in which they are used.
+// This can reduce the amount of register spilling required,
+// if it doesn't also create more live values.
+// A Value can be moved to any block that
+// dominates all blocks in which it is used.
+func tighten(f *Func) {
+    if base.Flag.N != 0 && len(f.Blocks) < 10000 {
+        // Skip the optimization in -N mode, except for huge functions.
+        // Too many values live across blocks can cause pathological
+        // behavior in the register allocator (see issue 52180).
+        return
+    }
+
+    canMove := f.Cache.allocBoolSlice(f.NumValues())
+    defer f.Cache.freeBoolSlice(canMove)
+
+    // Compute the memory states of each block.
+    startMem := f.Cache.allocValueSlice(f.NumBlocks())
+    defer f.Cache.freeValueSlice(startMem)
+    endMem := f.Cache.allocValueSlice(f.NumBlocks())
+    defer f.Cache.freeValueSlice(endMem)
+    memState(f, startMem, endMem)
+
+    for _, b := range f.Blocks {
+        for _, v := range b.Values {
+            if v.Op.isLoweredGetClosurePtr() {
+                // Must stay in the entry block.
+                continue
+            }
+            switch v.Op {
+            case OpPhi, OpArg, OpArgIntReg, OpArgFloatReg, OpSelect0, OpSelect1, OpSelectN:
+                // Phis need to stay in their block.
+                // Arg must stay in the entry block.
+                // Tuple selectors must stay with the tuple generator.
+                // SelectN is typically, ultimately, a register.
+                continue
+            }
+            // Count arguments which will need a register.
+            narg := 0
+            for _, a := range v.Args {
+                // SP and SB are special registers and have no effect on
+                // the allocation of general-purpose registers.
+                if a.needRegister() && a.Op != OpSB && a.Op != OpSP {
+                    narg++
+                }
+            }
+            if narg >= 2 && !v.Type.IsFlags() {
+                // Don't move values with more than one input, as that may
+                // increase register pressure.
+                // We make an exception for flags, as we want flag generators
+                // moved next to uses (because we only have 1 flag register).
+                continue
+            }
+            canMove[v.ID] = true
+        }
+    }
+
+    // Build data structure for fast least-common-ancestor queries.
+    lca := makeLCArange(f)
+
+    // For each moveable value, record the block that dominates all uses found so far.
+    target := f.Cache.allocBlockSlice(f.NumValues())
+    defer f.Cache.freeBlockSlice(target)
+
+    // Grab loop information.
+    // We use this to make sure we don't tighten a value into a (deeper) loop.
+    idom := f.Idom()
+    loops := f.loopnest()
+    loops.calculateDepths()
+
+    changed := true
+    for changed {
+        changed = false
+
+        // Reset target
+        for i := range target {
+            target[i] = nil
+        }
+
+        // Compute target locations (for moveable values only).
+        // target location = the least common ancestor of all uses in the dominator tree.
+        for _, b := range f.Blocks {
+            for _, v := range b.Values {
+                for i, a := range v.Args {
+                    if !canMove[a.ID] {
+                        continue
+                    }
+                    use := b
+                    if v.Op == OpPhi {
+                        use = b.Preds[i].b
+                    }
+                    if target[a.ID] == nil {
+                        target[a.ID] = use
+                    } else {
+                        target[a.ID] = lca.find(target[a.ID], use)
+                    }
+                }
+            }
+            for _, c := range b.ControlValues() {
+                if !canMove[c.ID] {
+                    continue
+                }
+                if target[c.ID] == nil {
+                    target[c.ID] = b
+                } else {
+                    target[c.ID] = lca.find(target[c.ID], b)
+                }
+            }
+        }
+
+        // If the target location is inside a loop,
+        // move the target location up to just before the loop head.
+        for _, b := range f.Blocks {
+            origloop := loops.b2l[b.ID]
+            for _, v := range b.Values {
+                t := target[v.ID]
+                if t == nil {
+                    continue
+                }
+                targetloop := loops.b2l[t.ID]
+                for targetloop != nil && (origloop == nil || targetloop.depth > origloop.depth) {
+                    t = idom[targetloop.header.ID]
+                    target[v.ID] = t
+                    targetloop = loops.b2l[t.ID]
+                }
+            }
+        }
+
+        // Move values to target locations.
+        for _, b := range f.Blocks {
+            for i := 0; i < len(b.Values); i++ {
+                v := b.Values[i]
+                t := target[v.ID]
+                if t == nil || t == b {
+                    // v is not moveable, or is already in correct place.
+                    continue
+                }
+                if mem := v.MemoryArg(); mem != nil {
+                    if startMem[t.ID] != mem {
+                        // We can't move a value with a memory arg unless the target block
+                        // has that memory arg as its starting memory.
+                        continue
+                    }
+                }
+                if f.pass.debug > 0 {
+                    b.Func.Warnl(v.Pos, "%v is moved", v.Op)
+                }
+                // Move v to the block which dominates its uses.
+                t.Values = append(t.Values, v)
+                v.Block = t
+                last := len(b.Values) - 1
+                b.Values[i] = b.Values[last]
+                b.Values[last] = nil
+                b.Values = b.Values[:last]
+                changed = true
+                i--
+            }
+        }
+    }
+}
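The core of the pass is the rule stated in the doc comment above: a value may move to any block that dominates all of its uses, and the block chosen is the iterated least common ancestor of the use blocks in the dominator tree. Below is a minimal standalone sketch of that computation on a toy dominator tree; the `parent`/`depth` arrays and the naive climbing `lca` are illustrative stand-ins for the compiler's `makeLCArange` structure, not part of the patch.

```go
package main

import "fmt"

// Toy dominator tree: parent[i] is the immediate dominator of block i,
// block 0 is the entry (its own parent), depth[i] is its tree depth.
//
//	    0
//	   / \
//	  1   4
//	 / \
//	2   3
var parent = []int{0, 0, 1, 1, 0}
var depth = []int{0, 1, 2, 2, 1}

// lca repeatedly climbs the deeper of the two nodes; where they meet is
// the least common ancestor in the dominator tree.
func lca(a, b int) int {
	for a != b {
		if depth[a] < depth[b] {
			a, b = b, a
		}
		a = parent[a]
	}
	return a
}

func main() {
	// A moveable value used in blocks 2 and 3 can sink from the entry
	// block to the iterated LCA of its uses.
	uses := []int{2, 3}
	target := uses[0]
	for _, u := range uses[1:] {
		target = lca(target, u)
	}
	fmt.Println("target block:", target) // prints 1: it dominates both uses
}
```

On top of this, the real pass refuses to sink a value into a deeper loop (hoisting the target to just above the loop header instead) and repeats the whole computation until no value moves.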
+
+// phiTighten moves constants closer to phi users.
+// This pass avoids having lots of constants live for lots of the program.
+// See issue 16407.
+func phiTighten(f *Func) {
+    for _, b := range f.Blocks {
+        for _, v := range b.Values {
+            if v.Op != OpPhi {
+                continue
+            }
+            for i, a := range v.Args {
+                if !a.rematerializeable() {
+                    continue // not a constant we can move around
+                }
+                if a.Block == b.Preds[i].b {
+                    continue // already in the right place
+                }
+                // Make a copy of a, put in predecessor block.
+                v.SetArg(i, a.copyInto(b.Preds[i].b))
+            }
+        }
+    }
+}
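For the shape of code phiTighten improves, consider this source-level illustration (ordinary Go, not part of the patch): the constant feeding a phi would otherwise stay live from the entry block all the way to the join point.

```go
package main

import "fmt"

// At the join, r becomes a phi of a constant and a computed value,
// roughly r = Phi(Const 1, Add x 2). Because the constant is
// rematerializeable, phiTighten copies it into the predecessor block
// feeding the phi, so the 1 need not be carried across the branch.
func g(cond bool, x int) int {
	r := 1
	if cond {
		r = x + 2
	}
	return r
}

func main() {
	fmt.Println(g(false, 40), g(true, 40)) // 1 42
}
```

The copy is cheap because rematerializeable values can be regenerated by the register allocator wherever they are needed; see issue 16407 for the motivating case.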
+
+// memState computes the memory state at the beginning and end of each block of
+// the function. The memory state is represented by a value of mem type.
+// The returned result is stored in startMem and endMem, and endMem is nil for
+// blocks with no successors (Exit, Ret, RetJmp blocks). This algorithm is not
+// suitable for infinite-loop blocks that do not contain any mem operations.
+// For example:
+//
+//	b1:
+//	    (some values)
+//	plain -> b2
+//	b2: <- b1 b2
+//	plain -> b2
+//
+// Algorithm introduction:
+//  1. The start memory state of a block is InitMem, a Phi node of type mem,
+//     or an incoming memory value.
+//  2. The start memory state of a block is consistent with the end memory state
+//     of its parent nodes. If the start memory state of a block is a Phi value,
+//     then the end memory state of its parent nodes is consistent with the
+//     corresponding argument value of the Phi node.
+//  3. The algorithm first obtains the memory state of some blocks in the tree
+//     in the first step. Then it floods the known memory state to other nodes
+//     in the second step.
+func memState(f *Func, startMem, endMem []*Value) {
+    // This slice contains the set of blocks that have had their startMem set
+    // but whose startMem value has not yet been propagated to the endMem of
+    // their predecessors.
+    changed := make([]*Block, 0)
+    // First step: init the memory state of some blocks.
+    for _, b := range f.Blocks {
+        for _, v := range b.Values {
+            var mem *Value
+            if v.Op == OpPhi {
+                if v.Type.IsMemory() {
+                    mem = v
+                }
+            } else if v.Op == OpInitMem {
+                mem = v // This is actually not needed.
+            } else if a := v.MemoryArg(); a != nil && a.Block != b {
+                // The only incoming memory value doesn't belong to this block.
+                mem = a
+            }
+            if mem != nil {
+                if old := startMem[b.ID]; old != nil {
+                    if old == mem {
+                        continue
+                    }
+                    f.Fatalf("func %s, startMem[%v] has different values, old %v, new %v", f.Name, b, old, mem)
+                }
+                startMem[b.ID] = mem
+                changed = append(changed, b)
+            }
+        }
+    }
+
+    // Second step: flood the known memory state of some blocks to others.
+    for len(changed) != 0 {
+        top := changed[0]
+        changed = changed[1:]
+        mem := startMem[top.ID]
+        for i, p := range top.Preds {
+            pb := p.b
+            if endMem[pb.ID] != nil {
+                continue
+            }
+            if mem.Op == OpPhi && mem.Block == top {
+                endMem[pb.ID] = mem.Args[i]
+            } else {
+                endMem[pb.ID] = mem
+            }
+            if startMem[pb.ID] == nil {
+                startMem[pb.ID] = endMem[pb.ID]
+                changed = append(changed, pb)
+            }
+        }
+    }
+}
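The second step of memState is a plain worklist flood: once a block's start state is known, popping it from the worklist fixes the end state of each of its predecessors, splitting through phi arguments when the state is a phi of the block itself. Here is a minimal standalone sketch of that propagation on a made-up three-block chain, with string-valued states standing in for *Value (the names `preds`, `work`, and the seeded state are illustrative only):

```go
package main

import "fmt"

func main() {
	// preds[b] lists the predecessors of block b in a toy CFG b1 -> b2 -> b3.
	preds := map[string][]string{
		"b1": {},
		"b2": {"b1"},
		"b3": {"b2"},
	}

	// Step 1 (seeding) found a known start state only for b3.
	startMem := map[string]string{"b3": "mem0"}
	endMem := map[string]string{}

	// Step 2: flood each known start state backwards to predecessors.
	work := []string{"b3"}
	for len(work) > 0 {
		top := work[0]
		work = work[1:]
		mem := startMem[top]
		for _, p := range preds[top] {
			if _, ok := endMem[p]; ok {
				continue // predecessor's end state already fixed
			}
			// No memory phi in this toy CFG, so the state flows through
			// unchanged; the real pass takes mem.Args[i] when mem is a
			// phi belonging to top.
			endMem[p] = mem
			if _, ok := startMem[p]; !ok {
				startMem[p] = endMem[p]
				work = append(work, p)
			}
		}
	}
	fmt.Println("startMem:", startMem) // all three blocks end up at mem0
	fmt.Println("endMem:  ", endMem)   // b3 stays unset: no block records it as a predecessor
}
```

The unset end state for the last block mirrors the doc comment above: endMem is nil for blocks with no successors.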