summaryrefslogtreecommitdiffstats
path: root/src/cmd/link/internal/ld/deadcode.go
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:23:18 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:23:18 +0000
commit43a123c1ae6613b3efeed291fa552ecd909d3acf (patch)
treefd92518b7024bc74031f78a1cf9e454b65e73665 /src/cmd/link/internal/ld/deadcode.go
parentInitial commit. (diff)
downloadgolang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.tar.xz
golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.zip
Adding upstream version 1.20.14.upstream/1.20.14upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/cmd/link/internal/ld/deadcode.go')
-rw-r--r--src/cmd/link/internal/ld/deadcode.go469
1 files changed, 469 insertions, 0 deletions
diff --git a/src/cmd/link/internal/ld/deadcode.go b/src/cmd/link/internal/ld/deadcode.go
new file mode 100644
index 0000000..0738a51
--- /dev/null
+++ b/src/cmd/link/internal/ld/deadcode.go
@@ -0,0 +1,469 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ld
+
+import (
+ "cmd/internal/goobj"
+ "cmd/internal/objabi"
+ "cmd/internal/sys"
+ "cmd/link/internal/loader"
+ "cmd/link/internal/sym"
+ "fmt"
+ "internal/buildcfg"
+ "unicode"
+)
+
+var _ = fmt.Print
+
+type deadcodePass struct {
+ ctxt *Link
+ ldr *loader.Loader
+ wq heap // work queue, using min-heap for better locality
+
+ ifaceMethod map[methodsig]bool // methods called from reached interface call sites
+ genericIfaceMethod map[string]bool // names of methods called from reached generic interface call sites
+ markableMethods []methodref // methods of reached types
+ reflectSeen bool // whether we have seen a reflect method call
+ dynlink bool
+
+ methodsigstmp []methodsig // scratch buffer for decoding method signatures
+}
+
+func (d *deadcodePass) init() {
+ d.ldr.InitReachable()
+ d.ifaceMethod = make(map[methodsig]bool)
+ d.genericIfaceMethod = make(map[string]bool)
+ if buildcfg.Experiment.FieldTrack {
+ d.ldr.Reachparent = make([]loader.Sym, d.ldr.NSym())
+ }
+ d.dynlink = d.ctxt.DynlinkingGo()
+
+ if d.ctxt.BuildMode == BuildModeShared {
+ // Mark all symbols defined in this library as reachable when
+ // building a shared library.
+ n := d.ldr.NDef()
+ for i := 1; i < n; i++ {
+ s := loader.Sym(i)
+ d.mark(s, 0)
+ }
+ return
+ }
+
+ var names []string
+
+ // In a normal binary, start at main.main and the init
+ // functions and mark what is reachable from there.
+ if d.ctxt.linkShared && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
+ names = append(names, "main.main", "main..inittask")
+ } else {
+ // The external linker refers main symbol directly.
+ if d.ctxt.LinkMode == LinkExternal && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
+ if d.ctxt.HeadType == objabi.Hwindows && d.ctxt.Arch.Family == sys.I386 {
+ *flagEntrySymbol = "_main"
+ } else {
+ *flagEntrySymbol = "main"
+ }
+ }
+ names = append(names, *flagEntrySymbol)
+ }
+ // runtime.unreachableMethod is a function that will throw if called.
+ // We redirect unreachable methods to it.
+ names = append(names, "runtime.unreachableMethod")
+ if d.ctxt.BuildMode == BuildModePlugin {
+ names = append(names, objabi.PathToPrefix(*flagPluginPath)+"..inittask", objabi.PathToPrefix(*flagPluginPath)+".main", "go:plugin.tabs")
+
+ // We don't keep the go.plugin.exports symbol,
+ // but we do keep the symbols it refers to.
+ exportsIdx := d.ldr.Lookup("go:plugin.exports", 0)
+ if exportsIdx != 0 {
+ relocs := d.ldr.Relocs(exportsIdx)
+ for i := 0; i < relocs.Count(); i++ {
+ d.mark(relocs.At(i).Sym(), 0)
+ }
+ }
+ }
+
+ if d.ctxt.Debugvlog > 1 {
+ d.ctxt.Logf("deadcode start names: %v\n", names)
+ }
+
+ for _, name := range names {
+ // Mark symbol as a data/ABI0 symbol.
+ d.mark(d.ldr.Lookup(name, 0), 0)
+ if abiInternalVer != 0 {
+ // Also mark any Go functions (internal ABI).
+ d.mark(d.ldr.Lookup(name, abiInternalVer), 0)
+ }
+ }
+
+ // All dynamic exports are roots.
+ for _, s := range d.ctxt.dynexp {
+ if d.ctxt.Debugvlog > 1 {
+ d.ctxt.Logf("deadcode start dynexp: %s<%d>\n", d.ldr.SymName(s), d.ldr.SymVersion(s))
+ }
+ d.mark(s, 0)
+ }
+}
+
+func (d *deadcodePass) flood() {
+ var methods []methodref
+ for !d.wq.empty() {
+ symIdx := d.wq.pop()
+
+ d.reflectSeen = d.reflectSeen || d.ldr.IsReflectMethod(symIdx)
+
+ isgotype := d.ldr.IsGoType(symIdx)
+ relocs := d.ldr.Relocs(symIdx)
+ var usedInIface bool
+
+ if isgotype {
+ if d.dynlink {
+ // When dynamic linking, a type may be passed across DSO
+ // boundary and get converted to interface at the other side.
+ d.ldr.SetAttrUsedInIface(symIdx, true)
+ }
+ usedInIface = d.ldr.AttrUsedInIface(symIdx)
+ }
+
+ methods = methods[:0]
+ for i := 0; i < relocs.Count(); i++ {
+ r := relocs.At(i)
+ // When build with "-linkshared", we can't tell if the interface
+ // method in itab will be used or not. Ignore the weak attribute.
+ if r.Weak() && !(d.ctxt.linkShared && d.ldr.IsItab(symIdx)) {
+ continue
+ }
+ t := r.Type()
+ switch t {
+ case objabi.R_METHODOFF:
+ if i+2 >= relocs.Count() {
+ panic("expect three consecutive R_METHODOFF relocs")
+ }
+ if usedInIface {
+ methods = append(methods, methodref{src: symIdx, r: i})
+ // The method descriptor is itself a type descriptor, and
+ // it can be used to reach other types, e.g. by using
+ // reflect.Type.Method(i).Type.In(j). We need to traverse
+ // its child types with UsedInIface set. (See also the
+ // comment below.)
+ rs := r.Sym()
+ if !d.ldr.AttrUsedInIface(rs) {
+ d.ldr.SetAttrUsedInIface(rs, true)
+ if d.ldr.AttrReachable(rs) {
+ d.ldr.SetAttrReachable(rs, false)
+ d.mark(rs, symIdx)
+ }
+ }
+ }
+ i += 2
+ continue
+ case objabi.R_USETYPE:
+ // type symbol used for DWARF. we need to load the symbol but it may not
+ // be otherwise reachable in the program.
+ // do nothing for now as we still load all type symbols.
+ continue
+ case objabi.R_USEIFACE:
+ // R_USEIFACE is a marker relocation that tells the linker the type is
+ // converted to an interface, i.e. should have UsedInIface set. See the
+ // comment below for why we need to unset the Reachable bit and re-mark it.
+ rs := r.Sym()
+ if !d.ldr.AttrUsedInIface(rs) {
+ d.ldr.SetAttrUsedInIface(rs, true)
+ if d.ldr.AttrReachable(rs) {
+ d.ldr.SetAttrReachable(rs, false)
+ d.mark(rs, symIdx)
+ }
+ }
+ continue
+ case objabi.R_USEIFACEMETHOD:
+ // R_USEIFACEMETHOD is a marker relocation that marks an interface
+ // method as used.
+ rs := r.Sym()
+ if d.ctxt.linkShared && (d.ldr.SymType(rs) == sym.SDYNIMPORT || d.ldr.SymType(rs) == sym.Sxxx) {
+ // Don't decode symbol from shared library (we'll mark all exported methods anyway).
+ // We check for both SDYNIMPORT and Sxxx because name-mangled symbols haven't
+ // been resolved at this point.
+ continue
+ }
+ m := d.decodeIfaceMethod(d.ldr, d.ctxt.Arch, rs, r.Add())
+ if d.ctxt.Debugvlog > 1 {
+ d.ctxt.Logf("reached iface method: %v\n", m)
+ }
+ d.ifaceMethod[m] = true
+ continue
+ case objabi.R_USEGENERICIFACEMETHOD:
+ name := d.decodeGenericIfaceMethod(d.ldr, r.Sym())
+ if d.ctxt.Debugvlog > 1 {
+ d.ctxt.Logf("reached generic iface method: %s\n", name)
+ }
+ d.genericIfaceMethod[name] = true
+ continue // don't mark referenced symbol - it is not needed in the final binary.
+ }
+ rs := r.Sym()
+ if isgotype && usedInIface && d.ldr.IsGoType(rs) && !d.ldr.AttrUsedInIface(rs) {
+ // If a type is converted to an interface, it is possible to obtain an
+ // interface with a "child" type of it using reflection (e.g. obtain an
+ // interface of T from []chan T). We need to traverse its "child" types
+ // with UsedInIface attribute set.
+ // When visiting the child type (chan T in the example above), it will
+ // have UsedInIface set, so it in turn will mark and (re)visit its children
+ // (e.g. T above).
+ // We unset the reachable bit here, so if the child type is already visited,
+ // it will be visited again.
+ // Note that a type symbol can be visited at most twice, one without
+ // UsedInIface and one with. So termination is still guaranteed.
+ d.ldr.SetAttrUsedInIface(rs, true)
+ d.ldr.SetAttrReachable(rs, false)
+ }
+ d.mark(rs, symIdx)
+ }
+ naux := d.ldr.NAux(symIdx)
+ for i := 0; i < naux; i++ {
+ a := d.ldr.Aux(symIdx, i)
+ if a.Type() == goobj.AuxGotype {
+ // A symbol being reachable doesn't imply we need its
+ // type descriptor. Don't mark it.
+ continue
+ }
+ d.mark(a.Sym(), symIdx)
+ }
+ // Some host object symbols have an outer object, which acts like a
+ // "carrier" symbol, or it holds all the symbols for a particular
+ // section. We need to mark all "referenced" symbols from that carrier,
+ // so we make sure we're pulling in all outer symbols, and their sub
+ // symbols. This is not ideal, and these carrier/section symbols could
+ // be removed.
+ if d.ldr.IsExternal(symIdx) {
+ d.mark(d.ldr.OuterSym(symIdx), symIdx)
+ d.mark(d.ldr.SubSym(symIdx), symIdx)
+ }
+
+ if len(methods) != 0 {
+ if !isgotype {
+ panic("method found on non-type symbol")
+ }
+ // Decode runtime type information for type methods
+ // to help work out which methods can be called
+ // dynamically via interfaces.
+ methodsigs := d.decodetypeMethods(d.ldr, d.ctxt.Arch, symIdx, &relocs)
+ if len(methods) != len(methodsigs) {
+ panic(fmt.Sprintf("%q has %d method relocations for %d methods", d.ldr.SymName(symIdx), len(methods), len(methodsigs)))
+ }
+ for i, m := range methodsigs {
+ methods[i].m = m
+ if d.ctxt.Debugvlog > 1 {
+ d.ctxt.Logf("markable method: %v of sym %v %s\n", m, symIdx, d.ldr.SymName(symIdx))
+ }
+ }
+ d.markableMethods = append(d.markableMethods, methods...)
+ }
+ }
+}
+
+func (d *deadcodePass) mark(symIdx, parent loader.Sym) {
+ if symIdx != 0 && !d.ldr.AttrReachable(symIdx) {
+ d.wq.push(symIdx)
+ d.ldr.SetAttrReachable(symIdx, true)
+ if buildcfg.Experiment.FieldTrack && d.ldr.Reachparent[symIdx] == 0 {
+ d.ldr.Reachparent[symIdx] = parent
+ }
+ if *flagDumpDep {
+ to := d.ldr.SymName(symIdx)
+ if to != "" {
+ if d.ldr.AttrUsedInIface(symIdx) {
+ to += " <UsedInIface>"
+ }
+ from := "_"
+ if parent != 0 {
+ from = d.ldr.SymName(parent)
+ if d.ldr.AttrUsedInIface(parent) {
+ from += " <UsedInIface>"
+ }
+ }
+ fmt.Printf("%s -> %s\n", from, to)
+ }
+ }
+ }
+}
+
+func (d *deadcodePass) markMethod(m methodref) {
+ relocs := d.ldr.Relocs(m.src)
+ d.mark(relocs.At(m.r).Sym(), m.src)
+ d.mark(relocs.At(m.r+1).Sym(), m.src)
+ d.mark(relocs.At(m.r+2).Sym(), m.src)
+}
+
+// deadcode marks all reachable symbols.
+//
+// The basis of the dead code elimination is a flood fill of symbols,
+// following their relocations, beginning at *flagEntrySymbol.
+//
+// This flood fill is wrapped in logic for pruning unused methods.
+// All methods are mentioned by relocations on their receiver's *rtype.
+// These relocations are specially defined as R_METHODOFF by the compiler
+// so we can detect and manipulated them here.
+//
+// There are three ways a method of a reachable type can be invoked:
+//
+// 1. direct call
+// 2. through a reachable interface type
+// 3. reflect.Value.Method (or MethodByName), or reflect.Type.Method
+// (or MethodByName)
+//
+// The first case is handled by the flood fill, a directly called method
+// is marked as reachable.
+//
+// The second case is handled by decomposing all reachable interface
+// types into method signatures. Each encountered method is compared
+// against the interface method signatures, if it matches it is marked
+// as reachable. This is extremely conservative, but easy and correct.
+//
+// The third case is handled by looking to see if any of:
+// - reflect.Value.Method or MethodByName is reachable
+// - reflect.Type.Method or MethodByName is called (through the
+// REFLECTMETHOD attribute marked by the compiler).
+//
+// If any of these happen, all bets are off and all exported methods
+// of reachable types are marked reachable.
+//
+// Any unreached text symbols are removed from ctxt.Textp.
+func deadcode(ctxt *Link) {
+ ldr := ctxt.loader
+ d := deadcodePass{ctxt: ctxt, ldr: ldr}
+ d.init()
+ d.flood()
+
+ methSym := ldr.Lookup("reflect.Value.Method", abiInternalVer)
+ methByNameSym := ldr.Lookup("reflect.Value.MethodByName", abiInternalVer)
+
+ if ctxt.DynlinkingGo() {
+ // Exported methods may satisfy interfaces we don't know
+ // about yet when dynamically linking.
+ d.reflectSeen = true
+ }
+
+ for {
+ // Methods might be called via reflection. Give up on
+ // static analysis, mark all exported methods of
+ // all reachable types as reachable.
+ d.reflectSeen = d.reflectSeen || (methSym != 0 && ldr.AttrReachable(methSym)) || (methByNameSym != 0 && ldr.AttrReachable(methByNameSym))
+
+ // Mark all methods that could satisfy a discovered
+ // interface as reachable. We recheck old marked interfaces
+ // as new types (with new methods) may have been discovered
+ // in the last pass.
+ rem := d.markableMethods[:0]
+ for _, m := range d.markableMethods {
+ if (d.reflectSeen && (m.isExported() || d.dynlink)) || d.ifaceMethod[m.m] || d.genericIfaceMethod[m.m.name] {
+ d.markMethod(m)
+ } else {
+ rem = append(rem, m)
+ }
+ }
+ d.markableMethods = rem
+
+ if d.wq.empty() {
+ // No new work was discovered. Done.
+ break
+ }
+ d.flood()
+ }
+}
+
+// methodsig is a typed method signature (name + type).
+type methodsig struct {
+ name string
+ typ loader.Sym // type descriptor symbol of the function
+}
+
+// methodref holds the relocations from a receiver type symbol to its
+// method. There are three relocations, one for each of the fields in
+// the reflect.method struct: mtyp, ifn, and tfn.
+type methodref struct {
+ m methodsig
+ src loader.Sym // receiver type symbol
+ r int // the index of R_METHODOFF relocations
+}
+
+func (m methodref) isExported() bool {
+ for _, r := range m.m.name {
+ return unicode.IsUpper(r)
+ }
+ panic("methodref has no signature")
+}
+
+// decodeMethodSig decodes an array of method signature information.
+// Each element of the array is size bytes. The first 4 bytes is a
+// nameOff for the method name, and the next 4 bytes is a typeOff for
+// the function type.
+//
+// Conveniently this is the layout of both runtime.method and runtime.imethod.
+func (d *deadcodePass) decodeMethodSig(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs, off, size, count int) []methodsig {
+ if cap(d.methodsigstmp) < count {
+ d.methodsigstmp = append(d.methodsigstmp[:0], make([]methodsig, count)...)
+ }
+ var methods = d.methodsigstmp[:count]
+ for i := 0; i < count; i++ {
+ methods[i].name = decodetypeName(ldr, symIdx, relocs, off)
+ methods[i].typ = decodeRelocSym(ldr, symIdx, relocs, int32(off+4))
+ off += size
+ }
+ return methods
+}
+
+// Decode the method of interface type symbol symIdx at offset off.
+func (d *deadcodePass) decodeIfaceMethod(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, off int64) methodsig {
+ p := ldr.Data(symIdx)
+ if p == nil {
+ panic(fmt.Sprintf("missing symbol %q", ldr.SymName(symIdx)))
+ }
+ if decodetypeKind(arch, p)&kindMask != kindInterface {
+ panic(fmt.Sprintf("symbol %q is not an interface", ldr.SymName(symIdx)))
+ }
+ relocs := ldr.Relocs(symIdx)
+ var m methodsig
+ m.name = decodetypeName(ldr, symIdx, &relocs, int(off))
+ m.typ = decodeRelocSym(ldr, symIdx, &relocs, int32(off+4))
+ return m
+}
+
+// Decode the method name stored in symbol symIdx. The symbol should contain just the bytes of a method name.
+func (d *deadcodePass) decodeGenericIfaceMethod(ldr *loader.Loader, symIdx loader.Sym) string {
+ return string(ldr.Data(symIdx))
+}
+
+func (d *deadcodePass) decodetypeMethods(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs) []methodsig {
+ p := ldr.Data(symIdx)
+ if !decodetypeHasUncommon(arch, p) {
+ panic(fmt.Sprintf("no methods on %q", ldr.SymName(symIdx)))
+ }
+ off := commonsize(arch) // reflect.rtype
+ switch decodetypeKind(arch, p) & kindMask {
+ case kindStruct: // reflect.structType
+ off += 4 * arch.PtrSize
+ case kindPtr: // reflect.ptrType
+ off += arch.PtrSize
+ case kindFunc: // reflect.funcType
+ off += arch.PtrSize // 4 bytes, pointer aligned
+ case kindSlice: // reflect.sliceType
+ off += arch.PtrSize
+ case kindArray: // reflect.arrayType
+ off += 3 * arch.PtrSize
+ case kindChan: // reflect.chanType
+ off += 2 * arch.PtrSize
+ case kindMap: // reflect.mapType
+ off += 4*arch.PtrSize + 8
+ case kindInterface: // reflect.interfaceType
+ off += 3 * arch.PtrSize
+ default:
+ // just Sizeof(rtype)
+ }
+
+ mcount := int(decodeInuxi(arch, p[off+4:], 2))
+ moff := int(decodeInuxi(arch, p[off+4+2+2:], 4))
+ off += moff // offset to array of reflect.method values
+ const sizeofMethod = 4 * 4 // sizeof reflect.method in program
+ return d.decodeMethodSig(ldr, arch, symIdx, relocs, off, sizeofMethod, mcount)
+}