diff options
Diffstat (limited to 'src/cmd/compile/internal/ssa/writebarrier.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/writebarrier.go | 679 |
1 files changed, 679 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/writebarrier.go b/src/cmd/compile/internal/ssa/writebarrier.go new file mode 100644 index 0000000..1676a93 --- /dev/null +++ b/src/cmd/compile/internal/ssa/writebarrier.go @@ -0,0 +1,679 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +import ( + "cmd/compile/internal/reflectdata" + "cmd/compile/internal/types" + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/src" + "fmt" +) + +// A ZeroRegion records parts of an object which are known to be zero. +// A ZeroRegion only applies to a single memory state. +// Each bit in mask is set if the corresponding pointer-sized word of +// the base object is known to be zero. +// In other words, if mask & (1<<i) != 0, then [base+i*ptrSize, base+(i+1)*ptrSize) +// is known to be zero. +type ZeroRegion struct { + base *Value + mask uint64 +} + +// needwb reports whether we need write barrier for store op v. +// v must be Store/Move/Zero. +// zeroes provides known zero information (keyed by ID of memory-type values). +func needwb(v *Value, zeroes map[ID]ZeroRegion, select1 []*Value) bool { + t, ok := v.Aux.(*types.Type) + if !ok { + v.Fatalf("store aux is not a type: %s", v.LongString()) + } + if !t.HasPointers() { + return false + } + if IsStackAddr(v.Args[0]) { + return false // write on stack doesn't need write barrier + } + if v.Op == OpMove && IsReadOnlyGlobalAddr(v.Args[1]) { + if mem, ok := IsNewObject(v.Args[0], select1); ok && mem == v.MemoryArg() { + // Copying data from readonly memory into a fresh object doesn't need a write barrier. + return false + } + } + if v.Op == OpStore && IsGlobalAddr(v.Args[1]) { + // Storing pointers to non-heap locations into zeroed memory doesn't need a write barrier. + ptr := v.Args[0] + var off int64 + size := v.Aux.(*types.Type).Size() + for ptr.Op == OpOffPtr { + off += ptr.AuxInt + ptr = ptr.Args[0] + } + ptrSize := v.Block.Func.Config.PtrSize + if off%ptrSize != 0 || size%ptrSize != 0 { + v.Fatalf("unaligned pointer write") + } + if off < 0 || off+size > 64*ptrSize { + // write goes off end of tracked offsets + return true + } + z := zeroes[v.MemoryArg().ID] + if ptr != z.base { + return true + } + for i := off; i < off+size; i += ptrSize { + if z.mask>>uint(i/ptrSize)&1 == 0 { + return true // not known to be zero + } + } + // All written locations are known to be zero - write barrier not needed. + return false + } + return true +} + +// writebarrier pass inserts write barriers for store ops (Store, Move, Zero) +// when necessary (the condition above). It rewrites store ops to branches +// and runtime calls, like +// +// if writeBarrier.enabled { +// gcWriteBarrier(ptr, val) // Not a regular Go call +// } else { +// *ptr = val +// } +// +// A sequence of WB stores for many pointer fields of a single type will +// be emitted together, with a single branch. +func writebarrier(f *Func) { + if !f.fe.UseWriteBarrier() { + return + } + + var sb, sp, wbaddr, const0 *Value + var typedmemmove, typedmemclr, gcWriteBarrier *obj.LSym + var stores, after []*Value + var sset *sparseSet + var storeNumber []int32 + + // Compute map from a value to the SelectN [1] value that uses it. + select1 := f.Cache.allocValueSlice(f.NumValues()) + defer func() { f.Cache.freeValueSlice(select1) }() + for _, b := range f.Blocks { + for _, v := range b.Values { + if v.Op != OpSelectN { + continue + } + if v.AuxInt != 1 { + continue + } + select1[v.Args[0].ID] = v + } + } + + zeroes := f.computeZeroMap(select1) + for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand + // first, identify all the stores that need to insert a write barrier. + // mark them with WB ops temporarily. record presence of WB ops. + nWBops := 0 // count of temporarily created WB ops remaining to be rewritten in the current block + for _, v := range b.Values { + switch v.Op { + case OpStore, OpMove, OpZero: + if needwb(v, zeroes, select1) { + switch v.Op { + case OpStore: + v.Op = OpStoreWB + case OpMove: + v.Op = OpMoveWB + case OpZero: + v.Op = OpZeroWB + } + nWBops++ + } + } + } + if nWBops == 0 { + continue + } + + if wbaddr == nil { + // lazily initialize global values for write barrier test and calls + // find SB and SP values in entry block + initpos := f.Entry.Pos + sp, sb = f.spSb() + wbsym := f.fe.Syslook("writeBarrier") + wbaddr = f.Entry.NewValue1A(initpos, OpAddr, f.Config.Types.UInt32Ptr, wbsym, sb) + gcWriteBarrier = f.fe.Syslook("gcWriteBarrier") + typedmemmove = f.fe.Syslook("typedmemmove") + typedmemclr = f.fe.Syslook("typedmemclr") + const0 = f.ConstInt32(f.Config.Types.UInt32, 0) + + // allocate auxiliary data structures for computing store order + sset = f.newSparseSet(f.NumValues()) + defer f.retSparseSet(sset) + storeNumber = f.Cache.allocInt32Slice(f.NumValues()) + defer f.Cache.freeInt32Slice(storeNumber) + } + + // order values in store order + b.Values = storeOrder(b.Values, sset, storeNumber) + + firstSplit := true + again: + // find the start and end of the last contiguous WB store sequence. + // a branch will be inserted there. values after it will be moved + // to a new block. + var last *Value + var start, end int + values := b.Values + FindSeq: + for i := len(values) - 1; i >= 0; i-- { + w := values[i] + switch w.Op { + case OpStoreWB, OpMoveWB, OpZeroWB: + start = i + if last == nil { + last = w + end = i + 1 + } + case OpVarDef, OpVarLive: + continue + default: + if last == nil { + continue + } + break FindSeq + } + } + stores = append(stores[:0], b.Values[start:end]...) // copy to avoid aliasing + after = append(after[:0], b.Values[end:]...) + b.Values = b.Values[:start] + + // find the memory before the WB stores + mem := stores[0].MemoryArg() + pos := stores[0].Pos + bThen := f.NewBlock(BlockPlain) + bElse := f.NewBlock(BlockPlain) + bEnd := f.NewBlock(b.Kind) + bThen.Pos = pos + bElse.Pos = pos + bEnd.Pos = b.Pos + b.Pos = pos + + // set up control flow for end block + bEnd.CopyControls(b) + bEnd.Likely = b.Likely + for _, e := range b.Succs { + bEnd.Succs = append(bEnd.Succs, e) + e.b.Preds[e.i].b = bEnd + } + + // set up control flow for write barrier test + // load word, test word, avoiding partial register write from load byte. + cfgtypes := &f.Config.Types + flag := b.NewValue2(pos, OpLoad, cfgtypes.UInt32, wbaddr, mem) + flag = b.NewValue2(pos, OpNeq32, cfgtypes.Bool, flag, const0) + b.Kind = BlockIf + b.SetControl(flag) + b.Likely = BranchUnlikely + b.Succs = b.Succs[:0] + b.AddEdgeTo(bThen) + b.AddEdgeTo(bElse) + // TODO: For OpStoreWB and the buffered write barrier, + // we could move the write out of the write barrier, + // which would lead to fewer branches. We could do + // something similar to OpZeroWB, since the runtime + // could provide just the barrier half and then we + // could unconditionally do an OpZero (which could + // also generate better zeroing code). OpMoveWB is + // trickier and would require changing how + // cgoCheckMemmove works. + bThen.AddEdgeTo(bEnd) + bElse.AddEdgeTo(bEnd) + + // for each write barrier store, append write barrier version to bThen + // and simple store version to bElse + memThen := mem + memElse := mem + + // If the source of a MoveWB is volatile (will be clobbered by a + // function call), we need to copy it to a temporary location, as + // marshaling the args of typedmemmove might clobber the value we're + // trying to move. + // Look for volatile source, copy it to temporary before we emit any + // call. + // It is unlikely to have more than one of them. Just do a linear + // search instead of using a map. + type volatileCopy struct { + src *Value // address of original volatile value + tmp *Value // address of temporary we've copied the volatile value into + } + var volatiles []volatileCopy + copyLoop: + for _, w := range stores { + if w.Op == OpMoveWB { + val := w.Args[1] + if isVolatile(val) { + for _, c := range volatiles { + if val == c.src { + continue copyLoop // already copied + } + } + + t := val.Type.Elem() + tmp := f.fe.Auto(w.Pos, t) + memThen = bThen.NewValue1A(w.Pos, OpVarDef, types.TypeMem, tmp, memThen) + tmpaddr := bThen.NewValue2A(w.Pos, OpLocalAddr, t.PtrTo(), tmp, sp, memThen) + siz := t.Size() + memThen = bThen.NewValue3I(w.Pos, OpMove, types.TypeMem, siz, tmpaddr, val, memThen) + memThen.Aux = t + volatiles = append(volatiles, volatileCopy{val, tmpaddr}) + } + } + } + + for _, w := range stores { + ptr := w.Args[0] + pos := w.Pos + + var fn *obj.LSym + var typ *obj.LSym + var val *Value + switch w.Op { + case OpStoreWB: + val = w.Args[1] + nWBops-- + case OpMoveWB: + fn = typedmemmove + val = w.Args[1] + typ = reflectdata.TypeLinksym(w.Aux.(*types.Type)) + nWBops-- + case OpZeroWB: + fn = typedmemclr + typ = reflectdata.TypeLinksym(w.Aux.(*types.Type)) + nWBops-- + case OpVarDef, OpVarLive: + } + + // then block: emit write barrier call + switch w.Op { + case OpStoreWB, OpMoveWB, OpZeroWB: + if w.Op == OpStoreWB { + memThen = bThen.NewValue3A(pos, OpWB, types.TypeMem, gcWriteBarrier, ptr, val, memThen) + } else { + srcval := val + if w.Op == OpMoveWB && isVolatile(srcval) { + for _, c := range volatiles { + if srcval == c.src { + srcval = c.tmp + break + } + } + } + memThen = wbcall(pos, bThen, fn, typ, ptr, srcval, memThen, sp, sb) + } + // Note that we set up a writebarrier function call. + f.fe.SetWBPos(pos) + case OpVarDef, OpVarLive: + memThen = bThen.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memThen) + } + + // else block: normal store + switch w.Op { + case OpStoreWB: + memElse = bElse.NewValue3A(pos, OpStore, types.TypeMem, w.Aux, ptr, val, memElse) + case OpMoveWB: + memElse = bElse.NewValue3I(pos, OpMove, types.TypeMem, w.AuxInt, ptr, val, memElse) + memElse.Aux = w.Aux + case OpZeroWB: + memElse = bElse.NewValue2I(pos, OpZero, types.TypeMem, w.AuxInt, ptr, memElse) + memElse.Aux = w.Aux + case OpVarDef, OpVarLive: + memElse = bElse.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memElse) + } + } + + // merge memory + // Splice memory Phi into the last memory of the original sequence, + // which may be used in subsequent blocks. Other memories in the + // sequence must be dead after this block since there can be only + // one memory live. + bEnd.Values = append(bEnd.Values, last) + last.Block = bEnd + last.reset(OpPhi) + last.Pos = last.Pos.WithNotStmt() + last.Type = types.TypeMem + last.AddArg(memThen) + last.AddArg(memElse) + for _, w := range stores { + if w != last { + w.resetArgs() + } + } + for _, w := range stores { + if w != last { + f.freeValue(w) + } + } + + // put values after the store sequence into the end block + bEnd.Values = append(bEnd.Values, after...) + for _, w := range after { + w.Block = bEnd + } + + // Preemption is unsafe between loading the write + // barrier-enabled flag and performing the write + // because that would allow a GC phase transition, + // which would invalidate the flag. Remember the + // conditional block so liveness analysis can disable + // safe-points. This is somewhat subtle because we're + // splitting b bottom-up. + if firstSplit { + // Add b itself. + b.Func.WBLoads = append(b.Func.WBLoads, b) + firstSplit = false + } else { + // We've already split b, so we just pushed a + // write barrier test into bEnd. + b.Func.WBLoads = append(b.Func.WBLoads, bEnd) + } + + // if we have more stores in this block, do this block again + if nWBops > 0 { + goto again + } + } +} + +// computeZeroMap returns a map from an ID of a memory value to +// a set of locations that are known to be zeroed at that memory value. +func (f *Func) computeZeroMap(select1 []*Value) map[ID]ZeroRegion { + + ptrSize := f.Config.PtrSize + // Keep track of which parts of memory are known to be zero. + // This helps with removing write barriers for various initialization patterns. + // This analysis is conservative. We only keep track, for each memory state, of + // which of the first 64 words of a single object are known to be zero. + zeroes := map[ID]ZeroRegion{} + // Find new objects. + for _, b := range f.Blocks { + for _, v := range b.Values { + if mem, ok := IsNewObject(v, select1); ok { + // While compiling package runtime itself, we might see user + // calls to newobject, which will have result type + // unsafe.Pointer instead. We can't easily infer how large the + // allocated memory is, so just skip it. + if types.LocalPkg.Path == "runtime" && v.Type.IsUnsafePtr() { + continue + } + + nptr := v.Type.Elem().Size() / ptrSize + if nptr > 64 { + nptr = 64 + } + zeroes[mem.ID] = ZeroRegion{base: v, mask: 1<<uint(nptr) - 1} + } + } + } + // Find stores to those new objects. + for { + changed := false + for _, b := range f.Blocks { + // Note: iterating forwards helps convergence, as values are + // typically (but not always!) in store order. + for _, v := range b.Values { + if v.Op != OpStore { + continue + } + z, ok := zeroes[v.MemoryArg().ID] + if !ok { + continue + } + ptr := v.Args[0] + var off int64 + size := v.Aux.(*types.Type).Size() + for ptr.Op == OpOffPtr { + off += ptr.AuxInt + ptr = ptr.Args[0] + } + if ptr != z.base { + // Different base object - we don't know anything. + // We could even be writing to the base object we know + // about, but through an aliased but offset pointer. + // So we have to throw all the zero information we have away. + continue + } + // Round to cover any partially written pointer slots. + // Pointer writes should never be unaligned like this, but non-pointer + // writes to pointer-containing types will do this. + if d := off % ptrSize; d != 0 { + off -= d + size += d + } + if d := size % ptrSize; d != 0 { + size += ptrSize - d + } + // Clip to the 64 words that we track. + min := off + max := off + size + if min < 0 { + min = 0 + } + if max > 64*ptrSize { + max = 64 * ptrSize + } + // Clear bits for parts that we are writing (and hence + // will no longer necessarily be zero). + for i := min; i < max; i += ptrSize { + bit := i / ptrSize + z.mask &^= 1 << uint(bit) + } + if z.mask == 0 { + // No more known zeros - don't bother keeping. + continue + } + // Save updated known zero contents for new store. + if zeroes[v.ID] != z { + zeroes[v.ID] = z + changed = true + } + } + } + if !changed { + break + } + } + if f.pass.debug > 0 { + fmt.Printf("func %s\n", f.Name) + for mem, z := range zeroes { + fmt.Printf(" memory=v%d ptr=%v zeromask=%b\n", mem, z.base, z.mask) + } + } + return zeroes +} + +// wbcall emits write barrier runtime call in b, returns memory. +func wbcall(pos src.XPos, b *Block, fn, typ *obj.LSym, ptr, val, mem, sp, sb *Value) *Value { + config := b.Func.Config + + var wbargs []*Value + // TODO (register args) this is a bit of a hack. + inRegs := b.Func.ABIDefault == b.Func.ABI1 && len(config.intParamRegs) >= 3 + + // put arguments on stack + off := config.ctxt.Arch.FixedFrameSize + + var argTypes []*types.Type + if typ != nil { // for typedmemmove + taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb) + argTypes = append(argTypes, b.Func.Config.Types.Uintptr) + off = round(off, taddr.Type.Alignment()) + if inRegs { + wbargs = append(wbargs, taddr) + } else { + arg := b.NewValue1I(pos, OpOffPtr, taddr.Type.PtrTo(), off, sp) + mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, taddr, mem) + } + off += taddr.Type.Size() + } + + argTypes = append(argTypes, ptr.Type) + off = round(off, ptr.Type.Alignment()) + if inRegs { + wbargs = append(wbargs, ptr) + } else { + arg := b.NewValue1I(pos, OpOffPtr, ptr.Type.PtrTo(), off, sp) + mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, ptr, mem) + } + off += ptr.Type.Size() + + if val != nil { + argTypes = append(argTypes, val.Type) + off = round(off, val.Type.Alignment()) + if inRegs { + wbargs = append(wbargs, val) + } else { + arg := b.NewValue1I(pos, OpOffPtr, val.Type.PtrTo(), off, sp) + mem = b.NewValue3A(pos, OpStore, types.TypeMem, val.Type, arg, val, mem) + } + off += val.Type.Size() + } + off = round(off, config.PtrSize) + wbargs = append(wbargs, mem) + + // issue call + call := b.NewValue0A(pos, OpStaticCall, types.TypeResultMem, StaticAuxCall(fn, b.Func.ABIDefault.ABIAnalyzeTypes(nil, argTypes, nil))) + call.AddArgs(wbargs...) + call.AuxInt = off - config.ctxt.Arch.FixedFrameSize + return b.NewValue1I(pos, OpSelectN, types.TypeMem, 0, call) +} + +// round to a multiple of r, r is a power of 2. +func round(o int64, r int64) int64 { + return (o + r - 1) &^ (r - 1) +} + +// IsStackAddr reports whether v is known to be an address of a stack slot. +func IsStackAddr(v *Value) bool { + for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy { + v = v.Args[0] + } + switch v.Op { + case OpSP, OpLocalAddr, OpSelectNAddr, OpGetCallerSP: + return true + } + return false +} + +// IsGlobalAddr reports whether v is known to be an address of a global (or nil). +func IsGlobalAddr(v *Value) bool { + for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy { + v = v.Args[0] + } + if v.Op == OpAddr && v.Args[0].Op == OpSB { + return true // address of a global + } + if v.Op == OpConstNil { + return true + } + if v.Op == OpLoad && IsReadOnlyGlobalAddr(v.Args[0]) { + return true // loading from a read-only global - the resulting address can't be a heap address. + } + return false +} + +// IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global. +func IsReadOnlyGlobalAddr(v *Value) bool { + if v.Op == OpConstNil { + // Nil pointers are read only. See issue 33438. + return true + } + if v.Op == OpAddr && v.Aux.(*obj.LSym).Type == objabi.SRODATA { + return true + } + return false +} + +// IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object, +// if so, also returns the memory state mem at which v is zero. +func IsNewObject(v *Value, select1 []*Value) (mem *Value, ok bool) { + f := v.Block.Func + c := f.Config + if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 { + if v.Op != OpSelectN || v.AuxInt != 0 { + return nil, false + } + mem = select1[v.Args[0].ID] + if mem == nil { + return nil, false + } + } else { + if v.Op != OpLoad { + return nil, false + } + mem = v.MemoryArg() + if mem.Op != OpSelectN { + return nil, false + } + if mem.Type != types.TypeMem { + return nil, false + } // assume it is the right selection if true + } + call := mem.Args[0] + if call.Op != OpStaticCall { + return nil, false + } + if !isSameCall(call.Aux, "runtime.newobject") { + return nil, false + } + if f.ABIDefault == f.ABI1 && len(c.intParamRegs) >= 1 { + if v.Args[0] == call { + return mem, true + } + return nil, false + } + if v.Args[0].Op != OpOffPtr { + return nil, false + } + if v.Args[0].Args[0].Op != OpSP { + return nil, false + } + if v.Args[0].AuxInt != c.ctxt.Arch.FixedFrameSize+c.RegSize { // offset of return value + return nil, false + } + return mem, true +} + +// IsSanitizerSafeAddr reports whether v is known to be an address +// that doesn't need instrumentation. +func IsSanitizerSafeAddr(v *Value) bool { + for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy { + v = v.Args[0] + } + switch v.Op { + case OpSP, OpLocalAddr, OpSelectNAddr: + // Stack addresses are always safe. + return true + case OpITab, OpStringPtr, OpGetClosurePtr: + // Itabs, string data, and closure fields are + // read-only once initialized. + return true + case OpAddr: + vt := v.Aux.(*obj.LSym).Type + return vt == objabi.SRODATA || vt == objabi.SLIBFUZZER_8BIT_COUNTER || vt == objabi.SCOVERAGE_COUNTER || vt == objabi.SCOVERAGE_AUXVAR + } + return false +} + +// isVolatile reports whether v is a pointer to argument region on stack which +// will be clobbered by a function call. +func isVolatile(v *Value) bool { + for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy || v.Op == OpSelectNAddr { + v = v.Args[0] + } + return v.Op == OpSP +} |