commit    43a123c1ae6613b3efeed291fa552ecd909d3acf
tree      fd92518b7024bc74031f78a1cf9e454b65e73665 /src/runtime/trace
parent    Initial commit.
author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-16 19:23:18 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-16 19:23:18 +0000
Adding upstream version 1.20.14. (refs: upstream/1.20.14, upstream)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to ''):

-rw-r--r--  src/runtime/trace.go                  | 1579
-rw-r--r--  src/runtime/trace/annotation.go       |  198
-rw-r--r--  src/runtime/trace/annotation_test.go  |  156
-rw-r--r--  src/runtime/trace/example_test.go     |   39
-rw-r--r--  src/runtime/trace/trace.go            |  154
-rw-r--r--  src/runtime/trace/trace_stack_test.go |  333
-rw-r--r--  src/runtime/trace/trace_test.go       |  792
-rw-r--r--  src/runtime/traceback.go              | 1377
-rw-r--r--  src/runtime/traceback_test.go         |  422

9 files changed, 5050 insertions(+), 0 deletions(-)
diff --git a/src/runtime/trace.go b/src/runtime/trace.go new file mode 100644 index 0000000..e7dfab1 --- /dev/null +++ b/src/runtime/trace.go @@ -0,0 +1,1579 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Go execution tracer. +// The tracer captures a wide range of execution events like goroutine +// creation/blocking/unblocking, syscall enter/exit/block, GC-related events, +// changes of heap size, processor start/stop, etc and writes them to a buffer +// in a compact form. A precise nanosecond-precision timestamp and a stack +// trace is captured for most events. +// See https://golang.org/s/go15trace for more info. + +package runtime + +import ( + "internal/goarch" + "runtime/internal/atomic" + "runtime/internal/sys" + "unsafe" +) + +// Event types in the trace, args are given in square brackets. +const ( + traceEvNone = 0 // unused + traceEvBatch = 1 // start of per-P batch of events [pid, timestamp] + traceEvFrequency = 2 // contains tracer timer frequency [frequency (ticks per second)] + traceEvStack = 3 // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}] + traceEvGomaxprocs = 4 // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id] + traceEvProcStart = 5 // start of P [timestamp, thread id] + traceEvProcStop = 6 // stop of P [timestamp] + traceEvGCStart = 7 // GC start [timestamp, seq, stack id] + traceEvGCDone = 8 // GC done [timestamp] + traceEvGCSTWStart = 9 // GC STW start [timestamp, kind] + traceEvGCSTWDone = 10 // GC STW done [timestamp] + traceEvGCSweepStart = 11 // GC sweep start [timestamp, stack id] + traceEvGCSweepDone = 12 // GC sweep done [timestamp, swept, reclaimed] + traceEvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id] + traceEvGoStart = 14 // goroutine starts running [timestamp, goroutine id, seq] + traceEvGoEnd = 15 // goroutine ends [timestamp] + traceEvGoStop = 16 // goroutine stops (like in select{}) [timestamp, stack] + traceEvGoSched = 17 // goroutine calls Gosched [timestamp, stack] + traceEvGoPreempt = 18 // goroutine is preempted [timestamp, stack] + traceEvGoSleep = 19 // goroutine calls Sleep [timestamp, stack] + traceEvGoBlock = 20 // goroutine blocks [timestamp, stack] + traceEvGoUnblock = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack] + traceEvGoBlockSend = 22 // goroutine blocks on chan send [timestamp, stack] + traceEvGoBlockRecv = 23 // goroutine blocks on chan recv [timestamp, stack] + traceEvGoBlockSelect = 24 // goroutine blocks on select [timestamp, stack] + traceEvGoBlockSync = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack] + traceEvGoBlockCond = 26 // goroutine blocks on Cond [timestamp, stack] + traceEvGoBlockNet = 27 // goroutine blocks on network [timestamp, stack] + traceEvGoSysCall = 28 // syscall enter [timestamp, stack] + traceEvGoSysExit = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp] + traceEvGoSysBlock = 30 // syscall blocks [timestamp] + traceEvGoWaiting = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id] + traceEvGoInSyscall = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id] + traceEvHeapAlloc = 33 // gcController.heapLive change [timestamp, heap_alloc] + traceEvHeapGoal = 34 // gcController.heapGoal() (formerly next_gc) change [timestamp, heap goal in bytes] + traceEvTimerGoroutine = 35 // not 
currently used; previously denoted timer goroutine [timer goroutine id] + traceEvFutileWakeup = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp] + traceEvString = 37 // string dictionary entry [ID, length, string] + traceEvGoStartLocal = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id] + traceEvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack] + traceEvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp] + traceEvGoStartLabel = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id] + traceEvGoBlockGC = 42 // goroutine blocks on GC assist [timestamp, stack] + traceEvGCMarkAssistStart = 43 // GC mark assist start [timestamp, stack] + traceEvGCMarkAssistDone = 44 // GC mark assist done [timestamp] + traceEvUserTaskCreate = 45 // trace.NewContext [timestamp, internal task id, internal parent task id, stack, name string] + traceEvUserTaskEnd = 46 // end of a task [timestamp, internal task id, stack] + traceEvUserRegion = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), stack, name string] + traceEvUserLog = 48 // trace.Log [timestamp, internal task id, key string id, stack, value string] + traceEvCPUSample = 49 // CPU profiling sample [timestamp, stack, real timestamp, real P id (-1 when absent), goroutine id] + traceEvCount = 50 + // Byte is used but only 6 bits are available for event type. + // The remaining 2 bits are used to specify the number of arguments. + // That means, the max event type value is 63. +) + +const ( + // Timestamps in trace are cputicks/traceTickDiv. + // This makes absolute values of timestamp diffs smaller, + // and so they are encoded in less number of bytes. + // 64 on x86 is somewhat arbitrary (one tick is ~20ns on a 3GHz machine). + // The suggested increment frequency for PowerPC's time base register is + // 512 MHz according to Power ISA v2.07 section 6.2, so we use 16 on ppc64 + // and ppc64le. + // Tracing won't work reliably for architectures where cputicks is emulated + // by nanotime, so the value doesn't matter for those architectures. + traceTickDiv = 16 + 48*(goarch.Is386|goarch.IsAmd64) + // Maximum number of PCs in a single stack trace. + // Since events contain only stack id rather than whole stack trace, + // we can allow quite large values here. + traceStackSize = 128 + // Identifier of a fake P that is used when we trace without a real P. + traceGlobProc = -1 + // Maximum number of bytes to encode uint64 in base-128. + traceBytesPerNumber = 10 + // Shift of the number of arguments in the first event byte. + traceArgCountShift = 6 + // Flag passed to traceGoPark to denote that the previous wakeup of this + // goroutine was futile. For example, a goroutine was unblocked on a mutex, + // but another goroutine got ahead and acquired the mutex before the first + // goroutine is scheduled, so the first goroutine has to block again. + // Such wakeups happen on buffered channels and sync.Mutex, + // but are generally not interesting for end user. + traceFutileWakeup byte = 128 +) + +// trace is global tracing context. +var trace struct { + // trace.lock must only be acquired on the system stack where + // stack splits cannot happen while it is held. 
+ lock mutex // protects the following members + lockOwner *g // to avoid deadlocks during recursive lock locks + enabled bool // when set runtime traces events + shutdown bool // set when we are waiting for trace reader to finish after setting enabled to false + headerWritten bool // whether ReadTrace has emitted trace header + footerWritten bool // whether ReadTrace has emitted trace footer + shutdownSema uint32 // used to wait for ReadTrace completion + seqStart uint64 // sequence number when tracing was started + ticksStart int64 // cputicks when tracing was started + ticksEnd int64 // cputicks when tracing was stopped + timeStart int64 // nanotime when tracing was started + timeEnd int64 // nanotime when tracing was stopped + seqGC uint64 // GC start/done sequencer + reading traceBufPtr // buffer currently handed off to user + empty traceBufPtr // stack of empty buffers + fullHead traceBufPtr // queue of full buffers + fullTail traceBufPtr + stackTab traceStackTable // maps stack traces to unique ids + // cpuLogRead accepts CPU profile samples from the signal handler where + // they're generated. It uses a two-word header to hold the IDs of the P and + // G (respectively) that were active at the time of the sample. Because + // profBuf uses a record with all zeros in its header to indicate overflow, + // we make sure to make the P field always non-zero: The ID of a real P will + // start at bit 1, and bit 0 will be set. Samples that arrive while no P is + // running (such as near syscalls) will set the first header field to 0b10. + // This careful handling of the first header field allows us to store ID of + // the active G directly in the second field, even though that will be 0 + // when sampling g0. + cpuLogRead *profBuf + // cpuLogBuf is a trace buffer to hold events corresponding to CPU profile + // samples, which arrive out of band and not directly connected to a + // specific P. + cpuLogBuf traceBufPtr + + reader atomic.Pointer[g] // goroutine that called ReadTrace, or nil + + signalLock atomic.Uint32 // protects use of the following member, only usable in signal handlers + cpuLogWrite *profBuf // copy of cpuLogRead for use in signal handlers, set without signalLock + + // Dictionary for traceEvString. + // + // TODO: central lock to access the map is not ideal. + // option: pre-assign ids to all user annotation region names and tags + // option: per-P cache + // option: sync.Map like data structure + stringsLock mutex + strings map[string]uint64 + stringSeq uint64 + + // markWorkerLabels maps gcMarkWorkerMode to string ID. + markWorkerLabels [len(gcMarkWorkerModeStrings)]uint64 + + bufLock mutex // protects buf + buf traceBufPtr // global trace buffer, used when running without a p +} + +// traceBufHeader is per-P tracing buffer. +type traceBufHeader struct { + link traceBufPtr // in trace.empty/full + lastTicks uint64 // when we wrote the last event + pos int // next write offset in arr + stk [traceStackSize]uintptr // scratch buffer for traceback +} + +// traceBuf is per-P tracing buffer. +type traceBuf struct { + _ sys.NotInHeap + traceBufHeader + arr [64<<10 - unsafe.Sizeof(traceBufHeader{})]byte // underlying buffer for traceBufHeader.buf +} + +// traceBufPtr is a *traceBuf that is not traced by the garbage +// collector and doesn't have write barriers. traceBufs are not +// allocated from the GC'd heap, so this is safe, and are often +// manipulated in contexts where write barriers are not allowed, so +// this is necessary. 
+// +// TODO: Since traceBuf is now embedded runtime/internal/sys.NotInHeap, this isn't necessary. +type traceBufPtr uintptr + +func (tp traceBufPtr) ptr() *traceBuf { return (*traceBuf)(unsafe.Pointer(tp)) } +func (tp *traceBufPtr) set(b *traceBuf) { *tp = traceBufPtr(unsafe.Pointer(b)) } +func traceBufPtrOf(b *traceBuf) traceBufPtr { + return traceBufPtr(unsafe.Pointer(b)) +} + +// StartTrace enables tracing for the current process. +// While tracing, the data will be buffered and available via ReadTrace. +// StartTrace returns an error if tracing is already enabled. +// Most clients should use the runtime/trace package or the testing package's +// -test.trace flag instead of calling StartTrace directly. +func StartTrace() error { + // Stop the world so that we can take a consistent snapshot + // of all goroutines at the beginning of the trace. + // Do not stop the world during GC so we ensure we always see + // a consistent view of GC-related events (e.g. a start is always + // paired with an end). + stopTheWorldGC("start tracing") + + // Prevent sysmon from running any code that could generate events. + lock(&sched.sysmonlock) + + // We are in stop-the-world, but syscalls can finish and write to trace concurrently. + // Exitsyscall could check trace.enabled long before and then suddenly wake up + // and decide to write to trace at a random point in time. + // However, such syscall will use the global trace.buf buffer, because we've + // acquired all p's by doing stop-the-world. So this protects us from such races. + lock(&trace.bufLock) + + if trace.enabled || trace.shutdown { + unlock(&trace.bufLock) + unlock(&sched.sysmonlock) + startTheWorldGC() + return errorString("tracing is already enabled") + } + + // Can't set trace.enabled yet. While the world is stopped, exitsyscall could + // already emit a delayed event (see exitTicks in exitsyscall) if we set trace.enabled here. + // That would lead to an inconsistent trace: + // - either GoSysExit appears before EvGoInSyscall, + // - or GoSysExit appears for a goroutine for which we don't emit EvGoInSyscall below. + // To instruct traceEvent that it must not ignore events below, we set startingtrace. + // trace.enabled is set afterwards once we have emitted all preliminary events. + mp := getg().m + mp.startingtrace = true + + // Obtain current stack ID to use in all traceEvGoCreate events below. + stkBuf := make([]uintptr, traceStackSize) + stackID := traceStackID(mp, stkBuf, 2) + + profBuf := newProfBuf(2, profBufWordCount, profBufTagCount) // after the timestamp, header is [pp.id, gp.goid] + trace.cpuLogRead = profBuf + + // We must not acquire trace.signalLock outside of a signal handler: a + // profiling signal may arrive at any time and try to acquire it, leading to + // deadlock. Because we can't use that lock to protect updates to + // trace.cpuLogWrite (only use of the structure it references), reads and + // writes of the pointer must be atomic. (And although this field is never + // the sole pointer to the profBuf value, it's best to allow a write barrier + // here.) + atomicstorep(unsafe.Pointer(&trace.cpuLogWrite), unsafe.Pointer(profBuf)) + + // World is stopped, no need to lock. + forEachGRace(func(gp *g) { + status := readgstatus(gp) + if status != _Gdead { + gp.traceseq = 0 + gp.tracelastp = getg().m.p + // +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum. 
+ id := trace.stackTab.put([]uintptr{startPCforTrace(gp.startpc) + sys.PCQuantum}) + traceEvent(traceEvGoCreate, -1, gp.goid, uint64(id), stackID) + } + if status == _Gwaiting { + // traceEvGoWaiting is implied to have seq=1. + gp.traceseq++ + traceEvent(traceEvGoWaiting, -1, gp.goid) + } + if status == _Gsyscall { + gp.traceseq++ + traceEvent(traceEvGoInSyscall, -1, gp.goid) + } else if status == _Gdead && gp.m != nil && gp.m.isextra { + // Trigger two trace events for the dead g in the extra m, + // since the next event of the g will be traceEvGoSysExit in exitsyscall, + // while calling from C thread to Go. + gp.traceseq = 0 + gp.tracelastp = getg().m.p + // +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum. + id := trace.stackTab.put([]uintptr{startPCforTrace(0) + sys.PCQuantum}) // no start pc + traceEvent(traceEvGoCreate, -1, gp.goid, uint64(id), stackID) + gp.traceseq++ + traceEvent(traceEvGoInSyscall, -1, gp.goid) + } else { + gp.sysblocktraced = false + } + }) + traceProcStart() + traceGoStart() + // Note: ticksStart needs to be set after we emit traceEvGoInSyscall events. + // If we do it the other way around, it is possible that exitsyscall will + // query sysexitticks after ticksStart but before traceEvGoInSyscall timestamp. + // It will lead to a false conclusion that cputicks is broken. + trace.ticksStart = cputicks() + trace.timeStart = nanotime() + trace.headerWritten = false + trace.footerWritten = false + + // string to id mapping + // 0 : reserved for an empty string + // remaining: other strings registered by traceString + trace.stringSeq = 0 + trace.strings = make(map[string]uint64) + + trace.seqGC = 0 + mp.startingtrace = false + trace.enabled = true + + // Register runtime goroutine labels. + _, pid, bufp := traceAcquireBuffer() + for i, label := range gcMarkWorkerModeStrings[:] { + trace.markWorkerLabels[i], bufp = traceString(bufp, pid, label) + } + traceReleaseBuffer(pid) + + unlock(&trace.bufLock) + + unlock(&sched.sysmonlock) + + startTheWorldGC() + return nil +} + +// StopTrace stops tracing, if it was previously enabled. +// StopTrace only returns after all the reads for the trace have completed. +func StopTrace() { + // Stop the world so that we can collect the trace buffers from all p's below, + // and also to avoid races with traceEvent. + stopTheWorldGC("stop tracing") + + // See the comment in StartTrace. + lock(&sched.sysmonlock) + + // See the comment in StartTrace. + lock(&trace.bufLock) + + if !trace.enabled { + unlock(&trace.bufLock) + unlock(&sched.sysmonlock) + startTheWorldGC() + return + } + + traceGoSched() + + atomicstorep(unsafe.Pointer(&trace.cpuLogWrite), nil) + trace.cpuLogRead.close() + traceReadCPU() + + // Loop over all allocated Ps because dead Ps may still have + // trace buffers. + for _, p := range allp[:cap(allp)] { + buf := p.tracebuf + if buf != 0 { + traceFullQueue(buf) + p.tracebuf = 0 + } + } + if trace.buf != 0 { + buf := trace.buf + trace.buf = 0 + if buf.ptr().pos != 0 { + traceFullQueue(buf) + } + } + if trace.cpuLogBuf != 0 { + buf := trace.cpuLogBuf + trace.cpuLogBuf = 0 + if buf.ptr().pos != 0 { + traceFullQueue(buf) + } + } + + for { + trace.ticksEnd = cputicks() + trace.timeEnd = nanotime() + // Windows time can tick only every 15ms, wait for at least one tick. 
+ if trace.timeEnd != trace.timeStart { + break + } + osyield() + } + + trace.enabled = false + trace.shutdown = true + unlock(&trace.bufLock) + + unlock(&sched.sysmonlock) + + startTheWorldGC() + + // The world is started but we've set trace.shutdown, so new tracing can't start. + // Wait for the trace reader to flush pending buffers and stop. + semacquire(&trace.shutdownSema) + if raceenabled { + raceacquire(unsafe.Pointer(&trace.shutdownSema)) + } + + systemstack(func() { + // The lock protects us from races with StartTrace/StopTrace because they do stop-the-world. + lock(&trace.lock) + for _, p := range allp[:cap(allp)] { + if p.tracebuf != 0 { + throw("trace: non-empty trace buffer in proc") + } + } + if trace.buf != 0 { + throw("trace: non-empty global trace buffer") + } + if trace.fullHead != 0 || trace.fullTail != 0 { + throw("trace: non-empty full trace buffer") + } + if trace.reading != 0 || trace.reader.Load() != nil { + throw("trace: reading after shutdown") + } + for trace.empty != 0 { + buf := trace.empty + trace.empty = buf.ptr().link + sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf.ptr()), &memstats.other_sys) + } + trace.strings = nil + trace.shutdown = false + trace.cpuLogRead = nil + unlock(&trace.lock) + }) +} + +// ReadTrace returns the next chunk of binary tracing data, blocking until data +// is available. If tracing is turned off and all the data accumulated while it +// was on has been returned, ReadTrace returns nil. The caller must copy the +// returned data before calling ReadTrace again. +// ReadTrace must be called from one goroutine at a time. +func ReadTrace() []byte { +top: + var buf []byte + var park bool + systemstack(func() { + buf, park = readTrace0() + }) + if park { + gopark(func(gp *g, _ unsafe.Pointer) bool { + if !trace.reader.CompareAndSwapNoWB(nil, gp) { + // We're racing with another reader. + // Wake up and handle this case. + return false + } + + if g2 := traceReader(); gp == g2 { + // New data arrived between unlocking + // and the CAS and we won the wake-up + // race, so wake up directly. + return false + } else if g2 != nil { + printlock() + println("runtime: got trace reader", g2, g2.goid) + throw("unexpected trace reader") + } + + return true + }, nil, waitReasonTraceReaderBlocked, traceEvGoBlock, 2) + goto top + } + + return buf +} + +// readTrace0 is ReadTrace's continuation on g0. This must run on the +// system stack because it acquires trace.lock. +// +//go:systemstack +func readTrace0() (buf []byte, park bool) { + if raceenabled { + // g0 doesn't have a race context. Borrow the user G's. + if getg().racectx != 0 { + throw("expected racectx == 0") + } + getg().racectx = getg().m.curg.racectx + // (This defer should get open-coded, which is safe on + // the system stack.) + defer func() { getg().racectx = 0 }() + } + + // This function may need to lock trace.lock recursively + // (goparkunlock -> traceGoPark -> traceEvent -> traceFlush). + // To allow this we use trace.lockOwner. + // Also this function must not allocate while holding trace.lock: + // allocation can call heap allocate, which will try to emit a trace + // event while holding heap lock. + lock(&trace.lock) + trace.lockOwner = getg().m.curg + + if trace.reader.Load() != nil { + // More than one goroutine reads trace. This is bad. + // But we rather do not crash the program because of tracing, + // because tracing can be enabled at runtime on prod servers. 
+ trace.lockOwner = nil + unlock(&trace.lock) + println("runtime: ReadTrace called from multiple goroutines simultaneously") + return nil, false + } + // Recycle the old buffer. + if buf := trace.reading; buf != 0 { + buf.ptr().link = trace.empty + trace.empty = buf + trace.reading = 0 + } + // Write trace header. + if !trace.headerWritten { + trace.headerWritten = true + trace.lockOwner = nil + unlock(&trace.lock) + return []byte("go 1.19 trace\x00\x00\x00"), false + } + // Optimistically look for CPU profile samples. This may write new stack + // records, and may write new tracing buffers. + if !trace.footerWritten && !trace.shutdown { + traceReadCPU() + } + // Wait for new data. + if trace.fullHead == 0 && !trace.shutdown { + // We don't simply use a note because the scheduler + // executes this goroutine directly when it wakes up + // (also a note would consume an M). + trace.lockOwner = nil + unlock(&trace.lock) + return nil, true + } +newFull: + assertLockHeld(&trace.lock) + // Write a buffer. + if trace.fullHead != 0 { + buf := traceFullDequeue() + trace.reading = buf + trace.lockOwner = nil + unlock(&trace.lock) + return buf.ptr().arr[:buf.ptr().pos], false + } + + // Write footer with timer frequency. + if !trace.footerWritten { + trace.footerWritten = true + // Use float64 because (trace.ticksEnd - trace.ticksStart) * 1e9 can overflow int64. + freq := float64(trace.ticksEnd-trace.ticksStart) * 1e9 / float64(trace.timeEnd-trace.timeStart) / traceTickDiv + if freq <= 0 { + throw("trace: ReadTrace got invalid frequency") + } + trace.lockOwner = nil + unlock(&trace.lock) + + // Write frequency event. + bufp := traceFlush(0, 0) + buf := bufp.ptr() + buf.byte(traceEvFrequency | 0<<traceArgCountShift) + buf.varint(uint64(freq)) + + // Dump stack table. + // This will emit a bunch of full buffers, we will pick them up + // on the next iteration. + bufp = trace.stackTab.dump(bufp) + + // Flush final buffer. + lock(&trace.lock) + traceFullQueue(bufp) + goto newFull // trace.lock should be held at newFull + } + // Done. + if trace.shutdown { + trace.lockOwner = nil + unlock(&trace.lock) + if raceenabled { + // Model synchronization on trace.shutdownSema, which race + // detector does not see. This is required to avoid false + // race reports on writer passed to trace.Start. + racerelease(unsafe.Pointer(&trace.shutdownSema)) + } + // trace.enabled is already reset, so can call traceable functions. + semrelease(&trace.shutdownSema) + return nil, false + } + // Also bad, but see the comment above. + trace.lockOwner = nil + unlock(&trace.lock) + println("runtime: spurious wakeup of trace reader") + return nil, false +} + +// traceReader returns the trace reader that should be woken up, if any. +// Callers should first check that trace.enabled or trace.shutdown is set. +// +// This must run on the system stack because it acquires trace.lock. +// +//go:systemstack +func traceReader() *g { + // Optimistic check first + if traceReaderAvailable() == nil { + return nil + } + lock(&trace.lock) + gp := traceReaderAvailable() + if gp == nil || !trace.reader.CompareAndSwapNoWB(gp, nil) { + unlock(&trace.lock) + return nil + } + unlock(&trace.lock) + return gp +} + +// traceReaderAvailable returns the trace reader if it is not currently +// scheduled and should be. Callers should first check that trace.enabled +// or trace.shutdown is set. 
+func traceReaderAvailable() *g { + if trace.fullHead != 0 || trace.shutdown { + return trace.reader.Load() + } + return nil +} + +// traceProcFree frees trace buffer associated with pp. +// +// This must run on the system stack because it acquires trace.lock. +// +//go:systemstack +func traceProcFree(pp *p) { + buf := pp.tracebuf + pp.tracebuf = 0 + if buf == 0 { + return + } + lock(&trace.lock) + traceFullQueue(buf) + unlock(&trace.lock) +} + +// traceFullQueue queues buf into queue of full buffers. +func traceFullQueue(buf traceBufPtr) { + buf.ptr().link = 0 + if trace.fullHead == 0 { + trace.fullHead = buf + } else { + trace.fullTail.ptr().link = buf + } + trace.fullTail = buf +} + +// traceFullDequeue dequeues from queue of full buffers. +func traceFullDequeue() traceBufPtr { + buf := trace.fullHead + if buf == 0 { + return 0 + } + trace.fullHead = buf.ptr().link + if trace.fullHead == 0 { + trace.fullTail = 0 + } + buf.ptr().link = 0 + return buf +} + +// traceEvent writes a single event to trace buffer, flushing the buffer if necessary. +// ev is event type. +// If skip > 0, write current stack id as the last argument (skipping skip top frames). +// If skip = 0, this event type should contain a stack, but we don't want +// to collect and remember it for this particular call. +func traceEvent(ev byte, skip int, args ...uint64) { + mp, pid, bufp := traceAcquireBuffer() + // Double-check trace.enabled now that we've done m.locks++ and acquired bufLock. + // This protects from races between traceEvent and StartTrace/StopTrace. + + // The caller checked that trace.enabled == true, but trace.enabled might have been + // turned off between the check and now. Check again. traceLockBuffer did mp.locks++, + // StopTrace does stopTheWorld, and stopTheWorld waits for mp.locks to go back to zero, + // so if we see trace.enabled == true now, we know it's true for the rest of the function. + // Exitsyscall can run even during stopTheWorld. The race with StartTrace/StopTrace + // during tracing in exitsyscall is resolved by locking trace.bufLock in traceLockBuffer. + // + // Note trace_userTaskCreate runs the same check. + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + if skip > 0 { + if getg() == mp.curg { + skip++ // +1 because stack is captured in traceEventLocked. + } + } + traceEventLocked(0, mp, pid, bufp, ev, 0, skip, args...) + traceReleaseBuffer(pid) +} + +// traceEventLocked writes a single event of type ev to the trace buffer bufp, +// flushing the buffer if necessary. pid is the id of the current P, or +// traceGlobProc if we're tracing without a real P. +// +// Preemption is disabled, and if running without a real P the global tracing +// buffer is locked. +// +// Events types that do not include a stack set skip to -1. Event types that +// include a stack may explicitly reference a stackID from the trace.stackTab +// (obtained by an earlier call to traceStackID). Without an explicit stackID, +// this function will automatically capture the stack of the goroutine currently +// running on mp, skipping skip top frames or, if skip is 0, writing out an +// empty stack record. +// +// It records the event's args to the traceBuf, and also makes an effort to +// reserve extraBytes bytes of additional space immediately following the event, +// in the same traceBuf. +func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev byte, stackID uint32, skip int, args ...uint64) { + buf := bufp.ptr() + // TODO: test on non-zero extraBytes param. 
+ maxSize := 2 + 5*traceBytesPerNumber + extraBytes // event type, length, sequence, timestamp, stack id and two add params + if buf == nil || len(buf.arr)-buf.pos < maxSize { + systemstack(func() { + buf = traceFlush(traceBufPtrOf(buf), pid).ptr() + }) + bufp.set(buf) + } + + // NOTE: ticks might be same after tick division, although the real cputicks is + // linear growth. + ticks := uint64(cputicks()) / traceTickDiv + tickDiff := ticks - buf.lastTicks + if tickDiff == 0 { + ticks = buf.lastTicks + 1 + tickDiff = 1 + } + + buf.lastTicks = ticks + narg := byte(len(args)) + if stackID != 0 || skip >= 0 { + narg++ + } + // We have only 2 bits for number of arguments. + // If number is >= 3, then the event type is followed by event length in bytes. + if narg > 3 { + narg = 3 + } + startPos := buf.pos + buf.byte(ev | narg<<traceArgCountShift) + var lenp *byte + if narg == 3 { + // Reserve the byte for length assuming that length < 128. + buf.varint(0) + lenp = &buf.arr[buf.pos-1] + } + buf.varint(tickDiff) + for _, a := range args { + buf.varint(a) + } + if stackID != 0 { + buf.varint(uint64(stackID)) + } else if skip == 0 { + buf.varint(0) + } else if skip > 0 { + buf.varint(traceStackID(mp, buf.stk[:], skip)) + } + evSize := buf.pos - startPos + if evSize > maxSize { + throw("invalid length of trace event") + } + if lenp != nil { + // Fill in actual length. + *lenp = byte(evSize - 2) + } +} + +// traceCPUSample writes a CPU profile sample stack to the execution tracer's +// profiling buffer. It is called from a signal handler, so is limited in what +// it can do. +func traceCPUSample(gp *g, pp *p, stk []uintptr) { + if !trace.enabled { + // Tracing is usually turned off; don't spend time acquiring the signal + // lock unless it's active. + return + } + + // Match the clock used in traceEventLocked + now := cputicks() + // The "header" here is the ID of the P that was running the profiled code, + // followed by the ID of the goroutine. (For normal CPU profiling, it's + // usually the number of samples with the given stack.) Near syscalls, pp + // may be nil. Reporting goid of 0 is fine for either g0 or a nil gp. + var hdr [2]uint64 + if pp != nil { + // Overflow records in profBuf have all header values set to zero. Make + // sure that real headers have at least one bit set. + hdr[0] = uint64(pp.id)<<1 | 0b1 + } else { + hdr[0] = 0b10 + } + if gp != nil { + hdr[1] = gp.goid + } + + // Allow only one writer at a time + for !trace.signalLock.CompareAndSwap(0, 1) { + // TODO: Is it safe to osyield here? https://go.dev/issue/52672 + osyield() + } + + if log := (*profBuf)(atomic.Loadp(unsafe.Pointer(&trace.cpuLogWrite))); log != nil { + // Note: we don't pass a tag pointer here (how should profiling tags + // interact with the execution tracer?), but if we did we'd need to be + // careful about write barriers. See the long comment in profBuf.write. 
+ log.write(nil, now, hdr[:], stk) + } + + trace.signalLock.Store(0) +} + +func traceReadCPU() { + bufp := &trace.cpuLogBuf + + for { + data, tags, _ := trace.cpuLogRead.read(profBufNonBlocking) + if len(data) == 0 { + break + } + for len(data) > 0 { + if len(data) < 4 || data[0] > uint64(len(data)) { + break // truncated profile + } + if data[0] < 4 || tags != nil && len(tags) < 1 { + break // malformed profile + } + if len(tags) < 1 { + break // mismatched profile records and tags + } + timestamp := data[1] + ppid := data[2] >> 1 + if hasP := (data[2] & 0b1) != 0; !hasP { + ppid = ^uint64(0) + } + goid := data[3] + stk := data[4:data[0]] + empty := len(stk) == 1 && data[2] == 0 && data[3] == 0 + data = data[data[0]:] + // No support here for reporting goroutine tags at the moment; if + // that information is to be part of the execution trace, we'd + // probably want to see when the tags are applied and when they + // change, instead of only seeing them when we get a CPU sample. + tags = tags[1:] + + if empty { + // Looks like an overflow record from the profBuf. Not much to + // do here, we only want to report full records. + // + // TODO: should we start a goroutine to drain the profBuf, + // rather than relying on a high-enough volume of tracing events + // to keep ReadTrace busy? https://go.dev/issue/52674 + continue + } + + buf := bufp.ptr() + if buf == nil { + systemstack(func() { + *bufp = traceFlush(*bufp, 0) + }) + buf = bufp.ptr() + } + for i := range stk { + if i >= len(buf.stk) { + break + } + buf.stk[i] = uintptr(stk[i]) + } + stackID := trace.stackTab.put(buf.stk[:len(stk)]) + + traceEventLocked(0, nil, 0, bufp, traceEvCPUSample, stackID, 1, timestamp/traceTickDiv, ppid, goid) + } + } +} + +func traceStackID(mp *m, buf []uintptr, skip int) uint64 { + gp := getg() + curgp := mp.curg + var nstk int + if curgp == gp { + nstk = callers(skip+1, buf) + } else if curgp != nil { + nstk = gcallers(curgp, skip, buf) + } + if nstk > 0 { + nstk-- // skip runtime.goexit + } + if nstk > 0 && curgp.goid == 1 { + nstk-- // skip runtime.main + } + id := trace.stackTab.put(buf[:nstk]) + return uint64(id) +} + +// traceAcquireBuffer returns trace buffer to use and, if necessary, locks it. +func traceAcquireBuffer() (mp *m, pid int32, bufp *traceBufPtr) { + // Any time we acquire a buffer, we may end up flushing it, + // but flushes are rare. Record the lock edge even if it + // doesn't happen this time. + lockRankMayTraceFlush() + + mp = acquirem() + if p := mp.p.ptr(); p != nil { + return mp, p.id, &p.tracebuf + } + lock(&trace.bufLock) + return mp, traceGlobProc, &trace.buf +} + +// traceReleaseBuffer releases a buffer previously acquired with traceAcquireBuffer. +func traceReleaseBuffer(pid int32) { + if pid == traceGlobProc { + unlock(&trace.bufLock) + } + releasem(getg().m) +} + +// lockRankMayTraceFlush records the lock ranking effects of a +// potential call to traceFlush. +func lockRankMayTraceFlush() { + owner := trace.lockOwner + dolock := owner == nil || owner != getg().m.curg + if dolock { + lockWithRankMayAcquire(&trace.lock, getLockRank(&trace.lock)) + } +} + +// traceFlush puts buf onto stack of full buffers and returns an empty buffer. +// +// This must run on the system stack because it acquires trace.lock. 
+// +//go:systemstack +func traceFlush(buf traceBufPtr, pid int32) traceBufPtr { + owner := trace.lockOwner + dolock := owner == nil || owner != getg().m.curg + if dolock { + lock(&trace.lock) + } + if buf != 0 { + traceFullQueue(buf) + } + if trace.empty != 0 { + buf = trace.empty + trace.empty = buf.ptr().link + } else { + buf = traceBufPtr(sysAlloc(unsafe.Sizeof(traceBuf{}), &memstats.other_sys)) + if buf == 0 { + throw("trace: out of memory") + } + } + bufp := buf.ptr() + bufp.link.set(nil) + bufp.pos = 0 + + // initialize the buffer for a new batch + ticks := uint64(cputicks()) / traceTickDiv + if ticks == bufp.lastTicks { + ticks = bufp.lastTicks + 1 + } + bufp.lastTicks = ticks + bufp.byte(traceEvBatch | 1<<traceArgCountShift) + bufp.varint(uint64(pid)) + bufp.varint(ticks) + + if dolock { + unlock(&trace.lock) + } + return buf +} + +// traceString adds a string to the trace.strings and returns the id. +func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) { + if s == "" { + return 0, bufp + } + + lock(&trace.stringsLock) + if raceenabled { + // raceacquire is necessary because the map access + // below is race annotated. + raceacquire(unsafe.Pointer(&trace.stringsLock)) + } + + if id, ok := trace.strings[s]; ok { + if raceenabled { + racerelease(unsafe.Pointer(&trace.stringsLock)) + } + unlock(&trace.stringsLock) + + return id, bufp + } + + trace.stringSeq++ + id := trace.stringSeq + trace.strings[s] = id + + if raceenabled { + racerelease(unsafe.Pointer(&trace.stringsLock)) + } + unlock(&trace.stringsLock) + + // memory allocation in above may trigger tracing and + // cause *bufp changes. Following code now works with *bufp, + // so there must be no memory allocation or any activities + // that causes tracing after this point. + + buf := bufp.ptr() + size := 1 + 2*traceBytesPerNumber + len(s) + if buf == nil || len(buf.arr)-buf.pos < size { + systemstack(func() { + buf = traceFlush(traceBufPtrOf(buf), pid).ptr() + bufp.set(buf) + }) + } + buf.byte(traceEvString) + buf.varint(id) + + // double-check the string and the length can fit. + // Otherwise, truncate the string. + slen := len(s) + if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber { + slen = room + } + + buf.varint(uint64(slen)) + buf.pos += copy(buf.arr[buf.pos:], s[:slen]) + + bufp.set(buf) + return id, bufp +} + +// varint appends v to buf in little-endian-base-128 encoding. +func (buf *traceBuf) varint(v uint64) { + pos := buf.pos + for ; v >= 0x80; v >>= 7 { + buf.arr[pos] = 0x80 | byte(v) + pos++ + } + buf.arr[pos] = byte(v) + pos++ + buf.pos = pos +} + +// varintAt writes varint v at byte position pos in buf. This always +// consumes traceBytesPerNumber bytes. This is intended for when the +// caller needs to reserve space for a varint but can't populate it +// until later. +func (buf *traceBuf) varintAt(pos int, v uint64) { + for i := 0; i < traceBytesPerNumber; i++ { + if i < traceBytesPerNumber-1 { + buf.arr[pos] = 0x80 | byte(v) + } else { + buf.arr[pos] = byte(v) + } + v >>= 7 + pos++ + } +} + +// byte appends v to buf. +func (buf *traceBuf) byte(v byte) { + buf.arr[buf.pos] = v + buf.pos++ +} + +// traceStackTable maps stack traces (arrays of PC's) to unique uint32 ids. +// It is lock-free for reading. +type traceStackTable struct { + lock mutex // Must be acquired on the system stack + seq uint32 + mem traceAlloc + tab [1 << 13]traceStackPtr +} + +// traceStack is a single stack in traceStackTable. 
+type traceStack struct { + link traceStackPtr + hash uintptr + id uint32 + n int + stk [0]uintptr // real type [n]uintptr +} + +type traceStackPtr uintptr + +func (tp traceStackPtr) ptr() *traceStack { return (*traceStack)(unsafe.Pointer(tp)) } + +// stack returns slice of PCs. +func (ts *traceStack) stack() []uintptr { + return (*[traceStackSize]uintptr)(unsafe.Pointer(&ts.stk))[:ts.n] +} + +// put returns a unique id for the stack trace pcs and caches it in the table, +// if it sees the trace for the first time. +func (tab *traceStackTable) put(pcs []uintptr) uint32 { + if len(pcs) == 0 { + return 0 + } + hash := memhash(unsafe.Pointer(&pcs[0]), 0, uintptr(len(pcs))*unsafe.Sizeof(pcs[0])) + // First, search the hashtable w/o the mutex. + if id := tab.find(pcs, hash); id != 0 { + return id + } + // Now, double check under the mutex. + // Switch to the system stack so we can acquire tab.lock + var id uint32 + systemstack(func() { + lock(&tab.lock) + if id = tab.find(pcs, hash); id != 0 { + unlock(&tab.lock) + return + } + // Create new record. + tab.seq++ + stk := tab.newStack(len(pcs)) + stk.hash = hash + stk.id = tab.seq + id = stk.id + stk.n = len(pcs) + stkpc := stk.stack() + for i, pc := range pcs { + stkpc[i] = pc + } + part := int(hash % uintptr(len(tab.tab))) + stk.link = tab.tab[part] + atomicstorep(unsafe.Pointer(&tab.tab[part]), unsafe.Pointer(stk)) + unlock(&tab.lock) + }) + return id +} + +// find checks if the stack trace pcs is already present in the table. +func (tab *traceStackTable) find(pcs []uintptr, hash uintptr) uint32 { + part := int(hash % uintptr(len(tab.tab))) +Search: + for stk := tab.tab[part].ptr(); stk != nil; stk = stk.link.ptr() { + if stk.hash == hash && stk.n == len(pcs) { + for i, stkpc := range stk.stack() { + if stkpc != pcs[i] { + continue Search + } + } + return stk.id + } + } + return 0 +} + +// newStack allocates a new stack of size n. +func (tab *traceStackTable) newStack(n int) *traceStack { + return (*traceStack)(tab.mem.alloc(unsafe.Sizeof(traceStack{}) + uintptr(n)*goarch.PtrSize)) +} + +// traceFrames returns the frames corresponding to pcs. It may +// allocate and may emit trace events. +func traceFrames(bufp traceBufPtr, pcs []uintptr) ([]traceFrame, traceBufPtr) { + frames := make([]traceFrame, 0, len(pcs)) + ci := CallersFrames(pcs) + for { + var frame traceFrame + f, more := ci.Next() + frame, bufp = traceFrameForPC(bufp, 0, f) + frames = append(frames, frame) + if !more { + return frames, bufp + } + } +} + +// dump writes all previously cached stacks to trace buffers, +// releases all memory and resets state. +// +// This must run on the system stack because it calls traceFlush. +// +//go:systemstack +func (tab *traceStackTable) dump(bufp traceBufPtr) traceBufPtr { + for i := range tab.tab { + stk := tab.tab[i].ptr() + for ; stk != nil; stk = stk.link.ptr() { + var frames []traceFrame + frames, bufp = traceFrames(bufp, stk.stack()) + + // Estimate the size of this record. This + // bound is pretty loose, but avoids counting + // lots of varint sizes. + maxSize := 1 + traceBytesPerNumber + (2+4*len(frames))*traceBytesPerNumber + // Make sure we have enough buffer space. + if buf := bufp.ptr(); len(buf.arr)-buf.pos < maxSize { + bufp = traceFlush(bufp, 0) + } + + // Emit header, with space reserved for length. + buf := bufp.ptr() + buf.byte(traceEvStack | 3<<traceArgCountShift) + lenPos := buf.pos + buf.pos += traceBytesPerNumber + + // Emit body. 
+ recPos := buf.pos + buf.varint(uint64(stk.id)) + buf.varint(uint64(len(frames))) + for _, frame := range frames { + buf.varint(uint64(frame.PC)) + buf.varint(frame.funcID) + buf.varint(frame.fileID) + buf.varint(frame.line) + } + + // Fill in size header. + buf.varintAt(lenPos, uint64(buf.pos-recPos)) + } + } + + tab.mem.drop() + *tab = traceStackTable{} + lockInit(&((*tab).lock), lockRankTraceStackTab) + + return bufp +} + +type traceFrame struct { + PC uintptr + funcID uint64 + fileID uint64 + line uint64 +} + +// traceFrameForPC records the frame information. +// It may allocate memory. +func traceFrameForPC(buf traceBufPtr, pid int32, f Frame) (traceFrame, traceBufPtr) { + bufp := &buf + var frame traceFrame + frame.PC = f.PC + + fn := f.Function + const maxLen = 1 << 10 + if len(fn) > maxLen { + fn = fn[len(fn)-maxLen:] + } + frame.funcID, bufp = traceString(bufp, pid, fn) + frame.line = uint64(f.Line) + file := f.File + if len(file) > maxLen { + file = file[len(file)-maxLen:] + } + frame.fileID, bufp = traceString(bufp, pid, file) + return frame, (*bufp) +} + +// traceAlloc is a non-thread-safe region allocator. +// It holds a linked list of traceAllocBlock. +type traceAlloc struct { + head traceAllocBlockPtr + off uintptr +} + +// traceAllocBlock is a block in traceAlloc. +// +// traceAllocBlock is allocated from non-GC'd memory, so it must not +// contain heap pointers. Writes to pointers to traceAllocBlocks do +// not need write barriers. +type traceAllocBlock struct { + _ sys.NotInHeap + next traceAllocBlockPtr + data [64<<10 - goarch.PtrSize]byte +} + +// TODO: Since traceAllocBlock is now embedded runtime/internal/sys.NotInHeap, this isn't necessary. +type traceAllocBlockPtr uintptr + +func (p traceAllocBlockPtr) ptr() *traceAllocBlock { return (*traceAllocBlock)(unsafe.Pointer(p)) } +func (p *traceAllocBlockPtr) set(x *traceAllocBlock) { *p = traceAllocBlockPtr(unsafe.Pointer(x)) } + +// alloc allocates n-byte block. +func (a *traceAlloc) alloc(n uintptr) unsafe.Pointer { + n = alignUp(n, goarch.PtrSize) + if a.head == 0 || a.off+n > uintptr(len(a.head.ptr().data)) { + if n > uintptr(len(a.head.ptr().data)) { + throw("trace: alloc too large") + } + block := (*traceAllocBlock)(sysAlloc(unsafe.Sizeof(traceAllocBlock{}), &memstats.other_sys)) + if block == nil { + throw("trace: out of memory") + } + block.next.set(a.head.ptr()) + a.head.set(block) + a.off = 0 + } + p := &a.head.ptr().data[a.off] + a.off += n + return unsafe.Pointer(p) +} + +// drop frees all previously allocated memory and resets the allocator. +func (a *traceAlloc) drop() { + for a.head != 0 { + block := a.head.ptr() + a.head.set(block.next.ptr()) + sysFree(unsafe.Pointer(block), unsafe.Sizeof(traceAllocBlock{}), &memstats.other_sys) + } +} + +// The following functions write specific events to trace. + +func traceGomaxprocs(procs int32) { + traceEvent(traceEvGomaxprocs, 1, uint64(procs)) +} + +func traceProcStart() { + traceEvent(traceEvProcStart, -1, uint64(getg().m.id)) +} + +func traceProcStop(pp *p) { + // Sysmon and stopTheWorld can stop Ps blocked in syscalls, + // to handle this we temporary employ the P. 
+ mp := acquirem() + oldp := mp.p + mp.p.set(pp) + traceEvent(traceEvProcStop, -1) + mp.p = oldp + releasem(mp) +} + +func traceGCStart() { + traceEvent(traceEvGCStart, 3, trace.seqGC) + trace.seqGC++ +} + +func traceGCDone() { + traceEvent(traceEvGCDone, -1) +} + +func traceGCSTWStart(kind int) { + traceEvent(traceEvGCSTWStart, -1, uint64(kind)) +} + +func traceGCSTWDone() { + traceEvent(traceEvGCSTWDone, -1) +} + +// traceGCSweepStart prepares to trace a sweep loop. This does not +// emit any events until traceGCSweepSpan is called. +// +// traceGCSweepStart must be paired with traceGCSweepDone and there +// must be no preemption points between these two calls. +func traceGCSweepStart() { + // Delay the actual GCSweepStart event until the first span + // sweep. If we don't sweep anything, don't emit any events. + pp := getg().m.p.ptr() + if pp.traceSweep { + throw("double traceGCSweepStart") + } + pp.traceSweep, pp.traceSwept, pp.traceReclaimed = true, 0, 0 +} + +// traceGCSweepSpan traces the sweep of a single page. +// +// This may be called outside a traceGCSweepStart/traceGCSweepDone +// pair; however, it will not emit any trace events in this case. +func traceGCSweepSpan(bytesSwept uintptr) { + pp := getg().m.p.ptr() + if pp.traceSweep { + if pp.traceSwept == 0 { + traceEvent(traceEvGCSweepStart, 1) + } + pp.traceSwept += bytesSwept + } +} + +func traceGCSweepDone() { + pp := getg().m.p.ptr() + if !pp.traceSweep { + throw("missing traceGCSweepStart") + } + if pp.traceSwept != 0 { + traceEvent(traceEvGCSweepDone, -1, uint64(pp.traceSwept), uint64(pp.traceReclaimed)) + } + pp.traceSweep = false +} + +func traceGCMarkAssistStart() { + traceEvent(traceEvGCMarkAssistStart, 1) +} + +func traceGCMarkAssistDone() { + traceEvent(traceEvGCMarkAssistDone, -1) +} + +func traceGoCreate(newg *g, pc uintptr) { + newg.traceseq = 0 + newg.tracelastp = getg().m.p + // +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum. 
+ id := trace.stackTab.put([]uintptr{startPCforTrace(pc) + sys.PCQuantum}) + traceEvent(traceEvGoCreate, 2, newg.goid, uint64(id)) +} + +func traceGoStart() { + gp := getg().m.curg + pp := gp.m.p + gp.traceseq++ + if pp.ptr().gcMarkWorkerMode != gcMarkWorkerNotWorker { + traceEvent(traceEvGoStartLabel, -1, gp.goid, gp.traceseq, trace.markWorkerLabels[pp.ptr().gcMarkWorkerMode]) + } else if gp.tracelastp == pp { + traceEvent(traceEvGoStartLocal, -1, gp.goid) + } else { + gp.tracelastp = pp + traceEvent(traceEvGoStart, -1, gp.goid, gp.traceseq) + } +} + +func traceGoEnd() { + traceEvent(traceEvGoEnd, -1) +} + +func traceGoSched() { + gp := getg() + gp.tracelastp = gp.m.p + traceEvent(traceEvGoSched, 1) +} + +func traceGoPreempt() { + gp := getg() + gp.tracelastp = gp.m.p + traceEvent(traceEvGoPreempt, 1) +} + +func traceGoPark(traceEv byte, skip int) { + if traceEv&traceFutileWakeup != 0 { + traceEvent(traceEvFutileWakeup, -1) + } + traceEvent(traceEv & ^traceFutileWakeup, skip) +} + +func traceGoUnpark(gp *g, skip int) { + pp := getg().m.p + gp.traceseq++ + if gp.tracelastp == pp { + traceEvent(traceEvGoUnblockLocal, skip, gp.goid) + } else { + gp.tracelastp = pp + traceEvent(traceEvGoUnblock, skip, gp.goid, gp.traceseq) + } +} + +func traceGoSysCall() { + traceEvent(traceEvGoSysCall, 1) +} + +func traceGoSysExit(ts int64) { + if ts != 0 && ts < trace.ticksStart { + // There is a race between the code that initializes sysexitticks + // (in exitsyscall, which runs without a P, and therefore is not + // stopped with the rest of the world) and the code that initializes + // a new trace. The recorded sysexitticks must therefore be treated + // as "best effort". If they are valid for this trace, then great, + // use them for greater accuracy. But if they're not valid for this + // trace, assume that the trace was started after the actual syscall + // exit (but before we actually managed to start the goroutine, + // aka right now), and assign a fresh time stamp to keep the log consistent. + ts = 0 + } + gp := getg().m.curg + gp.traceseq++ + gp.tracelastp = gp.m.p + traceEvent(traceEvGoSysExit, -1, gp.goid, gp.traceseq, uint64(ts)/traceTickDiv) +} + +func traceGoSysBlock(pp *p) { + // Sysmon and stopTheWorld can declare syscalls running on remote Ps as blocked, + // to handle this we temporary employ the P. + mp := acquirem() + oldp := mp.p + mp.p.set(pp) + traceEvent(traceEvGoSysBlock, -1) + mp.p = oldp + releasem(mp) +} + +func traceHeapAlloc(live uint64) { + traceEvent(traceEvHeapAlloc, -1, live) +} + +func traceHeapGoal() { + heapGoal := gcController.heapGoal() + if heapGoal == ^uint64(0) { + // Heap-based triggering is disabled. + traceEvent(traceEvHeapGoal, -1, 0) + } else { + traceEvent(traceEvHeapGoal, -1, heapGoal) + } +} + +// To access runtime functions from runtime/trace. +// See runtime/trace/annotation.go + +//go:linkname trace_userTaskCreate runtime/trace.userTaskCreate +func trace_userTaskCreate(id, parentID uint64, taskType string) { + if !trace.enabled { + return + } + + // Same as in traceEvent. 
+ mp, pid, bufp := traceAcquireBuffer() + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + typeStringID, bufp := traceString(bufp, pid, taskType) + traceEventLocked(0, mp, pid, bufp, traceEvUserTaskCreate, 0, 3, id, parentID, typeStringID) + traceReleaseBuffer(pid) +} + +//go:linkname trace_userTaskEnd runtime/trace.userTaskEnd +func trace_userTaskEnd(id uint64) { + traceEvent(traceEvUserTaskEnd, 2, id) +} + +//go:linkname trace_userRegion runtime/trace.userRegion +func trace_userRegion(id, mode uint64, name string) { + if !trace.enabled { + return + } + + mp, pid, bufp := traceAcquireBuffer() + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + nameStringID, bufp := traceString(bufp, pid, name) + traceEventLocked(0, mp, pid, bufp, traceEvUserRegion, 0, 3, id, mode, nameStringID) + traceReleaseBuffer(pid) +} + +//go:linkname trace_userLog runtime/trace.userLog +func trace_userLog(id uint64, category, message string) { + if !trace.enabled { + return + } + + mp, pid, bufp := traceAcquireBuffer() + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + categoryID, bufp := traceString(bufp, pid, category) + + extraSpace := traceBytesPerNumber + len(message) // extraSpace for the value string + traceEventLocked(extraSpace, mp, pid, bufp, traceEvUserLog, 0, 3, id, categoryID) + // traceEventLocked reserved extra space for val and len(val) + // in buf, so buf now has room for the following. + buf := bufp.ptr() + + // double-check the message and its length can fit. + // Otherwise, truncate the message. + slen := len(message) + if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber { + slen = room + } + buf.varint(uint64(slen)) + buf.pos += copy(buf.arr[buf.pos:], message[:slen]) + + traceReleaseBuffer(pid) +} + +// the start PC of a goroutine for tracing purposes. If pc is a wrapper, +// it returns the PC of the wrapped function. Otherwise it returns pc. +func startPCforTrace(pc uintptr) uintptr { + f := findfunc(pc) + if !f.valid() { + return pc // may happen for locked g in extra M since its pc is 0. + } + w := funcdata(f, _FUNCDATA_WrapInfo) + if w == nil { + return pc // not a wrapper + } + return f.datap.textAddr(*(*uint32)(w)) +} diff --git a/src/runtime/trace/annotation.go b/src/runtime/trace/annotation.go new file mode 100644 index 0000000..d47cb85 --- /dev/null +++ b/src/runtime/trace/annotation.go @@ -0,0 +1,198 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import ( + "context" + "fmt" + "sync/atomic" + _ "unsafe" +) + +type traceContextKey struct{} + +// NewTask creates a task instance with the type taskType and returns +// it along with a Context that carries the task. +// If the input context contains a task, the new task is its subtask. +// +// The taskType is used to classify task instances. Analysis tools +// like the Go execution tracer may assume there are only a bounded +// number of unique task types in the system. +// +// The returned end function is used to mark the task's end. +// The trace tool measures task latency as the time between task creation +// and when the end function is called, and provides the latency +// distribution per task type. +// If the end function is called multiple times, only the first +// call is used in the latency measurement. 
+// +// ctx, task := trace.NewTask(ctx, "awesomeTask") +// trace.WithRegion(ctx, "preparation", prepWork) +// // preparation of the task +// go func() { // continue processing the task in a separate goroutine. +// defer task.End() +// trace.WithRegion(ctx, "remainingWork", remainingWork) +// }() +func NewTask(pctx context.Context, taskType string) (ctx context.Context, task *Task) { + pid := fromContext(pctx).id + id := newID() + userTaskCreate(id, pid, taskType) + s := &Task{id: id} + return context.WithValue(pctx, traceContextKey{}, s), s + + // We allocate a new task and the end function even when + // the tracing is disabled because the context and the detach + // function can be used across trace enable/disable boundaries, + // which complicates the problem. + // + // For example, consider the following scenario: + // - trace is enabled. + // - trace.WithRegion is called, so a new context ctx + // with a new region is created. + // - trace is disabled. + // - trace is enabled again. + // - trace APIs with the ctx is called. Is the ID in the task + // a valid one to use? + // + // TODO(hyangah): reduce the overhead at least when + // tracing is disabled. Maybe the id can embed a tracing + // round number and ignore ids generated from previous + // tracing round. +} + +func fromContext(ctx context.Context) *Task { + if s, ok := ctx.Value(traceContextKey{}).(*Task); ok { + return s + } + return &bgTask +} + +// Task is a data type for tracing a user-defined, logical operation. +type Task struct { + id uint64 + // TODO(hyangah): record parent id? +} + +// End marks the end of the operation represented by the Task. +func (t *Task) End() { + userTaskEnd(t.id) +} + +var lastTaskID uint64 = 0 // task id issued last time + +func newID() uint64 { + // TODO(hyangah): use per-P cache + return atomic.AddUint64(&lastTaskID, 1) +} + +var bgTask = Task{id: uint64(0)} + +// Log emits a one-off event with the given category and message. +// Category can be empty and the API assumes there are only a handful of +// unique categories in the system. +func Log(ctx context.Context, category, message string) { + id := fromContext(ctx).id + userLog(id, category, message) +} + +// Logf is like Log, but the value is formatted using the specified format spec. +func Logf(ctx context.Context, category, format string, args ...any) { + if IsEnabled() { + // Ideally this should be just Log, but that will + // add one more frame in the stack trace. + id := fromContext(ctx).id + userLog(id, category, fmt.Sprintf(format, args...)) + } +} + +const ( + regionStartCode = uint64(0) + regionEndCode = uint64(1) +) + +// WithRegion starts a region associated with its calling goroutine, runs fn, +// and then ends the region. If the context carries a task, the region is +// associated with the task. Otherwise, the region is attached to the background +// task. +// +// The regionType is used to classify regions, so there should be only a +// handful of unique region types. +func WithRegion(ctx context.Context, regionType string, fn func()) { + // NOTE: + // WithRegion helps avoiding misuse of the API but in practice, + // this is very restrictive: + // - Use of WithRegion makes the stack traces captured from + // region start and end are identical. + // - Refactoring the existing code to use WithRegion is sometimes + // hard and makes the code less readable. + // e.g. 
code block nested deep in a loop with various
+	//   exit points that return values
+	// - Refactoring the code to use this API with closure can
+	//   cause different GC behavior such as retaining some parameters
+	//   longer.
+	// This causes more churn in the code than I hoped, and sometimes
+	// makes the code less readable.
+
+	id := fromContext(ctx).id
+	userRegion(id, regionStartCode, regionType)
+	defer userRegion(id, regionEndCode, regionType)
+	fn()
+}
+
+// StartRegion starts a region and returns a Region whose End method
+// marks the end of the region. The End function must be called
+// from the same goroutine where the region was started.
+// Within each goroutine, regions must nest. That is, regions started
+// after this region must be ended before this region can be ended.
+// Recommended usage is
+//
+//	defer trace.StartRegion(ctx, "myTracedRegion").End()
+func StartRegion(ctx context.Context, regionType string) *Region {
+	if !IsEnabled() {
+		return noopRegion
+	}
+	id := fromContext(ctx).id
+	userRegion(id, regionStartCode, regionType)
+	return &Region{id, regionType}
+}
+
+// Region is a region of code whose execution time interval is traced.
+type Region struct {
+	id         uint64
+	regionType string
+}
+
+var noopRegion = &Region{}
+
+// End marks the end of the traced code region.
+func (r *Region) End() {
+	if r == noopRegion {
+		return
+	}
+	userRegion(r.id, regionEndCode, r.regionType)
+}
+
+// IsEnabled reports whether tracing is enabled.
+// The information is advisory only. The tracing status
+// may have changed by the time this function returns.
+func IsEnabled() bool {
+	return tracing.enabled.Load()
+}
+
+//
+// Function bodies are defined in runtime/trace.go
+//
+
+// emits UserTaskCreate event.
+func userTaskCreate(id, parentID uint64, taskType string)
+
+// emits UserTaskEnd event.
+func userTaskEnd(id uint64)
+
+// emits UserRegion event.
+func userRegion(id, mode uint64, regionType string)
+
+// emits UserLog event.
+func userLog(id uint64, category, message string)
diff --git a/src/runtime/trace/annotation_test.go b/src/runtime/trace/annotation_test.go
new file mode 100644
index 0000000..69ea8f2
--- /dev/null
+++ b/src/runtime/trace/annotation_test.go
@@ -0,0 +1,156 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace_test
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"internal/trace"
+	"reflect"
+	.
"runtime/trace" + "strings" + "sync" + "testing" +) + +func BenchmarkStartRegion(b *testing.B) { + b.ReportAllocs() + ctx, task := NewTask(context.Background(), "benchmark") + defer task.End() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + StartRegion(ctx, "region").End() + } + }) +} + +func BenchmarkNewTask(b *testing.B) { + b.ReportAllocs() + pctx, task := NewTask(context.Background(), "benchmark") + defer task.End() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + _, task := NewTask(pctx, "task") + task.End() + } + }) +} + +func TestUserTaskRegion(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + bgctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + preExistingRegion := StartRegion(bgctx, "pre-existing region") + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + // Beginning of traced execution + var wg sync.WaitGroup + ctx, task := NewTask(bgctx, "task0") // EvUserTaskCreate("task0") + wg.Add(1) + go func() { + defer wg.Done() + defer task.End() // EvUserTaskEnd("task0") + + WithRegion(ctx, "region0", func() { + // EvUserRegionCreate("region0", start) + WithRegion(ctx, "region1", func() { + Log(ctx, "key0", "0123456789abcdef") // EvUserLog("task0", "key0", "0....f") + }) + // EvUserRegion("region0", end) + }) + }() + + wg.Wait() + + preExistingRegion.End() + postExistingRegion := StartRegion(bgctx, "post-existing region") + + // End of traced execution + Stop() + + postExistingRegion.End() + + saveTrace(t, buf, "TestUserTaskRegion") + res, err := trace.Parse(buf, "") + if err == trace.ErrTimeOrder { + // golang.org/issues/16755 + t.Skipf("skipping trace: %v", err) + } + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + // Check whether we see all user annotation related records in order + type testData struct { + typ byte + strs []string + args []uint64 + setLink bool + } + + var got []testData + tasks := map[uint64]string{} + for _, e := range res.Events { + t.Logf("%s", e) + switch e.Type { + case trace.EvUserTaskCreate: + taskName := e.SArgs[0] + got = append(got, testData{trace.EvUserTaskCreate, []string{taskName}, nil, e.Link != nil}) + if e.Link != nil && e.Link.Type != trace.EvUserTaskEnd { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + tasks[e.Args[0]] = taskName + case trace.EvUserLog: + key, val := e.SArgs[0], e.SArgs[1] + taskName := tasks[e.Args[0]] + got = append(got, testData{trace.EvUserLog, []string{taskName, key, val}, nil, e.Link != nil}) + case trace.EvUserTaskEnd: + taskName := tasks[e.Args[0]] + got = append(got, testData{trace.EvUserTaskEnd, []string{taskName}, nil, e.Link != nil}) + if e.Link != nil && e.Link.Type != trace.EvUserTaskCreate { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + case trace.EvUserRegion: + taskName := tasks[e.Args[0]] + regionName := e.SArgs[0] + got = append(got, testData{trace.EvUserRegion, []string{taskName, regionName}, []uint64{e.Args[1]}, e.Link != nil}) + if e.Link != nil && (e.Link.Type != trace.EvUserRegion || e.Link.SArgs[0] != regionName) { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + } + } + want := []testData{ + {trace.EvUserTaskCreate, []string{"task0"}, nil, true}, + {trace.EvUserRegion, []string{"task0", "region0"}, []uint64{0}, true}, + {trace.EvUserRegion, []string{"task0", "region1"}, []uint64{0}, true}, + {trace.EvUserLog, []string{"task0", "key0", "0123456789abcdef"}, nil, false}, + {trace.EvUserRegion, 
[]string{"task0", "region1"}, []uint64{1}, false}, + {trace.EvUserRegion, []string{"task0", "region0"}, []uint64{1}, false}, + {trace.EvUserTaskEnd, []string{"task0"}, nil, false}, + // Currently, pre-existing region is not recorded to avoid allocations. + // {trace.EvUserRegion, []string{"", "pre-existing region"}, []uint64{1}, false}, + {trace.EvUserRegion, []string{"", "post-existing region"}, []uint64{0}, false}, + } + if !reflect.DeepEqual(got, want) { + pretty := func(data []testData) string { + var s strings.Builder + for _, d := range data { + fmt.Fprintf(&s, "\t%+v\n", d) + } + return s.String() + } + t.Errorf("Got user region related events\n%+v\nwant:\n%+v", pretty(got), pretty(want)) + } +} diff --git a/src/runtime/trace/example_test.go b/src/runtime/trace/example_test.go new file mode 100644 index 0000000..ba96a82 --- /dev/null +++ b/src/runtime/trace/example_test.go @@ -0,0 +1,39 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace_test + +import ( + "fmt" + "log" + "os" + "runtime/trace" +) + +// Example demonstrates the use of the trace package to trace +// the execution of a Go program. The trace output will be +// written to the file trace.out +func Example() { + f, err := os.Create("trace.out") + if err != nil { + log.Fatalf("failed to create trace output file: %v", err) + } + defer func() { + if err := f.Close(); err != nil { + log.Fatalf("failed to close trace file: %v", err) + } + }() + + if err := trace.Start(f); err != nil { + log.Fatalf("failed to start trace: %v", err) + } + defer trace.Stop() + + // your program here + RunMyProgram() +} + +func RunMyProgram() { + fmt.Printf("this function will be traced") +} diff --git a/src/runtime/trace/trace.go b/src/runtime/trace/trace.go new file mode 100644 index 0000000..86c97e2 --- /dev/null +++ b/src/runtime/trace/trace.go @@ -0,0 +1,154 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package trace contains facilities for programs to generate traces +// for the Go execution tracer. +// +// # Tracing runtime activities +// +// The execution trace captures a wide range of execution events such as +// goroutine creation/blocking/unblocking, syscall enter/exit/block, +// GC-related events, changes of heap size, processor start/stop, etc. +// When CPU profiling is active, the execution tracer makes an effort to +// include those samples as well. +// A precise nanosecond-precision timestamp and a stack trace is +// captured for most events. The generated trace can be interpreted +// using `go tool trace`. +// +// Support for tracing tests and benchmarks built with the standard +// testing package is built into `go test`. For example, the following +// command runs the test in the current directory and writes the trace +// file (trace.out). +// +// go test -trace=trace.out +// +// This runtime/trace package provides APIs to add equivalent tracing +// support to a standalone program. See the Example that demonstrates +// how to use this API to enable tracing. +// +// There is also a standard HTTP interface to trace data. 
Adding the
+// following line will install a handler under the /debug/pprof/trace URL
+// to download a live trace:
+//
+//	import _ "net/http/pprof"
+//
+// See the net/http/pprof package for more details about all of the
+// debug endpoints installed by this import.
+//
+// # User annotation
+//
+// Package trace provides user annotation APIs that can be used to
+// log interesting events during execution.
+//
+// There are three types of user annotations: log messages, regions,
+// and tasks.
+//
+// Log emits a timestamped message to the execution trace along with
+// additional information such as the category of the message and
+// which goroutine called Log. The execution tracer provides UIs to filter
+// and group goroutines using the log category and the message supplied
+// in Log.
+//
+// A region is for logging a time interval during a goroutine's execution.
+// By definition, a region starts and ends in the same goroutine.
+// Regions can be nested to represent subintervals.
+// For example, the following code records four regions in the execution
+// trace to trace the durations of sequential steps in a cappuccino making
+// operation.
+//
+//	trace.WithRegion(ctx, "makeCappuccino", func() {
+//
+//	   // orderID identifies a specific order
+//	   // among many cappuccino order region records.
+//	   trace.Log(ctx, "orderID", orderID)
+//
+//	   trace.WithRegion(ctx, "steamMilk", steamMilk)
+//	   trace.WithRegion(ctx, "extractCoffee", extractCoffee)
+//	   trace.WithRegion(ctx, "mixMilkCoffee", mixMilkCoffee)
+//	})
+//
+// A task is a higher-level component that aids tracing of logical
+// operations such as an RPC request, an HTTP request, or an
+// interesting local operation which may require multiple goroutines
+// working together. Since tasks can involve multiple goroutines,
+// they are tracked via a context.Context object. NewTask creates
+// a new task and embeds it in the returned context.Context object.
+// Log messages and regions are attached to the task, if any, in the
+// Context passed to Log and WithRegion.
+//
+// For example, assume that we decided to froth milk, extract coffee,
+// and mix milk and coffee in separate goroutines. With a task,
+// the trace tool can identify the goroutines involved in a specific
+// cappuccino order.
+//
+//	ctx, task := trace.NewTask(ctx, "makeCappuccino")
+//	trace.Log(ctx, "orderID", orderID)
+//
+//	milk := make(chan bool)
+//	espresso := make(chan bool)
+//
+//	go func() {
+//		trace.WithRegion(ctx, "steamMilk", steamMilk)
+//		milk <- true
+//	}()
+//	go func() {
+//		trace.WithRegion(ctx, "extractCoffee", extractCoffee)
+//		espresso <- true
+//	}()
+//	go func() {
+//		defer task.End() // When assembly is done, the order is complete.
+//		<-espresso
+//		<-milk
+//		trace.WithRegion(ctx, "mixMilkCoffee", mixMilkCoffee)
+//	}()
+//
+// The trace tool computes the latency of a task by measuring the
+// time between the task creation and the task end and provides
+// latency distributions for each task type found in the trace.
+package trace
+
+import (
+	"io"
+	"runtime"
+	"sync"
+	"sync/atomic"
+)
+
+// Start enables tracing for the current program.
+// While tracing, the trace will be buffered and written to w.
+// Start returns an error if tracing is already enabled.
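+//
+// As a minimal sketch (mirroring the package example above; the file
+// name and error handling are the caller's choice):
+//
+//	f, err := os.Create("trace.out")
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	defer f.Close()
+//	if err := trace.Start(f); err != nil {
+//		log.Fatal(err)
+//	}
+//	defer trace.Stop()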
+func Start(w io.Writer) error {
+	tracing.Lock()
+	defer tracing.Unlock()
+
+	if err := runtime.StartTrace(); err != nil {
+		return err
+	}
+	go func() {
+		for {
+			data := runtime.ReadTrace()
+			if data == nil {
+				break
+			}
+			w.Write(data)
+		}
+	}()
+	tracing.enabled.Store(true)
+	return nil
+}
+
+// Stop stops the current tracing, if any.
+// Stop only returns after all the writes for the trace have completed.
+func Stop() {
+	tracing.Lock()
+	defer tracing.Unlock()
+	tracing.enabled.Store(false)
+
+	runtime.StopTrace()
+}
+
+var tracing struct {
+	sync.Mutex // gate mutators (Start, Stop)
+	enabled    atomic.Bool
+}
diff --git a/src/runtime/trace/trace_stack_test.go b/src/runtime/trace/trace_stack_test.go
new file mode 100644
index 0000000..be3adc9
--- /dev/null
+++ b/src/runtime/trace/trace_stack_test.go
@@ -0,0 +1,333 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace_test
+
+import (
+	"bytes"
+	"fmt"
+	"internal/testenv"
+	"internal/trace"
+	"net"
+	"os"
+	"runtime"
+	. "runtime/trace"
+	"strings"
+	"sync"
+	"testing"
+	"text/tabwriter"
+	"time"
+)
+
+// TestTraceSymbolize tests symbolization and that events have proper stacks.
+// In particular, it checks that we strip uninteresting bottom frames like
+// goexit and uninteresting top frames (runtime guts).
+func TestTraceSymbolize(t *testing.T) {
+	skipTraceSymbolizeTestIfNecessary(t)
+
+	buf := new(bytes.Buffer)
+	if err := Start(buf); err != nil {
+		t.Fatalf("failed to start tracing: %v", err)
+	}
+	defer Stop() // in case of early return
+
+	// Now we will do a bunch of things for which we verify stacks later.
+	// It is impossible to ensure that a goroutine has actually blocked
+	// on a channel, in a select or otherwise. So we kick off goroutines
+	// that need to block first in the hope that while we are executing
+	// the rest of the test, they will block.
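+	// Each goroutine below is tagged funcN in a comment; the want table
+	// later in this test matches stacks against the corresponding closure
+	// names (e.g. runtime/trace_test.TestTraceSymbolize.func1).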
+ go func() { // func1 + select {} + }() + go func() { // func2 + var c chan int + c <- 0 + }() + go func() { // func3 + var c chan int + <-c + }() + done1 := make(chan bool) + go func() { // func4 + <-done1 + }() + done2 := make(chan bool) + go func() { // func5 + done2 <- true + }() + c1 := make(chan int) + c2 := make(chan int) + go func() { // func6 + select { + case <-c1: + case <-c2: + } + }() + var mu sync.Mutex + mu.Lock() + go func() { // func7 + mu.Lock() + mu.Unlock() + }() + var wg sync.WaitGroup + wg.Add(1) + go func() { // func8 + wg.Wait() + }() + cv := sync.NewCond(&sync.Mutex{}) + go func() { // func9 + cv.L.Lock() + cv.Wait() + cv.L.Unlock() + }() + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to listen: %v", err) + } + go func() { // func10 + c, err := ln.Accept() + if err != nil { + t.Errorf("failed to accept: %v", err) + return + } + c.Close() + }() + rp, wp, err := os.Pipe() + if err != nil { + t.Fatalf("failed to create a pipe: %v", err) + } + defer rp.Close() + defer wp.Close() + pipeReadDone := make(chan bool) + go func() { // func11 + var data [1]byte + rp.Read(data[:]) + pipeReadDone <- true + }() + + time.Sleep(100 * time.Millisecond) + runtime.GC() + runtime.Gosched() + time.Sleep(100 * time.Millisecond) // the last chance for the goroutines above to block + done1 <- true + <-done2 + select { + case c1 <- 0: + case c2 <- 0: + } + mu.Unlock() + wg.Done() + cv.Signal() + c, err := net.Dial("tcp", ln.Addr().String()) + if err != nil { + t.Fatalf("failed to dial: %v", err) + } + c.Close() + var data [1]byte + wp.Write(data[:]) + <-pipeReadDone + + oldGoMaxProcs := runtime.GOMAXPROCS(0) + runtime.GOMAXPROCS(oldGoMaxProcs + 1) + + Stop() + + runtime.GOMAXPROCS(oldGoMaxProcs) + + events, _ := parseTrace(t, buf) + + // Now check that the stacks are correct. 
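+	// Each eventDesc pairs an event type with its innermost expected
+	// frames; a frame Line of 0 means the line number is not checked
+	// (see the matching loop below).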
+ type eventDesc struct { + Type byte + Stk []frame + } + want := []eventDesc{ + {trace.EvGCStart, []frame{ + {"runtime.GC", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoStart, []frame{ + {"runtime/trace_test.TestTraceSymbolize.func1", 0}, + }}, + {trace.EvGoSched, []frame{ + {"runtime/trace_test.TestTraceSymbolize", 111}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoCreate, []frame{ + {"runtime/trace_test.TestTraceSymbolize", 40}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoStop, []frame{ + {"runtime.block", 0}, + {"runtime/trace_test.TestTraceSymbolize.func1", 0}, + }}, + {trace.EvGoStop, []frame{ + {"runtime.chansend1", 0}, + {"runtime/trace_test.TestTraceSymbolize.func2", 0}, + }}, + {trace.EvGoStop, []frame{ + {"runtime.chanrecv1", 0}, + {"runtime/trace_test.TestTraceSymbolize.func3", 0}, + }}, + {trace.EvGoBlockRecv, []frame{ + {"runtime.chanrecv1", 0}, + {"runtime/trace_test.TestTraceSymbolize.func4", 0}, + }}, + {trace.EvGoUnblock, []frame{ + {"runtime.chansend1", 0}, + {"runtime/trace_test.TestTraceSymbolize", 113}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockSend, []frame{ + {"runtime.chansend1", 0}, + {"runtime/trace_test.TestTraceSymbolize.func5", 0}, + }}, + {trace.EvGoUnblock, []frame{ + {"runtime.chanrecv1", 0}, + {"runtime/trace_test.TestTraceSymbolize", 114}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockSelect, []frame{ + {"runtime.selectgo", 0}, + {"runtime/trace_test.TestTraceSymbolize.func6", 0}, + }}, + {trace.EvGoUnblock, []frame{ + {"runtime.selectgo", 0}, + {"runtime/trace_test.TestTraceSymbolize", 115}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockSync, []frame{ + {"sync.(*Mutex).Lock", 0}, + {"runtime/trace_test.TestTraceSymbolize.func7", 0}, + }}, + {trace.EvGoUnblock, []frame{ + {"sync.(*Mutex).Unlock", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockSync, []frame{ + {"sync.(*WaitGroup).Wait", 0}, + {"runtime/trace_test.TestTraceSymbolize.func8", 0}, + }}, + {trace.EvGoUnblock, []frame{ + {"sync.(*WaitGroup).Add", 0}, + {"sync.(*WaitGroup).Done", 0}, + {"runtime/trace_test.TestTraceSymbolize", 120}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockCond, []frame{ + {"sync.(*Cond).Wait", 0}, + {"runtime/trace_test.TestTraceSymbolize.func9", 0}, + }}, + {trace.EvGoUnblock, []frame{ + {"sync.(*Cond).Signal", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoSleep, []frame{ + {"time.Sleep", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + {trace.EvGomaxprocs, []frame{ + {"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged. + {"runtime.startTheWorldGC", 0}, + {"runtime.GOMAXPROCS", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + } + // Stacks for the following events are OS-dependent due to OS-specific code in net package. 
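+	// (windows and plan9 go through different net and os code paths, so
+	// these two expectations are skipped there.)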
+	if runtime.GOOS != "windows" && runtime.GOOS != "plan9" {
+		want = append(want, []eventDesc{
+			{trace.EvGoBlockNet, []frame{
+				{"internal/poll.(*FD).Accept", 0},
+				{"net.(*netFD).accept", 0},
+				{"net.(*TCPListener).accept", 0},
+				{"net.(*TCPListener).Accept", 0},
+				{"runtime/trace_test.TestTraceSymbolize.func10", 0},
+			}},
+			{trace.EvGoSysCall, []frame{
+				{"syscall.read", 0},
+				{"syscall.Read", 0},
+				{"internal/poll.ignoringEINTRIO", 0},
+				{"internal/poll.(*FD).Read", 0},
+				{"os.(*File).read", 0},
+				{"os.(*File).Read", 0},
+				{"runtime/trace_test.TestTraceSymbolize.func11", 0},
+			}},
+		}...)
+	}
+	matched := make([]bool, len(want))
+	for _, ev := range events {
+	wantLoop:
+		for i, w := range want {
+			if matched[i] || w.Type != ev.Type || len(w.Stk) != len(ev.Stk) {
+				continue
+			}
+
+			for fi, f := range ev.Stk {
+				wf := w.Stk[fi]
+				if wf.Fn != f.Fn || wf.Line != 0 && wf.Line != f.Line {
+					continue wantLoop
+				}
+			}
+			matched[i] = true
+		}
+	}
+	for i, w := range want {
+		if matched[i] {
+			continue
+		}
+		seen, n := dumpEventStacks(w.Type, events)
+		t.Errorf("Did not match event %v with stack\n%s\nSeen %d events of the type\n%s",
+			trace.EventDescriptions[w.Type].Name, dumpFrames(w.Stk), n, seen)
+	}
+}
+
+func skipTraceSymbolizeTestIfNecessary(t *testing.T) {
+	testenv.MustHaveGoBuild(t)
+	if IsEnabled() {
+		t.Skip("skipping because -test.trace is set")
+	}
+}
+
+func dumpEventStacks(typ byte, events []*trace.Event) ([]byte, int) {
+	matched := 0
+	o := new(bytes.Buffer)
+	tw := tabwriter.NewWriter(o, 0, 8, 0, '\t', 0)
+	for _, ev := range events {
+		if ev.Type != typ {
+			continue
+		}
+		matched++
+		fmt.Fprintf(tw, "Offset %d\n", ev.Off)
+		for _, f := range ev.Stk {
+			fname := f.File
+			if idx := strings.Index(fname, "/go/src/"); idx > 0 {
+				fname = fname[idx:]
+			}
+			fmt.Fprintf(tw, "  %v\t%s:%d\n", f.Fn, fname, f.Line)
+		}
+	}
+	tw.Flush()
+	return o.Bytes(), matched
+}
+
+type frame struct {
+	Fn   string
+	Line int
+}
+
+func dumpFrames(frames []frame) []byte {
+	o := new(bytes.Buffer)
+	tw := tabwriter.NewWriter(o, 0, 8, 0, '\t', 0)
+
+	for _, f := range frames {
+		fmt.Fprintf(tw, "  %v\t :%d\n", f.Fn, f.Line)
+	}
+	tw.Flush()
+	return o.Bytes()
+}
diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go
new file mode 100644
index 0000000..19f7dbe
--- /dev/null
+++ b/src/runtime/trace/trace_test.go
@@ -0,0 +1,792 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace_test
+
+import (
+	"bytes"
+	"context"
+	"flag"
+	"fmt"
+	"internal/profile"
+	"internal/race"
+	"internal/trace"
+	"io"
+	"net"
+	"os"
+	"runtime"
+	"runtime/pprof"
+	. "runtime/trace"
+	"strconv"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+)
+
+var (
+	saveTraces = flag.Bool("savetraces", false, "save traces collected by tests")
+)
+
+// TestEventBatch tests that Flush calls that happen during Start
+// don't produce corrupted traces.
+func TestEventBatch(t *testing.T) {
+	if race.Enabled {
+		t.Skip("skipping in race mode")
+	}
+	if IsEnabled() {
+		t.Skip("skipping because -test.trace is set")
+	}
+	if testing.Short() {
+		t.Skip("skipping in short mode")
+	}
+	// During Start, a bunch of records are written to reflect the current
+	// snapshot of the program, including the state of each goroutine.
+	// And some string constants are written to the trace to aid trace
+	// parsing. This test checks that a Flush of the buffer occurring during
+	// this process doesn't cause corrupted traces.
+	// Precisely when a Flush happens during Start is hard to control,
+	// so we test with a range of goroutine counts, hoping that one
+	// of them triggers a Flush.
+	// This range was chosen to fill up a ~64KB buffer with traceEvGoCreate
+	// and traceEvGoWaiting events (12-13 bytes per goroutine).
+	for g := 4950; g < 5050; g++ {
+		n := g
+		t.Run("G="+strconv.Itoa(n), func(t *testing.T) {
+			var wg sync.WaitGroup
+			wg.Add(n)
+
+			in := make(chan bool, 1000)
+			for i := 0; i < n; i++ {
+				go func() {
+					<-in
+					wg.Done()
+				}()
+			}
+			buf := new(bytes.Buffer)
+			if err := Start(buf); err != nil {
+				t.Fatalf("failed to start tracing: %v", err)
+			}
+
+			for i := 0; i < n; i++ {
+				in <- true
+			}
+			wg.Wait()
+			Stop()
+
+			_, err := trace.Parse(buf, "")
+			if err == trace.ErrTimeOrder {
+				t.Skipf("skipping trace: %v", err)
+			}
+
+			if err != nil {
+				t.Fatalf("failed to parse trace: %v", err)
+			}
+		})
+	}
+}
+
+func TestTraceStartStop(t *testing.T) {
+	if IsEnabled() {
+		t.Skip("skipping because -test.trace is set")
+	}
+	buf := new(bytes.Buffer)
+	if err := Start(buf); err != nil {
+		t.Fatalf("failed to start tracing: %v", err)
+	}
+	Stop()
+	size := buf.Len()
+	if size == 0 {
+		t.Fatalf("trace is empty")
+	}
+	time.Sleep(100 * time.Millisecond)
+	if size != buf.Len() {
+		t.Fatalf("trace writes after stop: %v -> %v", size, buf.Len())
+	}
+	saveTrace(t, buf, "TestTraceStartStop")
+}
+
+func TestTraceDoubleStart(t *testing.T) {
+	if IsEnabled() {
+		t.Skip("skipping because -test.trace is set")
+	}
+	Stop()
+	buf := new(bytes.Buffer)
+	if err := Start(buf); err != nil {
+		t.Fatalf("failed to start tracing: %v", err)
+	}
+	if err := Start(buf); err == nil {
+		t.Fatalf("second Start unexpectedly succeeded")
+	}
+	Stop()
+	Stop()
+}
+
+func TestTrace(t *testing.T) {
+	if IsEnabled() {
+		t.Skip("skipping because -test.trace is set")
+	}
+	buf := new(bytes.Buffer)
+	if err := Start(buf); err != nil {
+		t.Fatalf("failed to start tracing: %v", err)
+	}
+	Stop()
+	saveTrace(t, buf, "TestTrace")
+	_, err := trace.Parse(buf, "")
+	if err == trace.ErrTimeOrder {
+		t.Skipf("skipping trace: %v", err)
+	}
+	if err != nil {
+		t.Fatalf("failed to parse trace: %v", err)
+	}
+}
+
+func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc) {
+	res, err := trace.Parse(r, "")
+	if err == trace.ErrTimeOrder {
+		t.Skipf("skipping trace: %v", err)
+	}
+	if err != nil {
+		t.Fatalf("failed to parse trace: %v", err)
+	}
+	gs := trace.GoroutineStats(res.Events)
+	for goid := range gs {
+		// We don't do any particular checks on the result at the moment.
+		// But still check that RelatedGoroutines does not crash, hang, etc.
+		_ = trace.RelatedGoroutines(res.Events, goid)
+	}
+	return res.Events, gs
+}
+
+func testBrokenTimestamps(t *testing.T, data []byte) {
+	// On some processors cputicks (used to generate trace timestamps)
+	// produces non-monotonic timestamps. It is important that the parser
+	// distinguishes logically inconsistent traces (e.g. missing, excessive
+	// or misordered events) from broken timestamps. The former is a bug
+	// in the tracer; the latter is a machine issue.
+	// So now that we have a consistent trace, test that (1) the parser does
+	// not return a logical error in case of broken timestamps
+	// and (2) broken timestamps are eventually detected and reported.
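+	// The injected breakage is not deterministic, hence the retry loop:
+	// parse the same trace repeatedly until the parser reports
+	// ErrTimeOrder, giving up after 1e4 attempts.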
+ trace.BreakTimestampsForTesting = true + defer func() { + trace.BreakTimestampsForTesting = false + }() + for i := 0; i < 1e4; i++ { + _, err := trace.Parse(bytes.NewReader(data), "") + if err == trace.ErrTimeOrder { + return + } + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } + } +} + +func TestTraceStress(t *testing.T) { + if runtime.GOOS == "js" { + t.Skip("no os.Pipe on js") + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + if testing.Short() { + t.Skip("skipping in -short mode") + } + + var wg sync.WaitGroup + done := make(chan bool) + + // Create a goroutine blocked before tracing. + wg.Add(1) + go func() { + <-done + wg.Done() + }() + + // Create a goroutine blocked in syscall before tracing. + rp, wp, err := os.Pipe() + if err != nil { + t.Fatalf("failed to create pipe: %v", err) + } + defer func() { + rp.Close() + wp.Close() + }() + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + time.Sleep(time.Millisecond) // give the goroutine above time to block + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + procs := runtime.GOMAXPROCS(10) + time.Sleep(50 * time.Millisecond) // test proc stop/start events + + go func() { + runtime.LockOSThread() + for { + select { + case <-done: + return + default: + runtime.Gosched() + } + } + }() + + runtime.GC() + // Trigger GC from malloc. + n := int(1e3) + if isMemoryConstrained() { + // Reduce allocation to avoid running out of + // memory on the builder - see issue/12032. + n = 512 + } + for i := 0; i < n; i++ { + _ = make([]byte, 1<<20) + } + + // Create a bunch of busy goroutines to load all Ps. + for p := 0; p < 10; p++ { + wg.Add(1) + go func() { + // Do something useful. + tmp := make([]byte, 1<<16) + for i := range tmp { + tmp[i]++ + } + _ = tmp + <-done + wg.Done() + }() + } + + // Block in syscall. + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + + // Test timers. + timerDone := make(chan bool) + go func() { + time.Sleep(time.Millisecond) + timerDone <- true + }() + <-timerDone + + // A bit of network. + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("listen failed: %v", err) + } + defer ln.Close() + go func() { + c, err := ln.Accept() + if err != nil { + return + } + time.Sleep(time.Millisecond) + var buf [1]byte + c.Write(buf[:]) + c.Close() + }() + c, err := net.Dial("tcp", ln.Addr().String()) + if err != nil { + t.Fatalf("dial failed: %v", err) + } + var tmp [1]byte + c.Read(tmp[:]) + c.Close() + + go func() { + runtime.Gosched() + select {} + }() + + // Unblock helper goroutines and wait them to finish. + wp.Write(tmp[:]) + wp.Write(tmp[:]) + close(done) + wg.Wait() + + runtime.GOMAXPROCS(procs) + + Stop() + saveTrace(t, buf, "TestTraceStress") + trace := buf.Bytes() + parseTrace(t, buf) + testBrokenTimestamps(t, trace) +} + +// isMemoryConstrained reports whether the current machine is likely +// to be memory constrained. +// This was originally for the openbsd/arm builder (Issue 12032). +// TODO: move this to testenv? Make this look at memory? Look at GO_BUILDER_NAME? +func isMemoryConstrained() bool { + if runtime.GOOS == "plan9" { + return true + } + switch runtime.GOARCH { + case "arm", "mips", "mipsle": + return true + } + return false +} + +// Do a bunch of various stuff (timers, GC, network, etc) in a separate goroutine. +// And concurrently with all that start/stop trace 3 times. 
+func TestTraceStressStartStop(t *testing.T) { + if runtime.GOOS == "js" { + t.Skip("no os.Pipe on js") + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8)) + outerDone := make(chan bool) + + go func() { + defer func() { + outerDone <- true + }() + + var wg sync.WaitGroup + done := make(chan bool) + + wg.Add(1) + go func() { + <-done + wg.Done() + }() + + rp, wp, err := os.Pipe() + if err != nil { + t.Errorf("failed to create pipe: %v", err) + return + } + defer func() { + rp.Close() + wp.Close() + }() + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + time.Sleep(time.Millisecond) + + go func() { + runtime.LockOSThread() + for { + select { + case <-done: + return + default: + runtime.Gosched() + } + } + }() + + runtime.GC() + // Trigger GC from malloc. + n := int(1e3) + if isMemoryConstrained() { + // Reduce allocation to avoid running out of + // memory on the builder. + n = 512 + } + for i := 0; i < n; i++ { + _ = make([]byte, 1<<20) + } + + // Create a bunch of busy goroutines to load all Ps. + for p := 0; p < 10; p++ { + wg.Add(1) + go func() { + // Do something useful. + tmp := make([]byte, 1<<16) + for i := range tmp { + tmp[i]++ + } + _ = tmp + <-done + wg.Done() + }() + } + + // Block in syscall. + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + + runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + + // Test timers. + timerDone := make(chan bool) + go func() { + time.Sleep(time.Millisecond) + timerDone <- true + }() + <-timerDone + + // A bit of network. + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Errorf("listen failed: %v", err) + return + } + defer ln.Close() + go func() { + c, err := ln.Accept() + if err != nil { + return + } + time.Sleep(time.Millisecond) + var buf [1]byte + c.Write(buf[:]) + c.Close() + }() + c, err := net.Dial("tcp", ln.Addr().String()) + if err != nil { + t.Errorf("dial failed: %v", err) + return + } + var tmp [1]byte + c.Read(tmp[:]) + c.Close() + + go func() { + runtime.Gosched() + select {} + }() + + // Unblock helper goroutines and wait them to finish. 
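+		// (The two pipe writes release the two goroutines parked in
+		// rp.Read; close(done) then releases the remaining helpers.)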
+ wp.Write(tmp[:]) + wp.Write(tmp[:]) + close(done) + wg.Wait() + }() + + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + time.Sleep(time.Millisecond) + Stop() + saveTrace(t, buf, "TestTraceStressStartStop") + trace := buf.Bytes() + parseTrace(t, buf) + testBrokenTimestamps(t, trace) + } + <-outerDone +} + +func TestTraceFutileWakeup(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8)) + c0 := make(chan int, 1) + c1 := make(chan int, 1) + c2 := make(chan int, 1) + const procs = 2 + var done sync.WaitGroup + done.Add(4 * procs) + for p := 0; p < procs; p++ { + const iters = 1e3 + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + c0 <- 0 + } + done.Done() + }() + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + <-c0 + } + done.Done() + }() + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + select { + case c1 <- 0: + case c2 <- 0: + } + } + done.Done() + }() + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + select { + case <-c1: + case <-c2: + } + } + done.Done() + }() + } + done.Wait() + + Stop() + saveTrace(t, buf, "TestTraceFutileWakeup") + events, _ := parseTrace(t, buf) + // Check that (1) trace does not contain EvFutileWakeup events and + // (2) there are no consecutive EvGoBlock/EvGCStart/EvGoBlock events + // (we call runtime.Gosched between all operations, so these would be futile wakeups). + gs := make(map[uint64]int) + for _, ev := range events { + switch ev.Type { + case trace.EvFutileWakeup: + t.Fatalf("found EvFutileWakeup event") + case trace.EvGoBlockSend, trace.EvGoBlockRecv, trace.EvGoBlockSelect: + if gs[ev.G] == 2 { + t.Fatalf("goroutine %v blocked on %v at %v right after start", + ev.G, trace.EventDescriptions[ev.Type].Name, ev.Ts) + } + if gs[ev.G] == 1 { + t.Fatalf("goroutine %v blocked on %v at %v while blocked", + ev.G, trace.EventDescriptions[ev.Type].Name, ev.Ts) + } + gs[ev.G] = 1 + case trace.EvGoStart: + if gs[ev.G] == 1 { + gs[ev.G] = 2 + } + default: + delete(gs, ev.G) + } + } +} + +func TestTraceCPUProfile(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + + cpuBuf := new(bytes.Buffer) + if err := pprof.StartCPUProfile(cpuBuf); err != nil { + t.Skipf("failed to start CPU profile: %v", err) + } + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + dur := 100 * time.Millisecond + func() { + // Create a region in the execution trace. Set and clear goroutine + // labels fully within that region, so we know that any CPU profile + // sample with the label must also be eligible for inclusion in the + // execution trace. + ctx := context.Background() + defer StartRegion(ctx, "cpuHogger").End() + pprof.Do(ctx, pprof.Labels("tracing", "on"), func(ctx context.Context) { + cpuHogger(cpuHog1, &salt1, dur) + }) + // Be sure the execution trace's view, when filtered to this goroutine + // via the explicit goroutine ID in each event, gets many more samples + // than the CPU profiler when filtered to this goroutine via labels. 
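+		// (This second call runs outside pprof.Do, so its samples carry
+		// no labels: the profile-side filter below ignores them, while
+		// the trace-side filter still counts them by goroutine ID.)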
+ cpuHogger(cpuHog1, &salt1, dur) + }() + + Stop() + pprof.StopCPUProfile() + saveTrace(t, buf, "TestTraceCPUProfile") + + prof, err := profile.Parse(cpuBuf) + if err != nil { + t.Fatalf("failed to parse CPU profile: %v", err) + } + // Examine the CPU profiler's view. Filter it to only include samples from + // the single test goroutine. Use labels to execute that filter: they should + // apply to all work done while that goroutine is getg().m.curg, and they + // should apply to no other goroutines. + pprofSamples := 0 + pprofStacks := make(map[string]int) + for _, s := range prof.Sample { + if s.Label["tracing"] != nil { + var fns []string + var leaf string + for _, loc := range s.Location { + for _, line := range loc.Line { + fns = append(fns, fmt.Sprintf("%s:%d", line.Function.Name, line.Line)) + leaf = line.Function.Name + } + } + // runtime.sigprof synthesizes call stacks when "normal traceback is + // impossible or has failed", using particular placeholder functions + // to represent common failure cases. Look for those functions in + // the leaf position as a sign that the call stack and its + // symbolization are more complex than this test can handle. + // + // TODO: Make the symbolization done by the execution tracer and CPU + // profiler match up even in these harder cases. See #53378. + switch leaf { + case "runtime._System", "runtime._GC", "runtime._ExternalCode", "runtime._VDSO": + continue + } + stack := strings.Join(fns, " ") + samples := int(s.Value[0]) + pprofSamples += samples + pprofStacks[stack] += samples + } + } + if pprofSamples == 0 { + t.Skipf("CPU profile did not include any samples while tracing was active\n%s", prof) + } + + // Examine the execution tracer's view of the CPU profile samples. Filter it + // to only include samples from the single test goroutine. Use the goroutine + // ID that was recorded in the events: that should reflect getg().m.curg, + // same as the profiler's labels (even when the M is using its g0 stack). + totalTraceSamples := 0 + traceSamples := 0 + traceStacks := make(map[string]int) + events, _ := parseTrace(t, buf) + var hogRegion *trace.Event + for _, ev := range events { + if ev.Type == trace.EvUserRegion && ev.Args[1] == 0 && ev.SArgs[0] == "cpuHogger" { + // mode "0" indicates region start + hogRegion = ev + } + } + if hogRegion == nil { + t.Fatalf("execution trace did not identify cpuHogger goroutine") + } else if hogRegion.Link == nil { + t.Fatalf("execution trace did not close cpuHogger region") + } + for _, ev := range events { + if ev.Type == trace.EvCPUSample { + totalTraceSamples++ + if ev.G == hogRegion.G { + traceSamples++ + var fns []string + for _, frame := range ev.Stk { + if frame.Fn != "runtime.goexit" { + fns = append(fns, fmt.Sprintf("%s:%d", frame.Fn, frame.Line)) + } + } + stack := strings.Join(fns, " ") + traceStacks[stack]++ + } + } + } + + // The execution trace may drop CPU profile samples if the profiling buffer + // overflows. Based on the size of profBufWordCount, that takes a bit over + // 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've + // hit that case, then we definitely have at least one full buffer's worth + // of CPU samples, so we'll call that success. 
+	overflowed := totalTraceSamples >= 1900
+	if traceSamples < pprofSamples {
+		t.Logf("execution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples)
+		if !overflowed {
+			t.Fail()
+		}
+	}
+
+	for stack, traceSamples := range traceStacks {
+		pprofSamples := pprofStacks[stack]
+		delete(pprofStacks, stack)
+		if traceSamples < pprofSamples {
+			t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace",
+				stack, pprofSamples, traceSamples)
+			if !overflowed {
+				t.Fail()
+			}
+		}
+	}
+	for stack, pprofSamples := range pprofStacks {
+		t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack)
+		if !overflowed {
+			t.Fail()
+		}
+	}
+
+	if t.Failed() {
+		t.Logf("execution trace CPU samples:")
+		for stack, samples := range traceStacks {
+			t.Logf("%d: %q", samples, stack)
+		}
+		t.Logf("CPU profile:\n%v", prof)
+	}
+}
+
+func cpuHogger(f func(x int) int, y *int, dur time.Duration) {
+	// We only need to get one 100 Hz clock tick, so we've got
+	// a large safety buffer.
+	// But do at least 500 iterations (which should take about 100ms),
+	// otherwise TestCPUProfileMultithreaded can fail if only one
+	// thread is scheduled during the testing period.
+	t0 := time.Now()
+	accum := *y
+	for i := 0; i < 500 || time.Since(t0) < dur; i++ {
+		accum = f(accum)
+	}
+	*y = accum
+}
+
+var (
+	salt1 = 0
+)
+
+// cpuHog1 is the actual CPU hogging function.
+// It must not call other functions nor access heap/globals in the loop;
+// otherwise, under the race detector, the samples will land in the race runtime.
+func cpuHog1(x int) int {
+	return cpuHog0(x, 1e5)
+}
+
+func cpuHog0(x, n int) int {
+	foo := x
+	for i := 0; i < n; i++ {
+		if i%1000 == 0 {
+			// Spend time in mcall, stored as gp.m.curg, with g0 running
+			runtime.Gosched()
+		}
+		if foo > 0 {
+			foo *= foo
+		} else {
+			foo *= foo + 1
+		}
+	}
+	return foo
+}
+
+func saveTrace(t *testing.T, buf *bytes.Buffer, name string) {
+	if !*saveTraces {
+		return
+	}
+	if err := os.WriteFile(name+".trace", buf.Bytes(), 0600); err != nil {
+		t.Errorf("failed to write trace file: %s", err)
+	}
+}
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
new file mode 100644
index 0000000..37f35d5
--- /dev/null
+++ b/src/runtime/traceback.go
@@ -0,0 +1,1377 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"internal/bytealg"
+	"internal/goarch"
+	"runtime/internal/sys"
+	"unsafe"
+)
+
+// The code in this file implements stack trace walking for all architectures.
+// The most important fact about a given architecture is whether it uses a link register.
+// On systems with link registers, the prologue for a non-leaf function stores the
+// incoming value of LR at the bottom of the newly allocated stack frame.
+// On systems without link registers (x86), the architecture pushes a return PC during
+// the call instruction, so the return PC ends up above the stack frame.
+// In this file, the return PC is always called LR, no matter how it was found.
+
+const usesLR = sys.MinFrameSize > 0
+
+// Generic traceback. Handles runtime stack prints (pcbuf == nil),
+// the runtime.Callers function (pcbuf != nil), as well as the garbage
+// collector (callback != nil). A little clunky to merge these, but avoids
+// duplicating the code and all its subtlety.
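+//
+// In short: pcbuf != nil fills pcbuf with return PCs (runtime.Callers),
+// callback != nil invokes the callback on each frame (the GC), and when
+// both are nil the frames are printed (stack dumps).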
+// +// The skip argument is only valid with pcbuf != nil and counts the number +// of logical frames to skip rather than physical frames (with inlining, a +// PC in pcbuf can represent multiple calls). +func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max int, callback func(*stkframe, unsafe.Pointer) bool, v unsafe.Pointer, flags uint) int { + if skip > 0 && callback != nil { + throw("gentraceback callback cannot be used with non-zero skip") + } + + // Don't call this "g"; it's too easy get "g" and "gp" confused. + if ourg := getg(); ourg == gp && ourg == ourg.m.curg { + // The starting sp has been passed in as a uintptr, and the caller may + // have other uintptr-typed stack references as well. + // If during one of the calls that got us here or during one of the + // callbacks below the stack must be grown, all these uintptr references + // to the stack will not be updated, and gentraceback will continue + // to inspect the old stack memory, which may no longer be valid. + // Even if all the variables were updated correctly, it is not clear that + // we want to expose a traceback that begins on one stack and ends + // on another stack. That could confuse callers quite a bit. + // Instead, we require that gentraceback and any other function that + // accepts an sp for the current goroutine (typically obtained by + // calling getcallersp) must not run on that goroutine's stack but + // instead on the g0 stack. + throw("gentraceback cannot trace user goroutine on its own stack") + } + level, _, _ := gotraceback() + + if pc0 == ^uintptr(0) && sp0 == ^uintptr(0) { // Signal to fetch saved values from gp. + if gp.syscallsp != 0 { + pc0 = gp.syscallpc + sp0 = gp.syscallsp + if usesLR { + lr0 = 0 + } + } else { + pc0 = gp.sched.pc + sp0 = gp.sched.sp + if usesLR { + lr0 = gp.sched.lr + } + } + } + + nprint := 0 + var frame stkframe + frame.pc = pc0 + frame.sp = sp0 + if usesLR { + frame.lr = lr0 + } + waspanic := false + cgoCtxt := gp.cgoCtxt + stack := gp.stack + printing := pcbuf == nil && callback == nil + + // If the PC is zero, it's likely a nil function call. + // Start in the caller's frame. + if frame.pc == 0 { + if usesLR { + frame.pc = *(*uintptr)(unsafe.Pointer(frame.sp)) + frame.lr = 0 + } else { + frame.pc = uintptr(*(*uintptr)(unsafe.Pointer(frame.sp))) + frame.sp += goarch.PtrSize + } + } + + // runtime/internal/atomic functions call into kernel helpers on + // arm < 7. See runtime/internal/atomic/sys_linux_arm.s. + // + // Start in the caller's frame. + if GOARCH == "arm" && goarm < 7 && GOOS == "linux" && frame.pc&0xffff0000 == 0xffff0000 { + // Note that the calls are simple BL without pushing the return + // address, so we use LR directly. + // + // The kernel helpers are frameless leaf functions, so SP and + // LR are not touched. + frame.pc = frame.lr + frame.lr = 0 + } + + f := findfunc(frame.pc) + if !f.valid() { + if callback != nil || printing { + print("runtime: g ", gp.goid, ": unknown pc ", hex(frame.pc), "\n") + tracebackHexdump(stack, &frame, 0) + } + if callback != nil { + throw("unknown pc") + } + return 0 + } + frame.fn = f + + var cache pcvalueCache + + lastFuncID := funcID_normal + n := 0 + for n < max { + // Typically: + // pc is the PC of the running function. + // sp is the stack pointer at that program counter. + // fp is the frame pointer (caller's stack pointer) at that program counter, or nil if unknown. + // stk is the stack containing sp. 
+ // The caller's program counter is lr, unless lr is zero, in which case it is *(uintptr*)sp. + f = frame.fn + if f.pcsp == 0 { + // No frame information, must be external function, like race support. + // See golang.org/issue/13568. + break + } + + // Compute function info flags. + flag := f.flag + if f.funcID == funcID_cgocallback { + // cgocallback does write SP to switch from the g0 to the curg stack, + // but it carefully arranges that during the transition BOTH stacks + // have cgocallback frame valid for unwinding through. + // So we don't need to exclude it with the other SP-writing functions. + flag &^= funcFlag_SPWRITE + } + if frame.pc == pc0 && frame.sp == sp0 && pc0 == gp.syscallpc && sp0 == gp.syscallsp { + // Some Syscall functions write to SP, but they do so only after + // saving the entry PC/SP using entersyscall. + // Since we are using the entry PC/SP, the later SP write doesn't matter. + flag &^= funcFlag_SPWRITE + } + + // Found an actual function. + // Derive frame pointer and link register. + if frame.fp == 0 { + // Jump over system stack transitions. If we're on g0 and there's a user + // goroutine, try to jump. Otherwise this is a regular call. + // We also defensively check that this won't switch M's on us, + // which could happen at critical points in the scheduler. + // This ensures gp.m doesn't change from a stack jump. + if flags&_TraceJumpStack != 0 && gp == gp.m.g0 && gp.m.curg != nil && gp.m.curg.m == gp.m { + switch f.funcID { + case funcID_morestack: + // morestack does not return normally -- newstack() + // gogo's to curg.sched. Match that. + // This keeps morestack() from showing up in the backtrace, + // but that makes some sense since it'll never be returned + // to. + gp = gp.m.curg + frame.pc = gp.sched.pc + frame.fn = findfunc(frame.pc) + f = frame.fn + flag = f.flag + frame.lr = gp.sched.lr + frame.sp = gp.sched.sp + stack = gp.stack + cgoCtxt = gp.cgoCtxt + case funcID_systemstack: + // systemstack returns normally, so just follow the + // stack transition. + if usesLR && funcspdelta(f, frame.pc, &cache) == 0 { + // We're at the function prologue and the stack + // switch hasn't happened, or epilogue where we're + // about to return. Just unwind normally. + // Do this only on LR machines because on x86 + // systemstack doesn't have an SP delta (the CALL + // instruction opens the frame), therefore no way + // to check. + flag &^= funcFlag_SPWRITE + break + } + gp = gp.m.curg + frame.sp = gp.sched.sp + stack = gp.stack + cgoCtxt = gp.cgoCtxt + flag &^= funcFlag_SPWRITE + } + } + frame.fp = frame.sp + uintptr(funcspdelta(f, frame.pc, &cache)) + if !usesLR { + // On x86, call instruction pushes return PC before entering new function. + frame.fp += goarch.PtrSize + } + } + var flr funcInfo + if flag&funcFlag_TOPFRAME != 0 { + // This function marks the top of the stack. Stop the traceback. + frame.lr = 0 + flr = funcInfo{} + } else if flag&funcFlag_SPWRITE != 0 && (callback == nil || n > 0) { + // The function we are in does a write to SP that we don't know + // how to encode in the spdelta table. Examples include context + // switch routines like runtime.gogo but also any code that switches + // to the g0 stack to run host C code. Since we can't reliably unwind + // the SP (we might not even be on the stack we think we are), + // we stop the traceback here. + // This only applies for profiling signals (callback == nil). 
+ // + // For a GC stack traversal (callback != nil), we should only see + // a function when it has voluntarily preempted itself on entry + // during the stack growth check. In that case, the function has + // not yet had a chance to do any writes to SP and is safe to unwind. + // isAsyncSafePoint does not allow assembly functions to be async preempted, + // and preemptPark double-checks that SPWRITE functions are not async preempted. + // So for GC stack traversal we leave things alone (this if body does not execute for n == 0) + // at the bottom frame of the stack. But farther up the stack we'd better not + // find any. + if callback != nil { + println("traceback: unexpected SPWRITE function", funcname(f)) + throw("traceback") + } + frame.lr = 0 + flr = funcInfo{} + } else { + var lrPtr uintptr + if usesLR { + if n == 0 && frame.sp < frame.fp || frame.lr == 0 { + lrPtr = frame.sp + frame.lr = *(*uintptr)(unsafe.Pointer(lrPtr)) + } + } else { + if frame.lr == 0 { + lrPtr = frame.fp - goarch.PtrSize + frame.lr = uintptr(*(*uintptr)(unsafe.Pointer(lrPtr))) + } + } + flr = findfunc(frame.lr) + if !flr.valid() { + // This happens if you get a profiling interrupt at just the wrong time. + // In that context it is okay to stop early. + // But if callback is set, we're doing a garbage collection and must + // get everything, so crash loudly. + doPrint := printing + if doPrint && gp.m.incgo && f.funcID == funcID_sigpanic { + // We can inject sigpanic + // calls directly into C code, + // in which case we'll see a C + // return PC. Don't complain. + doPrint = false + } + if callback != nil || doPrint { + print("runtime: g ", gp.goid, ": unexpected return pc for ", funcname(f), " called from ", hex(frame.lr), "\n") + tracebackHexdump(stack, &frame, lrPtr) + } + if callback != nil { + throw("unknown caller pc") + } + } + } + + frame.varp = frame.fp + if !usesLR { + // On x86, call instruction pushes return PC before entering new function. + frame.varp -= goarch.PtrSize + } + + // For architectures with frame pointers, if there's + // a frame, then there's a saved frame pointer here. + // + // NOTE: This code is not as general as it looks. + // On x86, the ABI is to save the frame pointer word at the + // top of the stack frame, so we have to back down over it. + // On arm64, the frame pointer should be at the bottom of + // the stack (with R29 (aka FP) = RSP), in which case we would + // not want to do the subtraction here. But we started out without + // any frame pointer, and when we wanted to add it, we didn't + // want to break all the assembly doing direct writes to 8(RSP) + // to set the first parameter to a called function. + // So we decided to write the FP link *below* the stack pointer + // (with R29 = RSP - 8 in Go functions). + // This is technically ABI-compatible but not standard. + // And it happens to end up mimicking the x86 layout. + // Other architectures may make different decisions. + if frame.varp > frame.sp && framepointer_enabled { + frame.varp -= goarch.PtrSize + } + + frame.argp = frame.fp + sys.MinFrameSize + + // Determine frame's 'continuation PC', where it can continue. + // Normally this is the return address on the stack, but if sigpanic + // is immediately below this function on the stack, then the frame + // stopped executing due to a trap, and frame.pc is probably not + // a safe point for looking up liveness information. 
In this panicking case, + // the function either doesn't return at all (if it has no defers or if the + // defers do not recover) or it returns from one of the calls to + // deferproc a second time (if the corresponding deferred func recovers). + // In the latter case, use a deferreturn call site as the continuation pc. + frame.continpc = frame.pc + if waspanic { + if frame.fn.deferreturn != 0 { + frame.continpc = frame.fn.entry() + uintptr(frame.fn.deferreturn) + 1 + // Note: this may perhaps keep return variables alive longer than + // strictly necessary, as we are using "function has a defer statement" + // as a proxy for "function actually deferred something". It seems + // to be a minor drawback. (We used to actually look through the + // gp._defer for a defer corresponding to this function, but that + // is hard to do with defer records on the stack during a stack copy.) + // Note: the +1 is to offset the -1 that + // stack.go:getStackMap does to back up a return + // address make sure the pc is in the CALL instruction. + } else { + frame.continpc = 0 + } + } + + if callback != nil { + if !callback((*stkframe)(noescape(unsafe.Pointer(&frame))), v) { + return n + } + } + + if pcbuf != nil { + pc := frame.pc + // backup to CALL instruction to read inlining info (same logic as below) + tracepc := pc + // Normally, pc is a return address. In that case, we want to look up + // file/line information using pc-1, because that is the pc of the + // call instruction (more precisely, the last byte of the call instruction). + // Callers expect the pc buffer to contain return addresses and do the + // same -1 themselves, so we keep pc unchanged. + // When the pc is from a signal (e.g. profiler or segv) then we want + // to look up file/line information using pc, and we store pc+1 in the + // pc buffer so callers can unconditionally subtract 1 before looking up. + // See issue 34123. + // The pc can be at function entry when the frame is initialized without + // actually running code, like runtime.mstart. + if (n == 0 && flags&_TraceTrap != 0) || waspanic || pc == f.entry() { + pc++ + } else { + tracepc-- + } + + // If there is inlining info, record the inner frames. + if inldata := funcdata(f, _FUNCDATA_InlTree); inldata != nil { + inltree := (*[1 << 20]inlinedCall)(inldata) + for { + ix := pcdatavalue(f, _PCDATA_InlTreeIndex, tracepc, &cache) + if ix < 0 { + break + } + if inltree[ix].funcID == funcID_wrapper && elideWrapperCalling(lastFuncID) { + // ignore wrappers + } else if skip > 0 { + skip-- + } else if n < max { + (*[1 << 20]uintptr)(unsafe.Pointer(pcbuf))[n] = pc + n++ + } + lastFuncID = inltree[ix].funcID + // Back up to an instruction in the "caller". + tracepc = frame.fn.entry() + uintptr(inltree[ix].parentPc) + pc = tracepc + 1 + } + } + // Record the main frame. + if f.funcID == funcID_wrapper && elideWrapperCalling(lastFuncID) { + // Ignore wrapper functions (except when they trigger panics). + } else if skip > 0 { + skip-- + } else if n < max { + (*[1 << 20]uintptr)(unsafe.Pointer(pcbuf))[n] = pc + n++ + } + lastFuncID = f.funcID + n-- // offset n++ below + } + + if printing { + // assume skip=0 for printing. + // + // Never elide wrappers if we haven't printed + // any frames. And don't elide wrappers that + // called panic rather than the wrapped + // function. Otherwise, leave them out. 
+ + // backup to CALL instruction to read inlining info (same logic as below) + tracepc := frame.pc + if (n > 0 || flags&_TraceTrap == 0) && frame.pc > f.entry() && !waspanic { + tracepc-- + } + // If there is inlining info, print the inner frames. + if inldata := funcdata(f, _FUNCDATA_InlTree); inldata != nil { + inltree := (*[1 << 20]inlinedCall)(inldata) + var inlFunc _func + inlFuncInfo := funcInfo{&inlFunc, f.datap} + for { + ix := pcdatavalue(f, _PCDATA_InlTreeIndex, tracepc, nil) + if ix < 0 { + break + } + + // Create a fake _func for the + // inlined function. + inlFunc.nameOff = inltree[ix].nameOff + inlFunc.funcID = inltree[ix].funcID + inlFunc.startLine = inltree[ix].startLine + + if (flags&_TraceRuntimeFrames) != 0 || showframe(inlFuncInfo, gp, nprint == 0, inlFuncInfo.funcID, lastFuncID) { + name := funcname(inlFuncInfo) + file, line := funcline(f, tracepc) + print(name, "(...)\n") + print("\t", file, ":", line, "\n") + nprint++ + } + lastFuncID = inltree[ix].funcID + // Back up to an instruction in the "caller". + tracepc = frame.fn.entry() + uintptr(inltree[ix].parentPc) + } + } + if (flags&_TraceRuntimeFrames) != 0 || showframe(f, gp, nprint == 0, f.funcID, lastFuncID) { + // Print during crash. + // main(0x1, 0x2, 0x3) + // /home/rsc/go/src/runtime/x.go:23 +0xf + // + name := funcname(f) + file, line := funcline(f, tracepc) + if name == "runtime.gopanic" { + name = "panic" + } + print(name, "(") + argp := unsafe.Pointer(frame.argp) + printArgs(f, argp, tracepc) + print(")\n") + print("\t", file, ":", line) + if frame.pc > f.entry() { + print(" +", hex(frame.pc-f.entry())) + } + if gp.m != nil && gp.m.throwing >= throwTypeRuntime && gp == gp.m.curg || level >= 2 { + print(" fp=", hex(frame.fp), " sp=", hex(frame.sp), " pc=", hex(frame.pc)) + } + print("\n") + nprint++ + } + lastFuncID = f.funcID + } + n++ + + if f.funcID == funcID_cgocallback && len(cgoCtxt) > 0 { + ctxt := cgoCtxt[len(cgoCtxt)-1] + cgoCtxt = cgoCtxt[:len(cgoCtxt)-1] + + // skip only applies to Go frames. + // callback != nil only used when we only care + // about Go frames. + if skip == 0 && callback == nil { + n = tracebackCgoContext(pcbuf, printing, ctxt, n, max) + } + } + + waspanic = f.funcID == funcID_sigpanic + injectedCall := waspanic || f.funcID == funcID_asyncPreempt || f.funcID == funcID_debugCallV2 + + // Do not unwind past the bottom of the stack. + if !flr.valid() { + break + } + + if frame.pc == frame.lr && frame.sp == frame.fp { + // If the next frame is identical to the current frame, we cannot make progress. + print("runtime: traceback stuck. pc=", hex(frame.pc), " sp=", hex(frame.sp), "\n") + tracebackHexdump(stack, &frame, frame.sp) + throw("traceback stuck") + } + + // Unwind to next frame. + frame.fn = flr + frame.pc = frame.lr + frame.lr = 0 + frame.sp = frame.fp + frame.fp = 0 + + // On link register architectures, sighandler saves the LR on stack + // before faking a call. + if usesLR && injectedCall { + x := *(*uintptr)(unsafe.Pointer(frame.sp)) + frame.sp += alignUp(sys.MinFrameSize, sys.StackAlign) + f = findfunc(frame.pc) + frame.fn = f + if !f.valid() { + frame.pc = x + } else if funcspdelta(f, frame.pc, &cache) == 0 { + frame.lr = x + } + } + } + + if printing { + n = nprint + } + + // Note that panic != nil is okay here: there can be leftover panics, + // because the defers on the panic stack do not nest in frame order as + // they do on the defer stack. 
+	//
+	//	frame 1 defers d1
+	//	frame 2 defers d2
+	//	frame 3 defers d3
+	//	frame 4 panics
+	//	frame 4's panic starts running defers
+	//	frame 5, running d3, defers d4
+	//	frame 5 panics
+	//	frame 5's panic starts running defers
+	//	frame 6, running d4, garbage collects
+	//	frame 6, running d2, garbage collects
+	//
+	// During the execution of d4, the panic stack is d4 -> d3, which
+	// is nested properly, and we'll treat frame 3 as resumable, because we
+	// can find d3. (And in fact frame 3 is resumable. If d4 recovers
+	// and frame 5 continues running, d3 can recover and we'll
+	// resume execution in (returning from) frame 3.)
+	//
+	// During the execution of d2, however, the panic stack is d2 -> d3,
+	// which is inverted. The scan will match d2 to frame 2 but having
+	// d2 on the stack until then means it will not match d3 to frame 3.
+	// This is okay: if we're running d2, then all the defers after d2 have
+	// completed and their corresponding frames are dead. Not finding d3
+	// for frame 3 means we'll set frame 3's continpc == 0, which is correct
+	// (frame 3 is dead). At the end of the walk the panic stack can thus
+	// contain defers (d3 in this case) for dead frames. The inversion here
+	// always indicates a dead frame, and the effect of the inversion on the
+	// scan is to hide those dead frames, so the scan is still okay:
+	// what's left on the panic stack are exactly (and only) the dead frames.
+	//
+	// We require callback != nil here because only when callback != nil
+	// do we know that gentraceback is being called in a "must be correct"
+	// context as opposed to a "best effort" context. The tracebacks with
+	// callbacks only happen when everything is stopped nicely.
+	// At other times, such as when gathering a stack for a profiling signal
+	// or when printing a traceback during a crash, everything may not be
+	// stopped nicely, and the stack walk may not be able to complete.
+	if callback != nil && n < max && frame.sp != gp.stktopsp {
+		print("runtime: g", gp.goid, ": frame.sp=", hex(frame.sp), " top=", hex(gp.stktopsp), "\n")
+		print("\tstack=[", hex(gp.stack.lo), "-", hex(gp.stack.hi), "] n=", n, " max=", max, "\n")
+		throw("traceback did not unwind completely")
+	}
+
+	return n
+}
+
+// printArgs prints function arguments in a traceback.
+func printArgs(f funcInfo, argp unsafe.Pointer, pc uintptr) {
+	// The "instruction" of argument printing is encoded in _FUNCDATA_ArgInfo.
+	// See cmd/compile/internal/ssagen.emitArgInfo for the description of the
+	// encoding.
+	// These constants need to be in sync with the compiler.
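+	//
+	// As an illustrative sketch only (not output from a real compile),
+	// a function like func f(a int64, s struct{ x, y int32 }) could be
+	// described by a byte sequence of the shape:
+	//
+	//	0, 8,      // a: offset 0, size 8
+	//	_startAgg, // s: open aggregate
+	//	8, 4,      // s.x: offset 8, size 4
+	//	12, 4,     // s.y: offset 12, size 4
+	//	_endAgg,   // close aggregate
+	//	_endSeq    // end of the argument list
+	//
+	// The decoding loop below handles exactly these forms: (offset, size)
+	// pairs, aggregate brackets, "...", and the terminator.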
+	const (
+		_endSeq         = 0xff
+		_startAgg       = 0xfe
+		_endAgg         = 0xfd
+		_dotdotdot      = 0xfc
+		_offsetTooLarge = 0xfb
+	)
+
+	const (
+		limit    = 10                       // print no more than 10 args/components
+		maxDepth = 5                        // no more than 5 layers of nesting
+		maxLen   = (maxDepth*3+2)*limit + 1 // max length of _FUNCDATA_ArgInfo (see the compiler side for reasoning)
+	)
+
+	p := (*[maxLen]uint8)(funcdata(f, _FUNCDATA_ArgInfo))
+	if p == nil {
+		return
+	}
+
+	liveInfo := funcdata(f, _FUNCDATA_ArgLiveInfo)
+	liveIdx := pcdatavalue(f, _PCDATA_ArgLiveIndex, pc, nil)
+	startOffset := uint8(0xff) // smallest offset that needs liveness info (slots with a lower offset are always live)
+	if liveInfo != nil {
+		startOffset = *(*uint8)(liveInfo)
+	}
+
+	isLive := func(off, slotIdx uint8) bool {
+		if liveInfo == nil || liveIdx <= 0 {
+			return true // no liveness info, always live
+		}
+		if off < startOffset {
+			return true
+		}
+		bits := *(*uint8)(add(liveInfo, uintptr(liveIdx)+uintptr(slotIdx/8)))
+		return bits&(1<<(slotIdx%8)) != 0
+	}
+
+	print1 := func(off, sz, slotIdx uint8) {
+		x := readUnaligned64(add(argp, uintptr(off)))
+		// mask out irrelevant bits
+		if sz < 8 {
+			shift := 64 - sz*8
+			if goarch.BigEndian {
+				x = x >> shift
+			} else {
+				x = x << shift >> shift
+			}
+		}
+		print(hex(x))
+		if !isLive(off, slotIdx) {
+			print("?")
+		}
+	}
+
+	start := true
+	printcomma := func() {
+		if !start {
+			print(", ")
+		}
+	}
+	pi := 0
+	slotIdx := uint8(0) // register arg spill slot index
+printloop:
+	for {
+		o := p[pi]
+		pi++
+		switch o {
+		case _endSeq:
+			break printloop
+		case _startAgg:
+			printcomma()
+			print("{")
+			start = true
+			continue
+		case _endAgg:
+			print("}")
+		case _dotdotdot:
+			printcomma()
+			print("...")
+		case _offsetTooLarge:
+			printcomma()
+			print("_")
+		default:
+			printcomma()
+			sz := p[pi]
+			pi++
+			print1(o, sz, slotIdx)
+			if o >= startOffset {
+				slotIdx++
+			}
+		}
+		start = false
+	}
+}
+
+// tracebackCgoContext handles tracing back a cgo context value, from
+// the context argument to setCgoTraceback, for the gentraceback
+// function. It returns the new value of n.
+func tracebackCgoContext(pcbuf *uintptr, printing bool, ctxt uintptr, n, max int) int {
+	var cgoPCs [32]uintptr
+	cgoContextPCs(ctxt, cgoPCs[:])
+	var arg cgoSymbolizerArg
+	anySymbolized := false
+	for _, pc := range cgoPCs {
+		if pc == 0 || n >= max {
+			break
+		}
+		if pcbuf != nil {
+			(*[1 << 20]uintptr)(unsafe.Pointer(pcbuf))[n] = pc
+		}
+		if printing {
+			if cgoSymbolizer == nil {
+				print("non-Go function at pc=", hex(pc), "\n")
+			} else {
+				c := printOneCgoTraceback(pc, max-n, &arg)
+				n += c - 1 // +1 a few lines down
+				anySymbolized = true
+			}
+		}
+		n++
+	}
+	if anySymbolized {
+		arg.pc = 0
+		callCgoSymbolizer(&arg)
+	}
+	return n
+}
+
+func printcreatedby(gp *g) {
+	// Show what created the goroutine, except for the main goroutine (goid 1).
+	pc := gp.gopc
+	f := findfunc(pc)
+	if f.valid() && showframe(f, gp, false, funcID_normal, funcID_normal) && gp.goid != 1 {
+		printcreatedby1(f, pc)
+	}
+}
+
+func printcreatedby1(f funcInfo, pc uintptr) {
+	print("created by ", funcname(f), "\n")
+	tracepc := pc // back up to the CALL instruction for funcline.
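+	// gopc is a return address, so it points just past the call that
+	// created the goroutine; backing up one PC quantum lands inside the
+	// go statement's CALL instruction, so funcline reports the line of
+	// the go statement itself. The pc > f.entry() check guards against
+	// backing up past the function entry.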
+	if pc > f.entry() {
+		tracepc -= sys.PCQuantum
+	}
+	file, line := funcline(f, tracepc)
+	print("\t", file, ":", line)
+	if pc > f.entry() {
+		print(" +", hex(pc-f.entry()))
+	}
+	print("\n")
+}
+
+func traceback(pc, sp, lr uintptr, gp *g) {
+	traceback1(pc, sp, lr, gp, 0)
+}
+
+// tracebacktrap is like traceback but expects that the PC and SP were obtained
+// from a trap, not from gp->sched or gp->syscallpc/gp->syscallsp or getcallerpc/getcallersp.
+// Because they are from a trap instead of from a saved pair,
+// the initial PC must not be rewound to the previous instruction.
+// (All the saved pairs record a PC that is a return address, so we
+// rewind it into the CALL instruction.)
+// If gp.m.libcall{g,pc,sp} information is available, it uses that information in preference to
+// the pc/sp/lr passed in.
+func tracebacktrap(pc, sp, lr uintptr, gp *g) {
+	if gp.m.libcallsp != 0 {
+		// We're in C code somewhere, traceback from the saved position.
+		traceback1(gp.m.libcallpc, gp.m.libcallsp, 0, gp.m.libcallg.ptr(), 0)
+		return
+	}
+	traceback1(pc, sp, lr, gp, _TraceTrap)
+}
+
+func traceback1(pc, sp, lr uintptr, gp *g, flags uint) {
+	// If the goroutine is in cgo, and we have a cgo traceback, print that.
+	if iscgo && gp.m != nil && gp.m.ncgo > 0 && gp.syscallsp != 0 && gp.m.cgoCallers != nil && gp.m.cgoCallers[0] != 0 {
+		// Lock cgoCallers so that a signal handler won't
+		// change it, copy the array, reset it, unlock it.
+		// We are locked to the thread and are not running
+		// concurrently with a signal handler.
+		// We just have to stop a signal handler from interrupting
+		// in the middle of our copy.
+		gp.m.cgoCallersUse.Store(1)
+		cgoCallers := *gp.m.cgoCallers
+		gp.m.cgoCallers[0] = 0
+		gp.m.cgoCallersUse.Store(0)
+
+		printCgoTraceback(&cgoCallers)
+	}
+
+	if readgstatus(gp)&^_Gscan == _Gsyscall {
+		// Override registers if blocked in system call.
+		pc = gp.syscallpc
+		sp = gp.syscallsp
+		flags &^= _TraceTrap
+	}
+	if gp.m != nil && gp.m.vdsoSP != 0 {
+		// Override registers if running in VDSO. This comes after the
+		// _Gsyscall check to cover VDSO calls after entersyscall.
+		pc = gp.m.vdsoPC
+		sp = gp.m.vdsoSP
+		flags &^= _TraceTrap
+	}
+
+	// Print traceback. By default, omits runtime frames.
+	// If that means we print nothing at all, repeat forcing all frames printed.
+	n := gentraceback(pc, sp, lr, gp, 0, nil, _TracebackMaxFrames, nil, nil, flags)
+	if n == 0 && (flags&_TraceRuntimeFrames) == 0 {
+		n = gentraceback(pc, sp, lr, gp, 0, nil, _TracebackMaxFrames, nil, nil, flags|_TraceRuntimeFrames)
+	}
+	if n == _TracebackMaxFrames {
+		print("...additional frames elided...\n")
+	}
+	printcreatedby(gp)
+
+	if gp.ancestors == nil {
+		return
+	}
+	for _, ancestor := range *gp.ancestors {
+		printAncestorTraceback(ancestor)
+	}
+}
+
+// printAncestorTraceback prints the traceback of the given ancestor.
+// TODO: Unify this with gentraceback and CallersFrames.
+func printAncestorTraceback(ancestor ancestorInfo) {
+	print("[originating from goroutine ", ancestor.goid, "]:\n")
+	for fidx, pc := range ancestor.pcs {
+		f := findfunc(pc) // f previously validated
+		if showfuncinfo(f, fidx == 0, funcID_normal, funcID_normal) {
+			printAncestorTracebackFuncInfo(f, pc)
+		}
+	}
+	if len(ancestor.pcs) == _TracebackMaxFrames {
+		print("...additional frames elided...\n")
+	}
+	// Show what created the goroutine, except for the main goroutine (goid 1).
+	f := findfunc(ancestor.gopc)
+	if f.valid() && showfuncinfo(f, false, funcID_normal, funcID_normal) && ancestor.goid != 1 {
+		printcreatedby1(f, ancestor.gopc)
+	}
+}
+
+// printAncestorTracebackFuncInfo prints the given function info at a given pc
+// within an ancestor traceback. The precision of this info is reduced
+// because we only have access to the pcs from the time the caller
+// goroutine was created.
+func printAncestorTracebackFuncInfo(f funcInfo, pc uintptr) {
+	name := funcname(f)
+	if inldata := funcdata(f, _FUNCDATA_InlTree); inldata != nil {
+		inltree := (*[1 << 20]inlinedCall)(inldata)
+		ix := pcdatavalue(f, _PCDATA_InlTreeIndex, pc, nil)
+		if ix >= 0 {
+			name = funcnameFromNameOff(f, inltree[ix].nameOff)
+		}
+	}
+	file, line := funcline(f, pc)
+	if name == "runtime.gopanic" {
+		name = "panic"
+	}
+	print(name, "(...)\n")
+	print("\t", file, ":", line)
+	if pc > f.entry() {
+		print(" +", hex(pc-f.entry()))
+	}
+	print("\n")
+}
+
+func callers(skip int, pcbuf []uintptr) int {
+	sp := getcallersp()
+	pc := getcallerpc()
+	gp := getg()
+	var n int
+	systemstack(func() {
+		n = gentraceback(pc, sp, 0, gp, skip, &pcbuf[0], len(pcbuf), nil, nil, 0)
+	})
+	return n
+}
+
+func gcallers(gp *g, skip int, pcbuf []uintptr) int {
+	return gentraceback(^uintptr(0), ^uintptr(0), 0, gp, skip, &pcbuf[0], len(pcbuf), nil, nil, 0)
+}
+
+// showframe reports whether the frame with the given characteristics should
+// be printed during a traceback.
+func showframe(f funcInfo, gp *g, firstFrame bool, funcID, childID funcID) bool {
+	mp := getg().m
+	if mp.throwing >= throwTypeRuntime && gp != nil && (gp == mp.curg || gp == mp.caughtsig.ptr()) {
+		return true
+	}
+	return showfuncinfo(f, firstFrame, funcID, childID)
+}
+
+// showfuncinfo reports whether a function with the given characteristics should
+// be printed during a traceback.
+func showfuncinfo(f funcInfo, firstFrame bool, funcID, childID funcID) bool {
+	// Note that f may be a synthesized funcInfo for an inlined
+	// function, in which case only nameOff and funcID are set.
+
+	level, _, _ := gotraceback()
+	if level > 1 {
+		// Show all frames.
+		return true
+	}
+
+	if !f.valid() {
+		return false
+	}
+
+	if funcID == funcID_wrapper && elideWrapperCalling(childID) {
+		return false
+	}
+
+	name := funcname(f)
+
+	// Special case: always show runtime.gopanic frame
+	// in the middle of a stack trace, so that we can
+	// see the boundary between ordinary code and
+	// panic-induced deferred code.
+	// See golang.org/issue/5832.
+	if name == "runtime.gopanic" && !firstFrame {
+		return true
+	}
+
+	return bytealg.IndexByteString(name, '.') >= 0 && (!hasPrefix(name, "runtime.") || isExportedRuntime(name))
+}
+
+// isExportedRuntime reports whether name is an exported runtime function.
+// It is only for runtime functions, so ASCII A-Z is fine.
+func isExportedRuntime(name string) bool {
+	const n = len("runtime.")
+	return len(name) > n && name[:n] == "runtime." && 'A' <= name[n] && name[n] <= 'Z'
+}
+
+// elideWrapperCalling reports whether a wrapper function that called
+// function id should be elided from stack traces.
+func elideWrapperCalling(id funcID) bool {
+	// If the wrapper called a panic function instead of the
+	// wrapped function, we want to include it in stacks.
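+	// For example, when a value method is called through a nil pointer,
+	// the compiler-generated wrapper calls panicwrap; eliding that
+	// wrapper would hide the frame where the panic actually originated.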
+	return !(id == funcID_gopanic || id == funcID_sigpanic || id == funcID_panicwrap)
+}
+
+var gStatusStrings = [...]string{
+	_Gidle:      "idle",
+	_Grunnable:  "runnable",
+	_Grunning:   "running",
+	_Gsyscall:   "syscall",
+	_Gwaiting:   "waiting",
+	_Gdead:      "dead",
+	_Gcopystack: "copystack",
+	_Gpreempted: "preempted",
+}
+
+func goroutineheader(gp *g) {
+	gpstatus := readgstatus(gp)
+
+	isScan := gpstatus&_Gscan != 0
+	gpstatus &^= _Gscan // drop the scan bit
+
+	// Basic string status
+	var status string
+	if 0 <= gpstatus && gpstatus < uint32(len(gStatusStrings)) {
+		status = gStatusStrings[gpstatus]
+	} else {
+		status = "???"
+	}
+
+	// Override.
+	if gpstatus == _Gwaiting && gp.waitreason != waitReasonZero {
+		status = gp.waitreason.String()
+	}
+
+	// approx time the G is blocked, in minutes
+	var waitfor int64
+	if (gpstatus == _Gwaiting || gpstatus == _Gsyscall) && gp.waitsince != 0 {
+		waitfor = (nanotime() - gp.waitsince) / 60e9
+	}
+	print("goroutine ", gp.goid, " [", status)
+	if isScan {
+		print(" (scan)")
+	}
+	if waitfor >= 1 {
+		print(", ", waitfor, " minutes")
+	}
+	if gp.lockedm != 0 {
+		print(", locked to thread")
+	}
+	print("]:\n")
+}
+
+func tracebackothers(me *g) {
+	level, _, _ := gotraceback()
+
+	// Show the current goroutine first, if we haven't already.
+	curgp := getg().m.curg
+	if curgp != nil && curgp != me {
+		print("\n")
+		goroutineheader(curgp)
+		traceback(^uintptr(0), ^uintptr(0), 0, curgp)
+	}
+
+	// We can't call the locking forEachG here because this may be during fatal
+	// throw/panic, where locking could be out-of-order or a direct
+	// deadlock.
+	//
+	// Instead, use forEachGRace, which requires no locking. We don't lock
+	// against concurrent creation of new Gs, but even with allglock we may
+	// miss Gs created after this loop.
+	forEachGRace(func(gp *g) {
+		if gp == me || gp == curgp || readgstatus(gp) == _Gdead || isSystemGoroutine(gp, false) && level < 2 {
+			return
+		}
+		print("\n")
+		goroutineheader(gp)
+		// Note: gp.m == getg().m occurs when tracebackothers is called
+		// from a signal handler initiated during a systemstack call.
+		// The original G is still in the running state, and we want to
+		// print its stack.
+		if gp.m != getg().m && readgstatus(gp)&^_Gscan == _Grunning {
+			print("\tgoroutine running on other thread; stack unavailable\n")
+			printcreatedby(gp)
+		} else {
+			traceback(^uintptr(0), ^uintptr(0), 0, gp)
+		}
+	})
+}
+
+// tracebackHexdump hexdumps part of stk around frame.sp and frame.fp
+// for debugging purposes. If the address bad is included in the
+// hexdumped range, it will mark it as well.
+func tracebackHexdump(stk stack, frame *stkframe, bad uintptr) {
+	const expand = 32 * goarch.PtrSize
+	const maxExpand = 256 * goarch.PtrSize
+	// Start around frame.sp.
+	lo, hi := frame.sp, frame.sp
+	// Expand to include frame.fp.
+	if frame.fp != 0 && frame.fp < lo {
+		lo = frame.fp
+	}
+	if frame.fp != 0 && frame.fp > hi {
+		hi = frame.fp
+	}
+	// Expand a bit more.
+	lo, hi = lo-expand, hi+expand
+	// But don't go too far from frame.sp.
+	if lo < frame.sp-maxExpand {
+		lo = frame.sp - maxExpand
+	}
+	if hi > frame.sp+maxExpand {
+		hi = frame.sp + maxExpand
+	}
+	// And don't go outside the stack bounds.
+	if lo < stk.lo {
+		lo = stk.lo
+	}
+	if hi > stk.hi {
+		hi = stk.hi
+	}
+
+	// Print the hex dump.
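+	// The callback below tags words of interest in the dump:
+	// '>' marks frame.fp, '<' marks frame.sp, and '!' marks bad.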
+ print("stack: frame={sp:", hex(frame.sp), ", fp:", hex(frame.fp), "} stack=[", hex(stk.lo), ",", hex(stk.hi), ")\n") + hexdumpWords(lo, hi, func(p uintptr) byte { + switch p { + case frame.fp: + return '>' + case frame.sp: + return '<' + case bad: + return '!' + } + return 0 + }) +} + +// isSystemGoroutine reports whether the goroutine g must be omitted +// in stack dumps and deadlock detector. This is any goroutine that +// starts at a runtime.* entry point, except for runtime.main, +// runtime.handleAsyncEvent (wasm only) and sometimes runtime.runfinq. +// +// If fixed is true, any goroutine that can vary between user and +// system (that is, the finalizer goroutine) is considered a user +// goroutine. +func isSystemGoroutine(gp *g, fixed bool) bool { + // Keep this in sync with internal/trace.IsSystemGoroutine. + f := findfunc(gp.startpc) + if !f.valid() { + return false + } + if f.funcID == funcID_runtime_main || f.funcID == funcID_handleAsyncEvent { + return false + } + if f.funcID == funcID_runfinq { + // We include the finalizer goroutine if it's calling + // back into user code. + if fixed { + // This goroutine can vary. In fixed mode, + // always consider it a user goroutine. + return false + } + return fingStatus.Load()&fingRunningFinalizer == 0 + } + return hasPrefix(funcname(f), "runtime.") +} + +// SetCgoTraceback records three C functions to use to gather +// traceback information from C code and to convert that traceback +// information into symbolic information. These are used when printing +// stack traces for a program that uses cgo. +// +// The traceback and context functions may be called from a signal +// handler, and must therefore use only async-signal safe functions. +// The symbolizer function may be called while the program is +// crashing, and so must be cautious about using memory. None of the +// functions may call back into Go. +// +// The context function will be called with a single argument, a +// pointer to a struct: +// +// struct { +// Context uintptr +// } +// +// In C syntax, this struct will be +// +// struct { +// uintptr_t Context; +// }; +// +// If the Context field is 0, the context function is being called to +// record the current traceback context. It should record in the +// Context field whatever information is needed about the current +// point of execution to later produce a stack trace, probably the +// stack pointer and PC. In this case the context function will be +// called from C code. +// +// If the Context field is not 0, then it is a value returned by a +// previous call to the context function. This case is called when the +// context is no longer needed; that is, when the Go code is returning +// to its C code caller. This permits the context function to release +// any associated resources. +// +// While it would be correct for the context function to record a +// complete a stack trace whenever it is called, and simply copy that +// out in the traceback function, in a typical program the context +// function will be called many times without ever recording a +// traceback for that context. Recording a complete stack trace in a +// call to the context function is likely to be inefficient. 
+// +// The traceback function will be called with a single argument, a +// pointer to a struct: +// +// struct { +// Context uintptr +// SigContext uintptr +// Buf *uintptr +// Max uintptr +// } +// +// In C syntax, this struct will be +// +// struct { +// uintptr_t Context; +// uintptr_t SigContext; +// uintptr_t* Buf; +// uintptr_t Max; +// }; +// +// The Context field will be zero to gather a traceback from the +// current program execution point. In this case, the traceback +// function will be called from C code. +// +// Otherwise Context will be a value previously returned by a call to +// the context function. The traceback function should gather a stack +// trace from that saved point in the program execution. The traceback +// function may be called from an execution thread other than the one +// that recorded the context, but only when the context is known to be +// valid and unchanging. The traceback function may also be called +// deeper in the call stack on the same thread that recorded the +// context. The traceback function may be called multiple times with +// the same Context value; it will usually be appropriate to cache the +// result, if possible, the first time this is called for a specific +// context value. +// +// If the traceback function is called from a signal handler on a Unix +// system, SigContext will be the signal context argument passed to +// the signal handler (a C ucontext_t* cast to uintptr_t). This may be +// used to start tracing at the point where the signal occurred. If +// the traceback function is not called from a signal handler, +// SigContext will be zero. +// +// Buf is where the traceback information should be stored. It should +// be PC values, such that Buf[0] is the PC of the caller, Buf[1] is +// the PC of that function's caller, and so on. Max is the maximum +// number of entries to store. The function should store a zero to +// indicate the top of the stack, or that the caller is on a different +// stack, presumably a Go stack. +// +// Unlike runtime.Callers, the PC values returned should, when passed +// to the symbolizer function, return the file/line of the call +// instruction. No additional subtraction is required or appropriate. +// +// On all platforms, the traceback function is invoked when a call from +// Go to C to Go requests a stack trace. On linux/amd64, linux/ppc64le, +// linux/arm64, and freebsd/amd64, the traceback function is also invoked +// when a signal is received by a thread that is executing a cgo call. +// The traceback function should not make assumptions about when it is +// called, as future versions of Go may make additional calls. +// +// The symbolizer function will be called with a single argument, a +// pointer to a struct: +// +// struct { +// PC uintptr // program counter to fetch information for +// File *byte // file name (NUL terminated) +// Lineno uintptr // line number +// Func *byte // function name (NUL terminated) +// Entry uintptr // function entry point +// More uintptr // set non-zero if more info for this PC +// Data uintptr // unused by runtime, available for function +// } +// +// In C syntax, this struct will be +// +// struct { +// uintptr_t PC; +// char* File; +// uintptr_t Lineno; +// char* Func; +// uintptr_t Entry; +// uintptr_t More; +// uintptr_t Data; +// }; +// +// The PC field will be a value returned by a call to the traceback +// function. +// +// The first time the function is called for a particular traceback, +// all the fields except PC will be 0. 
+// other fields if possible, setting them to 0/nil if the information
+// is not available. The Data field may be used to store any useful
+// information across calls. The More field should be set to non-zero
+// if there is more information for this PC, zero otherwise. If More
+// is set non-zero, the function will be called again with the same
+// PC, and may return different information (this is intended for use
+// with inlined functions). If More is zero, the function will be
+// called with the next PC value in the traceback. When the traceback
+// is complete, the function will be called once more with PC set to
+// zero; this may be used to free any information. Each call will
+// leave the fields of the struct set to the same values they had upon
+// return, except for the PC field when the More field is zero. The
+// function must not keep a copy of the struct pointer between calls.
+//
+// When calling SetCgoTraceback, the version argument is the version
+// number of the structs that the functions expect to receive.
+// Currently this must be zero.
+//
+// The symbolizer function may be nil, in which case the results of
+// the traceback function will be displayed as numbers. If the
+// traceback function is nil, the symbolizer function will never be
+// called. The context function may be nil, in which case the
+// traceback function will only be called with the context field set
+// to zero. If the context function is nil, then calls from Go to C
+// to Go will not show a traceback for the C portion of the call stack.
+//
+// SetCgoTraceback should be called only once, ideally from an init function.
+func SetCgoTraceback(version int, traceback, context, symbolizer unsafe.Pointer) {
+	if version != 0 {
+		panic("unsupported version")
+	}
+
+	if cgoTraceback != nil && cgoTraceback != traceback ||
+		cgoContext != nil && cgoContext != context ||
+		cgoSymbolizer != nil && cgoSymbolizer != symbolizer {
+		panic("call SetCgoTraceback only once")
+	}
+
+	cgoTraceback = traceback
+	cgoContext = context
+	cgoSymbolizer = symbolizer
+
+	// The context function is called when a C function calls a Go
+	// function. As such it is only called by C code in runtime/cgo.
+	if _cgo_set_context_function != nil {
+		cgocall(_cgo_set_context_function, context)
+	}
+}
+
+var cgoTraceback unsafe.Pointer
+var cgoContext unsafe.Pointer
+var cgoSymbolizer unsafe.Pointer
+
+// cgoTracebackArg is the type passed to cgoTraceback.
+type cgoTracebackArg struct {
+	context    uintptr
+	sigContext uintptr
+	buf        *uintptr
+	max        uintptr
+}
+
+// cgoContextArg is the type passed to the context function.
+type cgoContextArg struct {
+	context uintptr
+}
+
+// cgoSymbolizerArg is the type passed to cgoSymbolizer.
+type cgoSymbolizerArg struct {
+	pc       uintptr
+	file     *byte
+	lineno   uintptr
+	funcName *byte
+	entry    uintptr
+	more     uintptr
+	data     uintptr
+}
+
+// printCgoTraceback prints a traceback of callers.
+func printCgoTraceback(callers *cgoCallers) {
+	if cgoSymbolizer == nil {
+		for _, c := range callers {
+			if c == 0 {
+				break
+			}
+			print("non-Go function at pc=", hex(c), "\n")
+		}
+		return
+	}
+
+	var arg cgoSymbolizerArg
+	for _, c := range callers {
+		if c == 0 {
+			break
+		}
+		printOneCgoTraceback(c, 0x7fffffff, &arg)
+	}
+	arg.pc = 0
+	callCgoSymbolizer(&arg)
+}
+
+// printOneCgoTraceback prints the traceback of a single cgo caller.
+// This can print more than one line because of inlining.
+// Returns the number of frames printed.
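+// Assuming the symbolizer fills in a name and file (the values below
+// are illustrative only), each frame prints in roughly this shape:
+//
+//	my_c_function
+//		/path/to/file.c:123 pc=0x7f69c0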
+func printOneCgoTraceback(pc uintptr, max int, arg *cgoSymbolizerArg) int {
+	c := 0
+	arg.pc = pc
+	for c <= max {
+		callCgoSymbolizer(arg)
+		if arg.funcName != nil {
+			// Note that we don't print any argument
+			// information here, not even parentheses.
+			// The symbolizer must add that if appropriate.
+			println(gostringnocopy(arg.funcName))
+		} else {
+			println("non-Go function")
+		}
+		print("\t")
+		if arg.file != nil {
+			print(gostringnocopy(arg.file), ":", arg.lineno, " ")
+		}
+		print("pc=", hex(pc), "\n")
+		c++
+		if arg.more == 0 {
+			break
+		}
+	}
+	return c
+}
+
+// callCgoSymbolizer calls the cgoSymbolizer function.
+func callCgoSymbolizer(arg *cgoSymbolizerArg) {
+	call := cgocall
+	if panicking.Load() > 0 || getg().m.curg != getg() {
+		// We do not want to call into the scheduler when panicking
+		// or when on the system stack.
+		call = asmcgocall
+	}
+	if msanenabled {
+		msanwrite(unsafe.Pointer(arg), unsafe.Sizeof(cgoSymbolizerArg{}))
+	}
+	if asanenabled {
+		asanwrite(unsafe.Pointer(arg), unsafe.Sizeof(cgoSymbolizerArg{}))
+	}
+	call(cgoSymbolizer, noescape(unsafe.Pointer(arg)))
+}
+
+// cgoContextPCs gets the PC values from a cgo traceback.
+func cgoContextPCs(ctxt uintptr, buf []uintptr) {
+	if cgoTraceback == nil {
+		return
+	}
+	call := cgocall
+	if panicking.Load() > 0 || getg().m.curg != getg() {
+		// We do not want to call into the scheduler when panicking
+		// or when on the system stack.
+		call = asmcgocall
+	}
+	arg := cgoTracebackArg{
+		context: ctxt,
+		buf:     (*uintptr)(noescape(unsafe.Pointer(&buf[0]))),
+		max:     uintptr(len(buf)),
+	}
+	if msanenabled {
+		msanwrite(unsafe.Pointer(&arg), unsafe.Sizeof(arg))
+	}
+	if asanenabled {
+		asanwrite(unsafe.Pointer(&arg), unsafe.Sizeof(arg))
+	}
+	call(cgoTraceback, noescape(unsafe.Pointer(&arg)))
+}
diff --git a/src/runtime/traceback_test.go b/src/runtime/traceback_test.go
new file mode 100644
index 0000000..97eb921
--- /dev/null
+++ b/src/runtime/traceback_test.go
@@ -0,0 +1,422 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+	"bytes"
+	"internal/abi"
+	"internal/testenv"
+	"runtime"
+	"testing"
+)
+
+var testTracebackArgsBuf [1000]byte
+
+func TestTracebackArgs(t *testing.T) {
+	if *flagQuick {
+		t.Skip("-quick")
+	}
+	optimized := !testenv.OptimizationOff()
+	abiSel := func(x, y string) string {
+		// select expected output based on ABI
+		// In a noopt build we always spill arguments, so the output is the same as with the stack ABI.
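+		// abi.IntArgRegs > 0 means the platform passes integer args in
+		// registers (e.g. GOARCH=amd64 or arm64); there, dead register
+		// args can print as poisoned values with a trailing "?".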
+		if optimized && abi.IntArgRegs > 0 {
+			return x
+		}
+		return y
+	}
+
+	tests := []struct {
+		fn     func() int
+		expect string
+	}{
+		// simple ints
+		{
+			func() int { return testTracebackArgs1(1, 2, 3, 4, 5) },
+			"testTracebackArgs1(0x1, 0x2, 0x3, 0x4, 0x5)",
+		},
+		// some aggregates
+		{
+			func() int {
+				return testTracebackArgs2(false, struct {
+					a, b, c int
+					x       [2]int
+				}{1, 2, 3, [2]int{4, 5}}, [0]int{}, [3]byte{6, 7, 8})
+			},
+			"testTracebackArgs2(0x0, {0x1, 0x2, 0x3, {0x4, 0x5}}, {}, {0x6, 0x7, 0x8})",
+		},
+		{
+			func() int { return testTracebackArgs3([3]byte{1, 2, 3}, 4, 5, 6, [3]byte{7, 8, 9}) },
+			"testTracebackArgs3({0x1, 0x2, 0x3}, 0x4, 0x5, 0x6, {0x7, 0x8, 0x9})",
+		},
+		// too deeply nested type
+		{
+			func() int { return testTracebackArgs4(false, [1][1][1][1][1][1][1][1][1][1]int{}) },
+			"testTracebackArgs4(0x0, {{{{{...}}}}})",
+		},
+		// a lot of zero-sized types
+		{
+			func() int {
+				z := [0]int{}
+				return testTracebackArgs5(false, struct {
+					x int
+					y [0]int
+					z [2][0]int
+				}{1, z, [2][0]int{}}, z, z, z, z, z, z, z, z, z, z, z, z)
+			},
+			"testTracebackArgs5(0x0, {0x1, {}, {{}, {}}}, {}, {}, {}, {}, {}, ...)",
+		},
+
+		// edge cases for ...
+		// no ... for 10 args
+		{
+			func() int { return testTracebackArgs6a(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) },
+			"testTracebackArgs6a(0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa)",
+		},
+		// has ... for 11 args
+		{
+			func() int { return testTracebackArgs6b(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) },
+			"testTracebackArgs6b(0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, ...)",
+		},
+		// no ... for aggregates with 10 words
+		{
+			func() int { return testTracebackArgs7a([10]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) },
+			"testTracebackArgs7a({0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa})",
+		},
+		// has ... for aggregates with 11 words
+		{
+			func() int { return testTracebackArgs7b([11]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) },
+			"testTracebackArgs7b({0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, ...})",
+		},
+		// no ... for aggregates, but with more args
+		{
+			func() int { return testTracebackArgs7c([10]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 11) },
+			"testTracebackArgs7c({0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa}, ...)",
+		},
+		// has ... for aggregates and also for more args
+		{
+			func() int { return testTracebackArgs7d([11]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 12) },
+			"testTracebackArgs7d({0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, ...}, ...)",
+		},
+		// nested aggregates, no ...
+		{
+			func() int { return testTracebackArgs8a(testArgsType8a{1, 2, 3, 4, 5, 6, 7, 8, [2]int{9, 10}}) },
+			"testTracebackArgs8a({0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, {0x9, 0xa}})",
+		},
+		// nested aggregates, ... in inner but not outer
+		{
+			func() int { return testTracebackArgs8b(testArgsType8b{1, 2, 3, 4, 5, 6, 7, 8, [3]int{9, 10, 11}}) },
+			"testTracebackArgs8b({0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, {0x9, 0xa, ...}})",
+		},
+		// nested aggregates, ... in outer but not inner
+		{
+			func() int { return testTracebackArgs8c(testArgsType8c{1, 2, 3, 4, 5, 6, 7, 8, [2]int{9, 10}, 11}) },
+			"testTracebackArgs8c({0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, {0x9, 0xa}, ...})",
+		},
+		// nested aggregates, ... in both inner and outer
+		{
+			func() int { return testTracebackArgs8d(testArgsType8d{1, 2, 3, 4, 5, 6, 7, 8, [3]int{9, 10, 11}, 12}) },
+			"testTracebackArgs8d({0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, {0x9, 0xa, ...}, ...})",
+		},
+
+		// Register argument liveness.
+		// 1, 3 are used and live, 2, 4 are dead (in register ABI).
+		// Address-taken (7) and stack ({5, 6}) args are always live.
+		{
+			func() int {
+				poisonStack() // poison arg area to make output deterministic
+				return testTracebackArgs9(1, 2, 3, 4, [2]int{5, 6}, 7)
+			},
+			abiSel(
+				"testTracebackArgs9(0x1, 0xffffffff?, 0x3, 0xff?, {0x5, 0x6}, 0x7)",
+				"testTracebackArgs9(0x1, 0x2, 0x3, 0x4, {0x5, 0x6}, 0x7)"),
+		},
+		// No live args.
+		// (Note: this assumes at least 5 int registers if the register ABI is used.)
+		{
+			func() int {
+				poisonStack() // poison arg area to make output deterministic
+				return testTracebackArgs10(1, 2, 3, 4, 5)
+			},
+			abiSel(
+				"testTracebackArgs10(0xffffffff?, 0xffffffff?, 0xffffffff?, 0xffffffff?, 0xffffffff?)",
+				"testTracebackArgs10(0x1, 0x2, 0x3, 0x4, 0x5)"),
+		},
+		// Conditional spills.
+		// Spill in conditional, not executed.
+		{
+			func() int {
+				poisonStack() // poison arg area to make output deterministic
+				return testTracebackArgs11a(1, 2, 3)
+			},
+			abiSel(
+				"testTracebackArgs11a(0xffffffff?, 0xffffffff?, 0xffffffff?)",
+				"testTracebackArgs11a(0x1, 0x2, 0x3)"),
+		},
+		// b spills in a conditional that is not executed; c spills in a
+		// conditional that is executed, but that isn't statically known.
+		// So c prints as 0x3?.
+		{
+			func() int {
+				poisonStack() // poison arg area to make output deterministic
+				return testTracebackArgs11b(1, 2, 3, 4)
+			},
+			abiSel(
+				"testTracebackArgs11b(0xffffffff?, 0xffffffff?, 0x3?, 0x4)",
+				"testTracebackArgs11b(0x1, 0x2, 0x3, 0x4)"),
+		},
+	}
+	for _, test := range tests {
+		n := test.fn()
+		got := testTracebackArgsBuf[:n]
+		if !bytes.Contains(got, []byte(test.expect)) {
+			t.Errorf("traceback does not contain expected string: want %q, got\n%s", test.expect, got)
+		}
+	}
+}
+
+//go:noinline
+func testTracebackArgs1(a, b, c, d, e int) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a < 0 {
+		// use in-reg args to keep them alive
+		return a + b + c + d + e
+	}
+	return n
+}
+
+//go:noinline
+func testTracebackArgs2(a bool, b struct {
+	a, b, c int
+	x       [2]int
+}, _ [0]int, d [3]byte) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a {
+		// use in-reg args to keep them alive
+		return b.a + b.b + b.c + b.x[0] + b.x[1] + int(d[0]) + int(d[1]) + int(d[2])
+	}
+	return n
+}
+
+//go:noinline
+//go:registerparams
+func testTracebackArgs3(x [3]byte, a, b, c int, y [3]byte) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a < 0 {
+		// use in-reg args to keep them alive
+		return int(x[0]) + int(x[1]) + int(x[2]) + a + b + c + int(y[0]) + int(y[1]) + int(y[2])
+	}
+	return n
+}
+
+//go:noinline
+func testTracebackArgs4(a bool, x [1][1][1][1][1][1][1][1][1][1]int) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a {
+		panic(x) // use args to keep them alive
+	}
+	return n
+}
+
+//go:noinline
+func testTracebackArgs5(a bool, x struct {
+	x int
+	y [0]int
+	z [2][0]int
+}, _, _, _, _, _, _, _, _, _, _, _, _ [0]int) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a {
+		panic(x) // use args to keep them alive
+	}
+	return n
+}
+
+//go:noinline
+func testTracebackArgs6a(a, b, c, d, e, f, g, h, i, j int) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a < 0 {
+		// use in-reg args to keep them alive
+		return a + b + c + d + e + f + g + h + i + j
+	}
+	return n
+}
+
+//go:noinline
+func testTracebackArgs6b(a, b, c, d, e, f, g, h, i, j, k int) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a < 0 {
+		// use in-reg args to keep them alive
+		return a + b + c + d + e + f + g + h + i + j + k
+	}
+	return n
+}
+
+//go:noinline
+func testTracebackArgs7a(a [10]int) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a[0] < 0 {
+		// use in-reg args to keep them alive
+		return a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] + a[8] + a[9]
+	}
+	return n
+}
+
+//go:noinline
+func testTracebackArgs7b(a [11]int) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a[0] < 0 {
+		// use in-reg args to keep them alive
+		return a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] + a[8] + a[9] + a[10]
+	}
+	return n
+}
+
+//go:noinline
+func testTracebackArgs7c(a [10]int, b int) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a[0] < 0 {
+		// use in-reg args to keep them alive
+		return a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] + a[8] + a[9] + b
+	}
+	return n
+}
+
+//go:noinline
+func testTracebackArgs7d(a [11]int, b int) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a[0] < 0 {
+		// use in-reg args to keep them alive
+		return a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] + a[8] + a[9] + a[10] + b
+	}
+	return n
+}
+
+type testArgsType8a struct {
+	a, b, c, d, e, f, g, h int
+	i                      [2]int
+}
+type testArgsType8b struct {
+	a, b, c, d, e, f, g, h int
+	i                      [3]int
+}
+type testArgsType8c struct {
+	a, b, c, d, e, f, g, h int
+	i                      [2]int
+	j                      int
+}
+type testArgsType8d struct {
+	a, b, c, d, e, f, g, h int
+	i                      [3]int
+	j                      int
+}
+
+//go:noinline
+func testTracebackArgs8a(a testArgsType8a) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a.a < 0 {
+		// use in-reg args to keep them alive
+		return a.b + a.c + a.d + a.e + a.f + a.g + a.h + a.i[0] + a.i[1]
+	}
+	return n
+}
+
+//go:noinline
+func testTracebackArgs8b(a testArgsType8b) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a.a < 0 {
+		// use in-reg args to keep them alive
+		return a.b + a.c + a.d + a.e + a.f + a.g + a.h + a.i[0] + a.i[1] + a.i[2]
+	}
+	return n
+}
+
+//go:noinline
+func testTracebackArgs8c(a testArgsType8c) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a.a < 0 {
+		// use in-reg args to keep them alive
+		return a.b + a.c + a.d + a.e + a.f + a.g + a.h + a.i[0] + a.i[1] + a.j
+	}
+	return n
+}
+
+//go:noinline
+func testTracebackArgs8d(a testArgsType8d) int {
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a.a < 0 {
+		// use in-reg args to keep them alive
+		return a.b + a.c + a.d + a.e + a.f + a.g + a.h + a.i[0] + a.i[1] + a.i[2] + a.j
+	}
+	return n
+}
+
+// nosplit to avoid preemption or morestack spilling registers.
+//
+//go:nosplit
+//go:noinline
+func testTracebackArgs9(a int64, b int32, c int16, d int8, x [2]int, y int) int {
+	if a < 0 {
+		println(&y) // take address, make y live, even if no longer used at traceback
+	}
+	n := runtime.Stack(testTracebackArgsBuf[:], false)
+	if a < 0 {
+		// use half of in-reg args to keep them alive, the other half are dead
+		return int(a) + int(c)
+	}
+	return n
+}
+
+// nosplit to avoid preemption or morestack spilling registers.
+//
+//go:nosplit
+//go:noinline
+func testTracebackArgs10(a, b, c, d, e int32) int {
+	// no use of any args
+	return runtime.Stack(testTracebackArgsBuf[:], false)
+}
+
+// norace to avoid race instrumentation changing spill locations.
+// nosplit to avoid preemption or morestack spilling registers.
+// +//go:norace +//go:nosplit +//go:noinline +func testTracebackArgs11a(a, b, c int32) int { + if a < 0 { + println(a, b, c) // spill in a conditional, may not execute + } + if b < 0 { + return int(a + b + c) + } + return runtime.Stack(testTracebackArgsBuf[:], false) +} + +// norace to avoid race instrumentation changing spill locations. +// nosplit to avoid preemption or morestack spilling registers. +// +//go:norace +//go:nosplit +//go:noinline +func testTracebackArgs11b(a, b, c, d int32) int { + var x int32 + if a < 0 { + print() // spill b in a conditional + x = b + } else { + print() // spill c in a conditional + x = c + } + if d < 0 { // d is always needed + return int(x + d) + } + return runtime.Stack(testTracebackArgsBuf[:], false) +} + +// Poison the arg area with deterministic values. +// +//go:noinline +func poisonStack() [20]int { + return [20]int{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1} +} |