diff options
Diffstat (limited to 'src/runtime/trace')
-rw-r--r-- | src/runtime/trace/annotation.go | 198 | ||||
-rw-r--r-- | src/runtime/trace/annotation_test.go | 156 | ||||
-rw-r--r-- | src/runtime/trace/example_test.go | 39 | ||||
-rw-r--r-- | src/runtime/trace/trace.go | 154 | ||||
-rw-r--r-- | src/runtime/trace/trace_stack_test.go | 333 | ||||
-rw-r--r-- | src/runtime/trace/trace_test.go | 794 |
6 files changed, 1674 insertions, 0 deletions
diff --git a/src/runtime/trace/annotation.go b/src/runtime/trace/annotation.go new file mode 100644 index 0000000..2666d14 --- /dev/null +++ b/src/runtime/trace/annotation.go @@ -0,0 +1,198 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import ( + "context" + "fmt" + "sync/atomic" + _ "unsafe" +) + +type traceContextKey struct{} + +// NewTask creates a task instance with the type taskType and returns +// it along with a Context that carries the task. +// If the input context contains a task, the new task is its subtask. +// +// The taskType is used to classify task instances. Analysis tools +// like the Go execution tracer may assume there are only a bounded +// number of unique task types in the system. +// +// The returned Task's [Task.End] method is used to mark the task's end. +// The trace tool measures task latency as the time between task creation +// and when the End method is called, and provides the latency +// distribution per task type. +// If the End method is called multiple times, only the first +// call is used in the latency measurement. +// +// ctx, task := trace.NewTask(ctx, "awesomeTask") +// trace.WithRegion(ctx, "preparation", prepWork) +// // preparation of the task +// go func() { // continue processing the task in a separate goroutine. +// defer task.End() +// trace.WithRegion(ctx, "remainingWork", remainingWork) +// }() +func NewTask(pctx context.Context, taskType string) (ctx context.Context, task *Task) { + pid := fromContext(pctx).id + id := newID() + userTaskCreate(id, pid, taskType) + s := &Task{id: id} + return context.WithValue(pctx, traceContextKey{}, s), s + + // We allocate a new task even when + // the tracing is disabled because the context and task + // can be used across trace enable/disable boundaries, + // which complicates the problem. + // + // For example, consider the following scenario: + // - trace is enabled. + // - trace.WithRegion is called, so a new context ctx + // with a new region is created. + // - trace is disabled. + // - trace is enabled again. + // - trace APIs with the ctx is called. Is the ID in the task + // a valid one to use? + // + // TODO(hyangah): reduce the overhead at least when + // tracing is disabled. Maybe the id can embed a tracing + // round number and ignore ids generated from previous + // tracing round. +} + +func fromContext(ctx context.Context) *Task { + if s, ok := ctx.Value(traceContextKey{}).(*Task); ok { + return s + } + return &bgTask +} + +// Task is a data type for tracing a user-defined, logical operation. +type Task struct { + id uint64 + // TODO(hyangah): record parent id? +} + +// End marks the end of the operation represented by the [Task]. +func (t *Task) End() { + userTaskEnd(t.id) +} + +var lastTaskID uint64 = 0 // task id issued last time + +func newID() uint64 { + // TODO(hyangah): use per-P cache + return atomic.AddUint64(&lastTaskID, 1) +} + +var bgTask = Task{id: uint64(0)} + +// Log emits a one-off event with the given category and message. +// Category can be empty and the API assumes there are only a handful of +// unique categories in the system. +func Log(ctx context.Context, category, message string) { + id := fromContext(ctx).id + userLog(id, category, message) +} + +// Logf is like [Log], but the value is formatted using the specified format spec. +func Logf(ctx context.Context, category, format string, args ...any) { + if IsEnabled() { + // Ideally this should be just Log, but that will + // add one more frame in the stack trace. + id := fromContext(ctx).id + userLog(id, category, fmt.Sprintf(format, args...)) + } +} + +const ( + regionStartCode = uint64(0) + regionEndCode = uint64(1) +) + +// WithRegion starts a region associated with its calling goroutine, runs fn, +// and then ends the region. If the context carries a task, the region is +// associated with the task. Otherwise, the region is attached to the background +// task. +// +// The regionType is used to classify regions, so there should be only a +// handful of unique region types. +func WithRegion(ctx context.Context, regionType string, fn func()) { + // NOTE: + // WithRegion helps avoiding misuse of the API but in practice, + // this is very restrictive: + // - Use of WithRegion makes the stack traces captured from + // region start and end are identical. + // - Refactoring the existing code to use WithRegion is sometimes + // hard and makes the code less readable. + // e.g. code block nested deep in the loop with various + // exit point with return values + // - Refactoring the code to use this API with closure can + // cause different GC behavior such as retaining some parameters + // longer. + // This causes more churns in code than I hoped, and sometimes + // makes the code less readable. + + id := fromContext(ctx).id + userRegion(id, regionStartCode, regionType) + defer userRegion(id, regionEndCode, regionType) + fn() +} + +// StartRegion starts a region and returns it. +// The returned Region's [Region.End] method must be called +// from the same goroutine where the region was started. +// Within each goroutine, regions must nest. That is, regions started +// after this region must be ended before this region can be ended. +// Recommended usage is +// +// defer trace.StartRegion(ctx, "myTracedRegion").End() +func StartRegion(ctx context.Context, regionType string) *Region { + if !IsEnabled() { + return noopRegion + } + id := fromContext(ctx).id + userRegion(id, regionStartCode, regionType) + return &Region{id, regionType} +} + +// Region is a region of code whose execution time interval is traced. +type Region struct { + id uint64 + regionType string +} + +var noopRegion = &Region{} + +// End marks the end of the traced code region. +func (r *Region) End() { + if r == noopRegion { + return + } + userRegion(r.id, regionEndCode, r.regionType) +} + +// IsEnabled reports whether tracing is enabled. +// The information is advisory only. The tracing status +// may have changed by the time this function returns. +func IsEnabled() bool { + return tracing.enabled.Load() +} + +// +// Function bodies are defined in runtime/trace.go +// + +// emits UserTaskCreate event. +func userTaskCreate(id, parentID uint64, taskType string) + +// emits UserTaskEnd event. +func userTaskEnd(id uint64) + +// emits UserRegion event. +func userRegion(id, mode uint64, regionType string) + +// emits UserLog event. +func userLog(id uint64, category, message string) diff --git a/src/runtime/trace/annotation_test.go b/src/runtime/trace/annotation_test.go new file mode 100644 index 0000000..69ea8f2 --- /dev/null +++ b/src/runtime/trace/annotation_test.go @@ -0,0 +1,156 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace_test + +import ( + "bytes" + "context" + "fmt" + "internal/trace" + "reflect" + . "runtime/trace" + "strings" + "sync" + "testing" +) + +func BenchmarkStartRegion(b *testing.B) { + b.ReportAllocs() + ctx, task := NewTask(context.Background(), "benchmark") + defer task.End() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + StartRegion(ctx, "region").End() + } + }) +} + +func BenchmarkNewTask(b *testing.B) { + b.ReportAllocs() + pctx, task := NewTask(context.Background(), "benchmark") + defer task.End() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + _, task := NewTask(pctx, "task") + task.End() + } + }) +} + +func TestUserTaskRegion(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + bgctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + preExistingRegion := StartRegion(bgctx, "pre-existing region") + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + // Beginning of traced execution + var wg sync.WaitGroup + ctx, task := NewTask(bgctx, "task0") // EvUserTaskCreate("task0") + wg.Add(1) + go func() { + defer wg.Done() + defer task.End() // EvUserTaskEnd("task0") + + WithRegion(ctx, "region0", func() { + // EvUserRegionCreate("region0", start) + WithRegion(ctx, "region1", func() { + Log(ctx, "key0", "0123456789abcdef") // EvUserLog("task0", "key0", "0....f") + }) + // EvUserRegion("region0", end) + }) + }() + + wg.Wait() + + preExistingRegion.End() + postExistingRegion := StartRegion(bgctx, "post-existing region") + + // End of traced execution + Stop() + + postExistingRegion.End() + + saveTrace(t, buf, "TestUserTaskRegion") + res, err := trace.Parse(buf, "") + if err == trace.ErrTimeOrder { + // golang.org/issues/16755 + t.Skipf("skipping trace: %v", err) + } + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + // Check whether we see all user annotation related records in order + type testData struct { + typ byte + strs []string + args []uint64 + setLink bool + } + + var got []testData + tasks := map[uint64]string{} + for _, e := range res.Events { + t.Logf("%s", e) + switch e.Type { + case trace.EvUserTaskCreate: + taskName := e.SArgs[0] + got = append(got, testData{trace.EvUserTaskCreate, []string{taskName}, nil, e.Link != nil}) + if e.Link != nil && e.Link.Type != trace.EvUserTaskEnd { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + tasks[e.Args[0]] = taskName + case trace.EvUserLog: + key, val := e.SArgs[0], e.SArgs[1] + taskName := tasks[e.Args[0]] + got = append(got, testData{trace.EvUserLog, []string{taskName, key, val}, nil, e.Link != nil}) + case trace.EvUserTaskEnd: + taskName := tasks[e.Args[0]] + got = append(got, testData{trace.EvUserTaskEnd, []string{taskName}, nil, e.Link != nil}) + if e.Link != nil && e.Link.Type != trace.EvUserTaskCreate { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + case trace.EvUserRegion: + taskName := tasks[e.Args[0]] + regionName := e.SArgs[0] + got = append(got, testData{trace.EvUserRegion, []string{taskName, regionName}, []uint64{e.Args[1]}, e.Link != nil}) + if e.Link != nil && (e.Link.Type != trace.EvUserRegion || e.Link.SArgs[0] != regionName) { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + } + } + want := []testData{ + {trace.EvUserTaskCreate, []string{"task0"}, nil, true}, + {trace.EvUserRegion, []string{"task0", "region0"}, []uint64{0}, true}, + {trace.EvUserRegion, []string{"task0", "region1"}, []uint64{0}, true}, + {trace.EvUserLog, []string{"task0", "key0", "0123456789abcdef"}, nil, false}, + {trace.EvUserRegion, []string{"task0", "region1"}, []uint64{1}, false}, + {trace.EvUserRegion, []string{"task0", "region0"}, []uint64{1}, false}, + {trace.EvUserTaskEnd, []string{"task0"}, nil, false}, + // Currently, pre-existing region is not recorded to avoid allocations. + // {trace.EvUserRegion, []string{"", "pre-existing region"}, []uint64{1}, false}, + {trace.EvUserRegion, []string{"", "post-existing region"}, []uint64{0}, false}, + } + if !reflect.DeepEqual(got, want) { + pretty := func(data []testData) string { + var s strings.Builder + for _, d := range data { + fmt.Fprintf(&s, "\t%+v\n", d) + } + return s.String() + } + t.Errorf("Got user region related events\n%+v\nwant:\n%+v", pretty(got), pretty(want)) + } +} diff --git a/src/runtime/trace/example_test.go b/src/runtime/trace/example_test.go new file mode 100644 index 0000000..ba96a82 --- /dev/null +++ b/src/runtime/trace/example_test.go @@ -0,0 +1,39 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace_test + +import ( + "fmt" + "log" + "os" + "runtime/trace" +) + +// Example demonstrates the use of the trace package to trace +// the execution of a Go program. The trace output will be +// written to the file trace.out +func Example() { + f, err := os.Create("trace.out") + if err != nil { + log.Fatalf("failed to create trace output file: %v", err) + } + defer func() { + if err := f.Close(); err != nil { + log.Fatalf("failed to close trace file: %v", err) + } + }() + + if err := trace.Start(f); err != nil { + log.Fatalf("failed to start trace: %v", err) + } + defer trace.Stop() + + // your program here + RunMyProgram() +} + +func RunMyProgram() { + fmt.Printf("this function will be traced") +} diff --git a/src/runtime/trace/trace.go b/src/runtime/trace/trace.go new file mode 100644 index 0000000..935d222 --- /dev/null +++ b/src/runtime/trace/trace.go @@ -0,0 +1,154 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package trace contains facilities for programs to generate traces +// for the Go execution tracer. +// +// # Tracing runtime activities +// +// The execution trace captures a wide range of execution events such as +// goroutine creation/blocking/unblocking, syscall enter/exit/block, +// GC-related events, changes of heap size, processor start/stop, etc. +// When CPU profiling is active, the execution tracer makes an effort to +// include those samples as well. +// A precise nanosecond-precision timestamp and a stack trace is +// captured for most events. The generated trace can be interpreted +// using `go tool trace`. +// +// Support for tracing tests and benchmarks built with the standard +// testing package is built into `go test`. For example, the following +// command runs the test in the current directory and writes the trace +// file (trace.out). +// +// go test -trace=trace.out +// +// This runtime/trace package provides APIs to add equivalent tracing +// support to a standalone program. See the Example that demonstrates +// how to use this API to enable tracing. +// +// There is also a standard HTTP interface to trace data. Adding the +// following line will install a handler under the /debug/pprof/trace URL +// to download a live trace: +// +// import _ "net/http/pprof" +// +// See the [net/http/pprof] package for more details about all of the +// debug endpoints installed by this import. +// +// # User annotation +// +// Package trace provides user annotation APIs that can be used to +// log interesting events during execution. +// +// There are three types of user annotations: log messages, regions, +// and tasks. +// +// [Log] emits a timestamped message to the execution trace along with +// additional information such as the category of the message and +// which goroutine called [Log]. The execution tracer provides UIs to filter +// and group goroutines using the log category and the message supplied +// in [Log]. +// +// A region is for logging a time interval during a goroutine's execution. +// By definition, a region starts and ends in the same goroutine. +// Regions can be nested to represent subintervals. +// For example, the following code records four regions in the execution +// trace to trace the durations of sequential steps in a cappuccino making +// operation. +// +// trace.WithRegion(ctx, "makeCappuccino", func() { +// +// // orderID allows to identify a specific order +// // among many cappuccino order region records. +// trace.Log(ctx, "orderID", orderID) +// +// trace.WithRegion(ctx, "steamMilk", steamMilk) +// trace.WithRegion(ctx, "extractCoffee", extractCoffee) +// trace.WithRegion(ctx, "mixMilkCoffee", mixMilkCoffee) +// }) +// +// A task is a higher-level component that aids tracing of logical +// operations such as an RPC request, an HTTP request, or an +// interesting local operation which may require multiple goroutines +// working together. Since tasks can involve multiple goroutines, +// they are tracked via a [context.Context] object. [NewTask] creates +// a new task and embeds it in the returned [context.Context] object. +// Log messages and regions are attached to the task, if any, in the +// Context passed to [Log] and [WithRegion]. +// +// For example, assume that we decided to froth milk, extract coffee, +// and mix milk and coffee in separate goroutines. With a task, +// the trace tool can identify the goroutines involved in a specific +// cappuccino order. +// +// ctx, task := trace.NewTask(ctx, "makeCappuccino") +// trace.Log(ctx, "orderID", orderID) +// +// milk := make(chan bool) +// espresso := make(chan bool) +// +// go func() { +// trace.WithRegion(ctx, "steamMilk", steamMilk) +// milk <- true +// }() +// go func() { +// trace.WithRegion(ctx, "extractCoffee", extractCoffee) +// espresso <- true +// }() +// go func() { +// defer task.End() // When assemble is done, the order is complete. +// <-espresso +// <-milk +// trace.WithRegion(ctx, "mixMilkCoffee", mixMilkCoffee) +// }() +// +// The trace tool computes the latency of a task by measuring the +// time between the task creation and the task end and provides +// latency distributions for each task type found in the trace. +package trace + +import ( + "io" + "runtime" + "sync" + "sync/atomic" +) + +// Start enables tracing for the current program. +// While tracing, the trace will be buffered and written to w. +// Start returns an error if tracing is already enabled. +func Start(w io.Writer) error { + tracing.Lock() + defer tracing.Unlock() + + if err := runtime.StartTrace(); err != nil { + return err + } + go func() { + for { + data := runtime.ReadTrace() + if data == nil { + break + } + w.Write(data) + } + }() + tracing.enabled.Store(true) + return nil +} + +// Stop stops the current tracing, if any. +// Stop only returns after all the writes for the trace have completed. +func Stop() { + tracing.Lock() + defer tracing.Unlock() + tracing.enabled.Store(false) + + runtime.StopTrace() +} + +var tracing struct { + sync.Mutex // gate mutators (Start, Stop) + enabled atomic.Bool +} diff --git a/src/runtime/trace/trace_stack_test.go b/src/runtime/trace/trace_stack_test.go new file mode 100644 index 0000000..be3adc9 --- /dev/null +++ b/src/runtime/trace/trace_stack_test.go @@ -0,0 +1,333 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace_test + +import ( + "bytes" + "fmt" + "internal/testenv" + "internal/trace" + "net" + "os" + "runtime" + . "runtime/trace" + "strings" + "sync" + "testing" + "text/tabwriter" + "time" +) + +// TestTraceSymbolize tests symbolization and that events has proper stacks. +// In particular that we strip bottom uninteresting frames like goexit, +// top uninteresting frames (runtime guts). +func TestTraceSymbolize(t *testing.T) { + skipTraceSymbolizeTestIfNecessary(t) + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + defer Stop() // in case of early return + + // Now we will do a bunch of things for which we verify stacks later. + // It is impossible to ensure that a goroutine has actually blocked + // on a channel, in a select or otherwise. So we kick off goroutines + // that need to block first in the hope that while we are executing + // the rest of the test, they will block. + go func() { // func1 + select {} + }() + go func() { // func2 + var c chan int + c <- 0 + }() + go func() { // func3 + var c chan int + <-c + }() + done1 := make(chan bool) + go func() { // func4 + <-done1 + }() + done2 := make(chan bool) + go func() { // func5 + done2 <- true + }() + c1 := make(chan int) + c2 := make(chan int) + go func() { // func6 + select { + case <-c1: + case <-c2: + } + }() + var mu sync.Mutex + mu.Lock() + go func() { // func7 + mu.Lock() + mu.Unlock() + }() + var wg sync.WaitGroup + wg.Add(1) + go func() { // func8 + wg.Wait() + }() + cv := sync.NewCond(&sync.Mutex{}) + go func() { // func9 + cv.L.Lock() + cv.Wait() + cv.L.Unlock() + }() + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to listen: %v", err) + } + go func() { // func10 + c, err := ln.Accept() + if err != nil { + t.Errorf("failed to accept: %v", err) + return + } + c.Close() + }() + rp, wp, err := os.Pipe() + if err != nil { + t.Fatalf("failed to create a pipe: %v", err) + } + defer rp.Close() + defer wp.Close() + pipeReadDone := make(chan bool) + go func() { // func11 + var data [1]byte + rp.Read(data[:]) + pipeReadDone <- true + }() + + time.Sleep(100 * time.Millisecond) + runtime.GC() + runtime.Gosched() + time.Sleep(100 * time.Millisecond) // the last chance for the goroutines above to block + done1 <- true + <-done2 + select { + case c1 <- 0: + case c2 <- 0: + } + mu.Unlock() + wg.Done() + cv.Signal() + c, err := net.Dial("tcp", ln.Addr().String()) + if err != nil { + t.Fatalf("failed to dial: %v", err) + } + c.Close() + var data [1]byte + wp.Write(data[:]) + <-pipeReadDone + + oldGoMaxProcs := runtime.GOMAXPROCS(0) + runtime.GOMAXPROCS(oldGoMaxProcs + 1) + + Stop() + + runtime.GOMAXPROCS(oldGoMaxProcs) + + events, _ := parseTrace(t, buf) + + // Now check that the stacks are correct. + type eventDesc struct { + Type byte + Stk []frame + } + want := []eventDesc{ + {trace.EvGCStart, []frame{ + {"runtime.GC", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoStart, []frame{ + {"runtime/trace_test.TestTraceSymbolize.func1", 0}, + }}, + {trace.EvGoSched, []frame{ + {"runtime/trace_test.TestTraceSymbolize", 111}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoCreate, []frame{ + {"runtime/trace_test.TestTraceSymbolize", 40}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoStop, []frame{ + {"runtime.block", 0}, + {"runtime/trace_test.TestTraceSymbolize.func1", 0}, + }}, + {trace.EvGoStop, []frame{ + {"runtime.chansend1", 0}, + {"runtime/trace_test.TestTraceSymbolize.func2", 0}, + }}, + {trace.EvGoStop, []frame{ + {"runtime.chanrecv1", 0}, + {"runtime/trace_test.TestTraceSymbolize.func3", 0}, + }}, + {trace.EvGoBlockRecv, []frame{ + {"runtime.chanrecv1", 0}, + {"runtime/trace_test.TestTraceSymbolize.func4", 0}, + }}, + {trace.EvGoUnblock, []frame{ + {"runtime.chansend1", 0}, + {"runtime/trace_test.TestTraceSymbolize", 113}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockSend, []frame{ + {"runtime.chansend1", 0}, + {"runtime/trace_test.TestTraceSymbolize.func5", 0}, + }}, + {trace.EvGoUnblock, []frame{ + {"runtime.chanrecv1", 0}, + {"runtime/trace_test.TestTraceSymbolize", 114}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockSelect, []frame{ + {"runtime.selectgo", 0}, + {"runtime/trace_test.TestTraceSymbolize.func6", 0}, + }}, + {trace.EvGoUnblock, []frame{ + {"runtime.selectgo", 0}, + {"runtime/trace_test.TestTraceSymbolize", 115}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockSync, []frame{ + {"sync.(*Mutex).Lock", 0}, + {"runtime/trace_test.TestTraceSymbolize.func7", 0}, + }}, + {trace.EvGoUnblock, []frame{ + {"sync.(*Mutex).Unlock", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockSync, []frame{ + {"sync.(*WaitGroup).Wait", 0}, + {"runtime/trace_test.TestTraceSymbolize.func8", 0}, + }}, + {trace.EvGoUnblock, []frame{ + {"sync.(*WaitGroup).Add", 0}, + {"sync.(*WaitGroup).Done", 0}, + {"runtime/trace_test.TestTraceSymbolize", 120}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoBlockCond, []frame{ + {"sync.(*Cond).Wait", 0}, + {"runtime/trace_test.TestTraceSymbolize.func9", 0}, + }}, + {trace.EvGoUnblock, []frame{ + {"sync.(*Cond).Signal", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + {trace.EvGoSleep, []frame{ + {"time.Sleep", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + {trace.EvGomaxprocs, []frame{ + {"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged. + {"runtime.startTheWorldGC", 0}, + {"runtime.GOMAXPROCS", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + } + // Stacks for the following events are OS-dependent due to OS-specific code in net package. + if runtime.GOOS != "windows" && runtime.GOOS != "plan9" { + want = append(want, []eventDesc{ + {trace.EvGoBlockNet, []frame{ + {"internal/poll.(*FD).Accept", 0}, + {"net.(*netFD).accept", 0}, + {"net.(*TCPListener).accept", 0}, + {"net.(*TCPListener).Accept", 0}, + {"runtime/trace_test.TestTraceSymbolize.func10", 0}, + }}, + {trace.EvGoSysCall, []frame{ + {"syscall.read", 0}, + {"syscall.Read", 0}, + {"internal/poll.ignoringEINTRIO", 0}, + {"internal/poll.(*FD).Read", 0}, + {"os.(*File).read", 0}, + {"os.(*File).Read", 0}, + {"runtime/trace_test.TestTraceSymbolize.func11", 0}, + }}, + }...) + } + matched := make([]bool, len(want)) + for _, ev := range events { + wantLoop: + for i, w := range want { + if matched[i] || w.Type != ev.Type || len(w.Stk) != len(ev.Stk) { + continue + } + + for fi, f := range ev.Stk { + wf := w.Stk[fi] + if wf.Fn != f.Fn || wf.Line != 0 && wf.Line != f.Line { + continue wantLoop + } + } + matched[i] = true + } + } + for i, w := range want { + if matched[i] { + continue + } + seen, n := dumpEventStacks(w.Type, events) + t.Errorf("Did not match event %v with stack\n%s\nSeen %d events of the type\n%s", + trace.EventDescriptions[w.Type].Name, dumpFrames(w.Stk), n, seen) + } +} + +func skipTraceSymbolizeTestIfNecessary(t *testing.T) { + testenv.MustHaveGoBuild(t) + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } +} + +func dumpEventStacks(typ byte, events []*trace.Event) ([]byte, int) { + matched := 0 + o := new(bytes.Buffer) + tw := tabwriter.NewWriter(o, 0, 8, 0, '\t', 0) + for _, ev := range events { + if ev.Type != typ { + continue + } + matched++ + fmt.Fprintf(tw, "Offset %d\n", ev.Off) + for _, f := range ev.Stk { + fname := f.File + if idx := strings.Index(fname, "/go/src/"); idx > 0 { + fname = fname[idx:] + } + fmt.Fprintf(tw, " %v\t%s:%d\n", f.Fn, fname, f.Line) + } + } + tw.Flush() + return o.Bytes(), matched +} + +type frame struct { + Fn string + Line int +} + +func dumpFrames(frames []frame) []byte { + o := new(bytes.Buffer) + tw := tabwriter.NewWriter(o, 0, 8, 0, '\t', 0) + + for _, f := range frames { + fmt.Fprintf(tw, " %v\t :%d\n", f.Fn, f.Line) + } + tw.Flush() + return o.Bytes() +} diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go new file mode 100644 index 0000000..04a43a0 --- /dev/null +++ b/src/runtime/trace/trace_test.go @@ -0,0 +1,794 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace_test + +import ( + "bytes" + "context" + "flag" + "fmt" + "internal/profile" + "internal/race" + "internal/trace" + "io" + "net" + "os" + "runtime" + "runtime/pprof" + . "runtime/trace" + "strconv" + "strings" + "sync" + "testing" + "time" +) + +var ( + saveTraces = flag.Bool("savetraces", false, "save traces collected by tests") +) + +// TestEventBatch tests Flush calls that happen during Start +// don't produce corrupted traces. +func TestEventBatch(t *testing.T) { + if race.Enabled { + t.Skip("skipping in race mode") + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + if testing.Short() { + t.Skip("skipping in short mode") + } + // During Start, bunch of records are written to reflect the current + // snapshot of the program, including state of each goroutines. + // And some string constants are written to the trace to aid trace + // parsing. This test checks Flush of the buffer occurred during + // this process doesn't cause corrupted traces. + // When a Flush is called during Start is complicated + // so we test with a range of number of goroutines hoping that one + // of them triggers Flush. + // This range was chosen to fill up a ~64KB buffer with traceEvGoCreate + // and traceEvGoWaiting events (12~13bytes per goroutine). + for g := 4950; g < 5050; g++ { + n := g + t.Run("G="+strconv.Itoa(n), func(t *testing.T) { + var wg sync.WaitGroup + wg.Add(n) + + in := make(chan bool, 1000) + for i := 0; i < n; i++ { + go func() { + <-in + wg.Done() + }() + } + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + for i := 0; i < n; i++ { + in <- true + } + wg.Wait() + Stop() + + _, err := trace.Parse(buf, "") + if err == trace.ErrTimeOrder { + t.Skipf("skipping trace: %v", err) + } + + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } + }) + } +} + +func TestTraceStartStop(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + Stop() + size := buf.Len() + if size == 0 { + t.Fatalf("trace is empty") + } + time.Sleep(100 * time.Millisecond) + if size != buf.Len() { + t.Fatalf("trace writes after stop: %v -> %v", size, buf.Len()) + } + saveTrace(t, buf, "TestTraceStartStop") +} + +func TestTraceDoubleStart(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + Stop() + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + if err := Start(buf); err == nil { + t.Fatalf("succeed to start tracing second time") + } + Stop() + Stop() +} + +func TestTrace(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + Stop() + saveTrace(t, buf, "TestTrace") + _, err := trace.Parse(buf, "") + if err == trace.ErrTimeOrder { + t.Skipf("skipping trace: %v", err) + } + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } +} + +func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc) { + res, err := trace.Parse(r, "") + if err == trace.ErrTimeOrder { + t.Skipf("skipping trace: %v", err) + } + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } + gs := trace.GoroutineStats(res.Events) + for goid := range gs { + // We don't do any particular checks on the result at the moment. + // But still check that RelatedGoroutines does not crash, hang, etc. + _ = trace.RelatedGoroutines(res.Events, goid) + } + return res.Events, gs +} + +func testBrokenTimestamps(t *testing.T, data []byte) { + // On some processors cputicks (used to generate trace timestamps) + // produce non-monotonic timestamps. It is important that the parser + // distinguishes logically inconsistent traces (e.g. missing, excessive + // or misordered events) from broken timestamps. The former is a bug + // in tracer, the latter is a machine issue. + // So now that we have a consistent trace, test that (1) parser does + // not return a logical error in case of broken timestamps + // and (2) broken timestamps are eventually detected and reported. + trace.BreakTimestampsForTesting = true + defer func() { + trace.BreakTimestampsForTesting = false + }() + for i := 0; i < 1e4; i++ { + _, err := trace.Parse(bytes.NewReader(data), "") + if err == trace.ErrTimeOrder { + return + } + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } + } +} + +func TestTraceStress(t *testing.T) { + switch runtime.GOOS { + case "js", "wasip1": + t.Skip("no os.Pipe on " + runtime.GOOS) + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + if testing.Short() { + t.Skip("skipping in -short mode") + } + + var wg sync.WaitGroup + done := make(chan bool) + + // Create a goroutine blocked before tracing. + wg.Add(1) + go func() { + <-done + wg.Done() + }() + + // Create a goroutine blocked in syscall before tracing. + rp, wp, err := os.Pipe() + if err != nil { + t.Fatalf("failed to create pipe: %v", err) + } + defer func() { + rp.Close() + wp.Close() + }() + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + time.Sleep(time.Millisecond) // give the goroutine above time to block + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + procs := runtime.GOMAXPROCS(10) + time.Sleep(50 * time.Millisecond) // test proc stop/start events + + go func() { + runtime.LockOSThread() + for { + select { + case <-done: + return + default: + runtime.Gosched() + } + } + }() + + runtime.GC() + // Trigger GC from malloc. + n := int(1e3) + if isMemoryConstrained() { + // Reduce allocation to avoid running out of + // memory on the builder - see issue/12032. + n = 512 + } + for i := 0; i < n; i++ { + _ = make([]byte, 1<<20) + } + + // Create a bunch of busy goroutines to load all Ps. + for p := 0; p < 10; p++ { + wg.Add(1) + go func() { + // Do something useful. + tmp := make([]byte, 1<<16) + for i := range tmp { + tmp[i]++ + } + _ = tmp + <-done + wg.Done() + }() + } + + // Block in syscall. + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + + // Test timers. + timerDone := make(chan bool) + go func() { + time.Sleep(time.Millisecond) + timerDone <- true + }() + <-timerDone + + // A bit of network. + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("listen failed: %v", err) + } + defer ln.Close() + go func() { + c, err := ln.Accept() + if err != nil { + return + } + time.Sleep(time.Millisecond) + var buf [1]byte + c.Write(buf[:]) + c.Close() + }() + c, err := net.Dial("tcp", ln.Addr().String()) + if err != nil { + t.Fatalf("dial failed: %v", err) + } + var tmp [1]byte + c.Read(tmp[:]) + c.Close() + + go func() { + runtime.Gosched() + select {} + }() + + // Unblock helper goroutines and wait them to finish. + wp.Write(tmp[:]) + wp.Write(tmp[:]) + close(done) + wg.Wait() + + runtime.GOMAXPROCS(procs) + + Stop() + saveTrace(t, buf, "TestTraceStress") + trace := buf.Bytes() + parseTrace(t, buf) + testBrokenTimestamps(t, trace) +} + +// isMemoryConstrained reports whether the current machine is likely +// to be memory constrained. +// This was originally for the openbsd/arm builder (Issue 12032). +// TODO: move this to testenv? Make this look at memory? Look at GO_BUILDER_NAME? +func isMemoryConstrained() bool { + if runtime.GOOS == "plan9" { + return true + } + switch runtime.GOARCH { + case "arm", "mips", "mipsle": + return true + } + return false +} + +// Do a bunch of various stuff (timers, GC, network, etc) in a separate goroutine. +// And concurrently with all that start/stop trace 3 times. +func TestTraceStressStartStop(t *testing.T) { + switch runtime.GOOS { + case "js", "wasip1": + t.Skip("no os.Pipe on " + runtime.GOOS) + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8)) + outerDone := make(chan bool) + + go func() { + defer func() { + outerDone <- true + }() + + var wg sync.WaitGroup + done := make(chan bool) + + wg.Add(1) + go func() { + <-done + wg.Done() + }() + + rp, wp, err := os.Pipe() + if err != nil { + t.Errorf("failed to create pipe: %v", err) + return + } + defer func() { + rp.Close() + wp.Close() + }() + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + time.Sleep(time.Millisecond) + + go func() { + runtime.LockOSThread() + for { + select { + case <-done: + return + default: + runtime.Gosched() + } + } + }() + + runtime.GC() + // Trigger GC from malloc. + n := int(1e3) + if isMemoryConstrained() { + // Reduce allocation to avoid running out of + // memory on the builder. + n = 512 + } + for i := 0; i < n; i++ { + _ = make([]byte, 1<<20) + } + + // Create a bunch of busy goroutines to load all Ps. + for p := 0; p < 10; p++ { + wg.Add(1) + go func() { + // Do something useful. + tmp := make([]byte, 1<<16) + for i := range tmp { + tmp[i]++ + } + _ = tmp + <-done + wg.Done() + }() + } + + // Block in syscall. + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + + runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + + // Test timers. + timerDone := make(chan bool) + go func() { + time.Sleep(time.Millisecond) + timerDone <- true + }() + <-timerDone + + // A bit of network. + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Errorf("listen failed: %v", err) + return + } + defer ln.Close() + go func() { + c, err := ln.Accept() + if err != nil { + return + } + time.Sleep(time.Millisecond) + var buf [1]byte + c.Write(buf[:]) + c.Close() + }() + c, err := net.Dial("tcp", ln.Addr().String()) + if err != nil { + t.Errorf("dial failed: %v", err) + return + } + var tmp [1]byte + c.Read(tmp[:]) + c.Close() + + go func() { + runtime.Gosched() + select {} + }() + + // Unblock helper goroutines and wait them to finish. + wp.Write(tmp[:]) + wp.Write(tmp[:]) + close(done) + wg.Wait() + }() + + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + time.Sleep(time.Millisecond) + Stop() + saveTrace(t, buf, "TestTraceStressStartStop") + trace := buf.Bytes() + parseTrace(t, buf) + testBrokenTimestamps(t, trace) + } + <-outerDone +} + +func TestTraceFutileWakeup(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8)) + c0 := make(chan int, 1) + c1 := make(chan int, 1) + c2 := make(chan int, 1) + const procs = 2 + var done sync.WaitGroup + done.Add(4 * procs) + for p := 0; p < procs; p++ { + const iters = 1e3 + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + c0 <- 0 + } + done.Done() + }() + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + <-c0 + } + done.Done() + }() + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + select { + case c1 <- 0: + case c2 <- 0: + } + } + done.Done() + }() + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + select { + case <-c1: + case <-c2: + } + } + done.Done() + }() + } + done.Wait() + + Stop() + saveTrace(t, buf, "TestTraceFutileWakeup") + events, _ := parseTrace(t, buf) + // Check that (1) trace does not contain EvFutileWakeup events and + // (2) there are no consecutive EvGoBlock/EvGCStart/EvGoBlock events + // (we call runtime.Gosched between all operations, so these would be futile wakeups). + gs := make(map[uint64]int) + for _, ev := range events { + switch ev.Type { + case trace.EvFutileWakeup: + t.Fatalf("found EvFutileWakeup event") + case trace.EvGoBlockSend, trace.EvGoBlockRecv, trace.EvGoBlockSelect: + if gs[ev.G] == 2 { + t.Fatalf("goroutine %v blocked on %v at %v right after start", + ev.G, trace.EventDescriptions[ev.Type].Name, ev.Ts) + } + if gs[ev.G] == 1 { + t.Fatalf("goroutine %v blocked on %v at %v while blocked", + ev.G, trace.EventDescriptions[ev.Type].Name, ev.Ts) + } + gs[ev.G] = 1 + case trace.EvGoStart: + if gs[ev.G] == 1 { + gs[ev.G] = 2 + } + default: + delete(gs, ev.G) + } + } +} + +func TestTraceCPUProfile(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + + cpuBuf := new(bytes.Buffer) + if err := pprof.StartCPUProfile(cpuBuf); err != nil { + t.Skipf("failed to start CPU profile: %v", err) + } + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + dur := 100 * time.Millisecond + func() { + // Create a region in the execution trace. Set and clear goroutine + // labels fully within that region, so we know that any CPU profile + // sample with the label must also be eligible for inclusion in the + // execution trace. + ctx := context.Background() + defer StartRegion(ctx, "cpuHogger").End() + pprof.Do(ctx, pprof.Labels("tracing", "on"), func(ctx context.Context) { + cpuHogger(cpuHog1, &salt1, dur) + }) + // Be sure the execution trace's view, when filtered to this goroutine + // via the explicit goroutine ID in each event, gets many more samples + // than the CPU profiler when filtered to this goroutine via labels. + cpuHogger(cpuHog1, &salt1, dur) + }() + + Stop() + pprof.StopCPUProfile() + saveTrace(t, buf, "TestTraceCPUProfile") + + prof, err := profile.Parse(cpuBuf) + if err != nil { + t.Fatalf("failed to parse CPU profile: %v", err) + } + // Examine the CPU profiler's view. Filter it to only include samples from + // the single test goroutine. Use labels to execute that filter: they should + // apply to all work done while that goroutine is getg().m.curg, and they + // should apply to no other goroutines. + pprofSamples := 0 + pprofStacks := make(map[string]int) + for _, s := range prof.Sample { + if s.Label["tracing"] != nil { + var fns []string + var leaf string + for _, loc := range s.Location { + for _, line := range loc.Line { + fns = append(fns, fmt.Sprintf("%s:%d", line.Function.Name, line.Line)) + leaf = line.Function.Name + } + } + // runtime.sigprof synthesizes call stacks when "normal traceback is + // impossible or has failed", using particular placeholder functions + // to represent common failure cases. Look for those functions in + // the leaf position as a sign that the call stack and its + // symbolization are more complex than this test can handle. + // + // TODO: Make the symbolization done by the execution tracer and CPU + // profiler match up even in these harder cases. See #53378. + switch leaf { + case "runtime._System", "runtime._GC", "runtime._ExternalCode", "runtime._VDSO": + continue + } + stack := strings.Join(fns, " ") + samples := int(s.Value[0]) + pprofSamples += samples + pprofStacks[stack] += samples + } + } + if pprofSamples == 0 { + t.Skipf("CPU profile did not include any samples while tracing was active\n%s", prof) + } + + // Examine the execution tracer's view of the CPU profile samples. Filter it + // to only include samples from the single test goroutine. Use the goroutine + // ID that was recorded in the events: that should reflect getg().m.curg, + // same as the profiler's labels (even when the M is using its g0 stack). + totalTraceSamples := 0 + traceSamples := 0 + traceStacks := make(map[string]int) + events, _ := parseTrace(t, buf) + var hogRegion *trace.Event + for _, ev := range events { + if ev.Type == trace.EvUserRegion && ev.Args[1] == 0 && ev.SArgs[0] == "cpuHogger" { + // mode "0" indicates region start + hogRegion = ev + } + } + if hogRegion == nil { + t.Fatalf("execution trace did not identify cpuHogger goroutine") + } else if hogRegion.Link == nil { + t.Fatalf("execution trace did not close cpuHogger region") + } + for _, ev := range events { + if ev.Type == trace.EvCPUSample { + totalTraceSamples++ + if ev.G == hogRegion.G { + traceSamples++ + var fns []string + for _, frame := range ev.Stk { + if frame.Fn != "runtime.goexit" { + fns = append(fns, fmt.Sprintf("%s:%d", frame.Fn, frame.Line)) + } + } + stack := strings.Join(fns, " ") + traceStacks[stack]++ + } + } + } + + // The execution trace may drop CPU profile samples if the profiling buffer + // overflows. Based on the size of profBufWordCount, that takes a bit over + // 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've + // hit that case, then we definitely have at least one full buffer's worth + // of CPU samples, so we'll call that success. + overflowed := totalTraceSamples >= 1900 + if traceSamples < pprofSamples { + t.Logf("execution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples) + if !overflowed { + t.Fail() + } + } + + for stack, traceSamples := range traceStacks { + pprofSamples := pprofStacks[stack] + delete(pprofStacks, stack) + if traceSamples < pprofSamples { + t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace", + stack, pprofSamples, traceSamples) + if !overflowed { + t.Fail() + } + } + } + for stack, pprofSamples := range pprofStacks { + t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack) + if !overflowed { + t.Fail() + } + } + + if t.Failed() { + t.Logf("execution trace CPU samples:") + for stack, samples := range traceStacks { + t.Logf("%d: %q", samples, stack) + } + t.Logf("CPU profile:\n%v", prof) + } +} + +func cpuHogger(f func(x int) int, y *int, dur time.Duration) { + // We only need to get one 100 Hz clock tick, so we've got + // a large safety buffer. + // But do at least 500 iterations (which should take about 100ms), + // otherwise TestCPUProfileMultithreaded can fail if only one + // thread is scheduled during the testing period. + t0 := time.Now() + accum := *y + for i := 0; i < 500 || time.Since(t0) < dur; i++ { + accum = f(accum) + } + *y = accum +} + +var ( + salt1 = 0 +) + +// The actual CPU hogging function. +// Must not call other functions nor access heap/globals in the loop, +// otherwise under race detector the samples will be in the race runtime. +func cpuHog1(x int) int { + return cpuHog0(x, 1e5) +} + +func cpuHog0(x, n int) int { + foo := x + for i := 0; i < n; i++ { + if i%1000 == 0 { + // Spend time in mcall, stored as gp.m.curg, with g0 running + runtime.Gosched() + } + if foo > 0 { + foo *= foo + } else { + foo *= foo + 1 + } + } + return foo +} + +func saveTrace(t *testing.T, buf *bytes.Buffer, name string) { + if !*saveTraces { + return + } + if err := os.WriteFile(name+".trace", buf.Bytes(), 0600); err != nil { + t.Errorf("failed to write trace file: %s", err) + } +} |