diff options
Diffstat (limited to '')
-rw-r--r-- | src/runtime/trace/trace_test.go | 792 |
1 files changed, 792 insertions, 0 deletions
diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go new file mode 100644 index 0000000..19f7dbe --- /dev/null +++ b/src/runtime/trace/trace_test.go @@ -0,0 +1,792 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace_test + +import ( + "bytes" + "context" + "flag" + "fmt" + "internal/profile" + "internal/race" + "internal/trace" + "io" + "net" + "os" + "runtime" + "runtime/pprof" + . "runtime/trace" + "strconv" + "strings" + "sync" + "testing" + "time" +) + +var ( + saveTraces = flag.Bool("savetraces", false, "save traces collected by tests") +) + +// TestEventBatch tests Flush calls that happen during Start +// don't produce corrupted traces. +func TestEventBatch(t *testing.T) { + if race.Enabled { + t.Skip("skipping in race mode") + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + if testing.Short() { + t.Skip("skipping in short mode") + } + // During Start, bunch of records are written to reflect the current + // snapshot of the program, including state of each goroutines. + // And some string constants are written to the trace to aid trace + // parsing. This test checks Flush of the buffer occurred during + // this process doesn't cause corrupted traces. + // When a Flush is called during Start is complicated + // so we test with a range of number of goroutines hoping that one + // of them triggers Flush. + // This range was chosen to fill up a ~64KB buffer with traceEvGoCreate + // and traceEvGoWaiting events (12~13bytes per goroutine). + for g := 4950; g < 5050; g++ { + n := g + t.Run("G="+strconv.Itoa(n), func(t *testing.T) { + var wg sync.WaitGroup + wg.Add(n) + + in := make(chan bool, 1000) + for i := 0; i < n; i++ { + go func() { + <-in + wg.Done() + }() + } + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + for i := 0; i < n; i++ { + in <- true + } + wg.Wait() + Stop() + + _, err := trace.Parse(buf, "") + if err == trace.ErrTimeOrder { + t.Skipf("skipping trace: %v", err) + } + + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } + }) + } +} + +func TestTraceStartStop(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + Stop() + size := buf.Len() + if size == 0 { + t.Fatalf("trace is empty") + } + time.Sleep(100 * time.Millisecond) + if size != buf.Len() { + t.Fatalf("trace writes after stop: %v -> %v", size, buf.Len()) + } + saveTrace(t, buf, "TestTraceStartStop") +} + +func TestTraceDoubleStart(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + Stop() + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + if err := Start(buf); err == nil { + t.Fatalf("succeed to start tracing second time") + } + Stop() + Stop() +} + +func TestTrace(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + Stop() + saveTrace(t, buf, "TestTrace") + _, err := trace.Parse(buf, "") + if err == trace.ErrTimeOrder { + t.Skipf("skipping trace: %v", err) + } + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } +} + +func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc) { + res, err := trace.Parse(r, "") + if err == trace.ErrTimeOrder { + t.Skipf("skipping trace: %v", err) + } + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } + gs := trace.GoroutineStats(res.Events) + for goid := range gs { + // We don't do any particular checks on the result at the moment. + // But still check that RelatedGoroutines does not crash, hang, etc. + _ = trace.RelatedGoroutines(res.Events, goid) + } + return res.Events, gs +} + +func testBrokenTimestamps(t *testing.T, data []byte) { + // On some processors cputicks (used to generate trace timestamps) + // produce non-monotonic timestamps. It is important that the parser + // distinguishes logically inconsistent traces (e.g. missing, excessive + // or misordered events) from broken timestamps. The former is a bug + // in tracer, the latter is a machine issue. + // So now that we have a consistent trace, test that (1) parser does + // not return a logical error in case of broken timestamps + // and (2) broken timestamps are eventually detected and reported. + trace.BreakTimestampsForTesting = true + defer func() { + trace.BreakTimestampsForTesting = false + }() + for i := 0; i < 1e4; i++ { + _, err := trace.Parse(bytes.NewReader(data), "") + if err == trace.ErrTimeOrder { + return + } + if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } + } +} + +func TestTraceStress(t *testing.T) { + if runtime.GOOS == "js" { + t.Skip("no os.Pipe on js") + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + if testing.Short() { + t.Skip("skipping in -short mode") + } + + var wg sync.WaitGroup + done := make(chan bool) + + // Create a goroutine blocked before tracing. + wg.Add(1) + go func() { + <-done + wg.Done() + }() + + // Create a goroutine blocked in syscall before tracing. + rp, wp, err := os.Pipe() + if err != nil { + t.Fatalf("failed to create pipe: %v", err) + } + defer func() { + rp.Close() + wp.Close() + }() + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + time.Sleep(time.Millisecond) // give the goroutine above time to block + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + procs := runtime.GOMAXPROCS(10) + time.Sleep(50 * time.Millisecond) // test proc stop/start events + + go func() { + runtime.LockOSThread() + for { + select { + case <-done: + return + default: + runtime.Gosched() + } + } + }() + + runtime.GC() + // Trigger GC from malloc. + n := int(1e3) + if isMemoryConstrained() { + // Reduce allocation to avoid running out of + // memory on the builder - see issue/12032. + n = 512 + } + for i := 0; i < n; i++ { + _ = make([]byte, 1<<20) + } + + // Create a bunch of busy goroutines to load all Ps. + for p := 0; p < 10; p++ { + wg.Add(1) + go func() { + // Do something useful. + tmp := make([]byte, 1<<16) + for i := range tmp { + tmp[i]++ + } + _ = tmp + <-done + wg.Done() + }() + } + + // Block in syscall. + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + + // Test timers. + timerDone := make(chan bool) + go func() { + time.Sleep(time.Millisecond) + timerDone <- true + }() + <-timerDone + + // A bit of network. + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("listen failed: %v", err) + } + defer ln.Close() + go func() { + c, err := ln.Accept() + if err != nil { + return + } + time.Sleep(time.Millisecond) + var buf [1]byte + c.Write(buf[:]) + c.Close() + }() + c, err := net.Dial("tcp", ln.Addr().String()) + if err != nil { + t.Fatalf("dial failed: %v", err) + } + var tmp [1]byte + c.Read(tmp[:]) + c.Close() + + go func() { + runtime.Gosched() + select {} + }() + + // Unblock helper goroutines and wait them to finish. + wp.Write(tmp[:]) + wp.Write(tmp[:]) + close(done) + wg.Wait() + + runtime.GOMAXPROCS(procs) + + Stop() + saveTrace(t, buf, "TestTraceStress") + trace := buf.Bytes() + parseTrace(t, buf) + testBrokenTimestamps(t, trace) +} + +// isMemoryConstrained reports whether the current machine is likely +// to be memory constrained. +// This was originally for the openbsd/arm builder (Issue 12032). +// TODO: move this to testenv? Make this look at memory? Look at GO_BUILDER_NAME? +func isMemoryConstrained() bool { + if runtime.GOOS == "plan9" { + return true + } + switch runtime.GOARCH { + case "arm", "mips", "mipsle": + return true + } + return false +} + +// Do a bunch of various stuff (timers, GC, network, etc) in a separate goroutine. +// And concurrently with all that start/stop trace 3 times. +func TestTraceStressStartStop(t *testing.T) { + if runtime.GOOS == "js" { + t.Skip("no os.Pipe on js") + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8)) + outerDone := make(chan bool) + + go func() { + defer func() { + outerDone <- true + }() + + var wg sync.WaitGroup + done := make(chan bool) + + wg.Add(1) + go func() { + <-done + wg.Done() + }() + + rp, wp, err := os.Pipe() + if err != nil { + t.Errorf("failed to create pipe: %v", err) + return + } + defer func() { + rp.Close() + wp.Close() + }() + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + time.Sleep(time.Millisecond) + + go func() { + runtime.LockOSThread() + for { + select { + case <-done: + return + default: + runtime.Gosched() + } + } + }() + + runtime.GC() + // Trigger GC from malloc. + n := int(1e3) + if isMemoryConstrained() { + // Reduce allocation to avoid running out of + // memory on the builder. + n = 512 + } + for i := 0; i < n; i++ { + _ = make([]byte, 1<<20) + } + + // Create a bunch of busy goroutines to load all Ps. + for p := 0; p < 10; p++ { + wg.Add(1) + go func() { + // Do something useful. + tmp := make([]byte, 1<<16) + for i := range tmp { + tmp[i]++ + } + _ = tmp + <-done + wg.Done() + }() + } + + // Block in syscall. + wg.Add(1) + go func() { + var tmp [1]byte + rp.Read(tmp[:]) + <-done + wg.Done() + }() + + runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + + // Test timers. + timerDone := make(chan bool) + go func() { + time.Sleep(time.Millisecond) + timerDone <- true + }() + <-timerDone + + // A bit of network. + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Errorf("listen failed: %v", err) + return + } + defer ln.Close() + go func() { + c, err := ln.Accept() + if err != nil { + return + } + time.Sleep(time.Millisecond) + var buf [1]byte + c.Write(buf[:]) + c.Close() + }() + c, err := net.Dial("tcp", ln.Addr().String()) + if err != nil { + t.Errorf("dial failed: %v", err) + return + } + var tmp [1]byte + c.Read(tmp[:]) + c.Close() + + go func() { + runtime.Gosched() + select {} + }() + + // Unblock helper goroutines and wait them to finish. + wp.Write(tmp[:]) + wp.Write(tmp[:]) + close(done) + wg.Wait() + }() + + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + time.Sleep(time.Millisecond) + Stop() + saveTrace(t, buf, "TestTraceStressStartStop") + trace := buf.Bytes() + parseTrace(t, buf) + testBrokenTimestamps(t, trace) + } + <-outerDone +} + +func TestTraceFutileWakeup(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8)) + c0 := make(chan int, 1) + c1 := make(chan int, 1) + c2 := make(chan int, 1) + const procs = 2 + var done sync.WaitGroup + done.Add(4 * procs) + for p := 0; p < procs; p++ { + const iters = 1e3 + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + c0 <- 0 + } + done.Done() + }() + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + <-c0 + } + done.Done() + }() + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + select { + case c1 <- 0: + case c2 <- 0: + } + } + done.Done() + }() + go func() { + for i := 0; i < iters; i++ { + runtime.Gosched() + select { + case <-c1: + case <-c2: + } + } + done.Done() + }() + } + done.Wait() + + Stop() + saveTrace(t, buf, "TestTraceFutileWakeup") + events, _ := parseTrace(t, buf) + // Check that (1) trace does not contain EvFutileWakeup events and + // (2) there are no consecutive EvGoBlock/EvGCStart/EvGoBlock events + // (we call runtime.Gosched between all operations, so these would be futile wakeups). + gs := make(map[uint64]int) + for _, ev := range events { + switch ev.Type { + case trace.EvFutileWakeup: + t.Fatalf("found EvFutileWakeup event") + case trace.EvGoBlockSend, trace.EvGoBlockRecv, trace.EvGoBlockSelect: + if gs[ev.G] == 2 { + t.Fatalf("goroutine %v blocked on %v at %v right after start", + ev.G, trace.EventDescriptions[ev.Type].Name, ev.Ts) + } + if gs[ev.G] == 1 { + t.Fatalf("goroutine %v blocked on %v at %v while blocked", + ev.G, trace.EventDescriptions[ev.Type].Name, ev.Ts) + } + gs[ev.G] = 1 + case trace.EvGoStart: + if gs[ev.G] == 1 { + gs[ev.G] = 2 + } + default: + delete(gs, ev.G) + } + } +} + +func TestTraceCPUProfile(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + + cpuBuf := new(bytes.Buffer) + if err := pprof.StartCPUProfile(cpuBuf); err != nil { + t.Skipf("failed to start CPU profile: %v", err) + } + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + dur := 100 * time.Millisecond + func() { + // Create a region in the execution trace. Set and clear goroutine + // labels fully within that region, so we know that any CPU profile + // sample with the label must also be eligible for inclusion in the + // execution trace. + ctx := context.Background() + defer StartRegion(ctx, "cpuHogger").End() + pprof.Do(ctx, pprof.Labels("tracing", "on"), func(ctx context.Context) { + cpuHogger(cpuHog1, &salt1, dur) + }) + // Be sure the execution trace's view, when filtered to this goroutine + // via the explicit goroutine ID in each event, gets many more samples + // than the CPU profiler when filtered to this goroutine via labels. + cpuHogger(cpuHog1, &salt1, dur) + }() + + Stop() + pprof.StopCPUProfile() + saveTrace(t, buf, "TestTraceCPUProfile") + + prof, err := profile.Parse(cpuBuf) + if err != nil { + t.Fatalf("failed to parse CPU profile: %v", err) + } + // Examine the CPU profiler's view. Filter it to only include samples from + // the single test goroutine. Use labels to execute that filter: they should + // apply to all work done while that goroutine is getg().m.curg, and they + // should apply to no other goroutines. + pprofSamples := 0 + pprofStacks := make(map[string]int) + for _, s := range prof.Sample { + if s.Label["tracing"] != nil { + var fns []string + var leaf string + for _, loc := range s.Location { + for _, line := range loc.Line { + fns = append(fns, fmt.Sprintf("%s:%d", line.Function.Name, line.Line)) + leaf = line.Function.Name + } + } + // runtime.sigprof synthesizes call stacks when "normal traceback is + // impossible or has failed", using particular placeholder functions + // to represent common failure cases. Look for those functions in + // the leaf position as a sign that the call stack and its + // symbolization are more complex than this test can handle. + // + // TODO: Make the symbolization done by the execution tracer and CPU + // profiler match up even in these harder cases. See #53378. + switch leaf { + case "runtime._System", "runtime._GC", "runtime._ExternalCode", "runtime._VDSO": + continue + } + stack := strings.Join(fns, " ") + samples := int(s.Value[0]) + pprofSamples += samples + pprofStacks[stack] += samples + } + } + if pprofSamples == 0 { + t.Skipf("CPU profile did not include any samples while tracing was active\n%s", prof) + } + + // Examine the execution tracer's view of the CPU profile samples. Filter it + // to only include samples from the single test goroutine. Use the goroutine + // ID that was recorded in the events: that should reflect getg().m.curg, + // same as the profiler's labels (even when the M is using its g0 stack). + totalTraceSamples := 0 + traceSamples := 0 + traceStacks := make(map[string]int) + events, _ := parseTrace(t, buf) + var hogRegion *trace.Event + for _, ev := range events { + if ev.Type == trace.EvUserRegion && ev.Args[1] == 0 && ev.SArgs[0] == "cpuHogger" { + // mode "0" indicates region start + hogRegion = ev + } + } + if hogRegion == nil { + t.Fatalf("execution trace did not identify cpuHogger goroutine") + } else if hogRegion.Link == nil { + t.Fatalf("execution trace did not close cpuHogger region") + } + for _, ev := range events { + if ev.Type == trace.EvCPUSample { + totalTraceSamples++ + if ev.G == hogRegion.G { + traceSamples++ + var fns []string + for _, frame := range ev.Stk { + if frame.Fn != "runtime.goexit" { + fns = append(fns, fmt.Sprintf("%s:%d", frame.Fn, frame.Line)) + } + } + stack := strings.Join(fns, " ") + traceStacks[stack]++ + } + } + } + + // The execution trace may drop CPU profile samples if the profiling buffer + // overflows. Based on the size of profBufWordCount, that takes a bit over + // 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've + // hit that case, then we definitely have at least one full buffer's worth + // of CPU samples, so we'll call that success. + overflowed := totalTraceSamples >= 1900 + if traceSamples < pprofSamples { + t.Logf("exectution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples) + if !overflowed { + t.Fail() + } + } + + for stack, traceSamples := range traceStacks { + pprofSamples := pprofStacks[stack] + delete(pprofStacks, stack) + if traceSamples < pprofSamples { + t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace", + stack, pprofSamples, traceSamples) + if !overflowed { + t.Fail() + } + } + } + for stack, pprofSamples := range pprofStacks { + t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack) + if !overflowed { + t.Fail() + } + } + + if t.Failed() { + t.Logf("execution trace CPU samples:") + for stack, samples := range traceStacks { + t.Logf("%d: %q", samples, stack) + } + t.Logf("CPU profile:\n%v", prof) + } +} + +func cpuHogger(f func(x int) int, y *int, dur time.Duration) { + // We only need to get one 100 Hz clock tick, so we've got + // a large safety buffer. + // But do at least 500 iterations (which should take about 100ms), + // otherwise TestCPUProfileMultithreaded can fail if only one + // thread is scheduled during the testing period. + t0 := time.Now() + accum := *y + for i := 0; i < 500 || time.Since(t0) < dur; i++ { + accum = f(accum) + } + *y = accum +} + +var ( + salt1 = 0 +) + +// The actual CPU hogging function. +// Must not call other functions nor access heap/globals in the loop, +// otherwise under race detector the samples will be in the race runtime. +func cpuHog1(x int) int { + return cpuHog0(x, 1e5) +} + +func cpuHog0(x, n int) int { + foo := x + for i := 0; i < n; i++ { + if i%1000 == 0 { + // Spend time in mcall, stored as gp.m.curg, with g0 running + runtime.Gosched() + } + if foo > 0 { + foo *= foo + } else { + foo *= foo + 1 + } + } + return foo +} + +func saveTrace(t *testing.T, buf *bytes.Buffer, name string) { + if !*saveTraces { + return + } + if err := os.WriteFile(name+".trace", buf.Bytes(), 0600); err != nil { + t.Errorf("failed to write trace file: %s", err) + } +} |