summaryrefslogtreecommitdiffstats
path: root/src/internal/trace
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:23:18 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:23:18 +0000
commit43a123c1ae6613b3efeed291fa552ecd909d3acf (patch)
treefd92518b7024bc74031f78a1cf9e454b65e73665 /src/internal/trace
parentInitial commit. (diff)
downloadgolang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.tar.xz
golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.zip
Adding upstream version 1.20.14.upstream/1.20.14upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/internal/trace')
-rw-r--r--src/internal/trace/gc.go825
-rw-r--r--src/internal/trace/gc_test.go202
-rw-r--r--src/internal/trace/goroutines.go358
-rwxr-xr-xsrc/internal/trace/mkcanned.bash20
-rw-r--r--src/internal/trace/mud.go223
-rw-r--r--src/internal/trace/mud_test.go87
-rw-r--r--src/internal/trace/order.go285
-rw-r--r--src/internal/trace/parser.go1142
-rw-r--r--src/internal/trace/parser_test.go110
-rw-r--r--src/internal/trace/testdata/http_1_10_goodbin0 -> 2201 bytes
-rw-r--r--src/internal/trace/testdata/http_1_11_goodbin0 -> 2779 bytes
-rw-r--r--src/internal/trace/testdata/http_1_19_goodbin0 -> 28172 bytes
-rw-r--r--src/internal/trace/testdata/http_1_5_goodbin0 -> 42218 bytes
-rw-r--r--src/internal/trace/testdata/http_1_7_goodbin0 -> 1971 bytes
-rw-r--r--src/internal/trace/testdata/http_1_9_goodbin0 -> 2187 bytes
-rw-r--r--src/internal/trace/testdata/stress_1_10_goodbin0 -> 370999 bytes
-rw-r--r--src/internal/trace/testdata/stress_1_11_goodbin0 -> 370129 bytes
-rw-r--r--src/internal/trace/testdata/stress_1_19_goodbin0 -> 322338 bytes
-rw-r--r--src/internal/trace/testdata/stress_1_5_goodbin0 -> 7446 bytes
-rw-r--r--src/internal/trace/testdata/stress_1_5_unorderedbin0 -> 8194 bytes
-rw-r--r--src/internal/trace/testdata/stress_1_7_goodbin0 -> 396526 bytes
-rw-r--r--src/internal/trace/testdata/stress_1_9_goodbin0 -> 365129 bytes
-rw-r--r--src/internal/trace/testdata/stress_start_stop_1_10_goodbin0 -> 6338 bytes
-rw-r--r--src/internal/trace/testdata/stress_start_stop_1_11_goodbin0 -> 4882 bytes
-rw-r--r--src/internal/trace/testdata/stress_start_stop_1_19_goodbin0 -> 7448 bytes
-rw-r--r--src/internal/trace/testdata/stress_start_stop_1_5_goodbin0 -> 6997 bytes
-rw-r--r--src/internal/trace/testdata/stress_start_stop_1_7_goodbin0 -> 2055 bytes
-rw-r--r--src/internal/trace/testdata/stress_start_stop_1_9_goodbin0 -> 6271 bytes
-rw-r--r--src/internal/trace/testdata/user_task_region_1_11_goodbin0 -> 2000 bytes
-rw-r--r--src/internal/trace/testdata/user_task_region_1_19_goodbin0 -> 1922 bytes
-rw-r--r--src/internal/trace/writer.go49
31 files changed, 3301 insertions, 0 deletions
diff --git a/src/internal/trace/gc.go b/src/internal/trace/gc.go
new file mode 100644
index 0000000..c1bc862
--- /dev/null
+++ b/src/internal/trace/gc.go
@@ -0,0 +1,825 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+ "container/heap"
+ "math"
+ "sort"
+ "strings"
+ "time"
+)
+
+// MutatorUtil is a change in mutator utilization at a particular
+// time. Mutator utilization functions are represented as a
+// time-ordered []MutatorUtil.
+type MutatorUtil struct {
+ Time int64
+ // Util is the mean mutator utilization starting at Time. This
+ // is in the range [0, 1].
+ Util float64
+}
+
+// UtilFlags controls the behavior of MutatorUtilization.
+type UtilFlags int
+
+const (
+ // UtilSTW means utilization should account for STW events.
+ UtilSTW UtilFlags = 1 << iota
+ // UtilBackground means utilization should account for
+ // background mark workers.
+ UtilBackground
+ // UtilAssist means utilization should account for mark
+ // assists.
+ UtilAssist
+ // UtilSweep means utilization should account for sweeping.
+ UtilSweep
+
+ // UtilPerProc means each P should be given a separate
+ // utilization function. Otherwise, there is a single function
+ // and each P is given a fraction of the utilization.
+ UtilPerProc
+)
+
+// MutatorUtilization returns a set of mutator utilization functions
+// for the given trace. Each function will always end with 0
+// utilization. The bounds of each function are implicit in the first
+// and last event; outside of these bounds each function is undefined.
+//
+// If the UtilPerProc flag is not given, this always returns a single
+// utilization function. Otherwise, it returns one function per P.
+func MutatorUtilization(events []*Event, flags UtilFlags) [][]MutatorUtil {
+ if len(events) == 0 {
+ return nil
+ }
+
+ type perP struct {
+ // gc > 0 indicates that GC is active on this P.
+ gc int
+ // series the logical series number for this P. This
+ // is necessary because Ps may be removed and then
+ // re-added, and then the new P needs a new series.
+ series int
+ }
+ ps := []perP{}
+ stw := 0
+
+ out := [][]MutatorUtil{}
+ assists := map[uint64]bool{}
+ block := map[uint64]*Event{}
+ bgMark := map[uint64]bool{}
+
+ for _, ev := range events {
+ switch ev.Type {
+ case EvGomaxprocs:
+ gomaxprocs := int(ev.Args[0])
+ if len(ps) > gomaxprocs {
+ if flags&UtilPerProc != 0 {
+ // End each P's series.
+ for _, p := range ps[gomaxprocs:] {
+ out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0})
+ }
+ }
+ ps = ps[:gomaxprocs]
+ }
+ for len(ps) < gomaxprocs {
+ // Start new P's series.
+ series := 0
+ if flags&UtilPerProc != 0 || len(out) == 0 {
+ series = len(out)
+ out = append(out, []MutatorUtil{{ev.Ts, 1}})
+ }
+ ps = append(ps, perP{series: series})
+ }
+ case EvGCSTWStart:
+ if flags&UtilSTW != 0 {
+ stw++
+ }
+ case EvGCSTWDone:
+ if flags&UtilSTW != 0 {
+ stw--
+ }
+ case EvGCMarkAssistStart:
+ if flags&UtilAssist != 0 {
+ ps[ev.P].gc++
+ assists[ev.G] = true
+ }
+ case EvGCMarkAssistDone:
+ if flags&UtilAssist != 0 {
+ ps[ev.P].gc--
+ delete(assists, ev.G)
+ }
+ case EvGCSweepStart:
+ if flags&UtilSweep != 0 {
+ ps[ev.P].gc++
+ }
+ case EvGCSweepDone:
+ if flags&UtilSweep != 0 {
+ ps[ev.P].gc--
+ }
+ case EvGoStartLabel:
+ if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" {
+ // Background mark worker.
+ //
+ // If we're in per-proc mode, we don't
+ // count dedicated workers because
+ // they kick all of the goroutines off
+ // that P, so don't directly
+ // contribute to goroutine latency.
+ if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") {
+ bgMark[ev.G] = true
+ ps[ev.P].gc++
+ }
+ }
+ fallthrough
+ case EvGoStart:
+ if assists[ev.G] {
+ // Unblocked during assist.
+ ps[ev.P].gc++
+ }
+ block[ev.G] = ev.Link
+ default:
+ if ev != block[ev.G] {
+ continue
+ }
+
+ if assists[ev.G] {
+ // Blocked during assist.
+ ps[ev.P].gc--
+ }
+ if bgMark[ev.G] {
+ // Background mark worker done.
+ ps[ev.P].gc--
+ delete(bgMark, ev.G)
+ }
+ delete(block, ev.G)
+ }
+
+ if flags&UtilPerProc == 0 {
+ // Compute the current average utilization.
+ if len(ps) == 0 {
+ continue
+ }
+ gcPs := 0
+ if stw > 0 {
+ gcPs = len(ps)
+ } else {
+ for i := range ps {
+ if ps[i].gc > 0 {
+ gcPs++
+ }
+ }
+ }
+ mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))}
+
+ // Record the utilization change. (Since
+ // len(ps) == len(out), we know len(out) > 0.)
+ out[0] = addUtil(out[0], mu)
+ } else {
+ // Check for per-P utilization changes.
+ for i := range ps {
+ p := &ps[i]
+ util := 1.0
+ if stw > 0 || p.gc > 0 {
+ util = 0.0
+ }
+ out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util})
+ }
+ }
+ }
+
+ // Add final 0 utilization event to any remaining series. This
+ // is important to mark the end of the trace. The exact value
+ // shouldn't matter since no window should extend beyond this,
+ // but using 0 is symmetric with the start of the trace.
+ mu := MutatorUtil{events[len(events)-1].Ts, 0}
+ for i := range ps {
+ out[ps[i].series] = addUtil(out[ps[i].series], mu)
+ }
+ return out
+}
+
+func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
+ if len(util) > 0 {
+ if mu.Util == util[len(util)-1].Util {
+ // No change.
+ return util
+ }
+ if mu.Time == util[len(util)-1].Time {
+ // Take the lowest utilization at a time stamp.
+ if mu.Util < util[len(util)-1].Util {
+ util[len(util)-1] = mu
+ }
+ return util
+ }
+ }
+ return append(util, mu)
+}
+
+// totalUtil is total utilization, measured in nanoseconds. This is a
+// separate type primarily to distinguish it from mean utilization,
+// which is also a float64.
+type totalUtil float64
+
+func totalUtilOf(meanUtil float64, dur int64) totalUtil {
+ return totalUtil(meanUtil * float64(dur))
+}
+
+// mean returns the mean utilization over dur.
+func (u totalUtil) mean(dur time.Duration) float64 {
+ return float64(u) / float64(dur)
+}
+
+// An MMUCurve is the minimum mutator utilization curve across
+// multiple window sizes.
+type MMUCurve struct {
+ series []mmuSeries
+}
+
+type mmuSeries struct {
+ util []MutatorUtil
+ // sums[j] is the cumulative sum of util[:j].
+ sums []totalUtil
+ // bands summarizes util in non-overlapping bands of duration
+ // bandDur.
+ bands []mmuBand
+ // bandDur is the duration of each band.
+ bandDur int64
+}
+
+type mmuBand struct {
+ // minUtil is the minimum instantaneous mutator utilization in
+ // this band.
+ minUtil float64
+ // cumUtil is the cumulative total mutator utilization between
+ // time 0 and the left edge of this band.
+ cumUtil totalUtil
+
+ // integrator is the integrator for the left edge of this
+ // band.
+ integrator integrator
+}
+
+// NewMMUCurve returns an MMU curve for the given mutator utilization
+// function.
+func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
+ series := make([]mmuSeries, len(utils))
+ for i, util := range utils {
+ series[i] = newMMUSeries(util)
+ }
+ return &MMUCurve{series}
+}
+
+// bandsPerSeries is the number of bands to divide each series into.
+// This is only changed by tests.
+var bandsPerSeries = 1000
+
+func newMMUSeries(util []MutatorUtil) mmuSeries {
+ // Compute cumulative sum.
+ sums := make([]totalUtil, len(util))
+ var prev MutatorUtil
+ var sum totalUtil
+ for j, u := range util {
+ sum += totalUtilOf(prev.Util, u.Time-prev.Time)
+ sums[j] = sum
+ prev = u
+ }
+
+ // Divide the utilization curve up into equal size
+ // non-overlapping "bands" and compute a summary for each of
+ // these bands.
+ //
+ // Compute the duration of each band.
+ numBands := bandsPerSeries
+ if numBands > len(util) {
+ // There's no point in having lots of bands if there
+ // aren't many events.
+ numBands = len(util)
+ }
+ dur := util[len(util)-1].Time - util[0].Time
+ bandDur := (dur + int64(numBands) - 1) / int64(numBands)
+ if bandDur < 1 {
+ bandDur = 1
+ }
+ // Compute the bands. There are numBands+1 bands in order to
+ // record the final cumulative sum.
+ bands := make([]mmuBand, numBands+1)
+ s := mmuSeries{util, sums, bands, bandDur}
+ leftSum := integrator{&s, 0}
+ for i := range bands {
+ startTime, endTime := s.bandTime(i)
+ cumUtil := leftSum.advance(startTime)
+ predIdx := leftSum.pos
+ minUtil := 1.0
+ for i := predIdx; i < len(util) && util[i].Time < endTime; i++ {
+ minUtil = math.Min(minUtil, util[i].Util)
+ }
+ bands[i] = mmuBand{minUtil, cumUtil, leftSum}
+ }
+
+ return s
+}
+
+func (s *mmuSeries) bandTime(i int) (start, end int64) {
+ start = int64(i)*s.bandDur + s.util[0].Time
+ end = start + s.bandDur
+ return
+}
+
+type bandUtil struct {
+ // Utilization series index
+ series int
+ // Band index
+ i int
+ // Lower bound of mutator utilization for all windows
+ // with a left edge in this band.
+ utilBound float64
+}
+
+type bandUtilHeap []bandUtil
+
+func (h bandUtilHeap) Len() int {
+ return len(h)
+}
+
+func (h bandUtilHeap) Less(i, j int) bool {
+ return h[i].utilBound < h[j].utilBound
+}
+
+func (h bandUtilHeap) Swap(i, j int) {
+ h[i], h[j] = h[j], h[i]
+}
+
+func (h *bandUtilHeap) Push(x any) {
+ *h = append(*h, x.(bandUtil))
+}
+
+func (h *bandUtilHeap) Pop() any {
+ x := (*h)[len(*h)-1]
+ *h = (*h)[:len(*h)-1]
+ return x
+}
+
+// UtilWindow is a specific window at Time.
+type UtilWindow struct {
+ Time int64
+ // MutatorUtil is the mean mutator utilization in this window.
+ MutatorUtil float64
+}
+
+type utilHeap []UtilWindow
+
+func (h utilHeap) Len() int {
+ return len(h)
+}
+
+func (h utilHeap) Less(i, j int) bool {
+ if h[i].MutatorUtil != h[j].MutatorUtil {
+ return h[i].MutatorUtil > h[j].MutatorUtil
+ }
+ return h[i].Time > h[j].Time
+}
+
+func (h utilHeap) Swap(i, j int) {
+ h[i], h[j] = h[j], h[i]
+}
+
+func (h *utilHeap) Push(x any) {
+ *h = append(*h, x.(UtilWindow))
+}
+
+func (h *utilHeap) Pop() any {
+ x := (*h)[len(*h)-1]
+ *h = (*h)[:len(*h)-1]
+ return x
+}
+
+// An accumulator takes a windowed mutator utilization function and
+// tracks various statistics for that function.
+type accumulator struct {
+ mmu float64
+
+ // bound is the mutator utilization bound where adding any
+ // mutator utilization above this bound cannot affect the
+ // accumulated statistics.
+ bound float64
+
+ // Worst N window tracking
+ nWorst int
+ wHeap utilHeap
+
+ // Mutator utilization distribution tracking
+ mud *mud
+ // preciseMass is the distribution mass that must be precise
+ // before accumulation is stopped.
+ preciseMass float64
+ // lastTime and lastMU are the previous point added to the
+ // windowed mutator utilization function.
+ lastTime int64
+ lastMU float64
+}
+
+// resetTime declares a discontinuity in the windowed mutator
+// utilization function by resetting the current time.
+func (acc *accumulator) resetTime() {
+ // This only matters for distribution collection, since that's
+ // the only thing that depends on the progression of the
+ // windowed mutator utilization function.
+ acc.lastTime = math.MaxInt64
+}
+
+// addMU adds a point to the windowed mutator utilization function at
+// (time, mu). This must be called for monotonically increasing values
+// of time.
+//
+// It returns true if further calls to addMU would be pointless.
+func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool {
+ if mu < acc.mmu {
+ acc.mmu = mu
+ }
+ acc.bound = acc.mmu
+
+ if acc.nWorst == 0 {
+ // If the minimum has reached zero, it can't go any
+ // lower, so we can stop early.
+ return mu == 0
+ }
+
+ // Consider adding this window to the n worst.
+ if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil {
+ // This window is lower than the K'th worst window.
+ //
+ // Check if there's any overlapping window
+ // already in the heap and keep whichever is
+ // worse.
+ for i, ui := range acc.wHeap {
+ if time+int64(window) > ui.Time && ui.Time+int64(window) > time {
+ if ui.MutatorUtil <= mu {
+ // Keep the first window.
+ goto keep
+ } else {
+ // Replace it with this window.
+ heap.Remove(&acc.wHeap, i)
+ break
+ }
+ }
+ }
+
+ heap.Push(&acc.wHeap, UtilWindow{time, mu})
+ if len(acc.wHeap) > acc.nWorst {
+ heap.Pop(&acc.wHeap)
+ }
+ keep:
+ }
+
+ if len(acc.wHeap) < acc.nWorst {
+ // We don't have N windows yet, so keep accumulating.
+ acc.bound = 1.0
+ } else {
+ // Anything above the least worst window has no effect.
+ acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil)
+ }
+
+ if acc.mud != nil {
+ if acc.lastTime != math.MaxInt64 {
+ // Update distribution.
+ acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime))
+ }
+ acc.lastTime, acc.lastMU = time, mu
+ if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok {
+ acc.bound = math.Max(acc.bound, mudBound)
+ } else {
+ // We haven't accumulated enough total precise
+ // mass yet to even reach our goal, so keep
+ // accumulating.
+ acc.bound = 1
+ }
+ // It's not worth checking percentiles every time, so
+ // just keep accumulating this band.
+ return false
+ }
+
+ // If we've found enough 0 utilizations, we can stop immediately.
+ return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0
+}
+
+// MMU returns the minimum mutator utilization for the given time
+// window. This is the minimum utilization for all windows of this
+// duration across the execution. The returned value is in the range
+// [0, 1].
+func (c *MMUCurve) MMU(window time.Duration) (mmu float64) {
+ acc := accumulator{mmu: 1.0, bound: 1.0}
+ c.mmu(window, &acc)
+ return acc.mmu
+}
+
+// Examples returns n specific examples of the lowest mutator
+// utilization for the given window size. The returned windows will be
+// disjoint (otherwise there would be a huge number of
+// mostly-overlapping windows at the single lowest point). There are
+// no guarantees on which set of disjoint windows this returns.
+func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) {
+ acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n}
+ c.mmu(window, &acc)
+ sort.Sort(sort.Reverse(acc.wHeap))
+ return ([]UtilWindow)(acc.wHeap)
+}
+
+// MUD returns mutator utilization distribution quantiles for the
+// given window size.
+//
+// The mutator utilization distribution is the distribution of mean
+// mutator utilization across all windows of the given window size in
+// the trace.
+//
+// The minimum mutator utilization is the minimum (0th percentile) of
+// this distribution. (However, if only the minimum is desired, it's
+// more efficient to use the MMU method.)
+func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 {
+ if len(quantiles) == 0 {
+ return []float64{}
+ }
+
+ // Each unrefined band contributes a known total mass to the
+ // distribution (bandDur except at the end), but in an unknown
+ // way. However, we know that all the mass it contributes must
+ // be at or above its worst-case mean mutator utilization.
+ //
+ // Hence, we refine bands until the highest desired
+ // distribution quantile is less than the next worst-case mean
+ // mutator utilization. At this point, all further
+ // contributions to the distribution must be beyond the
+ // desired quantile and hence cannot affect it.
+ //
+ // First, find the highest desired distribution quantile.
+ maxQ := quantiles[0]
+ for _, q := range quantiles {
+ if q > maxQ {
+ maxQ = q
+ }
+ }
+ // The distribution's mass is in units of time (it's not
+ // normalized because this would make it more annoying to
+ // account for future contributions of unrefined bands). The
+ // total final mass will be the duration of the trace itself
+ // minus the window size. Using this, we can compute the mass
+ // corresponding to quantile maxQ.
+ var duration int64
+ for _, s := range c.series {
+ duration1 := s.util[len(s.util)-1].Time - s.util[0].Time
+ if duration1 >= int64(window) {
+ duration += duration1 - int64(window)
+ }
+ }
+ qMass := float64(duration) * maxQ
+
+ // Accumulate the MUD until we have precise information for
+ // everything to the left of qMass.
+ acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: new(mud)}
+ acc.mud.setTrackMass(qMass)
+ c.mmu(window, &acc)
+
+ // Evaluate the quantiles on the accumulated MUD.
+ out := make([]float64, len(quantiles))
+ for i := range out {
+ mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i])
+ if math.IsNaN(mu) {
+ // There are a few legitimate ways this can
+ // happen:
+ //
+ // 1. If the window is the full trace
+ // duration, then the windowed MU function is
+ // only defined at a single point, so the MU
+ // distribution is not well-defined.
+ //
+ // 2. If there are no events, then the MU
+ // distribution has no mass.
+ //
+ // Either way, all of the quantiles will have
+ // converged toward the MMU at this point.
+ mu = acc.mmu
+ }
+ out[i] = mu
+ }
+ return out
+}
+
+func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
+ if window <= 0 {
+ acc.mmu = 0
+ return
+ }
+
+ var bandU bandUtilHeap
+ windows := make([]time.Duration, len(c.series))
+ for i, s := range c.series {
+ windows[i] = window
+ if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max {
+ windows[i] = max
+ }
+
+ bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i]))
+ if bandU == nil {
+ bandU = bandU1
+ } else {
+ bandU = append(bandU, bandU1...)
+ }
+ }
+
+ // Process bands from lowest utilization bound to highest.
+ heap.Init(&bandU)
+
+ // Refine each band into a precise window and MMU until
+ // refining the next lowest band can no longer affect the MMU
+ // or windows.
+ for len(bandU) > 0 && bandU[0].utilBound < acc.bound {
+ i := bandU[0].series
+ c.series[i].bandMMU(bandU[0].i, windows[i], acc)
+ heap.Pop(&bandU)
+ }
+}
+
+func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
+ // For each band, compute the worst-possible total mutator
+ // utilization for all windows that start in that band.
+
+ // minBands is the minimum number of bands a window can span
+ // and maxBands is the maximum number of bands a window can
+ // span in any alignment.
+ minBands := int((int64(window) + c.bandDur - 1) / c.bandDur)
+ maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur)
+ if window > 1 && maxBands < 2 {
+ panic("maxBands < 2")
+ }
+ tailDur := int64(window) % c.bandDur
+ nUtil := len(c.bands) - maxBands + 1
+ if nUtil < 0 {
+ nUtil = 0
+ }
+ bandU := make([]bandUtil, nUtil)
+ for i := range bandU {
+ // To compute the worst-case MU, we assume the minimum
+ // for any bands that are only partially overlapped by
+ // some window and the mean for any bands that are
+ // completely covered by all windows.
+ var util totalUtil
+
+ // Find the lowest and second lowest of the partial
+ // bands.
+ l := c.bands[i].minUtil
+ r1 := c.bands[i+minBands-1].minUtil
+ r2 := c.bands[i+maxBands-1].minUtil
+ minBand := math.Min(l, math.Min(r1, r2))
+ // Assume the worst window maximally overlaps the
+ // worst minimum and then the rest overlaps the second
+ // worst minimum.
+ if minBands == 1 {
+ util += totalUtilOf(minBand, int64(window))
+ } else {
+ util += totalUtilOf(minBand, c.bandDur)
+ midBand := 0.0
+ switch {
+ case minBand == l:
+ midBand = math.Min(r1, r2)
+ case minBand == r1:
+ midBand = math.Min(l, r2)
+ case minBand == r2:
+ midBand = math.Min(l, r1)
+ }
+ util += totalUtilOf(midBand, tailDur)
+ }
+
+ // Add the total mean MU of bands that are completely
+ // overlapped by all windows.
+ if minBands > 2 {
+ util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
+ }
+
+ bandU[i] = bandUtil{series, i, util.mean(window)}
+ }
+
+ return bandU
+}
+
+// bandMMU computes the precise minimum mutator utilization for
+// windows with a left edge in band bandIdx.
+func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
+ util := c.util
+
+ // We think of the mutator utilization over time as the
+ // box-filtered utilization function, which we call the
+ // "windowed mutator utilization function". The resulting
+ // function is continuous and piecewise linear (unless
+ // window==0, which we handle elsewhere), where the boundaries
+ // between segments occur when either edge of the window
+ // encounters a change in the instantaneous mutator
+ // utilization function. Hence, the minimum of this function
+ // will always occur when one of the edges of the window
+ // aligns with a utilization change, so these are the only
+ // points we need to consider.
+ //
+ // We compute the mutator utilization function incrementally
+ // by tracking the integral from t=0 to the left edge of the
+ // window and to the right edge of the window.
+ left := c.bands[bandIdx].integrator
+ right := left
+ time, endTime := c.bandTime(bandIdx)
+ if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime {
+ endTime = utilEnd
+ }
+ acc.resetTime()
+ for {
+ // Advance edges to time and time+window.
+ mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window)
+ if acc.addMU(time, mu, window) {
+ break
+ }
+ if time == endTime {
+ break
+ }
+
+ // The maximum slope of the windowed mutator
+ // utilization function is 1/window, so we can always
+ // advance the time by at least (mu - mmu) * window
+ // without dropping below mmu.
+ minTime := time + int64((mu-acc.bound)*float64(window))
+
+ // Advance the window to the next time where either
+ // the left or right edge of the window encounters a
+ // change in the utilization curve.
+ if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 {
+ time = t1
+ } else {
+ time = t2
+ }
+ if time < minTime {
+ time = minTime
+ }
+ if time >= endTime {
+ // For MMUs we could stop here, but for MUDs
+ // it's important that we span the entire
+ // band.
+ time = endTime
+ }
+ }
+}
+
+// An integrator tracks a position in a utilization function and
+// integrates it.
+type integrator struct {
+ u *mmuSeries
+ // pos is the index in u.util of the current time's non-strict
+ // predecessor.
+ pos int
+}
+
+// advance returns the integral of the utilization function from 0 to
+// time. advance must be called on monotonically increasing values of
+// times.
+func (in *integrator) advance(time int64) totalUtil {
+ util, pos := in.u.util, in.pos
+ // Advance pos until pos+1 is time's strict successor (making
+ // pos time's non-strict predecessor).
+ //
+ // Very often, this will be nearby, so we optimize that case,
+ // but it may be arbitrarily far away, so we handled that
+ // efficiently, too.
+ const maxSeq = 8
+ if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time {
+ // Nearby. Use a linear scan.
+ for pos+1 < len(util) && util[pos+1].Time <= time {
+ pos++
+ }
+ } else {
+ // Far. Binary search for time's strict successor.
+ l, r := pos, len(util)
+ for l < r {
+ h := int(uint(l+r) >> 1)
+ if util[h].Time <= time {
+ l = h + 1
+ } else {
+ r = h
+ }
+ }
+ pos = l - 1 // Non-strict predecessor.
+ }
+ in.pos = pos
+ var partial totalUtil
+ if time != util[pos].Time {
+ partial = totalUtilOf(util[pos].Util, time-util[pos].Time)
+ }
+ return in.u.sums[pos] + partial
+}
+
+// next returns the smallest time t' > time of a change in the
+// utilization function.
+func (in *integrator) next(time int64) int64 {
+ for _, u := range in.u.util[in.pos:] {
+ if u.Time > time {
+ return u.Time
+ }
+ }
+ return 1<<63 - 1
+}
diff --git a/src/internal/trace/gc_test.go b/src/internal/trace/gc_test.go
new file mode 100644
index 0000000..9b9771e
--- /dev/null
+++ b/src/internal/trace/gc_test.go
@@ -0,0 +1,202 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+ "bytes"
+ "math"
+ "os"
+ "testing"
+ "time"
+)
+
+// aeq returns true if x and y are equal up to 8 digits (1 part in 100
+// million).
+func aeq(x, y float64) bool {
+ if x < 0 && y < 0 {
+ x, y = -x, -y
+ }
+ const digits = 8
+ factor := 1 - math.Pow(10, -digits+1)
+ return x*factor <= y && y*factor <= x
+}
+
+func TestMMU(t *testing.T) {
+ t.Parallel()
+
+ // MU
+ // 1.0 ***** ***** *****
+ // 0.5 * * * *
+ // 0.0 ***** *****
+ // 0 1 2 3 4 5
+ util := [][]MutatorUtil{{
+ {0e9, 1},
+ {1e9, 0},
+ {2e9, 1},
+ {3e9, 0},
+ {4e9, 1},
+ {5e9, 0},
+ }}
+ mmuCurve := NewMMUCurve(util)
+
+ for _, test := range []struct {
+ window time.Duration
+ want float64
+ worst []float64
+ }{
+ {0, 0, []float64{}},
+ {time.Millisecond, 0, []float64{0, 0}},
+ {time.Second, 0, []float64{0, 0}},
+ {2 * time.Second, 0.5, []float64{0.5, 0.5}},
+ {3 * time.Second, 1 / 3.0, []float64{1 / 3.0}},
+ {4 * time.Second, 0.5, []float64{0.5}},
+ {5 * time.Second, 3 / 5.0, []float64{3 / 5.0}},
+ {6 * time.Second, 3 / 5.0, []float64{3 / 5.0}},
+ } {
+ if got := mmuCurve.MMU(test.window); !aeq(test.want, got) {
+ t.Errorf("for %s window, want mu = %f, got %f", test.window, test.want, got)
+ }
+ worst := mmuCurve.Examples(test.window, 2)
+ // Which exact windows are returned is unspecified
+ // (and depends on the exact banding), so we just
+ // check that we got the right number with the right
+ // utilizations.
+ if len(worst) != len(test.worst) {
+ t.Errorf("for %s window, want worst %v, got %v", test.window, test.worst, worst)
+ } else {
+ for i := range worst {
+ if worst[i].MutatorUtil != test.worst[i] {
+ t.Errorf("for %s window, want worst %v, got %v", test.window, test.worst, worst)
+ break
+ }
+ }
+ }
+ }
+}
+
+func TestMMUTrace(t *testing.T) {
+ // Can't be t.Parallel() because it modifies the
+ // testingOneBand package variable.
+ if testing.Short() {
+ // test input too big for all.bash
+ t.Skip("skipping in -short mode")
+ }
+
+ data, err := os.ReadFile("testdata/stress_1_10_good")
+ if err != nil {
+ t.Fatalf("failed to read input file: %v", err)
+ }
+ _, events, err := parse(bytes.NewReader(data), "")
+ if err != nil {
+ t.Fatalf("failed to parse trace: %s", err)
+ }
+ mu := MutatorUtilization(events.Events, UtilSTW|UtilBackground|UtilAssist)
+ mmuCurve := NewMMUCurve(mu)
+
+ // Test the optimized implementation against the "obviously
+ // correct" implementation.
+ for window := time.Nanosecond; window < 10*time.Second; window *= 10 {
+ want := mmuSlow(mu[0], window)
+ got := mmuCurve.MMU(window)
+ if !aeq(want, got) {
+ t.Errorf("want %f, got %f mutator utilization in window %s", want, got, window)
+ }
+ }
+
+ // Test MUD with band optimization against MUD without band
+ // optimization. We don't have a simple testing implementation
+ // of MUDs (the simplest implementation is still quite
+ // complex), but this is still a pretty good test.
+ defer func(old int) { bandsPerSeries = old }(bandsPerSeries)
+ bandsPerSeries = 1
+ mmuCurve2 := NewMMUCurve(mu)
+ quantiles := []float64{0, 1 - .999, 1 - .99}
+ for window := time.Microsecond; window < time.Second; window *= 10 {
+ mud1 := mmuCurve.MUD(window, quantiles)
+ mud2 := mmuCurve2.MUD(window, quantiles)
+ for i := range mud1 {
+ if !aeq(mud1[i], mud2[i]) {
+ t.Errorf("for quantiles %v at window %v, want %v, got %v", quantiles, window, mud2, mud1)
+ break
+ }
+ }
+ }
+}
+
+func BenchmarkMMU(b *testing.B) {
+ data, err := os.ReadFile("testdata/stress_1_10_good")
+ if err != nil {
+ b.Fatalf("failed to read input file: %v", err)
+ }
+ _, events, err := parse(bytes.NewReader(data), "")
+ if err != nil {
+ b.Fatalf("failed to parse trace: %s", err)
+ }
+ mu := MutatorUtilization(events.Events, UtilSTW|UtilBackground|UtilAssist|UtilSweep)
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ mmuCurve := NewMMUCurve(mu)
+ xMin, xMax := time.Microsecond, time.Second
+ logMin, logMax := math.Log(float64(xMin)), math.Log(float64(xMax))
+ const samples = 100
+ for i := 0; i < samples; i++ {
+ window := time.Duration(math.Exp(float64(i)/(samples-1)*(logMax-logMin) + logMin))
+ mmuCurve.MMU(window)
+ }
+ }
+}
+
+func mmuSlow(util []MutatorUtil, window time.Duration) (mmu float64) {
+ if max := time.Duration(util[len(util)-1].Time - util[0].Time); window > max {
+ window = max
+ }
+
+ mmu = 1.0
+
+ // muInWindow returns the mean mutator utilization between
+ // util[0].Time and end.
+ muInWindow := func(util []MutatorUtil, end int64) float64 {
+ total := 0.0
+ var prevU MutatorUtil
+ for _, u := range util {
+ if u.Time > end {
+ total += prevU.Util * float64(end-prevU.Time)
+ break
+ }
+ total += prevU.Util * float64(u.Time-prevU.Time)
+ prevU = u
+ }
+ return total / float64(end-util[0].Time)
+ }
+ update := func() {
+ for i, u := range util {
+ if u.Time+int64(window) > util[len(util)-1].Time {
+ break
+ }
+ mmu = math.Min(mmu, muInWindow(util[i:], u.Time+int64(window)))
+ }
+ }
+
+ // Consider all left-aligned windows.
+ update()
+ // Reverse the trace. Slightly subtle because each MutatorUtil
+ // is a *change*.
+ rutil := make([]MutatorUtil, len(util))
+ if util[len(util)-1].Util != 0 {
+ panic("irreversible trace")
+ }
+ for i, u := range util {
+ util1 := 0.0
+ if i != 0 {
+ util1 = util[i-1].Util
+ }
+ rutil[len(rutil)-i-1] = MutatorUtil{Time: -u.Time, Util: util1}
+ }
+ util = rutil
+ // Consider all right-aligned windows.
+ update()
+ return
+}
diff --git a/src/internal/trace/goroutines.go b/src/internal/trace/goroutines.go
new file mode 100644
index 0000000..4b4f13d
--- /dev/null
+++ b/src/internal/trace/goroutines.go
@@ -0,0 +1,358 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+ "sort"
+ "strings"
+)
+
+// GDesc contains statistics and execution details of a single goroutine.
+type GDesc struct {
+ ID uint64
+ Name string
+ PC uint64
+ CreationTime int64
+ StartTime int64
+ EndTime int64
+
+ // List of regions in the goroutine, sorted based on the start time.
+ Regions []*UserRegionDesc
+
+ // Statistics of execution time during the goroutine execution.
+ GExecutionStat
+
+ *gdesc // private part.
+}
+
+// UserRegionDesc represents a region and goroutine execution stats
+// while the region was active.
+type UserRegionDesc struct {
+ TaskID uint64
+ Name string
+
+ // Region start event. Normally EvUserRegion start event or nil,
+ // but can be EvGoCreate event if the region is a synthetic
+ // region representing task inheritance from the parent goroutine.
+ Start *Event
+
+ // Region end event. Normally EvUserRegion end event or nil,
+ // but can be EvGoStop or EvGoEnd event if the goroutine
+ // terminated without explicitly ending the region.
+ End *Event
+
+ GExecutionStat
+}
+
+// GExecutionStat contains statistics about a goroutine's execution
+// during a period of time.
+type GExecutionStat struct {
+ ExecTime int64
+ SchedWaitTime int64
+ IOTime int64
+ BlockTime int64
+ SyscallTime int64
+ GCTime int64
+ SweepTime int64
+ TotalTime int64
+}
+
+// sub returns the stats v-s.
+func (s GExecutionStat) sub(v GExecutionStat) (r GExecutionStat) {
+ r = s
+ r.ExecTime -= v.ExecTime
+ r.SchedWaitTime -= v.SchedWaitTime
+ r.IOTime -= v.IOTime
+ r.BlockTime -= v.BlockTime
+ r.SyscallTime -= v.SyscallTime
+ r.GCTime -= v.GCTime
+ r.SweepTime -= v.SweepTime
+ r.TotalTime -= v.TotalTime
+ return r
+}
+
+// snapshotStat returns the snapshot of the goroutine execution statistics.
+// This is called as we process the ordered trace event stream. lastTs and
+// activeGCStartTime are used to process pending statistics if this is called
+// before any goroutine end event.
+func (g *GDesc) snapshotStat(lastTs, activeGCStartTime int64) (ret GExecutionStat) {
+ ret = g.GExecutionStat
+
+ if g.gdesc == nil {
+ return ret // finalized GDesc. No pending state.
+ }
+
+ if activeGCStartTime != 0 { // terminating while GC is active
+ if g.CreationTime < activeGCStartTime {
+ ret.GCTime += lastTs - activeGCStartTime
+ } else {
+ // The goroutine's lifetime completely overlaps
+ // with a GC.
+ ret.GCTime += lastTs - g.CreationTime
+ }
+ }
+
+ if g.TotalTime == 0 {
+ ret.TotalTime = lastTs - g.CreationTime
+ }
+
+ if g.lastStartTime != 0 {
+ ret.ExecTime += lastTs - g.lastStartTime
+ }
+ if g.blockNetTime != 0 {
+ ret.IOTime += lastTs - g.blockNetTime
+ }
+ if g.blockSyncTime != 0 {
+ ret.BlockTime += lastTs - g.blockSyncTime
+ }
+ if g.blockSyscallTime != 0 {
+ ret.SyscallTime += lastTs - g.blockSyscallTime
+ }
+ if g.blockSchedTime != 0 {
+ ret.SchedWaitTime += lastTs - g.blockSchedTime
+ }
+ if g.blockSweepTime != 0 {
+ ret.SweepTime += lastTs - g.blockSweepTime
+ }
+ return ret
+}
+
+// finalize is called when processing a goroutine end event or at
+// the end of trace processing. This finalizes the execution stat
+// and any active regions in the goroutine, in which case trigger is nil.
+func (g *GDesc) finalize(lastTs, activeGCStartTime int64, trigger *Event) {
+ if trigger != nil {
+ g.EndTime = trigger.Ts
+ }
+ finalStat := g.snapshotStat(lastTs, activeGCStartTime)
+
+ g.GExecutionStat = finalStat
+
+ // System goroutines are never part of regions, even though they
+ // "inherit" a task due to creation (EvGoCreate) from within a region.
+ // This may happen e.g. if the first GC is triggered within a region,
+ // starting the GC worker goroutines.
+ if !IsSystemGoroutine(g.Name) {
+ for _, s := range g.activeRegions {
+ s.End = trigger
+ s.GExecutionStat = finalStat.sub(s.GExecutionStat)
+ g.Regions = append(g.Regions, s)
+ }
+ }
+ *(g.gdesc) = gdesc{}
+}
+
+// gdesc is a private part of GDesc that is required only during analysis.
+type gdesc struct {
+ lastStartTime int64
+ blockNetTime int64
+ blockSyncTime int64
+ blockSyscallTime int64
+ blockSweepTime int64
+ blockGCTime int64
+ blockSchedTime int64
+
+ activeRegions []*UserRegionDesc // stack of active regions
+}
+
+// GoroutineStats generates statistics for all goroutines in the trace.
+func GoroutineStats(events []*Event) map[uint64]*GDesc {
+ gs := make(map[uint64]*GDesc)
+ var lastTs int64
+ var gcStartTime int64 // gcStartTime == 0 indicates gc is inactive.
+ for _, ev := range events {
+ lastTs = ev.Ts
+ switch ev.Type {
+ case EvGoCreate:
+ g := &GDesc{ID: ev.Args[0], CreationTime: ev.Ts, gdesc: new(gdesc)}
+ g.blockSchedTime = ev.Ts
+ // When a goroutine is newly created, inherit the task
+ // of the active region. For ease handling of this
+ // case, we create a fake region description with the
+ // task id. This isn't strictly necessary as this
+ // goroutine may not be associated with the task, but
+ // it can be convenient to see all children created
+ // during a region.
+ if creatorG := gs[ev.G]; creatorG != nil && len(creatorG.gdesc.activeRegions) > 0 {
+ regions := creatorG.gdesc.activeRegions
+ s := regions[len(regions)-1]
+ if s.TaskID != 0 {
+ g.gdesc.activeRegions = []*UserRegionDesc{
+ {TaskID: s.TaskID, Start: ev},
+ }
+ }
+ }
+ gs[g.ID] = g
+ case EvGoStart, EvGoStartLabel:
+ g := gs[ev.G]
+ if g.PC == 0 && len(ev.Stk) > 0 {
+ g.PC = ev.Stk[0].PC
+ g.Name = ev.Stk[0].Fn
+ }
+ g.lastStartTime = ev.Ts
+ if g.StartTime == 0 {
+ g.StartTime = ev.Ts
+ }
+ if g.blockSchedTime != 0 {
+ g.SchedWaitTime += ev.Ts - g.blockSchedTime
+ g.blockSchedTime = 0
+ }
+ case EvGoEnd, EvGoStop:
+ g := gs[ev.G]
+ g.finalize(ev.Ts, gcStartTime, ev)
+ case EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect,
+ EvGoBlockSync, EvGoBlockCond:
+ g := gs[ev.G]
+ g.ExecTime += ev.Ts - g.lastStartTime
+ g.lastStartTime = 0
+ g.blockSyncTime = ev.Ts
+ case EvGoSched, EvGoPreempt:
+ g := gs[ev.G]
+ g.ExecTime += ev.Ts - g.lastStartTime
+ g.lastStartTime = 0
+ g.blockSchedTime = ev.Ts
+ case EvGoSleep, EvGoBlock:
+ g := gs[ev.G]
+ g.ExecTime += ev.Ts - g.lastStartTime
+ g.lastStartTime = 0
+ case EvGoBlockNet:
+ g := gs[ev.G]
+ g.ExecTime += ev.Ts - g.lastStartTime
+ g.lastStartTime = 0
+ g.blockNetTime = ev.Ts
+ case EvGoBlockGC:
+ g := gs[ev.G]
+ g.ExecTime += ev.Ts - g.lastStartTime
+ g.lastStartTime = 0
+ g.blockGCTime = ev.Ts
+ case EvGoUnblock:
+ g := gs[ev.Args[0]]
+ if g.blockNetTime != 0 {
+ g.IOTime += ev.Ts - g.blockNetTime
+ g.blockNetTime = 0
+ }
+ if g.blockSyncTime != 0 {
+ g.BlockTime += ev.Ts - g.blockSyncTime
+ g.blockSyncTime = 0
+ }
+ g.blockSchedTime = ev.Ts
+ case EvGoSysBlock:
+ g := gs[ev.G]
+ g.ExecTime += ev.Ts - g.lastStartTime
+ g.lastStartTime = 0
+ g.blockSyscallTime = ev.Ts
+ case EvGoSysExit:
+ g := gs[ev.G]
+ if g.blockSyscallTime != 0 {
+ g.SyscallTime += ev.Ts - g.blockSyscallTime
+ g.blockSyscallTime = 0
+ }
+ g.blockSchedTime = ev.Ts
+ case EvGCSweepStart:
+ g := gs[ev.G]
+ if g != nil {
+ // Sweep can happen during GC on system goroutine.
+ g.blockSweepTime = ev.Ts
+ }
+ case EvGCSweepDone:
+ g := gs[ev.G]
+ if g != nil && g.blockSweepTime != 0 {
+ g.SweepTime += ev.Ts - g.blockSweepTime
+ g.blockSweepTime = 0
+ }
+ case EvGCStart:
+ gcStartTime = ev.Ts
+ case EvGCDone:
+ for _, g := range gs {
+ if g.EndTime != 0 {
+ continue
+ }
+ if gcStartTime < g.CreationTime {
+ g.GCTime += ev.Ts - g.CreationTime
+ } else {
+ g.GCTime += ev.Ts - gcStartTime
+ }
+ }
+ gcStartTime = 0 // indicates gc is inactive.
+ case EvUserRegion:
+ g := gs[ev.G]
+ switch mode := ev.Args[1]; mode {
+ case 0: // region start
+ g.activeRegions = append(g.activeRegions, &UserRegionDesc{
+ Name: ev.SArgs[0],
+ TaskID: ev.Args[0],
+ Start: ev,
+ GExecutionStat: g.snapshotStat(lastTs, gcStartTime),
+ })
+ case 1: // region end
+ var sd *UserRegionDesc
+ if regionStk := g.activeRegions; len(regionStk) > 0 {
+ n := len(regionStk)
+ sd = regionStk[n-1]
+ regionStk = regionStk[:n-1] // pop
+ g.activeRegions = regionStk
+ } else {
+ sd = &UserRegionDesc{
+ Name: ev.SArgs[0],
+ TaskID: ev.Args[0],
+ }
+ }
+ sd.GExecutionStat = g.snapshotStat(lastTs, gcStartTime).sub(sd.GExecutionStat)
+ sd.End = ev
+ g.Regions = append(g.Regions, sd)
+ }
+ }
+ }
+
+ for _, g := range gs {
+ g.finalize(lastTs, gcStartTime, nil)
+
+ // sort based on region start time
+ sort.Slice(g.Regions, func(i, j int) bool {
+ x := g.Regions[i].Start
+ y := g.Regions[j].Start
+ if x == nil {
+ return true
+ }
+ if y == nil {
+ return false
+ }
+ return x.Ts < y.Ts
+ })
+
+ g.gdesc = nil
+ }
+
+ return gs
+}
+
+// RelatedGoroutines finds a set of goroutines related to goroutine goid.
+func RelatedGoroutines(events []*Event, goid uint64) map[uint64]bool {
+ // BFS of depth 2 over "unblock" edges
+ // (what goroutines unblock goroutine goid?).
+ gmap := make(map[uint64]bool)
+ gmap[goid] = true
+ for i := 0; i < 2; i++ {
+ gmap1 := make(map[uint64]bool)
+ for g := range gmap {
+ gmap1[g] = true
+ }
+ for _, ev := range events {
+ if ev.Type == EvGoUnblock && gmap[ev.Args[0]] {
+ gmap1[ev.G] = true
+ }
+ }
+ gmap = gmap1
+ }
+ gmap[0] = true // for GC events
+ return gmap
+}
+
+func IsSystemGoroutine(entryFn string) bool {
+ // This mimics runtime.isSystemGoroutine as closely as
+ // possible.
+ // Also, locked g in extra M (with empty entryFn) is system goroutine.
+ return entryFn == "" || entryFn != "runtime.main" && strings.HasPrefix(entryFn, "runtime.")
+}
diff --git a/src/internal/trace/mkcanned.bash b/src/internal/trace/mkcanned.bash
new file mode 100755
index 0000000..879cf1c
--- /dev/null
+++ b/src/internal/trace/mkcanned.bash
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Copyright 2016 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# mkcanned.bash creates canned traces for the trace test suite using
+# the current Go version.
+
+set -e
+
+if [ $# != 1 ]; then
+ echo "usage: $0 <label>" >&2
+ exit 1
+fi
+
+go test -run '^$' -bench ClientServerParallel4 -benchtime 10x -trace "testdata/http_$1_good" net/http
+go test -run 'TraceStress$|TraceStressStartStop$|TestUserTaskRegion$' runtime/trace -savetraces
+mv ../../runtime/trace/TestTraceStress.trace "testdata/stress_$1_good"
+mv ../../runtime/trace/TestTraceStressStartStop.trace "testdata/stress_start_stop_$1_good"
+mv ../../runtime/trace/TestUserTaskRegion.trace "testdata/user_task_region_$1_good"
diff --git a/src/internal/trace/mud.go b/src/internal/trace/mud.go
new file mode 100644
index 0000000..8826306
--- /dev/null
+++ b/src/internal/trace/mud.go
@@ -0,0 +1,223 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+ "math"
+ "sort"
+)
+
+// mud is an updatable mutator utilization distribution.
+//
+// This is a continuous distribution of duration over mutator
+// utilization. For example, the integral from mutator utilization a
+// to b is the total duration during which the mutator utilization was
+// in the range [a, b].
+//
+// This distribution is *not* normalized (it is not a probability
+// distribution). This makes it easier to work with as it's being
+// updated.
+//
+// It is represented as the sum of scaled uniform distribution
+// functions and Dirac delta functions (which are treated as
+// degenerate uniform distributions).
+type mud struct {
+ sorted, unsorted []edge
+
+ // trackMass is the inverse cumulative sum to track as the
+ // distribution is updated.
+ trackMass float64
+ // trackBucket is the bucket in which trackMass falls. If the
+ // total mass of the distribution is < trackMass, this is
+ // len(hist).
+ trackBucket int
+ // trackSum is the cumulative sum of hist[:trackBucket]. Once
+ // trackSum >= trackMass, trackBucket must be recomputed.
+ trackSum float64
+
+ // hist is a hierarchical histogram of distribution mass.
+ hist [mudDegree]float64
+}
+
+const (
+ // mudDegree is the number of buckets in the MUD summary
+ // histogram.
+ mudDegree = 1024
+)
+
+type edge struct {
+ // At x, the function increases by y.
+ x, delta float64
+ // Additionally at x is a Dirac delta function with area dirac.
+ dirac float64
+}
+
+// add adds a uniform function over [l, r] scaled so the total weight
+// of the uniform is area. If l==r, this adds a Dirac delta function.
+func (d *mud) add(l, r, area float64) {
+ if area == 0 {
+ return
+ }
+
+ if r < l {
+ l, r = r, l
+ }
+
+ // Add the edges.
+ if l == r {
+ d.unsorted = append(d.unsorted, edge{l, 0, area})
+ } else {
+ delta := area / (r - l)
+ d.unsorted = append(d.unsorted, edge{l, delta, 0}, edge{r, -delta, 0})
+ }
+
+ // Update the histogram.
+ h := &d.hist
+ lbFloat, lf := math.Modf(l * mudDegree)
+ lb := int(lbFloat)
+ if lb >= mudDegree {
+ lb, lf = mudDegree-1, 1
+ }
+ if l == r {
+ h[lb] += area
+ } else {
+ rbFloat, rf := math.Modf(r * mudDegree)
+ rb := int(rbFloat)
+ if rb >= mudDegree {
+ rb, rf = mudDegree-1, 1
+ }
+ if lb == rb {
+ h[lb] += area
+ } else {
+ perBucket := area / (r - l) / mudDegree
+ h[lb] += perBucket * (1 - lf)
+ h[rb] += perBucket * rf
+ for i := lb + 1; i < rb; i++ {
+ h[i] += perBucket
+ }
+ }
+ }
+
+ // Update mass tracking.
+ if thresh := float64(d.trackBucket) / mudDegree; l < thresh {
+ if r < thresh {
+ d.trackSum += area
+ } else {
+ d.trackSum += area * (thresh - l) / (r - l)
+ }
+ if d.trackSum >= d.trackMass {
+ // The tracked mass now falls in a different
+ // bucket. Recompute the inverse cumulative sum.
+ d.setTrackMass(d.trackMass)
+ }
+ }
+}
+
+// setTrackMass sets the mass to track the inverse cumulative sum for.
+//
+// Specifically, mass is a cumulative duration, and the mutator
+// utilization bounds for this duration can be queried using
+// approxInvCumulativeSum.
+func (d *mud) setTrackMass(mass float64) {
+ d.trackMass = mass
+
+ // Find the bucket currently containing trackMass by computing
+ // the cumulative sum.
+ sum := 0.0
+ for i, val := range d.hist[:] {
+ newSum := sum + val
+ if newSum > mass {
+ // mass falls in bucket i.
+ d.trackBucket = i
+ d.trackSum = sum
+ return
+ }
+ sum = newSum
+ }
+ d.trackBucket = len(d.hist)
+ d.trackSum = sum
+}
+
+// approxInvCumulativeSum is like invCumulativeSum, but specifically
+// operates on the tracked mass and returns an upper and lower bound
+// approximation of the inverse cumulative sum.
+//
+// The true inverse cumulative sum will be in the range [lower, upper).
+func (d *mud) approxInvCumulativeSum() (float64, float64, bool) {
+ if d.trackBucket == len(d.hist) {
+ return math.NaN(), math.NaN(), false
+ }
+ return float64(d.trackBucket) / mudDegree, float64(d.trackBucket+1) / mudDegree, true
+}
+
+// invCumulativeSum returns x such that the integral of d from -∞ to x
+// is y. If the total weight of d is less than y, it returns the
+// maximum of the distribution and false.
+//
+// Specifically, y is a cumulative duration, and invCumulativeSum
+// returns the mutator utilization x such that at least y time has
+// been spent with mutator utilization <= x.
+func (d *mud) invCumulativeSum(y float64) (float64, bool) {
+ if len(d.sorted) == 0 && len(d.unsorted) == 0 {
+ return math.NaN(), false
+ }
+
+ // Sort edges.
+ edges := d.unsorted
+ sort.Slice(edges, func(i, j int) bool {
+ return edges[i].x < edges[j].x
+ })
+ // Merge with sorted edges.
+ d.unsorted = nil
+ if d.sorted == nil {
+ d.sorted = edges
+ } else {
+ oldSorted := d.sorted
+ newSorted := make([]edge, len(oldSorted)+len(edges))
+ i, j := 0, 0
+ for o := range newSorted {
+ if i >= len(oldSorted) {
+ copy(newSorted[o:], edges[j:])
+ break
+ } else if j >= len(edges) {
+ copy(newSorted[o:], oldSorted[i:])
+ break
+ } else if oldSorted[i].x < edges[j].x {
+ newSorted[o] = oldSorted[i]
+ i++
+ } else {
+ newSorted[o] = edges[j]
+ j++
+ }
+ }
+ d.sorted = newSorted
+ }
+
+ // Traverse edges in order computing a cumulative sum.
+ csum, rate, prevX := 0.0, 0.0, 0.0
+ for _, e := range d.sorted {
+ newCsum := csum + (e.x-prevX)*rate
+ if newCsum >= y {
+ // y was exceeded between the previous edge
+ // and this one.
+ if rate == 0 {
+ // Anywhere between prevX and
+ // e.x will do. We return e.x
+ // because that takes care of
+ // the y==0 case naturally.
+ return e.x, true
+ }
+ return (y-csum)/rate + prevX, true
+ }
+ newCsum += e.dirac
+ if newCsum >= y {
+ // y was exceeded by the Dirac delta at e.x.
+ return e.x, true
+ }
+ csum, prevX = newCsum, e.x
+ rate += e.delta
+ }
+ return prevX, false
+}
diff --git a/src/internal/trace/mud_test.go b/src/internal/trace/mud_test.go
new file mode 100644
index 0000000..b3d74dc
--- /dev/null
+++ b/src/internal/trace/mud_test.go
@@ -0,0 +1,87 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+ "math/rand"
+ "testing"
+)
+
+func TestMUD(t *testing.T) {
+ // Insert random uniforms and check histogram mass and
+ // cumulative sum approximations.
+ rnd := rand.New(rand.NewSource(42))
+ mass := 0.0
+ var mud mud
+ for i := 0; i < 100; i++ {
+ area, l, r := rnd.Float64(), rnd.Float64(), rnd.Float64()
+ if rnd.Intn(10) == 0 {
+ r = l
+ }
+ t.Log(l, r, area)
+ mud.add(l, r, area)
+ mass += area
+
+ // Check total histogram weight.
+ hmass := 0.0
+ for _, val := range mud.hist {
+ hmass += val
+ }
+ if !aeq(mass, hmass) {
+ t.Fatalf("want mass %g, got %g", mass, hmass)
+ }
+
+ // Check inverse cumulative sum approximations.
+ for j := 0.0; j < mass; j += mass * 0.099 {
+ mud.setTrackMass(j)
+ l, u, ok := mud.approxInvCumulativeSum()
+ inv, ok2 := mud.invCumulativeSum(j)
+ if !ok || !ok2 {
+ t.Fatalf("inverse cumulative sum failed: approx %v, exact %v", ok, ok2)
+ }
+ if !(l <= inv && inv < u) {
+ t.Fatalf("inverse(%g) = %g, not ∈ [%g, %g)", j, inv, l, u)
+ }
+ }
+ }
+}
+
+func TestMUDTracking(t *testing.T) {
+ // Test that the tracked mass is tracked correctly across
+ // updates.
+ rnd := rand.New(rand.NewSource(42))
+ const uniforms = 100
+ for trackMass := 0.0; trackMass < uniforms; trackMass += uniforms / 50 {
+ var mud mud
+ mass := 0.0
+ mud.setTrackMass(trackMass)
+ for i := 0; i < uniforms; i++ {
+ area, l, r := rnd.Float64(), rnd.Float64(), rnd.Float64()
+ mud.add(l, r, area)
+ mass += area
+ l, u, ok := mud.approxInvCumulativeSum()
+ inv, ok2 := mud.invCumulativeSum(trackMass)
+
+ if mass < trackMass {
+ if ok {
+ t.Errorf("approx(%g) = [%g, %g), but mass = %g", trackMass, l, u, mass)
+ }
+ if ok2 {
+ t.Errorf("exact(%g) = %g, but mass = %g", trackMass, inv, mass)
+ }
+ } else {
+ if !ok {
+ t.Errorf("approx(%g) failed, but mass = %g", trackMass, mass)
+ }
+ if !ok2 {
+ t.Errorf("exact(%g) failed, but mass = %g", trackMass, mass)
+ }
+ if ok && ok2 && !(l <= inv && inv < u) {
+ t.Errorf("inverse(%g) = %g, not ∈ [%g, %g)", trackMass, inv, l, u)
+ }
+ }
+ }
+ }
+}
diff --git a/src/internal/trace/order.go b/src/internal/trace/order.go
new file mode 100644
index 0000000..07a6e13
--- /dev/null
+++ b/src/internal/trace/order.go
@@ -0,0 +1,285 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+ "fmt"
+ "sort"
+)
+
+type eventBatch struct {
+ events []*Event
+ selected bool
+}
+
+type orderEvent struct {
+ ev *Event
+ batch int
+ g uint64
+ init gState
+ next gState
+}
+
+type gStatus int
+
+type gState struct {
+ seq uint64
+ status gStatus
+}
+
+const (
+ gDead gStatus = iota
+ gRunnable
+ gRunning
+ gWaiting
+
+ unordered = ^uint64(0)
+ garbage = ^uint64(0) - 1
+ noseq = ^uint64(0)
+ seqinc = ^uint64(0) - 1
+)
+
+// order1007 merges a set of per-P event batches into a single, consistent stream.
+// The high level idea is as follows. Events within an individual batch are in
+// correct order, because they are emitted by a single P. So we need to produce
+// a correct interleaving of the batches. To do this we take first unmerged event
+// from each batch (frontier). Then choose subset that is "ready" to be merged,
+// that is, events for which all dependencies are already merged. Then we choose
+// event with the lowest timestamp from the subset, merge it and repeat.
+// This approach ensures that we form a consistent stream even if timestamps are
+// incorrect (condition observed on some machines).
+func order1007(m map[int][]*Event) (events []*Event, err error) {
+ pending := 0
+ // The ordering of CPU profile sample events in the data stream is based on
+ // when each run of the signal handler was able to acquire the spinlock,
+ // with original timestamps corresponding to when ReadTrace pulled the data
+ // off of the profBuf queue. Re-sort them by the timestamp we captured
+ // inside the signal handler.
+ sort.Stable(eventList(m[ProfileP]))
+ var batches []*eventBatch
+ for _, v := range m {
+ pending += len(v)
+ batches = append(batches, &eventBatch{v, false})
+ }
+ gs := make(map[uint64]gState)
+ var frontier []orderEvent
+ for ; pending != 0; pending-- {
+ for i, b := range batches {
+ if b.selected || len(b.events) == 0 {
+ continue
+ }
+ ev := b.events[0]
+ g, init, next := stateTransition(ev)
+ if !transitionReady(g, gs[g], init) {
+ continue
+ }
+ frontier = append(frontier, orderEvent{ev, i, g, init, next})
+ b.events = b.events[1:]
+ b.selected = true
+ // Get rid of "Local" events, they are intended merely for ordering.
+ switch ev.Type {
+ case EvGoStartLocal:
+ ev.Type = EvGoStart
+ case EvGoUnblockLocal:
+ ev.Type = EvGoUnblock
+ case EvGoSysExitLocal:
+ ev.Type = EvGoSysExit
+ }
+ }
+ if len(frontier) == 0 {
+ return nil, fmt.Errorf("no consistent ordering of events possible")
+ }
+ sort.Sort(orderEventList(frontier))
+ f := frontier[0]
+ frontier[0] = frontier[len(frontier)-1]
+ frontier = frontier[:len(frontier)-1]
+ events = append(events, f.ev)
+ transition(gs, f.g, f.init, f.next)
+ if !batches[f.batch].selected {
+ panic("frontier batch is not selected")
+ }
+ batches[f.batch].selected = false
+ }
+
+ // At this point we have a consistent stream of events.
+ // Make sure time stamps respect the ordering.
+ // The tests will skip (not fail) the test case if they see this error.
+ if !sort.IsSorted(eventList(events)) {
+ return nil, ErrTimeOrder
+ }
+
+ // The last part is giving correct timestamps to EvGoSysExit events.
+ // The problem with EvGoSysExit is that actual syscall exit timestamp (ev.Args[2])
+ // is potentially acquired long before event emission. So far we've used
+ // timestamp of event emission (ev.Ts).
+ // We could not set ev.Ts = ev.Args[2] earlier, because it would produce
+ // seemingly broken timestamps (misplaced event).
+ // We also can't simply update the timestamp and resort events, because
+ // if timestamps are broken we will misplace the event and later report
+ // logically broken trace (instead of reporting broken timestamps).
+ lastSysBlock := make(map[uint64]int64)
+ for _, ev := range events {
+ switch ev.Type {
+ case EvGoSysBlock, EvGoInSyscall:
+ lastSysBlock[ev.G] = ev.Ts
+ case EvGoSysExit:
+ ts := int64(ev.Args[2])
+ if ts == 0 {
+ continue
+ }
+ block := lastSysBlock[ev.G]
+ if block == 0 {
+ return nil, fmt.Errorf("stray syscall exit")
+ }
+ if ts < block {
+ return nil, ErrTimeOrder
+ }
+ ev.Ts = ts
+ }
+ }
+ sort.Stable(eventList(events))
+
+ return
+}
+
+// stateTransition returns goroutine state (sequence and status) when the event
+// becomes ready for merging (init) and the goroutine state after the event (next).
+func stateTransition(ev *Event) (g uint64, init, next gState) {
+ switch ev.Type {
+ case EvGoCreate:
+ g = ev.Args[0]
+ init = gState{0, gDead}
+ next = gState{1, gRunnable}
+ case EvGoWaiting, EvGoInSyscall:
+ g = ev.G
+ init = gState{1, gRunnable}
+ next = gState{2, gWaiting}
+ case EvGoStart, EvGoStartLabel:
+ g = ev.G
+ init = gState{ev.Args[1], gRunnable}
+ next = gState{ev.Args[1] + 1, gRunning}
+ case EvGoStartLocal:
+ // noseq means that this event is ready for merging as soon as
+ // frontier reaches it (EvGoStartLocal is emitted on the same P
+ // as the corresponding EvGoCreate/EvGoUnblock, and thus the latter
+ // is already merged).
+ // seqinc is a stub for cases when event increments g sequence,
+ // but since we don't know current seq we also don't know next seq.
+ g = ev.G
+ init = gState{noseq, gRunnable}
+ next = gState{seqinc, gRunning}
+ case EvGoBlock, EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect,
+ EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoSleep,
+ EvGoSysBlock, EvGoBlockGC:
+ g = ev.G
+ init = gState{noseq, gRunning}
+ next = gState{noseq, gWaiting}
+ case EvGoSched, EvGoPreempt:
+ g = ev.G
+ init = gState{noseq, gRunning}
+ next = gState{noseq, gRunnable}
+ case EvGoUnblock, EvGoSysExit:
+ g = ev.Args[0]
+ init = gState{ev.Args[1], gWaiting}
+ next = gState{ev.Args[1] + 1, gRunnable}
+ case EvGoUnblockLocal, EvGoSysExitLocal:
+ g = ev.Args[0]
+ init = gState{noseq, gWaiting}
+ next = gState{seqinc, gRunnable}
+ case EvGCStart:
+ g = garbage
+ init = gState{ev.Args[0], gDead}
+ next = gState{ev.Args[0] + 1, gDead}
+ default:
+ // no ordering requirements
+ g = unordered
+ }
+ return
+}
+
+func transitionReady(g uint64, curr, init gState) bool {
+ return g == unordered || (init.seq == noseq || init.seq == curr.seq) && init.status == curr.status
+}
+
+func transition(gs map[uint64]gState, g uint64, init, next gState) {
+ if g == unordered {
+ return
+ }
+ curr := gs[g]
+ if !transitionReady(g, curr, init) {
+ panic("event sequences are broken")
+ }
+ switch next.seq {
+ case noseq:
+ next.seq = curr.seq
+ case seqinc:
+ next.seq = curr.seq + 1
+ }
+ gs[g] = next
+}
+
+// order1005 merges a set of per-P event batches into a single, consistent stream.
+func order1005(m map[int][]*Event) (events []*Event, err error) {
+ for _, batch := range m {
+ events = append(events, batch...)
+ }
+ for _, ev := range events {
+ if ev.Type == EvGoSysExit {
+ // EvGoSysExit emission is delayed until the thread has a P.
+ // Give it the real sequence number and time stamp.
+ ev.seq = int64(ev.Args[1])
+ if ev.Args[2] != 0 {
+ ev.Ts = int64(ev.Args[2])
+ }
+ }
+ }
+ sort.Sort(eventSeqList(events))
+ if !sort.IsSorted(eventList(events)) {
+ return nil, ErrTimeOrder
+ }
+ return
+}
+
+type orderEventList []orderEvent
+
+func (l orderEventList) Len() int {
+ return len(l)
+}
+
+func (l orderEventList) Less(i, j int) bool {
+ return l[i].ev.Ts < l[j].ev.Ts
+}
+
+func (l orderEventList) Swap(i, j int) {
+ l[i], l[j] = l[j], l[i]
+}
+
+type eventList []*Event
+
+func (l eventList) Len() int {
+ return len(l)
+}
+
+func (l eventList) Less(i, j int) bool {
+ return l[i].Ts < l[j].Ts
+}
+
+func (l eventList) Swap(i, j int) {
+ l[i], l[j] = l[j], l[i]
+}
+
+type eventSeqList []*Event
+
+func (l eventSeqList) Len() int {
+ return len(l)
+}
+
+func (l eventSeqList) Less(i, j int) bool {
+ return l[i].seq < l[j].seq
+}
+
+func (l eventSeqList) Swap(i, j int) {
+ l[i], l[j] = l[j], l[i]
+}
diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go
new file mode 100644
index 0000000..b091a85
--- /dev/null
+++ b/src/internal/trace/parser.go
@@ -0,0 +1,1142 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "io"
+ "math/rand"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "runtime"
+ "strconv"
+ "strings"
+ _ "unsafe"
+)
+
+func goCmd() string {
+ var exeSuffix string
+ if runtime.GOOS == "windows" {
+ exeSuffix = ".exe"
+ }
+ path := filepath.Join(runtime.GOROOT(), "bin", "go"+exeSuffix)
+ if _, err := os.Stat(path); err == nil {
+ return path
+ }
+ return "go"
+}
+
+// Event describes one event in the trace.
+type Event struct {
+ Off int // offset in input file (for debugging and error reporting)
+ Type byte // one of Ev*
+ seq int64 // sequence number
+ Ts int64 // timestamp in nanoseconds
+ P int // P on which the event happened (can be one of TimerP, NetpollP, SyscallP)
+ G uint64 // G on which the event happened
+ StkID uint64 // unique stack ID
+ Stk []*Frame // stack trace (can be empty)
+ Args [3]uint64 // event-type-specific arguments
+ SArgs []string // event-type-specific string args
+ // linked event (can be nil), depends on event type:
+ // for GCStart: the GCStop
+ // for GCSTWStart: the GCSTWDone
+ // for GCSweepStart: the GCSweepDone
+ // for GoCreate: first GoStart of the created goroutine
+ // for GoStart/GoStartLabel: the associated GoEnd, GoBlock or other blocking event
+ // for GoSched/GoPreempt: the next GoStart
+ // for GoBlock and other blocking events: the unblock event
+ // for GoUnblock: the associated GoStart
+ // for blocking GoSysCall: the associated GoSysExit
+ // for GoSysExit: the next GoStart
+ // for GCMarkAssistStart: the associated GCMarkAssistDone
+ // for UserTaskCreate: the UserTaskEnd
+ // for UserRegion: if the start region, the corresponding UserRegion end event
+ Link *Event
+}
+
+// Frame is a frame in stack traces.
+type Frame struct {
+ PC uint64
+ Fn string
+ File string
+ Line int
+}
+
+const (
+ // Special P identifiers:
+ FakeP = 1000000 + iota
+ TimerP // depicts timer unblocks
+ NetpollP // depicts network unblocks
+ SyscallP // depicts returns from syscalls
+ GCP // depicts GC state
+ ProfileP // depicts recording of CPU profile samples
+)
+
+// ParseResult is the result of Parse.
+type ParseResult struct {
+ // Events is the sorted list of Events in the trace.
+ Events []*Event
+ // Stacks is the stack traces keyed by stack IDs from the trace.
+ Stacks map[uint64][]*Frame
+}
+
+// Parse parses, post-processes and verifies the trace.
+func Parse(r io.Reader, bin string) (ParseResult, error) {
+ ver, res, err := parse(r, bin)
+ if err != nil {
+ return ParseResult{}, err
+ }
+ if ver < 1007 && bin == "" {
+ return ParseResult{}, fmt.Errorf("for traces produced by go 1.6 or below, the binary argument must be provided")
+ }
+ return res, nil
+}
+
+// parse parses, post-processes and verifies the trace. It returns the
+// trace version and the list of events.
+func parse(r io.Reader, bin string) (int, ParseResult, error) {
+ ver, rawEvents, strings, err := readTrace(r)
+ if err != nil {
+ return 0, ParseResult{}, err
+ }
+ events, stacks, err := parseEvents(ver, rawEvents, strings)
+ if err != nil {
+ return 0, ParseResult{}, err
+ }
+ events = removeFutile(events)
+ err = postProcessTrace(ver, events)
+ if err != nil {
+ return 0, ParseResult{}, err
+ }
+ // Attach stack traces.
+ for _, ev := range events {
+ if ev.StkID != 0 {
+ ev.Stk = stacks[ev.StkID]
+ }
+ }
+ if ver < 1007 && bin != "" {
+ if err := symbolize(events, bin); err != nil {
+ return 0, ParseResult{}, err
+ }
+ }
+ return ver, ParseResult{Events: events, Stacks: stacks}, nil
+}
+
+// rawEvent is a helper type used during parsing.
+type rawEvent struct {
+ off int
+ typ byte
+ args []uint64
+ sargs []string
+}
+
+// readTrace does wire-format parsing and verification.
+// It does not care about specific event types and argument meaning.
+func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]string, err error) {
+ // Read and validate trace header.
+ var buf [16]byte
+ off, err := io.ReadFull(r, buf[:])
+ if err != nil {
+ err = fmt.Errorf("failed to read header: read %v, err %v", off, err)
+ return
+ }
+ ver, err = parseHeader(buf[:])
+ if err != nil {
+ return
+ }
+ switch ver {
+ case 1005, 1007, 1008, 1009, 1010, 1011, 1019:
+ // Note: When adding a new version, confirm that canned traces from the
+ // old version are part of the test suite. Add them using mkcanned.bash.
+ break
+ default:
+ err = fmt.Errorf("unsupported trace file version %v.%v (update Go toolchain) %v", ver/1000, ver%1000, ver)
+ return
+ }
+
+ // Read events.
+ strings = make(map[uint64]string)
+ for {
+ // Read event type and number of arguments (1 byte).
+ off0 := off
+ var n int
+ n, err = r.Read(buf[:1])
+ if err == io.EOF {
+ err = nil
+ break
+ }
+ if err != nil || n != 1 {
+ err = fmt.Errorf("failed to read trace at offset 0x%x: n=%v err=%v", off0, n, err)
+ return
+ }
+ off += n
+ typ := buf[0] << 2 >> 2
+ narg := buf[0]>>6 + 1
+ inlineArgs := byte(4)
+ if ver < 1007 {
+ narg++
+ inlineArgs++
+ }
+ if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > ver {
+ err = fmt.Errorf("unknown event type %v at offset 0x%x", typ, off0)
+ return
+ }
+ if typ == EvString {
+ // String dictionary entry [ID, length, string].
+ var id uint64
+ id, off, err = readVal(r, off)
+ if err != nil {
+ return
+ }
+ if id == 0 {
+ err = fmt.Errorf("string at offset %d has invalid id 0", off)
+ return
+ }
+ if strings[id] != "" {
+ err = fmt.Errorf("string at offset %d has duplicate id %v", off, id)
+ return
+ }
+ var ln uint64
+ ln, off, err = readVal(r, off)
+ if err != nil {
+ return
+ }
+ if ln == 0 {
+ err = fmt.Errorf("string at offset %d has invalid length 0", off)
+ return
+ }
+ if ln > 1e6 {
+ err = fmt.Errorf("string at offset %d has too large length %v", off, ln)
+ return
+ }
+ buf := make([]byte, ln)
+ var n int
+ n, err = io.ReadFull(r, buf)
+ if err != nil {
+ err = fmt.Errorf("failed to read trace at offset %d: read %v, want %v, error %v", off, n, ln, err)
+ return
+ }
+ off += n
+ strings[id] = string(buf)
+ continue
+ }
+ ev := rawEvent{typ: typ, off: off0}
+ if narg < inlineArgs {
+ for i := 0; i < int(narg); i++ {
+ var v uint64
+ v, off, err = readVal(r, off)
+ if err != nil {
+ err = fmt.Errorf("failed to read event %v argument at offset %v (%v)", typ, off, err)
+ return
+ }
+ ev.args = append(ev.args, v)
+ }
+ } else {
+ // More than inlineArgs args, the first value is length of the event in bytes.
+ var v uint64
+ v, off, err = readVal(r, off)
+ if err != nil {
+ err = fmt.Errorf("failed to read event %v argument at offset %v (%v)", typ, off, err)
+ return
+ }
+ evLen := v
+ off1 := off
+ for evLen > uint64(off-off1) {
+ v, off, err = readVal(r, off)
+ if err != nil {
+ err = fmt.Errorf("failed to read event %v argument at offset %v (%v)", typ, off, err)
+ return
+ }
+ ev.args = append(ev.args, v)
+ }
+ if evLen != uint64(off-off1) {
+ err = fmt.Errorf("event has wrong length at offset 0x%x: want %v, got %v", off0, evLen, off-off1)
+ return
+ }
+ }
+ switch ev.typ {
+ case EvUserLog: // EvUserLog records are followed by a value string of length ev.args[len(ev.args)-1]
+ var s string
+ s, off, err = readStr(r, off)
+ ev.sargs = append(ev.sargs, s)
+ }
+ events = append(events, ev)
+ }
+ return
+}
+
+func readStr(r io.Reader, off0 int) (s string, off int, err error) {
+ var sz uint64
+ sz, off, err = readVal(r, off0)
+ if err != nil || sz == 0 {
+ return "", off, err
+ }
+ if sz > 1e6 {
+ return "", off, fmt.Errorf("string at offset %d is too large (len=%d)", off, sz)
+ }
+ buf := make([]byte, sz)
+ n, err := io.ReadFull(r, buf)
+ if err != nil || sz != uint64(n) {
+ return "", off + n, fmt.Errorf("failed to read trace at offset %d: read %v, want %v, error %v", off, n, sz, err)
+ }
+ return string(buf), off + n, nil
+}
+
+// parseHeader parses trace header of the form "go 1.7 trace\x00\x00\x00\x00"
+// and returns parsed version as 1007.
+func parseHeader(buf []byte) (int, error) {
+ if len(buf) != 16 {
+ return 0, fmt.Errorf("bad header length")
+ }
+ if buf[0] != 'g' || buf[1] != 'o' || buf[2] != ' ' ||
+ buf[3] < '1' || buf[3] > '9' ||
+ buf[4] != '.' ||
+ buf[5] < '1' || buf[5] > '9' {
+ return 0, fmt.Errorf("not a trace file")
+ }
+ ver := int(buf[5] - '0')
+ i := 0
+ for ; buf[6+i] >= '0' && buf[6+i] <= '9' && i < 2; i++ {
+ ver = ver*10 + int(buf[6+i]-'0')
+ }
+ ver += int(buf[3]-'0') * 1000
+ if !bytes.Equal(buf[6+i:], []byte(" trace\x00\x00\x00\x00")[:10-i]) {
+ return 0, fmt.Errorf("not a trace file")
+ }
+ return ver, nil
+}
+
+// Parse events transforms raw events into events.
+// It does analyze and verify per-event-type arguments.
+func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (events []*Event, stacks map[uint64][]*Frame, err error) {
+ var ticksPerSec, lastSeq, lastTs int64
+ var lastG uint64
+ var lastP int
+ timerGoids := make(map[uint64]bool)
+ lastGs := make(map[int]uint64) // last goroutine running on P
+ stacks = make(map[uint64][]*Frame)
+ batches := make(map[int][]*Event) // events by P
+ for _, raw := range rawEvents {
+ desc := EventDescriptions[raw.typ]
+ if desc.Name == "" {
+ err = fmt.Errorf("missing description for event type %v", raw.typ)
+ return
+ }
+ narg := argNum(raw, ver)
+ if len(raw.args) != narg {
+ err = fmt.Errorf("%v has wrong number of arguments at offset 0x%x: want %v, got %v",
+ desc.Name, raw.off, narg, len(raw.args))
+ return
+ }
+ switch raw.typ {
+ case EvBatch:
+ lastGs[lastP] = lastG
+ lastP = int(raw.args[0])
+ lastG = lastGs[lastP]
+ if ver < 1007 {
+ lastSeq = int64(raw.args[1])
+ lastTs = int64(raw.args[2])
+ } else {
+ lastTs = int64(raw.args[1])
+ }
+ case EvFrequency:
+ ticksPerSec = int64(raw.args[0])
+ if ticksPerSec <= 0 {
+ // The most likely cause for this is tick skew on different CPUs.
+ // For example, solaris/amd64 seems to have wildly different
+ // ticks on different CPUs.
+ err = ErrTimeOrder
+ return
+ }
+ case EvTimerGoroutine:
+ timerGoids[raw.args[0]] = true
+ case EvStack:
+ if len(raw.args) < 2 {
+ err = fmt.Errorf("EvStack has wrong number of arguments at offset 0x%x: want at least 2, got %v",
+ raw.off, len(raw.args))
+ return
+ }
+ size := raw.args[1]
+ if size > 1000 {
+ err = fmt.Errorf("EvStack has bad number of frames at offset 0x%x: %v",
+ raw.off, size)
+ return
+ }
+ want := 2 + 4*size
+ if ver < 1007 {
+ want = 2 + size
+ }
+ if uint64(len(raw.args)) != want {
+ err = fmt.Errorf("EvStack has wrong number of arguments at offset 0x%x: want %v, got %v",
+ raw.off, want, len(raw.args))
+ return
+ }
+ id := raw.args[0]
+ if id != 0 && size > 0 {
+ stk := make([]*Frame, size)
+ for i := 0; i < int(size); i++ {
+ if ver < 1007 {
+ stk[i] = &Frame{PC: raw.args[2+i]}
+ } else {
+ pc := raw.args[2+i*4+0]
+ fn := raw.args[2+i*4+1]
+ file := raw.args[2+i*4+2]
+ line := raw.args[2+i*4+3]
+ stk[i] = &Frame{PC: pc, Fn: strings[fn], File: strings[file], Line: int(line)}
+ }
+ }
+ stacks[id] = stk
+ }
+ default:
+ e := &Event{Off: raw.off, Type: raw.typ, P: lastP, G: lastG}
+ var argOffset int
+ if ver < 1007 {
+ e.seq = lastSeq + int64(raw.args[0])
+ e.Ts = lastTs + int64(raw.args[1])
+ lastSeq = e.seq
+ argOffset = 2
+ } else {
+ e.Ts = lastTs + int64(raw.args[0])
+ argOffset = 1
+ }
+ lastTs = e.Ts
+ for i := argOffset; i < narg; i++ {
+ if i == narg-1 && desc.Stack {
+ e.StkID = raw.args[i]
+ } else {
+ e.Args[i-argOffset] = raw.args[i]
+ }
+ }
+ switch raw.typ {
+ case EvGoStart, EvGoStartLocal, EvGoStartLabel:
+ lastG = e.Args[0]
+ e.G = lastG
+ if raw.typ == EvGoStartLabel {
+ e.SArgs = []string{strings[e.Args[2]]}
+ }
+ case EvGCSTWStart:
+ e.G = 0
+ switch e.Args[0] {
+ case 0:
+ e.SArgs = []string{"mark termination"}
+ case 1:
+ e.SArgs = []string{"sweep termination"}
+ default:
+ err = fmt.Errorf("unknown STW kind %d", e.Args[0])
+ return
+ }
+ case EvGCStart, EvGCDone, EvGCSTWDone:
+ e.G = 0
+ case EvGoEnd, EvGoStop, EvGoSched, EvGoPreempt,
+ EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
+ EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet,
+ EvGoSysBlock, EvGoBlockGC:
+ lastG = 0
+ case EvGoSysExit, EvGoWaiting, EvGoInSyscall:
+ e.G = e.Args[0]
+ case EvUserTaskCreate:
+ // e.Args 0: taskID, 1:parentID, 2:nameID
+ e.SArgs = []string{strings[e.Args[2]]}
+ case EvUserRegion:
+ // e.Args 0: taskID, 1: mode, 2:nameID
+ e.SArgs = []string{strings[e.Args[2]]}
+ case EvUserLog:
+ // e.Args 0: taskID, 1:keyID, 2: stackID
+ e.SArgs = []string{strings[e.Args[1]], raw.sargs[0]}
+ case EvCPUSample:
+ e.Ts = int64(e.Args[0])
+ e.P = int(e.Args[1])
+ e.G = e.Args[2]
+ e.Args[0] = 0
+ }
+ switch raw.typ {
+ default:
+ batches[lastP] = append(batches[lastP], e)
+ case EvCPUSample:
+ // Most events are written out by the active P at the exact
+ // moment they describe. CPU profile samples are different
+ // because they're written to the tracing log after some delay,
+ // by a separate worker goroutine, into a separate buffer.
+ //
+ // We keep these in their own batch until all of the batches are
+ // merged in timestamp order. We also (right before the merge)
+ // re-sort these events by the timestamp captured in the
+ // profiling signal handler.
+ batches[ProfileP] = append(batches[ProfileP], e)
+ }
+ }
+ }
+ if len(batches) == 0 {
+ err = fmt.Errorf("trace is empty")
+ return
+ }
+ if ticksPerSec == 0 {
+ err = fmt.Errorf("no EvFrequency event")
+ return
+ }
+ if BreakTimestampsForTesting {
+ var batchArr [][]*Event
+ for _, batch := range batches {
+ batchArr = append(batchArr, batch)
+ }
+ for i := 0; i < 5; i++ {
+ batch := batchArr[rand.Intn(len(batchArr))]
+ batch[rand.Intn(len(batch))].Ts += int64(rand.Intn(2000) - 1000)
+ }
+ }
+ if ver < 1007 {
+ events, err = order1005(batches)
+ } else {
+ events, err = order1007(batches)
+ }
+ if err != nil {
+ return
+ }
+
+ // Translate cpu ticks to real time.
+ minTs := events[0].Ts
+ // Use floating point to avoid integer overflows.
+ freq := 1e9 / float64(ticksPerSec)
+ for _, ev := range events {
+ ev.Ts = int64(float64(ev.Ts-minTs) * freq)
+ // Move timers and syscalls to separate fake Ps.
+ if timerGoids[ev.G] && ev.Type == EvGoUnblock {
+ ev.P = TimerP
+ }
+ if ev.Type == EvGoSysExit {
+ ev.P = SyscallP
+ }
+ }
+
+ return
+}
+
+// removeFutile removes all constituents of futile wakeups (block, unblock, start).
+// For example, a goroutine was unblocked on a mutex, but another goroutine got
+// ahead and acquired the mutex before the first goroutine is scheduled,
+// so the first goroutine has to block again. Such wakeups happen on buffered
+// channels and sync.Mutex, but are generally not interesting for end user.
+func removeFutile(events []*Event) []*Event {
+ // Two non-trivial aspects:
+ // 1. A goroutine can be preempted during a futile wakeup and migrate to another P.
+ // We want to remove all of that.
+ // 2. Tracing can start in the middle of a futile wakeup.
+ // That is, we can see a futile wakeup event w/o the actual wakeup before it.
+ // postProcessTrace runs after us and ensures that we leave the trace in a consistent state.
+
+ // Phase 1: determine futile wakeup sequences.
+ type G struct {
+ futile bool
+ wakeup []*Event // wakeup sequence (subject for removal)
+ }
+ gs := make(map[uint64]G)
+ futile := make(map[*Event]bool)
+ for _, ev := range events {
+ switch ev.Type {
+ case EvGoUnblock:
+ g := gs[ev.Args[0]]
+ g.wakeup = []*Event{ev}
+ gs[ev.Args[0]] = g
+ case EvGoStart, EvGoPreempt, EvFutileWakeup:
+ g := gs[ev.G]
+ g.wakeup = append(g.wakeup, ev)
+ if ev.Type == EvFutileWakeup {
+ g.futile = true
+ }
+ gs[ev.G] = g
+ case EvGoBlock, EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond:
+ g := gs[ev.G]
+ if g.futile {
+ futile[ev] = true
+ for _, ev1 := range g.wakeup {
+ futile[ev1] = true
+ }
+ }
+ delete(gs, ev.G)
+ }
+ }
+
+ // Phase 2: remove futile wakeup sequences.
+ newEvents := events[:0] // overwrite the original slice
+ for _, ev := range events {
+ if !futile[ev] {
+ newEvents = append(newEvents, ev)
+ }
+ }
+ return newEvents
+}
+
+// ErrTimeOrder is returned by Parse when the trace contains
+// time stamps that do not respect actual event ordering.
+var ErrTimeOrder = fmt.Errorf("time stamps out of order")
+
+// postProcessTrace does inter-event verification and information restoration.
+// The resulting trace is guaranteed to be consistent
+// (for example, a P does not run two Gs at the same time, or a G is indeed
+// blocked before an unblock event).
+func postProcessTrace(ver int, events []*Event) error {
+ const (
+ gDead = iota
+ gRunnable
+ gRunning
+ gWaiting
+ )
+ type gdesc struct {
+ state int
+ ev *Event
+ evStart *Event
+ evCreate *Event
+ evMarkAssist *Event
+ }
+ type pdesc struct {
+ running bool
+ g uint64
+ evSTW *Event
+ evSweep *Event
+ }
+
+ gs := make(map[uint64]gdesc)
+ ps := make(map[int]pdesc)
+ tasks := make(map[uint64]*Event) // task id to task creation events
+ activeRegions := make(map[uint64][]*Event) // goroutine id to stack of regions
+ gs[0] = gdesc{state: gRunning}
+ var evGC, evSTW *Event
+
+ checkRunning := func(p pdesc, g gdesc, ev *Event, allowG0 bool) error {
+ name := EventDescriptions[ev.Type].Name
+ if g.state != gRunning {
+ return fmt.Errorf("g %v is not running while %v (offset %v, time %v)", ev.G, name, ev.Off, ev.Ts)
+ }
+ if p.g != ev.G {
+ return fmt.Errorf("p %v is not running g %v while %v (offset %v, time %v)", ev.P, ev.G, name, ev.Off, ev.Ts)
+ }
+ if !allowG0 && ev.G == 0 {
+ return fmt.Errorf("g 0 did %v (offset %v, time %v)", EventDescriptions[ev.Type].Name, ev.Off, ev.Ts)
+ }
+ return nil
+ }
+
+ for _, ev := range events {
+ g := gs[ev.G]
+ p := ps[ev.P]
+
+ switch ev.Type {
+ case EvProcStart:
+ if p.running {
+ return fmt.Errorf("p %v is running before start (offset %v, time %v)", ev.P, ev.Off, ev.Ts)
+ }
+ p.running = true
+ case EvProcStop:
+ if !p.running {
+ return fmt.Errorf("p %v is not running before stop (offset %v, time %v)", ev.P, ev.Off, ev.Ts)
+ }
+ if p.g != 0 {
+ return fmt.Errorf("p %v is running a goroutine %v during stop (offset %v, time %v)", ev.P, p.g, ev.Off, ev.Ts)
+ }
+ p.running = false
+ case EvGCStart:
+ if evGC != nil {
+ return fmt.Errorf("previous GC is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts)
+ }
+ evGC = ev
+ // Attribute this to the global GC state.
+ ev.P = GCP
+ case EvGCDone:
+ if evGC == nil {
+ return fmt.Errorf("bogus GC end (offset %v, time %v)", ev.Off, ev.Ts)
+ }
+ evGC.Link = ev
+ evGC = nil
+ case EvGCSTWStart:
+ evp := &evSTW
+ if ver < 1010 {
+ // Before 1.10, EvGCSTWStart was per-P.
+ evp = &p.evSTW
+ }
+ if *evp != nil {
+ return fmt.Errorf("previous STW is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts)
+ }
+ *evp = ev
+ case EvGCSTWDone:
+ evp := &evSTW
+ if ver < 1010 {
+ // Before 1.10, EvGCSTWDone was per-P.
+ evp = &p.evSTW
+ }
+ if *evp == nil {
+ return fmt.Errorf("bogus STW end (offset %v, time %v)", ev.Off, ev.Ts)
+ }
+ (*evp).Link = ev
+ *evp = nil
+ case EvGCSweepStart:
+ if p.evSweep != nil {
+ return fmt.Errorf("previous sweeping is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts)
+ }
+ p.evSweep = ev
+ case EvGCMarkAssistStart:
+ if g.evMarkAssist != nil {
+ return fmt.Errorf("previous mark assist is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts)
+ }
+ g.evMarkAssist = ev
+ case EvGCMarkAssistDone:
+ // Unlike most events, mark assists can be in progress when a
+ // goroutine starts tracing, so we can't report an error here.
+ if g.evMarkAssist != nil {
+ g.evMarkAssist.Link = ev
+ g.evMarkAssist = nil
+ }
+ case EvGCSweepDone:
+ if p.evSweep == nil {
+ return fmt.Errorf("bogus sweeping end (offset %v, time %v)", ev.Off, ev.Ts)
+ }
+ p.evSweep.Link = ev
+ p.evSweep = nil
+ case EvGoWaiting:
+ if g.state != gRunnable {
+ return fmt.Errorf("g %v is not runnable before EvGoWaiting (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
+ }
+ g.state = gWaiting
+ g.ev = ev
+ case EvGoInSyscall:
+ if g.state != gRunnable {
+ return fmt.Errorf("g %v is not runnable before EvGoInSyscall (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
+ }
+ g.state = gWaiting
+ g.ev = ev
+ case EvGoCreate:
+ if err := checkRunning(p, g, ev, true); err != nil {
+ return err
+ }
+ if _, ok := gs[ev.Args[0]]; ok {
+ return fmt.Errorf("g %v already exists (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts)
+ }
+ gs[ev.Args[0]] = gdesc{state: gRunnable, ev: ev, evCreate: ev}
+ case EvGoStart, EvGoStartLabel:
+ if g.state != gRunnable {
+ return fmt.Errorf("g %v is not runnable before start (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
+ }
+ if p.g != 0 {
+ return fmt.Errorf("p %v is already running g %v while start g %v (offset %v, time %v)", ev.P, p.g, ev.G, ev.Off, ev.Ts)
+ }
+ g.state = gRunning
+ g.evStart = ev
+ p.g = ev.G
+ if g.evCreate != nil {
+ if ver < 1007 {
+ // +1 because symbolizer expects return pc.
+ ev.Stk = []*Frame{{PC: g.evCreate.Args[1] + 1}}
+ } else {
+ ev.StkID = g.evCreate.Args[1]
+ }
+ g.evCreate = nil
+ }
+
+ if g.ev != nil {
+ g.ev.Link = ev
+ g.ev = nil
+ }
+ case EvGoEnd, EvGoStop:
+ if err := checkRunning(p, g, ev, false); err != nil {
+ return err
+ }
+ g.evStart.Link = ev
+ g.evStart = nil
+ g.state = gDead
+ p.g = 0
+
+ if ev.Type == EvGoEnd { // flush all active regions
+ regions := activeRegions[ev.G]
+ for _, s := range regions {
+ s.Link = ev
+ }
+ delete(activeRegions, ev.G)
+ }
+
+ case EvGoSched, EvGoPreempt:
+ if err := checkRunning(p, g, ev, false); err != nil {
+ return err
+ }
+ g.state = gRunnable
+ g.evStart.Link = ev
+ g.evStart = nil
+ p.g = 0
+ g.ev = ev
+ case EvGoUnblock:
+ if g.state != gRunning {
+ return fmt.Errorf("g %v is not running while unpark (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
+ }
+ if ev.P != TimerP && p.g != ev.G {
+ return fmt.Errorf("p %v is not running g %v while unpark (offset %v, time %v)", ev.P, ev.G, ev.Off, ev.Ts)
+ }
+ g1 := gs[ev.Args[0]]
+ if g1.state != gWaiting {
+ return fmt.Errorf("g %v is not waiting before unpark (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts)
+ }
+ if g1.ev != nil && g1.ev.Type == EvGoBlockNet && ev.P != TimerP {
+ ev.P = NetpollP
+ }
+ if g1.ev != nil {
+ g1.ev.Link = ev
+ }
+ g1.state = gRunnable
+ g1.ev = ev
+ gs[ev.Args[0]] = g1
+ case EvGoSysCall:
+ if err := checkRunning(p, g, ev, false); err != nil {
+ return err
+ }
+ g.ev = ev
+ case EvGoSysBlock:
+ if err := checkRunning(p, g, ev, false); err != nil {
+ return err
+ }
+ g.state = gWaiting
+ g.evStart.Link = ev
+ g.evStart = nil
+ p.g = 0
+ case EvGoSysExit:
+ if g.state != gWaiting {
+ return fmt.Errorf("g %v is not waiting during syscall exit (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
+ }
+ if g.ev != nil && g.ev.Type == EvGoSysCall {
+ g.ev.Link = ev
+ }
+ g.state = gRunnable
+ g.ev = ev
+ case EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
+ EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoBlockGC:
+ if err := checkRunning(p, g, ev, false); err != nil {
+ return err
+ }
+ g.state = gWaiting
+ g.ev = ev
+ g.evStart.Link = ev
+ g.evStart = nil
+ p.g = 0
+ case EvUserTaskCreate:
+ taskid := ev.Args[0]
+ if prevEv, ok := tasks[taskid]; ok {
+ return fmt.Errorf("task id conflicts (id:%d), %q vs %q", taskid, ev, prevEv)
+ }
+ tasks[ev.Args[0]] = ev
+ case EvUserTaskEnd:
+ taskid := ev.Args[0]
+ if taskCreateEv, ok := tasks[taskid]; ok {
+ taskCreateEv.Link = ev
+ delete(tasks, taskid)
+ }
+ case EvUserRegion:
+ mode := ev.Args[1]
+ regions := activeRegions[ev.G]
+ if mode == 0 { // region start
+ activeRegions[ev.G] = append(regions, ev) // push
+ } else if mode == 1 { // region end
+ n := len(regions)
+ if n > 0 { // matching region start event is in the trace.
+ s := regions[n-1]
+ if s.Args[0] != ev.Args[0] || s.SArgs[0] != ev.SArgs[0] { // task id, region name mismatch
+ return fmt.Errorf("misuse of region in goroutine %d: span end %q when the inner-most active span start event is %q", ev.G, ev, s)
+ }
+ // Link region start event with span end event
+ s.Link = ev
+
+ if n > 1 {
+ activeRegions[ev.G] = regions[:n-1]
+ } else {
+ delete(activeRegions, ev.G)
+ }
+ }
+ } else {
+ return fmt.Errorf("invalid user region mode: %q", ev)
+ }
+ }
+
+ gs[ev.G] = g
+ ps[ev.P] = p
+ }
+
+ // TODO(dvyukov): restore stacks for EvGoStart events.
+ // TODO(dvyukov): test that all EvGoStart events has non-nil Link.
+
+ return nil
+}
+
+// symbolize attaches func/file/line info to stack traces.
+func symbolize(events []*Event, bin string) error {
+ // First, collect and dedup all pcs.
+ pcs := make(map[uint64]*Frame)
+ for _, ev := range events {
+ for _, f := range ev.Stk {
+ pcs[f.PC] = nil
+ }
+ }
+
+ // Start addr2line.
+ cmd := exec.Command(goCmd(), "tool", "addr2line", bin)
+ in, err := cmd.StdinPipe()
+ if err != nil {
+ return fmt.Errorf("failed to pipe addr2line stdin: %v", err)
+ }
+ cmd.Stderr = os.Stderr
+ out, err := cmd.StdoutPipe()
+ if err != nil {
+ return fmt.Errorf("failed to pipe addr2line stdout: %v", err)
+ }
+ err = cmd.Start()
+ if err != nil {
+ return fmt.Errorf("failed to start addr2line: %v", err)
+ }
+ outb := bufio.NewReader(out)
+
+ // Write all pcs to addr2line.
+ // Need to copy pcs to an array, because map iteration order is non-deterministic.
+ var pcArray []uint64
+ for pc := range pcs {
+ pcArray = append(pcArray, pc)
+ _, err := fmt.Fprintf(in, "0x%x\n", pc-1)
+ if err != nil {
+ return fmt.Errorf("failed to write to addr2line: %v", err)
+ }
+ }
+ in.Close()
+
+ // Read in answers.
+ for _, pc := range pcArray {
+ fn, err := outb.ReadString('\n')
+ if err != nil {
+ return fmt.Errorf("failed to read from addr2line: %v", err)
+ }
+ file, err := outb.ReadString('\n')
+ if err != nil {
+ return fmt.Errorf("failed to read from addr2line: %v", err)
+ }
+ f := &Frame{PC: pc}
+ f.Fn = fn[:len(fn)-1]
+ f.File = file[:len(file)-1]
+ if colon := strings.LastIndex(f.File, ":"); colon != -1 {
+ ln, err := strconv.Atoi(f.File[colon+1:])
+ if err == nil {
+ f.File = f.File[:colon]
+ f.Line = ln
+ }
+ }
+ pcs[pc] = f
+ }
+ cmd.Wait()
+
+ // Replace frames in events array.
+ for _, ev := range events {
+ for i, f := range ev.Stk {
+ ev.Stk[i] = pcs[f.PC]
+ }
+ }
+
+ return nil
+}
+
+// readVal reads unsigned base-128 value from r.
+func readVal(r io.Reader, off0 int) (v uint64, off int, err error) {
+ off = off0
+ for i := 0; i < 10; i++ {
+ var buf [1]byte
+ var n int
+ n, err = r.Read(buf[:])
+ if err != nil || n != 1 {
+ return 0, 0, fmt.Errorf("failed to read trace at offset %d: read %v, error %v", off0, n, err)
+ }
+ off++
+ v |= uint64(buf[0]&0x7f) << (uint(i) * 7)
+ if buf[0]&0x80 == 0 {
+ return
+ }
+ }
+ return 0, 0, fmt.Errorf("bad value at offset 0x%x", off0)
+}
+
+// Print dumps events to stdout. For debugging.
+func Print(events []*Event) {
+ for _, ev := range events {
+ PrintEvent(ev)
+ }
+}
+
+// PrintEvent dumps the event to stdout. For debugging.
+func PrintEvent(ev *Event) {
+ fmt.Printf("%s\n", ev)
+}
+
+func (ev *Event) String() string {
+ desc := EventDescriptions[ev.Type]
+ w := new(strings.Builder)
+ fmt.Fprintf(w, "%v %v p=%v g=%v off=%v", ev.Ts, desc.Name, ev.P, ev.G, ev.Off)
+ for i, a := range desc.Args {
+ fmt.Fprintf(w, " %v=%v", a, ev.Args[i])
+ }
+ for i, a := range desc.SArgs {
+ fmt.Fprintf(w, " %v=%v", a, ev.SArgs[i])
+ }
+ return w.String()
+}
+
+// argNum returns total number of args for the event accounting for timestamps,
+// sequence numbers and differences between trace format versions.
+func argNum(raw rawEvent, ver int) int {
+ desc := EventDescriptions[raw.typ]
+ if raw.typ == EvStack {
+ return len(raw.args)
+ }
+ narg := len(desc.Args)
+ if desc.Stack {
+ narg++
+ }
+ switch raw.typ {
+ case EvBatch, EvFrequency, EvTimerGoroutine:
+ if ver < 1007 {
+ narg++ // there was an unused arg before 1.7
+ }
+ return narg
+ }
+ narg++ // timestamp
+ if ver < 1007 {
+ narg++ // sequence
+ }
+ switch raw.typ {
+ case EvGCSweepDone:
+ if ver < 1009 {
+ narg -= 2 // 1.9 added two arguments
+ }
+ case EvGCStart, EvGoStart, EvGoUnblock:
+ if ver < 1007 {
+ narg-- // 1.7 added an additional seq arg
+ }
+ case EvGCSTWStart:
+ if ver < 1010 {
+ narg-- // 1.10 added an argument
+ }
+ }
+ return narg
+}
+
+// BreakTimestampsForTesting causes the parser to randomly alter timestamps (for testing of broken cputicks).
+var BreakTimestampsForTesting bool
+
+// Event types in the trace.
+// Verbatim copy from src/runtime/trace.go with the "trace" prefix removed.
+const (
+ EvNone = 0 // unused
+ EvBatch = 1 // start of per-P batch of events [pid, timestamp]
+ EvFrequency = 2 // contains tracer timer frequency [frequency (ticks per second)]
+ EvStack = 3 // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
+ EvGomaxprocs = 4 // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
+ EvProcStart = 5 // start of P [timestamp, thread id]
+ EvProcStop = 6 // stop of P [timestamp]
+ EvGCStart = 7 // GC start [timestamp, seq, stack id]
+ EvGCDone = 8 // GC done [timestamp]
+ EvGCSTWStart = 9 // GC mark termination start [timestamp, kind]
+ EvGCSTWDone = 10 // GC mark termination done [timestamp]
+ EvGCSweepStart = 11 // GC sweep start [timestamp, stack id]
+ EvGCSweepDone = 12 // GC sweep done [timestamp, swept, reclaimed]
+ EvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
+ EvGoStart = 14 // goroutine starts running [timestamp, goroutine id, seq]
+ EvGoEnd = 15 // goroutine ends [timestamp]
+ EvGoStop = 16 // goroutine stops (like in select{}) [timestamp, stack]
+ EvGoSched = 17 // goroutine calls Gosched [timestamp, stack]
+ EvGoPreempt = 18 // goroutine is preempted [timestamp, stack]
+ EvGoSleep = 19 // goroutine calls Sleep [timestamp, stack]
+ EvGoBlock = 20 // goroutine blocks [timestamp, stack]
+ EvGoUnblock = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
+ EvGoBlockSend = 22 // goroutine blocks on chan send [timestamp, stack]
+ EvGoBlockRecv = 23 // goroutine blocks on chan recv [timestamp, stack]
+ EvGoBlockSelect = 24 // goroutine blocks on select [timestamp, stack]
+ EvGoBlockSync = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack]
+ EvGoBlockCond = 26 // goroutine blocks on Cond [timestamp, stack]
+ EvGoBlockNet = 27 // goroutine blocks on network [timestamp, stack]
+ EvGoSysCall = 28 // syscall enter [timestamp, stack]
+ EvGoSysExit = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
+ EvGoSysBlock = 30 // syscall blocks [timestamp]
+ EvGoWaiting = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
+ EvGoInSyscall = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
+ EvHeapAlloc = 33 // gcController.heapLive change [timestamp, heap live bytes]
+ EvHeapGoal = 34 // gcController.heapGoal change [timestamp, heap goal bytes]
+ EvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
+ EvFutileWakeup = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
+ EvString = 37 // string dictionary entry [ID, length, string]
+ EvGoStartLocal = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
+ EvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
+ EvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
+ EvGoStartLabel = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id]
+ EvGoBlockGC = 42 // goroutine blocks on GC assist [timestamp, stack]
+ EvGCMarkAssistStart = 43 // GC mark assist start [timestamp, stack]
+ EvGCMarkAssistDone = 44 // GC mark assist done [timestamp]
+ EvUserTaskCreate = 45 // trace.NewContext [timestamp, internal task id, internal parent id, stack, name string]
+ EvUserTaskEnd = 46 // end of task [timestamp, internal task id, stack]
+ EvUserRegion = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), stack, name string]
+ EvUserLog = 48 // trace.Log [timestamp, internal id, key string id, stack, value string]
+ EvCPUSample = 49 // CPU profiling sample [timestamp, stack, real timestamp, real P id (-1 when absent), goroutine id]
+ EvCount = 50
+)
+
+var EventDescriptions = [EvCount]struct {
+ Name string
+ minVersion int
+ Stack bool
+ Args []string
+ SArgs []string // string arguments
+}{
+ EvNone: {"None", 1005, false, []string{}, nil},
+ EvBatch: {"Batch", 1005, false, []string{"p", "ticks"}, nil}, // in 1.5 format it was {"p", "seq", "ticks"}
+ EvFrequency: {"Frequency", 1005, false, []string{"freq"}, nil}, // in 1.5 format it was {"freq", "unused"}
+ EvStack: {"Stack", 1005, false, []string{"id", "siz"}, nil},
+ EvGomaxprocs: {"Gomaxprocs", 1005, true, []string{"procs"}, nil},
+ EvProcStart: {"ProcStart", 1005, false, []string{"thread"}, nil},
+ EvProcStop: {"ProcStop", 1005, false, []string{}, nil},
+ EvGCStart: {"GCStart", 1005, true, []string{"seq"}, nil}, // in 1.5 format it was {}
+ EvGCDone: {"GCDone", 1005, false, []string{}, nil},
+ EvGCSTWStart: {"GCSTWStart", 1005, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0})
+ EvGCSTWDone: {"GCSTWDone", 1005, false, []string{}, nil},
+ EvGCSweepStart: {"GCSweepStart", 1005, true, []string{}, nil},
+ EvGCSweepDone: {"GCSweepDone", 1005, false, []string{"swept", "reclaimed"}, nil}, // before 1.9, format was {}
+ EvGoCreate: {"GoCreate", 1005, true, []string{"g", "stack"}, nil},
+ EvGoStart: {"GoStart", 1005, false, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
+ EvGoEnd: {"GoEnd", 1005, false, []string{}, nil},
+ EvGoStop: {"GoStop", 1005, true, []string{}, nil},
+ EvGoSched: {"GoSched", 1005, true, []string{}, nil},
+ EvGoPreempt: {"GoPreempt", 1005, true, []string{}, nil},
+ EvGoSleep: {"GoSleep", 1005, true, []string{}, nil},
+ EvGoBlock: {"GoBlock", 1005, true, []string{}, nil},
+ EvGoUnblock: {"GoUnblock", 1005, true, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
+ EvGoBlockSend: {"GoBlockSend", 1005, true, []string{}, nil},
+ EvGoBlockRecv: {"GoBlockRecv", 1005, true, []string{}, nil},
+ EvGoBlockSelect: {"GoBlockSelect", 1005, true, []string{}, nil},
+ EvGoBlockSync: {"GoBlockSync", 1005, true, []string{}, nil},
+ EvGoBlockCond: {"GoBlockCond", 1005, true, []string{}, nil},
+ EvGoBlockNet: {"GoBlockNet", 1005, true, []string{}, nil},
+ EvGoSysCall: {"GoSysCall", 1005, true, []string{}, nil},
+ EvGoSysExit: {"GoSysExit", 1005, false, []string{"g", "seq", "ts"}, nil},
+ EvGoSysBlock: {"GoSysBlock", 1005, false, []string{}, nil},
+ EvGoWaiting: {"GoWaiting", 1005, false, []string{"g"}, nil},
+ EvGoInSyscall: {"GoInSyscall", 1005, false, []string{"g"}, nil},
+ EvHeapAlloc: {"HeapAlloc", 1005, false, []string{"mem"}, nil},
+ EvHeapGoal: {"HeapGoal", 1005, false, []string{"mem"}, nil},
+ EvTimerGoroutine: {"TimerGoroutine", 1005, false, []string{"g"}, nil}, // in 1.5 format it was {"g", "unused"}
+ EvFutileWakeup: {"FutileWakeup", 1005, false, []string{}, nil},
+ EvString: {"String", 1007, false, []string{}, nil},
+ EvGoStartLocal: {"GoStartLocal", 1007, false, []string{"g"}, nil},
+ EvGoUnblockLocal: {"GoUnblockLocal", 1007, true, []string{"g"}, nil},
+ EvGoSysExitLocal: {"GoSysExitLocal", 1007, false, []string{"g", "ts"}, nil},
+ EvGoStartLabel: {"GoStartLabel", 1008, false, []string{"g", "seq", "labelid"}, []string{"label"}},
+ EvGoBlockGC: {"GoBlockGC", 1008, true, []string{}, nil},
+ EvGCMarkAssistStart: {"GCMarkAssistStart", 1009, true, []string{}, nil},
+ EvGCMarkAssistDone: {"GCMarkAssistDone", 1009, false, []string{}, nil},
+ EvUserTaskCreate: {"UserTaskCreate", 1011, true, []string{"taskid", "pid", "typeid"}, []string{"name"}},
+ EvUserTaskEnd: {"UserTaskEnd", 1011, true, []string{"taskid"}, nil},
+ EvUserRegion: {"UserRegion", 1011, true, []string{"taskid", "mode", "typeid"}, []string{"name"}},
+ EvUserLog: {"UserLog", 1011, true, []string{"id", "keyid"}, []string{"category", "message"}},
+ EvCPUSample: {"CPUSample", 1019, true, []string{"ts", "p", "g"}, nil},
+}
diff --git a/src/internal/trace/parser_test.go b/src/internal/trace/parser_test.go
new file mode 100644
index 0000000..cdab95a
--- /dev/null
+++ b/src/internal/trace/parser_test.go
@@ -0,0 +1,110 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+ "bytes"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+func TestCorruptedInputs(t *testing.T) {
+ // These inputs crashed parser previously.
+ tests := []string{
+ "gotrace\x00\x020",
+ "gotrace\x00Q00\x020",
+ "gotrace\x00T00\x020",
+ "gotrace\x00\xc3\x0200",
+ "go 1.5 trace\x00\x00\x00\x00\x020",
+ "go 1.5 trace\x00\x00\x00\x00Q00\x020",
+ "go 1.5 trace\x00\x00\x00\x00T00\x020",
+ "go 1.5 trace\x00\x00\x00\x00\xc3\x0200",
+ }
+ for _, data := range tests {
+ res, err := Parse(strings.NewReader(data), "")
+ if err == nil || res.Events != nil || res.Stacks != nil {
+ t.Fatalf("no error on input: %q", data)
+ }
+ }
+}
+
+func TestParseCanned(t *testing.T) {
+ files, err := os.ReadDir("./testdata")
+ if err != nil {
+ t.Fatalf("failed to read ./testdata: %v", err)
+ }
+ for _, f := range files {
+ info, err := f.Info()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if testing.Short() && info.Size() > 10000 {
+ continue
+ }
+ name := filepath.Join("./testdata", f.Name())
+ data, err := os.ReadFile(name)
+ if err != nil {
+ t.Fatal(err)
+ }
+ // Instead of Parse that requires a proper binary name for old traces,
+ // we use 'parse' that omits symbol lookup if an empty string is given.
+ _, _, err = parse(bytes.NewReader(data), "")
+ switch {
+ case strings.HasSuffix(f.Name(), "_good"):
+ if err != nil {
+ t.Errorf("failed to parse good trace %v: %v", f.Name(), err)
+ }
+ case strings.HasSuffix(f.Name(), "_unordered"):
+ if err != ErrTimeOrder {
+ t.Errorf("unordered trace is not detected %v: %v", f.Name(), err)
+ }
+ default:
+ t.Errorf("unknown input file suffix: %v", f.Name())
+ }
+ }
+}
+
+func TestParseVersion(t *testing.T) {
+ tests := map[string]int{
+ "go 1.5 trace\x00\x00\x00\x00": 1005,
+ "go 1.7 trace\x00\x00\x00\x00": 1007,
+ "go 1.10 trace\x00\x00\x00": 1010,
+ "go 1.25 trace\x00\x00\x00": 1025,
+ "go 1.234 trace\x00\x00": 1234,
+ "go 1.2345 trace\x00": -1,
+ "go 0.0 trace\x00\x00\x00\x00": -1,
+ "go a.b trace\x00\x00\x00\x00": -1,
+ }
+ for header, ver := range tests {
+ ver1, err := parseHeader([]byte(header))
+ if ver == -1 {
+ if err == nil {
+ t.Fatalf("no error on input: %q, version %v", header, ver1)
+ }
+ } else {
+ if err != nil {
+ t.Fatalf("failed to parse: %q (%v)", header, err)
+ }
+ if ver != ver1 {
+ t.Fatalf("wrong version: %v, want %v, input: %q", ver1, ver, header)
+ }
+ }
+ }
+}
+
+func TestTimestampOverflow(t *testing.T) {
+ // Test that parser correctly handles large timestamps (long tracing).
+ w := NewWriter()
+ w.Emit(EvBatch, 0, 0)
+ w.Emit(EvFrequency, 1e9)
+ for ts := uint64(1); ts < 1e16; ts *= 2 {
+ w.Emit(EvGoCreate, ts, ts, 0, 0)
+ }
+ if _, err := Parse(w, ""); err != nil {
+ t.Fatalf("failed to parse: %v", err)
+ }
+}
diff --git a/src/internal/trace/testdata/http_1_10_good b/src/internal/trace/testdata/http_1_10_good
new file mode 100644
index 0000000..a4f2ed8
--- /dev/null
+++ b/src/internal/trace/testdata/http_1_10_good
Binary files differ
diff --git a/src/internal/trace/testdata/http_1_11_good b/src/internal/trace/testdata/http_1_11_good
new file mode 100644
index 0000000..0efcc6f
--- /dev/null
+++ b/src/internal/trace/testdata/http_1_11_good
Binary files differ
diff --git a/src/internal/trace/testdata/http_1_19_good b/src/internal/trace/testdata/http_1_19_good
new file mode 100644
index 0000000..c1d519e
--- /dev/null
+++ b/src/internal/trace/testdata/http_1_19_good
Binary files differ
diff --git a/src/internal/trace/testdata/http_1_5_good b/src/internal/trace/testdata/http_1_5_good
new file mode 100644
index 0000000..0736cae
--- /dev/null
+++ b/src/internal/trace/testdata/http_1_5_good
Binary files differ
diff --git a/src/internal/trace/testdata/http_1_7_good b/src/internal/trace/testdata/http_1_7_good
new file mode 100644
index 0000000..b0e318e
--- /dev/null
+++ b/src/internal/trace/testdata/http_1_7_good
Binary files differ
diff --git a/src/internal/trace/testdata/http_1_9_good b/src/internal/trace/testdata/http_1_9_good
new file mode 100644
index 0000000..ca89278
--- /dev/null
+++ b/src/internal/trace/testdata/http_1_9_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_1_10_good b/src/internal/trace/testdata/stress_1_10_good
new file mode 100644
index 0000000..19778b0
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_10_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_1_11_good b/src/internal/trace/testdata/stress_1_11_good
new file mode 100644
index 0000000..6468d89
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_11_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_1_19_good b/src/internal/trace/testdata/stress_1_19_good
new file mode 100644
index 0000000..13f5926
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_19_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_1_5_good b/src/internal/trace/testdata/stress_1_5_good
new file mode 100644
index 0000000..c5055eb
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_5_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_1_5_unordered b/src/internal/trace/testdata/stress_1_5_unordered
new file mode 100644
index 0000000..11f7d74
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_5_unordered
Binary files differ
diff --git a/src/internal/trace/testdata/stress_1_7_good b/src/internal/trace/testdata/stress_1_7_good
new file mode 100644
index 0000000..b4d927d
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_7_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_1_9_good b/src/internal/trace/testdata/stress_1_9_good
new file mode 100644
index 0000000..dcf17f1
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_9_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_start_stop_1_10_good b/src/internal/trace/testdata/stress_start_stop_1_10_good
new file mode 100644
index 0000000..b908e10
--- /dev/null
+++ b/src/internal/trace/testdata/stress_start_stop_1_10_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_start_stop_1_11_good b/src/internal/trace/testdata/stress_start_stop_1_11_good
new file mode 100644
index 0000000..457f01a
--- /dev/null
+++ b/src/internal/trace/testdata/stress_start_stop_1_11_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_start_stop_1_19_good b/src/internal/trace/testdata/stress_start_stop_1_19_good
new file mode 100644
index 0000000..92d9278
--- /dev/null
+++ b/src/internal/trace/testdata/stress_start_stop_1_19_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_start_stop_1_5_good b/src/internal/trace/testdata/stress_start_stop_1_5_good
new file mode 100644
index 0000000..72a887b
--- /dev/null
+++ b/src/internal/trace/testdata/stress_start_stop_1_5_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_start_stop_1_7_good b/src/internal/trace/testdata/stress_start_stop_1_7_good
new file mode 100644
index 0000000..c23ed7d
--- /dev/null
+++ b/src/internal/trace/testdata/stress_start_stop_1_7_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_start_stop_1_9_good b/src/internal/trace/testdata/stress_start_stop_1_9_good
new file mode 100644
index 0000000..f00f190
--- /dev/null
+++ b/src/internal/trace/testdata/stress_start_stop_1_9_good
Binary files differ
diff --git a/src/internal/trace/testdata/user_task_region_1_11_good b/src/internal/trace/testdata/user_task_region_1_11_good
new file mode 100644
index 0000000..f4edb67
--- /dev/null
+++ b/src/internal/trace/testdata/user_task_region_1_11_good
Binary files differ
diff --git a/src/internal/trace/testdata/user_task_region_1_19_good b/src/internal/trace/testdata/user_task_region_1_19_good
new file mode 100644
index 0000000..1daa3b2
--- /dev/null
+++ b/src/internal/trace/testdata/user_task_region_1_19_good
Binary files differ
diff --git a/src/internal/trace/writer.go b/src/internal/trace/writer.go
new file mode 100644
index 0000000..dd0b9f1
--- /dev/null
+++ b/src/internal/trace/writer.go
@@ -0,0 +1,49 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import "bytes"
+
+// Writer is a test trace writer.
+type Writer struct {
+ bytes.Buffer
+}
+
+func NewWriter() *Writer {
+ w := new(Writer)
+ w.Write([]byte("go 1.9 trace\x00\x00\x00\x00"))
+ return w
+}
+
+// Emit writes an event record to the trace.
+// See Event types for valid types and required arguments.
+func (w *Writer) Emit(typ byte, args ...uint64) {
+ nargs := byte(len(args)) - 1
+ if nargs > 3 {
+ nargs = 3
+ }
+ buf := []byte{typ | nargs<<6}
+ if nargs == 3 {
+ buf = append(buf, 0)
+ }
+ for _, a := range args {
+ buf = appendVarint(buf, a)
+ }
+ if nargs == 3 {
+ buf[1] = byte(len(buf) - 2)
+ }
+ n, err := w.Write(buf)
+ if n != len(buf) || err != nil {
+ panic("failed to write")
+ }
+}
+
+func appendVarint(buf []byte, v uint64) []byte {
+ for ; v >= 0x80; v >>= 7 {
+ buf = append(buf, 0x80|byte(v))
+ }
+ buf = append(buf, byte(v))
+ return buf
+}