Diffstat
27 files changed, 3253 insertions, 0 deletions
diff --git a/src/internal/trace/gc.go b/src/internal/trace/gc.go new file mode 100644 index 0000000..cc19fdf --- /dev/null +++ b/src/internal/trace/gc.go @@ -0,0 +1,825 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import ( + "container/heap" + "math" + "sort" + "strings" + "time" +) + +// MutatorUtil is a change in mutator utilization at a particular +// time. Mutator utilization functions are represented as a +// time-ordered []MutatorUtil. +type MutatorUtil struct { + Time int64 + // Util is the mean mutator utilization starting at Time. This + // is in the range [0, 1]. + Util float64 +} + +// UtilFlags controls the behavior of MutatorUtilization. +type UtilFlags int + +const ( + // UtilSTW means utilization should account for STW events. + UtilSTW UtilFlags = 1 << iota + // UtilBackground means utilization should account for + // background mark workers. + UtilBackground + // UtilAssist means utilization should account for mark + // assists. + UtilAssist + // UtilSweep means utilization should account for sweeping. + UtilSweep + + // UtilPerProc means each P should be given a separate + // utilization function. Otherwise, there is a single function + // and each P is given a fraction of the utilization. + UtilPerProc +) + +// MutatorUtilization returns a set of mutator utilization functions +// for the given trace. Each function will always end with 0 +// utilization. The bounds of each function are implicit in the first +// and last event; outside of these bounds each function is undefined. +// +// If the UtilPerProc flag is not given, this always returns a single +// utilization function. Otherwise, it returns one function per P. +func MutatorUtilization(events []*Event, flags UtilFlags) [][]MutatorUtil { + if len(events) == 0 { + return nil + } + + type perP struct { + // gc > 0 indicates that GC is active on this P. + gc int + // series the logical series number for this P. This + // is necessary because Ps may be removed and then + // re-added, and then the new P needs a new series. + series int + } + ps := []perP{} + stw := 0 + + out := [][]MutatorUtil{} + assists := map[uint64]bool{} + block := map[uint64]*Event{} + bgMark := map[uint64]bool{} + + for _, ev := range events { + switch ev.Type { + case EvGomaxprocs: + gomaxprocs := int(ev.Args[0]) + if len(ps) > gomaxprocs { + if flags&UtilPerProc != 0 { + // End each P's series. + for _, p := range ps[gomaxprocs:] { + out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0}) + } + } + ps = ps[:gomaxprocs] + } + for len(ps) < gomaxprocs { + // Start new P's series. + series := 0 + if flags&UtilPerProc != 0 || len(out) == 0 { + series = len(out) + out = append(out, []MutatorUtil{{ev.Ts, 1}}) + } + ps = append(ps, perP{series: series}) + } + case EvGCSTWStart: + if flags&UtilSTW != 0 { + stw++ + } + case EvGCSTWDone: + if flags&UtilSTW != 0 { + stw-- + } + case EvGCMarkAssistStart: + if flags&UtilAssist != 0 { + ps[ev.P].gc++ + assists[ev.G] = true + } + case EvGCMarkAssistDone: + if flags&UtilAssist != 0 { + ps[ev.P].gc-- + delete(assists, ev.G) + } + case EvGCSweepStart: + if flags&UtilSweep != 0 { + ps[ev.P].gc++ + } + case EvGCSweepDone: + if flags&UtilSweep != 0 { + ps[ev.P].gc-- + } + case EvGoStartLabel: + if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" { + // Background mark worker. 
+ // + // If we're in per-proc mode, we don't + // count dedicated workers because + // they kick all of the goroutines off + // that P, so don't directly + // contribute to goroutine latency. + if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") { + bgMark[ev.G] = true + ps[ev.P].gc++ + } + } + fallthrough + case EvGoStart: + if assists[ev.G] { + // Unblocked during assist. + ps[ev.P].gc++ + } + block[ev.G] = ev.Link + default: + if ev != block[ev.G] { + continue + } + + if assists[ev.G] { + // Blocked during assist. + ps[ev.P].gc-- + } + if bgMark[ev.G] { + // Background mark worker done. + ps[ev.P].gc-- + delete(bgMark, ev.G) + } + delete(block, ev.G) + } + + if flags&UtilPerProc == 0 { + // Compute the current average utilization. + if len(ps) == 0 { + continue + } + gcPs := 0 + if stw > 0 { + gcPs = len(ps) + } else { + for i := range ps { + if ps[i].gc > 0 { + gcPs++ + } + } + } + mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))} + + // Record the utilization change. (Since + // len(ps) == len(out), we know len(out) > 0.) + out[0] = addUtil(out[0], mu) + } else { + // Check for per-P utilization changes. + for i := range ps { + p := &ps[i] + util := 1.0 + if stw > 0 || p.gc > 0 { + util = 0.0 + } + out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util}) + } + } + } + + // Add final 0 utilization event to any remaining series. This + // is important to mark the end of the trace. The exact value + // shouldn't matter since no window should extend beyond this, + // but using 0 is symmetric with the start of the trace. + mu := MutatorUtil{events[len(events)-1].Ts, 0} + for i := range ps { + out[ps[i].series] = addUtil(out[ps[i].series], mu) + } + return out +} + +func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil { + if len(util) > 0 { + if mu.Util == util[len(util)-1].Util { + // No change. + return util + } + if mu.Time == util[len(util)-1].Time { + // Take the lowest utilization at a time stamp. + if mu.Util < util[len(util)-1].Util { + util[len(util)-1] = mu + } + return util + } + } + return append(util, mu) +} + +// totalUtil is total utilization, measured in nanoseconds. This is a +// separate type primarily to distinguish it from mean utilization, +// which is also a float64. +type totalUtil float64 + +func totalUtilOf(meanUtil float64, dur int64) totalUtil { + return totalUtil(meanUtil * float64(dur)) +} + +// mean returns the mean utilization over dur. +func (u totalUtil) mean(dur time.Duration) float64 { + return float64(u) / float64(dur) +} + +// An MMUCurve is the minimum mutator utilization curve across +// multiple window sizes. +type MMUCurve struct { + series []mmuSeries +} + +type mmuSeries struct { + util []MutatorUtil + // sums[j] is the cumulative sum of util[:j]. + sums []totalUtil + // bands summarizes util in non-overlapping bands of duration + // bandDur. + bands []mmuBand + // bandDur is the duration of each band. + bandDur int64 +} + +type mmuBand struct { + // minUtil is the minimum instantaneous mutator utilization in + // this band. + minUtil float64 + // cumUtil is the cumulative total mutator utilization between + // time 0 and the left edge of this band. + cumUtil totalUtil + + // integrator is the integrator for the left edge of this + // band. + integrator integrator +} + +// NewMMUCurve returns an MMU curve for the given mutator utilization +// function. 
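As a quick illustration of how the pieces above compose (a minimal sketch, not part of this change: it mirrors the square-wave series used by TestMMU in gc_test.go further down, and assumes it is compiled inside package trace with fmt and time imported):

	util := [][]MutatorUtil{{
		{0e9, 1}, {1e9, 0},
		{2e9, 1}, {3e9, 0},
		{4e9, 1}, {5e9, 0},
	}}
	c := NewMMUCurve(util)
	// Every 2-second window of this trace contains exactly 1s of GC,
	// so the minimum mean mutator utilization is 0.5.
	fmt.Println(c.MMU(2 * time.Second))
	// Up to two disjoint worst-case windows of that size.
	fmt.Println(c.Examples(2*time.Second, 2))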
+func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve { + series := make([]mmuSeries, len(utils)) + for i, util := range utils { + series[i] = newMMUSeries(util) + } + return &MMUCurve{series} +} + +// bandsPerSeries is the number of bands to divide each series into. +// This is only changed by tests. +var bandsPerSeries = 1000 + +func newMMUSeries(util []MutatorUtil) mmuSeries { + // Compute cumulative sum. + sums := make([]totalUtil, len(util)) + var prev MutatorUtil + var sum totalUtil + for j, u := range util { + sum += totalUtilOf(prev.Util, u.Time-prev.Time) + sums[j] = sum + prev = u + } + + // Divide the utilization curve up into equal size + // non-overlapping "bands" and compute a summary for each of + // these bands. + // + // Compute the duration of each band. + numBands := bandsPerSeries + if numBands > len(util) { + // There's no point in having lots of bands if there + // aren't many events. + numBands = len(util) + } + dur := util[len(util)-1].Time - util[0].Time + bandDur := (dur + int64(numBands) - 1) / int64(numBands) + if bandDur < 1 { + bandDur = 1 + } + // Compute the bands. There are numBands+1 bands in order to + // record the final cumulative sum. + bands := make([]mmuBand, numBands+1) + s := mmuSeries{util, sums, bands, bandDur} + leftSum := integrator{&s, 0} + for i := range bands { + startTime, endTime := s.bandTime(i) + cumUtil := leftSum.advance(startTime) + predIdx := leftSum.pos + minUtil := 1.0 + for i := predIdx; i < len(util) && util[i].Time < endTime; i++ { + minUtil = math.Min(minUtil, util[i].Util) + } + bands[i] = mmuBand{minUtil, cumUtil, leftSum} + } + + return s +} + +func (s *mmuSeries) bandTime(i int) (start, end int64) { + start = int64(i)*s.bandDur + s.util[0].Time + end = start + s.bandDur + return +} + +type bandUtil struct { + // Utilization series index + series int + // Band index + i int + // Lower bound of mutator utilization for all windows + // with a left edge in this band. + utilBound float64 +} + +type bandUtilHeap []bandUtil + +func (h bandUtilHeap) Len() int { + return len(h) +} + +func (h bandUtilHeap) Less(i, j int) bool { + return h[i].utilBound < h[j].utilBound +} + +func (h bandUtilHeap) Swap(i, j int) { + h[i], h[j] = h[j], h[i] +} + +func (h *bandUtilHeap) Push(x interface{}) { + *h = append(*h, x.(bandUtil)) +} + +func (h *bandUtilHeap) Pop() interface{} { + x := (*h)[len(*h)-1] + *h = (*h)[:len(*h)-1] + return x +} + +// UtilWindow is a specific window at Time. +type UtilWindow struct { + Time int64 + // MutatorUtil is the mean mutator utilization in this window. + MutatorUtil float64 +} + +type utilHeap []UtilWindow + +func (h utilHeap) Len() int { + return len(h) +} + +func (h utilHeap) Less(i, j int) bool { + if h[i].MutatorUtil != h[j].MutatorUtil { + return h[i].MutatorUtil > h[j].MutatorUtil + } + return h[i].Time > h[j].Time +} + +func (h utilHeap) Swap(i, j int) { + h[i], h[j] = h[j], h[i] +} + +func (h *utilHeap) Push(x interface{}) { + *h = append(*h, x.(UtilWindow)) +} + +func (h *utilHeap) Pop() interface{} { + x := (*h)[len(*h)-1] + *h = (*h)[:len(*h)-1] + return x +} + +// An accumulator takes a windowed mutator utilization function and +// tracks various statistics for that function. +type accumulator struct { + mmu float64 + + // bound is the mutator utilization bound where adding any + // mutator utilization above this bound cannot affect the + // accumulated statistics. 
+ bound float64 + + // Worst N window tracking + nWorst int + wHeap utilHeap + + // Mutator utilization distribution tracking + mud *mud + // preciseMass is the distribution mass that must be precise + // before accumulation is stopped. + preciseMass float64 + // lastTime and lastMU are the previous point added to the + // windowed mutator utilization function. + lastTime int64 + lastMU float64 +} + +// resetTime declares a discontinuity in the windowed mutator +// utilization function by resetting the current time. +func (acc *accumulator) resetTime() { + // This only matters for distribution collection, since that's + // the only thing that depends on the progression of the + // windowed mutator utilization function. + acc.lastTime = math.MaxInt64 +} + +// addMU adds a point to the windowed mutator utilization function at +// (time, mu). This must be called for monotonically increasing values +// of time. +// +// It returns true if further calls to addMU would be pointless. +func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool { + if mu < acc.mmu { + acc.mmu = mu + } + acc.bound = acc.mmu + + if acc.nWorst == 0 { + // If the minimum has reached zero, it can't go any + // lower, so we can stop early. + return mu == 0 + } + + // Consider adding this window to the n worst. + if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil { + // This window is lower than the K'th worst window. + // + // Check if there's any overlapping window + // already in the heap and keep whichever is + // worse. + for i, ui := range acc.wHeap { + if time+int64(window) > ui.Time && ui.Time+int64(window) > time { + if ui.MutatorUtil <= mu { + // Keep the first window. + goto keep + } else { + // Replace it with this window. + heap.Remove(&acc.wHeap, i) + break + } + } + } + + heap.Push(&acc.wHeap, UtilWindow{time, mu}) + if len(acc.wHeap) > acc.nWorst { + heap.Pop(&acc.wHeap) + } + keep: + } + + if len(acc.wHeap) < acc.nWorst { + // We don't have N windows yet, so keep accumulating. + acc.bound = 1.0 + } else { + // Anything above the least worst window has no effect. + acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil) + } + + if acc.mud != nil { + if acc.lastTime != math.MaxInt64 { + // Update distribution. + acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime)) + } + acc.lastTime, acc.lastMU = time, mu + if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok { + acc.bound = math.Max(acc.bound, mudBound) + } else { + // We haven't accumulated enough total precise + // mass yet to even reach our goal, so keep + // accumulating. + acc.bound = 1 + } + // It's not worth checking percentiles every time, so + // just keep accumulating this band. + return false + } + + // If we've found enough 0 utilizations, we can stop immediately. + return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0 +} + +// MMU returns the minimum mutator utilization for the given time +// window. This is the minimum utilization for all windows of this +// duration across the execution. The returned value is in the range +// [0, 1]. +func (c *MMUCurve) MMU(window time.Duration) (mmu float64) { + acc := accumulator{mmu: 1.0, bound: 1.0} + c.mmu(window, &acc) + return acc.mmu +} + +// Examples returns n specific examples of the lowest mutator +// utilization for the given window size. The returned windows will be +// disjoint (otherwise there would be a huge number of +// mostly-overlapping windows at the single lowest point). 
There are +// no guarantees on which set of disjoint windows this returns. +func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) { + acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n} + c.mmu(window, &acc) + sort.Sort(sort.Reverse(acc.wHeap)) + return ([]UtilWindow)(acc.wHeap) +} + +// MUD returns mutator utilization distribution quantiles for the +// given window size. +// +// The mutator utilization distribution is the distribution of mean +// mutator utilization across all windows of the given window size in +// the trace. +// +// The minimum mutator utilization is the minimum (0th percentile) of +// this distribution. (However, if only the minimum is desired, it's +// more efficient to use the MMU method.) +func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 { + if len(quantiles) == 0 { + return []float64{} + } + + // Each unrefined band contributes a known total mass to the + // distribution (bandDur except at the end), but in an unknown + // way. However, we know that all the mass it contributes must + // be at or above its worst-case mean mutator utilization. + // + // Hence, we refine bands until the highest desired + // distribution quantile is less than the next worst-case mean + // mutator utilization. At this point, all further + // contributions to the distribution must be beyond the + // desired quantile and hence cannot affect it. + // + // First, find the highest desired distribution quantile. + maxQ := quantiles[0] + for _, q := range quantiles { + if q > maxQ { + maxQ = q + } + } + // The distribution's mass is in units of time (it's not + // normalized because this would make it more annoying to + // account for future contributions of unrefined bands). The + // total final mass will be the duration of the trace itself + // minus the window size. Using this, we can compute the mass + // corresponding to quantile maxQ. + var duration int64 + for _, s := range c.series { + duration1 := s.util[len(s.util)-1].Time - s.util[0].Time + if duration1 >= int64(window) { + duration += duration1 - int64(window) + } + } + qMass := float64(duration) * maxQ + + // Accumulate the MUD until we have precise information for + // everything to the left of qMass. + acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: new(mud)} + acc.mud.setTrackMass(qMass) + c.mmu(window, &acc) + + // Evaluate the quantiles on the accumulated MUD. + out := make([]float64, len(quantiles)) + for i := range out { + mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i]) + if math.IsNaN(mu) { + // There are a few legitimate ways this can + // happen: + // + // 1. If the window is the full trace + // duration, then the windowed MU function is + // only defined at a single point, so the MU + // distribution is not well-defined. + // + // 2. If there are no events, then the MU + // distribution has no mass. + // + // Either way, all of the quantiles will have + // converged toward the MMU at this point. + mu = acc.mmu + } + out[i] = mu + } + return out +} + +func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) { + if window <= 0 { + acc.mmu = 0 + return + } + + var bandU bandUtilHeap + windows := make([]time.Duration, len(c.series)) + for i, s := range c.series { + windows[i] = window + if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max { + windows[i] = max + } + + bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i])) + if bandU == nil { + bandU = bandU1 + } else { + bandU = append(bandU, bandU1...) 
+ } + } + + // Process bands from lowest utilization bound to highest. + heap.Init(&bandU) + + // Refine each band into a precise window and MMU until + // refining the next lowest band can no longer affect the MMU + // or windows. + for len(bandU) > 0 && bandU[0].utilBound < acc.bound { + i := bandU[0].series + c.series[i].bandMMU(bandU[0].i, windows[i], acc) + heap.Pop(&bandU) + } +} + +func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil { + // For each band, compute the worst-possible total mutator + // utilization for all windows that start in that band. + + // minBands is the minimum number of bands a window can span + // and maxBands is the maximum number of bands a window can + // span in any alignment. + minBands := int((int64(window) + c.bandDur - 1) / c.bandDur) + maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur) + if window > 1 && maxBands < 2 { + panic("maxBands < 2") + } + tailDur := int64(window) % c.bandDur + nUtil := len(c.bands) - maxBands + 1 + if nUtil < 0 { + nUtil = 0 + } + bandU := make([]bandUtil, nUtil) + for i := range bandU { + // To compute the worst-case MU, we assume the minimum + // for any bands that are only partially overlapped by + // some window and the mean for any bands that are + // completely covered by all windows. + var util totalUtil + + // Find the lowest and second lowest of the partial + // bands. + l := c.bands[i].minUtil + r1 := c.bands[i+minBands-1].minUtil + r2 := c.bands[i+maxBands-1].minUtil + minBand := math.Min(l, math.Min(r1, r2)) + // Assume the worst window maximally overlaps the + // worst minimum and then the rest overlaps the second + // worst minimum. + if minBands == 1 { + util += totalUtilOf(minBand, int64(window)) + } else { + util += totalUtilOf(minBand, c.bandDur) + midBand := 0.0 + switch { + case minBand == l: + midBand = math.Min(r1, r2) + case minBand == r1: + midBand = math.Min(l, r2) + case minBand == r2: + midBand = math.Min(l, r1) + } + util += totalUtilOf(midBand, tailDur) + } + + // Add the total mean MU of bands that are completely + // overlapped by all windows. + if minBands > 2 { + util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil + } + + bandU[i] = bandUtil{series, i, util.mean(window)} + } + + return bandU +} + +// bandMMU computes the precise minimum mutator utilization for +// windows with a left edge in band bandIdx. +func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) { + util := c.util + + // We think of the mutator utilization over time as the + // box-filtered utilization function, which we call the + // "windowed mutator utilization function". The resulting + // function is continuous and piecewise linear (unless + // window==0, which we handle elsewhere), where the boundaries + // between segments occur when either edge of the window + // encounters a change in the instantaneous mutator + // utilization function. Hence, the minimum of this function + // will always occur when one of the edges of the window + // aligns with a utilization change, so these are the only + // points we need to consider. + // + // We compute the mutator utilization function incrementally + // by tracking the integral from t=0 to the left edge of the + // window and to the right edge of the window. 
+ left := c.bands[bandIdx].integrator + right := left + time, endTime := c.bandTime(bandIdx) + if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime { + endTime = utilEnd + } + acc.resetTime() + for { + // Advance edges to time and time+window. + mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window) + if acc.addMU(time, mu, window) { + break + } + if time == endTime { + break + } + + // The maximum slope of the windowed mutator + // utilization function is 1/window, so we can always + // advance the time by at least (mu - mmu) * window + // without dropping below mmu. + minTime := time + int64((mu-acc.bound)*float64(window)) + + // Advance the window to the next time where either + // the left or right edge of the window encounters a + // change in the utilization curve. + if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 { + time = t1 + } else { + time = t2 + } + if time < minTime { + time = minTime + } + if time >= endTime { + // For MMUs we could stop here, but for MUDs + // it's important that we span the entire + // band. + time = endTime + } + } +} + +// An integrator tracks a position in a utilization function and +// integrates it. +type integrator struct { + u *mmuSeries + // pos is the index in u.util of the current time's non-strict + // predecessor. + pos int +} + +// advance returns the integral of the utilization function from 0 to +// time. advance must be called on monotonically increasing values of +// times. +func (in *integrator) advance(time int64) totalUtil { + util, pos := in.u.util, in.pos + // Advance pos until pos+1 is time's strict successor (making + // pos time's non-strict predecessor). + // + // Very often, this will be nearby, so we optimize that case, + // but it may be arbitrarily far away, so we handled that + // efficiently, too. + const maxSeq = 8 + if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time { + // Nearby. Use a linear scan. + for pos+1 < len(util) && util[pos+1].Time <= time { + pos++ + } + } else { + // Far. Binary search for time's strict successor. + l, r := pos, len(util) + for l < r { + h := int(uint(l+r) >> 1) + if util[h].Time <= time { + l = h + 1 + } else { + r = h + } + } + pos = l - 1 // Non-strict predecessor. + } + in.pos = pos + var partial totalUtil + if time != util[pos].Time { + partial = totalUtilOf(util[pos].Util, time-util[pos].Time) + } + return in.u.sums[pos] + partial +} + +// next returns the smallest time t' > time of a change in the +// utilization function. +func (in *integrator) next(time int64) int64 { + for _, u := range in.u.util[in.pos:] { + if u.Time > time { + return u.Time + } + } + return 1<<63 - 1 +} diff --git a/src/internal/trace/gc_test.go b/src/internal/trace/gc_test.go new file mode 100644 index 0000000..9b9771e --- /dev/null +++ b/src/internal/trace/gc_test.go @@ -0,0 +1,202 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import ( + "bytes" + "math" + "os" + "testing" + "time" +) + +// aeq returns true if x and y are equal up to 8 digits (1 part in 100 +// million). 
+func aeq(x, y float64) bool { + if x < 0 && y < 0 { + x, y = -x, -y + } + const digits = 8 + factor := 1 - math.Pow(10, -digits+1) + return x*factor <= y && y*factor <= x +} + +func TestMMU(t *testing.T) { + t.Parallel() + + // MU + // 1.0 ***** ***** ***** + // 0.5 * * * * + // 0.0 ***** ***** + // 0 1 2 3 4 5 + util := [][]MutatorUtil{{ + {0e9, 1}, + {1e9, 0}, + {2e9, 1}, + {3e9, 0}, + {4e9, 1}, + {5e9, 0}, + }} + mmuCurve := NewMMUCurve(util) + + for _, test := range []struct { + window time.Duration + want float64 + worst []float64 + }{ + {0, 0, []float64{}}, + {time.Millisecond, 0, []float64{0, 0}}, + {time.Second, 0, []float64{0, 0}}, + {2 * time.Second, 0.5, []float64{0.5, 0.5}}, + {3 * time.Second, 1 / 3.0, []float64{1 / 3.0}}, + {4 * time.Second, 0.5, []float64{0.5}}, + {5 * time.Second, 3 / 5.0, []float64{3 / 5.0}}, + {6 * time.Second, 3 / 5.0, []float64{3 / 5.0}}, + } { + if got := mmuCurve.MMU(test.window); !aeq(test.want, got) { + t.Errorf("for %s window, want mu = %f, got %f", test.window, test.want, got) + } + worst := mmuCurve.Examples(test.window, 2) + // Which exact windows are returned is unspecified + // (and depends on the exact banding), so we just + // check that we got the right number with the right + // utilizations. + if len(worst) != len(test.worst) { + t.Errorf("for %s window, want worst %v, got %v", test.window, test.worst, worst) + } else { + for i := range worst { + if worst[i].MutatorUtil != test.worst[i] { + t.Errorf("for %s window, want worst %v, got %v", test.window, test.worst, worst) + break + } + } + } + } +} + +func TestMMUTrace(t *testing.T) { + // Can't be t.Parallel() because it modifies the + // testingOneBand package variable. + if testing.Short() { + // test input too big for all.bash + t.Skip("skipping in -short mode") + } + + data, err := os.ReadFile("testdata/stress_1_10_good") + if err != nil { + t.Fatalf("failed to read input file: %v", err) + } + _, events, err := parse(bytes.NewReader(data), "") + if err != nil { + t.Fatalf("failed to parse trace: %s", err) + } + mu := MutatorUtilization(events.Events, UtilSTW|UtilBackground|UtilAssist) + mmuCurve := NewMMUCurve(mu) + + // Test the optimized implementation against the "obviously + // correct" implementation. + for window := time.Nanosecond; window < 10*time.Second; window *= 10 { + want := mmuSlow(mu[0], window) + got := mmuCurve.MMU(window) + if !aeq(want, got) { + t.Errorf("want %f, got %f mutator utilization in window %s", want, got, window) + } + } + + // Test MUD with band optimization against MUD without band + // optimization. We don't have a simple testing implementation + // of MUDs (the simplest implementation is still quite + // complex), but this is still a pretty good test. 
+ defer func(old int) { bandsPerSeries = old }(bandsPerSeries) + bandsPerSeries = 1 + mmuCurve2 := NewMMUCurve(mu) + quantiles := []float64{0, 1 - .999, 1 - .99} + for window := time.Microsecond; window < time.Second; window *= 10 { + mud1 := mmuCurve.MUD(window, quantiles) + mud2 := mmuCurve2.MUD(window, quantiles) + for i := range mud1 { + if !aeq(mud1[i], mud2[i]) { + t.Errorf("for quantiles %v at window %v, want %v, got %v", quantiles, window, mud2, mud1) + break + } + } + } +} + +func BenchmarkMMU(b *testing.B) { + data, err := os.ReadFile("testdata/stress_1_10_good") + if err != nil { + b.Fatalf("failed to read input file: %v", err) + } + _, events, err := parse(bytes.NewReader(data), "") + if err != nil { + b.Fatalf("failed to parse trace: %s", err) + } + mu := MutatorUtilization(events.Events, UtilSTW|UtilBackground|UtilAssist|UtilSweep) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mmuCurve := NewMMUCurve(mu) + xMin, xMax := time.Microsecond, time.Second + logMin, logMax := math.Log(float64(xMin)), math.Log(float64(xMax)) + const samples = 100 + for i := 0; i < samples; i++ { + window := time.Duration(math.Exp(float64(i)/(samples-1)*(logMax-logMin) + logMin)) + mmuCurve.MMU(window) + } + } +} + +func mmuSlow(util []MutatorUtil, window time.Duration) (mmu float64) { + if max := time.Duration(util[len(util)-1].Time - util[0].Time); window > max { + window = max + } + + mmu = 1.0 + + // muInWindow returns the mean mutator utilization between + // util[0].Time and end. + muInWindow := func(util []MutatorUtil, end int64) float64 { + total := 0.0 + var prevU MutatorUtil + for _, u := range util { + if u.Time > end { + total += prevU.Util * float64(end-prevU.Time) + break + } + total += prevU.Util * float64(u.Time-prevU.Time) + prevU = u + } + return total / float64(end-util[0].Time) + } + update := func() { + for i, u := range util { + if u.Time+int64(window) > util[len(util)-1].Time { + break + } + mmu = math.Min(mmu, muInWindow(util[i:], u.Time+int64(window))) + } + } + + // Consider all left-aligned windows. + update() + // Reverse the trace. Slightly subtle because each MutatorUtil + // is a *change*. + rutil := make([]MutatorUtil, len(util)) + if util[len(util)-1].Util != 0 { + panic("irreversible trace") + } + for i, u := range util { + util1 := 0.0 + if i != 0 { + util1 = util[i-1].Util + } + rutil[len(rutil)-i-1] = MutatorUtil{Time: -u.Time, Util: util1} + } + util = rutil + // Consider all right-aligned windows. + update() + return +} diff --git a/src/internal/trace/goroutines.go b/src/internal/trace/goroutines.go new file mode 100644 index 0000000..a5fda48 --- /dev/null +++ b/src/internal/trace/goroutines.go @@ -0,0 +1,338 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import "sort" + +// GDesc contains statistics and execution details of a single goroutine. +type GDesc struct { + ID uint64 + Name string + PC uint64 + CreationTime int64 + StartTime int64 + EndTime int64 + + // List of regions in the goroutine, sorted based on the start time. + Regions []*UserRegionDesc + + // Statistics of execution time during the goroutine execution. + GExecutionStat + + *gdesc // private part. +} + +// UserRegionDesc represents a region and goroutine execution stats +// while the region was active. +type UserRegionDesc struct { + TaskID uint64 + Name string + + // Region start event. 
Normally EvUserRegion start event or nil, + // but can be EvGoCreate event if the region is a synthetic + // region representing task inheritance from the parent goroutine. + Start *Event + + // Region end event. Normally EvUserRegion end event or nil, + // but can be EvGoStop or EvGoEnd event if the goroutine + // terminated without explicitly ending the region. + End *Event + + GExecutionStat +} + +// GExecutionStat contains statistics about a goroutine's execution +// during a period of time. +type GExecutionStat struct { + ExecTime int64 + SchedWaitTime int64 + IOTime int64 + BlockTime int64 + SyscallTime int64 + GCTime int64 + SweepTime int64 + TotalTime int64 +} + +// sub returns the stats v-s. +func (s GExecutionStat) sub(v GExecutionStat) (r GExecutionStat) { + r = s + r.ExecTime -= v.ExecTime + r.SchedWaitTime -= v.SchedWaitTime + r.IOTime -= v.IOTime + r.BlockTime -= v.BlockTime + r.SyscallTime -= v.SyscallTime + r.GCTime -= v.GCTime + r.SweepTime -= v.SweepTime + r.TotalTime -= v.TotalTime + return r +} + +// snapshotStat returns the snapshot of the goroutine execution statistics. +// This is called as we process the ordered trace event stream. lastTs and +// activeGCStartTime are used to process pending statistics if this is called +// before any goroutine end event. +func (g *GDesc) snapshotStat(lastTs, activeGCStartTime int64) (ret GExecutionStat) { + ret = g.GExecutionStat + + if g.gdesc == nil { + return ret // finalized GDesc. No pending state. + } + + if activeGCStartTime != 0 { // terminating while GC is active + if g.CreationTime < activeGCStartTime { + ret.GCTime += lastTs - activeGCStartTime + } else { + // The goroutine's lifetime completely overlaps + // with a GC. + ret.GCTime += lastTs - g.CreationTime + } + } + + if g.TotalTime == 0 { + ret.TotalTime = lastTs - g.CreationTime + } + + if g.lastStartTime != 0 { + ret.ExecTime += lastTs - g.lastStartTime + } + if g.blockNetTime != 0 { + ret.IOTime += lastTs - g.blockNetTime + } + if g.blockSyncTime != 0 { + ret.BlockTime += lastTs - g.blockSyncTime + } + if g.blockSyscallTime != 0 { + ret.SyscallTime += lastTs - g.blockSyscallTime + } + if g.blockSchedTime != 0 { + ret.SchedWaitTime += lastTs - g.blockSchedTime + } + if g.blockSweepTime != 0 { + ret.SweepTime += lastTs - g.blockSweepTime + } + return ret +} + +// finalize is called when processing a goroutine end event or at +// the end of trace processing. This finalizes the execution stat +// and any active regions in the goroutine, in which case trigger is nil. +func (g *GDesc) finalize(lastTs, activeGCStartTime int64, trigger *Event) { + if trigger != nil { + g.EndTime = trigger.Ts + } + finalStat := g.snapshotStat(lastTs, activeGCStartTime) + + g.GExecutionStat = finalStat + for _, s := range g.activeRegions { + s.End = trigger + s.GExecutionStat = finalStat.sub(s.GExecutionStat) + g.Regions = append(g.Regions, s) + } + *(g.gdesc) = gdesc{} +} + +// gdesc is a private part of GDesc that is required only during analysis. +type gdesc struct { + lastStartTime int64 + blockNetTime int64 + blockSyncTime int64 + blockSyscallTime int64 + blockSweepTime int64 + blockGCTime int64 + blockSchedTime int64 + + activeRegions []*UserRegionDesc // stack of active regions +} + +// GoroutineStats generates statistics for all goroutines in the trace. +func GoroutineStats(events []*Event) map[uint64]*GDesc { + gs := make(map[uint64]*GDesc) + var lastTs int64 + var gcStartTime int64 // gcStartTime == 0 indicates gc is inactive. 
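	// Typical use of this file's API, as a sketch (not in the original
	// source; f is any io.Reader holding trace data):
	//
	//	res, err := Parse(f, "")
	//	if err != nil { /* handle error */ }
	//	for _, g := range GoroutineStats(res.Events) {
	//		fmt.Println(g.ID, g.Name, g.ExecTime, g.GCTime, g.TotalTime)
	//	}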
+ for _, ev := range events { + lastTs = ev.Ts + switch ev.Type { + case EvGoCreate: + g := &GDesc{ID: ev.Args[0], CreationTime: ev.Ts, gdesc: new(gdesc)} + g.blockSchedTime = ev.Ts + // When a goroutine is newly created, inherit the + // task of the active region. For ease handling of + // this case, we create a fake region description with + // the task id. + if creatorG := gs[ev.G]; creatorG != nil && len(creatorG.gdesc.activeRegions) > 0 { + regions := creatorG.gdesc.activeRegions + s := regions[len(regions)-1] + if s.TaskID != 0 { + g.gdesc.activeRegions = []*UserRegionDesc{ + {TaskID: s.TaskID, Start: ev}, + } + } + } + gs[g.ID] = g + case EvGoStart, EvGoStartLabel: + g := gs[ev.G] + if g.PC == 0 { + g.PC = ev.Stk[0].PC + g.Name = ev.Stk[0].Fn + } + g.lastStartTime = ev.Ts + if g.StartTime == 0 { + g.StartTime = ev.Ts + } + if g.blockSchedTime != 0 { + g.SchedWaitTime += ev.Ts - g.blockSchedTime + g.blockSchedTime = 0 + } + case EvGoEnd, EvGoStop: + g := gs[ev.G] + g.finalize(ev.Ts, gcStartTime, ev) + case EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect, + EvGoBlockSync, EvGoBlockCond: + g := gs[ev.G] + g.ExecTime += ev.Ts - g.lastStartTime + g.lastStartTime = 0 + g.blockSyncTime = ev.Ts + case EvGoSched, EvGoPreempt: + g := gs[ev.G] + g.ExecTime += ev.Ts - g.lastStartTime + g.lastStartTime = 0 + g.blockSchedTime = ev.Ts + case EvGoSleep, EvGoBlock: + g := gs[ev.G] + g.ExecTime += ev.Ts - g.lastStartTime + g.lastStartTime = 0 + case EvGoBlockNet: + g := gs[ev.G] + g.ExecTime += ev.Ts - g.lastStartTime + g.lastStartTime = 0 + g.blockNetTime = ev.Ts + case EvGoBlockGC: + g := gs[ev.G] + g.ExecTime += ev.Ts - g.lastStartTime + g.lastStartTime = 0 + g.blockGCTime = ev.Ts + case EvGoUnblock: + g := gs[ev.Args[0]] + if g.blockNetTime != 0 { + g.IOTime += ev.Ts - g.blockNetTime + g.blockNetTime = 0 + } + if g.blockSyncTime != 0 { + g.BlockTime += ev.Ts - g.blockSyncTime + g.blockSyncTime = 0 + } + g.blockSchedTime = ev.Ts + case EvGoSysBlock: + g := gs[ev.G] + g.ExecTime += ev.Ts - g.lastStartTime + g.lastStartTime = 0 + g.blockSyscallTime = ev.Ts + case EvGoSysExit: + g := gs[ev.G] + if g.blockSyscallTime != 0 { + g.SyscallTime += ev.Ts - g.blockSyscallTime + g.blockSyscallTime = 0 + } + g.blockSchedTime = ev.Ts + case EvGCSweepStart: + g := gs[ev.G] + if g != nil { + // Sweep can happen during GC on system goroutine. + g.blockSweepTime = ev.Ts + } + case EvGCSweepDone: + g := gs[ev.G] + if g != nil && g.blockSweepTime != 0 { + g.SweepTime += ev.Ts - g.blockSweepTime + g.blockSweepTime = 0 + } + case EvGCStart: + gcStartTime = ev.Ts + case EvGCDone: + for _, g := range gs { + if g.EndTime != 0 { + continue + } + if gcStartTime < g.CreationTime { + g.GCTime += ev.Ts - g.CreationTime + } else { + g.GCTime += ev.Ts - gcStartTime + } + } + gcStartTime = 0 // indicates gc is inactive. 
+ case EvUserRegion: + g := gs[ev.G] + switch mode := ev.Args[1]; mode { + case 0: // region start + g.activeRegions = append(g.activeRegions, &UserRegionDesc{ + Name: ev.SArgs[0], + TaskID: ev.Args[0], + Start: ev, + GExecutionStat: g.snapshotStat(lastTs, gcStartTime), + }) + case 1: // region end + var sd *UserRegionDesc + if regionStk := g.activeRegions; len(regionStk) > 0 { + n := len(regionStk) + sd = regionStk[n-1] + regionStk = regionStk[:n-1] // pop + g.activeRegions = regionStk + } else { + sd = &UserRegionDesc{ + Name: ev.SArgs[0], + TaskID: ev.Args[0], + } + } + sd.GExecutionStat = g.snapshotStat(lastTs, gcStartTime).sub(sd.GExecutionStat) + sd.End = ev + g.Regions = append(g.Regions, sd) + } + } + } + + for _, g := range gs { + g.finalize(lastTs, gcStartTime, nil) + + // sort based on region start time + sort.Slice(g.Regions, func(i, j int) bool { + x := g.Regions[i].Start + y := g.Regions[j].Start + if x == nil { + return true + } + if y == nil { + return false + } + return x.Ts < y.Ts + }) + + g.gdesc = nil + } + + return gs +} + +// RelatedGoroutines finds a set of goroutines related to goroutine goid. +func RelatedGoroutines(events []*Event, goid uint64) map[uint64]bool { + // BFS of depth 2 over "unblock" edges + // (what goroutines unblock goroutine goid?). + gmap := make(map[uint64]bool) + gmap[goid] = true + for i := 0; i < 2; i++ { + gmap1 := make(map[uint64]bool) + for g := range gmap { + gmap1[g] = true + } + for _, ev := range events { + if ev.Type == EvGoUnblock && gmap[ev.Args[0]] { + gmap1[ev.G] = true + } + } + gmap = gmap1 + } + gmap[0] = true // for GC events + return gmap +} diff --git a/src/internal/trace/mkcanned.bash b/src/internal/trace/mkcanned.bash new file mode 100755 index 0000000..b365b90 --- /dev/null +++ b/src/internal/trace/mkcanned.bash @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Copyright 2016 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# mkcanned.bash creates canned traces for the trace test suite using +# the current Go version. + +set -e + +if [ $# != 1 ]; then + echo "usage: $0 <label>" >&2 + exit 1 +fi + +go test -run ClientServerParallel4 -trace "testdata/http_$1_good" net/http +go test -run 'TraceStress$|TraceStressStartStop$|TestUserTaskSpan$' runtime/trace -savetraces +mv ../../runtime/trace/TestTraceStress.trace "testdata/stress_$1_good" +mv ../../runtime/trace/TestTraceStressStartStop.trace "testdata/stress_start_stop_$1_good" +mv ../../runtime/trace/TestUserTaskSpan.trace "testdata/user_task_span_$1_good" diff --git a/src/internal/trace/mud.go b/src/internal/trace/mud.go new file mode 100644 index 0000000..8826306 --- /dev/null +++ b/src/internal/trace/mud.go @@ -0,0 +1,223 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import ( + "math" + "sort" +) + +// mud is an updatable mutator utilization distribution. +// +// This is a continuous distribution of duration over mutator +// utilization. For example, the integral from mutator utilization a +// to b is the total duration during which the mutator utilization was +// in the range [a, b]. +// +// This distribution is *not* normalized (it is not a probability +// distribution). This makes it easier to work with as it's being +// updated. 
+// +// It is represented as the sum of scaled uniform distribution +// functions and Dirac delta functions (which are treated as +// degenerate uniform distributions). +type mud struct { + sorted, unsorted []edge + + // trackMass is the inverse cumulative sum to track as the + // distribution is updated. + trackMass float64 + // trackBucket is the bucket in which trackMass falls. If the + // total mass of the distribution is < trackMass, this is + // len(hist). + trackBucket int + // trackSum is the cumulative sum of hist[:trackBucket]. Once + // trackSum >= trackMass, trackBucket must be recomputed. + trackSum float64 + + // hist is a hierarchical histogram of distribution mass. + hist [mudDegree]float64 +} + +const ( + // mudDegree is the number of buckets in the MUD summary + // histogram. + mudDegree = 1024 +) + +type edge struct { + // At x, the function increases by y. + x, delta float64 + // Additionally at x is a Dirac delta function with area dirac. + dirac float64 +} + +// add adds a uniform function over [l, r] scaled so the total weight +// of the uniform is area. If l==r, this adds a Dirac delta function. +func (d *mud) add(l, r, area float64) { + if area == 0 { + return + } + + if r < l { + l, r = r, l + } + + // Add the edges. + if l == r { + d.unsorted = append(d.unsorted, edge{l, 0, area}) + } else { + delta := area / (r - l) + d.unsorted = append(d.unsorted, edge{l, delta, 0}, edge{r, -delta, 0}) + } + + // Update the histogram. + h := &d.hist + lbFloat, lf := math.Modf(l * mudDegree) + lb := int(lbFloat) + if lb >= mudDegree { + lb, lf = mudDegree-1, 1 + } + if l == r { + h[lb] += area + } else { + rbFloat, rf := math.Modf(r * mudDegree) + rb := int(rbFloat) + if rb >= mudDegree { + rb, rf = mudDegree-1, 1 + } + if lb == rb { + h[lb] += area + } else { + perBucket := area / (r - l) / mudDegree + h[lb] += perBucket * (1 - lf) + h[rb] += perBucket * rf + for i := lb + 1; i < rb; i++ { + h[i] += perBucket + } + } + } + + // Update mass tracking. + if thresh := float64(d.trackBucket) / mudDegree; l < thresh { + if r < thresh { + d.trackSum += area + } else { + d.trackSum += area * (thresh - l) / (r - l) + } + if d.trackSum >= d.trackMass { + // The tracked mass now falls in a different + // bucket. Recompute the inverse cumulative sum. + d.setTrackMass(d.trackMass) + } + } +} + +// setTrackMass sets the mass to track the inverse cumulative sum for. +// +// Specifically, mass is a cumulative duration, and the mutator +// utilization bounds for this duration can be queried using +// approxInvCumulativeSum. +func (d *mud) setTrackMass(mass float64) { + d.trackMass = mass + + // Find the bucket currently containing trackMass by computing + // the cumulative sum. + sum := 0.0 + for i, val := range d.hist[:] { + newSum := sum + val + if newSum > mass { + // mass falls in bucket i. + d.trackBucket = i + d.trackSum = sum + return + } + sum = newSum + } + d.trackBucket = len(d.hist) + d.trackSum = sum +} + +// approxInvCumulativeSum is like invCumulativeSum, but specifically +// operates on the tracked mass and returns an upper and lower bound +// approximation of the inverse cumulative sum. +// +// The true inverse cumulative sum will be in the range [lower, upper). +func (d *mud) approxInvCumulativeSum() (float64, float64, bool) { + if d.trackBucket == len(d.hist) { + return math.NaN(), math.NaN(), false + } + return float64(d.trackBucket) / mudDegree, float64(d.trackBucket+1) / mudDegree, true +} + +// invCumulativeSum returns x such that the integral of d from -∞ to x +// is y. 
If the total weight of d is less than y, it returns the +// maximum of the distribution and false. +// +// Specifically, y is a cumulative duration, and invCumulativeSum +// returns the mutator utilization x such that at least y time has +// been spent with mutator utilization <= x. +func (d *mud) invCumulativeSum(y float64) (float64, bool) { + if len(d.sorted) == 0 && len(d.unsorted) == 0 { + return math.NaN(), false + } + + // Sort edges. + edges := d.unsorted + sort.Slice(edges, func(i, j int) bool { + return edges[i].x < edges[j].x + }) + // Merge with sorted edges. + d.unsorted = nil + if d.sorted == nil { + d.sorted = edges + } else { + oldSorted := d.sorted + newSorted := make([]edge, len(oldSorted)+len(edges)) + i, j := 0, 0 + for o := range newSorted { + if i >= len(oldSorted) { + copy(newSorted[o:], edges[j:]) + break + } else if j >= len(edges) { + copy(newSorted[o:], oldSorted[i:]) + break + } else if oldSorted[i].x < edges[j].x { + newSorted[o] = oldSorted[i] + i++ + } else { + newSorted[o] = edges[j] + j++ + } + } + d.sorted = newSorted + } + + // Traverse edges in order computing a cumulative sum. + csum, rate, prevX := 0.0, 0.0, 0.0 + for _, e := range d.sorted { + newCsum := csum + (e.x-prevX)*rate + if newCsum >= y { + // y was exceeded between the previous edge + // and this one. + if rate == 0 { + // Anywhere between prevX and + // e.x will do. We return e.x + // because that takes care of + // the y==0 case naturally. + return e.x, true + } + return (y-csum)/rate + prevX, true + } + newCsum += e.dirac + if newCsum >= y { + // y was exceeded by the Dirac delta at e.x. + return e.x, true + } + csum, prevX = newCsum, e.x + rate += e.delta + } + return prevX, false +} diff --git a/src/internal/trace/mud_test.go b/src/internal/trace/mud_test.go new file mode 100644 index 0000000..b3d74dc --- /dev/null +++ b/src/internal/trace/mud_test.go @@ -0,0 +1,87 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import ( + "math/rand" + "testing" +) + +func TestMUD(t *testing.T) { + // Insert random uniforms and check histogram mass and + // cumulative sum approximations. + rnd := rand.New(rand.NewSource(42)) + mass := 0.0 + var mud mud + for i := 0; i < 100; i++ { + area, l, r := rnd.Float64(), rnd.Float64(), rnd.Float64() + if rnd.Intn(10) == 0 { + r = l + } + t.Log(l, r, area) + mud.add(l, r, area) + mass += area + + // Check total histogram weight. + hmass := 0.0 + for _, val := range mud.hist { + hmass += val + } + if !aeq(mass, hmass) { + t.Fatalf("want mass %g, got %g", mass, hmass) + } + + // Check inverse cumulative sum approximations. + for j := 0.0; j < mass; j += mass * 0.099 { + mud.setTrackMass(j) + l, u, ok := mud.approxInvCumulativeSum() + inv, ok2 := mud.invCumulativeSum(j) + if !ok || !ok2 { + t.Fatalf("inverse cumulative sum failed: approx %v, exact %v", ok, ok2) + } + if !(l <= inv && inv < u) { + t.Fatalf("inverse(%g) = %g, not ∈ [%g, %g)", j, inv, l, u) + } + } + } +} + +func TestMUDTracking(t *testing.T) { + // Test that the tracked mass is tracked correctly across + // updates. 
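	// As a concrete sketch of the semantics being exercised here (not in
	// the original test): after
	//
	//	d.add(0.2, 0.2, 1.0) // Dirac delta: mass 1 at utilization 0.2
	//	d.add(0.5, 1.0, 2.0) // uniform: mass 2 spread over [0.5, 1.0]
	//
	// the cumulative sum jumps to 1 at x=0.2 and then rises at rate
	// 2/(1.0-0.5) = 4 from x=0.5, so d.invCumulativeSum(0.5) = 0.2 and
	// d.invCumulativeSum(2.0) = 0.5 + (2.0-1.0)/4 = 0.75, while
	// d.setTrackMass(2.0) plus d.approxInvCumulativeSum() brackets roughly
	// the same point within a single 1/1024-wide histogram bucket.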
+ rnd := rand.New(rand.NewSource(42)) + const uniforms = 100 + for trackMass := 0.0; trackMass < uniforms; trackMass += uniforms / 50 { + var mud mud + mass := 0.0 + mud.setTrackMass(trackMass) + for i := 0; i < uniforms; i++ { + area, l, r := rnd.Float64(), rnd.Float64(), rnd.Float64() + mud.add(l, r, area) + mass += area + l, u, ok := mud.approxInvCumulativeSum() + inv, ok2 := mud.invCumulativeSum(trackMass) + + if mass < trackMass { + if ok { + t.Errorf("approx(%g) = [%g, %g), but mass = %g", trackMass, l, u, mass) + } + if ok2 { + t.Errorf("exact(%g) = %g, but mass = %g", trackMass, inv, mass) + } + } else { + if !ok { + t.Errorf("approx(%g) failed, but mass = %g", trackMass, mass) + } + if !ok2 { + t.Errorf("exact(%g) failed, but mass = %g", trackMass, mass) + } + if ok && ok2 && !(l <= inv && inv < u) { + t.Errorf("inverse(%g) = %g, not ∈ [%g, %g)", trackMass, inv, l, u) + } + } + } + } +} diff --git a/src/internal/trace/order.go b/src/internal/trace/order.go new file mode 100644 index 0000000..36ed58d --- /dev/null +++ b/src/internal/trace/order.go @@ -0,0 +1,279 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import ( + "fmt" + "sort" +) + +type eventBatch struct { + events []*Event + selected bool +} + +type orderEvent struct { + ev *Event + batch int + g uint64 + init gState + next gState +} + +type gStatus int + +type gState struct { + seq uint64 + status gStatus +} + +const ( + gDead gStatus = iota + gRunnable + gRunning + gWaiting + + unordered = ^uint64(0) + garbage = ^uint64(0) - 1 + noseq = ^uint64(0) + seqinc = ^uint64(0) - 1 +) + +// order1007 merges a set of per-P event batches into a single, consistent stream. +// The high level idea is as follows. Events within an individual batch are in +// correct order, because they are emitted by a single P. So we need to produce +// a correct interleaving of the batches. To do this we take first unmerged event +// from each batch (frontier). Then choose subset that is "ready" to be merged, +// that is, events for which all dependencies are already merged. Then we choose +// event with the lowest timestamp from the subset, merge it and repeat. +// This approach ensures that we form a consistent stream even if timestamps are +// incorrect (condition observed on some machines). +func order1007(m map[int][]*Event) (events []*Event, err error) { + pending := 0 + var batches []*eventBatch + for _, v := range m { + pending += len(v) + batches = append(batches, &eventBatch{v, false}) + } + gs := make(map[uint64]gState) + var frontier []orderEvent + for ; pending != 0; pending-- { + for i, b := range batches { + if b.selected || len(b.events) == 0 { + continue + } + ev := b.events[0] + g, init, next := stateTransition(ev) + if !transitionReady(g, gs[g], init) { + continue + } + frontier = append(frontier, orderEvent{ev, i, g, init, next}) + b.events = b.events[1:] + b.selected = true + // Get rid of "Local" events, they are intended merely for ordering. 
+ switch ev.Type { + case EvGoStartLocal: + ev.Type = EvGoStart + case EvGoUnblockLocal: + ev.Type = EvGoUnblock + case EvGoSysExitLocal: + ev.Type = EvGoSysExit + } + } + if len(frontier) == 0 { + return nil, fmt.Errorf("no consistent ordering of events possible") + } + sort.Sort(orderEventList(frontier)) + f := frontier[0] + frontier[0] = frontier[len(frontier)-1] + frontier = frontier[:len(frontier)-1] + events = append(events, f.ev) + transition(gs, f.g, f.init, f.next) + if !batches[f.batch].selected { + panic("frontier batch is not selected") + } + batches[f.batch].selected = false + } + + // At this point we have a consistent stream of events. + // Make sure time stamps respect the ordering. + // The tests will skip (not fail) the test case if they see this error. + if !sort.IsSorted(eventList(events)) { + return nil, ErrTimeOrder + } + + // The last part is giving correct timestamps to EvGoSysExit events. + // The problem with EvGoSysExit is that actual syscall exit timestamp (ev.Args[2]) + // is potentially acquired long before event emission. So far we've used + // timestamp of event emission (ev.Ts). + // We could not set ev.Ts = ev.Args[2] earlier, because it would produce + // seemingly broken timestamps (misplaced event). + // We also can't simply update the timestamp and resort events, because + // if timestamps are broken we will misplace the event and later report + // logically broken trace (instead of reporting broken timestamps). + lastSysBlock := make(map[uint64]int64) + for _, ev := range events { + switch ev.Type { + case EvGoSysBlock, EvGoInSyscall: + lastSysBlock[ev.G] = ev.Ts + case EvGoSysExit: + ts := int64(ev.Args[2]) + if ts == 0 { + continue + } + block := lastSysBlock[ev.G] + if block == 0 { + return nil, fmt.Errorf("stray syscall exit") + } + if ts < block { + return nil, ErrTimeOrder + } + ev.Ts = ts + } + } + sort.Stable(eventList(events)) + + return +} + +// stateTransition returns goroutine state (sequence and status) when the event +// becomes ready for merging (init) and the goroutine state after the event (next). +func stateTransition(ev *Event) (g uint64, init, next gState) { + switch ev.Type { + case EvGoCreate: + g = ev.Args[0] + init = gState{0, gDead} + next = gState{1, gRunnable} + case EvGoWaiting, EvGoInSyscall: + g = ev.G + init = gState{1, gRunnable} + next = gState{2, gWaiting} + case EvGoStart, EvGoStartLabel: + g = ev.G + init = gState{ev.Args[1], gRunnable} + next = gState{ev.Args[1] + 1, gRunning} + case EvGoStartLocal: + // noseq means that this event is ready for merging as soon as + // frontier reaches it (EvGoStartLocal is emitted on the same P + // as the corresponding EvGoCreate/EvGoUnblock, and thus the latter + // is already merged). + // seqinc is a stub for cases when event increments g sequence, + // but since we don't know current seq we also don't know next seq. 
+ g = ev.G + init = gState{noseq, gRunnable} + next = gState{seqinc, gRunning} + case EvGoBlock, EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect, + EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoSleep, + EvGoSysBlock, EvGoBlockGC: + g = ev.G + init = gState{noseq, gRunning} + next = gState{noseq, gWaiting} + case EvGoSched, EvGoPreempt: + g = ev.G + init = gState{noseq, gRunning} + next = gState{noseq, gRunnable} + case EvGoUnblock, EvGoSysExit: + g = ev.Args[0] + init = gState{ev.Args[1], gWaiting} + next = gState{ev.Args[1] + 1, gRunnable} + case EvGoUnblockLocal, EvGoSysExitLocal: + g = ev.Args[0] + init = gState{noseq, gWaiting} + next = gState{seqinc, gRunnable} + case EvGCStart: + g = garbage + init = gState{ev.Args[0], gDead} + next = gState{ev.Args[0] + 1, gDead} + default: + // no ordering requirements + g = unordered + } + return +} + +func transitionReady(g uint64, curr, init gState) bool { + return g == unordered || (init.seq == noseq || init.seq == curr.seq) && init.status == curr.status +} + +func transition(gs map[uint64]gState, g uint64, init, next gState) { + if g == unordered { + return + } + curr := gs[g] + if !transitionReady(g, curr, init) { + panic("event sequences are broken") + } + switch next.seq { + case noseq: + next.seq = curr.seq + case seqinc: + next.seq = curr.seq + 1 + } + gs[g] = next +} + +// order1005 merges a set of per-P event batches into a single, consistent stream. +func order1005(m map[int][]*Event) (events []*Event, err error) { + for _, batch := range m { + events = append(events, batch...) + } + for _, ev := range events { + if ev.Type == EvGoSysExit { + // EvGoSysExit emission is delayed until the thread has a P. + // Give it the real sequence number and time stamp. + ev.seq = int64(ev.Args[1]) + if ev.Args[2] != 0 { + ev.Ts = int64(ev.Args[2]) + } + } + } + sort.Sort(eventSeqList(events)) + if !sort.IsSorted(eventList(events)) { + return nil, ErrTimeOrder + } + return +} + +type orderEventList []orderEvent + +func (l orderEventList) Len() int { + return len(l) +} + +func (l orderEventList) Less(i, j int) bool { + return l[i].ev.Ts < l[j].ev.Ts +} + +func (l orderEventList) Swap(i, j int) { + l[i], l[j] = l[j], l[i] +} + +type eventList []*Event + +func (l eventList) Len() int { + return len(l) +} + +func (l eventList) Less(i, j int) bool { + return l[i].Ts < l[j].Ts +} + +func (l eventList) Swap(i, j int) { + l[i], l[j] = l[j], l[i] +} + +type eventSeqList []*Event + +func (l eventSeqList) Len() int { + return len(l) +} + +func (l eventSeqList) Less(i, j int) bool { + return l[i].seq < l[j].seq +} + +func (l eventSeqList) Swap(i, j int) { + l[i], l[j] = l[j], l[i] +} diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go new file mode 100644 index 0000000..254f201 --- /dev/null +++ b/src/internal/trace/parser.go @@ -0,0 +1,1120 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import ( + "bufio" + "bytes" + "fmt" + "io" + "math/rand" + "os" + "os/exec" + "path/filepath" + "runtime" + "strconv" + "strings" + _ "unsafe" +) + +func goCmd() string { + var exeSuffix string + if runtime.GOOS == "windows" { + exeSuffix = ".exe" + } + path := filepath.Join(runtime.GOROOT(), "bin", "go"+exeSuffix) + if _, err := os.Stat(path); err == nil { + return path + } + return "go" +} + +// Event describes one event in the trace. 
+type Event struct { + Off int // offset in input file (for debugging and error reporting) + Type byte // one of Ev* + seq int64 // sequence number + Ts int64 // timestamp in nanoseconds + P int // P on which the event happened (can be one of TimerP, NetpollP, SyscallP) + G uint64 // G on which the event happened + StkID uint64 // unique stack ID + Stk []*Frame // stack trace (can be empty) + Args [3]uint64 // event-type-specific arguments + SArgs []string // event-type-specific string args + // linked event (can be nil), depends on event type: + // for GCStart: the GCStop + // for GCSTWStart: the GCSTWDone + // for GCSweepStart: the GCSweepDone + // for GoCreate: first GoStart of the created goroutine + // for GoStart/GoStartLabel: the associated GoEnd, GoBlock or other blocking event + // for GoSched/GoPreempt: the next GoStart + // for GoBlock and other blocking events: the unblock event + // for GoUnblock: the associated GoStart + // for blocking GoSysCall: the associated GoSysExit + // for GoSysExit: the next GoStart + // for GCMarkAssistStart: the associated GCMarkAssistDone + // for UserTaskCreate: the UserTaskEnd + // for UserRegion: if the start region, the corresponding UserRegion end event + Link *Event +} + +// Frame is a frame in stack traces. +type Frame struct { + PC uint64 + Fn string + File string + Line int +} + +const ( + // Special P identifiers: + FakeP = 1000000 + iota + TimerP // depicts timer unblocks + NetpollP // depicts network unblocks + SyscallP // depicts returns from syscalls + GCP // depicts GC state +) + +// ParseResult is the result of Parse. +type ParseResult struct { + // Events is the sorted list of Events in the trace. + Events []*Event + // Stacks is the stack traces keyed by stack IDs from the trace. + Stacks map[uint64][]*Frame +} + +// Parse parses, post-processes and verifies the trace. +func Parse(r io.Reader, bin string) (ParseResult, error) { + ver, res, err := parse(r, bin) + if err != nil { + return ParseResult{}, err + } + if ver < 1007 && bin == "" { + return ParseResult{}, fmt.Errorf("for traces produced by go 1.6 or below, the binary argument must be provided") + } + return res, nil +} + +// parse parses, post-processes and verifies the trace. It returns the +// trace version and the list of events. +func parse(r io.Reader, bin string) (int, ParseResult, error) { + ver, rawEvents, strings, err := readTrace(r) + if err != nil { + return 0, ParseResult{}, err + } + events, stacks, err := parseEvents(ver, rawEvents, strings) + if err != nil { + return 0, ParseResult{}, err + } + events = removeFutile(events) + err = postProcessTrace(ver, events) + if err != nil { + return 0, ParseResult{}, err + } + // Attach stack traces. + for _, ev := range events { + if ev.StkID != 0 { + ev.Stk = stacks[ev.StkID] + } + } + if ver < 1007 && bin != "" { + if err := symbolize(events, bin); err != nil { + return 0, ParseResult{}, err + } + } + return ver, ParseResult{Events: events, Stacks: stacks}, nil +} + +// rawEvent is a helper type used during parsing. +type rawEvent struct { + off int + typ byte + args []uint64 + sargs []string +} + +// readTrace does wire-format parsing and verification. +// It does not care about specific event types and argument meaning. +func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]string, err error) { + // Read and validate trace header. 
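	// For orientation (a sketch, not in the original source): the header is
	// the 16-byte string "go 1.<minor> trace" padded with NUL bytes, and
	// parseHeader below encodes the version as major*1000+minor:
	//
	//	parseHeader([]byte("go 1.11 trace\x00\x00\x00"))    // 1011
	//	parseHeader([]byte("go 1.9 trace\x00\x00\x00\x00")) // 1009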
+ var buf [16]byte + off, err := io.ReadFull(r, buf[:]) + if err != nil { + err = fmt.Errorf("failed to read header: read %v, err %v", off, err) + return + } + ver, err = parseHeader(buf[:]) + if err != nil { + return + } + switch ver { + case 1005, 1007, 1008, 1009, 1010, 1011: + // Note: When adding a new version, add canned traces + // from the old version to the test suite using mkcanned.bash. + break + default: + err = fmt.Errorf("unsupported trace file version %v.%v (update Go toolchain) %v", ver/1000, ver%1000, ver) + return + } + + // Read events. + strings = make(map[uint64]string) + for { + // Read event type and number of arguments (1 byte). + off0 := off + var n int + n, err = r.Read(buf[:1]) + if err == io.EOF { + err = nil + break + } + if err != nil || n != 1 { + err = fmt.Errorf("failed to read trace at offset 0x%x: n=%v err=%v", off0, n, err) + return + } + off += n + typ := buf[0] << 2 >> 2 + narg := buf[0]>>6 + 1 + inlineArgs := byte(4) + if ver < 1007 { + narg++ + inlineArgs++ + } + if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > ver { + err = fmt.Errorf("unknown event type %v at offset 0x%x", typ, off0) + return + } + if typ == EvString { + // String dictionary entry [ID, length, string]. + var id uint64 + id, off, err = readVal(r, off) + if err != nil { + return + } + if id == 0 { + err = fmt.Errorf("string at offset %d has invalid id 0", off) + return + } + if strings[id] != "" { + err = fmt.Errorf("string at offset %d has duplicate id %v", off, id) + return + } + var ln uint64 + ln, off, err = readVal(r, off) + if err != nil { + return + } + if ln == 0 { + err = fmt.Errorf("string at offset %d has invalid length 0", off) + return + } + if ln > 1e6 { + err = fmt.Errorf("string at offset %d has too large length %v", off, ln) + return + } + buf := make([]byte, ln) + var n int + n, err = io.ReadFull(r, buf) + if err != nil { + err = fmt.Errorf("failed to read trace at offset %d: read %v, want %v, error %v", off, n, ln, err) + return + } + off += n + strings[id] = string(buf) + continue + } + ev := rawEvent{typ: typ, off: off0} + if narg < inlineArgs { + for i := 0; i < int(narg); i++ { + var v uint64 + v, off, err = readVal(r, off) + if err != nil { + err = fmt.Errorf("failed to read event %v argument at offset %v (%v)", typ, off, err) + return + } + ev.args = append(ev.args, v) + } + } else { + // More than inlineArgs args, the first value is length of the event in bytes. 
+ var v uint64 + v, off, err = readVal(r, off) + if err != nil { + err = fmt.Errorf("failed to read event %v argument at offset %v (%v)", typ, off, err) + return + } + evLen := v + off1 := off + for evLen > uint64(off-off1) { + v, off, err = readVal(r, off) + if err != nil { + err = fmt.Errorf("failed to read event %v argument at offset %v (%v)", typ, off, err) + return + } + ev.args = append(ev.args, v) + } + if evLen != uint64(off-off1) { + err = fmt.Errorf("event has wrong length at offset 0x%x: want %v, got %v", off0, evLen, off-off1) + return + } + } + switch ev.typ { + case EvUserLog: // EvUserLog records are followed by a value string of length ev.args[len(ev.args)-1] + var s string + s, off, err = readStr(r, off) + ev.sargs = append(ev.sargs, s) + } + events = append(events, ev) + } + return +} + +func readStr(r io.Reader, off0 int) (s string, off int, err error) { + var sz uint64 + sz, off, err = readVal(r, off0) + if err != nil || sz == 0 { + return "", off, err + } + if sz > 1e6 { + return "", off, fmt.Errorf("string at offset %d is too large (len=%d)", off, sz) + } + buf := make([]byte, sz) + n, err := io.ReadFull(r, buf) + if err != nil || sz != uint64(n) { + return "", off + n, fmt.Errorf("failed to read trace at offset %d: read %v, want %v, error %v", off, n, sz, err) + } + return string(buf), off + n, nil +} + +// parseHeader parses trace header of the form "go 1.7 trace\x00\x00\x00\x00" +// and returns parsed version as 1007. +func parseHeader(buf []byte) (int, error) { + if len(buf) != 16 { + return 0, fmt.Errorf("bad header length") + } + if buf[0] != 'g' || buf[1] != 'o' || buf[2] != ' ' || + buf[3] < '1' || buf[3] > '9' || + buf[4] != '.' || + buf[5] < '1' || buf[5] > '9' { + return 0, fmt.Errorf("not a trace file") + } + ver := int(buf[5] - '0') + i := 0 + for ; buf[6+i] >= '0' && buf[6+i] <= '9' && i < 2; i++ { + ver = ver*10 + int(buf[6+i]-'0') + } + ver += int(buf[3]-'0') * 1000 + if !bytes.Equal(buf[6+i:], []byte(" trace\x00\x00\x00\x00")[:10-i]) { + return 0, fmt.Errorf("not a trace file") + } + return ver, nil +} + +// Parse events transforms raw events into events. +// It does analyze and verify per-event-type arguments. +func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (events []*Event, stacks map[uint64][]*Frame, err error) { + var ticksPerSec, lastSeq, lastTs int64 + var lastG uint64 + var lastP int + timerGoids := make(map[uint64]bool) + lastGs := make(map[int]uint64) // last goroutine running on P + stacks = make(map[uint64][]*Frame) + batches := make(map[int][]*Event) // events by P + for _, raw := range rawEvents { + desc := EventDescriptions[raw.typ] + if desc.Name == "" { + err = fmt.Errorf("missing description for event type %v", raw.typ) + return + } + narg := argNum(raw, ver) + if len(raw.args) != narg { + err = fmt.Errorf("%v has wrong number of arguments at offset 0x%x: want %v, got %v", + desc.Name, raw.off, narg, len(raw.args)) + return + } + switch raw.typ { + case EvBatch: + lastGs[lastP] = lastG + lastP = int(raw.args[0]) + lastG = lastGs[lastP] + if ver < 1007 { + lastSeq = int64(raw.args[1]) + lastTs = int64(raw.args[2]) + } else { + lastTs = int64(raw.args[1]) + } + case EvFrequency: + ticksPerSec = int64(raw.args[0]) + if ticksPerSec <= 0 { + // The most likely cause for this is tick skew on different CPUs. + // For example, solaris/amd64 seems to have wildly different + // ticks on different CPUs. 
+ err = ErrTimeOrder + return + } + case EvTimerGoroutine: + timerGoids[raw.args[0]] = true + case EvStack: + if len(raw.args) < 2 { + err = fmt.Errorf("EvStack has wrong number of arguments at offset 0x%x: want at least 2, got %v", + raw.off, len(raw.args)) + return + } + size := raw.args[1] + if size > 1000 { + err = fmt.Errorf("EvStack has bad number of frames at offset 0x%x: %v", + raw.off, size) + return + } + want := 2 + 4*size + if ver < 1007 { + want = 2 + size + } + if uint64(len(raw.args)) != want { + err = fmt.Errorf("EvStack has wrong number of arguments at offset 0x%x: want %v, got %v", + raw.off, want, len(raw.args)) + return + } + id := raw.args[0] + if id != 0 && size > 0 { + stk := make([]*Frame, size) + for i := 0; i < int(size); i++ { + if ver < 1007 { + stk[i] = &Frame{PC: raw.args[2+i]} + } else { + pc := raw.args[2+i*4+0] + fn := raw.args[2+i*4+1] + file := raw.args[2+i*4+2] + line := raw.args[2+i*4+3] + stk[i] = &Frame{PC: pc, Fn: strings[fn], File: strings[file], Line: int(line)} + } + } + stacks[id] = stk + } + default: + e := &Event{Off: raw.off, Type: raw.typ, P: lastP, G: lastG} + var argOffset int + if ver < 1007 { + e.seq = lastSeq + int64(raw.args[0]) + e.Ts = lastTs + int64(raw.args[1]) + lastSeq = e.seq + argOffset = 2 + } else { + e.Ts = lastTs + int64(raw.args[0]) + argOffset = 1 + } + lastTs = e.Ts + for i := argOffset; i < narg; i++ { + if i == narg-1 && desc.Stack { + e.StkID = raw.args[i] + } else { + e.Args[i-argOffset] = raw.args[i] + } + } + switch raw.typ { + case EvGoStart, EvGoStartLocal, EvGoStartLabel: + lastG = e.Args[0] + e.G = lastG + if raw.typ == EvGoStartLabel { + e.SArgs = []string{strings[e.Args[2]]} + } + case EvGCSTWStart: + e.G = 0 + switch e.Args[0] { + case 0: + e.SArgs = []string{"mark termination"} + case 1: + e.SArgs = []string{"sweep termination"} + default: + err = fmt.Errorf("unknown STW kind %d", e.Args[0]) + return + } + case EvGCStart, EvGCDone, EvGCSTWDone: + e.G = 0 + case EvGoEnd, EvGoStop, EvGoSched, EvGoPreempt, + EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv, + EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, + EvGoSysBlock, EvGoBlockGC: + lastG = 0 + case EvGoSysExit, EvGoWaiting, EvGoInSyscall: + e.G = e.Args[0] + case EvUserTaskCreate: + // e.Args 0: taskID, 1:parentID, 2:nameID + e.SArgs = []string{strings[e.Args[2]]} + case EvUserRegion: + // e.Args 0: taskID, 1: mode, 2:nameID + e.SArgs = []string{strings[e.Args[2]]} + case EvUserLog: + // e.Args 0: taskID, 1:keyID, 2: stackID + e.SArgs = []string{strings[e.Args[1]], raw.sargs[0]} + } + batches[lastP] = append(batches[lastP], e) + } + } + if len(batches) == 0 { + err = fmt.Errorf("trace is empty") + return + } + if ticksPerSec == 0 { + err = fmt.Errorf("no EvFrequency event") + return + } + if BreakTimestampsForTesting { + var batchArr [][]*Event + for _, batch := range batches { + batchArr = append(batchArr, batch) + } + for i := 0; i < 5; i++ { + batch := batchArr[rand.Intn(len(batchArr))] + batch[rand.Intn(len(batch))].Ts += int64(rand.Intn(2000) - 1000) + } + } + if ver < 1007 { + events, err = order1005(batches) + } else { + events, err = order1007(batches) + } + if err != nil { + return + } + + // Translate cpu ticks to real time. + minTs := events[0].Ts + // Use floating point to avoid integer overflows. + freq := 1e9 / float64(ticksPerSec) + for _, ev := range events { + ev.Ts = int64(float64(ev.Ts-minTs) * freq) + // Move timers and syscalls to separate fake Ps. 
+ if timerGoids[ev.G] && ev.Type == EvGoUnblock { + ev.P = TimerP + } + if ev.Type == EvGoSysExit { + ev.P = SyscallP + } + } + + return +} + +// removeFutile removes all constituents of futile wakeups (block, unblock, start). +// For example, a goroutine was unblocked on a mutex, but another goroutine got +// ahead and acquired the mutex before the first goroutine is scheduled, +// so the first goroutine has to block again. Such wakeups happen on buffered +// channels and sync.Mutex, but are generally not interesting for end user. +func removeFutile(events []*Event) []*Event { + // Two non-trivial aspects: + // 1. A goroutine can be preempted during a futile wakeup and migrate to another P. + // We want to remove all of that. + // 2. Tracing can start in the middle of a futile wakeup. + // That is, we can see a futile wakeup event w/o the actual wakeup before it. + // postProcessTrace runs after us and ensures that we leave the trace in a consistent state. + + // Phase 1: determine futile wakeup sequences. + type G struct { + futile bool + wakeup []*Event // wakeup sequence (subject for removal) + } + gs := make(map[uint64]G) + futile := make(map[*Event]bool) + for _, ev := range events { + switch ev.Type { + case EvGoUnblock: + g := gs[ev.Args[0]] + g.wakeup = []*Event{ev} + gs[ev.Args[0]] = g + case EvGoStart, EvGoPreempt, EvFutileWakeup: + g := gs[ev.G] + g.wakeup = append(g.wakeup, ev) + if ev.Type == EvFutileWakeup { + g.futile = true + } + gs[ev.G] = g + case EvGoBlock, EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond: + g := gs[ev.G] + if g.futile { + futile[ev] = true + for _, ev1 := range g.wakeup { + futile[ev1] = true + } + } + delete(gs, ev.G) + } + } + + // Phase 2: remove futile wakeup sequences. + newEvents := events[:0] // overwrite the original slice + for _, ev := range events { + if !futile[ev] { + newEvents = append(newEvents, ev) + } + } + return newEvents +} + +// ErrTimeOrder is returned by Parse when the trace contains +// time stamps that do not respect actual event ordering. +var ErrTimeOrder = fmt.Errorf("time stamps out of order") + +// postProcessTrace does inter-event verification and information restoration. +// The resulting trace is guaranteed to be consistent +// (for example, a P does not run two Gs at the same time, or a G is indeed +// blocked before an unblock event). 
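Before the function body below: the heart of this verification is a per-goroutine (and per-P) state machine, in which each event type is only legal from a particular state. A heavily reduced sketch of that shape, with hypothetical names and only two of the many transitions:

	package main

	import "fmt"

	// A hypothetical, much-reduced model of the checks postProcessTrace makes:
	// every goroutine is in exactly one state, and each event type is only
	// legal from a particular state.
	type gstate int

	const (
		gRunnable gstate = iota
		gRunning
		gWaiting
	)

	type machine struct {
		gs map[uint64]gstate
	}

	// start models EvGoStart: only a runnable goroutine may start running.
	func (m *machine) start(g uint64) error {
		if m.gs[g] != gRunnable {
			return fmt.Errorf("g %v is not runnable before start", g)
		}
		m.gs[g] = gRunning
		return nil
	}

	// block models the EvGoBlock* family: only a running goroutine may block.
	func (m *machine) block(g uint64) error {
		if m.gs[g] != gRunning {
			return fmt.Errorf("g %v is not running before block", g)
		}
		m.gs[g] = gWaiting
		return nil
	}

	func main() {
		m := &machine{gs: map[uint64]gstate{1: gRunnable}}
		fmt.Println(m.start(1)) // <nil>
		fmt.Println(m.block(1)) // <nil>
		fmt.Println(m.block(1)) // g 1 is not running before block
	}

The real function also fills in the Link pointers (GCStart to GCDone, GoStart to its blocking event, and so on) as it validates the stream.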
+func postProcessTrace(ver int, events []*Event) error { + const ( + gDead = iota + gRunnable + gRunning + gWaiting + ) + type gdesc struct { + state int + ev *Event + evStart *Event + evCreate *Event + evMarkAssist *Event + } + type pdesc struct { + running bool + g uint64 + evSTW *Event + evSweep *Event + } + + gs := make(map[uint64]gdesc) + ps := make(map[int]pdesc) + tasks := make(map[uint64]*Event) // task id to task creation events + activeRegions := make(map[uint64][]*Event) // goroutine id to stack of regions + gs[0] = gdesc{state: gRunning} + var evGC, evSTW *Event + + checkRunning := func(p pdesc, g gdesc, ev *Event, allowG0 bool) error { + name := EventDescriptions[ev.Type].Name + if g.state != gRunning { + return fmt.Errorf("g %v is not running while %v (offset %v, time %v)", ev.G, name, ev.Off, ev.Ts) + } + if p.g != ev.G { + return fmt.Errorf("p %v is not running g %v while %v (offset %v, time %v)", ev.P, ev.G, name, ev.Off, ev.Ts) + } + if !allowG0 && ev.G == 0 { + return fmt.Errorf("g 0 did %v (offset %v, time %v)", EventDescriptions[ev.Type].Name, ev.Off, ev.Ts) + } + return nil + } + + for _, ev := range events { + g := gs[ev.G] + p := ps[ev.P] + + switch ev.Type { + case EvProcStart: + if p.running { + return fmt.Errorf("p %v is running before start (offset %v, time %v)", ev.P, ev.Off, ev.Ts) + } + p.running = true + case EvProcStop: + if !p.running { + return fmt.Errorf("p %v is not running before stop (offset %v, time %v)", ev.P, ev.Off, ev.Ts) + } + if p.g != 0 { + return fmt.Errorf("p %v is running a goroutine %v during stop (offset %v, time %v)", ev.P, p.g, ev.Off, ev.Ts) + } + p.running = false + case EvGCStart: + if evGC != nil { + return fmt.Errorf("previous GC is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts) + } + evGC = ev + // Attribute this to the global GC state. + ev.P = GCP + case EvGCDone: + if evGC == nil { + return fmt.Errorf("bogus GC end (offset %v, time %v)", ev.Off, ev.Ts) + } + evGC.Link = ev + evGC = nil + case EvGCSTWStart: + evp := &evSTW + if ver < 1010 { + // Before 1.10, EvGCSTWStart was per-P. + evp = &p.evSTW + } + if *evp != nil { + return fmt.Errorf("previous STW is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts) + } + *evp = ev + case EvGCSTWDone: + evp := &evSTW + if ver < 1010 { + // Before 1.10, EvGCSTWDone was per-P. + evp = &p.evSTW + } + if *evp == nil { + return fmt.Errorf("bogus STW end (offset %v, time %v)", ev.Off, ev.Ts) + } + (*evp).Link = ev + *evp = nil + case EvGCSweepStart: + if p.evSweep != nil { + return fmt.Errorf("previous sweeping is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts) + } + p.evSweep = ev + case EvGCMarkAssistStart: + if g.evMarkAssist != nil { + return fmt.Errorf("previous mark assist is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts) + } + g.evMarkAssist = ev + case EvGCMarkAssistDone: + // Unlike most events, mark assists can be in progress when a + // goroutine starts tracing, so we can't report an error here. 
+ if g.evMarkAssist != nil { + g.evMarkAssist.Link = ev + g.evMarkAssist = nil + } + case EvGCSweepDone: + if p.evSweep == nil { + return fmt.Errorf("bogus sweeping end (offset %v, time %v)", ev.Off, ev.Ts) + } + p.evSweep.Link = ev + p.evSweep = nil + case EvGoWaiting: + if g.state != gRunnable { + return fmt.Errorf("g %v is not runnable before EvGoWaiting (offset %v, time %v)", ev.G, ev.Off, ev.Ts) + } + g.state = gWaiting + g.ev = ev + case EvGoInSyscall: + if g.state != gRunnable { + return fmt.Errorf("g %v is not runnable before EvGoInSyscall (offset %v, time %v)", ev.G, ev.Off, ev.Ts) + } + g.state = gWaiting + g.ev = ev + case EvGoCreate: + if err := checkRunning(p, g, ev, true); err != nil { + return err + } + if _, ok := gs[ev.Args[0]]; ok { + return fmt.Errorf("g %v already exists (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts) + } + gs[ev.Args[0]] = gdesc{state: gRunnable, ev: ev, evCreate: ev} + case EvGoStart, EvGoStartLabel: + if g.state != gRunnable { + return fmt.Errorf("g %v is not runnable before start (offset %v, time %v)", ev.G, ev.Off, ev.Ts) + } + if p.g != 0 { + return fmt.Errorf("p %v is already running g %v while start g %v (offset %v, time %v)", ev.P, p.g, ev.G, ev.Off, ev.Ts) + } + g.state = gRunning + g.evStart = ev + p.g = ev.G + if g.evCreate != nil { + if ver < 1007 { + // +1 because symbolizer expects return pc. + ev.Stk = []*Frame{{PC: g.evCreate.Args[1] + 1}} + } else { + ev.StkID = g.evCreate.Args[1] + } + g.evCreate = nil + } + + if g.ev != nil { + g.ev.Link = ev + g.ev = nil + } + case EvGoEnd, EvGoStop: + if err := checkRunning(p, g, ev, false); err != nil { + return err + } + g.evStart.Link = ev + g.evStart = nil + g.state = gDead + p.g = 0 + + if ev.Type == EvGoEnd { // flush all active regions + regions := activeRegions[ev.G] + for _, s := range regions { + s.Link = ev + } + delete(activeRegions, ev.G) + } + + case EvGoSched, EvGoPreempt: + if err := checkRunning(p, g, ev, false); err != nil { + return err + } + g.state = gRunnable + g.evStart.Link = ev + g.evStart = nil + p.g = 0 + g.ev = ev + case EvGoUnblock: + if g.state != gRunning { + return fmt.Errorf("g %v is not running while unpark (offset %v, time %v)", ev.G, ev.Off, ev.Ts) + } + if ev.P != TimerP && p.g != ev.G { + return fmt.Errorf("p %v is not running g %v while unpark (offset %v, time %v)", ev.P, ev.G, ev.Off, ev.Ts) + } + g1 := gs[ev.Args[0]] + if g1.state != gWaiting { + return fmt.Errorf("g %v is not waiting before unpark (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts) + } + if g1.ev != nil && g1.ev.Type == EvGoBlockNet && ev.P != TimerP { + ev.P = NetpollP + } + if g1.ev != nil { + g1.ev.Link = ev + } + g1.state = gRunnable + g1.ev = ev + gs[ev.Args[0]] = g1 + case EvGoSysCall: + if err := checkRunning(p, g, ev, false); err != nil { + return err + } + g.ev = ev + case EvGoSysBlock: + if err := checkRunning(p, g, ev, false); err != nil { + return err + } + g.state = gWaiting + g.evStart.Link = ev + g.evStart = nil + p.g = 0 + case EvGoSysExit: + if g.state != gWaiting { + return fmt.Errorf("g %v is not waiting during syscall exit (offset %v, time %v)", ev.G, ev.Off, ev.Ts) + } + if g.ev != nil && g.ev.Type == EvGoSysCall { + g.ev.Link = ev + } + g.state = gRunnable + g.ev = ev + case EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv, + EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoBlockGC: + if err := checkRunning(p, g, ev, false); err != nil { + return err + } + g.state = gWaiting + g.ev = ev + g.evStart.Link = ev + g.evStart = nil + p.g = 0 + case 
EvUserTaskCreate: + taskid := ev.Args[0] + if prevEv, ok := tasks[taskid]; ok { + return fmt.Errorf("task id conflicts (id:%d), %q vs %q", taskid, ev, prevEv) + } + tasks[ev.Args[0]] = ev + case EvUserTaskEnd: + taskid := ev.Args[0] + if taskCreateEv, ok := tasks[taskid]; ok { + taskCreateEv.Link = ev + delete(tasks, taskid) + } + case EvUserRegion: + mode := ev.Args[1] + regions := activeRegions[ev.G] + if mode == 0 { // region start + activeRegions[ev.G] = append(regions, ev) // push + } else if mode == 1 { // region end + n := len(regions) + if n > 0 { // matching region start event is in the trace. + s := regions[n-1] + if s.Args[0] != ev.Args[0] || s.SArgs[0] != ev.SArgs[0] { // task id, region name mismatch + return fmt.Errorf("misuse of region in goroutine %d: span end %q when the inner-most active span start event is %q", ev.G, ev, s) + } + // Link region start event with span end event + s.Link = ev + + if n > 1 { + activeRegions[ev.G] = regions[:n-1] + } else { + delete(activeRegions, ev.G) + } + } + } else { + return fmt.Errorf("invalid user region mode: %q", ev) + } + } + + gs[ev.G] = g + ps[ev.P] = p + } + + // TODO(dvyukov): restore stacks for EvGoStart events. + // TODO(dvyukov): test that all EvGoStart events has non-nil Link. + + return nil +} + +// symbolize attaches func/file/line info to stack traces. +func symbolize(events []*Event, bin string) error { + // First, collect and dedup all pcs. + pcs := make(map[uint64]*Frame) + for _, ev := range events { + for _, f := range ev.Stk { + pcs[f.PC] = nil + } + } + + // Start addr2line. + cmd := exec.Command(goCmd(), "tool", "addr2line", bin) + in, err := cmd.StdinPipe() + if err != nil { + return fmt.Errorf("failed to pipe addr2line stdin: %v", err) + } + cmd.Stderr = os.Stderr + out, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("failed to pipe addr2line stdout: %v", err) + } + err = cmd.Start() + if err != nil { + return fmt.Errorf("failed to start addr2line: %v", err) + } + outb := bufio.NewReader(out) + + // Write all pcs to addr2line. + // Need to copy pcs to an array, because map iteration order is non-deterministic. + var pcArray []uint64 + for pc := range pcs { + pcArray = append(pcArray, pc) + _, err := fmt.Fprintf(in, "0x%x\n", pc-1) + if err != nil { + return fmt.Errorf("failed to write to addr2line: %v", err) + } + } + in.Close() + + // Read in answers. + for _, pc := range pcArray { + fn, err := outb.ReadString('\n') + if err != nil { + return fmt.Errorf("failed to read from addr2line: %v", err) + } + file, err := outb.ReadString('\n') + if err != nil { + return fmt.Errorf("failed to read from addr2line: %v", err) + } + f := &Frame{PC: pc} + f.Fn = fn[:len(fn)-1] + f.File = file[:len(file)-1] + if colon := strings.LastIndex(f.File, ":"); colon != -1 { + ln, err := strconv.Atoi(f.File[colon+1:]) + if err == nil { + f.File = f.File[:colon] + f.Line = ln + } + } + pcs[pc] = f + } + cmd.Wait() + + // Replace frames in events array. + for _, ev := range events { + for i, f := range ev.Stk { + ev.Stk[i] = pcs[f.PC] + } + } + + return nil +} + +// readVal reads unsigned base-128 value from r. 
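An aside on the encoding that readVal, below, decodes: values are little-endian base-128 varints, the same layout that appendVarint in writer.go (at the end of this change) produces and that encoding/binary's Uvarint helpers implement. A standalone round-trip sketch, separate from the parser's own reader:

	package main

	import "fmt"

	// putUvarint and getUvarint are standalone illustrations of the base-128
	// ("varint") encoding used by the trace format; encoding/binary provides
	// equivalent helpers, but spelling it out shows what readVal undoes.
	func putUvarint(buf []byte, v uint64) []byte {
		for ; v >= 0x80; v >>= 7 {
			buf = append(buf, 0x80|byte(v))
		}
		return append(buf, byte(v))
	}

	func getUvarint(buf []byte) (v uint64, n int) {
		for i, b := range buf {
			v |= uint64(b&0x7f) << (uint(i) * 7)
			if b&0x80 == 0 {
				return v, i + 1
			}
		}
		return 0, 0 // truncated input
	}

	func main() {
		buf := putUvarint(nil, 300)
		fmt.Printf("% x\n", buf) // ac 02
		v, n := getUvarint(buf)
		fmt.Println(v, n) // 300 2
	}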
+func readVal(r io.Reader, off0 int) (v uint64, off int, err error) { + off = off0 + for i := 0; i < 10; i++ { + var buf [1]byte + var n int + n, err = r.Read(buf[:]) + if err != nil || n != 1 { + return 0, 0, fmt.Errorf("failed to read trace at offset %d: read %v, error %v", off0, n, err) + } + off++ + v |= uint64(buf[0]&0x7f) << (uint(i) * 7) + if buf[0]&0x80 == 0 { + return + } + } + return 0, 0, fmt.Errorf("bad value at offset 0x%x", off0) +} + +// Print dumps events to stdout. For debugging. +func Print(events []*Event) { + for _, ev := range events { + PrintEvent(ev) + } +} + +// PrintEvent dumps the event to stdout. For debugging. +func PrintEvent(ev *Event) { + fmt.Printf("%s\n", ev) +} + +func (ev *Event) String() string { + desc := EventDescriptions[ev.Type] + w := new(bytes.Buffer) + fmt.Fprintf(w, "%v %v p=%v g=%v off=%v", ev.Ts, desc.Name, ev.P, ev.G, ev.Off) + for i, a := range desc.Args { + fmt.Fprintf(w, " %v=%v", a, ev.Args[i]) + } + for i, a := range desc.SArgs { + fmt.Fprintf(w, " %v=%v", a, ev.SArgs[i]) + } + return w.String() +} + +// argNum returns total number of args for the event accounting for timestamps, +// sequence numbers and differences between trace format versions. +func argNum(raw rawEvent, ver int) int { + desc := EventDescriptions[raw.typ] + if raw.typ == EvStack { + return len(raw.args) + } + narg := len(desc.Args) + if desc.Stack { + narg++ + } + switch raw.typ { + case EvBatch, EvFrequency, EvTimerGoroutine: + if ver < 1007 { + narg++ // there was an unused arg before 1.7 + } + return narg + } + narg++ // timestamp + if ver < 1007 { + narg++ // sequence + } + switch raw.typ { + case EvGCSweepDone: + if ver < 1009 { + narg -= 2 // 1.9 added two arguments + } + case EvGCStart, EvGoStart, EvGoUnblock: + if ver < 1007 { + narg-- // 1.7 added an additional seq arg + } + case EvGCSTWStart: + if ver < 1010 { + narg-- // 1.10 added an argument + } + } + return narg +} + +// BreakTimestampsForTesting causes the parser to randomly alter timestamps (for testing of broken cputicks). +var BreakTimestampsForTesting bool + +// Event types in the trace. +// Verbatim copy from src/runtime/trace.go with the "trace" prefix removed. 
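One more aside before the constant table below: an event's first byte packs the type into its low 6 bits and the argument count minus one, capped at 3, into its top 2 bits. readTrace above unpacks it with buf[0]<<2>>2 and buf[0]>>6 + 1, and Writer.Emit at the end of this change packs it the same way. A tiny round-trip illustration with hypothetical helper names:

	package main

	import "fmt"

	// packHeader and unpackHeader illustrate the first-byte layout of a trace
	// event: low 6 bits event type, top 2 bits (number of arguments - 1),
	// saturating at 3, which signals that length-prefixed arguments follow.
	func packHeader(typ byte, nargs int) byte {
		n := byte(nargs - 1)
		if n > 3 {
			n = 3
		}
		return typ | n<<6
	}

	func unpackHeader(b byte) (typ byte, narg int) {
		return b << 2 >> 2, int(b>>6) + 1
	}

	func main() {
		b := packHeader(13, 4) // hypothetical 4-argument event; type 13 is EvGoCreate
		typ, narg := unpackHeader(b)
		fmt.Println(typ, narg) // 13 4 — a decoded count of 4 means "length prefix follows"
	}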
+const ( + EvNone = 0 // unused + EvBatch = 1 // start of per-P batch of events [pid, timestamp] + EvFrequency = 2 // contains tracer timer frequency [frequency (ticks per second)] + EvStack = 3 // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}] + EvGomaxprocs = 4 // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id] + EvProcStart = 5 // start of P [timestamp, thread id] + EvProcStop = 6 // stop of P [timestamp] + EvGCStart = 7 // GC start [timestamp, seq, stack id] + EvGCDone = 8 // GC done [timestamp] + EvGCSTWStart = 9 // GC mark termination start [timestamp, kind] + EvGCSTWDone = 10 // GC mark termination done [timestamp] + EvGCSweepStart = 11 // GC sweep start [timestamp, stack id] + EvGCSweepDone = 12 // GC sweep done [timestamp, swept, reclaimed] + EvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id] + EvGoStart = 14 // goroutine starts running [timestamp, goroutine id, seq] + EvGoEnd = 15 // goroutine ends [timestamp] + EvGoStop = 16 // goroutine stops (like in select{}) [timestamp, stack] + EvGoSched = 17 // goroutine calls Gosched [timestamp, stack] + EvGoPreempt = 18 // goroutine is preempted [timestamp, stack] + EvGoSleep = 19 // goroutine calls Sleep [timestamp, stack] + EvGoBlock = 20 // goroutine blocks [timestamp, stack] + EvGoUnblock = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack] + EvGoBlockSend = 22 // goroutine blocks on chan send [timestamp, stack] + EvGoBlockRecv = 23 // goroutine blocks on chan recv [timestamp, stack] + EvGoBlockSelect = 24 // goroutine blocks on select [timestamp, stack] + EvGoBlockSync = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack] + EvGoBlockCond = 26 // goroutine blocks on Cond [timestamp, stack] + EvGoBlockNet = 27 // goroutine blocks on network [timestamp, stack] + EvGoSysCall = 28 // syscall enter [timestamp, stack] + EvGoSysExit = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp] + EvGoSysBlock = 30 // syscall blocks [timestamp] + EvGoWaiting = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id] + EvGoInSyscall = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id] + EvHeapAlloc = 33 // gcController.heapLive change [timestamp, heap live bytes] + EvHeapGoal = 34 // gcController.heapGoal change [timestamp, heap goal bytes] + EvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id] + EvFutileWakeup = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp] + EvString = 37 // string dictionary entry [ID, length, string] + EvGoStartLocal = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id] + EvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack] + EvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp] + EvGoStartLabel = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id] + EvGoBlockGC = 42 // goroutine blocks on GC assist [timestamp, stack] + EvGCMarkAssistStart = 43 // GC mark assist start [timestamp, stack] + EvGCMarkAssistDone = 44 // GC mark assist done [timestamp] + EvUserTaskCreate = 45 // trace.NewContext [timestamp, internal task id, internal parent id, stack, name string] + EvUserTaskEnd = 46 // end of task [timestamp, internal task id, stack] + EvUserRegion = 47 // trace.WithRegion [timestamp, internal task id, 
mode(0:start, 1:end), stack, name string] + EvUserLog = 48 // trace.Log [timestamp, internal id, key string id, stack, value string] + EvCount = 49 +) + +var EventDescriptions = [EvCount]struct { + Name string + minVersion int + Stack bool + Args []string + SArgs []string // string arguments +}{ + EvNone: {"None", 1005, false, []string{}, nil}, + EvBatch: {"Batch", 1005, false, []string{"p", "ticks"}, nil}, // in 1.5 format it was {"p", "seq", "ticks"} + EvFrequency: {"Frequency", 1005, false, []string{"freq"}, nil}, // in 1.5 format it was {"freq", "unused"} + EvStack: {"Stack", 1005, false, []string{"id", "siz"}, nil}, + EvGomaxprocs: {"Gomaxprocs", 1005, true, []string{"procs"}, nil}, + EvProcStart: {"ProcStart", 1005, false, []string{"thread"}, nil}, + EvProcStop: {"ProcStop", 1005, false, []string{}, nil}, + EvGCStart: {"GCStart", 1005, true, []string{"seq"}, nil}, // in 1.5 format it was {} + EvGCDone: {"GCDone", 1005, false, []string{}, nil}, + EvGCSTWStart: {"GCSTWStart", 1005, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0}) + EvGCSTWDone: {"GCSTWDone", 1005, false, []string{}, nil}, + EvGCSweepStart: {"GCSweepStart", 1005, true, []string{}, nil}, + EvGCSweepDone: {"GCSweepDone", 1005, false, []string{"swept", "reclaimed"}, nil}, // before 1.9, format was {} + EvGoCreate: {"GoCreate", 1005, true, []string{"g", "stack"}, nil}, + EvGoStart: {"GoStart", 1005, false, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"} + EvGoEnd: {"GoEnd", 1005, false, []string{}, nil}, + EvGoStop: {"GoStop", 1005, true, []string{}, nil}, + EvGoSched: {"GoSched", 1005, true, []string{}, nil}, + EvGoPreempt: {"GoPreempt", 1005, true, []string{}, nil}, + EvGoSleep: {"GoSleep", 1005, true, []string{}, nil}, + EvGoBlock: {"GoBlock", 1005, true, []string{}, nil}, + EvGoUnblock: {"GoUnblock", 1005, true, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"} + EvGoBlockSend: {"GoBlockSend", 1005, true, []string{}, nil}, + EvGoBlockRecv: {"GoBlockRecv", 1005, true, []string{}, nil}, + EvGoBlockSelect: {"GoBlockSelect", 1005, true, []string{}, nil}, + EvGoBlockSync: {"GoBlockSync", 1005, true, []string{}, nil}, + EvGoBlockCond: {"GoBlockCond", 1005, true, []string{}, nil}, + EvGoBlockNet: {"GoBlockNet", 1005, true, []string{}, nil}, + EvGoSysCall: {"GoSysCall", 1005, true, []string{}, nil}, + EvGoSysExit: {"GoSysExit", 1005, false, []string{"g", "seq", "ts"}, nil}, + EvGoSysBlock: {"GoSysBlock", 1005, false, []string{}, nil}, + EvGoWaiting: {"GoWaiting", 1005, false, []string{"g"}, nil}, + EvGoInSyscall: {"GoInSyscall", 1005, false, []string{"g"}, nil}, + EvHeapAlloc: {"HeapAlloc", 1005, false, []string{"mem"}, nil}, + EvHeapGoal: {"HeapGoal", 1005, false, []string{"mem"}, nil}, + EvTimerGoroutine: {"TimerGoroutine", 1005, false, []string{"g"}, nil}, // in 1.5 format it was {"g", "unused"} + EvFutileWakeup: {"FutileWakeup", 1005, false, []string{}, nil}, + EvString: {"String", 1007, false, []string{}, nil}, + EvGoStartLocal: {"GoStartLocal", 1007, false, []string{"g"}, nil}, + EvGoUnblockLocal: {"GoUnblockLocal", 1007, true, []string{"g"}, nil}, + EvGoSysExitLocal: {"GoSysExitLocal", 1007, false, []string{"g", "ts"}, nil}, + EvGoStartLabel: {"GoStartLabel", 1008, false, []string{"g", "seq", "labelid"}, []string{"label"}}, + EvGoBlockGC: {"GoBlockGC", 1008, true, []string{}, nil}, + EvGCMarkAssistStart: {"GCMarkAssistStart", 1009, true, []string{}, nil}, + EvGCMarkAssistDone: {"GCMarkAssistDone", 1009, false, []string{}, nil}, + EvUserTaskCreate: {"UserTaskCreate", 
1011, true, []string{"taskid", "pid", "typeid"}, []string{"name"}}, + EvUserTaskEnd: {"UserTaskEnd", 1011, true, []string{"taskid"}, nil}, + EvUserRegion: {"UserRegion", 1011, true, []string{"taskid", "mode", "typeid"}, []string{"name"}}, + EvUserLog: {"UserLog", 1011, true, []string{"id", "keyid"}, []string{"category", "message"}}, +} diff --git a/src/internal/trace/parser_test.go b/src/internal/trace/parser_test.go new file mode 100644 index 0000000..cdab95a --- /dev/null +++ b/src/internal/trace/parser_test.go @@ -0,0 +1,110 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestCorruptedInputs(t *testing.T) { + // These inputs crashed parser previously. + tests := []string{ + "gotrace\x00\x020", + "gotrace\x00Q00\x020", + "gotrace\x00T00\x020", + "gotrace\x00\xc3\x0200", + "go 1.5 trace\x00\x00\x00\x00\x020", + "go 1.5 trace\x00\x00\x00\x00Q00\x020", + "go 1.5 trace\x00\x00\x00\x00T00\x020", + "go 1.5 trace\x00\x00\x00\x00\xc3\x0200", + } + for _, data := range tests { + res, err := Parse(strings.NewReader(data), "") + if err == nil || res.Events != nil || res.Stacks != nil { + t.Fatalf("no error on input: %q", data) + } + } +} + +func TestParseCanned(t *testing.T) { + files, err := os.ReadDir("./testdata") + if err != nil { + t.Fatalf("failed to read ./testdata: %v", err) + } + for _, f := range files { + info, err := f.Info() + if err != nil { + t.Fatal(err) + } + if testing.Short() && info.Size() > 10000 { + continue + } + name := filepath.Join("./testdata", f.Name()) + data, err := os.ReadFile(name) + if err != nil { + t.Fatal(err) + } + // Instead of Parse that requires a proper binary name for old traces, + // we use 'parse' that omits symbol lookup if an empty string is given. + _, _, err = parse(bytes.NewReader(data), "") + switch { + case strings.HasSuffix(f.Name(), "_good"): + if err != nil { + t.Errorf("failed to parse good trace %v: %v", f.Name(), err) + } + case strings.HasSuffix(f.Name(), "_unordered"): + if err != ErrTimeOrder { + t.Errorf("unordered trace is not detected %v: %v", f.Name(), err) + } + default: + t.Errorf("unknown input file suffix: %v", f.Name()) + } + } +} + +func TestParseVersion(t *testing.T) { + tests := map[string]int{ + "go 1.5 trace\x00\x00\x00\x00": 1005, + "go 1.7 trace\x00\x00\x00\x00": 1007, + "go 1.10 trace\x00\x00\x00": 1010, + "go 1.25 trace\x00\x00\x00": 1025, + "go 1.234 trace\x00\x00": 1234, + "go 1.2345 trace\x00": -1, + "go 0.0 trace\x00\x00\x00\x00": -1, + "go a.b trace\x00\x00\x00\x00": -1, + } + for header, ver := range tests { + ver1, err := parseHeader([]byte(header)) + if ver == -1 { + if err == nil { + t.Fatalf("no error on input: %q, version %v", header, ver1) + } + } else { + if err != nil { + t.Fatalf("failed to parse: %q (%v)", header, err) + } + if ver != ver1 { + t.Fatalf("wrong version: %v, want %v, input: %q", ver1, ver, header) + } + } + } +} + +func TestTimestampOverflow(t *testing.T) { + // Test that parser correctly handles large timestamps (long tracing). 
+ w := NewWriter() + w.Emit(EvBatch, 0, 0) + w.Emit(EvFrequency, 1e9) + for ts := uint64(1); ts < 1e16; ts *= 2 { + w.Emit(EvGoCreate, ts, ts, 0, 0) + } + if _, err := Parse(w, ""); err != nil { + t.Fatalf("failed to parse: %v", err) + } +} diff --git a/src/internal/trace/testdata/http_1_10_good b/src/internal/trace/testdata/http_1_10_good Binary files differnew file mode 100644 index 0000000..a4f2ed8 --- /dev/null +++ b/src/internal/trace/testdata/http_1_10_good diff --git a/src/internal/trace/testdata/http_1_11_good b/src/internal/trace/testdata/http_1_11_good Binary files differnew file mode 100644 index 0000000..0efcc6f --- /dev/null +++ b/src/internal/trace/testdata/http_1_11_good diff --git a/src/internal/trace/testdata/http_1_5_good b/src/internal/trace/testdata/http_1_5_good Binary files differnew file mode 100644 index 0000000..0736cae --- /dev/null +++ b/src/internal/trace/testdata/http_1_5_good diff --git a/src/internal/trace/testdata/http_1_7_good b/src/internal/trace/testdata/http_1_7_good Binary files differnew file mode 100644 index 0000000..b0e318e --- /dev/null +++ b/src/internal/trace/testdata/http_1_7_good diff --git a/src/internal/trace/testdata/http_1_9_good b/src/internal/trace/testdata/http_1_9_good Binary files differnew file mode 100644 index 0000000..ca89278 --- /dev/null +++ b/src/internal/trace/testdata/http_1_9_good diff --git a/src/internal/trace/testdata/stress_1_10_good b/src/internal/trace/testdata/stress_1_10_good Binary files differnew file mode 100644 index 0000000..19778b0 --- /dev/null +++ b/src/internal/trace/testdata/stress_1_10_good diff --git a/src/internal/trace/testdata/stress_1_11_good b/src/internal/trace/testdata/stress_1_11_good Binary files differnew file mode 100644 index 0000000..6468d89 --- /dev/null +++ b/src/internal/trace/testdata/stress_1_11_good diff --git a/src/internal/trace/testdata/stress_1_5_good b/src/internal/trace/testdata/stress_1_5_good Binary files differnew file mode 100644 index 0000000..c5055eb --- /dev/null +++ b/src/internal/trace/testdata/stress_1_5_good diff --git a/src/internal/trace/testdata/stress_1_5_unordered b/src/internal/trace/testdata/stress_1_5_unordered Binary files differnew file mode 100644 index 0000000..11f7d74 --- /dev/null +++ b/src/internal/trace/testdata/stress_1_5_unordered diff --git a/src/internal/trace/testdata/stress_1_7_good b/src/internal/trace/testdata/stress_1_7_good Binary files differnew file mode 100644 index 0000000..b4d927d --- /dev/null +++ b/src/internal/trace/testdata/stress_1_7_good diff --git a/src/internal/trace/testdata/stress_1_9_good b/src/internal/trace/testdata/stress_1_9_good Binary files differnew file mode 100644 index 0000000..dcf17f1 --- /dev/null +++ b/src/internal/trace/testdata/stress_1_9_good diff --git a/src/internal/trace/testdata/stress_start_stop_1_10_good b/src/internal/trace/testdata/stress_start_stop_1_10_good Binary files differnew file mode 100644 index 0000000..b908e10 --- /dev/null +++ b/src/internal/trace/testdata/stress_start_stop_1_10_good diff --git a/src/internal/trace/testdata/stress_start_stop_1_11_good b/src/internal/trace/testdata/stress_start_stop_1_11_good Binary files differnew file mode 100644 index 0000000..457f01a --- /dev/null +++ b/src/internal/trace/testdata/stress_start_stop_1_11_good diff --git a/src/internal/trace/testdata/stress_start_stop_1_5_good b/src/internal/trace/testdata/stress_start_stop_1_5_good Binary files differnew file mode 100644 index 0000000..72a887b --- /dev/null +++ 
b/src/internal/trace/testdata/stress_start_stop_1_5_good diff --git a/src/internal/trace/testdata/stress_start_stop_1_7_good b/src/internal/trace/testdata/stress_start_stop_1_7_good Binary files differnew file mode 100644 index 0000000..c23ed7d --- /dev/null +++ b/src/internal/trace/testdata/stress_start_stop_1_7_good diff --git a/src/internal/trace/testdata/stress_start_stop_1_9_good b/src/internal/trace/testdata/stress_start_stop_1_9_good Binary files differnew file mode 100644 index 0000000..f00f190 --- /dev/null +++ b/src/internal/trace/testdata/stress_start_stop_1_9_good diff --git a/src/internal/trace/testdata/user_task_span_1_11_good b/src/internal/trace/testdata/user_task_span_1_11_good Binary files differnew file mode 100644 index 0000000..f4edb67 --- /dev/null +++ b/src/internal/trace/testdata/user_task_span_1_11_good diff --git a/src/internal/trace/writer.go b/src/internal/trace/writer.go new file mode 100644 index 0000000..dd0b9f1 --- /dev/null +++ b/src/internal/trace/writer.go @@ -0,0 +1,49 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace + +import "bytes" + +// Writer is a test trace writer. +type Writer struct { + bytes.Buffer +} + +func NewWriter() *Writer { + w := new(Writer) + w.Write([]byte("go 1.9 trace\x00\x00\x00\x00")) + return w +} + +// Emit writes an event record to the trace. +// See Event types for valid types and required arguments. +func (w *Writer) Emit(typ byte, args ...uint64) { + nargs := byte(len(args)) - 1 + if nargs > 3 { + nargs = 3 + } + buf := []byte{typ | nargs<<6} + if nargs == 3 { + buf = append(buf, 0) + } + for _, a := range args { + buf = appendVarint(buf, a) + } + if nargs == 3 { + buf[1] = byte(len(buf) - 2) + } + n, err := w.Write(buf) + if n != len(buf) || err != nil { + panic("failed to write") + } +} + +func appendVarint(buf []byte, v uint64) []byte { + for ; v >= 0x80; v >>= 7 { + buf = append(buf, 0x80|byte(v)) + } + buf = append(buf, byte(v)) + return buf +} |
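A closing illustration of how the pieces fit together: the test Writer above is enough to hand-write a tiny trace and feed it straight to Parse, in the same way TestTimestampOverflow does. A sketch of a hypothetical companion test, assuming it lives inside package trace:

	package trace

	import "testing"

	// TestTinyTrace is a hypothetical companion to TestTimestampOverflow: it
	// hand-writes a minimal trace (batch header, frequency event, and one
	// goroutine-creation event) and checks that Parse accepts it.
	func TestTinyTrace(t *testing.T) {
		w := NewWriter()               // emits the "go 1.9 trace" header
		w.Emit(EvBatch, 0, 0)          // batch on P 0 starting at tick 0
		w.Emit(EvFrequency, 1e9)       // 1e9 ticks per second
		w.Emit(EvGoCreate, 1, 1, 0, 0) // ts, new goroutine id, new stack id, stack id
		if _, err := Parse(w, ""); err != nil {
			t.Fatalf("failed to parse: %v", err)
		}
	}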