From 73df946d56c74384511a194dd01dbe099584fd1a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 28 Apr 2024 15:14:23 +0200 Subject: Adding upstream version 1.16.10. Signed-off-by: Daniel Baumann --- src/cmd/trace/annotations.go | 1314 +++++++++++++++++++++++++++++++++++++ src/cmd/trace/annotations_test.go | 392 +++++++++++ src/cmd/trace/doc.go | 35 + src/cmd/trace/goroutines.go | 297 +++++++++ src/cmd/trace/main.go | 238 +++++++ src/cmd/trace/mmu.go | 403 ++++++++++++ src/cmd/trace/pprof.go | 377 +++++++++++ src/cmd/trace/trace.go | 1273 +++++++++++++++++++++++++++++++++++ src/cmd/trace/trace_test.go | 269 ++++++++ src/cmd/trace/trace_unix_test.go | 103 +++ 10 files changed, 4701 insertions(+) create mode 100644 src/cmd/trace/annotations.go create mode 100644 src/cmd/trace/annotations_test.go create mode 100644 src/cmd/trace/doc.go create mode 100644 src/cmd/trace/goroutines.go create mode 100644 src/cmd/trace/main.go create mode 100644 src/cmd/trace/mmu.go create mode 100644 src/cmd/trace/pprof.go create mode 100644 src/cmd/trace/trace.go create mode 100644 src/cmd/trace/trace_test.go create mode 100644 src/cmd/trace/trace_unix_test.go (limited to 'src/cmd/trace') diff --git a/src/cmd/trace/annotations.go b/src/cmd/trace/annotations.go new file mode 100644 index 0000000..9b45457 --- /dev/null +++ b/src/cmd/trace/annotations.go @@ -0,0 +1,1314 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "bytes" + "fmt" + "html/template" + "internal/trace" + "log" + "math" + "net/http" + "net/url" + "reflect" + "sort" + "strconv" + "strings" + "time" +) + +func init() { + http.HandleFunc("/usertasks", httpUserTasks) + http.HandleFunc("/usertask", httpUserTask) + http.HandleFunc("/userregions", httpUserRegions) + http.HandleFunc("/userregion", httpUserRegion) +} + +// httpUserTasks reports all tasks found in the trace. 
+func httpUserTasks(w http.ResponseWriter, r *http.Request) { + res, err := analyzeAnnotations() + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + tasks := res.tasks + summary := make(map[string]taskStats) + for _, task := range tasks { + stats, ok := summary[task.name] + if !ok { + stats.Type = task.name + } + + stats.add(task) + summary[task.name] = stats + } + + // Sort tasks by type. + userTasks := make([]taskStats, 0, len(summary)) + for _, stats := range summary { + userTasks = append(userTasks, stats) + } + sort.Slice(userTasks, func(i, j int) bool { + return userTasks[i].Type < userTasks[j].Type + }) + + // Emit table. + err = templUserTaskTypes.Execute(w, userTasks) + if err != nil { + http.Error(w, fmt.Sprintf("failed to execute template: %v", err), http.StatusInternalServerError) + return + } +} + +func httpUserRegions(w http.ResponseWriter, r *http.Request) { + res, err := analyzeAnnotations() + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + allRegions := res.regions + + summary := make(map[regionTypeID]regionStats) + for id, regions := range allRegions { + stats, ok := summary[id] + if !ok { + stats.regionTypeID = id + } + for _, s := range regions { + stats.add(s) + } + summary[id] = stats + } + // Sort regions by pc and name + userRegions := make([]regionStats, 0, len(summary)) + for _, stats := range summary { + userRegions = append(userRegions, stats) + } + sort.Slice(userRegions, func(i, j int) bool { + if userRegions[i].Type != userRegions[j].Type { + return userRegions[i].Type < userRegions[j].Type + } + return userRegions[i].Frame.PC < userRegions[j].Frame.PC + }) + // Emit table. 
+ err = templUserRegionTypes.Execute(w, userRegions) + if err != nil { + http.Error(w, fmt.Sprintf("failed to execute template: %v", err), http.StatusInternalServerError) + return + } +} + +func httpUserRegion(w http.ResponseWriter, r *http.Request) { + filter, err := newRegionFilter(r) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + res, err := analyzeAnnotations() + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + allRegions := res.regions + + var data []regionDesc + + var maxTotal int64 + for id, regions := range allRegions { + for _, s := range regions { + if !filter.match(id, s) { + continue + } + data = append(data, s) + if maxTotal < s.TotalTime { + maxTotal = s.TotalTime + } + } + } + + sortby := r.FormValue("sortby") + _, ok := reflect.TypeOf(regionDesc{}).FieldByNameFunc(func(s string) bool { + return s == sortby + }) + if !ok { + sortby = "TotalTime" + } + sort.Slice(data, func(i, j int) bool { + ival := reflect.ValueOf(data[i]).FieldByName(sortby).Int() + jval := reflect.ValueOf(data[j]).FieldByName(sortby).Int() + return ival > jval + }) + + err = templUserRegionType.Execute(w, struct { + MaxTotal int64 + Data []regionDesc + Name string + Filter *regionFilter + }{ + MaxTotal: maxTotal, + Data: data, + Name: filter.name, + Filter: filter, + }) + if err != nil { + http.Error(w, fmt.Sprintf("failed to execute template: %v", err), http.StatusInternalServerError) + return + } +} + +// httpUserTask presents the details of the selected tasks. 
+func httpUserTask(w http.ResponseWriter, r *http.Request) { + filter, err := newTaskFilter(r) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + res, err := analyzeAnnotations() + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + tasks := res.tasks + + type event struct { + WhenString string + Elapsed time.Duration + Go uint64 + What string + // TODO: include stack trace of creation time + } + type entry struct { + WhenString string + ID uint64 + Duration time.Duration + Complete bool + Events []event + Start, End time.Duration // Time since the beginning of the trace + GCTime time.Duration + } + + base := time.Duration(firstTimestamp()) * time.Nanosecond // trace start + + var data []entry + + for _, task := range tasks { + if !filter.match(task) { + continue + } + // merge events in the task.events and task.regions.Start + rawEvents := append([]*trace.Event{}, task.events...) + for _, s := range task.regions { + if s.Start != nil { + rawEvents = append(rawEvents, s.Start) + } + } + sort.SliceStable(rawEvents, func(i, j int) bool { return rawEvents[i].Ts < rawEvents[j].Ts }) + + var events []event + var last time.Duration + for i, ev := range rawEvents { + when := time.Duration(ev.Ts)*time.Nanosecond - base + elapsed := time.Duration(ev.Ts)*time.Nanosecond - last + if i == 0 { + elapsed = 0 + } + + what := describeEvent(ev) + if what != "" { + events = append(events, event{ + WhenString: fmt.Sprintf("%2.9f", when.Seconds()), + Elapsed: elapsed, + What: what, + Go: ev.G, + }) + last = time.Duration(ev.Ts) * time.Nanosecond + } + } + + data = append(data, entry{ + WhenString: fmt.Sprintf("%2.9fs", (time.Duration(task.firstTimestamp())*time.Nanosecond - base).Seconds()), + Duration: task.duration(), + ID: task.id, + Complete: task.complete(), + Events: events, + Start: time.Duration(task.firstTimestamp()) * time.Nanosecond, + End: time.Duration(task.endTimestamp()) * time.Nanosecond, + 
GCTime: task.overlappingGCDuration(res.gcEvents), + }) + } + sort.Slice(data, func(i, j int) bool { + return data[i].Duration < data[j].Duration + }) + + // Emit table. + err = templUserTaskType.Execute(w, struct { + Name string + Entry []entry + }{ + Name: filter.name, + Entry: data, + }) + if err != nil { + log.Printf("failed to execute template: %v", err) + http.Error(w, fmt.Sprintf("failed to execute template: %v", err), http.StatusInternalServerError) + return + } +} + +type annotationAnalysisResult struct { + tasks map[uint64]*taskDesc // tasks + regions map[regionTypeID][]regionDesc // regions + gcEvents []*trace.Event // GCStartevents, sorted +} + +type regionTypeID struct { + Frame trace.Frame // top frame + Type string +} + +// analyzeAnnotations analyzes user annotation events and +// returns the task descriptors keyed by internal task id. +func analyzeAnnotations() (annotationAnalysisResult, error) { + res, err := parseTrace() + if err != nil { + return annotationAnalysisResult{}, fmt.Errorf("failed to parse trace: %v", err) + } + + events := res.Events + if len(events) == 0 { + return annotationAnalysisResult{}, fmt.Errorf("empty trace") + } + + tasks := allTasks{} + regions := map[regionTypeID][]regionDesc{} + var gcEvents []*trace.Event + + for _, ev := range events { + switch typ := ev.Type; typ { + case trace.EvUserTaskCreate, trace.EvUserTaskEnd, trace.EvUserLog: + taskid := ev.Args[0] + task := tasks.task(taskid) + task.addEvent(ev) + + // retrieve parent task information + if typ == trace.EvUserTaskCreate { + if parentID := ev.Args[1]; parentID != 0 { + parentTask := tasks.task(parentID) + task.parent = parentTask + if parentTask != nil { + parentTask.children = append(parentTask.children, task) + } + } + } + + case trace.EvGCStart: + gcEvents = append(gcEvents, ev) + } + } + // combine region info. + analyzeGoroutines(events) + for goid, stats := range gs { + // gs is a global var defined in goroutines.go as a result + // of analyzeGoroutines. 
TODO(hyangah): fix this not to depend + // on a 'global' var. + for _, s := range stats.Regions { + if s.TaskID != 0 { + task := tasks.task(s.TaskID) + task.goroutines[goid] = struct{}{} + task.regions = append(task.regions, regionDesc{UserRegionDesc: s, G: goid}) + } + var frame trace.Frame + if s.Start != nil { + frame = *s.Start.Stk[0] + } + id := regionTypeID{Frame: frame, Type: s.Name} + regions[id] = append(regions[id], regionDesc{UserRegionDesc: s, G: goid}) + } + } + + // sort regions in tasks based on the timestamps. + for _, task := range tasks { + sort.SliceStable(task.regions, func(i, j int) bool { + si, sj := task.regions[i].firstTimestamp(), task.regions[j].firstTimestamp() + if si != sj { + return si < sj + } + return task.regions[i].lastTimestamp() < task.regions[j].lastTimestamp() + }) + } + return annotationAnalysisResult{tasks: tasks, regions: regions, gcEvents: gcEvents}, nil +} + +// taskDesc represents a task. +type taskDesc struct { + name string // user-provided task name + id uint64 // internal task id + events []*trace.Event // sorted based on timestamp. + regions []regionDesc // associated regions, sorted based on the start timestamp and then the last timestamp. 
+ goroutines map[uint64]struct{} // involved goroutines + + create *trace.Event // Task create event + end *trace.Event // Task end event + + parent *taskDesc + children []*taskDesc +} + +func newTaskDesc(id uint64) *taskDesc { + return &taskDesc{ + id: id, + goroutines: make(map[uint64]struct{}), + } +} + +func (task *taskDesc) String() string { + if task == nil { + return "task " + } + wb := new(bytes.Buffer) + fmt.Fprintf(wb, "task %d:\t%s\n", task.id, task.name) + fmt.Fprintf(wb, "\tstart: %v end: %v complete: %t\n", task.firstTimestamp(), task.endTimestamp(), task.complete()) + fmt.Fprintf(wb, "\t%d goroutines\n", len(task.goroutines)) + fmt.Fprintf(wb, "\t%d regions:\n", len(task.regions)) + for _, s := range task.regions { + fmt.Fprintf(wb, "\t\t%s(goid=%d)\n", s.Name, s.G) + } + if task.parent != nil { + fmt.Fprintf(wb, "\tparent: %s\n", task.parent.name) + } + fmt.Fprintf(wb, "\t%d children:\n", len(task.children)) + for _, c := range task.children { + fmt.Fprintf(wb, "\t\t%s\n", c.name) + } + + return wb.String() +} + +// regionDesc represents a region. +type regionDesc struct { + *trace.UserRegionDesc + G uint64 // id of goroutine where the region was defined +} + +type allTasks map[uint64]*taskDesc + +func (tasks allTasks) task(taskID uint64) *taskDesc { + if taskID == 0 { + return nil // notask + } + + t, ok := tasks[taskID] + if ok { + return t + } + + t = &taskDesc{ + id: taskID, + goroutines: make(map[uint64]struct{}), + } + tasks[taskID] = t + return t +} + +func (task *taskDesc) addEvent(ev *trace.Event) { + if task == nil { + return + } + + task.events = append(task.events, ev) + task.goroutines[ev.G] = struct{}{} + + switch typ := ev.Type; typ { + case trace.EvUserTaskCreate: + task.name = ev.SArgs[0] + task.create = ev + case trace.EvUserTaskEnd: + task.end = ev + } +} + +// complete is true only if both start and end events of this task +// are present in the trace. 
+func (task *taskDesc) complete() bool { + if task == nil { + return false + } + return task.create != nil && task.end != nil +} + +// descendants returns all the task nodes in the subtree rooted from this task. +func (task *taskDesc) descendants() []*taskDesc { + if task == nil { + return nil + } + res := []*taskDesc{task} + for i := 0; len(res[i:]) > 0; i++ { + t := res[i] + for _, c := range t.children { + res = append(res, c) + } + } + return res +} + +// firstTimestamp returns the first timestamp of this task found in +// this trace. If the trace does not contain the task creation event, +// the first timestamp of the trace will be returned. +func (task *taskDesc) firstTimestamp() int64 { + if task != nil && task.create != nil { + return task.create.Ts + } + return firstTimestamp() +} + +// lastTimestamp returns the last timestamp of this task in this +// trace. If the trace does not contain the task end event, the last +// timestamp of the trace will be returned. +func (task *taskDesc) lastTimestamp() int64 { + endTs := task.endTimestamp() + if last := task.lastEvent(); last != nil && last.Ts > endTs { + return last.Ts + } + return endTs +} + +// endTimestamp returns the timestamp of this task's end event. +// If the trace does not contain the task end event, the last +// timestamp of the trace will be returned. +func (task *taskDesc) endTimestamp() int64 { + if task != nil && task.end != nil { + return task.end.Ts + } + return lastTimestamp() +} + +func (task *taskDesc) duration() time.Duration { + return time.Duration(task.endTimestamp()-task.firstTimestamp()) * time.Nanosecond +} + +func (region *regionDesc) duration() time.Duration { + return time.Duration(region.lastTimestamp()-region.firstTimestamp()) * time.Nanosecond +} + +// overlappingGCDuration returns the sum of GC period overlapping with the task's lifetime. 
+func (task *taskDesc) overlappingGCDuration(evs []*trace.Event) (overlapping time.Duration) { + for _, ev := range evs { + // make sure we only consider the global GC events. + if typ := ev.Type; typ != trace.EvGCStart && typ != trace.EvGCSTWStart { + continue + } + + if o, overlapped := task.overlappingDuration(ev); overlapped { + overlapping += o + } + } + return overlapping +} + +// overlappingInstant reports whether the instantaneous event, ev, occurred during +// any of the task's region if ev is a goroutine-local event, or overlaps with the +// task's lifetime if ev is a global event. +func (task *taskDesc) overlappingInstant(ev *trace.Event) bool { + if _, ok := isUserAnnotationEvent(ev); ok && task.id != ev.Args[0] { + return false // not this task's user event. + } + + ts := ev.Ts + taskStart := task.firstTimestamp() + taskEnd := task.endTimestamp() + if ts < taskStart || taskEnd < ts { + return false + } + if ev.P == trace.GCP { + return true + } + + // Goroutine local event. Check whether there are regions overlapping with the event. + goid := ev.G + for _, region := range task.regions { + if region.G != goid { + continue + } + if region.firstTimestamp() <= ts && ts <= region.lastTimestamp() { + return true + } + } + return false +} + +// overlappingDuration reports whether the durational event, ev, overlaps with +// any of the task's region if ev is a goroutine-local event, or overlaps with +// the task's lifetime if ev is a global event. It returns the overlapping time +// as well. 
+func (task *taskDesc) overlappingDuration(ev *trace.Event) (time.Duration, bool) { + start := ev.Ts + end := lastTimestamp() + if ev.Link != nil { + end = ev.Link.Ts + } + + if start > end { + return 0, false + } + + goid := ev.G + goid2 := ev.G + if ev.Link != nil { + goid2 = ev.Link.G + } + + // This event is a global GC event + if ev.P == trace.GCP { + taskStart := task.firstTimestamp() + taskEnd := task.endTimestamp() + o := overlappingDuration(taskStart, taskEnd, start, end) + return o, o > 0 + } + + // Goroutine local event. Check whether there are regions overlapping with the event. + var overlapping time.Duration + var lastRegionEnd int64 // the end of previous overlapping region + for _, region := range task.regions { + if region.G != goid && region.G != goid2 { + continue + } + regionStart, regionEnd := region.firstTimestamp(), region.lastTimestamp() + if regionStart < lastRegionEnd { // skip nested regions + continue + } + + if o := overlappingDuration(regionStart, regionEnd, start, end); o > 0 { + // overlapping. + lastRegionEnd = regionEnd + overlapping += o + } + } + return overlapping, overlapping > 0 +} + +// overlappingDuration returns the overlapping time duration between +// two time intervals [start1, end1] and [start2, end2] where +// start, end parameters are all int64 representing nanoseconds. +func overlappingDuration(start1, end1, start2, end2 int64) time.Duration { + // assume start1 <= end1 and start2 <= end2 + if end1 < start2 || end2 < start1 { + return 0 + } + + if start1 < start2 { // choose the later one + start1 = start2 + } + if end1 > end2 { // choose the earlier one + end1 = end2 + } + return time.Duration(end1 - start1) +} + +func (task *taskDesc) lastEvent() *trace.Event { + if task == nil { + return nil + } + + if n := len(task.events); n > 0 { + return task.events[n-1] + } + return nil +} + +// firstTimestamp returns the timestamp of region start event. 
+// If the region's start event is not present in the trace, +// the first timestamp of the trace will be returned. +func (region *regionDesc) firstTimestamp() int64 { + if region.Start != nil { + return region.Start.Ts + } + return firstTimestamp() +} + +// lastTimestamp returns the timestamp of region end event. +// If the region's end event is not present in the trace, +// the last timestamp of the trace will be returned. +func (region *regionDesc) lastTimestamp() int64 { + if region.End != nil { + return region.End.Ts + } + return lastTimestamp() +} + +// RelatedGoroutines returns IDs of goroutines related to the task. A goroutine +// is related to the task if user annotation activities for the task occurred. +// If non-zero depth is provided, this searches all events with BFS and includes +// goroutines unblocked any of related goroutines to the result. +func (task *taskDesc) RelatedGoroutines(events []*trace.Event, depth int) map[uint64]bool { + start, end := task.firstTimestamp(), task.endTimestamp() + + gmap := map[uint64]bool{} + for k := range task.goroutines { + gmap[k] = true + } + + for i := 0; i < depth; i++ { + gmap1 := make(map[uint64]bool) + for g := range gmap { + gmap1[g] = true + } + for _, ev := range events { + if ev.Ts < start || ev.Ts > end { + continue + } + if ev.Type == trace.EvGoUnblock && gmap[ev.Args[0]] { + gmap1[ev.G] = true + } + gmap = gmap1 + } + } + gmap[0] = true // for GC events (goroutine id = 0) + return gmap +} + +type taskFilter struct { + name string + cond []func(*taskDesc) bool +} + +func (f *taskFilter) match(t *taskDesc) bool { + if t == nil { + return false + } + for _, c := range f.cond { + if !c(t) { + return false + } + } + return true +} + +func newTaskFilter(r *http.Request) (*taskFilter, error) { + if err := r.ParseForm(); err != nil { + return nil, err + } + + var name []string + var conditions []func(*taskDesc) bool + + param := r.Form + if typ, ok := param["type"]; ok && len(typ) > 0 { + name = append(name, 
"type="+typ[0]) + conditions = append(conditions, func(t *taskDesc) bool { + return t.name == typ[0] + }) + } + if complete := r.FormValue("complete"); complete == "1" { + name = append(name, "complete") + conditions = append(conditions, func(t *taskDesc) bool { + return t.complete() + }) + } else if complete == "0" { + name = append(name, "incomplete") + conditions = append(conditions, func(t *taskDesc) bool { + return !t.complete() + }) + } + if lat, err := time.ParseDuration(r.FormValue("latmin")); err == nil { + name = append(name, fmt.Sprintf("latency >= %s", lat)) + conditions = append(conditions, func(t *taskDesc) bool { + return t.complete() && t.duration() >= lat + }) + } + if lat, err := time.ParseDuration(r.FormValue("latmax")); err == nil { + name = append(name, fmt.Sprintf("latency <= %s", lat)) + conditions = append(conditions, func(t *taskDesc) bool { + return t.complete() && t.duration() <= lat + }) + } + if text := r.FormValue("logtext"); text != "" { + name = append(name, fmt.Sprintf("log contains %q", text)) + conditions = append(conditions, func(t *taskDesc) bool { + return taskMatches(t, text) + }) + } + + return &taskFilter{name: strings.Join(name, ","), cond: conditions}, nil +} + +func taskMatches(t *taskDesc, text string) bool { + for _, ev := range t.events { + switch ev.Type { + case trace.EvUserTaskCreate, trace.EvUserRegion, trace.EvUserLog: + for _, s := range ev.SArgs { + if strings.Contains(s, text) { + return true + } + } + } + } + return false +} + +type regionFilter struct { + name string + params url.Values + cond []func(regionTypeID, regionDesc) bool +} + +func (f *regionFilter) match(id regionTypeID, s regionDesc) bool { + for _, c := range f.cond { + if !c(id, s) { + return false + } + } + return true +} + +func newRegionFilter(r *http.Request) (*regionFilter, error) { + if err := r.ParseForm(); err != nil { + return nil, err + } + + var name []string + var conditions []func(regionTypeID, regionDesc) bool + filterParams := 
make(url.Values) + + param := r.Form + if typ, ok := param["type"]; ok && len(typ) > 0 { + name = append(name, "type="+typ[0]) + conditions = append(conditions, func(id regionTypeID, s regionDesc) bool { + return id.Type == typ[0] + }) + filterParams.Add("type", typ[0]) + } + if pc, err := strconv.ParseUint(r.FormValue("pc"), 16, 64); err == nil { + encPC := fmt.Sprintf("%x", pc) + name = append(name, "pc="+encPC) + conditions = append(conditions, func(id regionTypeID, s regionDesc) bool { + return id.Frame.PC == pc + }) + filterParams.Add("pc", encPC) + } + + if lat, err := time.ParseDuration(r.FormValue("latmin")); err == nil { + name = append(name, fmt.Sprintf("latency >= %s", lat)) + conditions = append(conditions, func(_ regionTypeID, s regionDesc) bool { + return s.duration() >= lat + }) + filterParams.Add("latmin", lat.String()) + } + if lat, err := time.ParseDuration(r.FormValue("latmax")); err == nil { + name = append(name, fmt.Sprintf("latency <= %s", lat)) + conditions = append(conditions, func(_ regionTypeID, s regionDesc) bool { + return s.duration() <= lat + }) + filterParams.Add("latmax", lat.String()) + } + + return ®ionFilter{ + name: strings.Join(name, ","), + cond: conditions, + params: filterParams, + }, nil +} + +type durationHistogram struct { + Count int + Buckets []int + MinBucket, MaxBucket int +} + +// Five buckets for every power of 10. +var logDiv = math.Log(math.Pow(10, 1.0/5)) + +func (h *durationHistogram) add(d time.Duration) { + var bucket int + if d > 0 { + bucket = int(math.Log(float64(d)) / logDiv) + } + if len(h.Buckets) <= bucket { + h.Buckets = append(h.Buckets, make([]int, bucket-len(h.Buckets)+1)...) 
+ h.Buckets = h.Buckets[:cap(h.Buckets)] + } + h.Buckets[bucket]++ + if bucket < h.MinBucket || h.MaxBucket == 0 { + h.MinBucket = bucket + } + if bucket > h.MaxBucket { + h.MaxBucket = bucket + } + h.Count++ +} + +func (h *durationHistogram) BucketMin(bucket int) time.Duration { + return time.Duration(math.Exp(float64(bucket) * logDiv)) +} + +func niceDuration(d time.Duration) string { + var rnd time.Duration + var unit string + switch { + case d < 10*time.Microsecond: + rnd, unit = time.Nanosecond, "ns" + case d < 10*time.Millisecond: + rnd, unit = time.Microsecond, "µs" + case d < 10*time.Second: + rnd, unit = time.Millisecond, "ms" + default: + rnd, unit = time.Second, "s " + } + return fmt.Sprintf("%d%s", d/rnd, unit) +} + +func (h *durationHistogram) ToHTML(urlmaker func(min, max time.Duration) string) template.HTML { + if h == nil || h.Count == 0 { + return template.HTML("") + } + + const barWidth = 400 + + maxCount := 0 + for _, count := range h.Buckets { + if count > maxCount { + maxCount = count + } + } + + w := new(bytes.Buffer) + fmt.Fprintf(w, ``) + for i := h.MinBucket; i <= h.MaxBucket; i++ { + // Tick label. + if h.Buckets[i] > 0 { + fmt.Fprintf(w, ``, urlmaker(h.BucketMin(i), h.BucketMin(i+1)), niceDuration(h.BucketMin(i))) + } else { + fmt.Fprintf(w, ``, niceDuration(h.BucketMin(i))) + } + // Bucket bar. + width := h.Buckets[i] * barWidth / maxCount + fmt.Fprintf(w, ``, width) + // Bucket count. + fmt.Fprintf(w, ``, h.Buckets[i]) + fmt.Fprintf(w, "\n") + + } + // Final tick label. + fmt.Fprintf(w, ``, niceDuration(h.BucketMin(h.MaxBucket+1))) + fmt.Fprintf(w, `
%s
%s
 
%d
%s
`) + return template.HTML(w.String()) +} + +func (h *durationHistogram) String() string { + const barWidth = 40 + + labels := []string{} + maxLabel := 0 + maxCount := 0 + for i := h.MinBucket; i <= h.MaxBucket; i++ { + // TODO: This formatting is pretty awful. + label := fmt.Sprintf("[%-12s%-11s)", h.BucketMin(i).String()+",", h.BucketMin(i+1)) + labels = append(labels, label) + if len(label) > maxLabel { + maxLabel = len(label) + } + count := h.Buckets[i] + if count > maxCount { + maxCount = count + } + } + + w := new(bytes.Buffer) + for i := h.MinBucket; i <= h.MaxBucket; i++ { + count := h.Buckets[i] + bar := count * barWidth / maxCount + fmt.Fprintf(w, "%*s %-*s %d\n", maxLabel, labels[i-h.MinBucket], barWidth, strings.Repeat("█", bar), count) + } + return w.String() +} + +type regionStats struct { + regionTypeID + Histogram durationHistogram +} + +func (s *regionStats) UserRegionURL() func(min, max time.Duration) string { + return func(min, max time.Duration) string { + return fmt.Sprintf("/userregion?type=%s&pc=%x&latmin=%v&latmax=%v", template.URLQueryEscaper(s.Type), s.Frame.PC, template.URLQueryEscaper(min), template.URLQueryEscaper(max)) + } +} + +func (s *regionStats) add(region regionDesc) { + s.Histogram.add(region.duration()) +} + +var templUserRegionTypes = template.Must(template.New("").Parse(` + + + + + + + + + +{{range $}} + + + + + +{{end}} +
Region typeCountDuration distribution (complete tasks)
{{.Type}}
{{.Frame.Fn}}
{{.Frame.File}}:{{.Frame.Line}}
{{.Histogram.Count}}{{.Histogram.ToHTML (.UserRegionURL)}}
+ + +`)) + +type taskStats struct { + Type string + Count int // Complete + incomplete tasks + Histogram durationHistogram // Complete tasks only +} + +func (s *taskStats) UserTaskURL(complete bool) func(min, max time.Duration) string { + return func(min, max time.Duration) string { + return fmt.Sprintf("/usertask?type=%s&complete=%v&latmin=%v&latmax=%v", template.URLQueryEscaper(s.Type), template.URLQueryEscaper(complete), template.URLQueryEscaper(min), template.URLQueryEscaper(max)) + } +} + +func (s *taskStats) add(task *taskDesc) { + s.Count++ + if task.complete() { + s.Histogram.add(task.duration()) + } +} + +var templUserTaskTypes = template.Must(template.New("").Parse(` + + + +Search log text:

+ + + + + + +{{range $}} + + + + + +{{end}} +
Task typeCountDuration distribution (complete tasks)
{{.Type}}{{.Count}}{{.Histogram.ToHTML (.UserTaskURL true)}}
+ + +`)) + +var templUserTaskType = template.Must(template.New("userTask").Funcs(template.FuncMap{ + "elapsed": elapsed, + "asMillisecond": asMillisecond, + "trimSpace": strings.TrimSpace, +}).Parse(` + + User Task: {{.Name}} + + + +

User Task: {{.Name}}

+ +Search log text:
+ +

+ + + + {{range $el := $.Entry}} + + + + + + + {{range $el.Events}} + + + + + + + {{end}} + + + + + + {{end}} + + +`)) + +func elapsed(d time.Duration) string { + b := []byte(fmt.Sprintf("%.9f", d.Seconds())) + + // For subsecond durations, blank all zeros before decimal point, + // and all zeros between the decimal point and the first non-zero digit. + if d < time.Second { + dot := bytes.IndexByte(b, '.') + for i := 0; i < dot; i++ { + b[i] = ' ' + } + for i := dot + 1; i < len(b); i++ { + if b[i] == '0' { + b[i] = ' ' + } else { + break + } + } + } + + return string(b) +} + +func asMillisecond(d time.Duration) float64 { + return float64(d.Nanoseconds()) / 1e6 +} + +func formatUserLog(ev *trace.Event) string { + k, v := ev.SArgs[0], ev.SArgs[1] + if k == "" { + return v + } + if v == "" { + return k + } + return fmt.Sprintf("%v=%v", k, v) +} + +func describeEvent(ev *trace.Event) string { + switch ev.Type { + case trace.EvGoCreate: + goid := ev.Args[0] + return fmt.Sprintf("new goroutine %d: %s", goid, gs[goid].Name) + case trace.EvGoEnd, trace.EvGoStop: + return "goroutine stopped" + case trace.EvUserLog: + return formatUserLog(ev) + case trace.EvUserRegion: + if ev.Args[1] == 0 { + duration := "unknown" + if ev.Link != nil { + duration = (time.Duration(ev.Link.Ts-ev.Ts) * time.Nanosecond).String() + } + return fmt.Sprintf("region %s started (duration: %v)", ev.SArgs[0], duration) + } + return fmt.Sprintf("region %s ended", ev.SArgs[0]) + case trace.EvUserTaskCreate: + return fmt.Sprintf("task %v (id %d, parent %d) created", ev.SArgs[0], ev.Args[0], ev.Args[1]) + // TODO: add child task creation events into the parent task events + case trace.EvUserTaskEnd: + return "task end" + } + return "" +} + +func isUserAnnotationEvent(ev *trace.Event) (taskID uint64, ok bool) { + switch ev.Type { + case trace.EvUserLog, trace.EvUserRegion, trace.EvUserTaskCreate, trace.EvUserTaskEnd: + return ev.Args[0], true + } + return 0, false +} + +var templUserRegionType = 
template.Must(template.New("").Funcs(template.FuncMap{ + "prettyDuration": func(nsec int64) template.HTML { + d := time.Duration(nsec) * time.Nanosecond + return template.HTML(niceDuration(d)) + }, + "percent": func(dividend, divisor int64) template.HTML { + if divisor == 0 { + return "" + } + return template.HTML(fmt.Sprintf("(%.1f%%)", float64(dividend)/float64(divisor)*100)) + }, + "barLen": func(dividend, divisor int64) template.HTML { + if divisor == 0 { + return "0" + } + return template.HTML(fmt.Sprintf("%.2f%%", float64(dividend)/float64(divisor)*100)) + }, + "unknownTime": func(desc regionDesc) int64 { + sum := desc.ExecTime + desc.IOTime + desc.BlockTime + desc.SyscallTime + desc.SchedWaitTime + if sum < desc.TotalTime { + return desc.TotalTime - sum + } + return 0 + }, + "filterParams": func(f *regionFilter) template.URL { + return template.URL(f.params.Encode()) + }, +}).Parse(` + +User Region {{.Name}} + + + + +

{{.Name}}

+ +{{ with $p := filterParams .Filter}} +
WhenElapsedGoroutine IDEvents
{{$el.WhenString}}{{$el.Duration}} +Task {{$el.ID}} +(goroutine view) +({{if .Complete}}complete{{else}}incomplete{{end}})
{{.WhenString}}{{elapsed .Elapsed}}{{.Go}}{{.What}}
GC:{{$el.GCTime}}
+ + + + +
Network Wait Time: graph(download)
Sync Block Time: graph(download)
Blocking Syscall Time: graph(download)
Scheduler Wait Time: graph(download)
+{{ end }} +

+ + + + + + + + + + + + + + +{{range .Data}} + + + + + + + + + + + + + +{{end}} +
Goroutine Task Total Execution Network wait Sync block Blocking syscall Scheduler wait GC sweeping GC pause
{{.G}} {{if .TaskID}}{{.TaskID}}{{end}} {{prettyDuration .TotalTime}} +
+ {{if unknownTime .}} {{end}} + {{if .ExecTime}} {{end}} + {{if .IOTime}} {{end}} + {{if .BlockTime}} {{end}} + {{if .SyscallTime}} {{end}} + {{if .SchedWaitTime}} {{end}} +
+
{{prettyDuration .ExecTime}} {{prettyDuration .IOTime}} {{prettyDuration .BlockTime}} {{prettyDuration .SyscallTime}} {{prettyDuration .SchedWaitTime}} {{prettyDuration .SweepTime}} {{percent .SweepTime .TotalTime}} {{prettyDuration .GCTime}} {{percent .GCTime .TotalTime}}
+

+`)) diff --git a/src/cmd/trace/annotations_test.go b/src/cmd/trace/annotations_test.go new file mode 100644 index 0000000..9c2d027 --- /dev/null +++ b/src/cmd/trace/annotations_test.go @@ -0,0 +1,392 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !js + +package main + +import ( + "bytes" + "context" + "flag" + "fmt" + traceparser "internal/trace" + "os" + "reflect" + "runtime/debug" + "runtime/trace" + "sort" + "sync" + "testing" + "time" +) + +var saveTraces = flag.Bool("savetraces", false, "save traces collected by tests") + +func TestOverlappingDuration(t *testing.T) { + cases := []struct { + start0, end0, start1, end1 int64 + want time.Duration + }{ + { + 1, 10, 11, 20, 0, + }, + { + 1, 10, 5, 20, 5 * time.Nanosecond, + }, + { + 1, 10, 2, 8, 6 * time.Nanosecond, + }, + } + + for _, tc := range cases { + s0, e0, s1, e1 := tc.start0, tc.end0, tc.start1, tc.end1 + if got := overlappingDuration(s0, e0, s1, e1); got != tc.want { + t.Errorf("overlappingDuration(%d, %d, %d, %d)=%v; want %v", s0, e0, s1, e1, got, tc.want) + } + if got := overlappingDuration(s1, e1, s0, e0); got != tc.want { + t.Errorf("overlappingDuration(%d, %d, %d, %d)=%v; want %v", s1, e1, s0, e0, got, tc.want) + } + } +} + +// prog0 starts three goroutines. +// +// goroutine 1: taskless region +// goroutine 2: starts task0, do work in task0.region0, starts task1 which ends immediately. 
+// goroutine 3: do work in task0.region1 and task0.region2, ends task0 +func prog0() { + ctx := context.Background() + + var wg sync.WaitGroup + + wg.Add(1) + go func() { // goroutine 1 + defer wg.Done() + trace.WithRegion(ctx, "taskless.region", func() { + trace.Log(ctx, "key0", "val0") + }) + }() + + wg.Add(1) + go func() { // goroutine 2 + defer wg.Done() + ctx, task := trace.NewTask(ctx, "task0") + trace.WithRegion(ctx, "task0.region0", func() { + wg.Add(1) + go func() { // goroutine 3 + defer wg.Done() + defer task.End() + trace.WithRegion(ctx, "task0.region1", func() { + trace.WithRegion(ctx, "task0.region2", func() { + trace.Log(ctx, "key2", "val2") + }) + trace.Log(ctx, "key1", "val1") + }) + }() + }) + ctx2, task2 := trace.NewTask(ctx, "task1") + trace.Log(ctx2, "key3", "val3") + task2.End() + }() + wg.Wait() +} + +func TestAnalyzeAnnotations(t *testing.T) { + // TODO: classify taskless regions + + // Run prog0 and capture the execution trace. + if err := traceProgram(t, prog0, "TestAnalyzeAnnotations"); err != nil { + t.Fatalf("failed to trace the program: %v", err) + } + + res, err := analyzeAnnotations() + if err != nil { + t.Fatalf("failed to analyzeAnnotations: %v", err) + } + + // For prog0, we expect + // - task with name = "task0", with three regions. + // - task with name = "task1", with no region. 
+ wantTasks := map[string]struct { + complete bool + goroutines int + regions []string + }{ + "task0": { + complete: true, + goroutines: 2, + regions: []string{"task0.region0", "", "task0.region1", "task0.region2"}, + }, + "task1": { + complete: true, + goroutines: 1, + }, + } + + for _, task := range res.tasks { + want, ok := wantTasks[task.name] + if !ok { + t.Errorf("unexpected task: %s", task) + continue + } + if task.complete() != want.complete || len(task.goroutines) != want.goroutines || !reflect.DeepEqual(regionNames(task), want.regions) { + t.Errorf("got task %v; want %+v", task, want) + } + + delete(wantTasks, task.name) + } + if len(wantTasks) > 0 { + t.Errorf("no more tasks; want %+v", wantTasks) + } + + wantRegions := []string{ + "", // an auto-created region for the goroutine 3 + "taskless.region", + "task0.region0", + "task0.region1", + "task0.region2", + } + var gotRegions []string + for regionID := range res.regions { + gotRegions = append(gotRegions, regionID.Type) + } + + sort.Strings(wantRegions) + sort.Strings(gotRegions) + if !reflect.DeepEqual(gotRegions, wantRegions) { + t.Errorf("got regions %q, want regions %q", gotRegions, wantRegions) + } +} + +// prog1 creates a task hierarchy consisting of three tasks. +func prog1() { + ctx := context.Background() + ctx1, task1 := trace.NewTask(ctx, "task1") + defer task1.End() + trace.WithRegion(ctx1, "task1.region", func() { + ctx2, task2 := trace.NewTask(ctx1, "task2") + defer task2.End() + trace.WithRegion(ctx2, "task2.region", func() { + ctx3, task3 := trace.NewTask(ctx2, "task3") + defer task3.End() + trace.WithRegion(ctx3, "task3.region", func() { + }) + }) + }) +} + +func TestAnalyzeAnnotationTaskTree(t *testing.T) { + // Run prog1 and capture the execution trace. 
+ if err := traceProgram(t, prog1, "TestAnalyzeAnnotationTaskTree"); err != nil { + t.Fatalf("failed to trace the program: %v", err) + } + + res, err := analyzeAnnotations() + if err != nil { + t.Fatalf("failed to analyzeAnnotations: %v", err) + } + tasks := res.tasks + + // For prog0, we expect + // - task with name = "", with taskless.region in regions. + // - task with name = "task0", with three regions. + wantTasks := map[string]struct { + parent string + children []string + regions []string + }{ + "task1": { + parent: "", + children: []string{"task2"}, + regions: []string{"task1.region"}, + }, + "task2": { + parent: "task1", + children: []string{"task3"}, + regions: []string{"task2.region"}, + }, + "task3": { + parent: "task2", + children: nil, + regions: []string{"task3.region"}, + }, + } + + for _, task := range tasks { + want, ok := wantTasks[task.name] + if !ok { + t.Errorf("unexpected task: %s", task) + continue + } + delete(wantTasks, task.name) + + if parentName(task) != want.parent || + !reflect.DeepEqual(childrenNames(task), want.children) || + !reflect.DeepEqual(regionNames(task), want.regions) { + t.Errorf("got %v; want %+v", task, want) + } + } + + if len(wantTasks) > 0 { + t.Errorf("no more tasks; want %+v", wantTasks) + } +} + +// prog2 starts two tasks; "taskWithGC" that overlaps with GC +// and "taskWithoutGC" that doesn't. In order to run this reliably, +// the caller needs to set up to prevent GC from running automatically. +// prog2 returns the upper-bound gc time that overlaps with the first task. 
+func prog2() (gcTime time.Duration) { + ch := make(chan bool) + ctx1, task := trace.NewTask(context.Background(), "taskWithGC") + trace.WithRegion(ctx1, "taskWithGC.region1", func() { + go func() { + defer trace.StartRegion(ctx1, "taskWithGC.region2").End() + <-ch + }() + s := time.Now() + debug.FreeOSMemory() // task1 affected by gc + gcTime = time.Since(s) + close(ch) + }) + task.End() + + ctx2, task2 := trace.NewTask(context.Background(), "taskWithoutGC") + trace.WithRegion(ctx2, "taskWithoutGC.region1", func() { + // do nothing. + }) + task2.End() + return gcTime +} + +func TestAnalyzeAnnotationGC(t *testing.T) { + err := traceProgram(t, func() { + oldGC := debug.SetGCPercent(10000) // gc, and effectively disable GC + defer debug.SetGCPercent(oldGC) + prog2() + }, "TestAnalyzeAnnotationGC") + if err != nil { + t.Fatalf("failed to trace the program: %v", err) + } + + res, err := analyzeAnnotations() + if err != nil { + t.Fatalf("failed to analyzeAnnotations: %v", err) + } + + // Check collected GC Start events are all sorted and non-overlapping. + lastTS := int64(0) + for i, ev := range res.gcEvents { + if ev.Type != traceparser.EvGCStart { + t.Errorf("unwanted event in gcEvents: %v", ev) + } + if i > 0 && lastTS > ev.Ts { + t.Errorf("overlapping GC events:\n%d: %v\n%d: %v", i-1, res.gcEvents[i-1], i, res.gcEvents[i]) + } + if ev.Link != nil { + lastTS = ev.Link.Ts + } + } + + // Check whether only taskWithGC reports overlapping duration. + for _, task := range res.tasks { + got := task.overlappingGCDuration(res.gcEvents) + switch task.name { + case "taskWithoutGC": + if got != 0 { + t.Errorf("%s reported %v as overlapping GC time; want 0: %v", task.name, got, task) + } + case "taskWithGC": + upperBound := task.duration() + // TODO(hyangah): a tighter upper bound is gcTime, but + // use of it will make the test flaky due to the issue + // described in golang.org/issue/16755. 
Tighten the upper + // bound when the issue with the timestamp computed + // based on clockticks is resolved. + if got <= 0 || got > upperBound { + t.Errorf("%s reported %v as overlapping GC time; want (0, %v):\n%v", task.name, got, upperBound, task) + buf := new(bytes.Buffer) + fmt.Fprintln(buf, "GC Events") + for _, ev := range res.gcEvents { + fmt.Fprintf(buf, " %s -> %s\n", ev, ev.Link) + } + fmt.Fprintln(buf, "Events in Task") + for i, ev := range task.events { + fmt.Fprintf(buf, " %d: %s\n", i, ev) + } + + t.Logf("\n%s", buf) + } + } + } +} + +// traceProgram runs the provided function while tracing is enabled, +// parses the captured trace, and sets the global trace loader to +// point to the parsed trace. +// +// If savetraces flag is set, the captured trace will be saved in the named file. +func traceProgram(t *testing.T, f func(), name string) error { + t.Helper() + buf := new(bytes.Buffer) + if err := trace.Start(buf); err != nil { + return err + } + f() + trace.Stop() + + saveTrace(buf, name) + res, err := traceparser.Parse(buf, name+".faketrace") + if err == traceparser.ErrTimeOrder { + t.Skipf("skipping due to golang.org/issue/16755: %v", err) + } else if err != nil { + return err + } + + swapLoaderData(res, err) + return nil +} + +func regionNames(task *taskDesc) (ret []string) { + for _, s := range task.regions { + ret = append(ret, s.Name) + } + return ret +} + +func parentName(task *taskDesc) string { + if task.parent != nil { + return task.parent.name + } + return "" +} + +func childrenNames(task *taskDesc) (ret []string) { + for _, s := range task.children { + ret = append(ret, s.name) + } + return ret +} + +func swapLoaderData(res traceparser.ParseResult, err error) { + // swap loader's data. + parseTrace() // fool loader.once. + + loader.res = res + loader.err = err + + analyzeGoroutines(nil) // fool gsInit once. 
+ gs = traceparser.GoroutineStats(res.Events) + +} + +func saveTrace(buf *bytes.Buffer, name string) { + if !*saveTraces { + return + } + if err := os.WriteFile(name+".trace", buf.Bytes(), 0600); err != nil { + panic(fmt.Errorf("failed to write trace file: %v", err)) + } +} diff --git a/src/cmd/trace/doc.go b/src/cmd/trace/doc.go new file mode 100644 index 0000000..ed1f930 --- /dev/null +++ b/src/cmd/trace/doc.go @@ -0,0 +1,35 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Trace is a tool for viewing trace files. + +Trace files can be generated with: + - runtime/trace.Start + - net/http/pprof package + - go test -trace + +Example usage: +Generate a trace file with 'go test': + go test -trace trace.out pkg +View the trace in a web browser: + go tool trace trace.out +Generate a pprof-like profile from the trace: + go tool trace -pprof=TYPE trace.out > TYPE.pprof + +Supported profile types are: + - net: network blocking profile + - sync: synchronization blocking profile + - syscall: syscall blocking profile + - sched: scheduler latency profile + +Then, you can use the pprof tool to analyze the profile: + go tool pprof TYPE.pprof + +Note that while the various profiles available when launching +'go tool trace' work on every browser, the trace viewer itself +(the 'view trace' page) comes from the Chrome/Chromium project +and is only actively tested on that browser. +*/ +package main diff --git a/src/cmd/trace/goroutines.go b/src/cmd/trace/goroutines.go new file mode 100644 index 0000000..74d1b0a --- /dev/null +++ b/src/cmd/trace/goroutines.go @@ -0,0 +1,297 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Goroutine-related profiles. 
+ +package main + +import ( + "fmt" + "html/template" + "internal/trace" + "log" + "net/http" + "reflect" + "sort" + "strconv" + "sync" + "time" +) + +func init() { + http.HandleFunc("/goroutines", httpGoroutines) + http.HandleFunc("/goroutine", httpGoroutine) +} + +// gtype describes a group of goroutines grouped by start PC. +type gtype struct { + ID uint64 // Unique identifier (PC). + Name string // Start function. + N int // Total number of goroutines in this group. + ExecTime int64 // Total execution time of all goroutines in this group. +} + +var ( + gsInit sync.Once + gs map[uint64]*trace.GDesc +) + +// analyzeGoroutines generates statistics about execution of all goroutines and stores them in gs. +func analyzeGoroutines(events []*trace.Event) { + gsInit.Do(func() { + gs = trace.GoroutineStats(events) + }) +} + +// httpGoroutines serves list of goroutine groups. +func httpGoroutines(w http.ResponseWriter, r *http.Request) { + events, err := parseEvents() + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + analyzeGoroutines(events) + gss := make(map[uint64]gtype) + for _, g := range gs { + gs1 := gss[g.PC] + gs1.ID = g.PC + gs1.Name = g.Name + gs1.N++ + gs1.ExecTime += g.ExecTime + gss[g.PC] = gs1 + } + var glist []gtype + for k, v := range gss { + v.ID = k + glist = append(glist, v) + } + sort.Slice(glist, func(i, j int) bool { return glist[i].ExecTime > glist[j].ExecTime }) + w.Header().Set("Content-Type", "text/html;charset=utf-8") + if err := templGoroutines.Execute(w, glist); err != nil { + log.Printf("failed to execute template: %v", err) + return + } +} + +var templGoroutines = template.Must(template.New("").Parse(` + + +Goroutines:
+{{range $}} + {{.Name}} N={{.N}}
+{{end}} + + +`)) + +// httpGoroutine serves list of goroutines in a particular group. +func httpGoroutine(w http.ResponseWriter, r *http.Request) { + // TODO(hyangah): support format=csv (raw data) + + events, err := parseEvents() + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + pc, err := strconv.ParseUint(r.FormValue("id"), 10, 64) + if err != nil { + http.Error(w, fmt.Sprintf("failed to parse id parameter '%v': %v", r.FormValue("id"), err), http.StatusInternalServerError) + return + } + analyzeGoroutines(events) + var ( + glist []*trace.GDesc + name string + totalExecTime, execTime int64 + maxTotalTime int64 + ) + + for _, g := range gs { + totalExecTime += g.ExecTime + + if g.PC != pc { + continue + } + glist = append(glist, g) + name = g.Name + execTime += g.ExecTime + if maxTotalTime < g.TotalTime { + maxTotalTime = g.TotalTime + } + } + + execTimePercent := "" + if totalExecTime > 0 { + execTimePercent = fmt.Sprintf("%.2f%%", float64(execTime)/float64(totalExecTime)*100) + } + + sortby := r.FormValue("sortby") + _, ok := reflect.TypeOf(trace.GDesc{}).FieldByNameFunc(func(s string) bool { + return s == sortby + }) + if !ok { + sortby = "TotalTime" + } + + sort.Slice(glist, func(i, j int) bool { + ival := reflect.ValueOf(glist[i]).Elem().FieldByName(sortby).Int() + jval := reflect.ValueOf(glist[j]).Elem().FieldByName(sortby).Int() + return ival > jval + }) + + err = templGoroutine.Execute(w, struct { + Name string + PC uint64 + N int + ExecTimePercent string + MaxTotal int64 + GList []*trace.GDesc + }{ + Name: name, + PC: pc, + N: len(glist), + ExecTimePercent: execTimePercent, + MaxTotal: maxTotalTime, + GList: glist}) + if err != nil { + http.Error(w, fmt.Sprintf("failed to execute template: %v", err), http.StatusInternalServerError) + return + } +} + +var templGoroutine = template.Must(template.New("").Funcs(template.FuncMap{ + "prettyDuration": func(nsec int64) template.HTML { + d := time.Duration(nsec) * 
time.Nanosecond + return template.HTML(niceDuration(d)) + }, + "percent": func(dividend, divisor int64) template.HTML { + if divisor == 0 { + return "" + } + return template.HTML(fmt.Sprintf("(%.1f%%)", float64(dividend)/float64(divisor)*100)) + }, + "barLen": func(dividend, divisor int64) template.HTML { + if divisor == 0 { + return "0" + } + return template.HTML(fmt.Sprintf("%.2f%%", float64(dividend)/float64(divisor)*100)) + }, + "unknownTime": func(desc *trace.GDesc) int64 { + sum := desc.ExecTime + desc.IOTime + desc.BlockTime + desc.SyscallTime + desc.SchedWaitTime + if sum < desc.TotalTime { + return desc.TotalTime - sum + } + return 0 + }, +}).Parse(` + +Goroutine {{.Name}} + + + + + + + + + + + + +
Goroutine Name:{{.Name}}
Number of Goroutines:{{.N}}
Execution Time:{{.ExecTimePercent}} of total program execution time
Network Wait Time: graph(download)
Sync Block Time: graph(download)
Blocking Syscall Time: graph(download)
Scheduler Wait Time: graph(download)
+

+ + + + + + + + + + + + + +{{range .GList}} + + + + + + + + + + + + +{{end}} +
Goroutine Total Execution Network wait Sync block Blocking syscall Scheduler wait GC sweeping GC pause
{{.ID}} {{prettyDuration .TotalTime}} +
+ {{if unknownTime .}} {{end}} + {{if .ExecTime}} {{end}} + {{if .IOTime}} {{end}} + {{if .BlockTime}} {{end}} + {{if .SyscallTime}} {{end}} + {{if .SchedWaitTime}} {{end}} +
+
{{prettyDuration .ExecTime}} {{prettyDuration .IOTime}} {{prettyDuration .BlockTime}} {{prettyDuration .SyscallTime}} {{prettyDuration .SchedWaitTime}} {{prettyDuration .SweepTime}} {{percent .SweepTime .TotalTime}} {{prettyDuration .GCTime}} {{percent .GCTime .TotalTime}}
+`)) diff --git a/src/cmd/trace/main.go b/src/cmd/trace/main.go new file mode 100644 index 0000000..f94586a --- /dev/null +++ b/src/cmd/trace/main.go @@ -0,0 +1,238 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "bufio" + "cmd/internal/browser" + "flag" + "fmt" + "html/template" + "internal/trace" + "io" + "log" + "net" + "net/http" + "os" + "runtime" + "runtime/debug" + "sync" + + _ "net/http/pprof" // Required to use pprof +) + +const usageMessage = "" + + `Usage of 'go tool trace': +Given a trace file produced by 'go test': + go test -trace=trace.out pkg + +Open a web browser displaying trace: + go tool trace [flags] [pkg.test] trace.out + +Generate a pprof-like profile from the trace: + go tool trace -pprof=TYPE [pkg.test] trace.out + +[pkg.test] argument is required for traces produced by Go 1.6 and below. +Go 1.7 does not require the binary argument. + +Supported profile types are: + - net: network blocking profile + - sync: synchronization blocking profile + - syscall: syscall blocking profile + - sched: scheduler latency profile + +Flags: + -http=addr: HTTP service address (e.g., ':6060') + -pprof=type: print a pprof-like profile instead + -d: print debug info such as parsed events + +Note that while the various profiles available when launching +'go tool trace' work on every browser, the trace viewer itself +(the 'view trace' page) comes from the Chrome/Chromium project +and is only actively tested on that browser. +` + +var ( + httpFlag = flag.String("http", "localhost:0", "HTTP service address (e.g., ':6060')") + pprofFlag = flag.String("pprof", "", "print a pprof-like profile instead") + debugFlag = flag.Bool("d", false, "print debug information such as parsed events list") + + // The binary file name, left here for serveSVGProfile. 
+ programBinary string + traceFile string +) + +func main() { + flag.Usage = func() { + fmt.Fprintln(os.Stderr, usageMessage) + os.Exit(2) + } + flag.Parse() + + // Go 1.7 traces embed symbol info and does not require the binary. + // But we optionally accept binary as first arg for Go 1.5 traces. + switch flag.NArg() { + case 1: + traceFile = flag.Arg(0) + case 2: + programBinary = flag.Arg(0) + traceFile = flag.Arg(1) + default: + flag.Usage() + } + + var pprofFunc func(io.Writer, *http.Request) error + switch *pprofFlag { + case "net": + pprofFunc = pprofByGoroutine(computePprofIO) + case "sync": + pprofFunc = pprofByGoroutine(computePprofBlock) + case "syscall": + pprofFunc = pprofByGoroutine(computePprofSyscall) + case "sched": + pprofFunc = pprofByGoroutine(computePprofSched) + } + if pprofFunc != nil { + if err := pprofFunc(os.Stdout, &http.Request{}); err != nil { + dief("failed to generate pprof: %v\n", err) + } + os.Exit(0) + } + if *pprofFlag != "" { + dief("unknown pprof type %s\n", *pprofFlag) + } + + ln, err := net.Listen("tcp", *httpFlag) + if err != nil { + dief("failed to create server socket: %v\n", err) + } + + log.Print("Parsing trace...") + res, err := parseTrace() + if err != nil { + dief("%v\n", err) + } + + if *debugFlag { + trace.Print(res.Events) + os.Exit(0) + } + reportMemoryUsage("after parsing trace") + debug.FreeOSMemory() + + log.Print("Splitting trace...") + ranges = splitTrace(res) + reportMemoryUsage("after spliting trace") + debug.FreeOSMemory() + + addr := "http://" + ln.Addr().String() + log.Printf("Opening browser. Trace viewer is listening on %s", addr) + browser.Open(addr) + + // Start http server. 
+ http.HandleFunc("/", httpMain) + err = http.Serve(ln, nil) + dief("failed to start http server: %v\n", err) +} + +var ranges []Range + +var loader struct { + once sync.Once + res trace.ParseResult + err error +} + +// parseEvents is a compatibility wrapper that returns only +// the Events part of trace.ParseResult returned by parseTrace. +func parseEvents() ([]*trace.Event, error) { + res, err := parseTrace() + if err != nil { + return nil, err + } + return res.Events, err +} + +func parseTrace() (trace.ParseResult, error) { + loader.once.Do(func() { + tracef, err := os.Open(traceFile) + if err != nil { + loader.err = fmt.Errorf("failed to open trace file: %v", err) + return + } + defer tracef.Close() + + // Parse and symbolize. + res, err := trace.Parse(bufio.NewReader(tracef), programBinary) + if err != nil { + loader.err = fmt.Errorf("failed to parse trace: %v", err) + return + } + loader.res = res + }) + return loader.res, loader.err +} + +// httpMain serves the starting page. +func httpMain(w http.ResponseWriter, r *http.Request) { + if err := templMain.Execute(w, ranges); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } +} + +var templMain = template.Must(template.New("").Parse(` + + +{{if $}} + {{range $e := $}} + View trace ({{$e.Name}})
+ {{end}} +
+{{else}} + View trace
+{{end}} +Goroutine analysis
+Network blocking profile ()
+Synchronization blocking profile ()
+Syscall blocking profile ()
+Scheduler latency profile ()
+User-defined tasks
+User-defined regions
+Minimum mutator utilization
+ + +`)) + +func dief(msg string, args ...interface{}) { + fmt.Fprintf(os.Stderr, msg, args...) + os.Exit(1) +} + +var debugMemoryUsage bool + +func init() { + v := os.Getenv("DEBUG_MEMORY_USAGE") + debugMemoryUsage = v != "" +} + +func reportMemoryUsage(msg string) { + if !debugMemoryUsage { + return + } + var s runtime.MemStats + runtime.ReadMemStats(&s) + w := os.Stderr + fmt.Fprintf(w, "%s\n", msg) + fmt.Fprintf(w, " Alloc:\t%d Bytes\n", s.Alloc) + fmt.Fprintf(w, " Sys:\t%d Bytes\n", s.Sys) + fmt.Fprintf(w, " HeapReleased:\t%d Bytes\n", s.HeapReleased) + fmt.Fprintf(w, " HeapSys:\t%d Bytes\n", s.HeapSys) + fmt.Fprintf(w, " HeapInUse:\t%d Bytes\n", s.HeapInuse) + fmt.Fprintf(w, " HeapAlloc:\t%d Bytes\n", s.HeapAlloc) + var dummy string + fmt.Printf("Enter to continue...") + fmt.Scanf("%s", &dummy) +} diff --git a/src/cmd/trace/mmu.go b/src/cmd/trace/mmu.go new file mode 100644 index 0000000..b92fac6 --- /dev/null +++ b/src/cmd/trace/mmu.go @@ -0,0 +1,403 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Minimum mutator utilization (MMU) graphing. + +// TODO: +// +// In worst window list, show break-down of GC utilization sources +// (STW, assist, etc). Probably requires a different MutatorUtil +// representation. +// +// When a window size is selected, show a second plot of the mutator +// utilization distribution for that window size. +// +// Render plot progressively so rough outline is visible quickly even +// for very complex MUTs. Start by computing just a few window sizes +// and then add more window sizes. +// +// Consider using sampling to compute an approximate MUT. This would +// work by sampling the mutator utilization at randomly selected +// points in time in the trace to build an empirical distribution. 
We +// could potentially put confidence intervals on these estimates and +// render this progressively as we refine the distributions. + +package main + +import ( + "encoding/json" + "fmt" + "internal/trace" + "log" + "math" + "net/http" + "strconv" + "strings" + "sync" + "time" +) + +func init() { + http.HandleFunc("/mmu", httpMMU) + http.HandleFunc("/mmuPlot", httpMMUPlot) + http.HandleFunc("/mmuDetails", httpMMUDetails) +} + +var utilFlagNames = map[string]trace.UtilFlags{ + "perProc": trace.UtilPerProc, + "stw": trace.UtilSTW, + "background": trace.UtilBackground, + "assist": trace.UtilAssist, + "sweep": trace.UtilSweep, +} + +type mmuCacheEntry struct { + init sync.Once + util [][]trace.MutatorUtil + mmuCurve *trace.MMUCurve + err error +} + +var mmuCache struct { + m map[trace.UtilFlags]*mmuCacheEntry + lock sync.Mutex +} + +func init() { + mmuCache.m = make(map[trace.UtilFlags]*mmuCacheEntry) +} + +func getMMUCurve(r *http.Request) ([][]trace.MutatorUtil, *trace.MMUCurve, error) { + var flags trace.UtilFlags + for _, flagStr := range strings.Split(r.FormValue("flags"), "|") { + flags |= utilFlagNames[flagStr] + } + + mmuCache.lock.Lock() + c := mmuCache.m[flags] + if c == nil { + c = new(mmuCacheEntry) + mmuCache.m[flags] = c + } + mmuCache.lock.Unlock() + + c.init.Do(func() { + events, err := parseEvents() + if err != nil { + c.err = err + } else { + c.util = trace.MutatorUtilization(events, flags) + c.mmuCurve = trace.NewMMUCurve(c.util) + } + }) + return c.util, c.mmuCurve, c.err +} + +// httpMMU serves the MMU plot page. +func httpMMU(w http.ResponseWriter, r *http.Request) { + http.ServeContent(w, r, "", time.Time{}, strings.NewReader(templMMU)) +} + +// httpMMUPlot serves the JSON data for the MMU plot. 
+func httpMMUPlot(w http.ResponseWriter, r *http.Request) { + mu, mmuCurve, err := getMMUCurve(r) + if err != nil { + http.Error(w, fmt.Sprintf("failed to parse events: %v", err), http.StatusInternalServerError) + return + } + + var quantiles []float64 + for _, flagStr := range strings.Split(r.FormValue("flags"), "|") { + if flagStr == "mut" { + quantiles = []float64{0, 1 - .999, 1 - .99, 1 - .95} + break + } + } + + // Find a nice starting point for the plot. + xMin := time.Second + for xMin > 1 { + if mmu := mmuCurve.MMU(xMin); mmu < 0.0001 { + break + } + xMin /= 1000 + } + // Cover six orders of magnitude. + xMax := xMin * 1e6 + // But no more than the length of the trace. + minEvent, maxEvent := mu[0][0].Time, mu[0][len(mu[0])-1].Time + for _, mu1 := range mu[1:] { + if mu1[0].Time < minEvent { + minEvent = mu1[0].Time + } + if mu1[len(mu1)-1].Time > maxEvent { + maxEvent = mu1[len(mu1)-1].Time + } + } + if maxMax := time.Duration(maxEvent - minEvent); xMax > maxMax { + xMax = maxMax + } + // Compute MMU curve. + logMin, logMax := math.Log(float64(xMin)), math.Log(float64(xMax)) + const samples = 100 + plot := make([][]float64, samples) + for i := 0; i < samples; i++ { + window := time.Duration(math.Exp(float64(i)/(samples-1)*(logMax-logMin) + logMin)) + if quantiles == nil { + plot[i] = make([]float64, 2) + plot[i][1] = mmuCurve.MMU(window) + } else { + plot[i] = make([]float64, 1+len(quantiles)) + copy(plot[i][1:], mmuCurve.MUD(window, quantiles)) + } + plot[i][0] = float64(window) + } + + // Create JSON response. + err = json.NewEncoder(w).Encode(map[string]interface{}{"xMin": int64(xMin), "xMax": int64(xMax), "quantiles": quantiles, "curve": plot}) + if err != nil { + log.Printf("failed to serialize response: %v", err) + return + } +} + +var templMMU = ` + + + + + + + + + +

+
Loading plot...
+
+

+ View
+ + ?Consider whole system utilization. For example, if one of four procs is available to the mutator, mutator utilization will be 0.25. This is the standard definition of an MMU.
+ + ?Consider per-goroutine utilization. When even one goroutine is interrupted by GC, mutator utilization is 0.
+

+

+ Include
+ + ?Stop-the-world stops all goroutines simultaneously.
+ + ?Background workers are GC-specific goroutines. 25% of the CPU is dedicated to background workers during GC.
+ + ?Mark assists are performed by allocation to prevent the mutator from outpacing GC.
+ + ?Sweep reclaims unused memory between GCs. (Enabling this may be very slow.).
+

+

+ Display
+ + ?Display percentile mutator utilization in addition to minimum. E.g., p99 MU drops the worst 1% of windows.
+

+
+
+
Select a point for details.
+ + +` + +// httpMMUDetails serves details of an MMU graph at a particular window. +func httpMMUDetails(w http.ResponseWriter, r *http.Request) { + _, mmuCurve, err := getMMUCurve(r) + if err != nil { + http.Error(w, fmt.Sprintf("failed to parse events: %v", err), http.StatusInternalServerError) + return + } + + windowStr := r.FormValue("window") + window, err := strconv.ParseUint(windowStr, 10, 64) + if err != nil { + http.Error(w, fmt.Sprintf("failed to parse window parameter %q: %v", windowStr, err), http.StatusBadRequest) + return + } + worst := mmuCurve.Examples(time.Duration(window), 10) + + // Construct a link for each window. + var links []linkedUtilWindow + for _, ui := range worst { + links = append(links, newLinkedUtilWindow(ui, time.Duration(window))) + } + + err = json.NewEncoder(w).Encode(links) + if err != nil { + log.Printf("failed to serialize trace: %v", err) + return + } +} + +type linkedUtilWindow struct { + trace.UtilWindow + URL string +} + +func newLinkedUtilWindow(ui trace.UtilWindow, window time.Duration) linkedUtilWindow { + // Find the range containing this window. + var r Range + for _, r = range ranges { + if r.EndTime > ui.Time { + break + } + } + return linkedUtilWindow{ui, fmt.Sprintf("%s#%v:%v", r.URL(), float64(ui.Time)/1e6, float64(ui.Time+int64(window))/1e6)} +} diff --git a/src/cmd/trace/pprof.go b/src/cmd/trace/pprof.go new file mode 100644 index 0000000..c4d3742 --- /dev/null +++ b/src/cmd/trace/pprof.go @@ -0,0 +1,377 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Serving of pprof-like profiles. 
+ +package main + +import ( + "bufio" + "fmt" + exec "internal/execabs" + "internal/trace" + "io" + "net/http" + "os" + "path/filepath" + "runtime" + "sort" + "strconv" + "time" + + "github.com/google/pprof/profile" +) + +func goCmd() string { + var exeSuffix string + if runtime.GOOS == "windows" { + exeSuffix = ".exe" + } + path := filepath.Join(runtime.GOROOT(), "bin", "go"+exeSuffix) + if _, err := os.Stat(path); err == nil { + return path + } + return "go" +} + +func init() { + http.HandleFunc("/io", serveSVGProfile(pprofByGoroutine(computePprofIO))) + http.HandleFunc("/block", serveSVGProfile(pprofByGoroutine(computePprofBlock))) + http.HandleFunc("/syscall", serveSVGProfile(pprofByGoroutine(computePprofSyscall))) + http.HandleFunc("/sched", serveSVGProfile(pprofByGoroutine(computePprofSched))) + + http.HandleFunc("/regionio", serveSVGProfile(pprofByRegion(computePprofIO))) + http.HandleFunc("/regionblock", serveSVGProfile(pprofByRegion(computePprofBlock))) + http.HandleFunc("/regionsyscall", serveSVGProfile(pprofByRegion(computePprofSyscall))) + http.HandleFunc("/regionsched", serveSVGProfile(pprofByRegion(computePprofSched))) +} + +// Record represents one entry in pprof-like profiles. +type Record struct { + stk []*trace.Frame + n uint64 + time int64 +} + +// interval represents a time interval in the trace. +type interval struct { + begin, end int64 // nanoseconds. 
+} + +func pprofByGoroutine(compute func(io.Writer, map[uint64][]interval, []*trace.Event) error) func(w io.Writer, r *http.Request) error { + return func(w io.Writer, r *http.Request) error { + id := r.FormValue("id") + events, err := parseEvents() + if err != nil { + return err + } + gToIntervals, err := pprofMatchingGoroutines(id, events) + if err != nil { + return err + } + return compute(w, gToIntervals, events) + } +} + +func pprofByRegion(compute func(io.Writer, map[uint64][]interval, []*trace.Event) error) func(w io.Writer, r *http.Request) error { + return func(w io.Writer, r *http.Request) error { + filter, err := newRegionFilter(r) + if err != nil { + return err + } + gToIntervals, err := pprofMatchingRegions(filter) + if err != nil { + return err + } + events, _ := parseEvents() + + return compute(w, gToIntervals, events) + } +} + +// pprofMatchingGoroutines parses the goroutine type id string (i.e. pc) +// and returns the ids of goroutines of the matching type and its interval. +// If the id string is empty, returns nil without an error. +func pprofMatchingGoroutines(id string, events []*trace.Event) (map[uint64][]interval, error) { + if id == "" { + return nil, nil + } + pc, err := strconv.ParseUint(id, 10, 64) // id is string + if err != nil { + return nil, fmt.Errorf("invalid goroutine type: %v", id) + } + analyzeGoroutines(events) + var res map[uint64][]interval + for _, g := range gs { + if g.PC != pc { + continue + } + if res == nil { + res = make(map[uint64][]interval) + } + endTime := g.EndTime + if g.EndTime == 0 { + endTime = lastTimestamp() // the trace doesn't include the goroutine end event. Use the trace end time. + } + res[g.ID] = []interval{{begin: g.StartTime, end: endTime}} + } + if len(res) == 0 && id != "" { + return nil, fmt.Errorf("failed to find matching goroutines for id: %s", id) + } + return res, nil +} + +// pprofMatchingRegions returns the time intervals of matching regions +// grouped by the goroutine id. 
If the filter is nil, returns nil without an error. +func pprofMatchingRegions(filter *regionFilter) (map[uint64][]interval, error) { + res, err := analyzeAnnotations() + if err != nil { + return nil, err + } + if filter == nil { + return nil, nil + } + + gToIntervals := make(map[uint64][]interval) + for id, regions := range res.regions { + for _, s := range regions { + if filter.match(id, s) { + gToIntervals[s.G] = append(gToIntervals[s.G], interval{begin: s.firstTimestamp(), end: s.lastTimestamp()}) + } + } + } + + for g, intervals := range gToIntervals { + // in order to remove nested regions and + // consider only the outermost regions, + // first, we sort based on the start time + // and then scan through to select only the outermost regions. + sort.Slice(intervals, func(i, j int) bool { + x := intervals[i].begin + y := intervals[j].begin + if x == y { + return intervals[i].end < intervals[j].end + } + return x < y + }) + var lastTimestamp int64 + var n int + // select only the outermost regions. + for _, i := range intervals { + if lastTimestamp <= i.begin { + intervals[n] = i // new non-overlapping region starts. + lastTimestamp = i.end + n++ + } // otherwise, skip because this region overlaps with a previous region. + } + gToIntervals[g] = intervals[:n] + } + return gToIntervals, nil +} + +// computePprofIO generates IO pprof-like profile (time spent in IO wait, currently only network blocking event). 
+func computePprofIO(w io.Writer, gToIntervals map[uint64][]interval, events []*trace.Event) error { + prof := make(map[uint64]Record) + for _, ev := range events { + if ev.Type != trace.EvGoBlockNet || ev.Link == nil || ev.StkID == 0 || len(ev.Stk) == 0 { + continue + } + overlapping := pprofOverlappingDuration(gToIntervals, ev) + if overlapping > 0 { + rec := prof[ev.StkID] + rec.stk = ev.Stk + rec.n++ + rec.time += overlapping.Nanoseconds() + prof[ev.StkID] = rec + } + } + return buildProfile(prof).Write(w) +} + +// computePprofBlock generates blocking pprof-like profile (time spent blocked on synchronization primitives). +func computePprofBlock(w io.Writer, gToIntervals map[uint64][]interval, events []*trace.Event) error { + prof := make(map[uint64]Record) + for _, ev := range events { + switch ev.Type { + case trace.EvGoBlockSend, trace.EvGoBlockRecv, trace.EvGoBlockSelect, + trace.EvGoBlockSync, trace.EvGoBlockCond, trace.EvGoBlockGC: + // TODO(hyangah): figure out why EvGoBlockGC should be here. + // EvGoBlockGC indicates the goroutine blocks on GC assist, not + // on synchronization primitives. + default: + continue + } + if ev.Link == nil || ev.StkID == 0 || len(ev.Stk) == 0 { + continue + } + overlapping := pprofOverlappingDuration(gToIntervals, ev) + if overlapping > 0 { + rec := prof[ev.StkID] + rec.stk = ev.Stk + rec.n++ + rec.time += overlapping.Nanoseconds() + prof[ev.StkID] = rec + } + } + return buildProfile(prof).Write(w) +} + +// computePprofSyscall generates syscall pprof-like profile (time spent blocked in syscalls). 
+func computePprofSyscall(w io.Writer, gToIntervals map[uint64][]interval, events []*trace.Event) error { + prof := make(map[uint64]Record) + for _, ev := range events { + if ev.Type != trace.EvGoSysCall || ev.Link == nil || ev.StkID == 0 || len(ev.Stk) == 0 { + continue + } + overlapping := pprofOverlappingDuration(gToIntervals, ev) + if overlapping > 0 { + rec := prof[ev.StkID] + rec.stk = ev.Stk + rec.n++ + rec.time += overlapping.Nanoseconds() + prof[ev.StkID] = rec + } + } + return buildProfile(prof).Write(w) +} + +// computePprofSched generates scheduler latency pprof-like profile +// (time between a goroutine become runnable and actually scheduled for execution). +func computePprofSched(w io.Writer, gToIntervals map[uint64][]interval, events []*trace.Event) error { + prof := make(map[uint64]Record) + for _, ev := range events { + if (ev.Type != trace.EvGoUnblock && ev.Type != trace.EvGoCreate) || + ev.Link == nil || ev.StkID == 0 || len(ev.Stk) == 0 { + continue + } + overlapping := pprofOverlappingDuration(gToIntervals, ev) + if overlapping > 0 { + rec := prof[ev.StkID] + rec.stk = ev.Stk + rec.n++ + rec.time += overlapping.Nanoseconds() + prof[ev.StkID] = rec + } + } + return buildProfile(prof).Write(w) +} + +// pprofOverlappingDuration returns the overlapping duration between +// the time intervals in gToIntervals and the specified event. +// If gToIntervals is nil, this simply returns the event's duration. +func pprofOverlappingDuration(gToIntervals map[uint64][]interval, ev *trace.Event) time.Duration { + if gToIntervals == nil { // No filtering. + return time.Duration(ev.Link.Ts-ev.Ts) * time.Nanosecond + } + intervals := gToIntervals[ev.G] + if len(intervals) == 0 { + return 0 + } + + var overlapping time.Duration + for _, i := range intervals { + if o := overlappingDuration(i.begin, i.end, ev.Ts, ev.Link.Ts); o > 0 { + overlapping += o + } + } + return overlapping +} + +// serveSVGProfile serves pprof-like profile generated by prof as svg. 
+func serveSVGProfile(prof func(w io.Writer, r *http.Request) error) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + + if r.FormValue("raw") != "" { + w.Header().Set("Content-Type", "application/octet-stream") + if err := prof(w, r); err != nil { + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + w.Header().Set("X-Go-Pprof", "1") + http.Error(w, fmt.Sprintf("failed to get profile: %v", err), http.StatusInternalServerError) + return + } + return + } + + blockf, err := os.CreateTemp("", "block") + if err != nil { + http.Error(w, fmt.Sprintf("failed to create temp file: %v", err), http.StatusInternalServerError) + return + } + defer func() { + blockf.Close() + os.Remove(blockf.Name()) + }() + blockb := bufio.NewWriter(blockf) + if err := prof(blockb, r); err != nil { + http.Error(w, fmt.Sprintf("failed to generate profile: %v", err), http.StatusInternalServerError) + return + } + if err := blockb.Flush(); err != nil { + http.Error(w, fmt.Sprintf("failed to flush temp file: %v", err), http.StatusInternalServerError) + return + } + if err := blockf.Close(); err != nil { + http.Error(w, fmt.Sprintf("failed to close temp file: %v", err), http.StatusInternalServerError) + return + } + svgFilename := blockf.Name() + ".svg" + if output, err := exec.Command(goCmd(), "tool", "pprof", "-svg", "-output", svgFilename, blockf.Name()).CombinedOutput(); err != nil { + http.Error(w, fmt.Sprintf("failed to execute go tool pprof: %v\n%s", err, output), http.StatusInternalServerError) + return + } + defer os.Remove(svgFilename) + w.Header().Set("Content-Type", "image/svg+xml") + http.ServeFile(w, r, svgFilename) + } +} + +func buildProfile(prof map[uint64]Record) *profile.Profile { + p := &profile.Profile{ + PeriodType: &profile.ValueType{Type: "trace", Unit: "count"}, + Period: 1, + SampleType: []*profile.ValueType{ + {Type: "contentions", Unit: "count"}, + {Type: "delay", Unit: "nanoseconds"}, + }, + } + locs := 
make(map[uint64]*profile.Location) + funcs := make(map[string]*profile.Function) + for _, rec := range prof { + var sloc []*profile.Location + for _, frame := range rec.stk { + loc := locs[frame.PC] + if loc == nil { + fn := funcs[frame.File+frame.Fn] + if fn == nil { + fn = &profile.Function{ + ID: uint64(len(p.Function) + 1), + Name: frame.Fn, + SystemName: frame.Fn, + Filename: frame.File, + } + p.Function = append(p.Function, fn) + funcs[frame.File+frame.Fn] = fn + } + loc = &profile.Location{ + ID: uint64(len(p.Location) + 1), + Address: frame.PC, + Line: []profile.Line{ + { + Function: fn, + Line: int64(frame.Line), + }, + }, + } + p.Location = append(p.Location, loc) + locs[frame.PC] = loc + } + sloc = append(sloc, loc) + } + p.Sample = append(p.Sample, &profile.Sample{ + Value: []int64{int64(rec.n), rec.time}, + Location: sloc, + }) + } + return p +} diff --git a/src/cmd/trace/trace.go b/src/cmd/trace/trace.go new file mode 100644 index 0000000..30c80f0 --- /dev/null +++ b/src/cmd/trace/trace.go @@ -0,0 +1,1273 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "cmd/internal/traceviewer" + "encoding/json" + "fmt" + "internal/trace" + "io" + "log" + "math" + "net/http" + "path/filepath" + "runtime" + "runtime/debug" + "sort" + "strconv" + "strings" + "time" +) + +func init() { + http.HandleFunc("/trace", httpTrace) + http.HandleFunc("/jsontrace", httpJsonTrace) + http.HandleFunc("/trace_viewer_html", httpTraceViewerHTML) + http.HandleFunc("/webcomponents.min.js", webcomponentsJS) +} + +// httpTrace serves either whole trace (goid==0) or trace for goid goroutine. 
+func httpTrace(w http.ResponseWriter, r *http.Request) { + _, err := parseTrace() + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + if err := r.ParseForm(); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + html := strings.ReplaceAll(templTrace, "{{PARAMS}}", r.Form.Encode()) + w.Write([]byte(html)) + +} + +// https://chromium.googlesource.com/catapult/+/9508452e18f130c98499cb4c4f1e1efaedee8962/tracing/docs/embedding-trace-viewer.md +// This is almost verbatim copy of https://chromium-review.googlesource.com/c/catapult/+/2062938/2/tracing/bin/index.html +var templTrace = ` + + + + + + + + + + + + + +` + +// httpTraceViewerHTML serves static part of trace-viewer. +// This URL is queried from templTrace HTML. +func httpTraceViewerHTML(w http.ResponseWriter, r *http.Request) { + http.ServeFile(w, r, filepath.Join(runtime.GOROOT(), "misc", "trace", "trace_viewer_full.html")) +} + +func webcomponentsJS(w http.ResponseWriter, r *http.Request) { + http.ServeFile(w, r, filepath.Join(runtime.GOROOT(), "misc", "trace", "webcomponents.min.js")) +} + +// httpJsonTrace serves json trace, requested from within templTrace HTML. +func httpJsonTrace(w http.ResponseWriter, r *http.Request) { + defer debug.FreeOSMemory() + defer reportMemoryUsage("after httpJsonTrace") + // This is an AJAX handler, so instead of http.Error we use log.Printf to log errors. + res, err := parseTrace() + if err != nil { + log.Printf("failed to parse trace: %v", err) + return + } + + params := &traceParams{ + parsed: res, + endTime: math.MaxInt64, + } + + if goids := r.FormValue("goid"); goids != "" { + // If goid argument is present, we are rendering a trace for this particular goroutine. 
+ goid, err := strconv.ParseUint(goids, 10, 64) + if err != nil { + log.Printf("failed to parse goid parameter %q: %v", goids, err) + return + } + analyzeGoroutines(res.Events) + g, ok := gs[goid] + if !ok { + log.Printf("failed to find goroutine %d", goid) + return + } + params.mode = modeGoroutineOriented + params.startTime = g.StartTime + if g.EndTime != 0 { + params.endTime = g.EndTime + } else { // The goroutine didn't end. + params.endTime = lastTimestamp() + } + params.maing = goid + params.gs = trace.RelatedGoroutines(res.Events, goid) + } else if taskids := r.FormValue("taskid"); taskids != "" { + taskid, err := strconv.ParseUint(taskids, 10, 64) + if err != nil { + log.Printf("failed to parse taskid parameter %q: %v", taskids, err) + return + } + annotRes, _ := analyzeAnnotations() + task, ok := annotRes.tasks[taskid] + if !ok || len(task.events) == 0 { + log.Printf("failed to find task with id %d", taskid) + return + } + goid := task.events[0].G + params.mode = modeGoroutineOriented | modeTaskOriented + params.startTime = task.firstTimestamp() - 1 + params.endTime = task.lastTimestamp() + 1 + params.maing = goid + params.tasks = task.descendants() + gs := map[uint64]bool{} + for _, t := range params.tasks { + // find only directly involved goroutines + for k, v := range t.RelatedGoroutines(res.Events, 0) { + gs[k] = v + } + } + params.gs = gs + } else if taskids := r.FormValue("focustask"); taskids != "" { + taskid, err := strconv.ParseUint(taskids, 10, 64) + if err != nil { + log.Printf("failed to parse focustask parameter %q: %v", taskids, err) + return + } + annotRes, _ := analyzeAnnotations() + task, ok := annotRes.tasks[taskid] + if !ok || len(task.events) == 0 { + log.Printf("failed to find task with id %d", taskid) + return + } + params.mode = modeTaskOriented + params.startTime = task.firstTimestamp() - 1 + params.endTime = task.lastTimestamp() + 1 + params.tasks = task.descendants() + } + + start := int64(0) + end := int64(math.MaxInt64) + if 
startStr, endStr := r.FormValue("start"), r.FormValue("end"); startStr != "" && endStr != "" { + // If start/end arguments are present, we are rendering a range of the trace. + start, err = strconv.ParseInt(startStr, 10, 64) + if err != nil { + log.Printf("failed to parse start parameter %q: %v", startStr, err) + return + } + end, err = strconv.ParseInt(endStr, 10, 64) + if err != nil { + log.Printf("failed to parse end parameter %q: %v", endStr, err) + return + } + } + + c := viewerDataTraceConsumer(w, start, end) + if err := generateTrace(params, c); err != nil { + log.Printf("failed to generate trace: %v", err) + return + } +} + +type Range struct { + Name string + Start int + End int + StartTime int64 + EndTime int64 +} + +func (r Range) URL() string { + return fmt.Sprintf("/trace?start=%d&end=%d", r.Start, r.End) +} + +// splitTrace splits the trace into a number of ranges, +// each resulting in approx 100MB of json output +// (trace viewer can hardly handle more). +func splitTrace(res trace.ParseResult) []Range { + params := &traceParams{ + parsed: res, + endTime: math.MaxInt64, + } + s, c := splittingTraceConsumer(100 << 20) // 100M + if err := generateTrace(params, c); err != nil { + dief("%v\n", err) + } + return s.Ranges +} + +type splitter struct { + Ranges []Range +} + +func splittingTraceConsumer(max int) (*splitter, traceConsumer) { + type eventSz struct { + Time float64 + Sz int + } + + var ( + data = traceviewer.Data{Frames: make(map[string]traceviewer.Frame)} + + sizes []eventSz + cw countingWriter + ) + + s := new(splitter) + + return s, traceConsumer{ + consumeTimeUnit: func(unit string) { + data.TimeUnit = unit + }, + consumeViewerEvent: func(v *traceviewer.Event, required bool) { + if required { + // Store required events inside data + // so flush can include them in the required + // part of the trace. 
+ data.Events = append(data.Events, v) + return + } + enc := json.NewEncoder(&cw) + enc.Encode(v) + sizes = append(sizes, eventSz{v.Time, cw.size + 1}) // +1 for ",". + cw.size = 0 + }, + consumeViewerFrame: func(k string, v traceviewer.Frame) { + data.Frames[k] = v + }, + flush: func() { + // Calculate size of the mandatory part of the trace. + // This includes stack traces and thread names. + cw.size = 0 + enc := json.NewEncoder(&cw) + enc.Encode(data) + minSize := cw.size + + // Then calculate size of each individual event + // and group them into ranges. + sum := minSize + start := 0 + for i, ev := range sizes { + if sum+ev.Sz > max { + startTime := time.Duration(sizes[start].Time * 1000) + endTime := time.Duration(ev.Time * 1000) + ranges = append(ranges, Range{ + Name: fmt.Sprintf("%v-%v", startTime, endTime), + Start: start, + End: i + 1, + StartTime: int64(startTime), + EndTime: int64(endTime), + }) + start = i + 1 + sum = minSize + } else { + sum += ev.Sz + 1 + } + } + if len(ranges) <= 1 { + s.Ranges = nil + return + } + + if end := len(sizes) - 1; start < end { + ranges = append(ranges, Range{ + Name: fmt.Sprintf("%v-%v", time.Duration(sizes[start].Time*1000), time.Duration(sizes[end].Time*1000)), + Start: start, + End: end, + StartTime: int64(sizes[start].Time * 1000), + EndTime: int64(sizes[end].Time * 1000), + }) + } + s.Ranges = ranges + }, + } +} + +type countingWriter struct { + size int +} + +func (cw *countingWriter) Write(data []byte) (int, error) { + cw.size += len(data) + return len(data), nil +} + +type traceParams struct { + parsed trace.ParseResult + mode traceviewMode + startTime int64 + endTime int64 + maing uint64 // for goroutine-oriented view, place this goroutine on the top row + gs map[uint64]bool // Goroutines to be displayed for goroutine-oriented or task-oriented view + tasks []*taskDesc // Tasks to be displayed. 
tasks[0] is the top-most task +} + +type traceviewMode uint + +const ( + modeGoroutineOriented traceviewMode = 1 << iota + modeTaskOriented +) + +type traceContext struct { + *traceParams + consumer traceConsumer + frameTree frameNode + frameSeq int + arrowSeq uint64 + gcount uint64 + + heapStats, prevHeapStats heapStats + threadStats, prevThreadStats threadStats + gstates, prevGstates [gStateCount]int64 + + regionID int // last emitted region id. incremented in each emitRegion call. +} + +type heapStats struct { + heapAlloc uint64 + nextGC uint64 +} + +type threadStats struct { + insyscallRuntime int64 // system goroutine in syscall + insyscall int64 // user goroutine in syscall + prunning int64 // thread running P +} + +type frameNode struct { + id int + children map[uint64]frameNode +} + +type gState int + +const ( + gDead gState = iota + gRunnable + gRunning + gWaiting + gWaitingGC + + gStateCount +) + +type gInfo struct { + state gState // current state + name string // name chosen for this goroutine at first EvGoStart + isSystemG bool + start *trace.Event // most recent EvGoStart + markAssist *trace.Event // if non-nil, the mark assist currently running. +} + +type NameArg struct { + Name string `json:"name"` +} + +type TaskArg struct { + ID uint64 `json:"id"` + StartG uint64 `json:"start_g,omitempty"` + EndG uint64 `json:"end_g,omitempty"` +} + +type RegionArg struct { + TaskID uint64 `json:"taskid,omitempty"` +} + +type SortIndexArg struct { + Index int `json:"sort_index"` +} + +type traceConsumer struct { + consumeTimeUnit func(unit string) + consumeViewerEvent func(v *traceviewer.Event, required bool) + consumeViewerFrame func(key string, f traceviewer.Frame) + flush func() +} + +const ( + procsSection = 0 // where Goroutines or per-P timelines are presented. + statsSection = 1 // where counters are presented. + tasksSection = 2 // where Task hierarchy & timeline is presented. 
+) + +// generateTrace generates json trace for trace-viewer: +// https://github.com/google/trace-viewer +// Trace format is described at: +// https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/view +// If mode==goroutineMode, generate trace for goroutine goid, otherwise whole trace. +// startTime, endTime determine part of the trace that we are interested in. +// gset restricts goroutines that are included in the resulting trace. +func generateTrace(params *traceParams, consumer traceConsumer) error { + defer consumer.flush() + + ctx := &traceContext{traceParams: params} + ctx.frameTree.children = make(map[uint64]frameNode) + ctx.consumer = consumer + + ctx.consumer.consumeTimeUnit("ns") + maxProc := 0 + ginfos := make(map[uint64]*gInfo) + stacks := params.parsed.Stacks + + getGInfo := func(g uint64) *gInfo { + info, ok := ginfos[g] + if !ok { + info = &gInfo{} + ginfos[g] = info + } + return info + } + + // Since we make many calls to setGState, we record a sticky + // error in setGStateErr and check it after every event. + var setGStateErr error + setGState := func(ev *trace.Event, g uint64, oldState, newState gState) { + info := getGInfo(g) + if oldState == gWaiting && info.state == gWaitingGC { + // For checking, gWaiting counts as any gWaiting*. + oldState = info.state + } + if info.state != oldState && setGStateErr == nil { + setGStateErr = fmt.Errorf("expected G %d to be in state %d, but got state %d", g, oldState, newState) + } + ctx.gstates[info.state]-- + ctx.gstates[newState]++ + info.state = newState + } + + for _, ev := range ctx.parsed.Events { + // Handle state transitions before we filter out events. 
+ switch ev.Type { + case trace.EvGoStart, trace.EvGoStartLabel: + setGState(ev, ev.G, gRunnable, gRunning) + info := getGInfo(ev.G) + info.start = ev + case trace.EvProcStart: + ctx.threadStats.prunning++ + case trace.EvProcStop: + ctx.threadStats.prunning-- + case trace.EvGoCreate: + newG := ev.Args[0] + info := getGInfo(newG) + if info.name != "" { + return fmt.Errorf("duplicate go create event for go id=%d detected at offset %d", newG, ev.Off) + } + + stk, ok := stacks[ev.Args[1]] + if !ok || len(stk) == 0 { + return fmt.Errorf("invalid go create event: missing stack information for go id=%d at offset %d", newG, ev.Off) + } + + fname := stk[0].Fn + info.name = fmt.Sprintf("G%v %s", newG, fname) + info.isSystemG = isSystemGoroutine(fname) + + ctx.gcount++ + setGState(ev, newG, gDead, gRunnable) + case trace.EvGoEnd: + ctx.gcount-- + setGState(ev, ev.G, gRunning, gDead) + case trace.EvGoUnblock: + setGState(ev, ev.Args[0], gWaiting, gRunnable) + case trace.EvGoSysExit: + setGState(ev, ev.G, gWaiting, gRunnable) + if getGInfo(ev.G).isSystemG { + ctx.threadStats.insyscallRuntime-- + } else { + ctx.threadStats.insyscall-- + } + case trace.EvGoSysBlock: + setGState(ev, ev.G, gRunning, gWaiting) + if getGInfo(ev.G).isSystemG { + ctx.threadStats.insyscallRuntime++ + } else { + ctx.threadStats.insyscall++ + } + case trace.EvGoSched, trace.EvGoPreempt: + setGState(ev, ev.G, gRunning, gRunnable) + case trace.EvGoStop, + trace.EvGoSleep, trace.EvGoBlock, trace.EvGoBlockSend, trace.EvGoBlockRecv, + trace.EvGoBlockSelect, trace.EvGoBlockSync, trace.EvGoBlockCond, trace.EvGoBlockNet: + setGState(ev, ev.G, gRunning, gWaiting) + case trace.EvGoBlockGC: + setGState(ev, ev.G, gRunning, gWaitingGC) + case trace.EvGCMarkAssistStart: + getGInfo(ev.G).markAssist = ev + case trace.EvGCMarkAssistDone: + getGInfo(ev.G).markAssist = nil + case trace.EvGoWaiting: + setGState(ev, ev.G, gRunnable, gWaiting) + case trace.EvGoInSyscall: + // Cancel out the effect of EvGoCreate at the 
beginning. + setGState(ev, ev.G, gRunnable, gWaiting) + if getGInfo(ev.G).isSystemG { + ctx.threadStats.insyscallRuntime++ + } else { + ctx.threadStats.insyscall++ + } + case trace.EvHeapAlloc: + ctx.heapStats.heapAlloc = ev.Args[0] + case trace.EvNextGC: + ctx.heapStats.nextGC = ev.Args[0] + } + if setGStateErr != nil { + return setGStateErr + } + if ctx.gstates[gRunnable] < 0 || ctx.gstates[gRunning] < 0 || ctx.threadStats.insyscall < 0 || ctx.threadStats.insyscallRuntime < 0 { + return fmt.Errorf("invalid state after processing %v: runnable=%d running=%d insyscall=%d insyscallRuntime=%d", ev, ctx.gstates[gRunnable], ctx.gstates[gRunning], ctx.threadStats.insyscall, ctx.threadStats.insyscallRuntime) + } + + // Ignore events that are from uninteresting goroutines + // or outside of the interesting timeframe. + if ctx.gs != nil && ev.P < trace.FakeP && !ctx.gs[ev.G] { + continue + } + if !withinTimeRange(ev, ctx.startTime, ctx.endTime) { + continue + } + + if ev.P < trace.FakeP && ev.P > maxProc { + maxProc = ev.P + } + + // Emit trace objects. + switch ev.Type { + case trace.EvProcStart: + if ctx.mode&modeGoroutineOriented != 0 { + continue + } + ctx.emitInstant(ev, "proc start", "") + case trace.EvProcStop: + if ctx.mode&modeGoroutineOriented != 0 { + continue + } + ctx.emitInstant(ev, "proc stop", "") + case trace.EvGCStart: + ctx.emitSlice(ev, "GC") + case trace.EvGCDone: + case trace.EvGCSTWStart: + if ctx.mode&modeGoroutineOriented != 0 { + continue + } + ctx.emitSlice(ev, fmt.Sprintf("STW (%s)", ev.SArgs[0])) + case trace.EvGCSTWDone: + case trace.EvGCMarkAssistStart: + // Mark assists can continue past preemptions, so truncate to the + // whichever comes first. We'll synthesize another slice if + // necessary in EvGoStart. 
+ markFinish := ev.Link + goFinish := getGInfo(ev.G).start.Link + fakeMarkStart := *ev + text := "MARK ASSIST" + if markFinish == nil || markFinish.Ts > goFinish.Ts { + fakeMarkStart.Link = goFinish + text = "MARK ASSIST (unfinished)" + } + ctx.emitSlice(&fakeMarkStart, text) + case trace.EvGCSweepStart: + slice := ctx.makeSlice(ev, "SWEEP") + if done := ev.Link; done != nil && done.Args[0] != 0 { + slice.Arg = struct { + Swept uint64 `json:"Swept bytes"` + Reclaimed uint64 `json:"Reclaimed bytes"` + }{done.Args[0], done.Args[1]} + } + ctx.emit(slice) + case trace.EvGoStart, trace.EvGoStartLabel: + info := getGInfo(ev.G) + if ev.Type == trace.EvGoStartLabel { + ctx.emitSlice(ev, ev.SArgs[0]) + } else { + ctx.emitSlice(ev, info.name) + } + if info.markAssist != nil { + // If we're in a mark assist, synthesize a new slice, ending + // either when the mark assist ends or when we're descheduled. + markFinish := info.markAssist.Link + goFinish := ev.Link + fakeMarkStart := *ev + text := "MARK ASSIST (resumed, unfinished)" + if markFinish != nil && markFinish.Ts < goFinish.Ts { + fakeMarkStart.Link = markFinish + text = "MARK ASSIST (resumed)" + } + ctx.emitSlice(&fakeMarkStart, text) + } + case trace.EvGoCreate: + ctx.emitArrow(ev, "go") + case trace.EvGoUnblock: + ctx.emitArrow(ev, "unblock") + case trace.EvGoSysCall: + ctx.emitInstant(ev, "syscall", "") + case trace.EvGoSysExit: + ctx.emitArrow(ev, "sysexit") + case trace.EvUserLog: + ctx.emitInstant(ev, formatUserLog(ev), "user event") + case trace.EvUserTaskCreate: + ctx.emitInstant(ev, "task start", "user event") + case trace.EvUserTaskEnd: + ctx.emitInstant(ev, "task end", "user event") + } + // Emit any counter updates. 
+ ctx.emitThreadCounters(ev) + ctx.emitHeapCounters(ev) + ctx.emitGoroutineCounters(ev) + } + + ctx.emitSectionFooter(statsSection, "STATS", 0) + + if ctx.mode&modeTaskOriented != 0 { + ctx.emitSectionFooter(tasksSection, "TASKS", 1) + } + + if ctx.mode&modeGoroutineOriented != 0 { + ctx.emitSectionFooter(procsSection, "G", 2) + } else { + ctx.emitSectionFooter(procsSection, "PROCS", 2) + } + + ctx.emitFooter(&traceviewer.Event{Name: "thread_name", Phase: "M", PID: procsSection, TID: trace.GCP, Arg: &NameArg{"GC"}}) + ctx.emitFooter(&traceviewer.Event{Name: "thread_sort_index", Phase: "M", PID: procsSection, TID: trace.GCP, Arg: &SortIndexArg{-6}}) + + ctx.emitFooter(&traceviewer.Event{Name: "thread_name", Phase: "M", PID: procsSection, TID: trace.NetpollP, Arg: &NameArg{"Network"}}) + ctx.emitFooter(&traceviewer.Event{Name: "thread_sort_index", Phase: "M", PID: procsSection, TID: trace.NetpollP, Arg: &SortIndexArg{-5}}) + + ctx.emitFooter(&traceviewer.Event{Name: "thread_name", Phase: "M", PID: procsSection, TID: trace.TimerP, Arg: &NameArg{"Timers"}}) + ctx.emitFooter(&traceviewer.Event{Name: "thread_sort_index", Phase: "M", PID: procsSection, TID: trace.TimerP, Arg: &SortIndexArg{-4}}) + + ctx.emitFooter(&traceviewer.Event{Name: "thread_name", Phase: "M", PID: procsSection, TID: trace.SyscallP, Arg: &NameArg{"Syscalls"}}) + ctx.emitFooter(&traceviewer.Event{Name: "thread_sort_index", Phase: "M", PID: procsSection, TID: trace.SyscallP, Arg: &SortIndexArg{-3}}) + + // Display rows for Ps if we are in the default trace view mode (not goroutine-oriented presentation) + if ctx.mode&modeGoroutineOriented == 0 { + for i := 0; i <= maxProc; i++ { + ctx.emitFooter(&traceviewer.Event{Name: "thread_name", Phase: "M", PID: procsSection, TID: uint64(i), Arg: &NameArg{fmt.Sprintf("Proc %v", i)}}) + ctx.emitFooter(&traceviewer.Event{Name: "thread_sort_index", Phase: "M", PID: procsSection, TID: uint64(i), Arg: &SortIndexArg{i}}) + } + } + + // Display task and its regions if 
we are in task-oriented presentation mode. + if ctx.mode&modeTaskOriented != 0 { + // sort tasks based on the task start time. + sortedTask := make([]*taskDesc, 0, len(ctx.tasks)) + for _, task := range ctx.tasks { + sortedTask = append(sortedTask, task) + } + sort.SliceStable(sortedTask, func(i, j int) bool { + ti, tj := sortedTask[i], sortedTask[j] + if ti.firstTimestamp() == tj.firstTimestamp() { + return ti.lastTimestamp() < tj.lastTimestamp() + } + return ti.firstTimestamp() < tj.firstTimestamp() + }) + + for i, task := range sortedTask { + ctx.emitTask(task, i) + + // If we are in goroutine-oriented mode, we draw regions. + // TODO(hyangah): add this for task/P-oriented mode (i.e., focustask view) too. + if ctx.mode&modeGoroutineOriented != 0 { + for _, s := range task.regions { + ctx.emitRegion(s) + } + } + } + } + + // Display goroutine rows if we are either in goroutine-oriented mode. + if ctx.mode&modeGoroutineOriented != 0 { + for k, v := range ginfos { + if !ctx.gs[k] { + continue + } + ctx.emitFooter(&traceviewer.Event{Name: "thread_name", Phase: "M", PID: procsSection, TID: k, Arg: &NameArg{v.name}}) + } + // Row for the main goroutine (maing) + ctx.emitFooter(&traceviewer.Event{Name: "thread_sort_index", Phase: "M", PID: procsSection, TID: ctx.maing, Arg: &SortIndexArg{-2}}) + // Row for GC or global state (specified with G=0) + ctx.emitFooter(&traceviewer.Event{Name: "thread_sort_index", Phase: "M", PID: procsSection, TID: 0, Arg: &SortIndexArg{-1}}) + } + + return nil +} + +func (ctx *traceContext) emit(e *traceviewer.Event) { + ctx.consumer.consumeViewerEvent(e, false) +} + +func (ctx *traceContext) emitFooter(e *traceviewer.Event) { + ctx.consumer.consumeViewerEvent(e, true) +} +func (ctx *traceContext) emitSectionFooter(sectionID uint64, name string, priority int) { + ctx.emitFooter(&traceviewer.Event{Name: "process_name", Phase: "M", PID: sectionID, Arg: &NameArg{name}}) + ctx.emitFooter(&traceviewer.Event{Name: "process_sort_index", Phase: 
"M", PID: sectionID, Arg: &SortIndexArg{priority}}) +} + +func (ctx *traceContext) time(ev *trace.Event) float64 { + // Trace viewer wants timestamps in microseconds. + return float64(ev.Ts) / 1000 +} + +func withinTimeRange(ev *trace.Event, s, e int64) bool { + if evEnd := ev.Link; evEnd != nil { + return ev.Ts <= e && evEnd.Ts >= s + } + return ev.Ts >= s && ev.Ts <= e +} + +func tsWithinRange(ts, s, e int64) bool { + return s <= ts && ts <= e +} + +func (ctx *traceContext) proc(ev *trace.Event) uint64 { + if ctx.mode&modeGoroutineOriented != 0 && ev.P < trace.FakeP { + return ev.G + } else { + return uint64(ev.P) + } +} + +func (ctx *traceContext) emitSlice(ev *trace.Event, name string) { + ctx.emit(ctx.makeSlice(ev, name)) +} + +func (ctx *traceContext) makeSlice(ev *trace.Event, name string) *traceviewer.Event { + // If ViewerEvent.Dur is not a positive value, + // trace viewer handles it as a non-terminating time interval. + // Avoid it by setting the field with a small value. + durationUsec := ctx.time(ev.Link) - ctx.time(ev) + if ev.Link.Ts-ev.Ts <= 0 { + durationUsec = 0.0001 // 0.1 nanoseconds + } + sl := &traceviewer.Event{ + Name: name, + Phase: "X", + Time: ctx.time(ev), + Dur: durationUsec, + TID: ctx.proc(ev), + Stack: ctx.stack(ev.Stk), + EndStack: ctx.stack(ev.Link.Stk), + } + + // grey out non-overlapping events if the event is not a global event (ev.G == 0) + if ctx.mode&modeTaskOriented != 0 && ev.G != 0 { + // include P information. + if t := ev.Type; t == trace.EvGoStart || t == trace.EvGoStartLabel { + type Arg struct { + P int + } + sl.Arg = &Arg{P: ev.P} + } + // grey out non-overlapping events. 
+ overlapping := false + for _, task := range ctx.tasks { + if _, overlapped := task.overlappingDuration(ev); overlapped { + overlapping = true + break + } + } + if !overlapping { + sl.Cname = colorLightGrey + } + } + return sl +} + +func (ctx *traceContext) emitTask(task *taskDesc, sortIndex int) { + taskRow := uint64(task.id) + taskName := task.name + durationUsec := float64(task.lastTimestamp()-task.firstTimestamp()) / 1e3 + + ctx.emitFooter(&traceviewer.Event{Name: "thread_name", Phase: "M", PID: tasksSection, TID: taskRow, Arg: &NameArg{fmt.Sprintf("T%d %s", task.id, taskName)}}) + ctx.emit(&traceviewer.Event{Name: "thread_sort_index", Phase: "M", PID: tasksSection, TID: taskRow, Arg: &SortIndexArg{sortIndex}}) + ts := float64(task.firstTimestamp()) / 1e3 + sl := &traceviewer.Event{ + Name: taskName, + Phase: "X", + Time: ts, + Dur: durationUsec, + PID: tasksSection, + TID: taskRow, + Cname: pickTaskColor(task.id), + } + targ := TaskArg{ID: task.id} + if task.create != nil { + sl.Stack = ctx.stack(task.create.Stk) + targ.StartG = task.create.G + } + if task.end != nil { + sl.EndStack = ctx.stack(task.end.Stk) + targ.EndG = task.end.G + } + sl.Arg = targ + ctx.emit(sl) + + if task.create != nil && task.create.Type == trace.EvUserTaskCreate && task.create.Args[1] != 0 { + ctx.arrowSeq++ + ctx.emit(&traceviewer.Event{Name: "newTask", Phase: "s", TID: task.create.Args[1], ID: ctx.arrowSeq, Time: ts, PID: tasksSection}) + ctx.emit(&traceviewer.Event{Name: "newTask", Phase: "t", TID: taskRow, ID: ctx.arrowSeq, Time: ts, PID: tasksSection}) + } +} + +func (ctx *traceContext) emitRegion(s regionDesc) { + if s.Name == "" { + return + } + + if !tsWithinRange(s.firstTimestamp(), ctx.startTime, ctx.endTime) && + !tsWithinRange(s.lastTimestamp(), ctx.startTime, ctx.endTime) { + return + } + + ctx.regionID++ + regionID := ctx.regionID + + id := s.TaskID + scopeID := fmt.Sprintf("%x", id) + name := s.Name + + sl0 := &traceviewer.Event{ + Category: "Region", + Name: name, + 
Phase: "b", + Time: float64(s.firstTimestamp()) / 1e3, + TID: s.G, // only in goroutine-oriented view + ID: uint64(regionID), + Scope: scopeID, + Cname: pickTaskColor(s.TaskID), + } + if s.Start != nil { + sl0.Stack = ctx.stack(s.Start.Stk) + } + ctx.emit(sl0) + + sl1 := &traceviewer.Event{ + Category: "Region", + Name: name, + Phase: "e", + Time: float64(s.lastTimestamp()) / 1e3, + TID: s.G, + ID: uint64(regionID), + Scope: scopeID, + Cname: pickTaskColor(s.TaskID), + Arg: RegionArg{TaskID: s.TaskID}, + } + if s.End != nil { + sl1.Stack = ctx.stack(s.End.Stk) + } + ctx.emit(sl1) +} + +type heapCountersArg struct { + Allocated uint64 + NextGC uint64 +} + +func (ctx *traceContext) emitHeapCounters(ev *trace.Event) { + if ctx.prevHeapStats == ctx.heapStats { + return + } + diff := uint64(0) + if ctx.heapStats.nextGC > ctx.heapStats.heapAlloc { + diff = ctx.heapStats.nextGC - ctx.heapStats.heapAlloc + } + if tsWithinRange(ev.Ts, ctx.startTime, ctx.endTime) { + ctx.emit(&traceviewer.Event{Name: "Heap", Phase: "C", Time: ctx.time(ev), PID: 1, Arg: &heapCountersArg{ctx.heapStats.heapAlloc, diff}}) + } + ctx.prevHeapStats = ctx.heapStats +} + +type goroutineCountersArg struct { + Running uint64 + Runnable uint64 + GCWaiting uint64 +} + +func (ctx *traceContext) emitGoroutineCounters(ev *trace.Event) { + if ctx.prevGstates == ctx.gstates { + return + } + if tsWithinRange(ev.Ts, ctx.startTime, ctx.endTime) { + ctx.emit(&traceviewer.Event{Name: "Goroutines", Phase: "C", Time: ctx.time(ev), PID: 1, Arg: &goroutineCountersArg{uint64(ctx.gstates[gRunning]), uint64(ctx.gstates[gRunnable]), uint64(ctx.gstates[gWaitingGC])}}) + } + ctx.prevGstates = ctx.gstates +} + +type threadCountersArg struct { + Running int64 + InSyscall int64 +} + +func (ctx *traceContext) emitThreadCounters(ev *trace.Event) { + if ctx.prevThreadStats == ctx.threadStats { + return + } + if tsWithinRange(ev.Ts, ctx.startTime, ctx.endTime) { + ctx.emit(&traceviewer.Event{Name: "Threads", Phase: "C", Time: 
ctx.time(ev), PID: 1, Arg: &threadCountersArg{
		Running:   ctx.threadStats.prunning,
		InSyscall: ctx.threadStats.insyscall}})
	}
	ctx.prevThreadStats = ctx.threadStats
}

// emitInstant emits an instant ("I" phase) viewer event for ev, dropping
// events that fall outside the selected [startTime, endTime] window.
// In task-oriented mode, instants unrelated to any displayed task are
// greyed out, and unrelated user-annotation events are skipped entirely.
// For EvProcStart the started thread id is attached as an argument.
func (ctx *traceContext) emitInstant(ev *trace.Event, name, category string) {
	if !tsWithinRange(ev.Ts, ctx.startTime, ctx.endTime) {
		return
	}

	cname := ""
	if ctx.mode&modeTaskOriented != 0 {
		taskID, isUserAnnotation := isUserAnnotationEvent(ev)

		show := false
		for _, task := range ctx.tasks {
			if isUserAnnotation && task.id == taskID || task.overlappingInstant(ev) {
				show = true
				break
			}
		}
		// grey out or skip if non-overlapping instant.
		if !show {
			if isUserAnnotation {
				return // don't display unrelated user annotation events.
			}
			cname = colorLightGrey
		}
	}
	var arg interface{}
	if ev.Type == trace.EvProcStart {
		type Arg struct {
			ThreadID uint64
		}
		arg = &Arg{ev.Args[0]}
	}
	ctx.emit(&traceviewer.Event{
		Name:     name,
		Category: category,
		Phase:    "I",
		Scope:    "t",
		Time:     ctx.time(ev),
		TID:      ctx.proc(ev),
		Stack:    ctx.stack(ev.Stk),
		Cname:    cname,
		Arg:      arg})
}

// emitArrow emits a flow arrow (an "s"/"t" start/target event pair) from ev
// to ev.Link. Arrows with no captured endpoint are dropped; in
// goroutine-oriented mode, arrows to goroutines outside the selected set or
// time range are dropped; in task-oriented mode, arrows that do not overlap
// any displayed task are dropped.
func (ctx *traceContext) emitArrow(ev *trace.Event, name string) {
	if ev.Link == nil {
		// The other end of the arrow is not captured in the trace.
		// For example, a goroutine was unblocked but was not scheduled before trace stop.
		return
	}
	if ctx.mode&modeGoroutineOriented != 0 && (!ctx.gs[ev.Link.G] || ev.Link.Ts < ctx.startTime || ev.Link.Ts > ctx.endTime) {
		return
	}

	if ev.P == trace.NetpollP || ev.P == trace.TimerP || ev.P == trace.SyscallP {
		// Trace-viewer discards arrows if they don't start/end inside of a slice or instant.
		// So emit a fake instant at the start of the arrow.
		ctx.emitInstant(&trace.Event{P: ev.P, Ts: ev.Ts}, "unblock", "")
	}

	color := ""
	if ctx.mode&modeTaskOriented != 0 {
		overlapping := false
		// skip non-overlapping arrows.
		for _, task := range ctx.tasks {
			if _, overlapped := task.overlappingDuration(ev); overlapped {
				overlapping = true
				break
			}
		}
		if !overlapping {
			return
		}
	}

	ctx.arrowSeq++
	ctx.emit(&traceviewer.Event{Name: name, Phase: "s", TID: ctx.proc(ev), ID: ctx.arrowSeq, Time: ctx.time(ev), Stack: ctx.stack(ev.Stk), Cname: color})
	ctx.emit(&traceviewer.Event{Name: name, Phase: "t", TID: ctx.proc(ev.Link), ID: ctx.arrowSeq, Time: ctx.time(ev.Link), Cname: color})
}

// stack returns the viewer frame id for the call stack stk, registering any
// frames not seen before with the consumer.
func (ctx *traceContext) stack(stk []*trace.Frame) int {
	return ctx.buildBranch(ctx.frameTree, stk)
}

// buildBranch builds one branch in the prefix tree rooted at ctx.frameTree.
// Frames are consumed from the end of the slice toward the front, so each
// recursion step descends one level; newly created nodes are assigned the
// next sequence id and reported via consumeViewerFrame.
func (ctx *traceContext) buildBranch(parent frameNode, stk []*trace.Frame) int {
	if len(stk) == 0 {
		return parent.id
	}
	last := len(stk) - 1
	frame := stk[last]
	stk = stk[:last]

	node, ok := parent.children[frame.PC]
	if !ok {
		ctx.frameSeq++
		node.id = ctx.frameSeq
		node.children = make(map[uint64]frameNode)
		parent.children[frame.PC] = node
		ctx.consumer.consumeViewerFrame(strconv.Itoa(node.id), traceviewer.Frame{Name: fmt.Sprintf("%v:%v", frame.Fn, frame.Line), Parent: parent.id})
	}
	return ctx.buildBranch(node, stk)
}

// isSystemGoroutine reports whether a goroutine with the given entry
// function should be treated as a runtime-internal goroutine.
func isSystemGoroutine(entryFn string) bool {
	// This mimics runtime.isSystemGoroutine as closely as
	// possible.
	return entryFn != "runtime.main" && strings.HasPrefix(entryFn, "runtime.")
}

// firstTimestamp returns the timestamp of the first event record.
func firstTimestamp() int64 {
	res, _ := parseTrace()
	if len(res.Events) > 0 {
		return res.Events[0].Ts
	}
	return 0
}

// lastTimestamp returns the timestamp of the last event record.
+func lastTimestamp() int64 { + res, _ := parseTrace() + if n := len(res.Events); n > 1 { + return res.Events[n-1].Ts + } + return 0 +} + +type jsonWriter struct { + w io.Writer + enc *json.Encoder +} + +func viewerDataTraceConsumer(w io.Writer, start, end int64) traceConsumer { + frames := make(map[string]traceviewer.Frame) + enc := json.NewEncoder(w) + written := 0 + index := int64(-1) + + io.WriteString(w, "{") + return traceConsumer{ + consumeTimeUnit: func(unit string) { + io.WriteString(w, `"displayTimeUnit":`) + enc.Encode(unit) + io.WriteString(w, ",") + }, + consumeViewerEvent: func(v *traceviewer.Event, required bool) { + index++ + if !required && (index < start || index > end) { + // not in the range. Skip! + return + } + if written == 0 { + io.WriteString(w, `"traceEvents": [`) + } + if written > 0 { + io.WriteString(w, ",") + } + enc.Encode(v) + // TODO: get rid of the extra \n inserted by enc.Encode. + // Same should be applied to splittingTraceConsumer. + written++ + }, + consumeViewerFrame: func(k string, v traceviewer.Frame) { + frames[k] = v + }, + flush: func() { + io.WriteString(w, `], "stackFrames":`) + enc.Encode(frames) + io.WriteString(w, `}`) + }, + } +} + +// Mapping from more reasonable color names to the reserved color names in +// https://github.com/catapult-project/catapult/blob/master/tracing/tracing/base/color_scheme.html#L50 +// The chrome trace viewer allows only those as cname values. 
+const ( + colorLightMauve = "thread_state_uninterruptible" // 182, 125, 143 + colorOrange = "thread_state_iowait" // 255, 140, 0 + colorSeafoamGreen = "thread_state_running" // 126, 200, 148 + colorVistaBlue = "thread_state_runnable" // 133, 160, 210 + colorTan = "thread_state_unknown" // 199, 155, 125 + colorIrisBlue = "background_memory_dump" // 0, 180, 180 + colorMidnightBlue = "light_memory_dump" // 0, 0, 180 + colorDeepMagenta = "detailed_memory_dump" // 180, 0, 180 + colorBlue = "vsync_highlight_color" // 0, 0, 255 + colorGrey = "generic_work" // 125, 125, 125 + colorGreen = "good" // 0, 125, 0 + colorDarkGoldenrod = "bad" // 180, 125, 0 + colorPeach = "terrible" // 180, 0, 0 + colorBlack = "black" // 0, 0, 0 + colorLightGrey = "grey" // 221, 221, 221 + colorWhite = "white" // 255, 255, 255 + colorYellow = "yellow" // 255, 255, 0 + colorOlive = "olive" // 100, 100, 0 + colorCornflowerBlue = "rail_response" // 67, 135, 253 + colorSunsetOrange = "rail_animation" // 244, 74, 63 + colorTangerine = "rail_idle" // 238, 142, 0 + colorShamrockGreen = "rail_load" // 13, 168, 97 + colorGreenishYellow = "startup" // 230, 230, 0 + colorDarkGrey = "heap_dump_stack_frame" // 128, 128, 128 + colorTawny = "heap_dump_child_node_arrow" // 204, 102, 0 + colorLemon = "cq_build_running" // 255, 255, 119 + colorLime = "cq_build_passed" // 153, 238, 102 + colorPink = "cq_build_failed" // 238, 136, 136 + colorSilver = "cq_build_abandoned" // 187, 187, 187 + colorManzGreen = "cq_build_attempt_runnig" // 222, 222, 75 + colorKellyGreen = "cq_build_attempt_passed" // 108, 218, 35 + colorAnotherGrey = "cq_build_attempt_failed" // 187, 187, 187 +) + +var colorForTask = []string{ + colorLightMauve, + colorOrange, + colorSeafoamGreen, + colorVistaBlue, + colorTan, + colorMidnightBlue, + colorIrisBlue, + colorDeepMagenta, + colorGreen, + colorDarkGoldenrod, + colorPeach, + colorOlive, + colorCornflowerBlue, + colorSunsetOrange, + colorTangerine, + colorShamrockGreen, + colorTawny, + 
colorLemon, + colorLime, + colorPink, + colorSilver, + colorManzGreen, + colorKellyGreen, +} + +func pickTaskColor(id uint64) string { + idx := id % uint64(len(colorForTask)) + return colorForTask[idx] +} diff --git a/src/cmd/trace/trace_test.go b/src/cmd/trace/trace_test.go new file mode 100644 index 0000000..ea0cc6f --- /dev/null +++ b/src/cmd/trace/trace_test.go @@ -0,0 +1,269 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !js + +package main + +import ( + "cmd/internal/traceviewer" + "context" + "internal/trace" + "io" + rtrace "runtime/trace" + "strings" + "sync" + "testing" + "time" +) + +// stacks is a fake stack map populated for test. +type stacks map[uint64][]*trace.Frame + +// add adds a stack with a single frame whose Fn field is +// set to the provided fname and returns a unique stack id. +func (s *stacks) add(fname string) uint64 { + if *s == nil { + *s = make(map[uint64][]*trace.Frame) + } + + id := uint64(len(*s)) + (*s)[id] = []*trace.Frame{{Fn: fname}} + return id +} + +// TestGoroutineCount tests runnable/running goroutine counts computed by generateTrace +// remain in the valid range. +// - the counts must not be negative. generateTrace will return an error. +// - the counts must not include goroutines blocked waiting on channels or in syscall. +func TestGoroutineCount(t *testing.T) { + w := trace.NewWriter() + w.Emit(trace.EvBatch, 0, 0) // start of per-P batch event [pid, timestamp] + w.Emit(trace.EvFrequency, 1) // [ticks per second] + + var s stacks + + // In this test, we assume a valid trace contains EvGoWaiting or EvGoInSyscall + // event for every blocked goroutine. 
+ + // goroutine 10: blocked + w.Emit(trace.EvGoCreate, 1, 10, s.add("pkg.f1"), s.add("main.f1")) // [timestamp, new goroutine id, new stack id, stack id] + w.Emit(trace.EvGoWaiting, 1, 10) // [timestamp, goroutine id] + + // goroutine 20: in syscall + w.Emit(trace.EvGoCreate, 1, 20, s.add("pkg.f2"), s.add("main.f2")) + w.Emit(trace.EvGoInSyscall, 1, 20) // [timestamp, goroutine id] + + // goroutine 30: runnable + w.Emit(trace.EvGoCreate, 1, 30, s.add("pkg.f3"), s.add("main.f3")) + + w.Emit(trace.EvProcStart, 2, 0) // [timestamp, thread id] + + // goroutine 40: runnable->running->runnable + w.Emit(trace.EvGoCreate, 1, 40, s.add("pkg.f4"), s.add("main.f4")) + w.Emit(trace.EvGoStartLocal, 1, 40) // [timestamp, goroutine id] + w.Emit(trace.EvGoSched, 1, s.add("main.f4")) // [timestamp, stack] + + res, err := trace.Parse(w, "") + if err != nil { + t.Fatalf("failed to parse test trace: %v", err) + } + res.Stacks = s // use fake stacks. + + params := &traceParams{ + parsed: res, + endTime: int64(1<<63 - 1), + } + + // Use the default viewerDataTraceConsumer but replace + // consumeViewerEvent to intercept the ViewerEvents for testing. + c := viewerDataTraceConsumer(io.Discard, 0, 1<<63-1) + c.consumeViewerEvent = func(ev *traceviewer.Event, _ bool) { + if ev.Name == "Goroutines" { + cnt := ev.Arg.(*goroutineCountersArg) + if cnt.Runnable+cnt.Running > 2 { + t.Errorf("goroutine count=%+v; want no more than 2 goroutines in runnable/running state", cnt) + } + t.Logf("read %+v %+v", ev, cnt) + } + } + + // If the counts drop below 0, generateTrace will return an error. + if err := generateTrace(params, c); err != nil { + t.Fatalf("generateTrace failed: %v", err) + } +} + +func TestGoroutineFilter(t *testing.T) { + // Test that we handle state changes to selected goroutines + // caused by events on goroutines that are not selected. 
+ + var s stacks + + w := trace.NewWriter() + w.Emit(trace.EvBatch, 0, 0) // start of per-P batch event [pid, timestamp] + w.Emit(trace.EvFrequency, 1) // [ticks per second] + + // goroutine 10: blocked + w.Emit(trace.EvGoCreate, 1, 10, s.add("pkg.f1"), s.add("main.f1")) // [timestamp, new goroutine id, new stack id, stack id] + w.Emit(trace.EvGoWaiting, 1, 10) // [timestamp, goroutine id] + + // goroutine 20: runnable->running->unblock 10 + w.Emit(trace.EvGoCreate, 1, 20, s.add("pkg.f2"), s.add("main.f2")) + w.Emit(trace.EvGoStartLocal, 1, 20) // [timestamp, goroutine id] + w.Emit(trace.EvGoUnblockLocal, 1, 10, s.add("pkg.f2")) // [timestamp, goroutine id, stack] + w.Emit(trace.EvGoEnd, 1) // [timestamp] + + // goroutine 10: runnable->running->block + w.Emit(trace.EvGoStartLocal, 1, 10) // [timestamp, goroutine id] + w.Emit(trace.EvGoBlock, 1, s.add("pkg.f3")) // [timestamp, stack] + + res, err := trace.Parse(w, "") + if err != nil { + t.Fatalf("failed to parse test trace: %v", err) + } + res.Stacks = s // use fake stacks + + params := &traceParams{ + parsed: res, + endTime: int64(1<<63 - 1), + gs: map[uint64]bool{10: true}, + } + + c := viewerDataTraceConsumer(io.Discard, 0, 1<<63-1) + if err := generateTrace(params, c); err != nil { + t.Fatalf("generateTrace failed: %v", err) + } +} + +func TestPreemptedMarkAssist(t *testing.T) { + w := trace.NewWriter() + w.Emit(trace.EvBatch, 0, 0) // start of per-P batch event [pid, timestamp] + w.Emit(trace.EvFrequency, 1) // [ticks per second] + + var s stacks + // goroutine 9999: running -> mark assisting -> preempted -> assisting -> running -> block + w.Emit(trace.EvGoCreate, 1, 9999, s.add("pkg.f1"), s.add("main.f1")) // [timestamp, new goroutine id, new stack id, stack id] + w.Emit(trace.EvGoStartLocal, 1, 9999) // [timestamp, goroutine id] + w.Emit(trace.EvGCMarkAssistStart, 1, s.add("main.f1")) // [timestamp, stack] + w.Emit(trace.EvGoPreempt, 1, s.add("main.f1")) // [timestamp, stack] + w.Emit(trace.EvGoStartLocal, 
1, 9999) // [timestamp, goroutine id] + w.Emit(trace.EvGCMarkAssistDone, 1) // [timestamp] + w.Emit(trace.EvGoBlock, 1, s.add("main.f2")) // [timestamp, stack] + + res, err := trace.Parse(w, "") + if err != nil { + t.Fatalf("failed to parse test trace: %v", err) + } + res.Stacks = s // use fake stacks + + params := &traceParams{ + parsed: res, + endTime: int64(1<<63 - 1), + } + + c := viewerDataTraceConsumer(io.Discard, 0, 1<<63-1) + + marks := 0 + c.consumeViewerEvent = func(ev *traceviewer.Event, _ bool) { + if strings.Contains(ev.Name, "MARK ASSIST") { + marks++ + } + } + if err := generateTrace(params, c); err != nil { + t.Fatalf("generateTrace failed: %v", err) + } + + if marks != 2 { + t.Errorf("Got %v MARK ASSIST events, want %v", marks, 2) + } +} + +func TestFoo(t *testing.T) { + prog0 := func() { + ctx, task := rtrace.NewTask(context.Background(), "ohHappyDay") + rtrace.Log(ctx, "", "log before task ends") + task.End() + rtrace.Log(ctx, "", "log after task ends") // log after task ends + } + if err := traceProgram(t, prog0, "TestFoo"); err != nil { + t.Fatalf("failed to trace the program: %v", err) + } + res, err := parseTrace() + if err != nil { + t.Fatalf("failed to parse the trace: %v", err) + } + annotRes, _ := analyzeAnnotations() + var task *taskDesc + for _, t := range annotRes.tasks { + if t.name == "ohHappyDay" { + task = t + break + } + } + if task == nil { + t.Fatal("failed to locate expected task event") + } + + params := &traceParams{ + parsed: res, + mode: modeTaskOriented, + startTime: task.firstTimestamp() - 1, + endTime: task.lastTimestamp() + 1, + tasks: []*taskDesc{task}, + } + + c := viewerDataTraceConsumer(io.Discard, 0, 1<<63-1) + + var logBeforeTaskEnd, logAfterTaskEnd bool + c.consumeViewerEvent = func(ev *traceviewer.Event, _ bool) { + if ev.Name == "log before task ends" { + logBeforeTaskEnd = true + } + if ev.Name == "log after task ends" { + logAfterTaskEnd = true + } + } + if err := generateTrace(params, c); err != nil { + 
t.Fatalf("generateTrace failed: %v", err) + } + if !logBeforeTaskEnd { + t.Error("failed to find 'log before task ends'") + } + if !logAfterTaskEnd { + t.Error("failed to find 'log after task ends'") + } + +} + +func TestDirectSemaphoreHandoff(t *testing.T) { + prog0 := func() { + var mu sync.Mutex + var wg sync.WaitGroup + mu.Lock() + // This is modeled after src/sync/mutex_test.go to trigger Mutex + // starvation mode, in which the goroutine that calls Unlock hands off + // both the semaphore and its remaining time slice. See issue 36186. + for i := 0; i < 2; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < 100; i++ { + mu.Lock() + time.Sleep(100 * time.Microsecond) + mu.Unlock() + } + }() + } + mu.Unlock() + wg.Wait() + } + if err := traceProgram(t, prog0, "TestDirectSemaphoreHandoff"); err != nil { + t.Fatalf("failed to trace the program: %v", err) + } + _, err := parseTrace() + if err != nil { + t.Fatalf("failed to parse the trace: %v", err) + } +} diff --git a/src/cmd/trace/trace_unix_test.go b/src/cmd/trace/trace_unix_test.go new file mode 100644 index 0000000..c569b40 --- /dev/null +++ b/src/cmd/trace/trace_unix_test.go @@ -0,0 +1,103 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin dragonfly freebsd linux netbsd openbsd solaris + +package main + +import ( + "bytes" + "cmd/internal/traceviewer" + traceparser "internal/trace" + "io" + "runtime" + "runtime/trace" + "sync" + "syscall" + "testing" + "time" +) + +// TestGoroutineInSyscall tests threads for timer goroutines +// that preexisted when the tracing started were not counted +// as threads in syscall. See golang.org/issues/22574. +func TestGoroutineInSyscall(t *testing.T) { + // Start one goroutine blocked in syscall. + // + // TODO: syscall.Pipe used to cause the goroutine to + // remain blocked in syscall is not portable. 
Replace + // it with a more portable way so this test can run + // on non-unix architecture e.g. Windows. + var p [2]int + if err := syscall.Pipe(p[:]); err != nil { + t.Fatalf("failed to create pipe: %v", err) + } + + var wg sync.WaitGroup + defer func() { + syscall.Write(p[1], []byte("a")) + wg.Wait() + + syscall.Close(p[0]) + syscall.Close(p[1]) + }() + wg.Add(1) + go func() { + var tmp [1]byte + syscall.Read(p[0], tmp[:]) + wg.Done() + }() + + // Start multiple timer goroutines. + allTimers := make([]*time.Timer, 2*runtime.GOMAXPROCS(0)) + defer func() { + for _, timer := range allTimers { + timer.Stop() + } + }() + + var timerSetup sync.WaitGroup + for i := range allTimers { + timerSetup.Add(1) + go func(i int) { + defer timerSetup.Done() + allTimers[i] = time.AfterFunc(time.Hour, nil) + }(i) + } + timerSetup.Wait() + + // Collect and parse trace. + buf := new(bytes.Buffer) + if err := trace.Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + trace.Stop() + + res, err := traceparser.Parse(buf, "") + if err == traceparser.ErrTimeOrder { + t.Skipf("skipping due to golang.org/issue/16755 (timestamps are unreliable): %v", err) + } else if err != nil { + t.Fatalf("failed to parse trace: %v", err) + } + + // Check only one thread for the pipe read goroutine is + // considered in-syscall. + c := viewerDataTraceConsumer(io.Discard, 0, 1<<63-1) + c.consumeViewerEvent = func(ev *traceviewer.Event, _ bool) { + if ev.Name == "Threads" { + arg := ev.Arg.(*threadCountersArg) + if arg.InSyscall > 1 { + t.Errorf("%d threads in syscall at time %v; want less than 1 thread in syscall", arg.InSyscall, ev.Time) + } + } + } + + param := &traceParams{ + parsed: res, + endTime: int64(1<<63 - 1), + } + if err := generateTrace(param, c); err != nil { + t.Fatalf("failed to generate ViewerData: %v", err) + } +} -- cgit v1.2.3