Diffstat (limited to 'src/runtime/pprof')
25 files changed, 5286 insertions, 0 deletions
diff --git a/src/runtime/pprof/elf.go b/src/runtime/pprof/elf.go new file mode 100644 index 0000000..a8b5ea6 --- /dev/null +++ b/src/runtime/pprof/elf.go @@ -0,0 +1,109 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "encoding/binary" + "errors" + "fmt" + "os" +) + +var ( + errBadELF = errors.New("malformed ELF binary") + errNoBuildID = errors.New("no NT_GNU_BUILD_ID found in ELF binary") +) + +// elfBuildID returns the GNU build ID of the named ELF binary, +// without introducing a dependency on debug/elf and its dependencies. +func elfBuildID(file string) (string, error) { + buf := make([]byte, 256) + f, err := os.Open(file) + if err != nil { + return "", err + } + defer f.Close() + + if _, err := f.ReadAt(buf[:64], 0); err != nil { + return "", err + } + + // ELF file begins with \x7F E L F. + if buf[0] != 0x7F || buf[1] != 'E' || buf[2] != 'L' || buf[3] != 'F' { + return "", errBadELF + } + + var byteOrder binary.ByteOrder + switch buf[5] { + default: + return "", errBadELF + case 1: // little-endian + byteOrder = binary.LittleEndian + case 2: // big-endian + byteOrder = binary.BigEndian + } + + var shnum int + var shoff, shentsize int64 + switch buf[4] { + default: + return "", errBadELF + case 1: // 32-bit file header + shoff = int64(byteOrder.Uint32(buf[32:])) + shentsize = int64(byteOrder.Uint16(buf[46:])) + if shentsize != 40 { + return "", errBadELF + } + shnum = int(byteOrder.Uint16(buf[48:])) + case 2: // 64-bit file header + shoff = int64(byteOrder.Uint64(buf[40:])) + shentsize = int64(byteOrder.Uint16(buf[58:])) + if shentsize != 64 { + return "", errBadELF + } + shnum = int(byteOrder.Uint16(buf[60:])) + } + + for i := 0; i < shnum; i++ { + if _, err := f.ReadAt(buf[:shentsize], shoff+int64(i)*shentsize); err != nil { + return "", err + } + if typ := byteOrder.Uint32(buf[4:]); typ != 7 { // SHT_NOTE + continue + } + var off, size int64 + if shentsize == 40 { + // 32-bit section header + off = int64(byteOrder.Uint32(buf[16:])) + size = int64(byteOrder.Uint32(buf[20:])) + } else { + // 64-bit section header + off = int64(byteOrder.Uint64(buf[24:])) + size = int64(byteOrder.Uint64(buf[32:])) + } + size += off + for off < size { + if _, err := f.ReadAt(buf[:16], off); err != nil { // room for header + name GNU\x00 + return "", err + } + nameSize := int(byteOrder.Uint32(buf[0:])) + descSize := int(byteOrder.Uint32(buf[4:])) + noteType := int(byteOrder.Uint32(buf[8:])) + descOff := off + int64(12+(nameSize+3)&^3) + off = descOff + int64((descSize+3)&^3) + if nameSize != 4 || noteType != 3 || buf[12] != 'G' || buf[13] != 'N' || buf[14] != 'U' || buf[15] != '\x00' { // want name GNU\x00 type 3 (NT_GNU_BUILD_ID) + continue + } + if descSize > len(buf) { + return "", errBadELF + } + if _, err := f.ReadAt(buf[:descSize], descOff); err != nil { + return "", err + } + return fmt.Sprintf("%x", buf[:descSize]), nil + } + } + return "", errNoBuildID +} diff --git a/src/runtime/pprof/label.go b/src/runtime/pprof/label.go new file mode 100644 index 0000000..b614f12 --- /dev/null +++ b/src/runtime/pprof/label.go @@ -0,0 +1,108 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "context" + "fmt" + "sort" + "strings" +) + +type label struct { + key string + value string +} + +// LabelSet is a set of labels. 
+type LabelSet struct { + list []label +} + +// labelContextKey is the type of contextKeys used for profiler labels. +type labelContextKey struct{} + +func labelValue(ctx context.Context) labelMap { + labels, _ := ctx.Value(labelContextKey{}).(*labelMap) + if labels == nil { + return labelMap(nil) + } + return *labels +} + +// labelMap is the representation of the label set held in the context type. +// This is an initial implementation, but it will be replaced with something +// that admits incremental immutable modification more efficiently. +type labelMap map[string]string + +// String statisfies Stringer and returns key, value pairs in a consistent +// order. +func (l *labelMap) String() string { + if l == nil { + return "" + } + keyVals := make([]string, 0, len(*l)) + + for k, v := range *l { + keyVals = append(keyVals, fmt.Sprintf("%q:%q", k, v)) + } + + sort.Strings(keyVals) + + return "{" + strings.Join(keyVals, ", ") + "}" +} + +// WithLabels returns a new context.Context with the given labels added. +// A label overwrites a prior label with the same key. +func WithLabels(ctx context.Context, labels LabelSet) context.Context { + childLabels := make(labelMap) + parentLabels := labelValue(ctx) + // TODO(matloob): replace the map implementation with something + // more efficient so creating a child context WithLabels doesn't need + // to clone the map. + for k, v := range parentLabels { + childLabels[k] = v + } + for _, label := range labels.list { + childLabels[label.key] = label.value + } + return context.WithValue(ctx, labelContextKey{}, &childLabels) +} + +// Labels takes an even number of strings representing key-value pairs +// and makes a LabelSet containing them. +// A label overwrites a prior label with the same key. +// Currently only the CPU and goroutine profiles utilize any labels +// information. +// See https://golang.org/issue/23458 for details. +func Labels(args ...string) LabelSet { + if len(args)%2 != 0 { + panic("uneven number of arguments to pprof.Labels") + } + list := make([]label, 0, len(args)/2) + for i := 0; i+1 < len(args); i += 2 { + list = append(list, label{key: args[i], value: args[i+1]}) + } + return LabelSet{list: list} +} + +// Label returns the value of the label with the given key on ctx, and a boolean indicating +// whether that label exists. +func Label(ctx context.Context, key string) (string, bool) { + ctxLabels := labelValue(ctx) + v, ok := ctxLabels[key] + return v, ok +} + +// ForLabels invokes f with each label set on the context. +// The function f should return true to continue iteration or false to stop iteration early. 
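Illustrative aside, not part of the diff: a minimal sketch of how an ordinary program would use the label API defined above (Labels, WithLabels, Label, ForLabels).

package main

import (
	"context"
	"fmt"
	"runtime/pprof"
)

func main() {
	// Attach two labels to a context. A later WithLabels call with the
	// same key would overwrite the earlier value, and Labels panics if
	// given an odd number of arguments.
	ctx := pprof.WithLabels(context.Background(),
		pprof.Labels("handler", "/index", "worker", "42"))

	// Read one label back.
	if v, ok := pprof.Label(ctx, "worker"); ok {
		fmt.Println("worker =", v)
	}

	// Visit every label; returning false would stop the iteration early.
	pprof.ForLabels(ctx, func(key, value string) bool {
		fmt.Printf("%s=%s\n", key, value)
		return true
	})
}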
+func ForLabels(ctx context.Context, f func(key, value string) bool) { + ctxLabels := labelValue(ctx) + for k, v := range ctxLabels { + if !f(k, v) { + break + } + } +} diff --git a/src/runtime/pprof/label_test.go b/src/runtime/pprof/label_test.go new file mode 100644 index 0000000..fcb00bd --- /dev/null +++ b/src/runtime/pprof/label_test.go @@ -0,0 +1,114 @@ +package pprof + +import ( + "context" + "reflect" + "sort" + "testing" +) + +func labelsSorted(ctx context.Context) []label { + ls := []label{} + ForLabels(ctx, func(key, value string) bool { + ls = append(ls, label{key, value}) + return true + }) + sort.Sort(labelSorter(ls)) + return ls +} + +type labelSorter []label + +func (s labelSorter) Len() int { return len(s) } +func (s labelSorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s labelSorter) Less(i, j int) bool { return s[i].key < s[j].key } + +func TestContextLabels(t *testing.T) { + // Background context starts with no labels. + ctx := context.Background() + labels := labelsSorted(ctx) + if len(labels) != 0 { + t.Errorf("labels on background context: want [], got %v ", labels) + } + + // Add a single label. + ctx = WithLabels(ctx, Labels("key", "value")) + // Retrieve it with Label. + v, ok := Label(ctx, "key") + if !ok || v != "value" { + t.Errorf(`Label(ctx, "key"): got %v, %v; want "value", ok`, v, ok) + } + gotLabels := labelsSorted(ctx) + wantLabels := []label{{"key", "value"}} + if !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("(sorted) labels on context: got %v, want %v", gotLabels, wantLabels) + } + + // Add a label with a different key. + ctx = WithLabels(ctx, Labels("key2", "value2")) + v, ok = Label(ctx, "key2") + if !ok || v != "value2" { + t.Errorf(`Label(ctx, "key2"): got %v, %v; want "value2", ok`, v, ok) + } + gotLabels = labelsSorted(ctx) + wantLabels = []label{{"key", "value"}, {"key2", "value2"}} + if !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("(sorted) labels on context: got %v, want %v", gotLabels, wantLabels) + } + + // Add label with first key to test label replacement. + ctx = WithLabels(ctx, Labels("key", "value3")) + v, ok = Label(ctx, "key") + if !ok || v != "value3" { + t.Errorf(`Label(ctx, "key3"): got %v, %v; want "value3", ok`, v, ok) + } + gotLabels = labelsSorted(ctx) + wantLabels = []label{{"key", "value3"}, {"key2", "value2"}} + if !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("(sorted) labels on context: got %v, want %v", gotLabels, wantLabels) + } + + // Labels called with two labels with the same key should pick the second. 
+ ctx = WithLabels(ctx, Labels("key4", "value4a", "key4", "value4b")) + v, ok = Label(ctx, "key4") + if !ok || v != "value4b" { + t.Errorf(`Label(ctx, "key4"): got %v, %v; want "value4b", ok`, v, ok) + } + gotLabels = labelsSorted(ctx) + wantLabels = []label{{"key", "value3"}, {"key2", "value2"}, {"key4", "value4b"}} + if !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("(sorted) labels on context: got %v, want %v", gotLabels, wantLabels) + } +} + +func TestLabelMapStringer(t *testing.T) { + for _, tbl := range []struct { + m labelMap + expected string + }{ + { + m: labelMap{ + // empty map + }, + expected: "{}", + }, { + m: labelMap{ + "foo": "bar", + }, + expected: `{"foo":"bar"}`, + }, { + m: labelMap{ + "foo": "bar", + "key1": "value1", + "key2": "value2", + "key3": "value3", + "key4WithNewline": "\nvalue4", + }, + expected: `{"foo":"bar", "key1":"value1", "key2":"value2", "key3":"value3", "key4WithNewline":"\nvalue4"}`, + }, + } { + if got := tbl.m.String(); tbl.expected != got { + t.Errorf("%#v.String() = %q; want %q", tbl.m, got, tbl.expected) + } + } +} diff --git a/src/runtime/pprof/map.go b/src/runtime/pprof/map.go new file mode 100644 index 0000000..7c75872 --- /dev/null +++ b/src/runtime/pprof/map.go @@ -0,0 +1,90 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import "unsafe" + +// A profMap is a map from (stack, tag) to mapEntry. +// It grows without bound, but that's assumed to be OK. +type profMap struct { + hash map[uintptr]*profMapEntry + all *profMapEntry + last *profMapEntry + free []profMapEntry + freeStk []uintptr +} + +// A profMapEntry is a single entry in the profMap. +type profMapEntry struct { + nextHash *profMapEntry // next in hash list + nextAll *profMapEntry // next in list of all entries + stk []uintptr + tag unsafe.Pointer + count int64 +} + +func (m *profMap) lookup(stk []uint64, tag unsafe.Pointer) *profMapEntry { + // Compute hash of (stk, tag). + h := uintptr(0) + for _, x := range stk { + h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1))) + h += uintptr(x) * 41 + } + h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1))) + h += uintptr(tag) * 41 + + // Find entry if present. + var last *profMapEntry +Search: + for e := m.hash[h]; e != nil; last, e = e, e.nextHash { + if len(e.stk) != len(stk) || e.tag != tag { + continue + } + for j := range stk { + if e.stk[j] != uintptr(stk[j]) { + continue Search + } + } + // Move to front. + if last != nil { + last.nextHash = e.nextHash + e.nextHash = m.hash[h] + m.hash[h] = e + } + return e + } + + // Add new entry. + if len(m.free) < 1 { + m.free = make([]profMapEntry, 128) + } + e := &m.free[0] + m.free = m.free[1:] + e.nextHash = m.hash[h] + e.tag = tag + + if len(m.freeStk) < len(stk) { + m.freeStk = make([]uintptr, 1024) + } + // Limit cap to prevent append from clobbering freeStk. + e.stk = m.freeStk[:len(stk):len(stk)] + m.freeStk = m.freeStk[len(stk):] + + for j := range stk { + e.stk[j] = uintptr(stk[j]) + } + if m.hash == nil { + m.hash = make(map[uintptr]*profMapEntry) + } + m.hash[h] = e + if m.all == nil { + m.all = e + m.last = e + } else { + m.last.nextAll = e + m.last = e + } + return e +} diff --git a/src/runtime/pprof/mprof_test.go b/src/runtime/pprof/mprof_test.go new file mode 100644 index 0000000..665487a --- /dev/null +++ b/src/runtime/pprof/mprof_test.go @@ -0,0 +1,176 @@ +// Copyright 2014 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !js + +package pprof + +import ( + "bytes" + "fmt" + "internal/profile" + "reflect" + "regexp" + "runtime" + "testing" + "unsafe" +) + +var memSink any + +func allocateTransient1M() { + for i := 0; i < 1024; i++ { + memSink = &struct{ x [1024]byte }{} + } +} + +//go:noinline +func allocateTransient2M() { + memSink = make([]byte, 2<<20) +} + +func allocateTransient2MInline() { + memSink = make([]byte, 2<<20) +} + +type Obj32 struct { + link *Obj32 + pad [32 - unsafe.Sizeof(uintptr(0))]byte +} + +var persistentMemSink *Obj32 + +func allocatePersistent1K() { + for i := 0; i < 32; i++ { + // Can't use slice because that will introduce implicit allocations. + obj := &Obj32{link: persistentMemSink} + persistentMemSink = obj + } +} + +// Allocate transient memory using reflect.Call. + +func allocateReflectTransient() { + memSink = make([]byte, 2<<20) +} + +func allocateReflect() { + rv := reflect.ValueOf(allocateReflectTransient) + rv.Call(nil) +} + +var memoryProfilerRun = 0 + +func TestMemoryProfiler(t *testing.T) { + // Disable sampling, otherwise it's difficult to assert anything. + oldRate := runtime.MemProfileRate + runtime.MemProfileRate = 1 + defer func() { + runtime.MemProfileRate = oldRate + }() + + // Allocate a meg to ensure that mcache.nextSample is updated to 1. + for i := 0; i < 1024; i++ { + memSink = make([]byte, 1024) + } + + // Do the interesting allocations. + allocateTransient1M() + allocateTransient2M() + allocateTransient2MInline() + allocatePersistent1K() + allocateReflect() + memSink = nil + + runtime.GC() // materialize stats + + memoryProfilerRun++ + + tests := []struct { + stk []string + legacy string + }{{ + stk: []string{"runtime/pprof.allocatePersistent1K", "runtime/pprof.TestMemoryProfiler"}, + legacy: fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ +# 0x[0-9,a-f]+ runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:47 +# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:82 +`, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun), + }, { + stk: []string{"runtime/pprof.allocateTransient1M", "runtime/pprof.TestMemoryProfiler"}, + legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ +# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:24 +# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:79 +`, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun), + }, { + stk: []string{"runtime/pprof.allocateTransient2M", "runtime/pprof.TestMemoryProfiler"}, + legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ +# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:30 +# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:80 +`, memoryProfilerRun, (2<<20)*memoryProfilerRun), + }, { + stk: []string{"runtime/pprof.allocateTransient2MInline", "runtime/pprof.TestMemoryProfiler"}, + legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ +# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2MInline\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:34 +# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:81 +`, 
memoryProfilerRun, (2<<20)*memoryProfilerRun), + }, { + stk: []string{"runtime/pprof.allocateReflectTransient"}, + legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+ +# 0x[0-9,a-f]+ runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:55 +`, memoryProfilerRun, (2<<20)*memoryProfilerRun), + }} + + t.Run("debug=1", func(t *testing.T) { + var buf bytes.Buffer + if err := Lookup("heap").WriteTo(&buf, 1); err != nil { + t.Fatalf("failed to write heap profile: %v", err) + } + + for _, test := range tests { + if !regexp.MustCompile(test.legacy).Match(buf.Bytes()) { + t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test.legacy, buf.String()) + } + } + }) + + t.Run("proto", func(t *testing.T) { + var buf bytes.Buffer + if err := Lookup("heap").WriteTo(&buf, 0); err != nil { + t.Fatalf("failed to write heap profile: %v", err) + } + p, err := profile.Parse(&buf) + if err != nil { + t.Fatalf("failed to parse heap profile: %v", err) + } + t.Logf("Profile = %v", p) + + stks := stacks(p) + for _, test := range tests { + if !containsStack(stks, test.stk) { + t.Fatalf("No matching stack entry for %q\n\nProfile:\n%v\n", test.stk, p) + } + } + + if !containsInlinedCall(TestMemoryProfiler, 4<<10) { + t.Logf("Can't determine whether allocateTransient2MInline was inlined into TestMemoryProfiler.") + return + } + + // Check the inlined function location is encoded correctly. + for _, loc := range p.Location { + inlinedCaller, inlinedCallee := false, false + for _, line := range loc.Line { + if line.Function.Name == "runtime/pprof.allocateTransient2MInline" { + inlinedCallee = true + } + if inlinedCallee && line.Function.Name == "runtime/pprof.TestMemoryProfiler" { + inlinedCaller = true + } + } + if inlinedCallee != inlinedCaller { + t.Errorf("want allocateTransient2MInline after TestMemoryProfiler in one location, got separate location entries:\n%v", loc) + } + } + }) +} diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go new file mode 100644 index 0000000..d75efce --- /dev/null +++ b/src/runtime/pprof/pprof.go @@ -0,0 +1,914 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package pprof writes runtime profiling data in the format expected +// by the pprof visualization tool. +// +// Profiling a Go program +// +// The first step to profiling a Go program is to enable profiling. +// Support for profiling benchmarks built with the standard testing +// package is built into go test. For example, the following command +// runs benchmarks in the current directory and writes the CPU and +// memory profiles to cpu.prof and mem.prof: +// +// go test -cpuprofile cpu.prof -memprofile mem.prof -bench . +// +// To add equivalent profiling support to a standalone program, add +// code like the following to your main function: +// +// var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") +// var memprofile = flag.String("memprofile", "", "write memory profile to `file`") +// +// func main() { +// flag.Parse() +// if *cpuprofile != "" { +// f, err := os.Create(*cpuprofile) +// if err != nil { +// log.Fatal("could not create CPU profile: ", err) +// } +// defer f.Close() // error handling omitted for example +// if err := pprof.StartCPUProfile(f); err != nil { +// log.Fatal("could not start CPU profile: ", err) +// } +// defer pprof.StopCPUProfile() +// } +// +// // ... rest of the program ... 
+// +// if *memprofile != "" { +// f, err := os.Create(*memprofile) +// if err != nil { +// log.Fatal("could not create memory profile: ", err) +// } +// defer f.Close() // error handling omitted for example +// runtime.GC() // get up-to-date statistics +// if err := pprof.WriteHeapProfile(f); err != nil { +// log.Fatal("could not write memory profile: ", err) +// } +// } +// } +// +// There is also a standard HTTP interface to profiling data. Adding +// the following line will install handlers under the /debug/pprof/ +// URL to download live profiles: +// +// import _ "net/http/pprof" +// +// See the net/http/pprof package for more details. +// +// Profiles can then be visualized with the pprof tool: +// +// go tool pprof cpu.prof +// +// There are many commands available from the pprof command line. +// Commonly used commands include "top", which prints a summary of the +// top program hot-spots, and "web", which opens an interactive graph +// of hot-spots and their call graphs. Use "help" for information on +// all pprof commands. +// +// For more information about pprof, see +// https://github.com/google/pprof/blob/master/doc/README.md. +package pprof + +import ( + "bufio" + "bytes" + "fmt" + "internal/abi" + "io" + "runtime" + "sort" + "strings" + "sync" + "text/tabwriter" + "time" + "unsafe" +) + +// BUG(rsc): Profiles are only as good as the kernel support used to generate them. +// See https://golang.org/issue/13841 for details about known problems. + +// A Profile is a collection of stack traces showing the call sequences +// that led to instances of a particular event, such as allocation. +// Packages can create and maintain their own profiles; the most common +// use is for tracking resources that must be explicitly closed, such as files +// or network connections. +// +// A Profile's methods can be called from multiple goroutines simultaneously. +// +// Each Profile has a unique name. A few profiles are predefined: +// +// goroutine - stack traces of all current goroutines +// heap - a sampling of memory allocations of live objects +// allocs - a sampling of all past memory allocations +// threadcreate - stack traces that led to the creation of new OS threads +// block - stack traces that led to blocking on synchronization primitives +// mutex - stack traces of holders of contended mutexes +// +// These predefined profiles maintain themselves and panic on an explicit +// Add or Remove method call. +// +// The heap profile reports statistics as of the most recently completed +// garbage collection; it elides more recent allocation to avoid skewing +// the profile away from live data and toward garbage. +// If there has been no garbage collection at all, the heap profile reports +// all known allocations. This exception helps mainly in programs running +// without garbage collection enabled, usually for debugging purposes. +// +// The heap profile tracks both the allocation sites for all live objects in +// the application memory and for all objects allocated since the program start. +// Pprof's -inuse_space, -inuse_objects, -alloc_space, and -alloc_objects +// flags select which to display, defaulting to -inuse_space (live objects, +// scaled by size). +// +// The allocs profile is the same as the heap profile but changes the default +// pprof display to -alloc_space, the total number of bytes allocated since +// the program began (including garbage-collected bytes). +// +// The CPU profile is not available as a Profile. 
It has a special API, +// the StartCPUProfile and StopCPUProfile functions, because it streams +// output to a writer during profiling. +// +type Profile struct { + name string + mu sync.Mutex + m map[any][]uintptr + count func() int + write func(io.Writer, int) error +} + +// profiles records all registered profiles. +var profiles struct { + mu sync.Mutex + m map[string]*Profile +} + +var goroutineProfile = &Profile{ + name: "goroutine", + count: countGoroutine, + write: writeGoroutine, +} + +var threadcreateProfile = &Profile{ + name: "threadcreate", + count: countThreadCreate, + write: writeThreadCreate, +} + +var heapProfile = &Profile{ + name: "heap", + count: countHeap, + write: writeHeap, +} + +var allocsProfile = &Profile{ + name: "allocs", + count: countHeap, // identical to heap profile + write: writeAlloc, +} + +var blockProfile = &Profile{ + name: "block", + count: countBlock, + write: writeBlock, +} + +var mutexProfile = &Profile{ + name: "mutex", + count: countMutex, + write: writeMutex, +} + +func lockProfiles() { + profiles.mu.Lock() + if profiles.m == nil { + // Initial built-in profiles. + profiles.m = map[string]*Profile{ + "goroutine": goroutineProfile, + "threadcreate": threadcreateProfile, + "heap": heapProfile, + "allocs": allocsProfile, + "block": blockProfile, + "mutex": mutexProfile, + } + } +} + +func unlockProfiles() { + profiles.mu.Unlock() +} + +// NewProfile creates a new profile with the given name. +// If a profile with that name already exists, NewProfile panics. +// The convention is to use a 'import/path.' prefix to create +// separate name spaces for each package. +// For compatibility with various tools that read pprof data, +// profile names should not contain spaces. +func NewProfile(name string) *Profile { + lockProfiles() + defer unlockProfiles() + if name == "" { + panic("pprof: NewProfile with empty name") + } + if profiles.m[name] != nil { + panic("pprof: NewProfile name already in use: " + name) + } + p := &Profile{ + name: name, + m: map[any][]uintptr{}, + } + profiles.m[name] = p + return p +} + +// Lookup returns the profile with the given name, or nil if no such profile exists. +func Lookup(name string) *Profile { + lockProfiles() + defer unlockProfiles() + return profiles.m[name] +} + +// Profiles returns a slice of all the known profiles, sorted by name. +func Profiles() []*Profile { + lockProfiles() + defer unlockProfiles() + + all := make([]*Profile, 0, len(profiles.m)) + for _, p := range profiles.m { + all = append(all, p) + } + + sort.Slice(all, func(i, j int) bool { return all[i].name < all[j].name }) + return all +} + +// Name returns this profile's name, which can be passed to Lookup to reobtain the profile. +func (p *Profile) Name() string { + return p.name +} + +// Count returns the number of execution stacks currently in the profile. +func (p *Profile) Count() int { + p.mu.Lock() + defer p.mu.Unlock() + if p.count != nil { + return p.count() + } + return len(p.m) +} + +// Add adds the current execution stack to the profile, associated with value. +// Add stores value in an internal map, so value must be suitable for use as +// a map key and will not be garbage collected until the corresponding +// call to Remove. Add panics if the profile already contains a stack for value. +// +// The skip parameter has the same meaning as runtime.Caller's skip +// and controls where the stack trace begins. Passing skip=0 begins the +// trace in the function calling Add. 
For example, given this +// execution stack: +// +// Add +// called from rpc.NewClient +// called from mypkg.Run +// called from main.main +// +// Passing skip=0 begins the stack trace at the call to Add inside rpc.NewClient. +// Passing skip=1 begins the stack trace at the call to NewClient inside mypkg.Run. +// +func (p *Profile) Add(value any, skip int) { + if p.name == "" { + panic("pprof: use of uninitialized Profile") + } + if p.write != nil { + panic("pprof: Add called on built-in Profile " + p.name) + } + + stk := make([]uintptr, 32) + n := runtime.Callers(skip+1, stk[:]) + stk = stk[:n] + if len(stk) == 0 { + // The value for skip is too large, and there's no stack trace to record. + stk = []uintptr{abi.FuncPCABIInternal(lostProfileEvent)} + } + + p.mu.Lock() + defer p.mu.Unlock() + if p.m[value] != nil { + panic("pprof: Profile.Add of duplicate value") + } + p.m[value] = stk +} + +// Remove removes the execution stack associated with value from the profile. +// It is a no-op if the value is not in the profile. +func (p *Profile) Remove(value any) { + p.mu.Lock() + defer p.mu.Unlock() + delete(p.m, value) +} + +// WriteTo writes a pprof-formatted snapshot of the profile to w. +// If a write to w returns an error, WriteTo returns that error. +// Otherwise, WriteTo returns nil. +// +// The debug parameter enables additional output. +// Passing debug=0 writes the gzip-compressed protocol buffer described +// in https://github.com/google/pprof/tree/master/proto#overview. +// Passing debug=1 writes the legacy text format with comments +// translating addresses to function names and line numbers, so that a +// programmer can read the profile without tools. +// +// The predefined profiles may assign meaning to other debug values; +// for example, when printing the "goroutine" profile, debug=2 means to +// print the goroutine stacks in the same form that a Go program uses +// when dying due to an unrecovered panic. +func (p *Profile) WriteTo(w io.Writer, debug int) error { + if p.name == "" { + panic("pprof: use of zero Profile") + } + if p.write != nil { + return p.write(w, debug) + } + + // Obtain consistent snapshot under lock; then process without lock. + p.mu.Lock() + all := make([][]uintptr, 0, len(p.m)) + for _, stk := range p.m { + all = append(all, stk) + } + p.mu.Unlock() + + // Map order is non-deterministic; make output deterministic. + sort.Slice(all, func(i, j int) bool { + t, u := all[i], all[j] + for k := 0; k < len(t) && k < len(u); k++ { + if t[k] != u[k] { + return t[k] < u[k] + } + } + return len(t) < len(u) + }) + + return printCountProfile(w, debug, p.name, stackProfile(all)) +} + +type stackProfile [][]uintptr + +func (x stackProfile) Len() int { return len(x) } +func (x stackProfile) Stack(i int) []uintptr { return x[i] } +func (x stackProfile) Label(i int) *labelMap { return nil } + +// A countProfile is a set of stack traces to be printed as counts +// grouped by stack trace. There are multiple implementations: +// all that matters is that we can find out how many traces there are +// and obtain each trace in turn. +type countProfile interface { + Len() int + Stack(i int) []uintptr + Label(i int) *labelMap +} + +// printCountCycleProfile outputs block profile records (for block or mutex profiles) +// as the pprof-proto format output. Translations from cycle count to time duration +// are done because The proto expects count and time (nanoseconds) instead of count +// and the number of cycles for block, contention profiles. 
+// Possible 'scaler' functions are scaleBlockProfile and scaleMutexProfile. +func printCountCycleProfile(w io.Writer, countName, cycleName string, scaler func(int64, float64) (int64, float64), records []runtime.BlockProfileRecord) error { + // Output profile in protobuf form. + b := newProfileBuilder(w) + b.pbValueType(tagProfile_PeriodType, countName, "count") + b.pb.int64Opt(tagProfile_Period, 1) + b.pbValueType(tagProfile_SampleType, countName, "count") + b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds") + + cpuGHz := float64(runtime_cyclesPerSecond()) / 1e9 + + values := []int64{0, 0} + var locs []uint64 + for _, r := range records { + count, nanosec := scaler(r.Count, float64(r.Cycles)/cpuGHz) + values[0] = count + values[1] = int64(nanosec) + // For count profiles, all stack addresses are + // return PCs, which is what appendLocsForStack expects. + locs = b.appendLocsForStack(locs[:0], r.Stack()) + b.pbSample(values, locs, nil) + } + b.build() + return nil +} + +// printCountProfile prints a countProfile at the specified debug level. +// The profile will be in compressed proto format unless debug is nonzero. +func printCountProfile(w io.Writer, debug int, name string, p countProfile) error { + // Build count of each stack. + var buf bytes.Buffer + key := func(stk []uintptr, lbls *labelMap) string { + buf.Reset() + fmt.Fprintf(&buf, "@") + for _, pc := range stk { + fmt.Fprintf(&buf, " %#x", pc) + } + if lbls != nil { + buf.WriteString("\n# labels: ") + buf.WriteString(lbls.String()) + } + return buf.String() + } + count := map[string]int{} + index := map[string]int{} + var keys []string + n := p.Len() + for i := 0; i < n; i++ { + k := key(p.Stack(i), p.Label(i)) + if count[k] == 0 { + index[k] = i + keys = append(keys, k) + } + count[k]++ + } + + sort.Sort(&keysByCount{keys, count}) + + if debug > 0 { + // Print debug profile in legacy format + tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + fmt.Fprintf(tw, "%s profile: total %d\n", name, p.Len()) + for _, k := range keys { + fmt.Fprintf(tw, "%d %s\n", count[k], k) + printStackRecord(tw, p.Stack(index[k]), false) + } + return tw.Flush() + } + + // Output profile in protobuf form. + b := newProfileBuilder(w) + b.pbValueType(tagProfile_PeriodType, name, "count") + b.pb.int64Opt(tagProfile_Period, 1) + b.pbValueType(tagProfile_SampleType, name, "count") + + values := []int64{0} + var locs []uint64 + for _, k := range keys { + values[0] = int64(count[k]) + // For count profiles, all stack addresses are + // return PCs, which is what appendLocsForStack expects. + locs = b.appendLocsForStack(locs[:0], p.Stack(index[k])) + idx := index[k] + var labels func() + if p.Label(idx) != nil { + labels = func() { + for k, v := range *p.Label(idx) { + b.pbLabel(tagSample_Label, k, v, 0) + } + } + } + b.pbSample(values, locs, labels) + } + b.build() + return nil +} + +// keysByCount sorts keys with higher counts first, breaking ties by key string order. +type keysByCount struct { + keys []string + count map[string]int +} + +func (x *keysByCount) Len() int { return len(x.keys) } +func (x *keysByCount) Swap(i, j int) { x.keys[i], x.keys[j] = x.keys[j], x.keys[i] } +func (x *keysByCount) Less(i, j int) bool { + ki, kj := x.keys[i], x.keys[j] + ci, cj := x.count[ki], x.count[kj] + if ci != cj { + return ci > cj + } + return ki < kj +} + +// printStackRecord prints the function + source line information +// for a single stack trace. 
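Illustrative aside, not part of the diff: a sketch of the custom Profile API documented earlier in pprof.go (NewProfile, Add, Remove, WriteTo), used here to track open connections. The package name conntrack and the helpers dial, closeConn, and dumpConns are invented for the example.

package conntrack

import (
	"io"
	"net"
	"runtime/pprof"
)

// connProfile records the stack of every call site that opened a
// connection that has not yet been closed. The "import/path." naming
// convention follows the NewProfile documentation above.
var connProfile = pprof.NewProfile("example.com/conntrack.connections")

func dial(addr string) (net.Conn, error) {
	c, err := net.Dial("tcp", addr)
	if err != nil {
		return nil, err
	}
	// skip=1 starts the recorded stack at dial's caller.
	connProfile.Add(c, 1)
	return c, nil
}

func closeConn(c net.Conn) error {
	connProfile.Remove(c)
	return c.Close()
}

// dumpConns writes the profile; debug=1 selects the legacy text format
// with symbolized stacks, debug=0 would write the compressed proto form.
func dumpConns(w io.Writer) error {
	return connProfile.WriteTo(w, 1)
}

When the net/http/pprof handlers mentioned in the package documentation are installed, a registered profile like this is also served under /debug/pprof/ by name.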
+func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) { + show := allFrames + frames := runtime.CallersFrames(stk) + for { + frame, more := frames.Next() + name := frame.Function + if name == "" { + show = true + fmt.Fprintf(w, "#\t%#x\n", frame.PC) + } else if name != "runtime.goexit" && (show || !strings.HasPrefix(name, "runtime.")) { + // Hide runtime.goexit and any runtime functions at the beginning. + // This is useful mainly for allocation traces. + show = true + fmt.Fprintf(w, "#\t%#x\t%s+%#x\t%s:%d\n", frame.PC, name, frame.PC-frame.Entry, frame.File, frame.Line) + } + if !more { + break + } + } + if !show { + // We didn't print anything; do it again, + // and this time include runtime functions. + printStackRecord(w, stk, true) + return + } + fmt.Fprintf(w, "\n") +} + +// Interface to system profiles. + +// WriteHeapProfile is shorthand for Lookup("heap").WriteTo(w, 0). +// It is preserved for backwards compatibility. +func WriteHeapProfile(w io.Writer) error { + return writeHeap(w, 0) +} + +// countHeap returns the number of records in the heap profile. +func countHeap() int { + n, _ := runtime.MemProfile(nil, true) + return n +} + +// writeHeap writes the current runtime heap profile to w. +func writeHeap(w io.Writer, debug int) error { + return writeHeapInternal(w, debug, "") +} + +// writeAlloc writes the current runtime heap profile to w +// with the total allocation space as the default sample type. +func writeAlloc(w io.Writer, debug int) error { + return writeHeapInternal(w, debug, "alloc_space") +} + +func writeHeapInternal(w io.Writer, debug int, defaultSampleType string) error { + var memStats *runtime.MemStats + if debug != 0 { + // Read mem stats first, so that our other allocations + // do not appear in the statistics. + memStats = new(runtime.MemStats) + runtime.ReadMemStats(memStats) + } + + // Find out how many records there are (MemProfile(nil, true)), + // allocate that many records, and get the data. + // There's a race—more records might be added between + // the two calls—so allocate a few extra records for safety + // and also try again if we're very unlucky. + // The loop should only execute one iteration in the common case. + var p []runtime.MemProfileRecord + n, ok := runtime.MemProfile(nil, true) + for { + // Allocate room for a slightly bigger profile, + // in case a few more entries have been added + // since the call to MemProfile. + p = make([]runtime.MemProfileRecord, n+50) + n, ok = runtime.MemProfile(p, true) + if ok { + p = p[0:n] + break + } + // Profile grew; try again. + } + + if debug == 0 { + return writeHeapProto(w, p, int64(runtime.MemProfileRate), defaultSampleType) + } + + sort.Slice(p, func(i, j int) bool { return p[i].InUseBytes() > p[j].InUseBytes() }) + + b := bufio.NewWriter(w) + tw := tabwriter.NewWriter(b, 1, 8, 1, '\t', 0) + w = tw + + var total runtime.MemProfileRecord + for i := range p { + r := &p[i] + total.AllocBytes += r.AllocBytes + total.AllocObjects += r.AllocObjects + total.FreeBytes += r.FreeBytes + total.FreeObjects += r.FreeObjects + } + + // Technically the rate is MemProfileRate not 2*MemProfileRate, + // but early versions of the C++ heap profiler reported 2*MemProfileRate, + // so that's what pprof has come to expect. 
+ fmt.Fprintf(w, "heap profile: %d: %d [%d: %d] @ heap/%d\n", + total.InUseObjects(), total.InUseBytes(), + total.AllocObjects, total.AllocBytes, + 2*runtime.MemProfileRate) + + for i := range p { + r := &p[i] + fmt.Fprintf(w, "%d: %d [%d: %d] @", + r.InUseObjects(), r.InUseBytes(), + r.AllocObjects, r.AllocBytes) + for _, pc := range r.Stack() { + fmt.Fprintf(w, " %#x", pc) + } + fmt.Fprintf(w, "\n") + printStackRecord(w, r.Stack(), false) + } + + // Print memstats information too. + // Pprof will ignore, but useful for people + s := memStats + fmt.Fprintf(w, "\n# runtime.MemStats\n") + fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc) + fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc) + fmt.Fprintf(w, "# Sys = %d\n", s.Sys) + fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups) + fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs) + fmt.Fprintf(w, "# Frees = %d\n", s.Frees) + + fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc) + fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys) + fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle) + fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse) + fmt.Fprintf(w, "# HeapReleased = %d\n", s.HeapReleased) + fmt.Fprintf(w, "# HeapObjects = %d\n", s.HeapObjects) + + fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys) + fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys) + fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys) + fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys) + fmt.Fprintf(w, "# GCSys = %d\n", s.GCSys) + fmt.Fprintf(w, "# OtherSys = %d\n", s.OtherSys) + + fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC) + fmt.Fprintf(w, "# LastGC = %d\n", s.LastGC) + fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs) + fmt.Fprintf(w, "# PauseEnd = %d\n", s.PauseEnd) + fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC) + fmt.Fprintf(w, "# NumForcedGC = %d\n", s.NumForcedGC) + fmt.Fprintf(w, "# GCCPUFraction = %v\n", s.GCCPUFraction) + fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC) + + // Also flush out MaxRSS on supported platforms. + addMaxRSS(w) + + tw.Flush() + return b.Flush() +} + +// countThreadCreate returns the size of the current ThreadCreateProfile. +func countThreadCreate() int { + n, _ := runtime.ThreadCreateProfile(nil) + return n +} + +// writeThreadCreate writes the current runtime ThreadCreateProfile to w. +func writeThreadCreate(w io.Writer, debug int) error { + // Until https://golang.org/issues/6104 is addressed, wrap + // ThreadCreateProfile because there's no point in tracking labels when we + // don't get any stack-traces. + return writeRuntimeProfile(w, debug, "threadcreate", func(p []runtime.StackRecord, _ []unsafe.Pointer) (n int, ok bool) { + return runtime.ThreadCreateProfile(p) + }) +} + +// countGoroutine returns the number of goroutines. +func countGoroutine() int { + return runtime.NumGoroutine() +} + +// runtime_goroutineProfileWithLabels is defined in runtime/mprof.go +func runtime_goroutineProfileWithLabels(p []runtime.StackRecord, labels []unsafe.Pointer) (n int, ok bool) + +// writeGoroutine writes the current runtime GoroutineProfile to w. +func writeGoroutine(w io.Writer, debug int) error { + if debug >= 2 { + return writeGoroutineStacks(w) + } + return writeRuntimeProfile(w, debug, "goroutine", runtime_goroutineProfileWithLabels) +} + +func writeGoroutineStacks(w io.Writer) error { + // We don't know how big the buffer needs to be to collect + // all the goroutines. Start with 1 MB and try a few times, doubling each time. + // Give up and use a truncated trace if 64 MB is not enough. 
+ buf := make([]byte, 1<<20) + for i := 0; ; i++ { + n := runtime.Stack(buf, true) + if n < len(buf) { + buf = buf[:n] + break + } + if len(buf) >= 64<<20 { + // Filled 64 MB - stop there. + break + } + buf = make([]byte, 2*len(buf)) + } + _, err := w.Write(buf) + return err +} + +func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord, []unsafe.Pointer) (int, bool)) error { + // Find out how many records there are (fetch(nil)), + // allocate that many records, and get the data. + // There's a race—more records might be added between + // the two calls—so allocate a few extra records for safety + // and also try again if we're very unlucky. + // The loop should only execute one iteration in the common case. + var p []runtime.StackRecord + var labels []unsafe.Pointer + n, ok := fetch(nil, nil) + for { + // Allocate room for a slightly bigger profile, + // in case a few more entries have been added + // since the call to ThreadProfile. + p = make([]runtime.StackRecord, n+10) + labels = make([]unsafe.Pointer, n+10) + n, ok = fetch(p, labels) + if ok { + p = p[0:n] + break + } + // Profile grew; try again. + } + + return printCountProfile(w, debug, name, &runtimeProfile{p, labels}) +} + +type runtimeProfile struct { + stk []runtime.StackRecord + labels []unsafe.Pointer +} + +func (p *runtimeProfile) Len() int { return len(p.stk) } +func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack() } +func (p *runtimeProfile) Label(i int) *labelMap { return (*labelMap)(p.labels[i]) } + +var cpu struct { + sync.Mutex + profiling bool + done chan bool +} + +// StartCPUProfile enables CPU profiling for the current process. +// While profiling, the profile will be buffered and written to w. +// StartCPUProfile returns an error if profiling is already enabled. +// +// On Unix-like systems, StartCPUProfile does not work by default for +// Go code built with -buildmode=c-archive or -buildmode=c-shared. +// StartCPUProfile relies on the SIGPROF signal, but that signal will +// be delivered to the main program's SIGPROF signal handler (if any) +// not to the one used by Go. To make it work, call os/signal.Notify +// for syscall.SIGPROF, but note that doing so may break any profiling +// being done by the main program. +func StartCPUProfile(w io.Writer) error { + // The runtime routines allow a variable profiling rate, + // but in practice operating systems cannot trigger signals + // at more than about 500 Hz, and our processing of the + // signal is not cheap (mostly getting the stack trace). + // 100 Hz is a reasonable choice: it is frequent enough to + // produce useful data, rare enough not to bog down the + // system, and a nice round number to make it easy to + // convert sample counts to seconds. Instead of requiring + // each client to specify the frequency, we hard code it. + const hz = 100 + + cpu.Lock() + defer cpu.Unlock() + if cpu.done == nil { + cpu.done = make(chan bool) + } + // Double-check. + if cpu.profiling { + return fmt.Errorf("cpu profiling already in use") + } + cpu.profiling = true + runtime.SetCPUProfileRate(hz) + go profileWriter(w) + return nil +} + +// readProfile, provided by the runtime, returns the next chunk of +// binary CPU profiling stack trace data, blocking until data is available. +// If profiling is turned off and all the profile data accumulated while it was +// on has been returned, readProfile returns eof=true. +// The caller must save the returned data and tags before calling readProfile again. 
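Illustrative aside, not part of the diff: a sketch of region-scoped CPU profiling with labels. It assumes pprof.Do, which belongs to this package but does not appear in the excerpt above; the output file name cpu.prof and the work function are placeholders.

package main

import (
	"context"
	"log"
	"os"
	"runtime/pprof"
)

func main() {
	f, err := os.Create("cpu.prof")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	if err := pprof.StartCPUProfile(f); err != nil {
		log.Fatal(err)
	}
	defer pprof.StopCPUProfile()

	// CPU samples taken while the function passed to Do runs carry the
	// "phase" label, which go tool pprof can filter on (e.g. -tagfocus).
	pprof.Do(context.Background(), pprof.Labels("phase", "indexing"), func(ctx context.Context) {
		work()
	})
}

// work stands in for the CPU-bound code being profiled.
func work() {
	x := 0
	for i := 0; i < 1e8; i++ {
		x += i
	}
	_ = x
}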
+func readProfile() (data []uint64, tags []unsafe.Pointer, eof bool) + +func profileWriter(w io.Writer) { + b := newProfileBuilder(w) + var err error + for { + time.Sleep(100 * time.Millisecond) + data, tags, eof := readProfile() + if e := b.addCPUData(data, tags); e != nil && err == nil { + err = e + } + if eof { + break + } + } + if err != nil { + // The runtime should never produce an invalid or truncated profile. + // It drops records that can't fit into its log buffers. + panic("runtime/pprof: converting profile: " + err.Error()) + } + b.build() + cpu.done <- true +} + +// StopCPUProfile stops the current CPU profile, if any. +// StopCPUProfile only returns after all the writes for the +// profile have completed. +func StopCPUProfile() { + cpu.Lock() + defer cpu.Unlock() + + if !cpu.profiling { + return + } + cpu.profiling = false + runtime.SetCPUProfileRate(0) + <-cpu.done +} + +// countBlock returns the number of records in the blocking profile. +func countBlock() int { + n, _ := runtime.BlockProfile(nil) + return n +} + +// countMutex returns the number of records in the mutex profile. +func countMutex() int { + n, _ := runtime.MutexProfile(nil) + return n +} + +// writeBlock writes the current blocking profile to w. +func writeBlock(w io.Writer, debug int) error { + return writeProfileInternal(w, debug, "contention", runtime.BlockProfile, scaleBlockProfile) +} + +func scaleBlockProfile(cnt int64, ns float64) (int64, float64) { + // Do nothing. + // The current way of block profile sampling makes it + // hard to compute the unsampled number. The legacy block + // profile parse doesn't attempt to scale or unsample. + return cnt, ns +} + +// writeMutex writes the current mutex profile to w. +func writeMutex(w io.Writer, debug int) error { + return writeProfileInternal(w, debug, "mutex", runtime.MutexProfile, scaleMutexProfile) +} + +// writeProfileInternal writes the current blocking or mutex profile depending on the passed parameters +func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile func([]runtime.BlockProfileRecord) (int, bool), scaleProfile func(int64, float64) (int64, float64)) error { + var p []runtime.BlockProfileRecord + n, ok := runtimeProfile(nil) + for { + p = make([]runtime.BlockProfileRecord, n+50) + n, ok = runtimeProfile(p) + if ok { + p = p[:n] + break + } + } + + sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles }) + + if debug <= 0 { + return printCountCycleProfile(w, "contentions", "delay", scaleProfile, p) + } + + b := bufio.NewWriter(w) + tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + w = tw + + fmt.Fprintf(w, "--- %v:\n", name) + fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond()) + if name == "mutex" { + fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1)) + } + for i := range p { + r := &p[i] + fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count) + for _, pc := range r.Stack() { + fmt.Fprintf(w, " %#x", pc) + } + fmt.Fprint(w, "\n") + if debug > 0 { + printStackRecord(w, r.Stack(), true) + } + } + + if tw != nil { + tw.Flush() + } + return b.Flush() +} + +func scaleMutexProfile(cnt int64, ns float64) (int64, float64) { + period := runtime.SetMutexProfileFraction(-1) + return cnt * int64(period), ns * float64(period) +} + +func runtime_cyclesPerSecond() int64 diff --git a/src/runtime/pprof/pprof_norusage.go b/src/runtime/pprof/pprof_norusage.go new file mode 100644 index 0000000..cbc5176 --- /dev/null +++ b/src/runtime/pprof/pprof_norusage.go @@ -0,0 +1,15 @@ +// Copyright 2019 The Go 
Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !darwin && !linux + +package pprof + +import ( + "io" +) + +// Stub call for platforms that don't support rusage. +func addMaxRSS(w io.Writer) { +} diff --git a/src/runtime/pprof/pprof_rusage.go b/src/runtime/pprof/pprof_rusage.go new file mode 100644 index 0000000..46263fe --- /dev/null +++ b/src/runtime/pprof/pprof_rusage.go @@ -0,0 +1,31 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin || linux + +package pprof + +import ( + "fmt" + "io" + "runtime" + "syscall" +) + +// Adds MaxRSS to platforms that are supported. +func addMaxRSS(w io.Writer) { + var rssToBytes uintptr + switch runtime.GOOS { + case "linux", "android": + rssToBytes = 1024 + case "darwin", "ios": + rssToBytes = 1 + default: + panic("unsupported OS") + } + + var rusage syscall.Rusage + syscall.Getrusage(0, &rusage) + fmt.Fprintf(w, "# MaxRSS = %d\n", uintptr(rusage.Maxrss)*rssToBytes) +} diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go new file mode 100644 index 0000000..322579c --- /dev/null +++ b/src/runtime/pprof/pprof_test.go @@ -0,0 +1,1901 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !js + +package pprof + +import ( + "bytes" + "context" + "fmt" + "internal/abi" + "internal/profile" + "internal/testenv" + "io" + "math" + "math/big" + "os" + "os/exec" + "regexp" + "runtime" + "runtime/debug" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + _ "unsafe" +) + +func cpuHogger(f func(x int) int, y *int, dur time.Duration) { + // We only need to get one 100 Hz clock tick, so we've got + // a large safety buffer. + // But do at least 500 iterations (which should take about 100ms), + // otherwise TestCPUProfileMultithreaded can fail if only one + // thread is scheduled during the testing period. + t0 := time.Now() + accum := *y + for i := 0; i < 500 || time.Since(t0) < dur; i++ { + accum = f(accum) + } + *y = accum +} + +var ( + salt1 = 0 + salt2 = 0 +) + +// The actual CPU hogging function. +// Must not call other functions nor access heap/globals in the loop, +// otherwise under race detector the samples will be in the race runtime. +func cpuHog1(x int) int { + return cpuHog0(x, 1e5) +} + +func cpuHog0(x, n int) int { + foo := x + for i := 0; i < n; i++ { + if foo > 0 { + foo *= foo + } else { + foo *= foo + 1 + } + } + return foo +} + +func cpuHog2(x int) int { + foo := x + for i := 0; i < 1e5; i++ { + if foo > 0 { + foo *= foo + } else { + foo *= foo + 2 + } + } + return foo +} + +// Return a list of functions that we don't want to ever appear in CPU +// profiles. For gccgo, that list includes the sigprof handler itself. 
+func avoidFunctions() []string { + if runtime.Compiler == "gccgo" { + return []string{"runtime.sigprof"} + } + return nil +} + +func TestCPUProfile(t *testing.T) { + matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.cpuHog1"}, avoidFunctions()) + testCPUProfile(t, matches, func(dur time.Duration) { + cpuHogger(cpuHog1, &salt1, dur) + }) +} + +func TestCPUProfileMultithreaded(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) + matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.cpuHog1", "runtime/pprof.cpuHog2"}, avoidFunctions()) + testCPUProfile(t, matches, func(dur time.Duration) { + c := make(chan int) + go func() { + cpuHogger(cpuHog1, &salt1, dur) + c <- 1 + }() + cpuHogger(cpuHog2, &salt2, dur) + <-c + }) +} + +func TestCPUProfileMultithreadMagnitude(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("issue 35057 is only confirmed on Linux") + } + + // Linux [5.9,5.16) has a kernel bug that can break CPU timers on newly + // created threads, breaking our CPU accounting. + major, minor, patch, err := linuxKernelVersion() + if err != nil { + t.Errorf("Error determining kernel version: %v", err) + } + t.Logf("Running on Linux %d.%d.%d", major, minor, patch) + defer func() { + if t.Failed() { + t.Logf("Failure of this test may indicate that your system suffers from a known Linux kernel bug fixed on newer kernels. See https://golang.org/issue/49065.") + } + }() + + // Disable on affected builders to avoid flakiness, but otherwise keep + // it enabled to potentially warn users that they are on a broken + // kernel. + if testenv.Builder() != "" && (runtime.GOARCH == "386" || runtime.GOARCH == "amd64") { + have59 := major > 5 || (major == 5 && minor >= 9) + have516 := major > 5 || (major == 5 && minor >= 16) + if have59 && !have516 { + testenv.SkipFlaky(t, 49065) + } + } + + // Run a workload in a single goroutine, then run copies of the same + // workload in several goroutines. For both the serial and parallel cases, + // the CPU time the process measures with its own profiler should match the + // total CPU usage that the OS reports. + // + // We could also check that increases in parallelism (GOMAXPROCS) lead to a + // linear increase in the CPU usage reported by both the OS and the + // profiler, but without a guarantee of exclusive access to CPU resources + // that is likely to be a flaky test. + + // Require the smaller value to be within 10%, or 40% in short mode. + maxDiff := 0.10 + if testing.Short() { + maxDiff = 0.40 + } + + compare := func(a, b time.Duration, maxDiff float64) error { + if a <= 0 || b <= 0 { + return fmt.Errorf("Expected both time reports to be positive") + } + + if a < b { + a, b = b, a + } + + diff := float64(a-b) / float64(a) + if diff > maxDiff { + return fmt.Errorf("CPU usage reports are too different (limit -%.1f%%, got -%.1f%%)", maxDiff*100, diff*100) + } + + return nil + } + + for _, tc := range []struct { + name string + workers int + }{ + { + name: "serial", + workers: 1, + }, + { + name: "parallel", + workers: runtime.GOMAXPROCS(0), + }, + } { + // check that the OS's perspective matches what the Go runtime measures. 
+ t.Run(tc.name, func(t *testing.T) { + t.Logf("Running with %d workers", tc.workers) + + var cpuTime time.Duration + matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.cpuHog1"}, avoidFunctions()) + p := testCPUProfile(t, matches, func(dur time.Duration) { + cpuTime = diffCPUTime(t, func() { + var wg sync.WaitGroup + var once sync.Once + for i := 0; i < tc.workers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + var salt = 0 + cpuHogger(cpuHog1, &salt, dur) + once.Do(func() { salt1 = salt }) + }() + } + wg.Wait() + }) + }) + + for i, unit := range []string{"count", "nanoseconds"} { + if have, want := p.SampleType[i].Unit, unit; have != want { + t.Errorf("pN SampleType[%d]; %q != %q", i, have, want) + } + } + + // cpuHog1 called above is the primary source of CPU + // load, but there may be some background work by the + // runtime. Since the OS rusage measurement will + // include all work done by the process, also compare + // against all samples in our profile. + var value time.Duration + for _, sample := range p.Sample { + value += time.Duration(sample.Value[1]) * time.Nanosecond + } + + t.Logf("compare %s vs %s", cpuTime, value) + if err := compare(cpuTime, value, maxDiff); err != nil { + t.Errorf("compare got %v want nil", err) + } + }) + } +} + +// containsInlinedCall reports whether the function body for the function f is +// known to contain an inlined function call within the first maxBytes bytes. +func containsInlinedCall(f any, maxBytes int) bool { + _, found := findInlinedCall(f, maxBytes) + return found +} + +// findInlinedCall returns the PC of an inlined function call within +// the function body for the function f if any. +func findInlinedCall(f any, maxBytes int) (pc uint64, found bool) { + fFunc := runtime.FuncForPC(uintptr(abi.FuncPCABIInternal(f))) + if fFunc == nil || fFunc.Entry() == 0 { + panic("failed to locate function entry") + } + + for offset := 0; offset < maxBytes; offset++ { + innerPC := fFunc.Entry() + uintptr(offset) + inner := runtime.FuncForPC(innerPC) + if inner == nil { + // No function known for this PC value. + // It might simply be misaligned, so keep searching. + continue + } + if inner.Entry() != fFunc.Entry() { + // Scanned past f and didn't find any inlined functions. + break + } + if inner.Name() != fFunc.Name() { + // This PC has f as its entry-point, but is not f. Therefore, it must be a + // function inlined into f. + return uint64(innerPC), true + } + } + + return 0, false +} + +func TestCPUProfileInlining(t *testing.T) { + if !containsInlinedCall(inlinedCaller, 4<<10) { + t.Skip("Can't determine whether inlinedCallee was inlined into inlinedCaller.") + } + + matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions()) + p := testCPUProfile(t, matches, func(dur time.Duration) { + cpuHogger(inlinedCaller, &salt1, dur) + }) + + // Check if inlined function locations are encoded correctly. The inlinedCalee and inlinedCaller should be in one location. 
+ for _, loc := range p.Location { + hasInlinedCallerAfterInlinedCallee, hasInlinedCallee := false, false + for _, line := range loc.Line { + if line.Function.Name == "runtime/pprof.inlinedCallee" { + hasInlinedCallee = true + } + if hasInlinedCallee && line.Function.Name == "runtime/pprof.inlinedCaller" { + hasInlinedCallerAfterInlinedCallee = true + } + } + if hasInlinedCallee != hasInlinedCallerAfterInlinedCallee { + t.Fatalf("want inlinedCallee followed by inlinedCaller, got separate Location entries:\n%v", p) + } + } +} + +func inlinedCaller(x int) int { + x = inlinedCallee(x, 1e5) + return x +} + +func inlinedCallee(x, n int) int { + return cpuHog0(x, n) +} + +//go:noinline +func dumpCallers(pcs []uintptr) { + if pcs == nil { + return + } + + skip := 2 // Callers and dumpCallers + runtime.Callers(skip, pcs) +} + +//go:noinline +func inlinedCallerDump(pcs []uintptr) { + inlinedCalleeDump(pcs) +} + +func inlinedCalleeDump(pcs []uintptr) { + dumpCallers(pcs) +} + +func TestCPUProfileRecursion(t *testing.T) { + matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.recursionCallee", "runtime/pprof.recursionCaller"}, avoidFunctions()) + p := testCPUProfile(t, matches, func(dur time.Duration) { + cpuHogger(recursionCaller, &salt1, dur) + }) + + // check the Location encoding was not confused by recursive calls. + for i, loc := range p.Location { + recursionFunc := 0 + for _, line := range loc.Line { + if name := line.Function.Name; name == "runtime/pprof.recursionCaller" || name == "runtime/pprof.recursionCallee" { + recursionFunc++ + } + } + if recursionFunc > 1 { + t.Fatalf("want at most one recursionCaller or recursionCallee in one Location, got a violating Location (index: %d):\n%v", i, p) + } + } +} + +func recursionCaller(x int) int { + y := recursionCallee(3, x) + return y +} + +func recursionCallee(n, x int) int { + if n == 0 { + return 1 + } + y := inlinedCallee(x, 1e4) + return y * recursionCallee(n-1, x) +} + +func recursionChainTop(x int, pcs []uintptr) { + if x < 0 { + return + } + recursionChainMiddle(x, pcs) +} + +func recursionChainMiddle(x int, pcs []uintptr) { + recursionChainBottom(x, pcs) +} + +func recursionChainBottom(x int, pcs []uintptr) { + // This will be called each time, we only care about the last. We + // can't make this conditional or this function won't be inlined. + dumpCallers(pcs) + + recursionChainTop(x-1, pcs) +} + +func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) *profile.Profile { + p, err := profile.Parse(bytes.NewReader(valBytes)) + if err != nil { + t.Fatal(err) + } + for _, sample := range p.Sample { + count := uintptr(sample.Value[0]) + f(count, sample.Location, sample.Label) + } + return p +} + +func cpuProfilingBroken() bool { + switch runtime.GOOS { + case "plan9": + // Profiling unimplemented. + return true + case "aix": + // See https://golang.org/issue/45170. + return true + case "ios", "dragonfly", "netbsd", "illumos", "solaris": + // See https://golang.org/issue/13841. + return true + case "openbsd": + if runtime.GOARCH == "arm" || runtime.GOARCH == "arm64" { + // See https://golang.org/issue/13841. + return true + } + } + + return false +} + +// testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need, +// as interpreted by matches, and returns the parsed profile. 
+func testCPUProfile(t *testing.T, matches profileMatchFunc, f func(dur time.Duration)) *profile.Profile { + switch runtime.GOOS { + case "darwin": + out, err := exec.Command("uname", "-a").CombinedOutput() + if err != nil { + t.Fatal(err) + } + vers := string(out) + t.Logf("uname -a: %v", vers) + case "plan9": + t.Skip("skipping on plan9") + } + + broken := cpuProfilingBroken() + + maxDuration := 5 * time.Second + if testing.Short() && broken { + // If it's expected to be broken, no point waiting around. + maxDuration /= 10 + } + + // If we're running a long test, start with a long duration + // for tests that try to make sure something *doesn't* happen. + duration := 5 * time.Second + if testing.Short() { + duration = 100 * time.Millisecond + } + + // Profiling tests are inherently flaky, especially on a + // loaded system, such as when this test is running with + // several others under go test std. If a test fails in a way + // that could mean it just didn't run long enough, try with a + // longer duration. + for duration <= maxDuration { + var prof bytes.Buffer + if err := StartCPUProfile(&prof); err != nil { + t.Fatal(err) + } + f(duration) + StopCPUProfile() + + if p, ok := profileOk(t, matches, prof, duration); ok { + return p + } + + duration *= 2 + if duration <= maxDuration { + t.Logf("retrying with %s duration", duration) + } + } + + if broken { + t.Skipf("ignoring failure on %s/%s; see golang.org/issue/13841", runtime.GOOS, runtime.GOARCH) + } + + // Ignore the failure if the tests are running in a QEMU-based emulator, + // QEMU is not perfect at emulating everything. + // IN_QEMU environmental variable is set by some of the Go builders. + // IN_QEMU=1 indicates that the tests are running in QEMU. See issue 9605. + if os.Getenv("IN_QEMU") == "1" { + t.Skip("ignore the failure in QEMU; see golang.org/issue/9605") + } + t.FailNow() + return nil +} + +var diffCPUTimeImpl func(f func()) time.Duration + +func diffCPUTime(t *testing.T, f func()) time.Duration { + if fn := diffCPUTimeImpl; fn != nil { + return fn(f) + } + t.Fatalf("cannot measure CPU time on GOOS=%s GOARCH=%s", runtime.GOOS, runtime.GOARCH) + return 0 +} + +func contains(slice []string, s string) bool { + for i := range slice { + if slice[i] == s { + return true + } + } + return false +} + +// stackContains matches if a function named spec appears anywhere in the stack trace. +func stackContains(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool { + for _, loc := range stk { + for _, line := range loc.Line { + if strings.Contains(line.Function.Name, spec) { + return true + } + } + } + return false +} + +type sampleMatchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool + +func profileOk(t *testing.T, matches profileMatchFunc, prof bytes.Buffer, duration time.Duration) (_ *profile.Profile, ok bool) { + ok = true + + var samples uintptr + var buf bytes.Buffer + p := parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) { + fmt.Fprintf(&buf, "%d:", count) + fprintStack(&buf, stk) + fmt.Fprintf(&buf, " labels: %v\n", labels) + samples += count + fmt.Fprintf(&buf, "\n") + }) + t.Logf("total %d CPU profile samples collected:\n%s", samples, buf.String()) + + if samples < 10 && runtime.GOOS == "windows" { + // On some windows machines we end up with + // not enough samples due to coarse timer + // resolution. Let it go. 
+ t.Log("too few samples on Windows (golang.org/issue/10842)") + return p, false + } + + // Check that we got a reasonable number of samples. + // We used to always require at least ideal/4 samples, + // but that is too hard to guarantee on a loaded system. + // Now we accept 10 or more samples, which we take to be + // enough to show that at least some profiling is occurring. + if ideal := uintptr(duration * 100 / time.Second); samples == 0 || (samples < ideal/4 && samples < 10) { + t.Logf("too few samples; got %d, want at least %d, ideally %d", samples, ideal/4, ideal) + ok = false + } + + if matches != nil && !matches(t, p) { + ok = false + } + + return p, ok +} + +type profileMatchFunc func(*testing.T, *profile.Profile) bool + +func matchAndAvoidStacks(matches sampleMatchFunc, need []string, avoid []string) profileMatchFunc { + return func(t *testing.T, p *profile.Profile) (ok bool) { + ok = true + + // Check that profile is well formed, contains 'need', and does not contain + // anything from 'avoid'. + have := make([]uintptr, len(need)) + avoidSamples := make([]uintptr, len(avoid)) + + for _, sample := range p.Sample { + count := uintptr(sample.Value[0]) + for i, spec := range need { + if matches(spec, count, sample.Location, sample.Label) { + have[i] += count + } + } + for i, name := range avoid { + for _, loc := range sample.Location { + for _, line := range loc.Line { + if strings.Contains(line.Function.Name, name) { + avoidSamples[i] += count + } + } + } + } + } + + for i, name := range avoid { + bad := avoidSamples[i] + if bad != 0 { + t.Logf("found %d samples in avoid-function %s\n", bad, name) + ok = false + } + } + + if len(need) == 0 { + return + } + + var total uintptr + for i, name := range need { + total += have[i] + t.Logf("%s: %d\n", name, have[i]) + } + if total == 0 { + t.Logf("no samples in expected functions") + ok = false + } + + // We'd like to check a reasonable minimum, like + // total / len(have) / smallconstant, but this test is + // pretty flaky (see bug 7095). So we'll just test to + // make sure we got at least one sample. + min := uintptr(1) + for i, name := range need { + if have[i] < min { + t.Logf("%s has %d samples out of %d, want at least %d, ideally %d", name, have[i], total, min, total/uintptr(len(have))) + ok = false + } + } + return + } +} + +// Fork can hang if preempted with signals frequently enough (see issue 5517). +// Ensure that we do not do this. +func TestCPUProfileWithFork(t *testing.T) { + testenv.MustHaveExec(t) + + heap := 1 << 30 + if runtime.GOOS == "android" { + // Use smaller size for Android to avoid crash. + heap = 100 << 20 + } + if runtime.GOOS == "windows" && runtime.GOARCH == "arm" { + // Use smaller heap for Windows/ARM to avoid crash. + heap = 100 << 20 + } + if testing.Short() { + heap = 100 << 20 + } + // This makes fork slower. + garbage := make([]byte, heap) + // Need to touch the slice, otherwise it won't be paged in. + done := make(chan bool) + go func() { + for i := range garbage { + garbage[i] = 42 + } + done <- true + }() + <-done + + var prof bytes.Buffer + if err := StartCPUProfile(&prof); err != nil { + t.Fatal(err) + } + defer StopCPUProfile() + + for i := 0; i < 10; i++ { + exec.Command(os.Args[0], "-h").CombinedOutput() + } +} + +// Test that profiler does not observe runtime.gogo as "user" goroutine execution. +// If it did, it would see inconsistent state and would either record an incorrect stack +// or crash because the stack was malformed. 
+func TestGoroutineSwitch(t *testing.T) { + if runtime.Compiler == "gccgo" { + t.Skip("not applicable for gccgo") + } + // How much to try. These defaults take about 1 seconds + // on a 2012 MacBook Pro. The ones in short mode take + // about 0.1 seconds. + tries := 10 + count := 1000000 + if testing.Short() { + tries = 1 + } + for try := 0; try < tries; try++ { + var prof bytes.Buffer + if err := StartCPUProfile(&prof); err != nil { + t.Fatal(err) + } + for i := 0; i < count; i++ { + runtime.Gosched() + } + StopCPUProfile() + + // Read profile to look for entries for gogo with an attempt at a traceback. + // "runtime.gogo" is OK, because that's the part of the context switch + // before the actual switch begins. But we should not see "gogo", + // aka "gogo<>(SB)", which does the actual switch and is marked SPWRITE. + parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, _ map[string][]string) { + // An entry with two frames with 'System' in its top frame + // exists to record a PC without a traceback. Those are okay. + if len(stk) == 2 { + name := stk[1].Line[0].Function.Name + if name == "runtime._System" || name == "runtime._ExternalCode" || name == "runtime._GC" { + return + } + } + + // An entry with just one frame is OK too: + // it knew to stop at gogo. + if len(stk) == 1 { + return + } + + // Otherwise, should not see gogo. + // The place we'd see it would be the inner most frame. + name := stk[0].Line[0].Function.Name + if name == "gogo" { + var buf bytes.Buffer + fprintStack(&buf, stk) + t.Fatalf("found profile entry for gogo:\n%s", buf.String()) + } + }) + } +} + +func fprintStack(w io.Writer, stk []*profile.Location) { + for _, loc := range stk { + fmt.Fprintf(w, " %#x", loc.Address) + fmt.Fprintf(w, " (") + for i, line := range loc.Line { + if i > 0 { + fmt.Fprintf(w, " ") + } + fmt.Fprintf(w, "%s:%d", line.Function.Name, line.Line) + } + fmt.Fprintf(w, ")") + } +} + +// Test that profiling of division operations is okay, especially on ARM. See issue 6681. +func TestMathBigDivide(t *testing.T) { + testCPUProfile(t, nil, func(duration time.Duration) { + t := time.After(duration) + pi := new(big.Int) + for { + for i := 0; i < 100; i++ { + n := big.NewInt(2646693125139304345) + d := big.NewInt(842468587426513207) + pi.Div(n, d) + } + select { + case <-t: + return + default: + } + } + }) +} + +// stackContainsAll matches if all functions in spec (comma-separated) appear somewhere in the stack trace. 
+func stackContainsAll(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool { + for _, f := range strings.Split(spec, ",") { + if !stackContains(f, count, stk, labels) { + return false + } + } + return true +} + +func TestMorestack(t *testing.T) { + matches := matchAndAvoidStacks(stackContainsAll, []string{"runtime.newstack,runtime/pprof.growstack"}, avoidFunctions()) + testCPUProfile(t, matches, func(duration time.Duration) { + t := time.After(duration) + c := make(chan bool) + for { + go func() { + growstack1() + c <- true + }() + select { + case <-t: + return + case <-c: + } + } + }) +} + +//go:noinline +func growstack1() { + growstack(10) +} + +//go:noinline +func growstack(n int) { + var buf [8 << 18]byte + use(buf) + if n > 0 { + growstack(n - 1) + } +} + +//go:noinline +func use(x [8 << 18]byte) {} + +func TestBlockProfile(t *testing.T) { + type TestCase struct { + name string + f func(*testing.T) + stk []string + re string + } + tests := [...]TestCase{ + { + name: "chan recv", + f: blockChanRecv, + stk: []string{ + "runtime.chanrecv1", + "runtime/pprof.blockChanRecv", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockChanRecv\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +`}, + { + name: "chan send", + f: blockChanSend, + stk: []string{ + "runtime.chansend1", + "runtime/pprof.blockChanSend", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ runtime\.chansend1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockChanSend\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +`}, + { + name: "chan close", + f: blockChanClose, + stk: []string{ + "runtime.chanrecv1", + "runtime/pprof.blockChanClose", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*/src/runtime/chan.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockChanClose\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +`}, + { + name: "select recv async", + f: blockSelectRecvAsync, + stk: []string{ + "runtime.selectgo", + "runtime/pprof.blockSelectRecvAsync", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*/src/runtime/select.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockSelectRecvAsync\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +`}, + { + name: "select send sync", + f: blockSelectSendSync, + stk: []string{ + "runtime.selectgo", + "runtime/pprof.blockSelectSendSync", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*/src/runtime/select.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockSelectSendSync\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +`}, + { + name: "mutex", + f: blockMutex, + stk: []string{ + 
"sync.(*Mutex).Lock", + "runtime/pprof.blockMutex", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ sync\.\(\*Mutex\)\.Lock\+0x[0-9a-f]+ .*/src/sync/mutex\.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockMutex\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +`}, + { + name: "cond", + f: blockCond, + stk: []string{ + "sync.(*Cond).Wait", + "runtime/pprof.blockCond", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ sync\.\(\*Cond\)\.Wait\+0x[0-9a-f]+ .*/src/sync/cond\.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockCond\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+ +`}, + } + + // Generate block profile + runtime.SetBlockProfileRate(1) + defer runtime.SetBlockProfileRate(0) + for _, test := range tests { + test.f(t) + } + + t.Run("debug=1", func(t *testing.T) { + var w bytes.Buffer + Lookup("block").WriteTo(&w, 1) + prof := w.String() + + if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") { + t.Fatalf("Bad profile header:\n%v", prof) + } + + if strings.HasSuffix(prof, "#\t0x0\n\n") { + t.Errorf("Useless 0 suffix:\n%v", prof) + } + + for _, test := range tests { + if !regexp.MustCompile(strings.ReplaceAll(test.re, "\t", "\t+")).MatchString(prof) { + t.Errorf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof) + } + } + }) + + t.Run("proto", func(t *testing.T) { + // proto format + var w bytes.Buffer + Lookup("block").WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + t.Logf("parsed proto: %s", p) + if err := p.CheckValid(); err != nil { + t.Fatalf("invalid profile: %v", err) + } + + stks := stacks(p) + for _, test := range tests { + if !containsStack(stks, test.stk) { + t.Errorf("No matching stack entry for %v, want %+v", test.name, test.stk) + } + } + }) + +} + +func stacks(p *profile.Profile) (res [][]string) { + for _, s := range p.Sample { + var stk []string + for _, l := range s.Location { + for _, line := range l.Line { + stk = append(stk, line.Function.Name) + } + } + res = append(res, stk) + } + return res +} + +func containsStack(got [][]string, want []string) bool { + for _, stk := range got { + if len(stk) < len(want) { + continue + } + for i, f := range want { + if f != stk[i] { + break + } + if i == len(want)-1 { + return true + } + } + } + return false +} + +// awaitBlockedGoroutine spins on runtime.Gosched until a runtime stack dump +// shows a goroutine in the given state with a stack frame in +// runtime/pprof.<fName>. +func awaitBlockedGoroutine(t *testing.T, state, fName string) { + re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime/pprof\.%s`, regexp.QuoteMeta(state), fName) + r := regexp.MustCompile(re) + + if deadline, ok := t.Deadline(); ok { + if d := time.Until(deadline); d > 1*time.Second { + timer := time.AfterFunc(d-1*time.Second, func() { + debug.SetTraceback("all") + panic(fmt.Sprintf("timed out waiting for %#q", re)) + }) + defer timer.Stop() + } + } + + buf := make([]byte, 64<<10) + for { + runtime.Gosched() + n := runtime.Stack(buf, true) + if n == len(buf) { + // Buffer wasn't large enough for a full goroutine dump. + // Resize it and try again. 
+ buf = make([]byte, 2*len(buf)) + continue + } + if r.Match(buf[:n]) { + return + } + } +} + +func blockChanRecv(t *testing.T) { + c := make(chan bool) + go func() { + awaitBlockedGoroutine(t, "chan receive", "blockChanRecv") + c <- true + }() + <-c +} + +func blockChanSend(t *testing.T) { + c := make(chan bool) + go func() { + awaitBlockedGoroutine(t, "chan send", "blockChanSend") + <-c + }() + c <- true +} + +func blockChanClose(t *testing.T) { + c := make(chan bool) + go func() { + awaitBlockedGoroutine(t, "chan receive", "blockChanClose") + close(c) + }() + <-c +} + +func blockSelectRecvAsync(t *testing.T) { + const numTries = 3 + c := make(chan bool, 1) + c2 := make(chan bool, 1) + go func() { + for i := 0; i < numTries; i++ { + awaitBlockedGoroutine(t, "select", "blockSelectRecvAsync") + c <- true + } + }() + for i := 0; i < numTries; i++ { + select { + case <-c: + case <-c2: + } + } +} + +func blockSelectSendSync(t *testing.T) { + c := make(chan bool) + c2 := make(chan bool) + go func() { + awaitBlockedGoroutine(t, "select", "blockSelectSendSync") + <-c + }() + select { + case c <- true: + case c2 <- true: + } +} + +func blockMutex(t *testing.T) { + var mu sync.Mutex + mu.Lock() + go func() { + awaitBlockedGoroutine(t, "semacquire", "blockMutex") + mu.Unlock() + }() + // Note: Unlock releases mu before recording the mutex event, + // so it's theoretically possible for this to proceed and + // capture the profile before the event is recorded. As long + // as this is blocked before the unlock happens, it's okay. + mu.Lock() +} + +func blockCond(t *testing.T) { + var mu sync.Mutex + c := sync.NewCond(&mu) + mu.Lock() + go func() { + awaitBlockedGoroutine(t, "sync.Cond.Wait", "blockCond") + mu.Lock() + c.Signal() + mu.Unlock() + }() + c.Wait() + mu.Unlock() +} + +// See http://golang.org/cl/299991. +func TestBlockProfileBias(t *testing.T) { + rate := int(1000) // arbitrary value + runtime.SetBlockProfileRate(rate) + defer runtime.SetBlockProfileRate(0) + + // simulate blocking events + blockFrequentShort(rate) + blockInfrequentLong(rate) + + var w bytes.Buffer + Lookup("block").WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + t.Logf("parsed proto: %s", p) + + il := float64(-1) // blockInfrequentLong duration + fs := float64(-1) // blockFrequentShort duration + for _, s := range p.Sample { + for _, l := range s.Location { + for _, line := range l.Line { + if len(s.Value) < 2 { + t.Fatal("block profile has less than 2 sample types") + } + + if line.Function.Name == "runtime/pprof.blockInfrequentLong" { + il = float64(s.Value[1]) + } else if line.Function.Name == "runtime/pprof.blockFrequentShort" { + fs = float64(s.Value[1]) + } + } + } + } + if il == -1 || fs == -1 { + t.Fatal("block profile is missing expected functions") + } + + // stddev of bias from 100 runs on local machine multiplied by 10x + const threshold = 0.2 + if bias := (il - fs) / il; math.Abs(bias) > threshold { + t.Fatalf("bias: abs(%f) > %f", bias, threshold) + } else { + t.Logf("bias: abs(%f) < %f", bias, threshold) + } +} + +// blockFrequentShort produces 100000 block events with an average duration of +// rate / 10. +func blockFrequentShort(rate int) { + for i := 0; i < 100000; i++ { + blockevent(int64(rate/10), 1) + } +} + +// blockFrequentShort produces 10000 block events with an average duration of +// rate. +func blockInfrequentLong(rate int) { + for i := 0; i < 10000; i++ { + blockevent(int64(rate), 1) + } +} + +// Used by TestBlockProfileBias. 
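The block-profile machinery these helpers exercise can also be driven end to end with the public API; a minimal sketch (the lock/unlock choreography and the sleep duration here are illustrative only):

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
	"sync"
	"time"
)

func main() {
	// Rate 1 records every blocking event, as the tests above do.
	runtime.SetBlockProfileRate(1)

	var mu sync.Mutex
	mu.Lock()
	go func() {
		time.Sleep(10 * time.Millisecond)
		mu.Unlock()
	}()
	mu.Lock() // blocks until the goroutine unlocks; the wait is recorded
	mu.Unlock()

	// debug=1 emits the human-readable form matched by the regexps above.
	pprof.Lookup("block").WriteTo(os.Stdout, 1)
}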
+//go:linkname blockevent runtime.blockevent +func blockevent(cycles int64, skip int) + +func TestMutexProfile(t *testing.T) { + // Generate mutex profile + + old := runtime.SetMutexProfileFraction(1) + defer runtime.SetMutexProfileFraction(old) + if old != 0 { + t.Fatalf("need MutexProfileRate 0, got %d", old) + } + + blockMutex(t) + + t.Run("debug=1", func(t *testing.T) { + var w bytes.Buffer + Lookup("mutex").WriteTo(&w, 1) + prof := w.String() + t.Logf("received profile: %v", prof) + + if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") { + t.Errorf("Bad profile header:\n%v", prof) + } + prof = strings.Trim(prof, "\n") + lines := strings.Split(prof, "\n") + if len(lines) != 6 { + t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof) + } + if len(lines) < 6 { + return + } + // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931" + r2 := `^\d+ \d+ @(?: 0x[[:xdigit:]]+)+` + //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$" + if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok { + t.Errorf("%q didn't match %q", lines[3], r2) + } + r3 := "^#.*runtime/pprof.blockMutex.*$" + if ok, err := regexp.MatchString(r3, lines[5]); err != nil || !ok { + t.Errorf("%q didn't match %q", lines[5], r3) + } + t.Logf(prof) + }) + t.Run("proto", func(t *testing.T) { + // proto format + var w bytes.Buffer + Lookup("mutex").WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + t.Logf("parsed proto: %s", p) + if err := p.CheckValid(); err != nil { + t.Fatalf("invalid profile: %v", err) + } + + stks := stacks(p) + for _, want := range [][]string{ + {"sync.(*Mutex).Unlock", "runtime/pprof.blockMutex.func1"}, + } { + if !containsStack(stks, want) { + t.Errorf("No matching stack entry for %+v", want) + } + } + }) +} + +func func1(c chan int) { <-c } +func func2(c chan int) { <-c } +func func3(c chan int) { <-c } +func func4(c chan int) { <-c } + +func TestGoroutineCounts(t *testing.T) { + // Setting GOMAXPROCS to 1 ensures we can force all goroutines to the + // desired blocking point. + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + + c := make(chan int) + for i := 0; i < 100; i++ { + switch { + case i%10 == 0: + go func1(c) + case i%2 == 0: + go func2(c) + default: + go func3(c) + } + // Let goroutines block on channel + for j := 0; j < 5; j++ { + runtime.Gosched() + } + } + ctx := context.Background() + + // ... and again, with labels this time (just with fewer iterations to keep + // sorting deterministic). 
+ Do(ctx, Labels("label", "value"), func(context.Context) { + for i := 0; i < 89; i++ { + switch { + case i%10 == 0: + go func1(c) + case i%2 == 0: + go func2(c) + default: + go func3(c) + } + // Let goroutines block on channel + for j := 0; j < 5; j++ { + runtime.Gosched() + } + } + }) + + var w bytes.Buffer + goroutineProf := Lookup("goroutine") + + // Check debug profile + goroutineProf.WriteTo(&w, 1) + prof := w.String() + + labels := labelMap{"label": "value"} + labelStr := "\n# labels: " + labels.String() + if !containsInOrder(prof, "\n50 @ ", "\n44 @", labelStr, + "\n40 @", "\n36 @", labelStr, "\n10 @", "\n9 @", labelStr, "\n1 @") { + t.Errorf("expected sorted goroutine counts with Labels:\n%s", prof) + } + + // Check proto profile + w.Reset() + goroutineProf.WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Errorf("error parsing protobuf profile: %v", err) + } + if err := p.CheckValid(); err != nil { + t.Errorf("protobuf profile is invalid: %v", err) + } + expectedLabels := map[int64]map[string]string{ + 50: map[string]string{}, + 44: map[string]string{"label": "value"}, + 40: map[string]string{}, + 36: map[string]string{"label": "value"}, + 10: map[string]string{}, + 9: map[string]string{"label": "value"}, + 1: map[string]string{}, + } + if !containsCountsLabels(p, expectedLabels) { + t.Errorf("expected count profile to contain goroutines with counts and labels %v, got %v", + expectedLabels, p) + } + + close(c) + + time.Sleep(10 * time.Millisecond) // let goroutines exit +} + +func containsInOrder(s string, all ...string) bool { + for _, t := range all { + var ok bool + if _, s, ok = strings.Cut(s, t); !ok { + return false + } + } + return true +} + +func containsCountsLabels(prof *profile.Profile, countLabels map[int64]map[string]string) bool { + m := make(map[int64]int) + type nkey struct { + count int64 + key, val string + } + n := make(map[nkey]int) + for c, kv := range countLabels { + m[c]++ + for k, v := range kv { + n[nkey{ + count: c, + key: k, + val: v, + }]++ + + } + } + for _, s := range prof.Sample { + // The count is the single value in the sample + if len(s.Value) != 1 { + return false + } + m[s.Value[0]]-- + for k, vs := range s.Label { + for _, v := range vs { + n[nkey{ + count: s.Value[0], + key: k, + val: v, + }]-- + } + } + } + for _, n := range m { + if n > 0 { + return false + } + } + for _, ncnt := range n { + if ncnt != 0 { + return false + } + } + return true +} + +var emptyCallStackTestRun int64 + +// Issue 18836. +func TestEmptyCallStack(t *testing.T) { + name := fmt.Sprintf("test18836_%d", emptyCallStackTestRun) + emptyCallStackTestRun++ + + t.Parallel() + var buf bytes.Buffer + p := NewProfile(name) + + p.Add("foo", 47674) + p.WriteTo(&buf, 1) + p.Remove("foo") + got := buf.String() + prefix := name + " profile: total 1\n" + if !strings.HasPrefix(got, prefix) { + t.Fatalf("got:\n\t%q\nwant prefix:\n\t%q\n", got, prefix) + } + lostevent := "lostProfileEvent" + if !strings.Contains(got, lostevent) { + t.Fatalf("got:\n\t%q\ndoes not contain:\n\t%q\n", got, lostevent) + } +} + +// stackContainsLabeled takes a spec like funcname;key=value and matches if the stack has that key +// and value and has funcname somewhere in the stack. 
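The label plumbing that stackContainsLabeled checks for comes from pprof.Do and pprof.Labels; a small standalone sketch of how labels propagate through the context and onto the running goroutine (the expected output is noted in comments):

package main

import (
	"context"
	"fmt"
	"runtime/pprof"
)

func main() {
	pprof.Do(context.Background(), pprof.Labels("key", "value"), func(ctx context.Context) {
		// Inside Do, the labels are attached to the current goroutine (and to
		// any CPU or goroutine profile samples taken here) and are visible on ctx.
		v, ok := pprof.Label(ctx, "key")
		fmt.Println(v, ok) // prints: value true

		pprof.ForLabels(ctx, func(k, v string) bool {
			fmt.Printf("%s=%s\n", k, v) // prints: key=value
			return true
		})
	})
}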
+func stackContainsLabeled(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool { + base, kv, ok := strings.Cut(spec, ";") + if !ok { + panic("no semicolon in key/value spec") + } + k, v, ok := strings.Cut(kv, "=") + if !ok { + panic("missing = in key/value spec") + } + if !contains(labels[k], v) { + return false + } + return stackContains(base, count, stk, labels) +} + +func TestCPUProfileLabel(t *testing.T) { + matches := matchAndAvoidStacks(stackContainsLabeled, []string{"runtime/pprof.cpuHogger;key=value"}, avoidFunctions()) + testCPUProfile(t, matches, func(dur time.Duration) { + Do(context.Background(), Labels("key", "value"), func(context.Context) { + cpuHogger(cpuHog1, &salt1, dur) + }) + }) +} + +func TestLabelRace(t *testing.T) { + // Test the race detector annotations for synchronization + // between settings labels and consuming them from the + // profile. + matches := matchAndAvoidStacks(stackContainsLabeled, []string{"runtime/pprof.cpuHogger;key=value"}, nil) + testCPUProfile(t, matches, func(dur time.Duration) { + start := time.Now() + var wg sync.WaitGroup + for time.Since(start) < dur { + var salts [10]int + for i := 0; i < 10; i++ { + wg.Add(1) + go func(j int) { + Do(context.Background(), Labels("key", "value"), func(context.Context) { + cpuHogger(cpuHog1, &salts[j], time.Millisecond) + }) + wg.Done() + }(i) + } + wg.Wait() + } + }) +} + +// TestLabelSystemstack makes sure CPU profiler samples of goroutines running +// on systemstack include the correct pprof labels. See issue #48577 +func TestLabelSystemstack(t *testing.T) { + // Grab and re-set the initial value before continuing to ensure + // GOGC doesn't actually change following the test. + gogc := debug.SetGCPercent(100) + debug.SetGCPercent(gogc) + + matches := matchAndAvoidStacks(stackContainsLabeled, []string{"runtime.systemstack;key=value"}, avoidFunctions()) + p := testCPUProfile(t, matches, func(dur time.Duration) { + Do(context.Background(), Labels("key", "value"), func(ctx context.Context) { + parallelLabelHog(ctx, dur, gogc) + }) + }) + + // Two conditions to check: + // * labelHog should always be labeled. + // * The label should _only_ appear on labelHog and the Do call above. + for _, s := range p.Sample { + isLabeled := s.Label != nil && contains(s.Label["key"], "value") + var ( + mayBeLabeled bool + mustBeLabeled bool + mustNotBeLabeled bool + ) + for _, loc := range s.Location { + for _, l := range loc.Line { + switch l.Function.Name { + case "runtime/pprof.labelHog", "runtime/pprof.parallelLabelHog", "runtime/pprof.parallelLabelHog.func1": + mustBeLabeled = true + case "runtime/pprof.Do": + // Do sets the labels, so samples may + // or may not be labeled depending on + // which part of the function they are + // at. + mayBeLabeled = true + case "runtime.bgsweep", "runtime.bgscavenge", "runtime.forcegchelper", "runtime.gcBgMarkWorker", "runtime.runfinq", "runtime.sysmon": + // Runtime system goroutines or threads + // (such as those identified by + // runtime.isSystemGoroutine). These + // should never be labeled. + mustNotBeLabeled = true + case "gogo", "gosave_systemstack_switch", "racecall": + // These are context switch/race + // critical that we can't do a full + // traceback from. Typically this would + // be covered by the runtime check + // below, but these symbols don't have + // the package name. 
+ mayBeLabeled = true + } + + if strings.HasPrefix(l.Function.Name, "runtime.") { + // There are many places in the runtime + // where we can't do a full traceback. + // Ideally we'd list them all, but + // barring that allow anything in the + // runtime, unless explicitly excluded + // above. + mayBeLabeled = true + } + } + } + if mustNotBeLabeled { + // If this must not be labeled, then mayBeLabeled hints + // are not relevant. + mayBeLabeled = false + } + if mustBeLabeled && !isLabeled { + var buf bytes.Buffer + fprintStack(&buf, s.Location) + t.Errorf("Sample labeled got false want true: %s", buf.String()) + } + if mustNotBeLabeled && isLabeled { + var buf bytes.Buffer + fprintStack(&buf, s.Location) + t.Errorf("Sample labeled got true want false: %s", buf.String()) + } + if isLabeled && !(mayBeLabeled || mustBeLabeled) { + var buf bytes.Buffer + fprintStack(&buf, s.Location) + t.Errorf("Sample labeled got true want false: %s", buf.String()) + } + } +} + +// labelHog is designed to burn CPU time in a way that a high number of CPU +// samples end up running on systemstack. +func labelHog(stop chan struct{}, gogc int) { + // Regression test for issue 50032. We must give GC an opportunity to + // be initially triggered by a labelled goroutine. + runtime.GC() + + for i := 0; ; i++ { + select { + case <-stop: + return + default: + debug.SetGCPercent(gogc) + } + } +} + +// parallelLabelHog runs GOMAXPROCS goroutines running labelHog. +func parallelLabelHog(ctx context.Context, dur time.Duration, gogc int) { + var wg sync.WaitGroup + stop := make(chan struct{}) + for i := 0; i < runtime.GOMAXPROCS(0); i++ { + wg.Add(1) + go func() { + defer wg.Done() + labelHog(stop, gogc) + }() + } + + time.Sleep(dur) + close(stop) + wg.Wait() +} + +// Check that there is no deadlock when the program receives SIGPROF while in +// 64bit atomics' critical section. Used to happen on mips{,le}. See #20146. +func TestAtomicLoadStore64(t *testing.T) { + f, err := os.CreateTemp("", "profatomic") + if err != nil { + t.Fatalf("TempFile: %v", err) + } + defer os.Remove(f.Name()) + defer f.Close() + + if err := StartCPUProfile(f); err != nil { + t.Fatal(err) + } + defer StopCPUProfile() + + var flag uint64 + done := make(chan bool, 1) + + go func() { + for atomic.LoadUint64(&flag) == 0 { + runtime.Gosched() + } + done <- true + }() + time.Sleep(50 * time.Millisecond) + atomic.StoreUint64(&flag, 1) + <-done +} + +func TestTracebackAll(t *testing.T) { + // With gccgo, if a profiling signal arrives at the wrong time + // during traceback, it may crash or hang. See issue #29448. + f, err := os.CreateTemp("", "proftraceback") + if err != nil { + t.Fatalf("TempFile: %v", err) + } + defer os.Remove(f.Name()) + defer f.Close() + + if err := StartCPUProfile(f); err != nil { + t.Fatal(err) + } + defer StopCPUProfile() + + ch := make(chan int) + defer close(ch) + + count := 10 + for i := 0; i < count; i++ { + go func() { + <-ch // block + }() + } + + N := 10000 + if testing.Short() { + N = 500 + } + buf := make([]byte, 10*1024) + for i := 0; i < N; i++ { + runtime.Stack(buf, true) + } +} + +// TestTryAdd tests the cases that are hard to test with real program execution. +// +// For example, the current go compilers may not always inline functions +// involved in recursion but that may not be true in the future compilers. 
This +// tests such cases by using fake call sequences and forcing the profile build +// utilizing translateCPUProfile defined in proto_test.go +func TestTryAdd(t *testing.T) { + if _, found := findInlinedCall(inlinedCallerDump, 4<<10); !found { + t.Skip("Can't determine whether anything was inlined into inlinedCallerDump.") + } + + // inlinedCallerDump + // inlinedCalleeDump + pcs := make([]uintptr, 2) + inlinedCallerDump(pcs) + inlinedCallerStack := make([]uint64, 2) + for i := range pcs { + inlinedCallerStack[i] = uint64(pcs[i]) + } + + if _, found := findInlinedCall(recursionChainBottom, 4<<10); !found { + t.Skip("Can't determine whether anything was inlined into recursionChainBottom.") + } + + // recursionChainTop + // recursionChainMiddle + // recursionChainBottom + // recursionChainTop + // recursionChainMiddle + // recursionChainBottom + pcs = make([]uintptr, 6) + recursionChainTop(1, pcs) + recursionStack := make([]uint64, len(pcs)) + for i := range pcs { + recursionStack[i] = uint64(pcs[i]) + } + + period := int64(2000 * 1000) // 1/500*1e9 nanosec. + + testCases := []struct { + name string + input []uint64 // following the input format assumed by profileBuilder.addCPUData. + count int // number of records in input. + wantLocs [][]string // ordered location entries with function names. + wantSamples []*profile.Sample // ordered samples, we care only about Value and the profile location IDs. + }{{ + // Sanity test for a normal, complete stack trace. + name: "full_stack_trace", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 5, 0, 50, inlinedCallerStack[0], inlinedCallerStack[1], + }, + count: 2, + wantLocs: [][]string{ + {"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}, + }, + wantSamples: []*profile.Sample{ + {Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + name: "bug35538", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + // Fake frame: tryAdd will have inlinedCallerDump + // (stack[1]) on the deck when it encounters the next + // inline function. It should accept this. + 7, 0, 10, inlinedCallerStack[0], inlinedCallerStack[1], inlinedCallerStack[0], inlinedCallerStack[1], + 5, 0, 20, inlinedCallerStack[0], inlinedCallerStack[1], + }, + count: 3, + wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}}, + {Value: []int64{20, 20 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + name: "bug38096", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + // count (data[2]) == 0 && len(stk) == 1 is an overflow + // entry. The "stk" entry is actually the count. + 4, 0, 0, 4242, + }, + count: 2, + wantLocs: [][]string{{"runtime/pprof.lostProfileEvent"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{4242, 4242 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + // If a function is directly called recursively then it must + // not be inlined in the caller. + // + // N.B. We're generating an impossible profile here, with a + // recursive inlineCalleeDump call. This is simulating a non-Go + // function that looks like an inlined Go function other than + // its recursive property. See pcDeck.tryAdd. + name: "directly_recursive_func_is_not_inlined", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. 
+ 5, 0, 30, inlinedCallerStack[0], inlinedCallerStack[0], + 4, 0, 40, inlinedCallerStack[0], + }, + count: 3, + // inlinedCallerDump shows up here because + // runtime_expandFinalInlineFrame adds it to the stack frame. + wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump"}, {"runtime/pprof.inlinedCallerDump"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}, {ID: 2}}}, + {Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}}, + }, + }, { + name: "recursion_chain_inline", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 9, 0, 10, recursionStack[0], recursionStack[1], recursionStack[2], recursionStack[3], recursionStack[4], recursionStack[5], + }, + count: 2, + wantLocs: [][]string{ + {"runtime/pprof.recursionChainBottom"}, + { + "runtime/pprof.recursionChainMiddle", + "runtime/pprof.recursionChainTop", + "runtime/pprof.recursionChainBottom", + }, + { + "runtime/pprof.recursionChainMiddle", + "runtime/pprof.recursionChainTop", + "runtime/pprof.TestTryAdd", // inlined into the test. + }, + }, + wantSamples: []*profile.Sample{ + {Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}, {ID: 3}}}, + }, + }, { + name: "truncated_stack_trace_later", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 5, 0, 50, inlinedCallerStack[0], inlinedCallerStack[1], + 4, 0, 60, inlinedCallerStack[0], + }, + count: 3, + wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}}, + {Value: []int64{60, 60 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + name: "truncated_stack_trace_first", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 4, 0, 70, inlinedCallerStack[0], + 5, 0, 80, inlinedCallerStack[0], inlinedCallerStack[1], + }, + count: 3, + wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}}, + {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + // We can recover the inlined caller from a truncated stack. + name: "truncated_stack_trace_only", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 4, 0, 70, inlinedCallerStack[0], + }, + count: 2, + wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + // The same location is used for duplicated stacks. + name: "truncated_stack_trace_twice", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 4, 0, 70, inlinedCallerStack[0], + // Fake frame: add a fake call to + // inlinedCallerDump to prevent this sample + // from getting merged into above. 
+ 5, 0, 80, inlinedCallerStack[1], inlinedCallerStack[0], + }, + count: 3, + wantLocs: [][]string{ + {"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}, + {"runtime/pprof.inlinedCallerDump"}, + }, + wantSamples: []*profile.Sample{ + {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}}, + {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 2}, {ID: 1}}}, + }, + }} + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p, err := translateCPUProfile(tc.input, tc.count) + if err != nil { + t.Fatalf("translating profile: %v", err) + } + t.Logf("Profile: %v\n", p) + + // One location entry with all inlined functions. + var gotLoc [][]string + for _, loc := range p.Location { + var names []string + for _, line := range loc.Line { + names = append(names, line.Function.Name) + } + gotLoc = append(gotLoc, names) + } + if got, want := fmtJSON(gotLoc), fmtJSON(tc.wantLocs); got != want { + t.Errorf("Got Location = %+v\n\twant %+v", got, want) + } + // All samples should point to one location. + var gotSamples []*profile.Sample + for _, sample := range p.Sample { + var locs []*profile.Location + for _, loc := range sample.Location { + locs = append(locs, &profile.Location{ID: loc.ID}) + } + gotSamples = append(gotSamples, &profile.Sample{Value: sample.Value, Location: locs}) + } + if got, want := fmtJSON(gotSamples), fmtJSON(tc.wantSamples); got != want { + t.Errorf("Got Samples = %+v\n\twant %+v", got, want) + } + }) + } +} + +func TestTimeVDSO(t *testing.T) { + // Test that time functions have the right stack trace. In particular, + // it shouldn't be recursive. + + if runtime.GOOS == "android" { + // Flaky on Android, issue 48655. VDSO may not be enabled. + testenv.SkipFlaky(t, 48655) + } + + matches := matchAndAvoidStacks(stackContains, []string{"time.now"}, avoidFunctions()) + p := testCPUProfile(t, matches, func(dur time.Duration) { + t0 := time.Now() + for { + t := time.Now() + if t.Sub(t0) >= dur { + return + } + } + }) + + // Check for recursive time.now sample. + for _, sample := range p.Sample { + var seenNow bool + for _, loc := range sample.Location { + for _, line := range loc.Line { + if line.Function.Name == "time.now" { + if seenNow { + t.Fatalf("unexpected recursive time.now") + } + seenNow = true + } + } + } + } +} diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go new file mode 100644 index 0000000..073a076 --- /dev/null +++ b/src/runtime/pprof/proto.go @@ -0,0 +1,701 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "bytes" + "compress/gzip" + "fmt" + "internal/abi" + "io" + "os" + "runtime" + "strconv" + "strings" + "time" + "unsafe" +) + +// lostProfileEvent is the function to which lost profiling +// events are attributed. +// (The name shows up in the pprof graphs.) +func lostProfileEvent() { lostProfileEvent() } + +// A profileBuilder writes a profile incrementally from a +// stream of profile samples delivered by the runtime. +type profileBuilder struct { + start time.Time + end time.Time + havePeriod bool + period int64 + m profMap + + // encoding state + w io.Writer + zw *gzip.Writer + pb protobuf + strings []string + stringMap map[string]int + locs map[uintptr]locInfo // list of locInfo starting with the given PC. 
+ funcs map[string]int // Package path-qualified function name to Function.ID + mem []memMap + deck pcDeck +} + +type memMap struct { + // initialized as reading mapping + start uintptr + end uintptr + offset uint64 + file, buildID string + + funcs symbolizeFlag + fake bool // map entry was faked; /proc/self/maps wasn't available +} + +// symbolizeFlag keeps track of symbolization result. +// 0 : no symbol lookup was performed +// 1<<0 (lookupTried) : symbol lookup was performed +// 1<<1 (lookupFailed): symbol lookup was performed but failed +type symbolizeFlag uint8 + +const ( + lookupTried symbolizeFlag = 1 << iota + lookupFailed symbolizeFlag = 1 << iota +) + +const ( + // message Profile + tagProfile_SampleType = 1 // repeated ValueType + tagProfile_Sample = 2 // repeated Sample + tagProfile_Mapping = 3 // repeated Mapping + tagProfile_Location = 4 // repeated Location + tagProfile_Function = 5 // repeated Function + tagProfile_StringTable = 6 // repeated string + tagProfile_DropFrames = 7 // int64 (string table index) + tagProfile_KeepFrames = 8 // int64 (string table index) + tagProfile_TimeNanos = 9 // int64 + tagProfile_DurationNanos = 10 // int64 + tagProfile_PeriodType = 11 // ValueType (really optional string???) + tagProfile_Period = 12 // int64 + tagProfile_Comment = 13 // repeated int64 + tagProfile_DefaultSampleType = 14 // int64 + + // message ValueType + tagValueType_Type = 1 // int64 (string table index) + tagValueType_Unit = 2 // int64 (string table index) + + // message Sample + tagSample_Location = 1 // repeated uint64 + tagSample_Value = 2 // repeated int64 + tagSample_Label = 3 // repeated Label + + // message Label + tagLabel_Key = 1 // int64 (string table index) + tagLabel_Str = 2 // int64 (string table index) + tagLabel_Num = 3 // int64 + + // message Mapping + tagMapping_ID = 1 // uint64 + tagMapping_Start = 2 // uint64 + tagMapping_Limit = 3 // uint64 + tagMapping_Offset = 4 // uint64 + tagMapping_Filename = 5 // int64 (string table index) + tagMapping_BuildID = 6 // int64 (string table index) + tagMapping_HasFunctions = 7 // bool + tagMapping_HasFilenames = 8 // bool + tagMapping_HasLineNumbers = 9 // bool + tagMapping_HasInlineFrames = 10 // bool + + // message Location + tagLocation_ID = 1 // uint64 + tagLocation_MappingID = 2 // uint64 + tagLocation_Address = 3 // uint64 + tagLocation_Line = 4 // repeated Line + + // message Line + tagLine_FunctionID = 1 // uint64 + tagLine_Line = 2 // int64 + + // message Function + tagFunction_ID = 1 // uint64 + tagFunction_Name = 2 // int64 (string table index) + tagFunction_SystemName = 3 // int64 (string table index) + tagFunction_Filename = 4 // int64 (string table index) + tagFunction_StartLine = 5 // int64 +) + +// stringIndex adds s to the string table if not already present +// and returns the index of s in the string table. +func (b *profileBuilder) stringIndex(s string) int64 { + id, ok := b.stringMap[s] + if !ok { + id = len(b.strings) + b.strings = append(b.strings, s) + b.stringMap[s] = id + } + return int64(id) +} + +func (b *profileBuilder) flush() { + const dataFlush = 4096 + if b.pb.nest == 0 && len(b.pb.data) > dataFlush { + b.zw.Write(b.pb.data) + b.pb.data = b.pb.data[:0] + } +} + +// pbValueType encodes a ValueType message to b.pb. 
+func (b *profileBuilder) pbValueType(tag int, typ, unit string) { + start := b.pb.startMessage() + b.pb.int64(tagValueType_Type, b.stringIndex(typ)) + b.pb.int64(tagValueType_Unit, b.stringIndex(unit)) + b.pb.endMessage(tag, start) +} + +// pbSample encodes a Sample message to b.pb. +func (b *profileBuilder) pbSample(values []int64, locs []uint64, labels func()) { + start := b.pb.startMessage() + b.pb.int64s(tagSample_Value, values) + b.pb.uint64s(tagSample_Location, locs) + if labels != nil { + labels() + } + b.pb.endMessage(tagProfile_Sample, start) + b.flush() +} + +// pbLabel encodes a Label message to b.pb. +func (b *profileBuilder) pbLabel(tag int, key, str string, num int64) { + start := b.pb.startMessage() + b.pb.int64Opt(tagLabel_Key, b.stringIndex(key)) + b.pb.int64Opt(tagLabel_Str, b.stringIndex(str)) + b.pb.int64Opt(tagLabel_Num, num) + b.pb.endMessage(tag, start) +} + +// pbLine encodes a Line message to b.pb. +func (b *profileBuilder) pbLine(tag int, funcID uint64, line int64) { + start := b.pb.startMessage() + b.pb.uint64Opt(tagLine_FunctionID, funcID) + b.pb.int64Opt(tagLine_Line, line) + b.pb.endMessage(tag, start) +} + +// pbMapping encodes a Mapping message to b.pb. +func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string, hasFuncs bool) { + start := b.pb.startMessage() + b.pb.uint64Opt(tagMapping_ID, id) + b.pb.uint64Opt(tagMapping_Start, base) + b.pb.uint64Opt(tagMapping_Limit, limit) + b.pb.uint64Opt(tagMapping_Offset, offset) + b.pb.int64Opt(tagMapping_Filename, b.stringIndex(file)) + b.pb.int64Opt(tagMapping_BuildID, b.stringIndex(buildID)) + // TODO: we set HasFunctions if all symbols from samples were symbolized (hasFuncs). + // Decide what to do about HasInlineFrames and HasLineNumbers. + // Also, another approach to handle the mapping entry with + // incomplete symbolization results is to dupliace the mapping + // entry (but with different Has* fields values) and use + // different entries for symbolized locations and unsymbolized locations. + if hasFuncs { + b.pb.bool(tagMapping_HasFunctions, true) + } + b.pb.endMessage(tag, start) +} + +func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) { + // Expand this one address using CallersFrames so we can cache + // each expansion. In general, CallersFrames takes a whole + // stack, but in this case we know there will be no skips in + // the stack and we have return PCs anyway. + frames := runtime.CallersFrames([]uintptr{addr}) + frame, more := frames.Next() + if frame.Function == "runtime.goexit" { + // Short-circuit if we see runtime.goexit so the loop + // below doesn't allocate a useless empty location. + return nil, 0 + } + + symbolizeResult := lookupTried + if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 { + symbolizeResult |= lookupFailed + } + + if frame.PC == 0 { + // If we failed to resolve the frame, at least make up + // a reasonable call PC. This mostly happens in tests. 
+ frame.PC = addr - 1 + } + ret := []runtime.Frame{frame} + for frame.Function != "runtime.goexit" && more == true { + frame, more = frames.Next() + ret = append(ret, frame) + } + return ret, symbolizeResult +} + +type locInfo struct { + // location id assigned by the profileBuilder + id uint64 + + // sequence of PCs, including the fake PCs returned by the traceback + // to represent inlined functions + // https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368 + pcs []uintptr +} + +// newProfileBuilder returns a new profileBuilder. +// CPU profiling data obtained from the runtime can be added +// by calling b.addCPUData, and then the eventual profile +// can be obtained by calling b.finish. +func newProfileBuilder(w io.Writer) *profileBuilder { + zw, _ := gzip.NewWriterLevel(w, gzip.BestSpeed) + b := &profileBuilder{ + w: w, + zw: zw, + start: time.Now(), + strings: []string{""}, + stringMap: map[string]int{"": 0}, + locs: map[uintptr]locInfo{}, + funcs: map[string]int{}, + } + b.readMapping() + return b +} + +// addCPUData adds the CPU profiling data to the profile. +// +// The data must be a whole number of records, as delivered by the runtime. +// len(tags) must be equal to the number of records in data. +func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error { + if !b.havePeriod { + // first record is period + if len(data) < 3 { + return fmt.Errorf("truncated profile") + } + if data[0] != 3 || data[2] == 0 { + return fmt.Errorf("malformed profile") + } + // data[2] is sampling rate in Hz. Convert to sampling + // period in nanoseconds. + b.period = 1e9 / int64(data[2]) + b.havePeriod = true + data = data[3:] + // Consume tag slot. Note that there isn't a meaningful tag + // value for this record. + tags = tags[1:] + } + + // Parse CPU samples from the profile. + // Each sample is 3+n uint64s: + // data[0] = 3+n + // data[1] = time stamp (ignored) + // data[2] = count + // data[3:3+n] = stack + // If the count is 0 and the stack has length 1, + // that's an overflow record inserted by the runtime + // to indicate that stack[0] samples were lost. + // Otherwise the count is usually 1, + // but in a few special cases like lost non-Go samples + // there can be larger counts. + // Because many samples with the same stack arrive, + // we want to deduplicate immediately, which we do + // using the b.m profMap. + for len(data) > 0 { + if len(data) < 3 || data[0] > uint64(len(data)) { + return fmt.Errorf("truncated profile") + } + if data[0] < 3 || tags != nil && len(tags) < 1 { + return fmt.Errorf("malformed profile") + } + if len(tags) < 1 { + return fmt.Errorf("mismatched profile records and tags") + } + count := data[2] + stk := data[3:data[0]] + data = data[data[0]:] + tag := tags[0] + tags = tags[1:] + + if count == 0 && len(stk) == 1 { + // overflow record + count = uint64(stk[0]) + stk = []uint64{ + // gentraceback guarantees that PCs in the + // stack can be unconditionally decremented and + // still be valid, so we must do the same. + uint64(abi.FuncPCABIInternal(lostProfileEvent) + 1), + } + } + b.m.lookup(stk, tag).count += int64(count) + } + + if len(tags) != 0 { + return fmt.Errorf("mismatched profile records and tags") + } + return nil +} + +// build completes and returns the constructed profile. 
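The record layout that addCPUData documents above can be walked independently; decodeRecords below is a hypothetical helper (not part of this package) illustrating the same framing, including the overflow-record case:

package main

import "fmt"

// decodeRecords walks CPU records in the layout described above: each record
// is data[0] = 3+n, data[1] = timestamp, data[2] = count, data[3:3+n] = stack,
// with a count of 0 and a one-entry stack marking an overflow record. It
// assumes the leading period record has already been stripped.
func decodeRecords(data []uint64, visit func(count uint64, stk []uint64)) error {
	for len(data) > 0 {
		if len(data) < 3 || data[0] < 3 || data[0] > uint64(len(data)) {
			return fmt.Errorf("truncated or malformed record")
		}
		count, stk := data[2], data[3:data[0]]
		if count == 0 && len(stk) == 1 {
			// Overflow record: stk[0] is the number of lost samples.
			count, stk = stk[0], nil
		}
		visit(count, stk)
		data = data[data[0]:]
	}
	return nil
}

func main() {
	// One ordinary sample (count 50, two PCs) followed by an overflow record
	// reporting 42 lost samples.
	input := []uint64{5, 0, 50, 0x1234, 0x5678, 4, 0, 0, 42}
	decodeRecords(input, func(count uint64, stk []uint64) {
		fmt.Println(count, stk)
	})
}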
+func (b *profileBuilder) build() { + b.end = time.Now() + + b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano()) + if b.havePeriod { // must be CPU profile + b.pbValueType(tagProfile_SampleType, "samples", "count") + b.pbValueType(tagProfile_SampleType, "cpu", "nanoseconds") + b.pb.int64Opt(tagProfile_DurationNanos, b.end.Sub(b.start).Nanoseconds()) + b.pbValueType(tagProfile_PeriodType, "cpu", "nanoseconds") + b.pb.int64Opt(tagProfile_Period, b.period) + } + + values := []int64{0, 0} + var locs []uint64 + + for e := b.m.all; e != nil; e = e.nextAll { + values[0] = e.count + values[1] = e.count * b.period + + var labels func() + if e.tag != nil { + labels = func() { + for k, v := range *(*labelMap)(e.tag) { + b.pbLabel(tagSample_Label, k, v, 0) + } + } + } + + locs = b.appendLocsForStack(locs[:0], e.stk) + + b.pbSample(values, locs, labels) + } + + for i, m := range b.mem { + hasFunctions := m.funcs == lookupTried // lookupTried but not lookupFailed + b.pbMapping(tagProfile_Mapping, uint64(i+1), uint64(m.start), uint64(m.end), m.offset, m.file, m.buildID, hasFunctions) + } + + // TODO: Anything for tagProfile_DropFrames? + // TODO: Anything for tagProfile_KeepFrames? + + b.pb.strings(tagProfile_StringTable, b.strings) + b.zw.Write(b.pb.data) + b.zw.Close() +} + +// appendLocsForStack appends the location IDs for the given stack trace to the given +// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of +// an inline marker as the runtime traceback function returns. +// +// It may emit to b.pb, so there must be no message encoding in progress. +func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) { + b.deck.reset() + + // The last frame might be truncated. Recover lost inline frames. + stk = runtime_expandFinalInlineFrame(stk) + + for len(stk) > 0 { + addr := stk[0] + if l, ok := b.locs[addr]; ok { + // first record the location if there is any pending accumulated info. + if id := b.emitLocation(); id > 0 { + locs = append(locs, id) + } + + // then, record the cached location. + locs = append(locs, l.id) + + // Skip the matching pcs. + // + // Even if stk was truncated due to the stack depth + // limit, expandFinalInlineFrame above has already + // fixed the truncation, ensuring it is long enough. + stk = stk[len(l.pcs):] + continue + } + + frames, symbolizeResult := allFrames(addr) + if len(frames) == 0 { // runtime.goexit. + if id := b.emitLocation(); id > 0 { + locs = append(locs, id) + } + stk = stk[1:] + continue + } + + if added := b.deck.tryAdd(addr, frames, symbolizeResult); added { + stk = stk[1:] + continue + } + // add failed because this addr is not inlined with the + // existing PCs in the deck. Flush the deck and retry handling + // this pc. + if id := b.emitLocation(); id > 0 { + locs = append(locs, id) + } + + // check cache again - previous emitLocation added a new entry + if l, ok := b.locs[addr]; ok { + locs = append(locs, l.id) + stk = stk[len(l.pcs):] // skip the matching pcs. + } else { + b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed. + stk = stk[1:] + } + } + if id := b.emitLocation(); id > 0 { // emit remaining location. + locs = append(locs, id) + } + return locs +} + +// pcDeck is a helper to detect a sequence of inlined functions from +// a stack trace returned by the runtime. +// +// The stack traces returned by runtime's trackback functions are fully +// expanded (at least for Go functions) and include the fake pcs representing +// inlined functions. 
The profile proto expects the inlined functions to be +// encoded in one Location message. +// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184 +// +// Runtime does not directly expose whether a frame is for an inlined function +// and looking up debug info is not ideal, so we use a heuristic to filter +// the fake pcs and restore the inlined and entry functions. Inlined functions +// have the following properties: +// Frame's Func is nil (note: also true for non-Go functions), and +// Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and +// Frame's Name does not match its entry function frame's name (note: inlined functions cannot be directly recursive). +// +// As reading and processing the pcs in a stack trace one by one (from leaf to the root), +// we use pcDeck to temporarily hold the observed pcs and their expanded frames +// until we observe the entry function frame. +type pcDeck struct { + pcs []uintptr + frames []runtime.Frame + symbolizeResult symbolizeFlag +} + +func (d *pcDeck) reset() { + d.pcs = d.pcs[:0] + d.frames = d.frames[:0] + d.symbolizeResult = 0 +} + +// tryAdd tries to add the pc and Frames expanded from it (most likely one, +// since the stack trace is already fully expanded) and the symbolizeResult +// to the deck. If it fails the caller needs to flush the deck and retry. +func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) { + if existing := len(d.pcs); existing > 0 { + // 'd.frames' are all expanded from one 'pc' and represent all + // inlined functions so we check only the last one. + newFrame := frames[0] + last := d.frames[existing-1] + if last.Func != nil { // the last frame can't be inlined. Flush. + return false + } + if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge. + return false + } + + if last.Entry != newFrame.Entry { // newFrame is for a different function. + return false + } + if last.Function == newFrame.Function { // maybe recursion. + return false + } + } + d.pcs = append(d.pcs, pc) + d.frames = append(d.frames, frames...) + d.symbolizeResult |= symbolizeResult + return true +} + +// emitLocation emits the new location and function information recorded in the deck +// and returns the location ID encoded in the profile protobuf. +// It emits to b.pb, so there must be no message encoding in progress. +// It resets the deck. +func (b *profileBuilder) emitLocation() uint64 { + if len(b.deck.pcs) == 0 { + return 0 + } + defer b.deck.reset() + + addr := b.deck.pcs[0] + firstFrame := b.deck.frames[0] + + // We can't write out functions while in the middle of the + // Location message, so record new functions we encounter and + // write them out after the Location. + type newFunc struct { + id uint64 + name, file string + } + newFuncs := make([]newFunc, 0, 8) + + id := uint64(len(b.locs)) + 1 + b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, b.deck.pcs...)} + + start := b.pb.startMessage() + b.pb.uint64Opt(tagLocation_ID, id) + b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC)) + for _, frame := range b.deck.frames { + // Write out each line in frame expansion. 
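The inline-frame behavior that pcDeck and emitLocation rely on can be observed directly with runtime.CallersFrames; a standalone sketch (the function names capturePC, middle, and outer are made up, and whether middle is actually inlined is ultimately up to the compiler):

package main

import (
	"fmt"
	"runtime"
)

//go:noinline
func capturePC() uintptr {
	pcs := make([]uintptr, 1)
	// Skip runtime.Callers and capturePC so pcs[0] is the frame of our caller.
	runtime.Callers(2, pcs)
	return pcs[0]
}

// middle is small enough that the compiler will usually inline it into outer.
func middle() uintptr { return capturePC() }

//go:noinline
func outer() uintptr { return middle() }

func main() {
	pc := outer()
	// If middle was inlined, expanding this single PC yields two frames:
	// main.middle (an inlined frame, so Frame.Func is nil) and main.outer.
	frames := runtime.CallersFrames([]uintptr{pc})
	for {
		f, more := frames.Next()
		fmt.Printf("%s Func==nil:%v\n", f.Function, f.Func == nil)
		if !more {
			break
		}
	}
}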
+ funcID := uint64(b.funcs[frame.Function]) + if funcID == 0 { + funcID = uint64(len(b.funcs)) + 1 + b.funcs[frame.Function] = int(funcID) + newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File}) + } + b.pbLine(tagLocation_Line, funcID, int64(frame.Line)) + } + for i := range b.mem { + if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake { + b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1)) + + m := b.mem[i] + m.funcs |= b.deck.symbolizeResult + b.mem[i] = m + break + } + } + b.pb.endMessage(tagProfile_Location, start) + + // Write out functions we found during frame expansion. + for _, fn := range newFuncs { + start := b.pb.startMessage() + b.pb.uint64Opt(tagFunction_ID, fn.id) + b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name)) + b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name)) + b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file)) + b.pb.endMessage(tagProfile_Function, start) + } + + b.flush() + return id +} + +// readMapping reads /proc/self/maps and writes mappings to b.pb. +// It saves the address ranges of the mappings in b.mem for use +// when emitting locations. +func (b *profileBuilder) readMapping() { + data, _ := os.ReadFile("/proc/self/maps") + parseProcSelfMaps(data, b.addMapping) + if len(b.mem) == 0 { // pprof expects a map entry, so fake one. + b.addMappingEntry(0, 0, 0, "", "", true) + // TODO(hyangah): make addMapping return *memMap or + // take a memMap struct, and get rid of addMappingEntry + // that takes a bunch of positional arguments. + } +} + +var space = []byte(" ") +var newline = []byte("\n") + +func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, buildID string)) { + // $ cat /proc/self/maps + // 00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat + // 0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat + // 0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat + // 014ab000-014cc000 rw-p 00000000 00:00 0 [heap] + // 7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive + // 7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so + // 7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so + // 7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so + // 7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so + // 7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0 + // 7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so + // 7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0 + // 7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0 + // 7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so + // 7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so + // 7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0 + // 7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack] + // 7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso] + // ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall] + + var line []byte + // next removes and returns the next field in the line. + // It also removes from line any spaces following the field. 
+ next := func() []byte { + var f []byte + f, line, _ = bytes.Cut(line, space) + line = bytes.TrimLeft(line, " ") + return f + } + + for len(data) > 0 { + line, data, _ = bytes.Cut(data, newline) + addr := next() + loStr, hiStr, ok := strings.Cut(string(addr), "-") + if !ok { + continue + } + lo, err := strconv.ParseUint(loStr, 16, 64) + if err != nil { + continue + } + hi, err := strconv.ParseUint(hiStr, 16, 64) + if err != nil { + continue + } + perm := next() + if len(perm) < 4 || perm[2] != 'x' { + // Only interested in executable mappings. + continue + } + offset, err := strconv.ParseUint(string(next()), 16, 64) + if err != nil { + continue + } + next() // dev + inode := next() // inode + if line == nil { + continue + } + file := string(line) + + // Trim deleted file marker. + deletedStr := " (deleted)" + deletedLen := len(deletedStr) + if len(file) >= deletedLen && file[len(file)-deletedLen:] == deletedStr { + file = file[:len(file)-deletedLen] + } + + if len(inode) == 1 && inode[0] == '0' && file == "" { + // Huge-page text mappings list the initial fragment of + // mapped but unpopulated memory as being inode 0. + // Don't report that part. + // But [vdso] and [vsyscall] are inode 0, so let non-empty file names through. + continue + } + + // TODO: pprof's remapMappingIDs makes two adjustments: + // 1. If there is an /anon_hugepage mapping first and it is + // consecutive to a next mapping, drop the /anon_hugepage. + // 2. If start-offset = 0x400000, change start to 0x400000 and offset to 0. + // There's no indication why either of these is needed. + // Let's try not doing these and see what breaks. + // If we do need them, they would go here, before we + // enter the mappings into b.mem in the first place. + + buildID, _ := elfBuildID(file) + addMapping(lo, hi, offset, file, buildID) + } +} + +func (b *profileBuilder) addMapping(lo, hi, offset uint64, file, buildID string) { + b.addMappingEntry(lo, hi, offset, file, buildID, false) +} + +func (b *profileBuilder) addMappingEntry(lo, hi, offset uint64, file, buildID string, fake bool) { + b.mem = append(b.mem, memMap{ + start: uintptr(lo), + end: uintptr(hi), + offset: offset, + file: file, + buildID: buildID, + fake: fake, + }) +} diff --git a/src/runtime/pprof/proto_test.go b/src/runtime/pprof/proto_test.go new file mode 100644 index 0000000..84a051a --- /dev/null +++ b/src/runtime/pprof/proto_test.go @@ -0,0 +1,440 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "bytes" + "encoding/json" + "fmt" + "internal/abi" + "internal/profile" + "internal/testenv" + "os" + "os/exec" + "reflect" + "runtime" + "strings" + "testing" + "unsafe" +) + +// translateCPUProfile parses binary CPU profiling stack trace data +// generated by runtime.CPUProfile() into a profile struct. +// This is only used for testing. Real conversions stream the +// data into the profileBuilder as it becomes available. +// +// count is the number of records in data. +func translateCPUProfile(data []uint64, count int) (*profile.Profile, error) { + var buf bytes.Buffer + b := newProfileBuilder(&buf) + tags := make([]unsafe.Pointer, count) + if err := b.addCPUData(data, tags); err != nil { + return nil, err + } + b.build() + return profile.Parse(&buf) +} + +// fmtJSON returns a pretty-printed JSON form for x. +// It works reasonbly well for printing protocol-buffer +// data structures like profile.Profile. 
+func fmtJSON(x any) string { + js, _ := json.MarshalIndent(x, "", "\t") + return string(js) +} + +func TestConvertCPUProfileEmpty(t *testing.T) { + // A test server with mock cpu profile data. + var buf bytes.Buffer + + b := []uint64{3, 0, 500} // empty profile at 500 Hz (2ms sample period) + p, err := translateCPUProfile(b, 1) + if err != nil { + t.Fatalf("translateCPUProfile: %v", err) + } + if err := p.Write(&buf); err != nil { + t.Fatalf("writing profile: %v", err) + } + + p, err = profile.Parse(&buf) + if err != nil { + t.Fatalf("profile.Parse: %v", err) + } + + // Expected PeriodType and SampleType. + periodType := &profile.ValueType{Type: "cpu", Unit: "nanoseconds"} + sampleType := []*profile.ValueType{ + {Type: "samples", Unit: "count"}, + {Type: "cpu", Unit: "nanoseconds"}, + } + + checkProfile(t, p, 2000*1000, periodType, sampleType, nil, "") +} + +func f1() { f1() } +func f2() { f2() } + +// testPCs returns two PCs and two corresponding memory mappings +// to use in test profiles. +func testPCs(t *testing.T) (addr1, addr2 uint64, map1, map2 *profile.Mapping) { + switch runtime.GOOS { + case "linux", "android", "netbsd": + // Figure out two addresses from /proc/self/maps. + mmap, err := os.ReadFile("/proc/self/maps") + if err != nil { + t.Fatal(err) + } + mprof := &profile.Profile{} + if err = mprof.ParseMemoryMap(bytes.NewReader(mmap)); err != nil { + t.Fatalf("parsing /proc/self/maps: %v", err) + } + if len(mprof.Mapping) < 2 { + // It is possible for a binary to only have 1 executable + // region of memory. + t.Skipf("need 2 or more mappings, got %v", len(mprof.Mapping)) + } + addr1 = mprof.Mapping[0].Start + map1 = mprof.Mapping[0] + map1.BuildID, _ = elfBuildID(map1.File) + addr2 = mprof.Mapping[1].Start + map2 = mprof.Mapping[1] + map2.BuildID, _ = elfBuildID(map2.File) + case "js": + addr1 = uint64(abi.FuncPCABIInternal(f1)) + addr2 = uint64(abi.FuncPCABIInternal(f2)) + default: + addr1 = uint64(abi.FuncPCABIInternal(f1)) + addr2 = uint64(abi.FuncPCABIInternal(f2)) + // Fake mapping - HasFunctions will be true because two PCs from Go + // will be fully symbolized. 
+ fake := &profile.Mapping{ID: 1, HasFunctions: true} + map1, map2 = fake, fake + } + return +} + +func TestConvertCPUProfile(t *testing.T) { + addr1, addr2, map1, map2 := testPCs(t) + + b := []uint64{ + 3, 0, 500, // hz = 500 + 5, 0, 10, uint64(addr1 + 1), uint64(addr1 + 2), // 10 samples in addr1 + 5, 0, 40, uint64(addr2 + 1), uint64(addr2 + 2), // 40 samples in addr2 + 5, 0, 10, uint64(addr1 + 1), uint64(addr1 + 2), // 10 samples in addr1 + } + p, err := translateCPUProfile(b, 4) + if err != nil { + t.Fatalf("translating profile: %v", err) + } + period := int64(2000 * 1000) + periodType := &profile.ValueType{Type: "cpu", Unit: "nanoseconds"} + sampleType := []*profile.ValueType{ + {Type: "samples", Unit: "count"}, + {Type: "cpu", Unit: "nanoseconds"}, + } + samples := []*profile.Sample{ + {Value: []int64{20, 20 * 2000 * 1000}, Location: []*profile.Location{ + {ID: 1, Mapping: map1, Address: addr1}, + {ID: 2, Mapping: map1, Address: addr1 + 1}, + }}, + {Value: []int64{40, 40 * 2000 * 1000}, Location: []*profile.Location{ + {ID: 3, Mapping: map2, Address: addr2}, + {ID: 4, Mapping: map2, Address: addr2 + 1}, + }}, + } + checkProfile(t, p, period, periodType, sampleType, samples, "") +} + +func checkProfile(t *testing.T, p *profile.Profile, period int64, periodType *profile.ValueType, sampleType []*profile.ValueType, samples []*profile.Sample, defaultSampleType string) { + t.Helper() + + if p.Period != period { + t.Errorf("p.Period = %d, want %d", p.Period, period) + } + if !reflect.DeepEqual(p.PeriodType, periodType) { + t.Errorf("p.PeriodType = %v\nwant = %v", fmtJSON(p.PeriodType), fmtJSON(periodType)) + } + if !reflect.DeepEqual(p.SampleType, sampleType) { + t.Errorf("p.SampleType = %v\nwant = %v", fmtJSON(p.SampleType), fmtJSON(sampleType)) + } + if defaultSampleType != p.DefaultSampleType { + t.Errorf("p.DefaultSampleType = %v\nwant = %v", p.DefaultSampleType, defaultSampleType) + } + // Clear line info since it is not in the expected samples. + // If we used f1 and f2 above, then the samples will have line info. 
+ for _, s := range p.Sample { + for _, l := range s.Location { + l.Line = nil + } + } + if fmtJSON(p.Sample) != fmtJSON(samples) { // ignore unexported fields + if len(p.Sample) == len(samples) { + for i := range p.Sample { + if !reflect.DeepEqual(p.Sample[i], samples[i]) { + t.Errorf("sample %d = %v\nwant = %v\n", i, fmtJSON(p.Sample[i]), fmtJSON(samples[i])) + } + } + if t.Failed() { + t.FailNow() + } + } + t.Fatalf("p.Sample = %v\nwant = %v", fmtJSON(p.Sample), fmtJSON(samples)) + } +} + +var profSelfMapsTests = ` +00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat +0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat +0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat +014ab000-014cc000 rw-p 00000000 00:00 0 [heap] +7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive +7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0 +7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0 +7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0 +7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0 +7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack] +7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso] +ffffffffff600000-ffffffffff601000 r-xp 00000090 00:00 0 [vsyscall] +-> +00400000 0040b000 00000000 /bin/cat +7f7d7797c000 7f7d77b36000 00000000 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d41000 7f7d77d64000 00000000 /lib/x86_64-linux-gnu/ld-2.19.so +7ffc34343000 7ffc34345000 00000000 [vdso] +ffffffffff600000 ffffffffff601000 00000090 [vsyscall] + +00400000-07000000 r-xp 00000000 00:00 0 +07000000-07093000 r-xp 06c00000 00:2e 536754 /path/to/gobench_server_main +07093000-0722d000 rw-p 06c92000 00:2e 536754 /path/to/gobench_server_main +0722d000-07b21000 rw-p 00000000 00:00 0 +c000000000-c000036000 rw-p 00000000 00:00 0 +-> +07000000 07093000 06c00000 /path/to/gobench_server_main +` + +var profSelfMapsTestsWithDeleted = ` +00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat (deleted) +0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat (deleted) +0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat (deleted) +014ab000-014cc000 rw-p 00000000 00:00 0 [heap] +7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive +7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0 +7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0 +7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0 +7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 
/lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0 +7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack] +7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso] +ffffffffff600000-ffffffffff601000 r-xp 00000090 00:00 0 [vsyscall] +-> +00400000 0040b000 00000000 /bin/cat +7f7d7797c000 7f7d77b36000 00000000 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d41000 7f7d77d64000 00000000 /lib/x86_64-linux-gnu/ld-2.19.so +7ffc34343000 7ffc34345000 00000000 [vdso] +ffffffffff600000 ffffffffff601000 00000090 [vsyscall] + +00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat with space +0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat with space +0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat with space +014ab000-014cc000 rw-p 00000000 00:00 0 [heap] +7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive +7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0 +7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0 +7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0 +7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0 +7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack] +7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso] +ffffffffff600000-ffffffffff601000 r-xp 00000090 00:00 0 [vsyscall] +-> +00400000 0040b000 00000000 /bin/cat with space +7f7d7797c000 7f7d77b36000 00000000 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d41000 7f7d77d64000 00000000 /lib/x86_64-linux-gnu/ld-2.19.so +7ffc34343000 7ffc34345000 00000000 [vdso] +ffffffffff600000 ffffffffff601000 00000090 [vsyscall] +` + +func TestProcSelfMaps(t *testing.T) { + + f := func(t *testing.T, input string) { + for tx, tt := range strings.Split(input, "\n\n") { + in, out, ok := strings.Cut(tt, "->\n") + if !ok { + t.Fatal("malformed test case") + } + if len(out) > 0 && out[len(out)-1] != '\n' { + out += "\n" + } + var buf bytes.Buffer + parseProcSelfMaps([]byte(in), func(lo, hi, offset uint64, file, buildID string) { + fmt.Fprintf(&buf, "%08x %08x %08x %s\n", lo, hi, offset, file) + }) + if buf.String() != out { + t.Errorf("#%d: have:\n%s\nwant:\n%s\n%q\n%q", tx, buf.String(), out, buf.String(), out) + } + } + } + + t.Run("Normal", func(t *testing.T) { + f(t, profSelfMapsTests) + }) + + t.Run("WithDeletedFile", func(t *testing.T) { + f(t, profSelfMapsTestsWithDeleted) + }) +} + +// TestMapping checks the mapping section of CPU profiles +// has the HasFunctions field set correctly. If all PCs included +// in the samples are successfully symbolized, the corresponding +// mapping entry (in this test case, only one entry) should have +// its HasFunctions field set true. +// The test generates a CPU profile that includes PCs from C side +// that the runtime can't symbolize. See ./testdata/mappingtest. 
+func TestMapping(t *testing.T) {
+	testenv.MustHaveGoRun(t)
+	testenv.MustHaveCGO(t)
+
+	prog := "./testdata/mappingtest/main.go"
+
+	// GoOnly includes only Go symbols that runtime will symbolize.
+	// Go+C includes C symbols that runtime will not symbolize.
+	for _, traceback := range []string{"GoOnly", "Go+C"} {
+		t.Run("traceback"+traceback, func(t *testing.T) {
+			cmd := exec.Command(testenv.GoToolPath(t), "run", prog)
+			if traceback != "GoOnly" {
+				cmd.Env = append(os.Environ(), "SETCGOTRACEBACK=1")
+			}
+			cmd.Stderr = new(bytes.Buffer)
+
+			out, err := cmd.Output()
+			if err != nil {
+				t.Fatalf("failed to run the test program %q: %v\n%v", prog, err, cmd.Stderr)
+			}
+
+			prof, err := profile.Parse(bytes.NewReader(out))
+			if err != nil {
+				t.Fatalf("failed to parse the generated profile data: %v", err)
+			}
+			t.Logf("Profile: %s", prof)
+
+			hit := make(map[*profile.Mapping]bool)
+			miss := make(map[*profile.Mapping]bool)
+			for _, loc := range prof.Location {
+				if symbolized(loc) {
+					hit[loc.Mapping] = true
+				} else {
+					miss[loc.Mapping] = true
+				}
+			}
+			if len(miss) == 0 {
+				t.Log("no location with missing symbol info was sampled")
+			}
+
+			for _, m := range prof.Mapping {
+				if miss[m] && m.HasFunctions {
+					t.Errorf("mapping %+v has HasFunctions=true, but contains locations with failed symbolization", m)
+					continue
+				}
+				if !miss[m] && hit[m] && !m.HasFunctions {
+					t.Errorf("mapping %+v has HasFunctions=false, but all referenced locations from this mapping were symbolized successfully", m)
+					continue
+				}
+			}
+
+			if traceback == "Go+C" {
+				// The test code was arranged to have PCs from C and
+				// they are not symbolized.
+				// Check no Location containing those unsymbolized PCs contains multiple lines.
+				for i, loc := range prof.Location {
+					if !symbolized(loc) && len(loc.Line) > 1 {
+						t.Errorf("Location[%d] contains unsymbolized PCs and multiple lines: %v", i, loc)
+					}
+				}
+			}
+		})
+	}
+}
+
+func symbolized(loc *profile.Location) bool {
+	if len(loc.Line) == 0 {
+		return false
+	}
+	l := loc.Line[0]
+	f := l.Function
+	if l.Line == 0 || f == nil || f.Name == "" || f.Filename == "" {
+		return false
+	}
+	return true
+}
+
+// TestFakeMapping tests that at least one mapping exists
+// (including a fake mapping), and that the HasFunctions bits
+// are set correctly.
+func TestFakeMapping(t *testing.T) {
+	var buf bytes.Buffer
+	if err := Lookup("heap").WriteTo(&buf, 0); err != nil {
+		t.Fatalf("failed to write heap profile: %v", err)
+	}
+	prof, err := profile.Parse(&buf)
+	if err != nil {
+		t.Fatalf("failed to parse the generated profile data: %v", err)
+	}
+	t.Logf("Profile: %s", prof)
+	if len(prof.Mapping) == 0 {
+		t.Fatal("want profile with at least one mapping entry, got 0 mapping")
+	}
+
+	hit := make(map[*profile.Mapping]bool)
+	miss := make(map[*profile.Mapping]bool)
+	for _, loc := range prof.Location {
+		if symbolized(loc) {
+			hit[loc.Mapping] = true
+		} else {
+			miss[loc.Mapping] = true
+		}
+	}
+	for _, m := range prof.Mapping {
+		if miss[m] && m.HasFunctions {
+			t.Errorf("mapping %+v has HasFunctions=true, but contains locations with failed symbolization", m)
+			continue
+		}
+		if !miss[m] && hit[m] && !m.HasFunctions {
+			t.Errorf("mapping %+v has HasFunctions=false, but all referenced locations from this mapping were symbolized successfully", m)
+			continue
+		}
+	}
+}
+
+// Make sure the profiler can handle an empty stack trace.
+// See issue 37967.
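+// In the encoded data below, the record 3, 0, 10 is such a sample: its first
+// word gives the record length in words, so it carries a count of 10 but no PCs.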
+func TestEmptyStack(t *testing.T) { + b := []uint64{ + 3, 0, 500, // hz = 500 + 3, 0, 10, // 10 samples with an empty stack trace + } + _, err := translateCPUProfile(b, 2) + if err != nil { + t.Fatalf("translating profile: %v", err) + } +} diff --git a/src/runtime/pprof/protobuf.go b/src/runtime/pprof/protobuf.go new file mode 100644 index 0000000..7b99095 --- /dev/null +++ b/src/runtime/pprof/protobuf.go @@ -0,0 +1,141 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +// A protobuf is a simple protocol buffer encoder. +type protobuf struct { + data []byte + tmp [16]byte + nest int +} + +func (b *protobuf) varint(x uint64) { + for x >= 128 { + b.data = append(b.data, byte(x)|0x80) + x >>= 7 + } + b.data = append(b.data, byte(x)) +} + +func (b *protobuf) length(tag int, len int) { + b.varint(uint64(tag)<<3 | 2) + b.varint(uint64(len)) +} + +func (b *protobuf) uint64(tag int, x uint64) { + // append varint to b.data + b.varint(uint64(tag)<<3 | 0) + b.varint(x) +} + +func (b *protobuf) uint64s(tag int, x []uint64) { + if len(x) > 2 { + // Use packed encoding + n1 := len(b.data) + for _, u := range x { + b.varint(u) + } + n2 := len(b.data) + b.length(tag, n2-n1) + n3 := len(b.data) + copy(b.tmp[:], b.data[n2:n3]) + copy(b.data[n1+(n3-n2):], b.data[n1:n2]) + copy(b.data[n1:], b.tmp[:n3-n2]) + return + } + for _, u := range x { + b.uint64(tag, u) + } +} + +func (b *protobuf) uint64Opt(tag int, x uint64) { + if x == 0 { + return + } + b.uint64(tag, x) +} + +func (b *protobuf) int64(tag int, x int64) { + u := uint64(x) + b.uint64(tag, u) +} + +func (b *protobuf) int64Opt(tag int, x int64) { + if x == 0 { + return + } + b.int64(tag, x) +} + +func (b *protobuf) int64s(tag int, x []int64) { + if len(x) > 2 { + // Use packed encoding + n1 := len(b.data) + for _, u := range x { + b.varint(uint64(u)) + } + n2 := len(b.data) + b.length(tag, n2-n1) + n3 := len(b.data) + copy(b.tmp[:], b.data[n2:n3]) + copy(b.data[n1+(n3-n2):], b.data[n1:n2]) + copy(b.data[n1:], b.tmp[:n3-n2]) + return + } + for _, u := range x { + b.int64(tag, u) + } +} + +func (b *protobuf) string(tag int, x string) { + b.length(tag, len(x)) + b.data = append(b.data, x...) +} + +func (b *protobuf) strings(tag int, x []string) { + for _, s := range x { + b.string(tag, s) + } +} + +func (b *protobuf) stringOpt(tag int, x string) { + if x == "" { + return + } + b.string(tag, x) +} + +func (b *protobuf) bool(tag int, x bool) { + if x { + b.uint64(tag, 1) + } else { + b.uint64(tag, 0) + } +} + +func (b *protobuf) boolOpt(tag int, x bool) { + if x == false { + return + } + b.bool(tag, x) +} + +type msgOffset int + +func (b *protobuf) startMessage() msgOffset { + b.nest++ + return msgOffset(len(b.data)) +} + +func (b *protobuf) endMessage(tag int, start msgOffset) { + n1 := int(start) + n2 := len(b.data) + b.length(tag, n2-n1) + n3 := len(b.data) + copy(b.tmp[:], b.data[n2:n3]) + copy(b.data[n1+(n3-n2):], b.data[n1:n2]) + copy(b.data[n1:], b.tmp[:n3-n2]) + b.nest-- +} diff --git a/src/runtime/pprof/protomem.go b/src/runtime/pprof/protomem.go new file mode 100644 index 0000000..fa75a28 --- /dev/null +++ b/src/runtime/pprof/protomem.go @@ -0,0 +1,93 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package pprof + +import ( + "io" + "math" + "runtime" + "strings" +) + +// writeHeapProto writes the current heap profile in protobuf format to w. +func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defaultSampleType string) error { + b := newProfileBuilder(w) + b.pbValueType(tagProfile_PeriodType, "space", "bytes") + b.pb.int64Opt(tagProfile_Period, rate) + b.pbValueType(tagProfile_SampleType, "alloc_objects", "count") + b.pbValueType(tagProfile_SampleType, "alloc_space", "bytes") + b.pbValueType(tagProfile_SampleType, "inuse_objects", "count") + b.pbValueType(tagProfile_SampleType, "inuse_space", "bytes") + if defaultSampleType != "" { + b.pb.int64Opt(tagProfile_DefaultSampleType, b.stringIndex(defaultSampleType)) + } + + values := []int64{0, 0, 0, 0} + var locs []uint64 + for _, r := range p { + hideRuntime := true + for tries := 0; tries < 2; tries++ { + stk := r.Stack() + // For heap profiles, all stack + // addresses are return PCs, which is + // what appendLocsForStack expects. + if hideRuntime { + for i, addr := range stk { + if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") { + continue + } + // Found non-runtime. Show any runtime uses above it. + stk = stk[i:] + break + } + } + locs = b.appendLocsForStack(locs[:0], stk) + if len(locs) > 0 { + break + } + hideRuntime = false // try again, and show all frames next time. + } + + values[0], values[1] = scaleHeapSample(r.AllocObjects, r.AllocBytes, rate) + values[2], values[3] = scaleHeapSample(r.InUseObjects(), r.InUseBytes(), rate) + var blockSize int64 + if r.AllocObjects > 0 { + blockSize = r.AllocBytes / r.AllocObjects + } + b.pbSample(values, locs, func() { + if blockSize != 0 { + b.pbLabel(tagSample_Label, "bytes", "", blockSize) + } + }) + } + b.build() + return nil +} + +// scaleHeapSample adjusts the data from a heap Sample to +// account for its probability of appearing in the collected +// data. heap profiles are a sampling of the memory allocations +// requests in a program. We estimate the unsampled value by dividing +// each collected sample by its probability of appearing in the +// profile. heap profiles rely on a poisson process to determine +// which samples to collect, based on the desired average collection +// rate R. The probability of a sample of size S to appear in that +// profile is 1-exp(-S/R). +func scaleHeapSample(count, size, rate int64) (int64, int64) { + if count == 0 || size == 0 { + return 0, 0 + } + + if rate <= 1 { + // if rate==1 all samples were collected so no adjustment is needed. + // if rate<1 treat as unknown and skip scaling. + return count, size + } + + avgSize := float64(size) / float64(count) + scale := 1 / (1 - math.Exp(-avgSize/float64(rate))) + + return int64(float64(count) * scale), int64(float64(size) * scale) +} diff --git a/src/runtime/pprof/protomem_test.go b/src/runtime/pprof/protomem_test.go new file mode 100644 index 0000000..156f628 --- /dev/null +++ b/src/runtime/pprof/protomem_test.go @@ -0,0 +1,84 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "bytes" + "internal/profile" + "runtime" + "testing" +) + +func TestConvertMemProfile(t *testing.T) { + addr1, addr2, map1, map2 := testPCs(t) + + // MemProfileRecord stacks are return PCs, so add one to the + // addresses recorded in the "profile". 
The proto profile + // locations are call PCs, so conversion will subtract one + // from these and get back to addr1 and addr2. + a1, a2 := uintptr(addr1)+1, uintptr(addr2)+1 + rate := int64(512 * 1024) + rec := []runtime.MemProfileRecord{ + {AllocBytes: 4096, FreeBytes: 1024, AllocObjects: 4, FreeObjects: 1, Stack0: [32]uintptr{a1, a2}}, + {AllocBytes: 512 * 1024, FreeBytes: 0, AllocObjects: 1, FreeObjects: 0, Stack0: [32]uintptr{a2 + 1, a2 + 2}}, + {AllocBytes: 512 * 1024, FreeBytes: 512 * 1024, AllocObjects: 1, FreeObjects: 1, Stack0: [32]uintptr{a1 + 1, a1 + 2, a2 + 3}}, + } + + periodType := &profile.ValueType{Type: "space", Unit: "bytes"} + sampleType := []*profile.ValueType{ + {Type: "alloc_objects", Unit: "count"}, + {Type: "alloc_space", Unit: "bytes"}, + {Type: "inuse_objects", Unit: "count"}, + {Type: "inuse_space", Unit: "bytes"}, + } + samples := []*profile.Sample{ + { + Value: []int64{2050, 2099200, 1537, 1574400}, + Location: []*profile.Location{ + {ID: 1, Mapping: map1, Address: addr1}, + {ID: 2, Mapping: map2, Address: addr2}, + }, + NumLabel: map[string][]int64{"bytes": {1024}}, + }, + { + Value: []int64{1, 829411, 1, 829411}, + Location: []*profile.Location{ + {ID: 3, Mapping: map2, Address: addr2 + 1}, + {ID: 4, Mapping: map2, Address: addr2 + 2}, + }, + NumLabel: map[string][]int64{"bytes": {512 * 1024}}, + }, + { + Value: []int64{1, 829411, 0, 0}, + Location: []*profile.Location{ + {ID: 5, Mapping: map1, Address: addr1 + 1}, + {ID: 6, Mapping: map1, Address: addr1 + 2}, + {ID: 7, Mapping: map2, Address: addr2 + 3}, + }, + NumLabel: map[string][]int64{"bytes": {512 * 1024}}, + }, + } + for _, tc := range []struct { + name string + defaultSampleType string + }{ + {"heap", ""}, + {"allocs", "alloc_space"}, + } { + t.Run(tc.name, func(t *testing.T) { + var buf bytes.Buffer + if err := writeHeapProto(&buf, rec, rate, tc.defaultSampleType); err != nil { + t.Fatalf("writing profile: %v", err) + } + + p, err := profile.Parse(&buf) + if err != nil { + t.Fatalf("profile.Parse: %v", err) + } + + checkProfile(t, p, rate, periodType, sampleType, samples, tc.defaultSampleType) + }) + } +} diff --git a/src/runtime/pprof/runtime.go b/src/runtime/pprof/runtime.go new file mode 100644 index 0000000..dd2545b --- /dev/null +++ b/src/runtime/pprof/runtime.go @@ -0,0 +1,41 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "context" + "unsafe" +) + +// runtime_expandFinalInlineFrame is defined in runtime/symtab.go. +func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr + +// runtime_setProfLabel is defined in runtime/proflabel.go. +func runtime_setProfLabel(labels unsafe.Pointer) + +// runtime_getProfLabel is defined in runtime/proflabel.go. +func runtime_getProfLabel() unsafe.Pointer + +// SetGoroutineLabels sets the current goroutine's labels to match ctx. +// A new goroutine inherits the labels of the goroutine that created it. +// This is a lower-level API than Do, which should be used instead when possible. +func SetGoroutineLabels(ctx context.Context) { + ctxLabels, _ := ctx.Value(labelContextKey{}).(*labelMap) + runtime_setProfLabel(unsafe.Pointer(ctxLabels)) +} + +// Do calls f with a copy of the parent context with the +// given labels added to the parent's label map. +// Goroutines spawned while executing f will inherit the augmented label-set. 
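+// For example, a minimal sketch (the "worker" label and doWork are
+// placeholders, not part of this package):
+//
+//	pprof.Do(ctx, pprof.Labels("worker", "purge"), func(ctx context.Context) {
+//		doWork(ctx) // goroutines started here inherit the worker=purge label
+//	})
+//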
+// Each key/value pair in labels is inserted into the label map in the +// order provided, overriding any previous value for the same key. +// The augmented label map will be set for the duration of the call to f +// and restored once f returns. +func Do(ctx context.Context, labels LabelSet, f func(context.Context)) { + defer SetGoroutineLabels(ctx) + ctx = WithLabels(ctx, labels) + SetGoroutineLabels(ctx) + f(ctx) +} diff --git a/src/runtime/pprof/runtime_test.go b/src/runtime/pprof/runtime_test.go new file mode 100644 index 0000000..0dd5324 --- /dev/null +++ b/src/runtime/pprof/runtime_test.go @@ -0,0 +1,96 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "context" + "fmt" + "reflect" + "testing" +) + +func TestSetGoroutineLabels(t *testing.T) { + sync := make(chan struct{}) + + wantLabels := map[string]string{} + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("Expected parent goroutine's profile labels to be empty before test, got %v", gotLabels) + } + go func() { + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("Expected child goroutine's profile labels to be empty before test, got %v", gotLabels) + } + sync <- struct{}{} + }() + <-sync + + wantLabels = map[string]string{"key": "value"} + ctx := WithLabels(context.Background(), Labels("key", "value")) + SetGoroutineLabels(ctx) + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("parent goroutine's profile labels: got %v, want %v", gotLabels, wantLabels) + } + go func() { + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("child goroutine's profile labels: got %v, want %v", gotLabels, wantLabels) + } + sync <- struct{}{} + }() + <-sync + + wantLabels = map[string]string{} + ctx = context.Background() + SetGoroutineLabels(ctx) + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("Expected parent goroutine's profile labels to be empty, got %v", gotLabels) + } + go func() { + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("Expected child goroutine's profile labels to be empty, got %v", gotLabels) + } + sync <- struct{}{} + }() + <-sync +} + +func TestDo(t *testing.T) { + wantLabels := map[string]string{} + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("Expected parent goroutine's profile labels to be empty before Do, got %v", gotLabels) + } + + Do(context.Background(), Labels("key1", "value1", "key2", "value2"), func(ctx context.Context) { + wantLabels := map[string]string{"key1": "value1", "key2": "value2"} + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("parent goroutine's profile labels: got %v, want %v", gotLabels, wantLabels) + } + + sync := make(chan struct{}) + go func() { + wantLabels := map[string]string{"key1": "value1", "key2": "value2"} + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("child goroutine's profile labels: got %v, want %v", gotLabels, wantLabels) + } + sync <- struct{}{} + }() + <-sync + + }) + + wantLabels = map[string]string{} + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + fmt.Printf("%#v", gotLabels) + fmt.Printf("%#v", wantLabels) + t.Errorf("Expected parent goroutine's profile labels to 
be empty after Do, got %v", gotLabels) + } +} + +func getProfLabel() map[string]string { + l := (*labelMap)(runtime_getProfLabel()) + if l == nil { + return map[string]string{} + } + return *l +} diff --git a/src/runtime/pprof/rusage_test.go b/src/runtime/pprof/rusage_test.go new file mode 100644 index 0000000..b0d651e --- /dev/null +++ b/src/runtime/pprof/rusage_test.go @@ -0,0 +1,39 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin || freebsd || linux || netbsd || openbsd + +package pprof + +import ( + "syscall" + "time" +) + +func init() { + diffCPUTimeImpl = diffCPUTimeRUsage +} + +func diffCPUTimeRUsage(f func()) time.Duration { + ok := true + var before, after syscall.Rusage + + err := syscall.Getrusage(syscall.RUSAGE_SELF, &before) + if err != nil { + ok = false + } + + f() + + err = syscall.Getrusage(syscall.RUSAGE_SELF, &after) + if err != nil { + ok = false + } + + if !ok { + return 0 + } + + return time.Duration((after.Utime.Nano() + after.Stime.Nano()) - (before.Utime.Nano() + before.Stime.Nano())) +} diff --git a/src/runtime/pprof/testdata/README b/src/runtime/pprof/testdata/README new file mode 100644 index 0000000..876538e --- /dev/null +++ b/src/runtime/pprof/testdata/README @@ -0,0 +1,9 @@ +These binaries were generated by: + +$ cat empty.s +.global _start +_start: +$ as --32 -o empty.o empty.s && ld --build-id -m elf_i386 -o test32 empty.o +$ as --64 -o empty.o empty.s && ld --build-id -o test64 empty.o +$ powerpc-linux-gnu-as -o empty.o empty.s && powerpc-linux-gnu-ld --build-id -o test32be empty.o +$ powerpc64-linux-gnu-as -o empty.o empty.s && powerpc64-linux-gnu-ld --build-id -o test64be empty.o diff --git a/src/runtime/pprof/testdata/mappingtest/main.go b/src/runtime/pprof/testdata/mappingtest/main.go new file mode 100644 index 0000000..484b7f9 --- /dev/null +++ b/src/runtime/pprof/testdata/mappingtest/main.go @@ -0,0 +1,108 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This program outputs a CPU profile that includes +// both Go and Cgo stacks. This is used by the mapping info +// tests in runtime/pprof. +// +// If SETCGOTRACEBACK=1 is set, the CPU profile will includes +// PCs from C side but they will not be symbolized. +package main + +/* +#include <stdint.h> +#include <stdlib.h> + +int cpuHogCSalt1 = 0; +int cpuHogCSalt2 = 0; + +void CPUHogCFunction0(int foo) { + int i; + for (i = 0; i < 100000; i++) { + if (foo > 0) { + foo *= foo; + } else { + foo *= foo + 1; + } + cpuHogCSalt2 = foo; + } +} + +void CPUHogCFunction() { + CPUHogCFunction0(cpuHogCSalt1); +} + +struct CgoTracebackArg { + uintptr_t context; + uintptr_t sigContext; + uintptr_t *buf; + uintptr_t max; +}; + +void CollectCgoTraceback(void* parg) { + struct CgoTracebackArg* arg = (struct CgoTracebackArg*)(parg); + arg->buf[0] = (uintptr_t)(CPUHogCFunction0); + arg->buf[1] = (uintptr_t)(CPUHogCFunction); + arg->buf[2] = 0; +}; +*/ +import "C" + +import ( + "log" + "os" + "runtime" + "runtime/pprof" + "time" + "unsafe" +) + +func init() { + if v := os.Getenv("SETCGOTRACEBACK"); v == "1" { + // Collect some PCs from C-side, but don't symbolize. 
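+		// Passing nil for the context and symbolizer functions below
+		// means the collected C PCs are left unsymbolized.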
+ runtime.SetCgoTraceback(0, unsafe.Pointer(C.CollectCgoTraceback), nil, nil) + } +} + +func main() { + go cpuHogGoFunction() + go cpuHogCFunction() + runtime.Gosched() + + if err := pprof.StartCPUProfile(os.Stdout); err != nil { + log.Fatal("can't start CPU profile: ", err) + } + time.Sleep(200 * time.Millisecond) + pprof.StopCPUProfile() + + if err := os.Stdout.Close(); err != nil { + log.Fatal("can't write CPU profile: ", err) + } +} + +var salt1 int +var salt2 int + +func cpuHogGoFunction() { + for { + foo := salt1 + for i := 0; i < 1e5; i++ { + if foo > 0 { + foo *= foo + } else { + foo *= foo + 1 + } + salt2 = foo + } + runtime.Gosched() + } +} + +func cpuHogCFunction() { + // Generates CPU profile samples including a Cgo call path. + for { + C.CPUHogCFunction() + runtime.Gosched() + } +} diff --git a/src/runtime/pprof/testdata/test32 b/src/runtime/pprof/testdata/test32 Binary files differnew file mode 100755 index 0000000..ce59472 --- /dev/null +++ b/src/runtime/pprof/testdata/test32 diff --git a/src/runtime/pprof/testdata/test32be b/src/runtime/pprof/testdata/test32be Binary files differnew file mode 100755 index 0000000..f13a732 --- /dev/null +++ b/src/runtime/pprof/testdata/test32be diff --git a/src/runtime/pprof/testdata/test64 b/src/runtime/pprof/testdata/test64 Binary files differnew file mode 100755 index 0000000..3fb42fb --- /dev/null +++ b/src/runtime/pprof/testdata/test64 diff --git a/src/runtime/pprof/testdata/test64be b/src/runtime/pprof/testdata/test64be Binary files differnew file mode 100755 index 0000000..09b4b01 --- /dev/null +++ b/src/runtime/pprof/testdata/test64be diff --git a/src/runtime/pprof/uname_linux_test.go b/src/runtime/pprof/uname_linux_test.go new file mode 100644 index 0000000..8374c83 --- /dev/null +++ b/src/runtime/pprof/uname_linux_test.go @@ -0,0 +1,61 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build linux + +package pprof + +import ( + "fmt" + "regexp" + "strconv" + "syscall" +) + +var versionRe = regexp.MustCompile(`^(\d+)(?:\.(\d+)(?:\.(\d+))).*$`) + +func linuxKernelVersion() (major, minor, patch int, err error) { + var uname syscall.Utsname + if err := syscall.Uname(&uname); err != nil { + return 0, 0, 0, err + } + + buf := make([]byte, 0, len(uname.Release)) + for _, b := range uname.Release { + if b == 0 { + break + } + buf = append(buf, byte(b)) + } + rl := string(buf) + + m := versionRe.FindStringSubmatch(rl) + if m == nil { + return 0, 0, 0, fmt.Errorf("error matching version number in %q", rl) + } + + v, err := strconv.ParseInt(m[1], 10, 64) + if err != nil { + return 0, 0, 0, fmt.Errorf("error parsing major version %q in %s: %w", m[1], rl, err) + } + major = int(v) + + if len(m) >= 3 { + v, err := strconv.ParseInt(m[2], 10, 64) + if err != nil { + return 0, 0, 0, fmt.Errorf("error parsing minor version %q in %s: %w", m[2], rl, err) + } + minor = int(v) + } + + if len(m) >= 4 { + v, err := strconv.ParseInt(m[3], 10, 64) + if err != nil { + return 0, 0, 0, fmt.Errorf("error parsing patch version %q in %s: %w", m[3], rl, err) + } + patch = int(v) + } + + return +} diff --git a/src/runtime/pprof/uname_other_test.go b/src/runtime/pprof/uname_other_test.go new file mode 100644 index 0000000..3276407 --- /dev/null +++ b/src/runtime/pprof/uname_other_test.go @@ -0,0 +1,15 @@ +// Copyright 2021 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !linux + +package pprof + +import ( + "errors" +) + +func linuxKernelVersion() (major, minor, patch int, err error) { + return 0, 0, 0, errors.New("not running on linux") +} |