diff options
Diffstat (limited to 'src/runtime/pprof')
27 files changed, 5850 insertions, 0 deletions
diff --git a/src/runtime/pprof/elf.go b/src/runtime/pprof/elf.go new file mode 100644 index 0000000..a8b5ea6 --- /dev/null +++ b/src/runtime/pprof/elf.go @@ -0,0 +1,109 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "encoding/binary" + "errors" + "fmt" + "os" +) + +var ( + errBadELF = errors.New("malformed ELF binary") + errNoBuildID = errors.New("no NT_GNU_BUILD_ID found in ELF binary") +) + +// elfBuildID returns the GNU build ID of the named ELF binary, +// without introducing a dependency on debug/elf and its dependencies. +func elfBuildID(file string) (string, error) { + buf := make([]byte, 256) + f, err := os.Open(file) + if err != nil { + return "", err + } + defer f.Close() + + if _, err := f.ReadAt(buf[:64], 0); err != nil { + return "", err + } + + // ELF file begins with \x7F E L F. + if buf[0] != 0x7F || buf[1] != 'E' || buf[2] != 'L' || buf[3] != 'F' { + return "", errBadELF + } + + var byteOrder binary.ByteOrder + switch buf[5] { + default: + return "", errBadELF + case 1: // little-endian + byteOrder = binary.LittleEndian + case 2: // big-endian + byteOrder = binary.BigEndian + } + + var shnum int + var shoff, shentsize int64 + switch buf[4] { + default: + return "", errBadELF + case 1: // 32-bit file header + shoff = int64(byteOrder.Uint32(buf[32:])) + shentsize = int64(byteOrder.Uint16(buf[46:])) + if shentsize != 40 { + return "", errBadELF + } + shnum = int(byteOrder.Uint16(buf[48:])) + case 2: // 64-bit file header + shoff = int64(byteOrder.Uint64(buf[40:])) + shentsize = int64(byteOrder.Uint16(buf[58:])) + if shentsize != 64 { + return "", errBadELF + } + shnum = int(byteOrder.Uint16(buf[60:])) + } + + for i := 0; i < shnum; i++ { + if _, err := f.ReadAt(buf[:shentsize], shoff+int64(i)*shentsize); err != nil { + return "", err + } + if typ := byteOrder.Uint32(buf[4:]); typ != 7 { // SHT_NOTE + continue + } + var off, size int64 + if shentsize == 40 { + // 32-bit section header + off = int64(byteOrder.Uint32(buf[16:])) + size = int64(byteOrder.Uint32(buf[20:])) + } else { + // 64-bit section header + off = int64(byteOrder.Uint64(buf[24:])) + size = int64(byteOrder.Uint64(buf[32:])) + } + size += off + for off < size { + if _, err := f.ReadAt(buf[:16], off); err != nil { // room for header + name GNU\x00 + return "", err + } + nameSize := int(byteOrder.Uint32(buf[0:])) + descSize := int(byteOrder.Uint32(buf[4:])) + noteType := int(byteOrder.Uint32(buf[8:])) + descOff := off + int64(12+(nameSize+3)&^3) + off = descOff + int64((descSize+3)&^3) + if nameSize != 4 || noteType != 3 || buf[12] != 'G' || buf[13] != 'N' || buf[14] != 'U' || buf[15] != '\x00' { // want name GNU\x00 type 3 (NT_GNU_BUILD_ID) + continue + } + if descSize > len(buf) { + return "", errBadELF + } + if _, err := f.ReadAt(buf[:descSize], descOff); err != nil { + return "", err + } + return fmt.Sprintf("%x", buf[:descSize]), nil + } + } + return "", errNoBuildID +} diff --git a/src/runtime/pprof/label.go b/src/runtime/pprof/label.go new file mode 100644 index 0000000..d39e0ad --- /dev/null +++ b/src/runtime/pprof/label.go @@ -0,0 +1,108 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "context" + "fmt" + "sort" + "strings" +) + +type label struct { + key string + value string +} + +// LabelSet is a set of labels. +type LabelSet struct { + list []label +} + +// labelContextKey is the type of contextKeys used for profiler labels. +type labelContextKey struct{} + +func labelValue(ctx context.Context) labelMap { + labels, _ := ctx.Value(labelContextKey{}).(*labelMap) + if labels == nil { + return labelMap(nil) + } + return *labels +} + +// labelMap is the representation of the label set held in the context type. +// This is an initial implementation, but it will be replaced with something +// that admits incremental immutable modification more efficiently. +type labelMap map[string]string + +// String satisfies Stringer and returns key, value pairs in a consistent +// order. +func (l *labelMap) String() string { + if l == nil { + return "" + } + keyVals := make([]string, 0, len(*l)) + + for k, v := range *l { + keyVals = append(keyVals, fmt.Sprintf("%q:%q", k, v)) + } + + sort.Strings(keyVals) + + return "{" + strings.Join(keyVals, ", ") + "}" +} + +// WithLabels returns a new context.Context with the given labels added. +// A label overwrites a prior label with the same key. +func WithLabels(ctx context.Context, labels LabelSet) context.Context { + parentLabels := labelValue(ctx) + childLabels := make(labelMap, len(parentLabels)) + // TODO(matloob): replace the map implementation with something + // more efficient so creating a child context WithLabels doesn't need + // to clone the map. + for k, v := range parentLabels { + childLabels[k] = v + } + for _, label := range labels.list { + childLabels[label.key] = label.value + } + return context.WithValue(ctx, labelContextKey{}, &childLabels) +} + +// Labels takes an even number of strings representing key-value pairs +// and makes a LabelSet containing them. +// A label overwrites a prior label with the same key. +// Currently only the CPU and goroutine profiles utilize any labels +// information. +// See https://golang.org/issue/23458 for details. +func Labels(args ...string) LabelSet { + if len(args)%2 != 0 { + panic("uneven number of arguments to pprof.Labels") + } + list := make([]label, 0, len(args)/2) + for i := 0; i+1 < len(args); i += 2 { + list = append(list, label{key: args[i], value: args[i+1]}) + } + return LabelSet{list: list} +} + +// Label returns the value of the label with the given key on ctx, and a boolean indicating +// whether that label exists. +func Label(ctx context.Context, key string) (string, bool) { + ctxLabels := labelValue(ctx) + v, ok := ctxLabels[key] + return v, ok +} + +// ForLabels invokes f with each label set on the context. +// The function f should return true to continue iteration or false to stop iteration early. +func ForLabels(ctx context.Context, f func(key, value string) bool) { + ctxLabels := labelValue(ctx) + for k, v := range ctxLabels { + if !f(k, v) { + break + } + } +} diff --git a/src/runtime/pprof/label_test.go b/src/runtime/pprof/label_test.go new file mode 100644 index 0000000..fcb00bd --- /dev/null +++ b/src/runtime/pprof/label_test.go @@ -0,0 +1,114 @@ +package pprof + +import ( + "context" + "reflect" + "sort" + "testing" +) + +func labelsSorted(ctx context.Context) []label { + ls := []label{} + ForLabels(ctx, func(key, value string) bool { + ls = append(ls, label{key, value}) + return true + }) + sort.Sort(labelSorter(ls)) + return ls +} + +type labelSorter []label + +func (s labelSorter) Len() int { return len(s) } +func (s labelSorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s labelSorter) Less(i, j int) bool { return s[i].key < s[j].key } + +func TestContextLabels(t *testing.T) { + // Background context starts with no labels. + ctx := context.Background() + labels := labelsSorted(ctx) + if len(labels) != 0 { + t.Errorf("labels on background context: want [], got %v ", labels) + } + + // Add a single label. + ctx = WithLabels(ctx, Labels("key", "value")) + // Retrieve it with Label. + v, ok := Label(ctx, "key") + if !ok || v != "value" { + t.Errorf(`Label(ctx, "key"): got %v, %v; want "value", ok`, v, ok) + } + gotLabels := labelsSorted(ctx) + wantLabels := []label{{"key", "value"}} + if !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("(sorted) labels on context: got %v, want %v", gotLabels, wantLabels) + } + + // Add a label with a different key. + ctx = WithLabels(ctx, Labels("key2", "value2")) + v, ok = Label(ctx, "key2") + if !ok || v != "value2" { + t.Errorf(`Label(ctx, "key2"): got %v, %v; want "value2", ok`, v, ok) + } + gotLabels = labelsSorted(ctx) + wantLabels = []label{{"key", "value"}, {"key2", "value2"}} + if !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("(sorted) labels on context: got %v, want %v", gotLabels, wantLabels) + } + + // Add label with first key to test label replacement. + ctx = WithLabels(ctx, Labels("key", "value3")) + v, ok = Label(ctx, "key") + if !ok || v != "value3" { + t.Errorf(`Label(ctx, "key3"): got %v, %v; want "value3", ok`, v, ok) + } + gotLabels = labelsSorted(ctx) + wantLabels = []label{{"key", "value3"}, {"key2", "value2"}} + if !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("(sorted) labels on context: got %v, want %v", gotLabels, wantLabels) + } + + // Labels called with two labels with the same key should pick the second. + ctx = WithLabels(ctx, Labels("key4", "value4a", "key4", "value4b")) + v, ok = Label(ctx, "key4") + if !ok || v != "value4b" { + t.Errorf(`Label(ctx, "key4"): got %v, %v; want "value4b", ok`, v, ok) + } + gotLabels = labelsSorted(ctx) + wantLabels = []label{{"key", "value3"}, {"key2", "value2"}, {"key4", "value4b"}} + if !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("(sorted) labels on context: got %v, want %v", gotLabels, wantLabels) + } +} + +func TestLabelMapStringer(t *testing.T) { + for _, tbl := range []struct { + m labelMap + expected string + }{ + { + m: labelMap{ + // empty map + }, + expected: "{}", + }, { + m: labelMap{ + "foo": "bar", + }, + expected: `{"foo":"bar"}`, + }, { + m: labelMap{ + "foo": "bar", + "key1": "value1", + "key2": "value2", + "key3": "value3", + "key4WithNewline": "\nvalue4", + }, + expected: `{"foo":"bar", "key1":"value1", "key2":"value2", "key3":"value3", "key4WithNewline":"\nvalue4"}`, + }, + } { + if got := tbl.m.String(); tbl.expected != got { + t.Errorf("%#v.String() = %q; want %q", tbl.m, got, tbl.expected) + } + } +} diff --git a/src/runtime/pprof/map.go b/src/runtime/pprof/map.go new file mode 100644 index 0000000..7c75872 --- /dev/null +++ b/src/runtime/pprof/map.go @@ -0,0 +1,90 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import "unsafe" + +// A profMap is a map from (stack, tag) to mapEntry. +// It grows without bound, but that's assumed to be OK. +type profMap struct { + hash map[uintptr]*profMapEntry + all *profMapEntry + last *profMapEntry + free []profMapEntry + freeStk []uintptr +} + +// A profMapEntry is a single entry in the profMap. +type profMapEntry struct { + nextHash *profMapEntry // next in hash list + nextAll *profMapEntry // next in list of all entries + stk []uintptr + tag unsafe.Pointer + count int64 +} + +func (m *profMap) lookup(stk []uint64, tag unsafe.Pointer) *profMapEntry { + // Compute hash of (stk, tag). + h := uintptr(0) + for _, x := range stk { + h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1))) + h += uintptr(x) * 41 + } + h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1))) + h += uintptr(tag) * 41 + + // Find entry if present. + var last *profMapEntry +Search: + for e := m.hash[h]; e != nil; last, e = e, e.nextHash { + if len(e.stk) != len(stk) || e.tag != tag { + continue + } + for j := range stk { + if e.stk[j] != uintptr(stk[j]) { + continue Search + } + } + // Move to front. + if last != nil { + last.nextHash = e.nextHash + e.nextHash = m.hash[h] + m.hash[h] = e + } + return e + } + + // Add new entry. + if len(m.free) < 1 { + m.free = make([]profMapEntry, 128) + } + e := &m.free[0] + m.free = m.free[1:] + e.nextHash = m.hash[h] + e.tag = tag + + if len(m.freeStk) < len(stk) { + m.freeStk = make([]uintptr, 1024) + } + // Limit cap to prevent append from clobbering freeStk. + e.stk = m.freeStk[:len(stk):len(stk)] + m.freeStk = m.freeStk[len(stk):] + + for j := range stk { + e.stk[j] = uintptr(stk[j]) + } + if m.hash == nil { + m.hash = make(map[uintptr]*profMapEntry) + } + m.hash[h] = e + if m.all == nil { + m.all = e + m.last = e + } else { + m.last.nextAll = e + m.last = e + } + return e +} diff --git a/src/runtime/pprof/mprof_test.go b/src/runtime/pprof/mprof_test.go new file mode 100644 index 0000000..391588d --- /dev/null +++ b/src/runtime/pprof/mprof_test.go @@ -0,0 +1,176 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !js + +package pprof + +import ( + "bytes" + "fmt" + "internal/profile" + "reflect" + "regexp" + "runtime" + "testing" + "unsafe" +) + +var memSink any + +func allocateTransient1M() { + for i := 0; i < 1024; i++ { + memSink = &struct{ x [1024]byte }{} + } +} + +//go:noinline +func allocateTransient2M() { + memSink = make([]byte, 2<<20) +} + +func allocateTransient2MInline() { + memSink = make([]byte, 2<<20) +} + +type Obj32 struct { + link *Obj32 + pad [32 - unsafe.Sizeof(uintptr(0))]byte +} + +var persistentMemSink *Obj32 + +func allocatePersistent1K() { + for i := 0; i < 32; i++ { + // Can't use slice because that will introduce implicit allocations. + obj := &Obj32{link: persistentMemSink} + persistentMemSink = obj + } +} + +// Allocate transient memory using reflect.Call. + +func allocateReflectTransient() { + memSink = make([]byte, 2<<20) +} + +func allocateReflect() { + rv := reflect.ValueOf(allocateReflectTransient) + rv.Call(nil) +} + +var memoryProfilerRun = 0 + +func TestMemoryProfiler(t *testing.T) { + // Disable sampling, otherwise it's difficult to assert anything. + oldRate := runtime.MemProfileRate + runtime.MemProfileRate = 1 + defer func() { + runtime.MemProfileRate = oldRate + }() + + // Allocate a meg to ensure that mcache.nextSample is updated to 1. + for i := 0; i < 1024; i++ { + memSink = make([]byte, 1024) + } + + // Do the interesting allocations. + allocateTransient1M() + allocateTransient2M() + allocateTransient2MInline() + allocatePersistent1K() + allocateReflect() + memSink = nil + + runtime.GC() // materialize stats + + memoryProfilerRun++ + + tests := []struct { + stk []string + legacy string + }{{ + stk: []string{"runtime/pprof.allocatePersistent1K", "runtime/pprof.TestMemoryProfiler"}, + legacy: fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ +# 0x[0-9,a-f]+ runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test\.go:47 +# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test\.go:82 +`, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun), + }, { + stk: []string{"runtime/pprof.allocateTransient1M", "runtime/pprof.TestMemoryProfiler"}, + legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ +# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:24 +# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:79 +`, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun), + }, { + stk: []string{"runtime/pprof.allocateTransient2M", "runtime/pprof.TestMemoryProfiler"}, + legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ +# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:30 +# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:80 +`, memoryProfilerRun, (2<<20)*memoryProfilerRun), + }, { + stk: []string{"runtime/pprof.allocateTransient2MInline", "runtime/pprof.TestMemoryProfiler"}, + legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ +# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2MInline\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:34 +# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:81 +`, memoryProfilerRun, (2<<20)*memoryProfilerRun), + }, { + stk: []string{"runtime/pprof.allocateReflectTransient"}, + legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+ +# 0x[0-9,a-f]+ runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*runtime/pprof/mprof_test.go:55 +`, memoryProfilerRun, (2<<20)*memoryProfilerRun), + }} + + t.Run("debug=1", func(t *testing.T) { + var buf bytes.Buffer + if err := Lookup("heap").WriteTo(&buf, 1); err != nil { + t.Fatalf("failed to write heap profile: %v", err) + } + + for _, test := range tests { + if !regexp.MustCompile(test.legacy).Match(buf.Bytes()) { + t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test.legacy, buf.String()) + } + } + }) + + t.Run("proto", func(t *testing.T) { + var buf bytes.Buffer + if err := Lookup("heap").WriteTo(&buf, 0); err != nil { + t.Fatalf("failed to write heap profile: %v", err) + } + p, err := profile.Parse(&buf) + if err != nil { + t.Fatalf("failed to parse heap profile: %v", err) + } + t.Logf("Profile = %v", p) + + stks := stacks(p) + for _, test := range tests { + if !containsStack(stks, test.stk) { + t.Fatalf("No matching stack entry for %q\n\nProfile:\n%v\n", test.stk, p) + } + } + + if !containsInlinedCall(TestMemoryProfiler, 4<<10) { + t.Logf("Can't determine whether allocateTransient2MInline was inlined into TestMemoryProfiler.") + return + } + + // Check the inlined function location is encoded correctly. + for _, loc := range p.Location { + inlinedCaller, inlinedCallee := false, false + for _, line := range loc.Line { + if line.Function.Name == "runtime/pprof.allocateTransient2MInline" { + inlinedCallee = true + } + if inlinedCallee && line.Function.Name == "runtime/pprof.TestMemoryProfiler" { + inlinedCaller = true + } + } + if inlinedCallee != inlinedCaller { + t.Errorf("want allocateTransient2MInline after TestMemoryProfiler in one location, got separate location entries:\n%v", loc) + } + } + }) +} diff --git a/src/runtime/pprof/pe.go b/src/runtime/pprof/pe.go new file mode 100644 index 0000000..4105458 --- /dev/null +++ b/src/runtime/pprof/pe.go @@ -0,0 +1,19 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import "os" + +// peBuildID returns a best effort unique ID for the named executable. +// +// It would be wasteful to calculate the hash of the whole file, +// instead use the binary name and the last modified time for the buildid. +func peBuildID(file string) string { + s, err := os.Stat(file) + if err != nil { + return file + } + return file + s.ModTime().String() +} diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go new file mode 100644 index 0000000..17a490e --- /dev/null +++ b/src/runtime/pprof/pprof.go @@ -0,0 +1,910 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package pprof writes runtime profiling data in the format expected +// by the pprof visualization tool. +// +// # Profiling a Go program +// +// The first step to profiling a Go program is to enable profiling. +// Support for profiling benchmarks built with the standard testing +// package is built into go test. For example, the following command +// runs benchmarks in the current directory and writes the CPU and +// memory profiles to cpu.prof and mem.prof: +// +// go test -cpuprofile cpu.prof -memprofile mem.prof -bench . +// +// To add equivalent profiling support to a standalone program, add +// code like the following to your main function: +// +// var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") +// var memprofile = flag.String("memprofile", "", "write memory profile to `file`") +// +// func main() { +// flag.Parse() +// if *cpuprofile != "" { +// f, err := os.Create(*cpuprofile) +// if err != nil { +// log.Fatal("could not create CPU profile: ", err) +// } +// defer f.Close() // error handling omitted for example +// if err := pprof.StartCPUProfile(f); err != nil { +// log.Fatal("could not start CPU profile: ", err) +// } +// defer pprof.StopCPUProfile() +// } +// +// // ... rest of the program ... +// +// if *memprofile != "" { +// f, err := os.Create(*memprofile) +// if err != nil { +// log.Fatal("could not create memory profile: ", err) +// } +// defer f.Close() // error handling omitted for example +// runtime.GC() // get up-to-date statistics +// if err := pprof.WriteHeapProfile(f); err != nil { +// log.Fatal("could not write memory profile: ", err) +// } +// } +// } +// +// There is also a standard HTTP interface to profiling data. Adding +// the following line will install handlers under the /debug/pprof/ +// URL to download live profiles: +// +// import _ "net/http/pprof" +// +// See the net/http/pprof package for more details. +// +// Profiles can then be visualized with the pprof tool: +// +// go tool pprof cpu.prof +// +// There are many commands available from the pprof command line. +// Commonly used commands include "top", which prints a summary of the +// top program hot-spots, and "web", which opens an interactive graph +// of hot-spots and their call graphs. Use "help" for information on +// all pprof commands. +// +// For more information about pprof, see +// https://github.com/google/pprof/blob/master/doc/README.md. +package pprof + +import ( + "bufio" + "fmt" + "internal/abi" + "io" + "runtime" + "sort" + "strings" + "sync" + "text/tabwriter" + "time" + "unsafe" +) + +// BUG(rsc): Profiles are only as good as the kernel support used to generate them. +// See https://golang.org/issue/13841 for details about known problems. + +// A Profile is a collection of stack traces showing the call sequences +// that led to instances of a particular event, such as allocation. +// Packages can create and maintain their own profiles; the most common +// use is for tracking resources that must be explicitly closed, such as files +// or network connections. +// +// A Profile's methods can be called from multiple goroutines simultaneously. +// +// Each Profile has a unique name. A few profiles are predefined: +// +// goroutine - stack traces of all current goroutines +// heap - a sampling of memory allocations of live objects +// allocs - a sampling of all past memory allocations +// threadcreate - stack traces that led to the creation of new OS threads +// block - stack traces that led to blocking on synchronization primitives +// mutex - stack traces of holders of contended mutexes +// +// These predefined profiles maintain themselves and panic on an explicit +// Add or Remove method call. +// +// The heap profile reports statistics as of the most recently completed +// garbage collection; it elides more recent allocation to avoid skewing +// the profile away from live data and toward garbage. +// If there has been no garbage collection at all, the heap profile reports +// all known allocations. This exception helps mainly in programs running +// without garbage collection enabled, usually for debugging purposes. +// +// The heap profile tracks both the allocation sites for all live objects in +// the application memory and for all objects allocated since the program start. +// Pprof's -inuse_space, -inuse_objects, -alloc_space, and -alloc_objects +// flags select which to display, defaulting to -inuse_space (live objects, +// scaled by size). +// +// The allocs profile is the same as the heap profile but changes the default +// pprof display to -alloc_space, the total number of bytes allocated since +// the program began (including garbage-collected bytes). +// +// The CPU profile is not available as a Profile. It has a special API, +// the StartCPUProfile and StopCPUProfile functions, because it streams +// output to a writer during profiling. +type Profile struct { + name string + mu sync.Mutex + m map[any][]uintptr + count func() int + write func(io.Writer, int) error +} + +// profiles records all registered profiles. +var profiles struct { + mu sync.Mutex + m map[string]*Profile +} + +var goroutineProfile = &Profile{ + name: "goroutine", + count: countGoroutine, + write: writeGoroutine, +} + +var threadcreateProfile = &Profile{ + name: "threadcreate", + count: countThreadCreate, + write: writeThreadCreate, +} + +var heapProfile = &Profile{ + name: "heap", + count: countHeap, + write: writeHeap, +} + +var allocsProfile = &Profile{ + name: "allocs", + count: countHeap, // identical to heap profile + write: writeAlloc, +} + +var blockProfile = &Profile{ + name: "block", + count: countBlock, + write: writeBlock, +} + +var mutexProfile = &Profile{ + name: "mutex", + count: countMutex, + write: writeMutex, +} + +func lockProfiles() { + profiles.mu.Lock() + if profiles.m == nil { + // Initial built-in profiles. + profiles.m = map[string]*Profile{ + "goroutine": goroutineProfile, + "threadcreate": threadcreateProfile, + "heap": heapProfile, + "allocs": allocsProfile, + "block": blockProfile, + "mutex": mutexProfile, + } + } +} + +func unlockProfiles() { + profiles.mu.Unlock() +} + +// NewProfile creates a new profile with the given name. +// If a profile with that name already exists, NewProfile panics. +// The convention is to use a 'import/path.' prefix to create +// separate name spaces for each package. +// For compatibility with various tools that read pprof data, +// profile names should not contain spaces. +func NewProfile(name string) *Profile { + lockProfiles() + defer unlockProfiles() + if name == "" { + panic("pprof: NewProfile with empty name") + } + if profiles.m[name] != nil { + panic("pprof: NewProfile name already in use: " + name) + } + p := &Profile{ + name: name, + m: map[any][]uintptr{}, + } + profiles.m[name] = p + return p +} + +// Lookup returns the profile with the given name, or nil if no such profile exists. +func Lookup(name string) *Profile { + lockProfiles() + defer unlockProfiles() + return profiles.m[name] +} + +// Profiles returns a slice of all the known profiles, sorted by name. +func Profiles() []*Profile { + lockProfiles() + defer unlockProfiles() + + all := make([]*Profile, 0, len(profiles.m)) + for _, p := range profiles.m { + all = append(all, p) + } + + sort.Slice(all, func(i, j int) bool { return all[i].name < all[j].name }) + return all +} + +// Name returns this profile's name, which can be passed to Lookup to reobtain the profile. +func (p *Profile) Name() string { + return p.name +} + +// Count returns the number of execution stacks currently in the profile. +func (p *Profile) Count() int { + p.mu.Lock() + defer p.mu.Unlock() + if p.count != nil { + return p.count() + } + return len(p.m) +} + +// Add adds the current execution stack to the profile, associated with value. +// Add stores value in an internal map, so value must be suitable for use as +// a map key and will not be garbage collected until the corresponding +// call to Remove. Add panics if the profile already contains a stack for value. +// +// The skip parameter has the same meaning as runtime.Caller's skip +// and controls where the stack trace begins. Passing skip=0 begins the +// trace in the function calling Add. For example, given this +// execution stack: +// +// Add +// called from rpc.NewClient +// called from mypkg.Run +// called from main.main +// +// Passing skip=0 begins the stack trace at the call to Add inside rpc.NewClient. +// Passing skip=1 begins the stack trace at the call to NewClient inside mypkg.Run. +func (p *Profile) Add(value any, skip int) { + if p.name == "" { + panic("pprof: use of uninitialized Profile") + } + if p.write != nil { + panic("pprof: Add called on built-in Profile " + p.name) + } + + stk := make([]uintptr, 32) + n := runtime.Callers(skip+1, stk[:]) + stk = stk[:n] + if len(stk) == 0 { + // The value for skip is too large, and there's no stack trace to record. + stk = []uintptr{abi.FuncPCABIInternal(lostProfileEvent)} + } + + p.mu.Lock() + defer p.mu.Unlock() + if p.m[value] != nil { + panic("pprof: Profile.Add of duplicate value") + } + p.m[value] = stk +} + +// Remove removes the execution stack associated with value from the profile. +// It is a no-op if the value is not in the profile. +func (p *Profile) Remove(value any) { + p.mu.Lock() + defer p.mu.Unlock() + delete(p.m, value) +} + +// WriteTo writes a pprof-formatted snapshot of the profile to w. +// If a write to w returns an error, WriteTo returns that error. +// Otherwise, WriteTo returns nil. +// +// The debug parameter enables additional output. +// Passing debug=0 writes the gzip-compressed protocol buffer described +// in https://github.com/google/pprof/tree/master/proto#overview. +// Passing debug=1 writes the legacy text format with comments +// translating addresses to function names and line numbers, so that a +// programmer can read the profile without tools. +// +// The predefined profiles may assign meaning to other debug values; +// for example, when printing the "goroutine" profile, debug=2 means to +// print the goroutine stacks in the same form that a Go program uses +// when dying due to an unrecovered panic. +func (p *Profile) WriteTo(w io.Writer, debug int) error { + if p.name == "" { + panic("pprof: use of zero Profile") + } + if p.write != nil { + return p.write(w, debug) + } + + // Obtain consistent snapshot under lock; then process without lock. + p.mu.Lock() + all := make([][]uintptr, 0, len(p.m)) + for _, stk := range p.m { + all = append(all, stk) + } + p.mu.Unlock() + + // Map order is non-deterministic; make output deterministic. + sort.Slice(all, func(i, j int) bool { + t, u := all[i], all[j] + for k := 0; k < len(t) && k < len(u); k++ { + if t[k] != u[k] { + return t[k] < u[k] + } + } + return len(t) < len(u) + }) + + return printCountProfile(w, debug, p.name, stackProfile(all)) +} + +type stackProfile [][]uintptr + +func (x stackProfile) Len() int { return len(x) } +func (x stackProfile) Stack(i int) []uintptr { return x[i] } +func (x stackProfile) Label(i int) *labelMap { return nil } + +// A countProfile is a set of stack traces to be printed as counts +// grouped by stack trace. There are multiple implementations: +// all that matters is that we can find out how many traces there are +// and obtain each trace in turn. +type countProfile interface { + Len() int + Stack(i int) []uintptr + Label(i int) *labelMap +} + +// printCountCycleProfile outputs block profile records (for block or mutex profiles) +// as the pprof-proto format output. Translations from cycle count to time duration +// are done because The proto expects count and time (nanoseconds) instead of count +// and the number of cycles for block, contention profiles. +func printCountCycleProfile(w io.Writer, countName, cycleName string, records []runtime.BlockProfileRecord) error { + // Output profile in protobuf form. + b := newProfileBuilder(w) + b.pbValueType(tagProfile_PeriodType, countName, "count") + b.pb.int64Opt(tagProfile_Period, 1) + b.pbValueType(tagProfile_SampleType, countName, "count") + b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds") + + cpuGHz := float64(runtime_cyclesPerSecond()) / 1e9 + + values := []int64{0, 0} + var locs []uint64 + for _, r := range records { + values[0] = r.Count + values[1] = int64(float64(r.Cycles) / cpuGHz) + // For count profiles, all stack addresses are + // return PCs, which is what appendLocsForStack expects. + locs = b.appendLocsForStack(locs[:0], r.Stack()) + b.pbSample(values, locs, nil) + } + b.build() + return nil +} + +// printCountProfile prints a countProfile at the specified debug level. +// The profile will be in compressed proto format unless debug is nonzero. +func printCountProfile(w io.Writer, debug int, name string, p countProfile) error { + // Build count of each stack. + var buf strings.Builder + key := func(stk []uintptr, lbls *labelMap) string { + buf.Reset() + fmt.Fprintf(&buf, "@") + for _, pc := range stk { + fmt.Fprintf(&buf, " %#x", pc) + } + if lbls != nil { + buf.WriteString("\n# labels: ") + buf.WriteString(lbls.String()) + } + return buf.String() + } + count := map[string]int{} + index := map[string]int{} + var keys []string + n := p.Len() + for i := 0; i < n; i++ { + k := key(p.Stack(i), p.Label(i)) + if count[k] == 0 { + index[k] = i + keys = append(keys, k) + } + count[k]++ + } + + sort.Sort(&keysByCount{keys, count}) + + if debug > 0 { + // Print debug profile in legacy format + tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + fmt.Fprintf(tw, "%s profile: total %d\n", name, p.Len()) + for _, k := range keys { + fmt.Fprintf(tw, "%d %s\n", count[k], k) + printStackRecord(tw, p.Stack(index[k]), false) + } + return tw.Flush() + } + + // Output profile in protobuf form. + b := newProfileBuilder(w) + b.pbValueType(tagProfile_PeriodType, name, "count") + b.pb.int64Opt(tagProfile_Period, 1) + b.pbValueType(tagProfile_SampleType, name, "count") + + values := []int64{0} + var locs []uint64 + for _, k := range keys { + values[0] = int64(count[k]) + // For count profiles, all stack addresses are + // return PCs, which is what appendLocsForStack expects. + locs = b.appendLocsForStack(locs[:0], p.Stack(index[k])) + idx := index[k] + var labels func() + if p.Label(idx) != nil { + labels = func() { + for k, v := range *p.Label(idx) { + b.pbLabel(tagSample_Label, k, v, 0) + } + } + } + b.pbSample(values, locs, labels) + } + b.build() + return nil +} + +// keysByCount sorts keys with higher counts first, breaking ties by key string order. +type keysByCount struct { + keys []string + count map[string]int +} + +func (x *keysByCount) Len() int { return len(x.keys) } +func (x *keysByCount) Swap(i, j int) { x.keys[i], x.keys[j] = x.keys[j], x.keys[i] } +func (x *keysByCount) Less(i, j int) bool { + ki, kj := x.keys[i], x.keys[j] + ci, cj := x.count[ki], x.count[kj] + if ci != cj { + return ci > cj + } + return ki < kj +} + +// printStackRecord prints the function + source line information +// for a single stack trace. +func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) { + show := allFrames + frames := runtime.CallersFrames(stk) + for { + frame, more := frames.Next() + name := frame.Function + if name == "" { + show = true + fmt.Fprintf(w, "#\t%#x\n", frame.PC) + } else if name != "runtime.goexit" && (show || !strings.HasPrefix(name, "runtime.")) { + // Hide runtime.goexit and any runtime functions at the beginning. + // This is useful mainly for allocation traces. + show = true + fmt.Fprintf(w, "#\t%#x\t%s+%#x\t%s:%d\n", frame.PC, name, frame.PC-frame.Entry, frame.File, frame.Line) + } + if !more { + break + } + } + if !show { + // We didn't print anything; do it again, + // and this time include runtime functions. + printStackRecord(w, stk, true) + return + } + fmt.Fprintf(w, "\n") +} + +// Interface to system profiles. + +// WriteHeapProfile is shorthand for Lookup("heap").WriteTo(w, 0). +// It is preserved for backwards compatibility. +func WriteHeapProfile(w io.Writer) error { + return writeHeap(w, 0) +} + +// countHeap returns the number of records in the heap profile. +func countHeap() int { + n, _ := runtime.MemProfile(nil, true) + return n +} + +// writeHeap writes the current runtime heap profile to w. +func writeHeap(w io.Writer, debug int) error { + return writeHeapInternal(w, debug, "") +} + +// writeAlloc writes the current runtime heap profile to w +// with the total allocation space as the default sample type. +func writeAlloc(w io.Writer, debug int) error { + return writeHeapInternal(w, debug, "alloc_space") +} + +func writeHeapInternal(w io.Writer, debug int, defaultSampleType string) error { + var memStats *runtime.MemStats + if debug != 0 { + // Read mem stats first, so that our other allocations + // do not appear in the statistics. + memStats = new(runtime.MemStats) + runtime.ReadMemStats(memStats) + } + + // Find out how many records there are (MemProfile(nil, true)), + // allocate that many records, and get the data. + // There's a race—more records might be added between + // the two calls—so allocate a few extra records for safety + // and also try again if we're very unlucky. + // The loop should only execute one iteration in the common case. + var p []runtime.MemProfileRecord + n, ok := runtime.MemProfile(nil, true) + for { + // Allocate room for a slightly bigger profile, + // in case a few more entries have been added + // since the call to MemProfile. + p = make([]runtime.MemProfileRecord, n+50) + n, ok = runtime.MemProfile(p, true) + if ok { + p = p[0:n] + break + } + // Profile grew; try again. + } + + if debug == 0 { + return writeHeapProto(w, p, int64(runtime.MemProfileRate), defaultSampleType) + } + + sort.Slice(p, func(i, j int) bool { return p[i].InUseBytes() > p[j].InUseBytes() }) + + b := bufio.NewWriter(w) + tw := tabwriter.NewWriter(b, 1, 8, 1, '\t', 0) + w = tw + + var total runtime.MemProfileRecord + for i := range p { + r := &p[i] + total.AllocBytes += r.AllocBytes + total.AllocObjects += r.AllocObjects + total.FreeBytes += r.FreeBytes + total.FreeObjects += r.FreeObjects + } + + // Technically the rate is MemProfileRate not 2*MemProfileRate, + // but early versions of the C++ heap profiler reported 2*MemProfileRate, + // so that's what pprof has come to expect. + rate := 2 * runtime.MemProfileRate + + // pprof reads a profile with alloc == inuse as being a "2-column" profile + // (objects and bytes, not distinguishing alloc from inuse), + // but then such a profile can't be merged using pprof *.prof with + // other 4-column profiles where alloc != inuse. + // The easiest way to avoid this bug is to adjust allocBytes so it's never == inuseBytes. + // pprof doesn't use these header values anymore except for checking equality. + inUseBytes := total.InUseBytes() + allocBytes := total.AllocBytes + if inUseBytes == allocBytes { + allocBytes++ + } + + fmt.Fprintf(w, "heap profile: %d: %d [%d: %d] @ heap/%d\n", + total.InUseObjects(), inUseBytes, + total.AllocObjects, allocBytes, + rate) + + for i := range p { + r := &p[i] + fmt.Fprintf(w, "%d: %d [%d: %d] @", + r.InUseObjects(), r.InUseBytes(), + r.AllocObjects, r.AllocBytes) + for _, pc := range r.Stack() { + fmt.Fprintf(w, " %#x", pc) + } + fmt.Fprintf(w, "\n") + printStackRecord(w, r.Stack(), false) + } + + // Print memstats information too. + // Pprof will ignore, but useful for people + s := memStats + fmt.Fprintf(w, "\n# runtime.MemStats\n") + fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc) + fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc) + fmt.Fprintf(w, "# Sys = %d\n", s.Sys) + fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups) + fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs) + fmt.Fprintf(w, "# Frees = %d\n", s.Frees) + + fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc) + fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys) + fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle) + fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse) + fmt.Fprintf(w, "# HeapReleased = %d\n", s.HeapReleased) + fmt.Fprintf(w, "# HeapObjects = %d\n", s.HeapObjects) + + fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys) + fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys) + fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys) + fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys) + fmt.Fprintf(w, "# GCSys = %d\n", s.GCSys) + fmt.Fprintf(w, "# OtherSys = %d\n", s.OtherSys) + + fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC) + fmt.Fprintf(w, "# LastGC = %d\n", s.LastGC) + fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs) + fmt.Fprintf(w, "# PauseEnd = %d\n", s.PauseEnd) + fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC) + fmt.Fprintf(w, "# NumForcedGC = %d\n", s.NumForcedGC) + fmt.Fprintf(w, "# GCCPUFraction = %v\n", s.GCCPUFraction) + fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC) + + // Also flush out MaxRSS on supported platforms. + addMaxRSS(w) + + tw.Flush() + return b.Flush() +} + +// countThreadCreate returns the size of the current ThreadCreateProfile. +func countThreadCreate() int { + n, _ := runtime.ThreadCreateProfile(nil) + return n +} + +// writeThreadCreate writes the current runtime ThreadCreateProfile to w. +func writeThreadCreate(w io.Writer, debug int) error { + // Until https://golang.org/issues/6104 is addressed, wrap + // ThreadCreateProfile because there's no point in tracking labels when we + // don't get any stack-traces. + return writeRuntimeProfile(w, debug, "threadcreate", func(p []runtime.StackRecord, _ []unsafe.Pointer) (n int, ok bool) { + return runtime.ThreadCreateProfile(p) + }) +} + +// countGoroutine returns the number of goroutines. +func countGoroutine() int { + return runtime.NumGoroutine() +} + +// runtime_goroutineProfileWithLabels is defined in runtime/mprof.go +func runtime_goroutineProfileWithLabels(p []runtime.StackRecord, labels []unsafe.Pointer) (n int, ok bool) + +// writeGoroutine writes the current runtime GoroutineProfile to w. +func writeGoroutine(w io.Writer, debug int) error { + if debug >= 2 { + return writeGoroutineStacks(w) + } + return writeRuntimeProfile(w, debug, "goroutine", runtime_goroutineProfileWithLabels) +} + +func writeGoroutineStacks(w io.Writer) error { + // We don't know how big the buffer needs to be to collect + // all the goroutines. Start with 1 MB and try a few times, doubling each time. + // Give up and use a truncated trace if 64 MB is not enough. + buf := make([]byte, 1<<20) + for i := 0; ; i++ { + n := runtime.Stack(buf, true) + if n < len(buf) { + buf = buf[:n] + break + } + if len(buf) >= 64<<20 { + // Filled 64 MB - stop there. + break + } + buf = make([]byte, 2*len(buf)) + } + _, err := w.Write(buf) + return err +} + +func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord, []unsafe.Pointer) (int, bool)) error { + // Find out how many records there are (fetch(nil)), + // allocate that many records, and get the data. + // There's a race—more records might be added between + // the two calls—so allocate a few extra records for safety + // and also try again if we're very unlucky. + // The loop should only execute one iteration in the common case. + var p []runtime.StackRecord + var labels []unsafe.Pointer + n, ok := fetch(nil, nil) + for { + // Allocate room for a slightly bigger profile, + // in case a few more entries have been added + // since the call to ThreadProfile. + p = make([]runtime.StackRecord, n+10) + labels = make([]unsafe.Pointer, n+10) + n, ok = fetch(p, labels) + if ok { + p = p[0:n] + break + } + // Profile grew; try again. + } + + return printCountProfile(w, debug, name, &runtimeProfile{p, labels}) +} + +type runtimeProfile struct { + stk []runtime.StackRecord + labels []unsafe.Pointer +} + +func (p *runtimeProfile) Len() int { return len(p.stk) } +func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack() } +func (p *runtimeProfile) Label(i int) *labelMap { return (*labelMap)(p.labels[i]) } + +var cpu struct { + sync.Mutex + profiling bool + done chan bool +} + +// StartCPUProfile enables CPU profiling for the current process. +// While profiling, the profile will be buffered and written to w. +// StartCPUProfile returns an error if profiling is already enabled. +// +// On Unix-like systems, StartCPUProfile does not work by default for +// Go code built with -buildmode=c-archive or -buildmode=c-shared. +// StartCPUProfile relies on the SIGPROF signal, but that signal will +// be delivered to the main program's SIGPROF signal handler (if any) +// not to the one used by Go. To make it work, call os/signal.Notify +// for syscall.SIGPROF, but note that doing so may break any profiling +// being done by the main program. +func StartCPUProfile(w io.Writer) error { + // The runtime routines allow a variable profiling rate, + // but in practice operating systems cannot trigger signals + // at more than about 500 Hz, and our processing of the + // signal is not cheap (mostly getting the stack trace). + // 100 Hz is a reasonable choice: it is frequent enough to + // produce useful data, rare enough not to bog down the + // system, and a nice round number to make it easy to + // convert sample counts to seconds. Instead of requiring + // each client to specify the frequency, we hard code it. + const hz = 100 + + cpu.Lock() + defer cpu.Unlock() + if cpu.done == nil { + cpu.done = make(chan bool) + } + // Double-check. + if cpu.profiling { + return fmt.Errorf("cpu profiling already in use") + } + cpu.profiling = true + runtime.SetCPUProfileRate(hz) + go profileWriter(w) + return nil +} + +// readProfile, provided by the runtime, returns the next chunk of +// binary CPU profiling stack trace data, blocking until data is available. +// If profiling is turned off and all the profile data accumulated while it was +// on has been returned, readProfile returns eof=true. +// The caller must save the returned data and tags before calling readProfile again. +func readProfile() (data []uint64, tags []unsafe.Pointer, eof bool) + +func profileWriter(w io.Writer) { + b := newProfileBuilder(w) + var err error + for { + time.Sleep(100 * time.Millisecond) + data, tags, eof := readProfile() + if e := b.addCPUData(data, tags); e != nil && err == nil { + err = e + } + if eof { + break + } + } + if err != nil { + // The runtime should never produce an invalid or truncated profile. + // It drops records that can't fit into its log buffers. + panic("runtime/pprof: converting profile: " + err.Error()) + } + b.build() + cpu.done <- true +} + +// StopCPUProfile stops the current CPU profile, if any. +// StopCPUProfile only returns after all the writes for the +// profile have completed. +func StopCPUProfile() { + cpu.Lock() + defer cpu.Unlock() + + if !cpu.profiling { + return + } + cpu.profiling = false + runtime.SetCPUProfileRate(0) + <-cpu.done +} + +// countBlock returns the number of records in the blocking profile. +func countBlock() int { + n, _ := runtime.BlockProfile(nil) + return n +} + +// countMutex returns the number of records in the mutex profile. +func countMutex() int { + n, _ := runtime.MutexProfile(nil) + return n +} + +// writeBlock writes the current blocking profile to w. +func writeBlock(w io.Writer, debug int) error { + return writeProfileInternal(w, debug, "contention", runtime.BlockProfile) +} + +// writeMutex writes the current mutex profile to w. +func writeMutex(w io.Writer, debug int) error { + return writeProfileInternal(w, debug, "mutex", runtime.MutexProfile) +} + +// writeProfileInternal writes the current blocking or mutex profile depending on the passed parameters. +func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile func([]runtime.BlockProfileRecord) (int, bool)) error { + var p []runtime.BlockProfileRecord + n, ok := runtimeProfile(nil) + for { + p = make([]runtime.BlockProfileRecord, n+50) + n, ok = runtimeProfile(p) + if ok { + p = p[:n] + break + } + } + + sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles }) + + if debug <= 0 { + return printCountCycleProfile(w, "contentions", "delay", p) + } + + b := bufio.NewWriter(w) + tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + w = tw + + fmt.Fprintf(w, "--- %v:\n", name) + fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond()) + if name == "mutex" { + fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1)) + } + for i := range p { + r := &p[i] + fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count) + for _, pc := range r.Stack() { + fmt.Fprintf(w, " %#x", pc) + } + fmt.Fprint(w, "\n") + if debug > 0 { + printStackRecord(w, r.Stack(), true) + } + } + + if tw != nil { + tw.Flush() + } + return b.Flush() +} + +func runtime_cyclesPerSecond() int64 diff --git a/src/runtime/pprof/pprof_norusage.go b/src/runtime/pprof/pprof_norusage.go new file mode 100644 index 0000000..8de3808 --- /dev/null +++ b/src/runtime/pprof/pprof_norusage.go @@ -0,0 +1,15 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !aix && !darwin && !dragonfly && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows + +package pprof + +import ( + "io" +) + +// Stub call for platforms that don't support rusage. +func addMaxRSS(w io.Writer) { +} diff --git a/src/runtime/pprof/pprof_rusage.go b/src/runtime/pprof/pprof_rusage.go new file mode 100644 index 0000000..aa429fb --- /dev/null +++ b/src/runtime/pprof/pprof_rusage.go @@ -0,0 +1,35 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build unix + +package pprof + +import ( + "fmt" + "io" + "runtime" + "syscall" +) + +// Adds MaxRSS to platforms that are supported. +func addMaxRSS(w io.Writer) { + var rssToBytes uintptr + switch runtime.GOOS { + case "aix", "android", "dragonfly", "freebsd", "linux", "netbsd", "openbsd": + rssToBytes = 1024 + case "darwin", "ios": + rssToBytes = 1 + case "illumos", "solaris": + rssToBytes = uintptr(syscall.Getpagesize()) + default: + panic("unsupported OS") + } + + var rusage syscall.Rusage + err := syscall.Getrusage(syscall.RUSAGE_SELF, &rusage) + if err == nil { + fmt.Fprintf(w, "# MaxRSS = %d\n", uintptr(rusage.Maxrss)*rssToBytes) + } +} diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go new file mode 100644 index 0000000..53688ad --- /dev/null +++ b/src/runtime/pprof/pprof_test.go @@ -0,0 +1,2301 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !js + +package pprof + +import ( + "bytes" + "context" + "fmt" + "internal/abi" + "internal/profile" + "internal/syscall/unix" + "internal/testenv" + "io" + "math" + "math/big" + "os" + "os/exec" + "regexp" + "runtime" + "runtime/debug" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + _ "unsafe" +) + +func cpuHogger(f func(x int) int, y *int, dur time.Duration) { + // We only need to get one 100 Hz clock tick, so we've got + // a large safety buffer. + // But do at least 500 iterations (which should take about 100ms), + // otherwise TestCPUProfileMultithreaded can fail if only one + // thread is scheduled during the testing period. + t0 := time.Now() + accum := *y + for i := 0; i < 500 || time.Since(t0) < dur; i++ { + accum = f(accum) + } + *y = accum +} + +var ( + salt1 = 0 + salt2 = 0 +) + +// The actual CPU hogging function. +// Must not call other functions nor access heap/globals in the loop, +// otherwise under race detector the samples will be in the race runtime. +func cpuHog1(x int) int { + return cpuHog0(x, 1e5) +} + +func cpuHog0(x, n int) int { + foo := x + for i := 0; i < n; i++ { + if foo > 0 { + foo *= foo + } else { + foo *= foo + 1 + } + } + return foo +} + +func cpuHog2(x int) int { + foo := x + for i := 0; i < 1e5; i++ { + if foo > 0 { + foo *= foo + } else { + foo *= foo + 2 + } + } + return foo +} + +// Return a list of functions that we don't want to ever appear in CPU +// profiles. For gccgo, that list includes the sigprof handler itself. +func avoidFunctions() []string { + if runtime.Compiler == "gccgo" { + return []string{"runtime.sigprof"} + } + return nil +} + +func TestCPUProfile(t *testing.T) { + matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.cpuHog1"}, avoidFunctions()) + testCPUProfile(t, matches, func(dur time.Duration) { + cpuHogger(cpuHog1, &salt1, dur) + }) +} + +func TestCPUProfileMultithreaded(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) + matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.cpuHog1", "runtime/pprof.cpuHog2"}, avoidFunctions()) + testCPUProfile(t, matches, func(dur time.Duration) { + c := make(chan int) + go func() { + cpuHogger(cpuHog1, &salt1, dur) + c <- 1 + }() + cpuHogger(cpuHog2, &salt2, dur) + <-c + }) +} + +func TestCPUProfileMultithreadMagnitude(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("issue 35057 is only confirmed on Linux") + } + + // Linux [5.9,5.16) has a kernel bug that can break CPU timers on newly + // created threads, breaking our CPU accounting. + major, minor := unix.KernelVersion() + t.Logf("Running on Linux %d.%d", major, minor) + defer func() { + if t.Failed() { + t.Logf("Failure of this test may indicate that your system suffers from a known Linux kernel bug fixed on newer kernels. See https://golang.org/issue/49065.") + } + }() + + // Disable on affected builders to avoid flakiness, but otherwise keep + // it enabled to potentially warn users that they are on a broken + // kernel. + if testenv.Builder() != "" && (runtime.GOARCH == "386" || runtime.GOARCH == "amd64") { + have59 := major > 5 || (major == 5 && minor >= 9) + have516 := major > 5 || (major == 5 && minor >= 16) + if have59 && !have516 { + testenv.SkipFlaky(t, 49065) + } + } + + // Run a workload in a single goroutine, then run copies of the same + // workload in several goroutines. For both the serial and parallel cases, + // the CPU time the process measures with its own profiler should match the + // total CPU usage that the OS reports. + // + // We could also check that increases in parallelism (GOMAXPROCS) lead to a + // linear increase in the CPU usage reported by both the OS and the + // profiler, but without a guarantee of exclusive access to CPU resources + // that is likely to be a flaky test. + + // Require the smaller value to be within 10%, or 40% in short mode. + maxDiff := 0.10 + if testing.Short() { + maxDiff = 0.40 + } + + compare := func(a, b time.Duration, maxDiff float64) error { + if a <= 0 || b <= 0 { + return fmt.Errorf("Expected both time reports to be positive") + } + + if a < b { + a, b = b, a + } + + diff := float64(a-b) / float64(a) + if diff > maxDiff { + return fmt.Errorf("CPU usage reports are too different (limit -%.1f%%, got -%.1f%%)", maxDiff*100, diff*100) + } + + return nil + } + + for _, tc := range []struct { + name string + workers int + }{ + { + name: "serial", + workers: 1, + }, + { + name: "parallel", + workers: runtime.GOMAXPROCS(0), + }, + } { + // check that the OS's perspective matches what the Go runtime measures. + t.Run(tc.name, func(t *testing.T) { + t.Logf("Running with %d workers", tc.workers) + + var userTime, systemTime time.Duration + matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.cpuHog1"}, avoidFunctions()) + acceptProfile := func(t *testing.T, p *profile.Profile) bool { + if !matches(t, p) { + return false + } + + ok := true + for i, unit := range []string{"count", "nanoseconds"} { + if have, want := p.SampleType[i].Unit, unit; have != want { + t.Logf("pN SampleType[%d]; %q != %q", i, have, want) + ok = false + } + } + + // cpuHog1 called below is the primary source of CPU + // load, but there may be some background work by the + // runtime. Since the OS rusage measurement will + // include all work done by the process, also compare + // against all samples in our profile. + var value time.Duration + for _, sample := range p.Sample { + value += time.Duration(sample.Value[1]) * time.Nanosecond + } + + totalTime := userTime + systemTime + t.Logf("compare %s user + %s system = %s vs %s", userTime, systemTime, totalTime, value) + if err := compare(totalTime, value, maxDiff); err != nil { + t.Logf("compare got %v want nil", err) + ok = false + } + + return ok + } + + testCPUProfile(t, acceptProfile, func(dur time.Duration) { + userTime, systemTime = diffCPUTime(t, func() { + var wg sync.WaitGroup + var once sync.Once + for i := 0; i < tc.workers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + var salt = 0 + cpuHogger(cpuHog1, &salt, dur) + once.Do(func() { salt1 = salt }) + }() + } + wg.Wait() + }) + }) + }) + } +} + +// containsInlinedCall reports whether the function body for the function f is +// known to contain an inlined function call within the first maxBytes bytes. +func containsInlinedCall(f any, maxBytes int) bool { + _, found := findInlinedCall(f, maxBytes) + return found +} + +// findInlinedCall returns the PC of an inlined function call within +// the function body for the function f if any. +func findInlinedCall(f any, maxBytes int) (pc uint64, found bool) { + fFunc := runtime.FuncForPC(uintptr(abi.FuncPCABIInternal(f))) + if fFunc == nil || fFunc.Entry() == 0 { + panic("failed to locate function entry") + } + + for offset := 0; offset < maxBytes; offset++ { + innerPC := fFunc.Entry() + uintptr(offset) + inner := runtime.FuncForPC(innerPC) + if inner == nil { + // No function known for this PC value. + // It might simply be misaligned, so keep searching. + continue + } + if inner.Entry() != fFunc.Entry() { + // Scanned past f and didn't find any inlined functions. + break + } + if inner.Name() != fFunc.Name() { + // This PC has f as its entry-point, but is not f. Therefore, it must be a + // function inlined into f. + return uint64(innerPC), true + } + } + + return 0, false +} + +func TestCPUProfileInlining(t *testing.T) { + if !containsInlinedCall(inlinedCaller, 4<<10) { + t.Skip("Can't determine whether inlinedCallee was inlined into inlinedCaller.") + } + + matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions()) + p := testCPUProfile(t, matches, func(dur time.Duration) { + cpuHogger(inlinedCaller, &salt1, dur) + }) + + // Check if inlined function locations are encoded correctly. The inlinedCalee and inlinedCaller should be in one location. + for _, loc := range p.Location { + hasInlinedCallerAfterInlinedCallee, hasInlinedCallee := false, false + for _, line := range loc.Line { + if line.Function.Name == "runtime/pprof.inlinedCallee" { + hasInlinedCallee = true + } + if hasInlinedCallee && line.Function.Name == "runtime/pprof.inlinedCaller" { + hasInlinedCallerAfterInlinedCallee = true + } + } + if hasInlinedCallee != hasInlinedCallerAfterInlinedCallee { + t.Fatalf("want inlinedCallee followed by inlinedCaller, got separate Location entries:\n%v", p) + } + } +} + +func inlinedCaller(x int) int { + x = inlinedCallee(x, 1e5) + return x +} + +func inlinedCallee(x, n int) int { + return cpuHog0(x, n) +} + +//go:noinline +func dumpCallers(pcs []uintptr) { + if pcs == nil { + return + } + + skip := 2 // Callers and dumpCallers + runtime.Callers(skip, pcs) +} + +//go:noinline +func inlinedCallerDump(pcs []uintptr) { + inlinedCalleeDump(pcs) +} + +func inlinedCalleeDump(pcs []uintptr) { + dumpCallers(pcs) +} + +func TestCPUProfileRecursion(t *testing.T) { + matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.recursionCallee", "runtime/pprof.recursionCaller"}, avoidFunctions()) + p := testCPUProfile(t, matches, func(dur time.Duration) { + cpuHogger(recursionCaller, &salt1, dur) + }) + + // check the Location encoding was not confused by recursive calls. + for i, loc := range p.Location { + recursionFunc := 0 + for _, line := range loc.Line { + if name := line.Function.Name; name == "runtime/pprof.recursionCaller" || name == "runtime/pprof.recursionCallee" { + recursionFunc++ + } + } + if recursionFunc > 1 { + t.Fatalf("want at most one recursionCaller or recursionCallee in one Location, got a violating Location (index: %d):\n%v", i, p) + } + } +} + +func recursionCaller(x int) int { + y := recursionCallee(3, x) + return y +} + +func recursionCallee(n, x int) int { + if n == 0 { + return 1 + } + y := inlinedCallee(x, 1e4) + return y * recursionCallee(n-1, x) +} + +func recursionChainTop(x int, pcs []uintptr) { + if x < 0 { + return + } + recursionChainMiddle(x, pcs) +} + +func recursionChainMiddle(x int, pcs []uintptr) { + recursionChainBottom(x, pcs) +} + +func recursionChainBottom(x int, pcs []uintptr) { + // This will be called each time, we only care about the last. We + // can't make this conditional or this function won't be inlined. + dumpCallers(pcs) + + recursionChainTop(x-1, pcs) +} + +func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) *profile.Profile { + p, err := profile.Parse(bytes.NewReader(valBytes)) + if err != nil { + t.Fatal(err) + } + for _, sample := range p.Sample { + count := uintptr(sample.Value[0]) + f(count, sample.Location, sample.Label) + } + return p +} + +func cpuProfilingBroken() bool { + switch runtime.GOOS { + case "plan9": + // Profiling unimplemented. + return true + case "aix": + // See https://golang.org/issue/45170. + return true + case "ios", "dragonfly", "netbsd", "illumos", "solaris": + // See https://golang.org/issue/13841. + return true + case "openbsd": + if runtime.GOARCH == "arm" || runtime.GOARCH == "arm64" { + // See https://golang.org/issue/13841. + return true + } + } + + return false +} + +// testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need, +// as interpreted by matches, and returns the parsed profile. +func testCPUProfile(t *testing.T, matches profileMatchFunc, f func(dur time.Duration)) *profile.Profile { + switch runtime.GOOS { + case "darwin": + out, err := exec.Command("uname", "-a").CombinedOutput() + if err != nil { + t.Fatal(err) + } + vers := string(out) + t.Logf("uname -a: %v", vers) + case "plan9": + t.Skip("skipping on plan9") + } + + broken := cpuProfilingBroken() + + deadline, ok := t.Deadline() + if broken || !ok { + if broken && testing.Short() { + // If it's expected to be broken, no point waiting around. + deadline = time.Now().Add(1 * time.Second) + } else { + deadline = time.Now().Add(10 * time.Second) + } + } + + // If we're running a long test, start with a long duration + // for tests that try to make sure something *doesn't* happen. + duration := 5 * time.Second + if testing.Short() { + duration = 100 * time.Millisecond + } + + // Profiling tests are inherently flaky, especially on a + // loaded system, such as when this test is running with + // several others under go test std. If a test fails in a way + // that could mean it just didn't run long enough, try with a + // longer duration. + for { + var prof bytes.Buffer + if err := StartCPUProfile(&prof); err != nil { + t.Fatal(err) + } + f(duration) + StopCPUProfile() + + if p, ok := profileOk(t, matches, prof, duration); ok { + return p + } + + duration *= 2 + if time.Until(deadline) < duration { + break + } + t.Logf("retrying with %s duration", duration) + } + + if broken { + t.Skipf("ignoring failure on %s/%s; see golang.org/issue/13841", runtime.GOOS, runtime.GOARCH) + } + + // Ignore the failure if the tests are running in a QEMU-based emulator, + // QEMU is not perfect at emulating everything. + // IN_QEMU environmental variable is set by some of the Go builders. + // IN_QEMU=1 indicates that the tests are running in QEMU. See issue 9605. + if os.Getenv("IN_QEMU") == "1" { + t.Skip("ignore the failure in QEMU; see golang.org/issue/9605") + } + t.FailNow() + return nil +} + +var diffCPUTimeImpl func(f func()) (user, system time.Duration) + +func diffCPUTime(t *testing.T, f func()) (user, system time.Duration) { + if fn := diffCPUTimeImpl; fn != nil { + return fn(f) + } + t.Fatalf("cannot measure CPU time on GOOS=%s GOARCH=%s", runtime.GOOS, runtime.GOARCH) + return 0, 0 +} + +func contains(slice []string, s string) bool { + for i := range slice { + if slice[i] == s { + return true + } + } + return false +} + +// stackContains matches if a function named spec appears anywhere in the stack trace. +func stackContains(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool { + for _, loc := range stk { + for _, line := range loc.Line { + if strings.Contains(line.Function.Name, spec) { + return true + } + } + } + return false +} + +type sampleMatchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool + +func profileOk(t *testing.T, matches profileMatchFunc, prof bytes.Buffer, duration time.Duration) (_ *profile.Profile, ok bool) { + ok = true + + var samples uintptr + var buf strings.Builder + p := parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) { + fmt.Fprintf(&buf, "%d:", count) + fprintStack(&buf, stk) + fmt.Fprintf(&buf, " labels: %v\n", labels) + samples += count + fmt.Fprintf(&buf, "\n") + }) + t.Logf("total %d CPU profile samples collected:\n%s", samples, buf.String()) + + if samples < 10 && runtime.GOOS == "windows" { + // On some windows machines we end up with + // not enough samples due to coarse timer + // resolution. Let it go. + t.Log("too few samples on Windows (golang.org/issue/10842)") + return p, false + } + + // Check that we got a reasonable number of samples. + // We used to always require at least ideal/4 samples, + // but that is too hard to guarantee on a loaded system. + // Now we accept 10 or more samples, which we take to be + // enough to show that at least some profiling is occurring. + if ideal := uintptr(duration * 100 / time.Second); samples == 0 || (samples < ideal/4 && samples < 10) { + t.Logf("too few samples; got %d, want at least %d, ideally %d", samples, ideal/4, ideal) + ok = false + } + + if matches != nil && !matches(t, p) { + ok = false + } + + return p, ok +} + +type profileMatchFunc func(*testing.T, *profile.Profile) bool + +func matchAndAvoidStacks(matches sampleMatchFunc, need []string, avoid []string) profileMatchFunc { + return func(t *testing.T, p *profile.Profile) (ok bool) { + ok = true + + // Check that profile is well formed, contains 'need', and does not contain + // anything from 'avoid'. + have := make([]uintptr, len(need)) + avoidSamples := make([]uintptr, len(avoid)) + + for _, sample := range p.Sample { + count := uintptr(sample.Value[0]) + for i, spec := range need { + if matches(spec, count, sample.Location, sample.Label) { + have[i] += count + } + } + for i, name := range avoid { + for _, loc := range sample.Location { + for _, line := range loc.Line { + if strings.Contains(line.Function.Name, name) { + avoidSamples[i] += count + } + } + } + } + } + + for i, name := range avoid { + bad := avoidSamples[i] + if bad != 0 { + t.Logf("found %d samples in avoid-function %s\n", bad, name) + ok = false + } + } + + if len(need) == 0 { + return + } + + var total uintptr + for i, name := range need { + total += have[i] + t.Logf("found %d samples in expected function %s\n", have[i], name) + } + if total == 0 { + t.Logf("no samples in expected functions") + ok = false + } + + // We'd like to check a reasonable minimum, like + // total / len(have) / smallconstant, but this test is + // pretty flaky (see bug 7095). So we'll just test to + // make sure we got at least one sample. + min := uintptr(1) + for i, name := range need { + if have[i] < min { + t.Logf("%s has %d samples out of %d, want at least %d, ideally %d", name, have[i], total, min, total/uintptr(len(have))) + ok = false + } + } + return + } +} + +// Fork can hang if preempted with signals frequently enough (see issue 5517). +// Ensure that we do not do this. +func TestCPUProfileWithFork(t *testing.T) { + testenv.MustHaveExec(t) + + heap := 1 << 30 + if runtime.GOOS == "android" { + // Use smaller size for Android to avoid crash. + heap = 100 << 20 + } + if runtime.GOOS == "windows" && runtime.GOARCH == "arm" { + // Use smaller heap for Windows/ARM to avoid crash. + heap = 100 << 20 + } + if testing.Short() { + heap = 100 << 20 + } + // This makes fork slower. + garbage := make([]byte, heap) + // Need to touch the slice, otherwise it won't be paged in. + done := make(chan bool) + go func() { + for i := range garbage { + garbage[i] = 42 + } + done <- true + }() + <-done + + var prof bytes.Buffer + if err := StartCPUProfile(&prof); err != nil { + t.Fatal(err) + } + defer StopCPUProfile() + + for i := 0; i < 10; i++ { + exec.Command(os.Args[0], "-h").CombinedOutput() + } +} + +// Test that profiler does not observe runtime.gogo as "user" goroutine execution. +// If it did, it would see inconsistent state and would either record an incorrect stack +// or crash because the stack was malformed. +func TestGoroutineSwitch(t *testing.T) { + if runtime.Compiler == "gccgo" { + t.Skip("not applicable for gccgo") + } + // How much to try. These defaults take about 1 seconds + // on a 2012 MacBook Pro. The ones in short mode take + // about 0.1 seconds. + tries := 10 + count := 1000000 + if testing.Short() { + tries = 1 + } + for try := 0; try < tries; try++ { + var prof bytes.Buffer + if err := StartCPUProfile(&prof); err != nil { + t.Fatal(err) + } + for i := 0; i < count; i++ { + runtime.Gosched() + } + StopCPUProfile() + + // Read profile to look for entries for gogo with an attempt at a traceback. + // "runtime.gogo" is OK, because that's the part of the context switch + // before the actual switch begins. But we should not see "gogo", + // aka "gogo<>(SB)", which does the actual switch and is marked SPWRITE. + parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, _ map[string][]string) { + // An entry with two frames with 'System' in its top frame + // exists to record a PC without a traceback. Those are okay. + if len(stk) == 2 { + name := stk[1].Line[0].Function.Name + if name == "runtime._System" || name == "runtime._ExternalCode" || name == "runtime._GC" { + return + } + } + + // An entry with just one frame is OK too: + // it knew to stop at gogo. + if len(stk) == 1 { + return + } + + // Otherwise, should not see gogo. + // The place we'd see it would be the inner most frame. + name := stk[0].Line[0].Function.Name + if name == "gogo" { + var buf strings.Builder + fprintStack(&buf, stk) + t.Fatalf("found profile entry for gogo:\n%s", buf.String()) + } + }) + } +} + +func fprintStack(w io.Writer, stk []*profile.Location) { + if len(stk) == 0 { + fmt.Fprintf(w, " (stack empty)") + } + for _, loc := range stk { + fmt.Fprintf(w, " %#x", loc.Address) + fmt.Fprintf(w, " (") + for i, line := range loc.Line { + if i > 0 { + fmt.Fprintf(w, " ") + } + fmt.Fprintf(w, "%s:%d", line.Function.Name, line.Line) + } + fmt.Fprintf(w, ")") + } +} + +// Test that profiling of division operations is okay, especially on ARM. See issue 6681. +func TestMathBigDivide(t *testing.T) { + testCPUProfile(t, nil, func(duration time.Duration) { + t := time.After(duration) + pi := new(big.Int) + for { + for i := 0; i < 100; i++ { + n := big.NewInt(2646693125139304345) + d := big.NewInt(842468587426513207) + pi.Div(n, d) + } + select { + case <-t: + return + default: + } + } + }) +} + +// stackContainsAll matches if all functions in spec (comma-separated) appear somewhere in the stack trace. +func stackContainsAll(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool { + for _, f := range strings.Split(spec, ",") { + if !stackContains(f, count, stk, labels) { + return false + } + } + return true +} + +func TestMorestack(t *testing.T) { + matches := matchAndAvoidStacks(stackContainsAll, []string{"runtime.newstack,runtime/pprof.growstack"}, avoidFunctions()) + testCPUProfile(t, matches, func(duration time.Duration) { + t := time.After(duration) + c := make(chan bool) + for { + go func() { + growstack1() + c <- true + }() + select { + case <-t: + return + case <-c: + } + } + }) +} + +//go:noinline +func growstack1() { + growstack(10) +} + +//go:noinline +func growstack(n int) { + var buf [8 << 18]byte + use(buf) + if n > 0 { + growstack(n - 1) + } +} + +//go:noinline +func use(x [8 << 18]byte) {} + +func TestBlockProfile(t *testing.T) { + type TestCase struct { + name string + f func(*testing.T) + stk []string + re string + } + tests := [...]TestCase{ + { + name: "chan recv", + f: blockChanRecv, + stk: []string{ + "runtime.chanrecv1", + "runtime/pprof.blockChanRecv", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*runtime/chan.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockChanRecv\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +`}, + { + name: "chan send", + f: blockChanSend, + stk: []string{ + "runtime.chansend1", + "runtime/pprof.blockChanSend", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ runtime\.chansend1\+0x[0-9a-f]+ .*runtime/chan.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockChanSend\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +`}, + { + name: "chan close", + f: blockChanClose, + stk: []string{ + "runtime.chanrecv1", + "runtime/pprof.blockChanClose", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ runtime\.chanrecv1\+0x[0-9a-f]+ .*runtime/chan.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockChanClose\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +`}, + { + name: "select recv async", + f: blockSelectRecvAsync, + stk: []string{ + "runtime.selectgo", + "runtime/pprof.blockSelectRecvAsync", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*runtime/select.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockSelectRecvAsync\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +`}, + { + name: "select send sync", + f: blockSelectSendSync, + stk: []string{ + "runtime.selectgo", + "runtime/pprof.blockSelectSendSync", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ runtime\.selectgo\+0x[0-9a-f]+ .*runtime/select.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockSelectSendSync\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +`}, + { + name: "mutex", + f: blockMutex, + stk: []string{ + "sync.(*Mutex).Lock", + "runtime/pprof.blockMutex", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ sync\.\(\*Mutex\)\.Lock\+0x[0-9a-f]+ .*sync/mutex\.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockMutex\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +`}, + { + name: "cond", + f: blockCond, + stk: []string{ + "sync.(*Cond).Wait", + "runtime/pprof.blockCond", + "runtime/pprof.TestBlockProfile", + }, + re: ` +[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+ +# 0x[0-9a-f]+ sync\.\(\*Cond\)\.Wait\+0x[0-9a-f]+ .*sync/cond\.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.blockCond\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +# 0x[0-9a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+ .*runtime/pprof/pprof_test.go:[0-9]+ +`}, + } + + // Generate block profile + runtime.SetBlockProfileRate(1) + defer runtime.SetBlockProfileRate(0) + for _, test := range tests { + test.f(t) + } + + t.Run("debug=1", func(t *testing.T) { + var w strings.Builder + Lookup("block").WriteTo(&w, 1) + prof := w.String() + + if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") { + t.Fatalf("Bad profile header:\n%v", prof) + } + + if strings.HasSuffix(prof, "#\t0x0\n\n") { + t.Errorf("Useless 0 suffix:\n%v", prof) + } + + for _, test := range tests { + if !regexp.MustCompile(strings.ReplaceAll(test.re, "\t", "\t+")).MatchString(prof) { + t.Errorf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof) + } + } + }) + + t.Run("proto", func(t *testing.T) { + // proto format + var w bytes.Buffer + Lookup("block").WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + t.Logf("parsed proto: %s", p) + if err := p.CheckValid(); err != nil { + t.Fatalf("invalid profile: %v", err) + } + + stks := stacks(p) + for _, test := range tests { + if !containsStack(stks, test.stk) { + t.Errorf("No matching stack entry for %v, want %+v", test.name, test.stk) + } + } + }) + +} + +func stacks(p *profile.Profile) (res [][]string) { + for _, s := range p.Sample { + var stk []string + for _, l := range s.Location { + for _, line := range l.Line { + stk = append(stk, line.Function.Name) + } + } + res = append(res, stk) + } + return res +} + +func containsStack(got [][]string, want []string) bool { + for _, stk := range got { + if len(stk) < len(want) { + continue + } + for i, f := range want { + if f != stk[i] { + break + } + if i == len(want)-1 { + return true + } + } + } + return false +} + +// awaitBlockedGoroutine spins on runtime.Gosched until a runtime stack dump +// shows a goroutine in the given state with a stack frame in +// runtime/pprof.<fName>. +func awaitBlockedGoroutine(t *testing.T, state, fName string) { + re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime/pprof\.%s`, regexp.QuoteMeta(state), fName) + r := regexp.MustCompile(re) + + if deadline, ok := t.Deadline(); ok { + if d := time.Until(deadline); d > 1*time.Second { + timer := time.AfterFunc(d-1*time.Second, func() { + debug.SetTraceback("all") + panic(fmt.Sprintf("timed out waiting for %#q", re)) + }) + defer timer.Stop() + } + } + + buf := make([]byte, 64<<10) + for { + runtime.Gosched() + n := runtime.Stack(buf, true) + if n == len(buf) { + // Buffer wasn't large enough for a full goroutine dump. + // Resize it and try again. + buf = make([]byte, 2*len(buf)) + continue + } + if r.Match(buf[:n]) { + return + } + } +} + +func blockChanRecv(t *testing.T) { + c := make(chan bool) + go func() { + awaitBlockedGoroutine(t, "chan receive", "blockChanRecv") + c <- true + }() + <-c +} + +func blockChanSend(t *testing.T) { + c := make(chan bool) + go func() { + awaitBlockedGoroutine(t, "chan send", "blockChanSend") + <-c + }() + c <- true +} + +func blockChanClose(t *testing.T) { + c := make(chan bool) + go func() { + awaitBlockedGoroutine(t, "chan receive", "blockChanClose") + close(c) + }() + <-c +} + +func blockSelectRecvAsync(t *testing.T) { + const numTries = 3 + c := make(chan bool, 1) + c2 := make(chan bool, 1) + go func() { + for i := 0; i < numTries; i++ { + awaitBlockedGoroutine(t, "select", "blockSelectRecvAsync") + c <- true + } + }() + for i := 0; i < numTries; i++ { + select { + case <-c: + case <-c2: + } + } +} + +func blockSelectSendSync(t *testing.T) { + c := make(chan bool) + c2 := make(chan bool) + go func() { + awaitBlockedGoroutine(t, "select", "blockSelectSendSync") + <-c + }() + select { + case c <- true: + case c2 <- true: + } +} + +func blockMutex(t *testing.T) { + var mu sync.Mutex + mu.Lock() + go func() { + awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex") + mu.Unlock() + }() + // Note: Unlock releases mu before recording the mutex event, + // so it's theoretically possible for this to proceed and + // capture the profile before the event is recorded. As long + // as this is blocked before the unlock happens, it's okay. + mu.Lock() +} + +func blockCond(t *testing.T) { + var mu sync.Mutex + c := sync.NewCond(&mu) + mu.Lock() + go func() { + awaitBlockedGoroutine(t, "sync.Cond.Wait", "blockCond") + mu.Lock() + c.Signal() + mu.Unlock() + }() + c.Wait() + mu.Unlock() +} + +// See http://golang.org/cl/299991. +func TestBlockProfileBias(t *testing.T) { + rate := int(1000) // arbitrary value + runtime.SetBlockProfileRate(rate) + defer runtime.SetBlockProfileRate(0) + + // simulate blocking events + blockFrequentShort(rate) + blockInfrequentLong(rate) + + var w bytes.Buffer + Lookup("block").WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + t.Logf("parsed proto: %s", p) + + il := float64(-1) // blockInfrequentLong duration + fs := float64(-1) // blockFrequentShort duration + for _, s := range p.Sample { + for _, l := range s.Location { + for _, line := range l.Line { + if len(s.Value) < 2 { + t.Fatal("block profile has less than 2 sample types") + } + + if line.Function.Name == "runtime/pprof.blockInfrequentLong" { + il = float64(s.Value[1]) + } else if line.Function.Name == "runtime/pprof.blockFrequentShort" { + fs = float64(s.Value[1]) + } + } + } + } + if il == -1 || fs == -1 { + t.Fatal("block profile is missing expected functions") + } + + // stddev of bias from 100 runs on local machine multiplied by 10x + const threshold = 0.2 + if bias := (il - fs) / il; math.Abs(bias) > threshold { + t.Fatalf("bias: abs(%f) > %f", bias, threshold) + } else { + t.Logf("bias: abs(%f) < %f", bias, threshold) + } +} + +// blockFrequentShort produces 100000 block events with an average duration of +// rate / 10. +func blockFrequentShort(rate int) { + for i := 0; i < 100000; i++ { + blockevent(int64(rate/10), 1) + } +} + +// blockFrequentShort produces 10000 block events with an average duration of +// rate. +func blockInfrequentLong(rate int) { + for i := 0; i < 10000; i++ { + blockevent(int64(rate), 1) + } +} + +// Used by TestBlockProfileBias. +// +//go:linkname blockevent runtime.blockevent +func blockevent(cycles int64, skip int) + +func TestMutexProfile(t *testing.T) { + // Generate mutex profile + + old := runtime.SetMutexProfileFraction(1) + defer runtime.SetMutexProfileFraction(old) + if old != 0 { + t.Fatalf("need MutexProfileRate 0, got %d", old) + } + + blockMutex(t) + + t.Run("debug=1", func(t *testing.T) { + var w strings.Builder + Lookup("mutex").WriteTo(&w, 1) + prof := w.String() + t.Logf("received profile: %v", prof) + + if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") { + t.Errorf("Bad profile header:\n%v", prof) + } + prof = strings.Trim(prof, "\n") + lines := strings.Split(prof, "\n") + if len(lines) != 6 { + t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof) + } + if len(lines) < 6 { + return + } + // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931" + r2 := `^\d+ \d+ @(?: 0x[[:xdigit:]]+)+` + //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$" + if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok { + t.Errorf("%q didn't match %q", lines[3], r2) + } + r3 := "^#.*runtime/pprof.blockMutex.*$" + if ok, err := regexp.MatchString(r3, lines[5]); err != nil || !ok { + t.Errorf("%q didn't match %q", lines[5], r3) + } + t.Logf(prof) + }) + t.Run("proto", func(t *testing.T) { + // proto format + var w bytes.Buffer + Lookup("mutex").WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + t.Logf("parsed proto: %s", p) + if err := p.CheckValid(); err != nil { + t.Fatalf("invalid profile: %v", err) + } + + stks := stacks(p) + for _, want := range [][]string{ + {"sync.(*Mutex).Unlock", "runtime/pprof.blockMutex.func1"}, + } { + if !containsStack(stks, want) { + t.Errorf("No matching stack entry for %+v", want) + } + } + }) +} + +func TestMutexProfileRateAdjust(t *testing.T) { + old := runtime.SetMutexProfileFraction(1) + defer runtime.SetMutexProfileFraction(old) + if old != 0 { + t.Fatalf("need MutexProfileRate 0, got %d", old) + } + + readProfile := func() (contentions int64, delay int64) { + var w bytes.Buffer + Lookup("mutex").WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + t.Logf("parsed proto: %s", p) + if err := p.CheckValid(); err != nil { + t.Fatalf("invalid profile: %v", err) + } + + for _, s := range p.Sample { + for _, l := range s.Location { + for _, line := range l.Line { + if line.Function.Name == "runtime/pprof.blockMutex.func1" { + contentions += s.Value[0] + delay += s.Value[1] + } + } + } + } + return + } + + blockMutex(t) + contentions, delay := readProfile() + if contentions == 0 || delay == 0 { + t.Fatal("did not see expected function in profile") + } + runtime.SetMutexProfileFraction(0) + newContentions, newDelay := readProfile() + if newContentions != contentions || newDelay != delay { + t.Fatalf("sample value changed: got [%d, %d], want [%d, %d]", newContentions, newDelay, contentions, delay) + } +} + +func func1(c chan int) { <-c } +func func2(c chan int) { <-c } +func func3(c chan int) { <-c } +func func4(c chan int) { <-c } + +func TestGoroutineCounts(t *testing.T) { + // Setting GOMAXPROCS to 1 ensures we can force all goroutines to the + // desired blocking point. + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + + c := make(chan int) + for i := 0; i < 100; i++ { + switch { + case i%10 == 0: + go func1(c) + case i%2 == 0: + go func2(c) + default: + go func3(c) + } + // Let goroutines block on channel + for j := 0; j < 5; j++ { + runtime.Gosched() + } + } + ctx := context.Background() + + // ... and again, with labels this time (just with fewer iterations to keep + // sorting deterministic). + Do(ctx, Labels("label", "value"), func(context.Context) { + for i := 0; i < 89; i++ { + switch { + case i%10 == 0: + go func1(c) + case i%2 == 0: + go func2(c) + default: + go func3(c) + } + // Let goroutines block on channel + for j := 0; j < 5; j++ { + runtime.Gosched() + } + } + }) + + var w bytes.Buffer + goroutineProf := Lookup("goroutine") + + // Check debug profile + goroutineProf.WriteTo(&w, 1) + prof := w.String() + + labels := labelMap{"label": "value"} + labelStr := "\n# labels: " + labels.String() + if !containsInOrder(prof, "\n50 @ ", "\n44 @", labelStr, + "\n40 @", "\n36 @", labelStr, "\n10 @", "\n9 @", labelStr, "\n1 @") { + t.Errorf("expected sorted goroutine counts with Labels:\n%s", prof) + } + + // Check proto profile + w.Reset() + goroutineProf.WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Errorf("error parsing protobuf profile: %v", err) + } + if err := p.CheckValid(); err != nil { + t.Errorf("protobuf profile is invalid: %v", err) + } + expectedLabels := map[int64]map[string]string{ + 50: {}, + 44: {"label": "value"}, + 40: {}, + 36: {"label": "value"}, + 10: {}, + 9: {"label": "value"}, + 1: {}, + } + if !containsCountsLabels(p, expectedLabels) { + t.Errorf("expected count profile to contain goroutines with counts and labels %v, got %v", + expectedLabels, p) + } + + close(c) + + time.Sleep(10 * time.Millisecond) // let goroutines exit +} + +func containsInOrder(s string, all ...string) bool { + for _, t := range all { + var ok bool + if _, s, ok = strings.Cut(s, t); !ok { + return false + } + } + return true +} + +func containsCountsLabels(prof *profile.Profile, countLabels map[int64]map[string]string) bool { + m := make(map[int64]int) + type nkey struct { + count int64 + key, val string + } + n := make(map[nkey]int) + for c, kv := range countLabels { + m[c]++ + for k, v := range kv { + n[nkey{ + count: c, + key: k, + val: v, + }]++ + + } + } + for _, s := range prof.Sample { + // The count is the single value in the sample + if len(s.Value) != 1 { + return false + } + m[s.Value[0]]-- + for k, vs := range s.Label { + for _, v := range vs { + n[nkey{ + count: s.Value[0], + key: k, + val: v, + }]-- + } + } + } + for _, n := range m { + if n > 0 { + return false + } + } + for _, ncnt := range n { + if ncnt != 0 { + return false + } + } + return true +} + +func TestGoroutineProfileConcurrency(t *testing.T) { + goroutineProf := Lookup("goroutine") + + profilerCalls := func(s string) int { + return strings.Count(s, "\truntime/pprof.runtime_goroutineProfileWithLabels+") + } + + includesFinalizer := func(s string) bool { + return strings.Contains(s, "runtime.runfinq") + } + + // Concurrent calls to the goroutine profiler should not trigger data races + // or corruption. + t.Run("overlapping profile requests", func(t *testing.T) { + ctx := context.Background() + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + var wg sync.WaitGroup + for i := 0; i < 2; i++ { + wg.Add(1) + Do(ctx, Labels("i", fmt.Sprint(i)), func(context.Context) { + go func() { + defer wg.Done() + for ctx.Err() == nil { + var w strings.Builder + goroutineProf.WriteTo(&w, 1) + prof := w.String() + count := profilerCalls(prof) + if count >= 2 { + t.Logf("prof %d\n%s", count, prof) + cancel() + } + } + }() + }) + } + wg.Wait() + }) + + // The finalizer goroutine should not show up in most profiles, since it's + // marked as a system goroutine when idle. + t.Run("finalizer not present", func(t *testing.T) { + var w strings.Builder + goroutineProf.WriteTo(&w, 1) + prof := w.String() + if includesFinalizer(prof) { + t.Errorf("profile includes finalizer (but finalizer should be marked as system):\n%s", prof) + } + }) + + // The finalizer goroutine should show up when it's running user code. + t.Run("finalizer present", func(t *testing.T) { + obj := new(byte) + ch1, ch2 := make(chan int), make(chan int) + defer close(ch2) + runtime.SetFinalizer(obj, func(_ interface{}) { + close(ch1) + <-ch2 + }) + obj = nil + for i := 10; i >= 0; i-- { + select { + case <-ch1: + default: + if i == 0 { + t.Fatalf("finalizer did not run") + } + runtime.GC() + } + } + var w strings.Builder + goroutineProf.WriteTo(&w, 1) + prof := w.String() + if !includesFinalizer(prof) { + t.Errorf("profile does not include finalizer (and it should be marked as user):\n%s", prof) + } + }) + + // Check that new goroutines only show up in order. + testLaunches := func(t *testing.T) { + var done sync.WaitGroup + defer done.Wait() + + ctx := context.Background() + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + ch := make(chan int) + defer close(ch) + + var ready sync.WaitGroup + + // These goroutines all survive until the end of the subtest, so we can + // check that a (numbered) goroutine appearing in the profile implies + // that all older goroutines also appear in the profile. + ready.Add(1) + done.Add(1) + go func() { + defer done.Done() + for i := 0; ctx.Err() == nil; i++ { + // Use SetGoroutineLabels rather than Do we can always expect an + // extra goroutine (this one) with most recent label. + SetGoroutineLabels(WithLabels(ctx, Labels(t.Name()+"-loop-i", fmt.Sprint(i)))) + done.Add(1) + go func() { + <-ch + done.Done() + }() + for j := 0; j < i; j++ { + // Spin for longer and longer as the test goes on. This + // goroutine will do O(N^2) work with the number of + // goroutines it launches. This should be slow relative to + // the work involved in collecting a goroutine profile, + // which is O(N) with the high-water mark of the number of + // goroutines in this process (in the allgs slice). + runtime.Gosched() + } + if i == 0 { + ready.Done() + } + } + }() + + // Short-lived goroutines exercise different code paths (goroutines with + // status _Gdead, for instance). This churn doesn't have behavior that + // we can test directly, but does help to shake out data races. + ready.Add(1) + var churn func(i int) + churn = func(i int) { + SetGoroutineLabels(WithLabels(ctx, Labels(t.Name()+"-churn-i", fmt.Sprint(i)))) + if i == 0 { + ready.Done() + } else if i%16 == 0 { + // Yield on occasion so this sequence of goroutine launches + // doesn't monopolize a P. See issue #52934. + runtime.Gosched() + } + if ctx.Err() == nil { + go churn(i + 1) + } + } + go func() { + churn(0) + }() + + ready.Wait() + + var w [3]bytes.Buffer + for i := range w { + goroutineProf.WriteTo(&w[i], 0) + } + for i := range w { + p, err := profile.Parse(bytes.NewReader(w[i].Bytes())) + if err != nil { + t.Errorf("error parsing protobuf profile: %v", err) + } + + // High-numbered loop-i goroutines imply that every lower-numbered + // loop-i goroutine should be present in the profile too. + counts := make(map[string]int) + for _, s := range p.Sample { + label := s.Label[t.Name()+"-loop-i"] + if len(label) > 0 { + counts[label[0]]++ + } + } + for j, max := 0, len(counts)-1; j <= max; j++ { + n := counts[fmt.Sprint(j)] + if n == 1 || (n == 2 && j == max) { + continue + } + t.Errorf("profile #%d's goroutines with label loop-i:%d; %d != 1 (or 2 for the last entry, %d)", + i+1, j, n, max) + t.Logf("counts %v", counts) + break + } + } + } + + runs := 100 + if testing.Short() { + runs = 5 + } + for i := 0; i < runs; i++ { + // Run multiple times to shake out data races + t.Run("goroutine launches", testLaunches) + } +} + +func BenchmarkGoroutine(b *testing.B) { + withIdle := func(n int, fn func(b *testing.B)) func(b *testing.B) { + return func(b *testing.B) { + c := make(chan int) + var ready, done sync.WaitGroup + defer func() { + close(c) + done.Wait() + }() + + for i := 0; i < n; i++ { + ready.Add(1) + done.Add(1) + go func() { + ready.Done() + <-c + done.Done() + }() + } + // Let goroutines block on channel + ready.Wait() + for i := 0; i < 5; i++ { + runtime.Gosched() + } + + fn(b) + } + } + + withChurn := func(fn func(b *testing.B)) func(b *testing.B) { + return func(b *testing.B) { + ctx := context.Background() + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + var ready sync.WaitGroup + ready.Add(1) + var count int64 + var churn func(i int) + churn = func(i int) { + SetGoroutineLabels(WithLabels(ctx, Labels("churn-i", fmt.Sprint(i)))) + atomic.AddInt64(&count, 1) + if i == 0 { + ready.Done() + } + if ctx.Err() == nil { + go churn(i + 1) + } + } + go func() { + churn(0) + }() + ready.Wait() + + fn(b) + b.ReportMetric(float64(atomic.LoadInt64(&count))/float64(b.N), "concurrent_launches/op") + } + } + + benchWriteTo := func(b *testing.B) { + goroutineProf := Lookup("goroutine") + b.ResetTimer() + for i := 0; i < b.N; i++ { + goroutineProf.WriteTo(io.Discard, 0) + } + b.StopTimer() + } + + benchGoroutineProfile := func(b *testing.B) { + p := make([]runtime.StackRecord, 10000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + runtime.GoroutineProfile(p) + } + b.StopTimer() + } + + // Note that some costs of collecting a goroutine profile depend on the + // length of the runtime.allgs slice, which never shrinks. Stay within race + // detector's 8k-goroutine limit + for _, n := range []int{50, 500, 5000} { + b.Run(fmt.Sprintf("Profile.WriteTo idle %d", n), withIdle(n, benchWriteTo)) + b.Run(fmt.Sprintf("Profile.WriteTo churn %d", n), withIdle(n, withChurn(benchWriteTo))) + b.Run(fmt.Sprintf("runtime.GoroutineProfile churn %d", n), withIdle(n, withChurn(benchGoroutineProfile))) + } +} + +var emptyCallStackTestRun int64 + +// Issue 18836. +func TestEmptyCallStack(t *testing.T) { + name := fmt.Sprintf("test18836_%d", emptyCallStackTestRun) + emptyCallStackTestRun++ + + t.Parallel() + var buf strings.Builder + p := NewProfile(name) + + p.Add("foo", 47674) + p.WriteTo(&buf, 1) + p.Remove("foo") + got := buf.String() + prefix := name + " profile: total 1\n" + if !strings.HasPrefix(got, prefix) { + t.Fatalf("got:\n\t%q\nwant prefix:\n\t%q\n", got, prefix) + } + lostevent := "lostProfileEvent" + if !strings.Contains(got, lostevent) { + t.Fatalf("got:\n\t%q\ndoes not contain:\n\t%q\n", got, lostevent) + } +} + +// stackContainsLabeled takes a spec like funcname;key=value and matches if the stack has that key +// and value and has funcname somewhere in the stack. +func stackContainsLabeled(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool { + base, kv, ok := strings.Cut(spec, ";") + if !ok { + panic("no semicolon in key/value spec") + } + k, v, ok := strings.Cut(kv, "=") + if !ok { + panic("missing = in key/value spec") + } + if !contains(labels[k], v) { + return false + } + return stackContains(base, count, stk, labels) +} + +func TestCPUProfileLabel(t *testing.T) { + matches := matchAndAvoidStacks(stackContainsLabeled, []string{"runtime/pprof.cpuHogger;key=value"}, avoidFunctions()) + testCPUProfile(t, matches, func(dur time.Duration) { + Do(context.Background(), Labels("key", "value"), func(context.Context) { + cpuHogger(cpuHog1, &salt1, dur) + }) + }) +} + +func TestLabelRace(t *testing.T) { + // Test the race detector annotations for synchronization + // between setting labels and consuming them from the + // profile. + matches := matchAndAvoidStacks(stackContainsLabeled, []string{"runtime/pprof.cpuHogger;key=value"}, nil) + testCPUProfile(t, matches, func(dur time.Duration) { + start := time.Now() + var wg sync.WaitGroup + for time.Since(start) < dur { + var salts [10]int + for i := 0; i < 10; i++ { + wg.Add(1) + go func(j int) { + Do(context.Background(), Labels("key", "value"), func(context.Context) { + cpuHogger(cpuHog1, &salts[j], time.Millisecond) + }) + wg.Done() + }(i) + } + wg.Wait() + } + }) +} + +func TestGoroutineProfileLabelRace(t *testing.T) { + // Test the race detector annotations for synchronization + // between setting labels and consuming them from the + // goroutine profile. See issue #50292. + + t.Run("reset", func(t *testing.T) { + ctx := context.Background() + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + go func() { + goroutineProf := Lookup("goroutine") + for ctx.Err() == nil { + var w strings.Builder + goroutineProf.WriteTo(&w, 1) + prof := w.String() + if strings.Contains(prof, "loop-i") { + cancel() + } + } + }() + + for i := 0; ctx.Err() == nil; i++ { + Do(ctx, Labels("loop-i", fmt.Sprint(i)), func(ctx context.Context) { + }) + } + }) + + t.Run("churn", func(t *testing.T) { + ctx := context.Background() + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + var ready sync.WaitGroup + ready.Add(1) + var churn func(i int) + churn = func(i int) { + SetGoroutineLabels(WithLabels(ctx, Labels("churn-i", fmt.Sprint(i)))) + if i == 0 { + ready.Done() + } + if ctx.Err() == nil { + go churn(i + 1) + } + } + go func() { + churn(0) + }() + ready.Wait() + + goroutineProf := Lookup("goroutine") + for i := 0; i < 10; i++ { + goroutineProf.WriteTo(io.Discard, 1) + } + }) +} + +// TestLabelSystemstack makes sure CPU profiler samples of goroutines running +// on systemstack include the correct pprof labels. See issue #48577 +func TestLabelSystemstack(t *testing.T) { + // Grab and re-set the initial value before continuing to ensure + // GOGC doesn't actually change following the test. + gogc := debug.SetGCPercent(100) + debug.SetGCPercent(gogc) + + matches := matchAndAvoidStacks(stackContainsLabeled, []string{"runtime.systemstack;key=value"}, avoidFunctions()) + p := testCPUProfile(t, matches, func(dur time.Duration) { + Do(context.Background(), Labels("key", "value"), func(ctx context.Context) { + parallelLabelHog(ctx, dur, gogc) + }) + }) + + // Two conditions to check: + // * labelHog should always be labeled. + // * The label should _only_ appear on labelHog and the Do call above. + for _, s := range p.Sample { + isLabeled := s.Label != nil && contains(s.Label["key"], "value") + var ( + mayBeLabeled bool + mustBeLabeled string + mustNotBeLabeled string + ) + for _, loc := range s.Location { + for _, l := range loc.Line { + switch l.Function.Name { + case "runtime/pprof.labelHog", "runtime/pprof.parallelLabelHog", "runtime/pprof.parallelLabelHog.func1": + mustBeLabeled = l.Function.Name + case "runtime/pprof.Do": + // Do sets the labels, so samples may + // or may not be labeled depending on + // which part of the function they are + // at. + mayBeLabeled = true + case "runtime.bgsweep", "runtime.bgscavenge", "runtime.forcegchelper", "runtime.gcBgMarkWorker", "runtime.runfinq", "runtime.sysmon": + // Runtime system goroutines or threads + // (such as those identified by + // runtime.isSystemGoroutine). These + // should never be labeled. + mustNotBeLabeled = l.Function.Name + case "gogo", "gosave_systemstack_switch", "racecall": + // These are context switch/race + // critical that we can't do a full + // traceback from. Typically this would + // be covered by the runtime check + // below, but these symbols don't have + // the package name. + mayBeLabeled = true + } + + if strings.HasPrefix(l.Function.Name, "runtime.") { + // There are many places in the runtime + // where we can't do a full traceback. + // Ideally we'd list them all, but + // barring that allow anything in the + // runtime, unless explicitly excluded + // above. + mayBeLabeled = true + } + } + } + errorStack := func(f string, args ...any) { + var buf strings.Builder + fprintStack(&buf, s.Location) + t.Errorf("%s: %s", fmt.Sprintf(f, args...), buf.String()) + } + if mustBeLabeled != "" && mustNotBeLabeled != "" { + errorStack("sample contains both %s, which must be labeled, and %s, which must not be labeled", mustBeLabeled, mustNotBeLabeled) + continue + } + if mustBeLabeled != "" || mustNotBeLabeled != "" { + // We found a definitive frame, so mayBeLabeled hints are not relevant. + mayBeLabeled = false + } + if mayBeLabeled { + // This sample may or may not be labeled, so there's nothing we can check. + continue + } + if mustBeLabeled != "" && !isLabeled { + errorStack("sample must be labeled because of %s, but is not", mustBeLabeled) + } + if mustNotBeLabeled != "" && isLabeled { + errorStack("sample must not be labeled because of %s, but is", mustNotBeLabeled) + } + } +} + +// labelHog is designed to burn CPU time in a way that a high number of CPU +// samples end up running on systemstack. +func labelHog(stop chan struct{}, gogc int) { + // Regression test for issue 50032. We must give GC an opportunity to + // be initially triggered by a labelled goroutine. + runtime.GC() + + for i := 0; ; i++ { + select { + case <-stop: + return + default: + debug.SetGCPercent(gogc) + } + } +} + +// parallelLabelHog runs GOMAXPROCS goroutines running labelHog. +func parallelLabelHog(ctx context.Context, dur time.Duration, gogc int) { + var wg sync.WaitGroup + stop := make(chan struct{}) + for i := 0; i < runtime.GOMAXPROCS(0); i++ { + wg.Add(1) + go func() { + defer wg.Done() + labelHog(stop, gogc) + }() + } + + time.Sleep(dur) + close(stop) + wg.Wait() +} + +// Check that there is no deadlock when the program receives SIGPROF while in +// 64bit atomics' critical section. Used to happen on mips{,le}. See #20146. +func TestAtomicLoadStore64(t *testing.T) { + f, err := os.CreateTemp("", "profatomic") + if err != nil { + t.Fatalf("TempFile: %v", err) + } + defer os.Remove(f.Name()) + defer f.Close() + + if err := StartCPUProfile(f); err != nil { + t.Fatal(err) + } + defer StopCPUProfile() + + var flag uint64 + done := make(chan bool, 1) + + go func() { + for atomic.LoadUint64(&flag) == 0 { + runtime.Gosched() + } + done <- true + }() + time.Sleep(50 * time.Millisecond) + atomic.StoreUint64(&flag, 1) + <-done +} + +func TestTracebackAll(t *testing.T) { + // With gccgo, if a profiling signal arrives at the wrong time + // during traceback, it may crash or hang. See issue #29448. + f, err := os.CreateTemp("", "proftraceback") + if err != nil { + t.Fatalf("TempFile: %v", err) + } + defer os.Remove(f.Name()) + defer f.Close() + + if err := StartCPUProfile(f); err != nil { + t.Fatal(err) + } + defer StopCPUProfile() + + ch := make(chan int) + defer close(ch) + + count := 10 + for i := 0; i < count; i++ { + go func() { + <-ch // block + }() + } + + N := 10000 + if testing.Short() { + N = 500 + } + buf := make([]byte, 10*1024) + for i := 0; i < N; i++ { + runtime.Stack(buf, true) + } +} + +// TestTryAdd tests the cases that are hard to test with real program execution. +// +// For example, the current go compilers may not always inline functions +// involved in recursion but that may not be true in the future compilers. This +// tests such cases by using fake call sequences and forcing the profile build +// utilizing translateCPUProfile defined in proto_test.go +func TestTryAdd(t *testing.T) { + if _, found := findInlinedCall(inlinedCallerDump, 4<<10); !found { + t.Skip("Can't determine whether anything was inlined into inlinedCallerDump.") + } + + // inlinedCallerDump + // inlinedCalleeDump + pcs := make([]uintptr, 2) + inlinedCallerDump(pcs) + inlinedCallerStack := make([]uint64, 2) + for i := range pcs { + inlinedCallerStack[i] = uint64(pcs[i]) + } + + if _, found := findInlinedCall(recursionChainBottom, 4<<10); !found { + t.Skip("Can't determine whether anything was inlined into recursionChainBottom.") + } + + // recursionChainTop + // recursionChainMiddle + // recursionChainBottom + // recursionChainTop + // recursionChainMiddle + // recursionChainBottom + pcs = make([]uintptr, 6) + recursionChainTop(1, pcs) + recursionStack := make([]uint64, len(pcs)) + for i := range pcs { + recursionStack[i] = uint64(pcs[i]) + } + + period := int64(2000 * 1000) // 1/500*1e9 nanosec. + + testCases := []struct { + name string + input []uint64 // following the input format assumed by profileBuilder.addCPUData. + count int // number of records in input. + wantLocs [][]string // ordered location entries with function names. + wantSamples []*profile.Sample // ordered samples, we care only about Value and the profile location IDs. + }{{ + // Sanity test for a normal, complete stack trace. + name: "full_stack_trace", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 5, 0, 50, inlinedCallerStack[0], inlinedCallerStack[1], + }, + count: 2, + wantLocs: [][]string{ + {"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}, + }, + wantSamples: []*profile.Sample{ + {Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + name: "bug35538", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + // Fake frame: tryAdd will have inlinedCallerDump + // (stack[1]) on the deck when it encounters the next + // inline function. It should accept this. + 7, 0, 10, inlinedCallerStack[0], inlinedCallerStack[1], inlinedCallerStack[0], inlinedCallerStack[1], + 5, 0, 20, inlinedCallerStack[0], inlinedCallerStack[1], + }, + count: 3, + wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}}, + {Value: []int64{20, 20 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + name: "bug38096", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + // count (data[2]) == 0 && len(stk) == 1 is an overflow + // entry. The "stk" entry is actually the count. + 4, 0, 0, 4242, + }, + count: 2, + wantLocs: [][]string{{"runtime/pprof.lostProfileEvent"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{4242, 4242 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + // If a function is directly called recursively then it must + // not be inlined in the caller. + // + // N.B. We're generating an impossible profile here, with a + // recursive inlineCalleeDump call. This is simulating a non-Go + // function that looks like an inlined Go function other than + // its recursive property. See pcDeck.tryAdd. + name: "directly_recursive_func_is_not_inlined", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 5, 0, 30, inlinedCallerStack[0], inlinedCallerStack[0], + 4, 0, 40, inlinedCallerStack[0], + }, + count: 3, + // inlinedCallerDump shows up here because + // runtime_expandFinalInlineFrame adds it to the stack frame. + wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump"}, {"runtime/pprof.inlinedCallerDump"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}, {ID: 2}}}, + {Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}}, + }, + }, { + name: "recursion_chain_inline", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 9, 0, 10, recursionStack[0], recursionStack[1], recursionStack[2], recursionStack[3], recursionStack[4], recursionStack[5], + }, + count: 2, + wantLocs: [][]string{ + {"runtime/pprof.recursionChainBottom"}, + { + "runtime/pprof.recursionChainMiddle", + "runtime/pprof.recursionChainTop", + "runtime/pprof.recursionChainBottom", + }, + { + "runtime/pprof.recursionChainMiddle", + "runtime/pprof.recursionChainTop", + "runtime/pprof.TestTryAdd", // inlined into the test. + }, + }, + wantSamples: []*profile.Sample{ + {Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}, {ID: 3}}}, + }, + }, { + name: "truncated_stack_trace_later", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 5, 0, 50, inlinedCallerStack[0], inlinedCallerStack[1], + 4, 0, 60, inlinedCallerStack[0], + }, + count: 3, + wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}}, + {Value: []int64{60, 60 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + name: "truncated_stack_trace_first", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 4, 0, 70, inlinedCallerStack[0], + 5, 0, 80, inlinedCallerStack[0], inlinedCallerStack[1], + }, + count: 3, + wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}}, + {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + // We can recover the inlined caller from a truncated stack. + name: "truncated_stack_trace_only", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 4, 0, 70, inlinedCallerStack[0], + }, + count: 2, + wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}}, + wantSamples: []*profile.Sample{ + {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}}, + }, + }, { + // The same location is used for duplicated stacks. + name: "truncated_stack_trace_twice", + input: []uint64{ + 3, 0, 500, // hz = 500. Must match the period. + 4, 0, 70, inlinedCallerStack[0], + // Fake frame: add a fake call to + // inlinedCallerDump to prevent this sample + // from getting merged into above. + 5, 0, 80, inlinedCallerStack[1], inlinedCallerStack[0], + }, + count: 3, + wantLocs: [][]string{ + {"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}, + {"runtime/pprof.inlinedCallerDump"}, + }, + wantSamples: []*profile.Sample{ + {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}}, + {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 2}, {ID: 1}}}, + }, + }} + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p, err := translateCPUProfile(tc.input, tc.count) + if err != nil { + t.Fatalf("translating profile: %v", err) + } + t.Logf("Profile: %v\n", p) + + // One location entry with all inlined functions. + var gotLoc [][]string + for _, loc := range p.Location { + var names []string + for _, line := range loc.Line { + names = append(names, line.Function.Name) + } + gotLoc = append(gotLoc, names) + } + if got, want := fmtJSON(gotLoc), fmtJSON(tc.wantLocs); got != want { + t.Errorf("Got Location = %+v\n\twant %+v", got, want) + } + // All samples should point to one location. + var gotSamples []*profile.Sample + for _, sample := range p.Sample { + var locs []*profile.Location + for _, loc := range sample.Location { + locs = append(locs, &profile.Location{ID: loc.ID}) + } + gotSamples = append(gotSamples, &profile.Sample{Value: sample.Value, Location: locs}) + } + if got, want := fmtJSON(gotSamples), fmtJSON(tc.wantSamples); got != want { + t.Errorf("Got Samples = %+v\n\twant %+v", got, want) + } + }) + } +} + +func TestTimeVDSO(t *testing.T) { + // Test that time functions have the right stack trace. In particular, + // it shouldn't be recursive. + + if runtime.GOOS == "android" { + // Flaky on Android, issue 48655. VDSO may not be enabled. + testenv.SkipFlaky(t, 48655) + } + + matches := matchAndAvoidStacks(stackContains, []string{"time.now"}, avoidFunctions()) + p := testCPUProfile(t, matches, func(dur time.Duration) { + t0 := time.Now() + for { + t := time.Now() + if t.Sub(t0) >= dur { + return + } + } + }) + + // Check for recursive time.now sample. + for _, sample := range p.Sample { + var seenNow bool + for _, loc := range sample.Location { + for _, line := range loc.Line { + if line.Function.Name == "time.now" { + if seenNow { + t.Fatalf("unexpected recursive time.now") + } + seenNow = true + } + } + } + } +} diff --git a/src/runtime/pprof/pprof_windows.go b/src/runtime/pprof/pprof_windows.go new file mode 100644 index 0000000..23ef2f8 --- /dev/null +++ b/src/runtime/pprof/pprof_windows.go @@ -0,0 +1,22 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "fmt" + "internal/syscall/windows" + "io" + "syscall" + "unsafe" +) + +func addMaxRSS(w io.Writer) { + var m windows.PROCESS_MEMORY_COUNTERS + p, _ := syscall.GetCurrentProcess() + err := windows.GetProcessMemoryInfo(p, &m, uint32(unsafe.Sizeof(m))) + if err == nil { + fmt.Fprintf(w, "# MaxRSS = %d\n", m.PeakWorkingSetSize) + } +} diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go new file mode 100644 index 0000000..b68f30d --- /dev/null +++ b/src/runtime/pprof/proto.go @@ -0,0 +1,761 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "bytes" + "compress/gzip" + "fmt" + "internal/abi" + "io" + "runtime" + "strconv" + "strings" + "time" + "unsafe" +) + +// lostProfileEvent is the function to which lost profiling +// events are attributed. +// (The name shows up in the pprof graphs.) +func lostProfileEvent() { lostProfileEvent() } + +// A profileBuilder writes a profile incrementally from a +// stream of profile samples delivered by the runtime. +type profileBuilder struct { + start time.Time + end time.Time + havePeriod bool + period int64 + m profMap + + // encoding state + w io.Writer + zw *gzip.Writer + pb protobuf + strings []string + stringMap map[string]int + locs map[uintptr]locInfo // list of locInfo starting with the given PC. + funcs map[string]int // Package path-qualified function name to Function.ID + mem []memMap + deck pcDeck +} + +type memMap struct { + // initialized as reading mapping + start uintptr // Address at which the binary (or DLL) is loaded into memory. + end uintptr // The limit of the address range occupied by this mapping. + offset uint64 // Offset in the binary that corresponds to the first mapped address. + file string // The object this entry is loaded from. + buildID string // A string that uniquely identifies a particular program version with high probability. + + funcs symbolizeFlag + fake bool // map entry was faked; /proc/self/maps wasn't available +} + +// symbolizeFlag keeps track of symbolization result. +// +// 0 : no symbol lookup was performed +// 1<<0 (lookupTried) : symbol lookup was performed +// 1<<1 (lookupFailed): symbol lookup was performed but failed +type symbolizeFlag uint8 + +const ( + lookupTried symbolizeFlag = 1 << iota + lookupFailed symbolizeFlag = 1 << iota +) + +const ( + // message Profile + tagProfile_SampleType = 1 // repeated ValueType + tagProfile_Sample = 2 // repeated Sample + tagProfile_Mapping = 3 // repeated Mapping + tagProfile_Location = 4 // repeated Location + tagProfile_Function = 5 // repeated Function + tagProfile_StringTable = 6 // repeated string + tagProfile_DropFrames = 7 // int64 (string table index) + tagProfile_KeepFrames = 8 // int64 (string table index) + tagProfile_TimeNanos = 9 // int64 + tagProfile_DurationNanos = 10 // int64 + tagProfile_PeriodType = 11 // ValueType (really optional string???) + tagProfile_Period = 12 // int64 + tagProfile_Comment = 13 // repeated int64 + tagProfile_DefaultSampleType = 14 // int64 + + // message ValueType + tagValueType_Type = 1 // int64 (string table index) + tagValueType_Unit = 2 // int64 (string table index) + + // message Sample + tagSample_Location = 1 // repeated uint64 + tagSample_Value = 2 // repeated int64 + tagSample_Label = 3 // repeated Label + + // message Label + tagLabel_Key = 1 // int64 (string table index) + tagLabel_Str = 2 // int64 (string table index) + tagLabel_Num = 3 // int64 + + // message Mapping + tagMapping_ID = 1 // uint64 + tagMapping_Start = 2 // uint64 + tagMapping_Limit = 3 // uint64 + tagMapping_Offset = 4 // uint64 + tagMapping_Filename = 5 // int64 (string table index) + tagMapping_BuildID = 6 // int64 (string table index) + tagMapping_HasFunctions = 7 // bool + tagMapping_HasFilenames = 8 // bool + tagMapping_HasLineNumbers = 9 // bool + tagMapping_HasInlineFrames = 10 // bool + + // message Location + tagLocation_ID = 1 // uint64 + tagLocation_MappingID = 2 // uint64 + tagLocation_Address = 3 // uint64 + tagLocation_Line = 4 // repeated Line + + // message Line + tagLine_FunctionID = 1 // uint64 + tagLine_Line = 2 // int64 + + // message Function + tagFunction_ID = 1 // uint64 + tagFunction_Name = 2 // int64 (string table index) + tagFunction_SystemName = 3 // int64 (string table index) + tagFunction_Filename = 4 // int64 (string table index) + tagFunction_StartLine = 5 // int64 +) + +// stringIndex adds s to the string table if not already present +// and returns the index of s in the string table. +func (b *profileBuilder) stringIndex(s string) int64 { + id, ok := b.stringMap[s] + if !ok { + id = len(b.strings) + b.strings = append(b.strings, s) + b.stringMap[s] = id + } + return int64(id) +} + +func (b *profileBuilder) flush() { + const dataFlush = 4096 + if b.pb.nest == 0 && len(b.pb.data) > dataFlush { + b.zw.Write(b.pb.data) + b.pb.data = b.pb.data[:0] + } +} + +// pbValueType encodes a ValueType message to b.pb. +func (b *profileBuilder) pbValueType(tag int, typ, unit string) { + start := b.pb.startMessage() + b.pb.int64(tagValueType_Type, b.stringIndex(typ)) + b.pb.int64(tagValueType_Unit, b.stringIndex(unit)) + b.pb.endMessage(tag, start) +} + +// pbSample encodes a Sample message to b.pb. +func (b *profileBuilder) pbSample(values []int64, locs []uint64, labels func()) { + start := b.pb.startMessage() + b.pb.int64s(tagSample_Value, values) + b.pb.uint64s(tagSample_Location, locs) + if labels != nil { + labels() + } + b.pb.endMessage(tagProfile_Sample, start) + b.flush() +} + +// pbLabel encodes a Label message to b.pb. +func (b *profileBuilder) pbLabel(tag int, key, str string, num int64) { + start := b.pb.startMessage() + b.pb.int64Opt(tagLabel_Key, b.stringIndex(key)) + b.pb.int64Opt(tagLabel_Str, b.stringIndex(str)) + b.pb.int64Opt(tagLabel_Num, num) + b.pb.endMessage(tag, start) +} + +// pbLine encodes a Line message to b.pb. +func (b *profileBuilder) pbLine(tag int, funcID uint64, line int64) { + start := b.pb.startMessage() + b.pb.uint64Opt(tagLine_FunctionID, funcID) + b.pb.int64Opt(tagLine_Line, line) + b.pb.endMessage(tag, start) +} + +// pbMapping encodes a Mapping message to b.pb. +func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string, hasFuncs bool) { + start := b.pb.startMessage() + b.pb.uint64Opt(tagMapping_ID, id) + b.pb.uint64Opt(tagMapping_Start, base) + b.pb.uint64Opt(tagMapping_Limit, limit) + b.pb.uint64Opt(tagMapping_Offset, offset) + b.pb.int64Opt(tagMapping_Filename, b.stringIndex(file)) + b.pb.int64Opt(tagMapping_BuildID, b.stringIndex(buildID)) + // TODO: we set HasFunctions if all symbols from samples were symbolized (hasFuncs). + // Decide what to do about HasInlineFrames and HasLineNumbers. + // Also, another approach to handle the mapping entry with + // incomplete symbolization results is to dupliace the mapping + // entry (but with different Has* fields values) and use + // different entries for symbolized locations and unsymbolized locations. + if hasFuncs { + b.pb.bool(tagMapping_HasFunctions, true) + } + b.pb.endMessage(tag, start) +} + +func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) { + // Expand this one address using CallersFrames so we can cache + // each expansion. In general, CallersFrames takes a whole + // stack, but in this case we know there will be no skips in + // the stack and we have return PCs anyway. + frames := runtime.CallersFrames([]uintptr{addr}) + frame, more := frames.Next() + if frame.Function == "runtime.goexit" { + // Short-circuit if we see runtime.goexit so the loop + // below doesn't allocate a useless empty location. + return nil, 0 + } + + symbolizeResult := lookupTried + if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 { + symbolizeResult |= lookupFailed + } + + if frame.PC == 0 { + // If we failed to resolve the frame, at least make up + // a reasonable call PC. This mostly happens in tests. + frame.PC = addr - 1 + } + ret := []runtime.Frame{frame} + for frame.Function != "runtime.goexit" && more { + frame, more = frames.Next() + ret = append(ret, frame) + } + return ret, symbolizeResult +} + +type locInfo struct { + // location id assigned by the profileBuilder + id uint64 + + // sequence of PCs, including the fake PCs returned by the traceback + // to represent inlined functions + // https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368 + pcs []uintptr + + // firstPCFrames and firstPCSymbolizeResult hold the results of the + // allFrames call for the first (leaf-most) PC this locInfo represents + firstPCFrames []runtime.Frame + firstPCSymbolizeResult symbolizeFlag +} + +// newProfileBuilder returns a new profileBuilder. +// CPU profiling data obtained from the runtime can be added +// by calling b.addCPUData, and then the eventual profile +// can be obtained by calling b.finish. +func newProfileBuilder(w io.Writer) *profileBuilder { + zw, _ := gzip.NewWriterLevel(w, gzip.BestSpeed) + b := &profileBuilder{ + w: w, + zw: zw, + start: time.Now(), + strings: []string{""}, + stringMap: map[string]int{"": 0}, + locs: map[uintptr]locInfo{}, + funcs: map[string]int{}, + } + b.readMapping() + return b +} + +// addCPUData adds the CPU profiling data to the profile. +// +// The data must be a whole number of records, as delivered by the runtime. +// len(tags) must be equal to the number of records in data. +func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error { + if !b.havePeriod { + // first record is period + if len(data) < 3 { + return fmt.Errorf("truncated profile") + } + if data[0] != 3 || data[2] == 0 { + return fmt.Errorf("malformed profile") + } + // data[2] is sampling rate in Hz. Convert to sampling + // period in nanoseconds. + b.period = 1e9 / int64(data[2]) + b.havePeriod = true + data = data[3:] + // Consume tag slot. Note that there isn't a meaningful tag + // value for this record. + tags = tags[1:] + } + + // Parse CPU samples from the profile. + // Each sample is 3+n uint64s: + // data[0] = 3+n + // data[1] = time stamp (ignored) + // data[2] = count + // data[3:3+n] = stack + // If the count is 0 and the stack has length 1, + // that's an overflow record inserted by the runtime + // to indicate that stack[0] samples were lost. + // Otherwise the count is usually 1, + // but in a few special cases like lost non-Go samples + // there can be larger counts. + // Because many samples with the same stack arrive, + // we want to deduplicate immediately, which we do + // using the b.m profMap. + for len(data) > 0 { + if len(data) < 3 || data[0] > uint64(len(data)) { + return fmt.Errorf("truncated profile") + } + if data[0] < 3 || tags != nil && len(tags) < 1 { + return fmt.Errorf("malformed profile") + } + if len(tags) < 1 { + return fmt.Errorf("mismatched profile records and tags") + } + count := data[2] + stk := data[3:data[0]] + data = data[data[0]:] + tag := tags[0] + tags = tags[1:] + + if count == 0 && len(stk) == 1 { + // overflow record + count = uint64(stk[0]) + stk = []uint64{ + // gentraceback guarantees that PCs in the + // stack can be unconditionally decremented and + // still be valid, so we must do the same. + uint64(abi.FuncPCABIInternal(lostProfileEvent) + 1), + } + } + b.m.lookup(stk, tag).count += int64(count) + } + + if len(tags) != 0 { + return fmt.Errorf("mismatched profile records and tags") + } + return nil +} + +// build completes and returns the constructed profile. +func (b *profileBuilder) build() { + b.end = time.Now() + + b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano()) + if b.havePeriod { // must be CPU profile + b.pbValueType(tagProfile_SampleType, "samples", "count") + b.pbValueType(tagProfile_SampleType, "cpu", "nanoseconds") + b.pb.int64Opt(tagProfile_DurationNanos, b.end.Sub(b.start).Nanoseconds()) + b.pbValueType(tagProfile_PeriodType, "cpu", "nanoseconds") + b.pb.int64Opt(tagProfile_Period, b.period) + } + + values := []int64{0, 0} + var locs []uint64 + + for e := b.m.all; e != nil; e = e.nextAll { + values[0] = e.count + values[1] = e.count * b.period + + var labels func() + if e.tag != nil { + labels = func() { + for k, v := range *(*labelMap)(e.tag) { + b.pbLabel(tagSample_Label, k, v, 0) + } + } + } + + locs = b.appendLocsForStack(locs[:0], e.stk) + + b.pbSample(values, locs, labels) + } + + for i, m := range b.mem { + hasFunctions := m.funcs == lookupTried // lookupTried but not lookupFailed + b.pbMapping(tagProfile_Mapping, uint64(i+1), uint64(m.start), uint64(m.end), m.offset, m.file, m.buildID, hasFunctions) + } + + // TODO: Anything for tagProfile_DropFrames? + // TODO: Anything for tagProfile_KeepFrames? + + b.pb.strings(tagProfile_StringTable, b.strings) + b.zw.Write(b.pb.data) + b.zw.Close() +} + +// appendLocsForStack appends the location IDs for the given stack trace to the given +// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of +// an inline marker as the runtime traceback function returns. +// +// It may return an empty slice even if locs is non-empty, for example if locs consists +// solely of runtime.goexit. We still count these empty stacks in profiles in order to +// get the right cumulative sample count. +// +// It may emit to b.pb, so there must be no message encoding in progress. +func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) { + b.deck.reset() + + // The last frame might be truncated. Recover lost inline frames. + stk = runtime_expandFinalInlineFrame(stk) + + for len(stk) > 0 { + addr := stk[0] + if l, ok := b.locs[addr]; ok { + // When generating code for an inlined function, the compiler adds + // NOP instructions to the outermost function as a placeholder for + // each layer of inlining. When the runtime generates tracebacks for + // stacks that include inlined functions, it uses the addresses of + // those NOPs as "fake" PCs on the stack as if they were regular + // function call sites. But if a profiling signal arrives while the + // CPU is executing one of those NOPs, its PC will show up as a leaf + // in the profile with its own Location entry. So, always check + // whether addr is a "fake" PC in the context of the current call + // stack by trying to add it to the inlining deck before assuming + // that the deck is complete. + if len(b.deck.pcs) > 0 { + if added := b.deck.tryAdd(addr, l.firstPCFrames, l.firstPCSymbolizeResult); added { + stk = stk[1:] + continue + } + } + + // first record the location if there is any pending accumulated info. + if id := b.emitLocation(); id > 0 { + locs = append(locs, id) + } + + // then, record the cached location. + locs = append(locs, l.id) + + // Skip the matching pcs. + // + // Even if stk was truncated due to the stack depth + // limit, expandFinalInlineFrame above has already + // fixed the truncation, ensuring it is long enough. + stk = stk[len(l.pcs):] + continue + } + + frames, symbolizeResult := allFrames(addr) + if len(frames) == 0 { // runtime.goexit. + if id := b.emitLocation(); id > 0 { + locs = append(locs, id) + } + stk = stk[1:] + continue + } + + if added := b.deck.tryAdd(addr, frames, symbolizeResult); added { + stk = stk[1:] + continue + } + // add failed because this addr is not inlined with the + // existing PCs in the deck. Flush the deck and retry handling + // this pc. + if id := b.emitLocation(); id > 0 { + locs = append(locs, id) + } + + // check cache again - previous emitLocation added a new entry + if l, ok := b.locs[addr]; ok { + locs = append(locs, l.id) + stk = stk[len(l.pcs):] // skip the matching pcs. + } else { + b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed. + stk = stk[1:] + } + } + if id := b.emitLocation(); id > 0 { // emit remaining location. + locs = append(locs, id) + } + return locs +} + +// Here's an example of how Go 1.17 writes out inlined functions, compiled for +// linux/amd64. The disassembly of main.main shows two levels of inlining: main +// calls b, b calls a, a does some work. +// +// inline.go:9 0x4553ec 90 NOPL // func main() { b(v) } +// inline.go:6 0x4553ed 90 NOPL // func b(v *int) { a(v) } +// inline.go:5 0x4553ee 48c7002a000000 MOVQ $0x2a, 0(AX) // func a(v *int) { *v = 42 } +// +// If a profiling signal arrives while executing the MOVQ at 0x4553ee (for line +// 5), the runtime will report the stack as the MOVQ frame being called by the +// NOPL at 0x4553ed (for line 6) being called by the NOPL at 0x4553ec (for line +// 9). +// +// The role of pcDeck is to collapse those three frames back into a single +// location at 0x4553ee, with file/line/function symbolization info representing +// the three layers of calls. It does that via sequential calls to pcDeck.tryAdd +// starting with the leaf-most address. The fourth call to pcDeck.tryAdd will be +// for the caller of main.main. Because main.main was not inlined in its caller, +// the deck will reject the addition, and the fourth PC on the stack will get +// its own location. + +// pcDeck is a helper to detect a sequence of inlined functions from +// a stack trace returned by the runtime. +// +// The stack traces returned by runtime's trackback functions are fully +// expanded (at least for Go functions) and include the fake pcs representing +// inlined functions. The profile proto expects the inlined functions to be +// encoded in one Location message. +// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184 +// +// Runtime does not directly expose whether a frame is for an inlined function +// and looking up debug info is not ideal, so we use a heuristic to filter +// the fake pcs and restore the inlined and entry functions. Inlined functions +// have the following properties: +// +// Frame's Func is nil (note: also true for non-Go functions), and +// Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and +// Frame's Name does not match its entry function frame's name (note: inlined functions cannot be directly recursive). +// +// As reading and processing the pcs in a stack trace one by one (from leaf to the root), +// we use pcDeck to temporarily hold the observed pcs and their expanded frames +// until we observe the entry function frame. +type pcDeck struct { + pcs []uintptr + frames []runtime.Frame + symbolizeResult symbolizeFlag + + // firstPCFrames indicates the number of frames associated with the first + // (leaf-most) PC in the deck + firstPCFrames int + // firstPCSymbolizeResult holds the results of the allFrames call for the + // first (leaf-most) PC in the deck + firstPCSymbolizeResult symbolizeFlag +} + +func (d *pcDeck) reset() { + d.pcs = d.pcs[:0] + d.frames = d.frames[:0] + d.symbolizeResult = 0 + d.firstPCFrames = 0 + d.firstPCSymbolizeResult = 0 +} + +// tryAdd tries to add the pc and Frames expanded from it (most likely one, +// since the stack trace is already fully expanded) and the symbolizeResult +// to the deck. If it fails the caller needs to flush the deck and retry. +func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) { + if existing := len(d.frames); existing > 0 { + // 'd.frames' are all expanded from one 'pc' and represent all + // inlined functions so we check only the last one. + newFrame := frames[0] + last := d.frames[existing-1] + if last.Func != nil { // the last frame can't be inlined. Flush. + return false + } + if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge. + return false + } + + if last.Entry != newFrame.Entry { // newFrame is for a different function. + return false + } + if last.Function == newFrame.Function { // maybe recursion. + return false + } + } + d.pcs = append(d.pcs, pc) + d.frames = append(d.frames, frames...) + d.symbolizeResult |= symbolizeResult + if len(d.pcs) == 1 { + d.firstPCFrames = len(d.frames) + d.firstPCSymbolizeResult = symbolizeResult + } + return true +} + +// emitLocation emits the new location and function information recorded in the deck +// and returns the location ID encoded in the profile protobuf. +// It emits to b.pb, so there must be no message encoding in progress. +// It resets the deck. +func (b *profileBuilder) emitLocation() uint64 { + if len(b.deck.pcs) == 0 { + return 0 + } + defer b.deck.reset() + + addr := b.deck.pcs[0] + firstFrame := b.deck.frames[0] + + // We can't write out functions while in the middle of the + // Location message, so record new functions we encounter and + // write them out after the Location. + type newFunc struct { + id uint64 + name, file string + startLine int64 + } + newFuncs := make([]newFunc, 0, 8) + + id := uint64(len(b.locs)) + 1 + b.locs[addr] = locInfo{ + id: id, + pcs: append([]uintptr{}, b.deck.pcs...), + firstPCSymbolizeResult: b.deck.firstPCSymbolizeResult, + firstPCFrames: append([]runtime.Frame{}, b.deck.frames[:b.deck.firstPCFrames]...), + } + + start := b.pb.startMessage() + b.pb.uint64Opt(tagLocation_ID, id) + b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC)) + for _, frame := range b.deck.frames { + // Write out each line in frame expansion. + funcID := uint64(b.funcs[frame.Function]) + if funcID == 0 { + funcID = uint64(len(b.funcs)) + 1 + b.funcs[frame.Function] = int(funcID) + newFuncs = append(newFuncs, newFunc{ + id: funcID, + name: frame.Function, + file: frame.File, + startLine: int64(runtime_FrameStartLine(&frame)), + }) + } + b.pbLine(tagLocation_Line, funcID, int64(frame.Line)) + } + for i := range b.mem { + if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake { + b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1)) + + m := b.mem[i] + m.funcs |= b.deck.symbolizeResult + b.mem[i] = m + break + } + } + b.pb.endMessage(tagProfile_Location, start) + + // Write out functions we found during frame expansion. + for _, fn := range newFuncs { + start := b.pb.startMessage() + b.pb.uint64Opt(tagFunction_ID, fn.id) + b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name)) + b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name)) + b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file)) + b.pb.int64Opt(tagFunction_StartLine, fn.startLine) + b.pb.endMessage(tagProfile_Function, start) + } + + b.flush() + return id +} + +var space = []byte(" ") +var newline = []byte("\n") + +func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, buildID string)) { + // $ cat /proc/self/maps + // 00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat + // 0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat + // 0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat + // 014ab000-014cc000 rw-p 00000000 00:00 0 [heap] + // 7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive + // 7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so + // 7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so + // 7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so + // 7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so + // 7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0 + // 7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so + // 7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0 + // 7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0 + // 7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so + // 7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so + // 7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0 + // 7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack] + // 7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso] + // ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall] + + var line []byte + // next removes and returns the next field in the line. + // It also removes from line any spaces following the field. + next := func() []byte { + var f []byte + f, line, _ = bytes.Cut(line, space) + line = bytes.TrimLeft(line, " ") + return f + } + + for len(data) > 0 { + line, data, _ = bytes.Cut(data, newline) + addr := next() + loStr, hiStr, ok := strings.Cut(string(addr), "-") + if !ok { + continue + } + lo, err := strconv.ParseUint(loStr, 16, 64) + if err != nil { + continue + } + hi, err := strconv.ParseUint(hiStr, 16, 64) + if err != nil { + continue + } + perm := next() + if len(perm) < 4 || perm[2] != 'x' { + // Only interested in executable mappings. + continue + } + offset, err := strconv.ParseUint(string(next()), 16, 64) + if err != nil { + continue + } + next() // dev + inode := next() // inode + if line == nil { + continue + } + file := string(line) + + // Trim deleted file marker. + deletedStr := " (deleted)" + deletedLen := len(deletedStr) + if len(file) >= deletedLen && file[len(file)-deletedLen:] == deletedStr { + file = file[:len(file)-deletedLen] + } + + if len(inode) == 1 && inode[0] == '0' && file == "" { + // Huge-page text mappings list the initial fragment of + // mapped but unpopulated memory as being inode 0. + // Don't report that part. + // But [vdso] and [vsyscall] are inode 0, so let non-empty file names through. + continue + } + + // TODO: pprof's remapMappingIDs makes one adjustment: + // 1. If there is an /anon_hugepage mapping first and it is + // consecutive to a next mapping, drop the /anon_hugepage. + // There's no indication why this is needed. + // Let's try not doing this and see what breaks. + // If we do need it, it would go here, before we + // enter the mappings into b.mem in the first place. + + buildID, _ := elfBuildID(file) + addMapping(lo, hi, offset, file, buildID) + } +} + +func (b *profileBuilder) addMapping(lo, hi, offset uint64, file, buildID string) { + b.addMappingEntry(lo, hi, offset, file, buildID, false) +} + +func (b *profileBuilder) addMappingEntry(lo, hi, offset uint64, file, buildID string, fake bool) { + b.mem = append(b.mem, memMap{ + start: uintptr(lo), + end: uintptr(hi), + offset: offset, + file: file, + buildID: buildID, + fake: fake, + }) +} diff --git a/src/runtime/pprof/proto_other.go b/src/runtime/pprof/proto_other.go new file mode 100644 index 0000000..4a7fe79 --- /dev/null +++ b/src/runtime/pprof/proto_other.go @@ -0,0 +1,30 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !windows + +package pprof + +import ( + "errors" + "os" +) + +// readMapping reads /proc/self/maps and writes mappings to b.pb. +// It saves the address ranges of the mappings in b.mem for use +// when emitting locations. +func (b *profileBuilder) readMapping() { + data, _ := os.ReadFile("/proc/self/maps") + parseProcSelfMaps(data, b.addMapping) + if len(b.mem) == 0 { // pprof expects a map entry, so fake one. + b.addMappingEntry(0, 0, 0, "", "", true) + // TODO(hyangah): make addMapping return *memMap or + // take a memMap struct, and get rid of addMappingEntry + // that takes a bunch of positional arguments. + } +} + +func readMainModuleMapping() (start, end uint64, err error) { + return 0, 0, errors.New("not implemented") +} diff --git a/src/runtime/pprof/proto_test.go b/src/runtime/pprof/proto_test.go new file mode 100644 index 0000000..780b481 --- /dev/null +++ b/src/runtime/pprof/proto_test.go @@ -0,0 +1,470 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "bytes" + "encoding/json" + "fmt" + "internal/abi" + "internal/profile" + "internal/testenv" + "os" + "os/exec" + "reflect" + "runtime" + "strings" + "testing" + "unsafe" +) + +// translateCPUProfile parses binary CPU profiling stack trace data +// generated by runtime.CPUProfile() into a profile struct. +// This is only used for testing. Real conversions stream the +// data into the profileBuilder as it becomes available. +// +// count is the number of records in data. +func translateCPUProfile(data []uint64, count int) (*profile.Profile, error) { + var buf bytes.Buffer + b := newProfileBuilder(&buf) + tags := make([]unsafe.Pointer, count) + if err := b.addCPUData(data, tags); err != nil { + return nil, err + } + b.build() + return profile.Parse(&buf) +} + +// fmtJSON returns a pretty-printed JSON form for x. +// It works reasonbly well for printing protocol-buffer +// data structures like profile.Profile. +func fmtJSON(x any) string { + js, _ := json.MarshalIndent(x, "", "\t") + return string(js) +} + +func TestConvertCPUProfileEmpty(t *testing.T) { + // A test server with mock cpu profile data. + var buf bytes.Buffer + + b := []uint64{3, 0, 500} // empty profile at 500 Hz (2ms sample period) + p, err := translateCPUProfile(b, 1) + if err != nil { + t.Fatalf("translateCPUProfile: %v", err) + } + if err := p.Write(&buf); err != nil { + t.Fatalf("writing profile: %v", err) + } + + p, err = profile.Parse(&buf) + if err != nil { + t.Fatalf("profile.Parse: %v", err) + } + + // Expected PeriodType and SampleType. + periodType := &profile.ValueType{Type: "cpu", Unit: "nanoseconds"} + sampleType := []*profile.ValueType{ + {Type: "samples", Unit: "count"}, + {Type: "cpu", Unit: "nanoseconds"}, + } + + checkProfile(t, p, 2000*1000, periodType, sampleType, nil, "") +} + +func f1() { f1() } +func f2() { f2() } + +// testPCs returns two PCs and two corresponding memory mappings +// to use in test profiles. +func testPCs(t *testing.T) (addr1, addr2 uint64, map1, map2 *profile.Mapping) { + switch runtime.GOOS { + case "linux", "android", "netbsd": + // Figure out two addresses from /proc/self/maps. + mmap, err := os.ReadFile("/proc/self/maps") + if err != nil { + t.Fatal(err) + } + mprof := &profile.Profile{} + if err = mprof.ParseMemoryMap(bytes.NewReader(mmap)); err != nil { + t.Fatalf("parsing /proc/self/maps: %v", err) + } + if len(mprof.Mapping) < 2 { + // It is possible for a binary to only have 1 executable + // region of memory. + t.Skipf("need 2 or more mappings, got %v", len(mprof.Mapping)) + } + addr1 = mprof.Mapping[0].Start + map1 = mprof.Mapping[0] + map1.BuildID, _ = elfBuildID(map1.File) + addr2 = mprof.Mapping[1].Start + map2 = mprof.Mapping[1] + map2.BuildID, _ = elfBuildID(map2.File) + case "windows": + addr1 = uint64(abi.FuncPCABIInternal(f1)) + addr2 = uint64(abi.FuncPCABIInternal(f2)) + + exe, err := os.Executable() + if err != nil { + t.Fatal(err) + } + + start, end, err := readMainModuleMapping() + if err != nil { + t.Fatal(err) + } + + map1 = &profile.Mapping{ + ID: 1, + Start: start, + Limit: end, + File: exe, + BuildID: peBuildID(exe), + HasFunctions: true, + } + map2 = &profile.Mapping{ + ID: 1, + Start: start, + Limit: end, + File: exe, + BuildID: peBuildID(exe), + HasFunctions: true, + } + case "js": + addr1 = uint64(abi.FuncPCABIInternal(f1)) + addr2 = uint64(abi.FuncPCABIInternal(f2)) + default: + addr1 = uint64(abi.FuncPCABIInternal(f1)) + addr2 = uint64(abi.FuncPCABIInternal(f2)) + // Fake mapping - HasFunctions will be true because two PCs from Go + // will be fully symbolized. + fake := &profile.Mapping{ID: 1, HasFunctions: true} + map1, map2 = fake, fake + } + return +} + +func TestConvertCPUProfile(t *testing.T) { + addr1, addr2, map1, map2 := testPCs(t) + + b := []uint64{ + 3, 0, 500, // hz = 500 + 5, 0, 10, uint64(addr1 + 1), uint64(addr1 + 2), // 10 samples in addr1 + 5, 0, 40, uint64(addr2 + 1), uint64(addr2 + 2), // 40 samples in addr2 + 5, 0, 10, uint64(addr1 + 1), uint64(addr1 + 2), // 10 samples in addr1 + } + p, err := translateCPUProfile(b, 4) + if err != nil { + t.Fatalf("translating profile: %v", err) + } + period := int64(2000 * 1000) + periodType := &profile.ValueType{Type: "cpu", Unit: "nanoseconds"} + sampleType := []*profile.ValueType{ + {Type: "samples", Unit: "count"}, + {Type: "cpu", Unit: "nanoseconds"}, + } + samples := []*profile.Sample{ + {Value: []int64{20, 20 * 2000 * 1000}, Location: []*profile.Location{ + {ID: 1, Mapping: map1, Address: addr1}, + {ID: 2, Mapping: map1, Address: addr1 + 1}, + }}, + {Value: []int64{40, 40 * 2000 * 1000}, Location: []*profile.Location{ + {ID: 3, Mapping: map2, Address: addr2}, + {ID: 4, Mapping: map2, Address: addr2 + 1}, + }}, + } + checkProfile(t, p, period, periodType, sampleType, samples, "") +} + +func checkProfile(t *testing.T, p *profile.Profile, period int64, periodType *profile.ValueType, sampleType []*profile.ValueType, samples []*profile.Sample, defaultSampleType string) { + t.Helper() + + if p.Period != period { + t.Errorf("p.Period = %d, want %d", p.Period, period) + } + if !reflect.DeepEqual(p.PeriodType, periodType) { + t.Errorf("p.PeriodType = %v\nwant = %v", fmtJSON(p.PeriodType), fmtJSON(periodType)) + } + if !reflect.DeepEqual(p.SampleType, sampleType) { + t.Errorf("p.SampleType = %v\nwant = %v", fmtJSON(p.SampleType), fmtJSON(sampleType)) + } + if defaultSampleType != p.DefaultSampleType { + t.Errorf("p.DefaultSampleType = %v\nwant = %v", p.DefaultSampleType, defaultSampleType) + } + // Clear line info since it is not in the expected samples. + // If we used f1 and f2 above, then the samples will have line info. + for _, s := range p.Sample { + for _, l := range s.Location { + l.Line = nil + } + } + if fmtJSON(p.Sample) != fmtJSON(samples) { // ignore unexported fields + if len(p.Sample) == len(samples) { + for i := range p.Sample { + if !reflect.DeepEqual(p.Sample[i], samples[i]) { + t.Errorf("sample %d = %v\nwant = %v\n", i, fmtJSON(p.Sample[i]), fmtJSON(samples[i])) + } + } + if t.Failed() { + t.FailNow() + } + } + t.Fatalf("p.Sample = %v\nwant = %v", fmtJSON(p.Sample), fmtJSON(samples)) + } +} + +var profSelfMapsTests = ` +00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat +0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat +0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat +014ab000-014cc000 rw-p 00000000 00:00 0 [heap] +7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive +7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0 +7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0 +7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0 +7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0 +7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack] +7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso] +ffffffffff600000-ffffffffff601000 r-xp 00000090 00:00 0 [vsyscall] +-> +00400000 0040b000 00000000 /bin/cat +7f7d7797c000 7f7d77b36000 00000000 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d41000 7f7d77d64000 00000000 /lib/x86_64-linux-gnu/ld-2.19.so +7ffc34343000 7ffc34345000 00000000 [vdso] +ffffffffff600000 ffffffffff601000 00000090 [vsyscall] + +00400000-07000000 r-xp 00000000 00:00 0 +07000000-07093000 r-xp 06c00000 00:2e 536754 /path/to/gobench_server_main +07093000-0722d000 rw-p 06c92000 00:2e 536754 /path/to/gobench_server_main +0722d000-07b21000 rw-p 00000000 00:00 0 +c000000000-c000036000 rw-p 00000000 00:00 0 +-> +07000000 07093000 06c00000 /path/to/gobench_server_main +` + +var profSelfMapsTestsWithDeleted = ` +00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat (deleted) +0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat (deleted) +0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat (deleted) +014ab000-014cc000 rw-p 00000000 00:00 0 [heap] +7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive +7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0 +7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0 +7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0 +7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0 +7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack] +7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso] +ffffffffff600000-ffffffffff601000 r-xp 00000090 00:00 0 [vsyscall] +-> +00400000 0040b000 00000000 /bin/cat +7f7d7797c000 7f7d77b36000 00000000 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d41000 7f7d77d64000 00000000 /lib/x86_64-linux-gnu/ld-2.19.so +7ffc34343000 7ffc34345000 00000000 [vdso] +ffffffffff600000 ffffffffff601000 00000090 [vsyscall] + +00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat with space +0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat with space +0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat with space +014ab000-014cc000 rw-p 00000000 00:00 0 [heap] +7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive +7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0 +7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0 +7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0 +7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so +7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0 +7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack] +7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso] +ffffffffff600000-ffffffffff601000 r-xp 00000090 00:00 0 [vsyscall] +-> +00400000 0040b000 00000000 /bin/cat with space +7f7d7797c000 7f7d77b36000 00000000 /lib/x86_64-linux-gnu/libc-2.19.so +7f7d77d41000 7f7d77d64000 00000000 /lib/x86_64-linux-gnu/ld-2.19.so +7ffc34343000 7ffc34345000 00000000 [vdso] +ffffffffff600000 ffffffffff601000 00000090 [vsyscall] +` + +func TestProcSelfMaps(t *testing.T) { + + f := func(t *testing.T, input string) { + for tx, tt := range strings.Split(input, "\n\n") { + in, out, ok := strings.Cut(tt, "->\n") + if !ok { + t.Fatal("malformed test case") + } + if len(out) > 0 && out[len(out)-1] != '\n' { + out += "\n" + } + var buf strings.Builder + parseProcSelfMaps([]byte(in), func(lo, hi, offset uint64, file, buildID string) { + fmt.Fprintf(&buf, "%08x %08x %08x %s\n", lo, hi, offset, file) + }) + if buf.String() != out { + t.Errorf("#%d: have:\n%s\nwant:\n%s\n%q\n%q", tx, buf.String(), out, buf.String(), out) + } + } + } + + t.Run("Normal", func(t *testing.T) { + f(t, profSelfMapsTests) + }) + + t.Run("WithDeletedFile", func(t *testing.T) { + f(t, profSelfMapsTestsWithDeleted) + }) +} + +// TestMapping checks the mapping section of CPU profiles +// has the HasFunctions field set correctly. If all PCs included +// in the samples are successfully symbolized, the corresponding +// mapping entry (in this test case, only one entry) should have +// its HasFunctions field set true. +// The test generates a CPU profile that includes PCs from C side +// that the runtime can't symbolize. See ./testdata/mappingtest. +func TestMapping(t *testing.T) { + testenv.MustHaveGoRun(t) + testenv.MustHaveCGO(t) + + prog := "./testdata/mappingtest/main.go" + + // GoOnly includes only Go symbols that runtime will symbolize. + // Go+C includes C symbols that runtime will not symbolize. + for _, traceback := range []string{"GoOnly", "Go+C"} { + t.Run("traceback"+traceback, func(t *testing.T) { + cmd := exec.Command(testenv.GoToolPath(t), "run", prog) + if traceback != "GoOnly" { + cmd.Env = append(os.Environ(), "SETCGOTRACEBACK=1") + } + cmd.Stderr = new(bytes.Buffer) + + out, err := cmd.Output() + if err != nil { + t.Fatalf("failed to run the test program %q: %v\n%v", prog, err, cmd.Stderr) + } + + prof, err := profile.Parse(bytes.NewReader(out)) + if err != nil { + t.Fatalf("failed to parse the generated profile data: %v", err) + } + t.Logf("Profile: %s", prof) + + hit := make(map[*profile.Mapping]bool) + miss := make(map[*profile.Mapping]bool) + for _, loc := range prof.Location { + if symbolized(loc) { + hit[loc.Mapping] = true + } else { + miss[loc.Mapping] = true + } + } + if len(miss) == 0 { + t.Log("no location with missing symbol info was sampled") + } + + for _, m := range prof.Mapping { + if miss[m] && m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=true, but contains locations with failed symbolization", m) + continue + } + if !miss[m] && hit[m] && !m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=false, but all referenced locations from this lapping were symbolized successfully", m) + continue + } + } + + if traceback == "Go+C" { + // The test code was arranged to have PCs from C and + // they are not symbolized. + // Check no Location containing those unsymbolized PCs contains multiple lines. + for i, loc := range prof.Location { + if !symbolized(loc) && len(loc.Line) > 1 { + t.Errorf("Location[%d] contains unsymbolized PCs and multiple lines: %v", i, loc) + } + } + } + }) + } +} + +func symbolized(loc *profile.Location) bool { + if len(loc.Line) == 0 { + return false + } + l := loc.Line[0] + f := l.Function + if l.Line == 0 || f == nil || f.Name == "" || f.Filename == "" { + return false + } + return true +} + +// TestFakeMapping tests if at least one mapping exists +// (including a fake mapping), and their HasFunctions bits +// are set correctly. +func TestFakeMapping(t *testing.T) { + var buf bytes.Buffer + if err := Lookup("heap").WriteTo(&buf, 0); err != nil { + t.Fatalf("failed to write heap profile: %v", err) + } + prof, err := profile.Parse(&buf) + if err != nil { + t.Fatalf("failed to parse the generated profile data: %v", err) + } + t.Logf("Profile: %s", prof) + if len(prof.Mapping) == 0 { + t.Fatal("want profile with at least one mapping entry, got 0 mapping") + } + + hit := make(map[*profile.Mapping]bool) + miss := make(map[*profile.Mapping]bool) + for _, loc := range prof.Location { + if symbolized(loc) { + hit[loc.Mapping] = true + } else { + miss[loc.Mapping] = true + } + } + for _, m := range prof.Mapping { + if miss[m] && m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=true, but contains locations with failed symbolization", m) + continue + } + if !miss[m] && hit[m] && !m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=false, but all referenced locations from this lapping were symbolized successfully", m) + continue + } + } +} + +// Make sure the profiler can handle an empty stack trace. +// See issue 37967. +func TestEmptyStack(t *testing.T) { + b := []uint64{ + 3, 0, 500, // hz = 500 + 3, 0, 10, // 10 samples with an empty stack trace + } + _, err := translateCPUProfile(b, 2) + if err != nil { + t.Fatalf("translating profile: %v", err) + } +} diff --git a/src/runtime/pprof/proto_windows.go b/src/runtime/pprof/proto_windows.go new file mode 100644 index 0000000..d5ae4a5 --- /dev/null +++ b/src/runtime/pprof/proto_windows.go @@ -0,0 +1,73 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "errors" + "internal/syscall/windows" + "syscall" +) + +// readMapping adds memory mapping information to the profile. +func (b *profileBuilder) readMapping() { + snap, err := createModuleSnapshot() + if err != nil { + // pprof expects a map entry, so fake one, when we haven't added anything yet. + b.addMappingEntry(0, 0, 0, "", "", true) + return + } + defer func() { _ = syscall.CloseHandle(snap) }() + + var module windows.ModuleEntry32 + module.Size = uint32(windows.SizeofModuleEntry32) + err = windows.Module32First(snap, &module) + if err != nil { + // pprof expects a map entry, so fake one, when we haven't added anything yet. + b.addMappingEntry(0, 0, 0, "", "", true) + return + } + for err == nil { + exe := syscall.UTF16ToString(module.ExePath[:]) + b.addMappingEntry( + uint64(module.ModBaseAddr), + uint64(module.ModBaseAddr)+uint64(module.ModBaseSize), + 0, + exe, + peBuildID(exe), + false, + ) + err = windows.Module32Next(snap, &module) + } +} + +func readMainModuleMapping() (start, end uint64, err error) { + snap, err := createModuleSnapshot() + if err != nil { + return 0, 0, err + } + defer func() { _ = syscall.CloseHandle(snap) }() + + var module windows.ModuleEntry32 + module.Size = uint32(windows.SizeofModuleEntry32) + err = windows.Module32First(snap, &module) + if err != nil { + return 0, 0, err + } + + return uint64(module.ModBaseAddr), uint64(module.ModBaseAddr) + uint64(module.ModBaseSize), nil +} + +func createModuleSnapshot() (syscall.Handle, error) { + for { + snap, err := syscall.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE|windows.TH32CS_SNAPMODULE32, uint32(syscall.Getpid())) + var errno syscall.Errno + if err != nil && errors.As(err, &errno) && errno == windows.ERROR_BAD_LENGTH { + // When CreateToolhelp32Snapshot(SNAPMODULE|SNAPMODULE32, ...) fails + // with ERROR_BAD_LENGTH then it should be retried until it succeeds. + continue + } + return snap, err + } +} diff --git a/src/runtime/pprof/protobuf.go b/src/runtime/pprof/protobuf.go new file mode 100644 index 0000000..f7ec1ac --- /dev/null +++ b/src/runtime/pprof/protobuf.go @@ -0,0 +1,141 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +// A protobuf is a simple protocol buffer encoder. +type protobuf struct { + data []byte + tmp [16]byte + nest int +} + +func (b *protobuf) varint(x uint64) { + for x >= 128 { + b.data = append(b.data, byte(x)|0x80) + x >>= 7 + } + b.data = append(b.data, byte(x)) +} + +func (b *protobuf) length(tag int, len int) { + b.varint(uint64(tag)<<3 | 2) + b.varint(uint64(len)) +} + +func (b *protobuf) uint64(tag int, x uint64) { + // append varint to b.data + b.varint(uint64(tag)<<3 | 0) + b.varint(x) +} + +func (b *protobuf) uint64s(tag int, x []uint64) { + if len(x) > 2 { + // Use packed encoding + n1 := len(b.data) + for _, u := range x { + b.varint(u) + } + n2 := len(b.data) + b.length(tag, n2-n1) + n3 := len(b.data) + copy(b.tmp[:], b.data[n2:n3]) + copy(b.data[n1+(n3-n2):], b.data[n1:n2]) + copy(b.data[n1:], b.tmp[:n3-n2]) + return + } + for _, u := range x { + b.uint64(tag, u) + } +} + +func (b *protobuf) uint64Opt(tag int, x uint64) { + if x == 0 { + return + } + b.uint64(tag, x) +} + +func (b *protobuf) int64(tag int, x int64) { + u := uint64(x) + b.uint64(tag, u) +} + +func (b *protobuf) int64Opt(tag int, x int64) { + if x == 0 { + return + } + b.int64(tag, x) +} + +func (b *protobuf) int64s(tag int, x []int64) { + if len(x) > 2 { + // Use packed encoding + n1 := len(b.data) + for _, u := range x { + b.varint(uint64(u)) + } + n2 := len(b.data) + b.length(tag, n2-n1) + n3 := len(b.data) + copy(b.tmp[:], b.data[n2:n3]) + copy(b.data[n1+(n3-n2):], b.data[n1:n2]) + copy(b.data[n1:], b.tmp[:n3-n2]) + return + } + for _, u := range x { + b.int64(tag, u) + } +} + +func (b *protobuf) string(tag int, x string) { + b.length(tag, len(x)) + b.data = append(b.data, x...) +} + +func (b *protobuf) strings(tag int, x []string) { + for _, s := range x { + b.string(tag, s) + } +} + +func (b *protobuf) stringOpt(tag int, x string) { + if x == "" { + return + } + b.string(tag, x) +} + +func (b *protobuf) bool(tag int, x bool) { + if x { + b.uint64(tag, 1) + } else { + b.uint64(tag, 0) + } +} + +func (b *protobuf) boolOpt(tag int, x bool) { + if !x { + return + } + b.bool(tag, x) +} + +type msgOffset int + +func (b *protobuf) startMessage() msgOffset { + b.nest++ + return msgOffset(len(b.data)) +} + +func (b *protobuf) endMessage(tag int, start msgOffset) { + n1 := int(start) + n2 := len(b.data) + b.length(tag, n2-n1) + n3 := len(b.data) + copy(b.tmp[:], b.data[n2:n3]) + copy(b.data[n1+(n3-n2):], b.data[n1:n2]) + copy(b.data[n1:], b.tmp[:n3-n2]) + b.nest-- +} diff --git a/src/runtime/pprof/protomem.go b/src/runtime/pprof/protomem.go new file mode 100644 index 0000000..fa75a28 --- /dev/null +++ b/src/runtime/pprof/protomem.go @@ -0,0 +1,93 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "io" + "math" + "runtime" + "strings" +) + +// writeHeapProto writes the current heap profile in protobuf format to w. +func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defaultSampleType string) error { + b := newProfileBuilder(w) + b.pbValueType(tagProfile_PeriodType, "space", "bytes") + b.pb.int64Opt(tagProfile_Period, rate) + b.pbValueType(tagProfile_SampleType, "alloc_objects", "count") + b.pbValueType(tagProfile_SampleType, "alloc_space", "bytes") + b.pbValueType(tagProfile_SampleType, "inuse_objects", "count") + b.pbValueType(tagProfile_SampleType, "inuse_space", "bytes") + if defaultSampleType != "" { + b.pb.int64Opt(tagProfile_DefaultSampleType, b.stringIndex(defaultSampleType)) + } + + values := []int64{0, 0, 0, 0} + var locs []uint64 + for _, r := range p { + hideRuntime := true + for tries := 0; tries < 2; tries++ { + stk := r.Stack() + // For heap profiles, all stack + // addresses are return PCs, which is + // what appendLocsForStack expects. + if hideRuntime { + for i, addr := range stk { + if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") { + continue + } + // Found non-runtime. Show any runtime uses above it. + stk = stk[i:] + break + } + } + locs = b.appendLocsForStack(locs[:0], stk) + if len(locs) > 0 { + break + } + hideRuntime = false // try again, and show all frames next time. + } + + values[0], values[1] = scaleHeapSample(r.AllocObjects, r.AllocBytes, rate) + values[2], values[3] = scaleHeapSample(r.InUseObjects(), r.InUseBytes(), rate) + var blockSize int64 + if r.AllocObjects > 0 { + blockSize = r.AllocBytes / r.AllocObjects + } + b.pbSample(values, locs, func() { + if blockSize != 0 { + b.pbLabel(tagSample_Label, "bytes", "", blockSize) + } + }) + } + b.build() + return nil +} + +// scaleHeapSample adjusts the data from a heap Sample to +// account for its probability of appearing in the collected +// data. heap profiles are a sampling of the memory allocations +// requests in a program. We estimate the unsampled value by dividing +// each collected sample by its probability of appearing in the +// profile. heap profiles rely on a poisson process to determine +// which samples to collect, based on the desired average collection +// rate R. The probability of a sample of size S to appear in that +// profile is 1-exp(-S/R). +func scaleHeapSample(count, size, rate int64) (int64, int64) { + if count == 0 || size == 0 { + return 0, 0 + } + + if rate <= 1 { + // if rate==1 all samples were collected so no adjustment is needed. + // if rate<1 treat as unknown and skip scaling. + return count, size + } + + avgSize := float64(size) / float64(count) + scale := 1 / (1 - math.Exp(-avgSize/float64(rate))) + + return int64(float64(count) * scale), int64(float64(size) * scale) +} diff --git a/src/runtime/pprof/protomem_test.go b/src/runtime/pprof/protomem_test.go new file mode 100644 index 0000000..156f628 --- /dev/null +++ b/src/runtime/pprof/protomem_test.go @@ -0,0 +1,84 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "bytes" + "internal/profile" + "runtime" + "testing" +) + +func TestConvertMemProfile(t *testing.T) { + addr1, addr2, map1, map2 := testPCs(t) + + // MemProfileRecord stacks are return PCs, so add one to the + // addresses recorded in the "profile". The proto profile + // locations are call PCs, so conversion will subtract one + // from these and get back to addr1 and addr2. + a1, a2 := uintptr(addr1)+1, uintptr(addr2)+1 + rate := int64(512 * 1024) + rec := []runtime.MemProfileRecord{ + {AllocBytes: 4096, FreeBytes: 1024, AllocObjects: 4, FreeObjects: 1, Stack0: [32]uintptr{a1, a2}}, + {AllocBytes: 512 * 1024, FreeBytes: 0, AllocObjects: 1, FreeObjects: 0, Stack0: [32]uintptr{a2 + 1, a2 + 2}}, + {AllocBytes: 512 * 1024, FreeBytes: 512 * 1024, AllocObjects: 1, FreeObjects: 1, Stack0: [32]uintptr{a1 + 1, a1 + 2, a2 + 3}}, + } + + periodType := &profile.ValueType{Type: "space", Unit: "bytes"} + sampleType := []*profile.ValueType{ + {Type: "alloc_objects", Unit: "count"}, + {Type: "alloc_space", Unit: "bytes"}, + {Type: "inuse_objects", Unit: "count"}, + {Type: "inuse_space", Unit: "bytes"}, + } + samples := []*profile.Sample{ + { + Value: []int64{2050, 2099200, 1537, 1574400}, + Location: []*profile.Location{ + {ID: 1, Mapping: map1, Address: addr1}, + {ID: 2, Mapping: map2, Address: addr2}, + }, + NumLabel: map[string][]int64{"bytes": {1024}}, + }, + { + Value: []int64{1, 829411, 1, 829411}, + Location: []*profile.Location{ + {ID: 3, Mapping: map2, Address: addr2 + 1}, + {ID: 4, Mapping: map2, Address: addr2 + 2}, + }, + NumLabel: map[string][]int64{"bytes": {512 * 1024}}, + }, + { + Value: []int64{1, 829411, 0, 0}, + Location: []*profile.Location{ + {ID: 5, Mapping: map1, Address: addr1 + 1}, + {ID: 6, Mapping: map1, Address: addr1 + 2}, + {ID: 7, Mapping: map2, Address: addr2 + 3}, + }, + NumLabel: map[string][]int64{"bytes": {512 * 1024}}, + }, + } + for _, tc := range []struct { + name string + defaultSampleType string + }{ + {"heap", ""}, + {"allocs", "alloc_space"}, + } { + t.Run(tc.name, func(t *testing.T) { + var buf bytes.Buffer + if err := writeHeapProto(&buf, rec, rate, tc.defaultSampleType); err != nil { + t.Fatalf("writing profile: %v", err) + } + + p, err := profile.Parse(&buf) + if err != nil { + t.Fatalf("profile.Parse: %v", err) + } + + checkProfile(t, p, rate, periodType, sampleType, samples, tc.defaultSampleType) + }) + } +} diff --git a/src/runtime/pprof/runtime.go b/src/runtime/pprof/runtime.go new file mode 100644 index 0000000..57e9ca4 --- /dev/null +++ b/src/runtime/pprof/runtime.go @@ -0,0 +1,45 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "context" + "runtime" + "unsafe" +) + +// runtime_FrameStartLine is defined in runtime/symtab.go. +func runtime_FrameStartLine(f *runtime.Frame) int + +// runtime_expandFinalInlineFrame is defined in runtime/symtab.go. +func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr + +// runtime_setProfLabel is defined in runtime/proflabel.go. +func runtime_setProfLabel(labels unsafe.Pointer) + +// runtime_getProfLabel is defined in runtime/proflabel.go. +func runtime_getProfLabel() unsafe.Pointer + +// SetGoroutineLabels sets the current goroutine's labels to match ctx. +// A new goroutine inherits the labels of the goroutine that created it. +// This is a lower-level API than Do, which should be used instead when possible. +func SetGoroutineLabels(ctx context.Context) { + ctxLabels, _ := ctx.Value(labelContextKey{}).(*labelMap) + runtime_setProfLabel(unsafe.Pointer(ctxLabels)) +} + +// Do calls f with a copy of the parent context with the +// given labels added to the parent's label map. +// Goroutines spawned while executing f will inherit the augmented label-set. +// Each key/value pair in labels is inserted into the label map in the +// order provided, overriding any previous value for the same key. +// The augmented label map will be set for the duration of the call to f +// and restored once f returns. +func Do(ctx context.Context, labels LabelSet, f func(context.Context)) { + defer SetGoroutineLabels(ctx) + ctx = WithLabels(ctx, labels) + SetGoroutineLabels(ctx) + f(ctx) +} diff --git a/src/runtime/pprof/runtime_test.go b/src/runtime/pprof/runtime_test.go new file mode 100644 index 0000000..0dd5324 --- /dev/null +++ b/src/runtime/pprof/runtime_test.go @@ -0,0 +1,96 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof + +import ( + "context" + "fmt" + "reflect" + "testing" +) + +func TestSetGoroutineLabels(t *testing.T) { + sync := make(chan struct{}) + + wantLabels := map[string]string{} + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("Expected parent goroutine's profile labels to be empty before test, got %v", gotLabels) + } + go func() { + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("Expected child goroutine's profile labels to be empty before test, got %v", gotLabels) + } + sync <- struct{}{} + }() + <-sync + + wantLabels = map[string]string{"key": "value"} + ctx := WithLabels(context.Background(), Labels("key", "value")) + SetGoroutineLabels(ctx) + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("parent goroutine's profile labels: got %v, want %v", gotLabels, wantLabels) + } + go func() { + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("child goroutine's profile labels: got %v, want %v", gotLabels, wantLabels) + } + sync <- struct{}{} + }() + <-sync + + wantLabels = map[string]string{} + ctx = context.Background() + SetGoroutineLabels(ctx) + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("Expected parent goroutine's profile labels to be empty, got %v", gotLabels) + } + go func() { + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("Expected child goroutine's profile labels to be empty, got %v", gotLabels) + } + sync <- struct{}{} + }() + <-sync +} + +func TestDo(t *testing.T) { + wantLabels := map[string]string{} + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("Expected parent goroutine's profile labels to be empty before Do, got %v", gotLabels) + } + + Do(context.Background(), Labels("key1", "value1", "key2", "value2"), func(ctx context.Context) { + wantLabels := map[string]string{"key1": "value1", "key2": "value2"} + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("parent goroutine's profile labels: got %v, want %v", gotLabels, wantLabels) + } + + sync := make(chan struct{}) + go func() { + wantLabels := map[string]string{"key1": "value1", "key2": "value2"} + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + t.Errorf("child goroutine's profile labels: got %v, want %v", gotLabels, wantLabels) + } + sync <- struct{}{} + }() + <-sync + + }) + + wantLabels = map[string]string{} + if gotLabels := getProfLabel(); !reflect.DeepEqual(gotLabels, wantLabels) { + fmt.Printf("%#v", gotLabels) + fmt.Printf("%#v", wantLabels) + t.Errorf("Expected parent goroutine's profile labels to be empty after Do, got %v", gotLabels) + } +} + +func getProfLabel() map[string]string { + l := (*labelMap)(runtime_getProfLabel()) + if l == nil { + return map[string]string{} + } + return *l +} diff --git a/src/runtime/pprof/rusage_test.go b/src/runtime/pprof/rusage_test.go new file mode 100644 index 0000000..8039510 --- /dev/null +++ b/src/runtime/pprof/rusage_test.go @@ -0,0 +1,41 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build unix + +package pprof + +import ( + "syscall" + "time" +) + +func init() { + diffCPUTimeImpl = diffCPUTimeRUsage +} + +func diffCPUTimeRUsage(f func()) (user, system time.Duration) { + ok := true + var before, after syscall.Rusage + + err := syscall.Getrusage(syscall.RUSAGE_SELF, &before) + if err != nil { + ok = false + } + + f() + + err = syscall.Getrusage(syscall.RUSAGE_SELF, &after) + if err != nil { + ok = false + } + + if !ok { + return 0, 0 + } + + user = time.Duration(after.Utime.Nano() - before.Utime.Nano()) + system = time.Duration(after.Stime.Nano() - before.Stime.Nano()) + return user, system +} diff --git a/src/runtime/pprof/testdata/README b/src/runtime/pprof/testdata/README new file mode 100644 index 0000000..876538e --- /dev/null +++ b/src/runtime/pprof/testdata/README @@ -0,0 +1,9 @@ +These binaries were generated by: + +$ cat empty.s +.global _start +_start: +$ as --32 -o empty.o empty.s && ld --build-id -m elf_i386 -o test32 empty.o +$ as --64 -o empty.o empty.s && ld --build-id -o test64 empty.o +$ powerpc-linux-gnu-as -o empty.o empty.s && powerpc-linux-gnu-ld --build-id -o test32be empty.o +$ powerpc64-linux-gnu-as -o empty.o empty.s && powerpc64-linux-gnu-ld --build-id -o test64be empty.o diff --git a/src/runtime/pprof/testdata/mappingtest/main.go b/src/runtime/pprof/testdata/mappingtest/main.go new file mode 100644 index 0000000..484b7f9 --- /dev/null +++ b/src/runtime/pprof/testdata/mappingtest/main.go @@ -0,0 +1,108 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This program outputs a CPU profile that includes +// both Go and Cgo stacks. This is used by the mapping info +// tests in runtime/pprof. +// +// If SETCGOTRACEBACK=1 is set, the CPU profile will includes +// PCs from C side but they will not be symbolized. +package main + +/* +#include <stdint.h> +#include <stdlib.h> + +int cpuHogCSalt1 = 0; +int cpuHogCSalt2 = 0; + +void CPUHogCFunction0(int foo) { + int i; + for (i = 0; i < 100000; i++) { + if (foo > 0) { + foo *= foo; + } else { + foo *= foo + 1; + } + cpuHogCSalt2 = foo; + } +} + +void CPUHogCFunction() { + CPUHogCFunction0(cpuHogCSalt1); +} + +struct CgoTracebackArg { + uintptr_t context; + uintptr_t sigContext; + uintptr_t *buf; + uintptr_t max; +}; + +void CollectCgoTraceback(void* parg) { + struct CgoTracebackArg* arg = (struct CgoTracebackArg*)(parg); + arg->buf[0] = (uintptr_t)(CPUHogCFunction0); + arg->buf[1] = (uintptr_t)(CPUHogCFunction); + arg->buf[2] = 0; +}; +*/ +import "C" + +import ( + "log" + "os" + "runtime" + "runtime/pprof" + "time" + "unsafe" +) + +func init() { + if v := os.Getenv("SETCGOTRACEBACK"); v == "1" { + // Collect some PCs from C-side, but don't symbolize. + runtime.SetCgoTraceback(0, unsafe.Pointer(C.CollectCgoTraceback), nil, nil) + } +} + +func main() { + go cpuHogGoFunction() + go cpuHogCFunction() + runtime.Gosched() + + if err := pprof.StartCPUProfile(os.Stdout); err != nil { + log.Fatal("can't start CPU profile: ", err) + } + time.Sleep(200 * time.Millisecond) + pprof.StopCPUProfile() + + if err := os.Stdout.Close(); err != nil { + log.Fatal("can't write CPU profile: ", err) + } +} + +var salt1 int +var salt2 int + +func cpuHogGoFunction() { + for { + foo := salt1 + for i := 0; i < 1e5; i++ { + if foo > 0 { + foo *= foo + } else { + foo *= foo + 1 + } + salt2 = foo + } + runtime.Gosched() + } +} + +func cpuHogCFunction() { + // Generates CPU profile samples including a Cgo call path. + for { + C.CPUHogCFunction() + runtime.Gosched() + } +} diff --git a/src/runtime/pprof/testdata/test32 b/src/runtime/pprof/testdata/test32 Binary files differnew file mode 100755 index 0000000..ce59472 --- /dev/null +++ b/src/runtime/pprof/testdata/test32 diff --git a/src/runtime/pprof/testdata/test32be b/src/runtime/pprof/testdata/test32be Binary files differnew file mode 100755 index 0000000..f13a732 --- /dev/null +++ b/src/runtime/pprof/testdata/test32be diff --git a/src/runtime/pprof/testdata/test64 b/src/runtime/pprof/testdata/test64 Binary files differnew file mode 100755 index 0000000..3fb42fb --- /dev/null +++ b/src/runtime/pprof/testdata/test64 diff --git a/src/runtime/pprof/testdata/test64be b/src/runtime/pprof/testdata/test64be Binary files differnew file mode 100755 index 0000000..09b4b01 --- /dev/null +++ b/src/runtime/pprof/testdata/test64be |