diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
commit | 43a123c1ae6613b3efeed291fa552ecd909d3acf (patch) | |
tree | fd92518b7024bc74031f78a1cf9e454b65e73665 /src/runtime/metrics | |
parent | Initial commit. (diff) | |
download | golang-1.20-upstream.tar.xz golang-1.20-upstream.zip |
Adding upstream version 1.20.14.upstream/1.20.14upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/runtime/metrics.go | 723 | ||||
-rw-r--r-- | src/runtime/metrics/description.go | 380 | ||||
-rw-r--r-- | src/runtime/metrics/description_test.go | 115 | ||||
-rw-r--r-- | src/runtime/metrics/doc.go | 283 | ||||
-rw-r--r-- | src/runtime/metrics/example_test.go | 96 | ||||
-rw-r--r-- | src/runtime/metrics/histogram.go | 33 | ||||
-rw-r--r-- | src/runtime/metrics/sample.go | 47 | ||||
-rw-r--r-- | src/runtime/metrics/value.go | 69 | ||||
-rw-r--r-- | src/runtime/metrics_test.go | 613 |
9 files changed, 2359 insertions, 0 deletions
diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go new file mode 100644 index 0000000..2061dc0 --- /dev/null +++ b/src/runtime/metrics.go @@ -0,0 +1,723 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +// Metrics implementation exported to runtime/metrics. + +import ( + "unsafe" +) + +var ( + // metrics is a map of runtime/metrics keys to data used by the runtime + // to sample each metric's value. metricsInit indicates it has been + // initialized. + // + // These fields are protected by metricsSema which should be + // locked/unlocked with metricsLock() / metricsUnlock(). + metricsSema uint32 = 1 + metricsInit bool + metrics map[string]metricData + + sizeClassBuckets []float64 + timeHistBuckets []float64 +) + +type metricData struct { + // deps is the set of runtime statistics that this metric + // depends on. Before compute is called, the statAggregate + // which will be passed must ensure() these dependencies. + deps statDepSet + + // compute is a function that populates a metricValue + // given a populated statAggregate structure. + compute func(in *statAggregate, out *metricValue) +} + +func metricsLock() { + // Acquire the metricsSema but with handoff. Operations are typically + // expensive enough that queueing up goroutines and handing off between + // them will be noticeably better-behaved. + semacquire1(&metricsSema, true, 0, 0, waitReasonSemacquire) + if raceenabled { + raceacquire(unsafe.Pointer(&metricsSema)) + } +} + +func metricsUnlock() { + if raceenabled { + racerelease(unsafe.Pointer(&metricsSema)) + } + semrelease(&metricsSema) +} + +// initMetrics initializes the metrics map if it hasn't been yet. +// +// metricsSema must be held. +func initMetrics() { + if metricsInit { + return + } + + sizeClassBuckets = make([]float64, _NumSizeClasses, _NumSizeClasses+1) + // Skip size class 0 which is a stand-in for large objects, but large + // objects are tracked separately (and they actually get placed in + // the last bucket, not the first). + sizeClassBuckets[0] = 1 // The smallest allocation is 1 byte in size. + for i := 1; i < _NumSizeClasses; i++ { + // Size classes have an inclusive upper-bound + // and exclusive lower bound (e.g. 48-byte size class is + // (32, 48]) whereas we want and inclusive lower-bound + // and exclusive upper-bound (e.g. 48-byte size class is + // [33, 49). We can achieve this by shifting all bucket + // boundaries up by 1. + // + // Also, a float64 can precisely represent integers with + // value up to 2^53 and size classes are relatively small + // (nowhere near 2^48 even) so this will give us exact + // boundaries. + sizeClassBuckets[i] = float64(class_to_size[i] + 1) + } + sizeClassBuckets = append(sizeClassBuckets, float64Inf()) + + timeHistBuckets = timeHistogramMetricsBuckets() + metrics = map[string]metricData{ + "/cgo/go-to-c-calls:calls": { + compute: func(_ *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(NumCgoCall()) + }, + }, + "/cpu/classes/gc/mark/assist:cpu-seconds": { + deps: makeStatDepSet(cpuStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindFloat64 + out.scalar = float64bits(nsToSec(in.cpuStats.gcAssistTime)) + }, + }, + "/cpu/classes/gc/mark/dedicated:cpu-seconds": { + deps: makeStatDepSet(cpuStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindFloat64 + out.scalar = float64bits(nsToSec(in.cpuStats.gcDedicatedTime)) + }, + }, + "/cpu/classes/gc/mark/idle:cpu-seconds": { + deps: makeStatDepSet(cpuStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindFloat64 + out.scalar = float64bits(nsToSec(in.cpuStats.gcIdleTime)) + }, + }, + "/cpu/classes/gc/pause:cpu-seconds": { + deps: makeStatDepSet(cpuStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindFloat64 + out.scalar = float64bits(nsToSec(in.cpuStats.gcPauseTime)) + }, + }, + "/cpu/classes/gc/total:cpu-seconds": { + deps: makeStatDepSet(cpuStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindFloat64 + out.scalar = float64bits(nsToSec(in.cpuStats.gcTotalTime)) + }, + }, + "/cpu/classes/idle:cpu-seconds": { + deps: makeStatDepSet(cpuStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindFloat64 + out.scalar = float64bits(nsToSec(in.cpuStats.idleTime)) + }, + }, + "/cpu/classes/scavenge/assist:cpu-seconds": { + deps: makeStatDepSet(cpuStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindFloat64 + out.scalar = float64bits(nsToSec(in.cpuStats.scavengeAssistTime)) + }, + }, + "/cpu/classes/scavenge/background:cpu-seconds": { + deps: makeStatDepSet(cpuStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindFloat64 + out.scalar = float64bits(nsToSec(in.cpuStats.scavengeBgTime)) + }, + }, + "/cpu/classes/scavenge/total:cpu-seconds": { + deps: makeStatDepSet(cpuStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindFloat64 + out.scalar = float64bits(nsToSec(in.cpuStats.scavengeTotalTime)) + }, + }, + "/cpu/classes/total:cpu-seconds": { + deps: makeStatDepSet(cpuStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindFloat64 + out.scalar = float64bits(nsToSec(in.cpuStats.totalTime)) + }, + }, + "/cpu/classes/user:cpu-seconds": { + deps: makeStatDepSet(cpuStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindFloat64 + out.scalar = float64bits(nsToSec(in.cpuStats.userTime)) + }, + }, + "/gc/cycles/automatic:gc-cycles": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.gcCyclesDone - in.sysStats.gcCyclesForced + }, + }, + "/gc/cycles/forced:gc-cycles": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.gcCyclesForced + }, + }, + "/gc/cycles/total:gc-cycles": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.gcCyclesDone + }, + }, + "/gc/heap/allocs-by-size:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + hist := out.float64HistOrInit(sizeClassBuckets) + hist.counts[len(hist.counts)-1] = uint64(in.heapStats.largeAllocCount) + // Cut off the first index which is ostensibly for size class 0, + // but large objects are tracked separately so it's actually unused. + for i, count := range in.heapStats.smallAllocCount[1:] { + hist.counts[i] = uint64(count) + } + }, + }, + "/gc/heap/allocs:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.heapStats.totalAllocated + }, + }, + "/gc/heap/allocs:objects": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.heapStats.totalAllocs + }, + }, + "/gc/heap/frees-by-size:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + hist := out.float64HistOrInit(sizeClassBuckets) + hist.counts[len(hist.counts)-1] = uint64(in.heapStats.largeFreeCount) + // Cut off the first index which is ostensibly for size class 0, + // but large objects are tracked separately so it's actually unused. + for i, count := range in.heapStats.smallFreeCount[1:] { + hist.counts[i] = uint64(count) + } + }, + }, + "/gc/heap/frees:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.heapStats.totalFreed + }, + }, + "/gc/heap/frees:objects": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.heapStats.totalFrees + }, + }, + "/gc/heap/goal:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.heapGoal + }, + }, + "/gc/heap/objects:objects": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.heapStats.numObjects + }, + }, + "/gc/heap/tiny/allocs:objects": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.tinyAllocCount) + }, + }, + "/gc/limiter/last-enabled:gc-cycle": { + compute: func(_ *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(gcCPULimiter.lastEnabledCycle.Load()) + }, + }, + "/gc/pauses:seconds": { + compute: func(_ *statAggregate, out *metricValue) { + hist := out.float64HistOrInit(timeHistBuckets) + // The bottom-most bucket, containing negative values, is tracked + // as a separately as underflow, so fill that in manually and then + // iterate over the rest. + hist.counts[0] = memstats.gcPauseDist.underflow.Load() + for i := range memstats.gcPauseDist.counts { + hist.counts[i+1] = memstats.gcPauseDist.counts[i].Load() + } + hist.counts[len(hist.counts)-1] = memstats.gcPauseDist.overflow.Load() + }, + }, + "/gc/stack/starting-size:bytes": { + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(startingStackSize) + }, + }, + "/memory/classes/heap/free:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.committed - in.heapStats.inHeap - + in.heapStats.inStacks - in.heapStats.inWorkBufs - + in.heapStats.inPtrScalarBits) + }, + }, + "/memory/classes/heap/objects:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.heapStats.inObjects + }, + }, + "/memory/classes/heap/released:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.released) + }, + }, + "/memory/classes/heap/stacks:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.inStacks) + }, + }, + "/memory/classes/heap/unused:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.inHeap) - in.heapStats.inObjects + }, + }, + "/memory/classes/metadata/mcache/free:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.mCacheSys - in.sysStats.mCacheInUse + }, + }, + "/memory/classes/metadata/mcache/inuse:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.mCacheInUse + }, + }, + "/memory/classes/metadata/mspan/free:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.mSpanSys - in.sysStats.mSpanInUse + }, + }, + "/memory/classes/metadata/mspan/inuse:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.mSpanInUse + }, + }, + "/memory/classes/metadata/other:bytes": { + deps: makeStatDepSet(heapStatsDep, sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.inWorkBufs+in.heapStats.inPtrScalarBits) + in.sysStats.gcMiscSys + }, + }, + "/memory/classes/os-stacks:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.stacksSys + }, + }, + "/memory/classes/other:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.otherSys + }, + }, + "/memory/classes/profiling/buckets:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.buckHashSys + }, + }, + "/memory/classes/total:bytes": { + deps: makeStatDepSet(heapStatsDep, sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.committed+in.heapStats.released) + + in.sysStats.stacksSys + in.sysStats.mSpanSys + + in.sysStats.mCacheSys + in.sysStats.buckHashSys + + in.sysStats.gcMiscSys + in.sysStats.otherSys + }, + }, + "/sched/gomaxprocs:threads": { + compute: func(_ *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(gomaxprocs) + }, + }, + "/sched/goroutines:goroutines": { + compute: func(_ *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(gcount()) + }, + }, + "/sched/latencies:seconds": { + compute: func(_ *statAggregate, out *metricValue) { + hist := out.float64HistOrInit(timeHistBuckets) + hist.counts[0] = sched.timeToRun.underflow.Load() + for i := range sched.timeToRun.counts { + hist.counts[i+1] = sched.timeToRun.counts[i].Load() + } + hist.counts[len(hist.counts)-1] = sched.timeToRun.overflow.Load() + }, + }, + "/sync/mutex/wait/total:seconds": { + compute: func(_ *statAggregate, out *metricValue) { + out.kind = metricKindFloat64 + out.scalar = float64bits(nsToSec(sched.totalMutexWaitTime.Load())) + }, + }, + } + metricsInit = true +} + +// statDep is a dependency on a group of statistics +// that a metric might have. +type statDep uint + +const ( + heapStatsDep statDep = iota // corresponds to heapStatsAggregate + sysStatsDep // corresponds to sysStatsAggregate + cpuStatsDep // corresponds to cpuStatsAggregate + numStatsDeps +) + +// statDepSet represents a set of statDeps. +// +// Under the hood, it's a bitmap. +type statDepSet [1]uint64 + +// makeStatDepSet creates a new statDepSet from a list of statDeps. +func makeStatDepSet(deps ...statDep) statDepSet { + var s statDepSet + for _, d := range deps { + s[d/64] |= 1 << (d % 64) + } + return s +} + +// differennce returns set difference of s from b as a new set. +func (s statDepSet) difference(b statDepSet) statDepSet { + var c statDepSet + for i := range s { + c[i] = s[i] &^ b[i] + } + return c +} + +// union returns the union of the two sets as a new set. +func (s statDepSet) union(b statDepSet) statDepSet { + var c statDepSet + for i := range s { + c[i] = s[i] | b[i] + } + return c +} + +// empty returns true if there are no dependencies in the set. +func (s *statDepSet) empty() bool { + for _, c := range s { + if c != 0 { + return false + } + } + return true +} + +// has returns true if the set contains a given statDep. +func (s *statDepSet) has(d statDep) bool { + return s[d/64]&(1<<(d%64)) != 0 +} + +// heapStatsAggregate represents memory stats obtained from the +// runtime. This set of stats is grouped together because they +// depend on each other in some way to make sense of the runtime's +// current heap memory use. They're also sharded across Ps, so it +// makes sense to grab them all at once. +type heapStatsAggregate struct { + heapStatsDelta + + // Derived from values in heapStatsDelta. + + // inObjects is the bytes of memory occupied by objects, + inObjects uint64 + + // numObjects is the number of live objects in the heap. + numObjects uint64 + + // totalAllocated is the total bytes of heap objects allocated + // over the lifetime of the program. + totalAllocated uint64 + + // totalFreed is the total bytes of heap objects freed + // over the lifetime of the program. + totalFreed uint64 + + // totalAllocs is the number of heap objects allocated over + // the lifetime of the program. + totalAllocs uint64 + + // totalFrees is the number of heap objects freed over + // the lifetime of the program. + totalFrees uint64 +} + +// compute populates the heapStatsAggregate with values from the runtime. +func (a *heapStatsAggregate) compute() { + memstats.heapStats.read(&a.heapStatsDelta) + + // Calculate derived stats. + a.totalAllocs = a.largeAllocCount + a.totalFrees = a.largeFreeCount + a.totalAllocated = a.largeAlloc + a.totalFreed = a.largeFree + for i := range a.smallAllocCount { + na := a.smallAllocCount[i] + nf := a.smallFreeCount[i] + a.totalAllocs += na + a.totalFrees += nf + a.totalAllocated += na * uint64(class_to_size[i]) + a.totalFreed += nf * uint64(class_to_size[i]) + } + a.inObjects = a.totalAllocated - a.totalFreed + a.numObjects = a.totalAllocs - a.totalFrees +} + +// sysStatsAggregate represents system memory stats obtained +// from the runtime. This set of stats is grouped together because +// they're all relatively cheap to acquire and generally independent +// of one another and other runtime memory stats. The fact that they +// may be acquired at different times, especially with respect to +// heapStatsAggregate, means there could be some skew, but because of +// these stats are independent, there's no real consistency issue here. +type sysStatsAggregate struct { + stacksSys uint64 + mSpanSys uint64 + mSpanInUse uint64 + mCacheSys uint64 + mCacheInUse uint64 + buckHashSys uint64 + gcMiscSys uint64 + otherSys uint64 + heapGoal uint64 + gcCyclesDone uint64 + gcCyclesForced uint64 +} + +// compute populates the sysStatsAggregate with values from the runtime. +func (a *sysStatsAggregate) compute() { + a.stacksSys = memstats.stacks_sys.load() + a.buckHashSys = memstats.buckhash_sys.load() + a.gcMiscSys = memstats.gcMiscSys.load() + a.otherSys = memstats.other_sys.load() + a.heapGoal = gcController.heapGoal() + a.gcCyclesDone = uint64(memstats.numgc) + a.gcCyclesForced = uint64(memstats.numforcedgc) + + systemstack(func() { + lock(&mheap_.lock) + a.mSpanSys = memstats.mspan_sys.load() + a.mSpanInUse = uint64(mheap_.spanalloc.inuse) + a.mCacheSys = memstats.mcache_sys.load() + a.mCacheInUse = uint64(mheap_.cachealloc.inuse) + unlock(&mheap_.lock) + }) +} + +// cpuStatsAggregate represents CPU stats obtained from the runtime +// acquired together to avoid skew and inconsistencies. +type cpuStatsAggregate struct { + cpuStats +} + +// compute populates the cpuStatsAggregate with values from the runtime. +func (a *cpuStatsAggregate) compute() { + a.cpuStats = work.cpuStats +} + +// nsToSec takes a duration in nanoseconds and converts it to seconds as +// a float64. +func nsToSec(ns int64) float64 { + return float64(ns) / 1e9 +} + +// statAggregate is the main driver of the metrics implementation. +// +// It contains multiple aggregates of runtime statistics, as well +// as a set of these aggregates that it has populated. The aggergates +// are populated lazily by its ensure method. +type statAggregate struct { + ensured statDepSet + heapStats heapStatsAggregate + sysStats sysStatsAggregate + cpuStats cpuStatsAggregate +} + +// ensure populates statistics aggregates determined by deps if they +// haven't yet been populated. +func (a *statAggregate) ensure(deps *statDepSet) { + missing := deps.difference(a.ensured) + if missing.empty() { + return + } + for i := statDep(0); i < numStatsDeps; i++ { + if !missing.has(i) { + continue + } + switch i { + case heapStatsDep: + a.heapStats.compute() + case sysStatsDep: + a.sysStats.compute() + case cpuStatsDep: + a.cpuStats.compute() + } + } + a.ensured = a.ensured.union(missing) +} + +// metricKind is a runtime copy of runtime/metrics.ValueKind and +// must be kept structurally identical to that type. +type metricKind int + +const ( + // These values must be kept identical to their corresponding Kind* values + // in the runtime/metrics package. + metricKindBad metricKind = iota + metricKindUint64 + metricKindFloat64 + metricKindFloat64Histogram +) + +// metricSample is a runtime copy of runtime/metrics.Sample and +// must be kept structurally identical to that type. +type metricSample struct { + name string + value metricValue +} + +// metricValue is a runtime copy of runtime/metrics.Sample and +// must be kept structurally identical to that type. +type metricValue struct { + kind metricKind + scalar uint64 // contains scalar values for scalar Kinds. + pointer unsafe.Pointer // contains non-scalar values. +} + +// float64HistOrInit tries to pull out an existing float64Histogram +// from the value, but if none exists, then it allocates one with +// the given buckets. +func (v *metricValue) float64HistOrInit(buckets []float64) *metricFloat64Histogram { + var hist *metricFloat64Histogram + if v.kind == metricKindFloat64Histogram && v.pointer != nil { + hist = (*metricFloat64Histogram)(v.pointer) + } else { + v.kind = metricKindFloat64Histogram + hist = new(metricFloat64Histogram) + v.pointer = unsafe.Pointer(hist) + } + hist.buckets = buckets + if len(hist.counts) != len(hist.buckets)-1 { + hist.counts = make([]uint64, len(buckets)-1) + } + return hist +} + +// metricFloat64Histogram is a runtime copy of runtime/metrics.Float64Histogram +// and must be kept structurally identical to that type. +type metricFloat64Histogram struct { + counts []uint64 + buckets []float64 +} + +// agg is used by readMetrics, and is protected by metricsSema. +// +// Managed as a global variable because its pointer will be +// an argument to a dynamically-defined function, and we'd +// like to avoid it escaping to the heap. +var agg statAggregate + +// readMetrics is the implementation of runtime/metrics.Read. +// +//go:linkname readMetrics runtime/metrics.runtime_readMetrics +func readMetrics(samplesp unsafe.Pointer, len int, cap int) { + // Construct a slice from the args. + sl := slice{samplesp, len, cap} + samples := *(*[]metricSample)(unsafe.Pointer(&sl)) + + metricsLock() + + // Ensure the map is initialized. + initMetrics() + + // Clear agg defensively. + agg = statAggregate{} + + // Sample. + for i := range samples { + sample := &samples[i] + data, ok := metrics[sample.name] + if !ok { + sample.value.kind = metricKindBad + continue + } + // Ensure we have all the stats we need. + // agg is populated lazily. + agg.ensure(&data.deps) + + // Compute the value based on the stats we have. + data.compute(&agg, &sample.value) + } + + metricsUnlock() +} diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go new file mode 100644 index 0000000..dcfe01e --- /dev/null +++ b/src/runtime/metrics/description.go @@ -0,0 +1,380 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package metrics + +// Description describes a runtime metric. +type Description struct { + // Name is the full name of the metric which includes the unit. + // + // The format of the metric may be described by the following regular expression. + // + // ^(?P<name>/[^:]+):(?P<unit>[^:*/]+(?:[*/][^:*/]+)*)$ + // + // The format splits the name into two components, separated by a colon: a path which always + // starts with a /, and a machine-parseable unit. The name may contain any valid Unicode + // codepoint in between / characters, but by convention will try to stick to lowercase + // characters and hyphens. An example of such a path might be "/memory/heap/free". + // + // The unit is by convention a series of lowercase English unit names (singular or plural) + // without prefixes delimited by '*' or '/'. The unit names may contain any valid Unicode + // codepoint that is not a delimiter. + // Examples of units might be "seconds", "bytes", "bytes/second", "cpu-seconds", + // "byte*cpu-seconds", and "bytes/second/second". + // + // For histograms, multiple units may apply. For instance, the units of the buckets and + // the count. By convention, for histograms, the units of the count are always "samples" + // with the type of sample evident by the metric's name, while the unit in the name + // specifies the buckets' unit. + // + // A complete name might look like "/memory/heap/free:bytes". + Name string + + // Description is an English language sentence describing the metric. + Description string + + // Kind is the kind of value for this metric. + // + // The purpose of this field is to allow users to filter out metrics whose values are + // types which their application may not understand. + Kind ValueKind + + // Cumulative is whether or not the metric is cumulative. If a cumulative metric is just + // a single number, then it increases monotonically. If the metric is a distribution, + // then each bucket count increases monotonically. + // + // This flag thus indicates whether or not it's useful to compute a rate from this value. + Cumulative bool +} + +// The English language descriptions below must be kept in sync with the +// descriptions of each metric in doc.go. +var allDesc = []Description{ + { + Name: "/cgo/go-to-c-calls:calls", + Description: "Count of calls made from Go to C by the current process.", + Kind: KindUint64, + Cumulative: true, + }, + { + Name: "/cpu/classes/gc/mark/assist:cpu-seconds", + Description: "Estimated total CPU time goroutines spent performing GC tasks " + + "to assist the GC and prevent it from falling behind the application. " + + "This metric is an overestimate, and not directly comparable to " + + "system CPU time measurements. Compare only with other /cpu/classes " + + "metrics.", + Kind: KindFloat64, + Cumulative: true, + }, + { + Name: "/cpu/classes/gc/mark/dedicated:cpu-seconds", + Description: "Estimated total CPU time spent performing GC tasks on " + + "processors (as defined by GOMAXPROCS) dedicated to those tasks. " + + "This includes time spent with the world stopped due to the GC. " + + "This metric is an overestimate, and not directly comparable to " + + "system CPU time measurements. Compare only with other /cpu/classes " + + "metrics.", + Kind: KindFloat64, + Cumulative: true, + }, + { + Name: "/cpu/classes/gc/mark/idle:cpu-seconds", + Description: "Estimated total CPU time spent performing GC tasks on " + + "spare CPU resources that the Go scheduler could not otherwise find " + + "a use for. This should be subtracted from the total GC CPU time to " + + "obtain a measure of compulsory GC CPU time. " + + "This metric is an overestimate, and not directly comparable to " + + "system CPU time measurements. Compare only with other /cpu/classes " + + "metrics.", + Kind: KindFloat64, + Cumulative: true, + }, + { + Name: "/cpu/classes/gc/pause:cpu-seconds", + Description: "Estimated total CPU time spent with the application paused by " + + "the GC. Even if only one thread is running during the pause, this is " + + "computed as GOMAXPROCS times the pause latency because nothing else " + + "can be executing. This is the exact sum of samples in /gc/pause:seconds " + + "if each sample is multiplied by GOMAXPROCS at the time it is taken. " + + "This metric is an overestimate, and not directly comparable to " + + "system CPU time measurements. Compare only with other /cpu/classes " + + "metrics.", + Kind: KindFloat64, + Cumulative: true, + }, + { + Name: "/cpu/classes/gc/total:cpu-seconds", + Description: "Estimated total CPU time spent performing GC tasks. " + + "This metric is an overestimate, and not directly comparable to " + + "system CPU time measurements. Compare only with other /cpu/classes " + + "metrics. Sum of all metrics in /cpu/classes/gc.", + Kind: KindFloat64, + Cumulative: true, + }, + { + Name: "/cpu/classes/idle:cpu-seconds", + Description: "Estimated total available CPU time not spent executing any Go or Go runtime code. " + + "In other words, the part of /cpu/classes/total:cpu-seconds that was unused. " + + "This metric is an overestimate, and not directly comparable to " + + "system CPU time measurements. Compare only with other /cpu/classes " + + "metrics.", + Kind: KindFloat64, + Cumulative: true, + }, + { + Name: "/cpu/classes/scavenge/assist:cpu-seconds", + Description: "Estimated total CPU time spent returning unused memory to the " + + "underlying platform in response eagerly in response to memory pressure. " + + "This metric is an overestimate, and not directly comparable to " + + "system CPU time measurements. Compare only with other /cpu/classes " + + "metrics.", + Kind: KindFloat64, + Cumulative: true, + }, + { + Name: "/cpu/classes/scavenge/background:cpu-seconds", + Description: "Estimated total CPU time spent performing background tasks " + + "to return unused memory to the underlying platform. " + + "This metric is an overestimate, and not directly comparable to " + + "system CPU time measurements. Compare only with other /cpu/classes " + + "metrics.", + Kind: KindFloat64, + Cumulative: true, + }, + { + Name: "/cpu/classes/scavenge/total:cpu-seconds", + Description: "Estimated total CPU time spent performing tasks that return " + + "unused memory to the underlying platform. " + + "This metric is an overestimate, and not directly comparable to " + + "system CPU time measurements. Compare only with other /cpu/classes " + + "metrics. Sum of all metrics in /cpu/classes/scavenge.", + Kind: KindFloat64, + Cumulative: true, + }, + { + Name: "/cpu/classes/total:cpu-seconds", + Description: "Estimated total available CPU time for user Go code " + + "or the Go runtime, as defined by GOMAXPROCS. In other words, GOMAXPROCS " + + "integrated over the wall-clock duration this process has been executing for. " + + "This metric is an overestimate, and not directly comparable to " + + "system CPU time measurements. Compare only with other /cpu/classes " + + "metrics. Sum of all metrics in /cpu/classes.", + Kind: KindFloat64, + Cumulative: true, + }, + { + Name: "/cpu/classes/user:cpu-seconds", + Description: "Estimated total CPU time spent running user Go code. This may " + + "also include some small amount of time spent in the Go runtime. " + + "This metric is an overestimate, and not directly comparable to " + + "system CPU time measurements. Compare only with other /cpu/classes " + + "metrics.", + Kind: KindFloat64, + Cumulative: true, + }, + { + Name: "/gc/cycles/automatic:gc-cycles", + Description: "Count of completed GC cycles generated by the Go runtime.", + Kind: KindUint64, + Cumulative: true, + }, + { + Name: "/gc/cycles/forced:gc-cycles", + Description: "Count of completed GC cycles forced by the application.", + Kind: KindUint64, + Cumulative: true, + }, + { + Name: "/gc/cycles/total:gc-cycles", + Description: "Count of all completed GC cycles.", + Kind: KindUint64, + Cumulative: true, + }, + { + Name: "/gc/heap/allocs-by-size:bytes", + Description: "Distribution of heap allocations by approximate size. " + + "Note that this does not include tiny objects as defined by " + + "/gc/heap/tiny/allocs:objects, only tiny blocks.", + Kind: KindFloat64Histogram, + Cumulative: true, + }, + { + Name: "/gc/heap/allocs:bytes", + Description: "Cumulative sum of memory allocated to the heap by the application.", + Kind: KindUint64, + Cumulative: true, + }, + { + Name: "/gc/heap/allocs:objects", + Description: "Cumulative count of heap allocations triggered by the application. " + + "Note that this does not include tiny objects as defined by " + + "/gc/heap/tiny/allocs:objects, only tiny blocks.", + Kind: KindUint64, + Cumulative: true, + }, + { + Name: "/gc/heap/frees-by-size:bytes", + Description: "Distribution of freed heap allocations by approximate size. " + + "Note that this does not include tiny objects as defined by " + + "/gc/heap/tiny/allocs:objects, only tiny blocks.", + Kind: KindFloat64Histogram, + Cumulative: true, + }, + { + Name: "/gc/heap/frees:bytes", + Description: "Cumulative sum of heap memory freed by the garbage collector.", + Kind: KindUint64, + Cumulative: true, + }, + { + Name: "/gc/heap/frees:objects", + Description: "Cumulative count of heap allocations whose storage was freed " + + "by the garbage collector. " + + "Note that this does not include tiny objects as defined by " + + "/gc/heap/tiny/allocs:objects, only tiny blocks.", + Kind: KindUint64, + Cumulative: true, + }, + { + Name: "/gc/heap/goal:bytes", + Description: "Heap size target for the end of the GC cycle.", + Kind: KindUint64, + }, + { + Name: "/gc/heap/objects:objects", + Description: "Number of objects, live or unswept, occupying heap memory.", + Kind: KindUint64, + }, + { + Name: "/gc/heap/tiny/allocs:objects", + Description: "Count of small allocations that are packed together into blocks. " + + "These allocations are counted separately from other allocations " + + "because each individual allocation is not tracked by the runtime, " + + "only their block. Each block is already accounted for in " + + "allocs-by-size and frees-by-size.", + Kind: KindUint64, + Cumulative: true, + }, + { + Name: "/gc/limiter/last-enabled:gc-cycle", + Description: "GC cycle the last time the GC CPU limiter was enabled. " + + "This metric is useful for diagnosing the root cause of an out-of-memory " + + "error, because the limiter trades memory for CPU time when the GC's CPU " + + "time gets too high. This is most likely to occur with use of SetMemoryLimit. " + + "The first GC cycle is cycle 1, so a value of 0 indicates that it was never enabled.", + Kind: KindUint64, + }, + { + Name: "/gc/pauses:seconds", + Description: "Distribution individual GC-related stop-the-world pause latencies.", + Kind: KindFloat64Histogram, + Cumulative: true, + }, + { + Name: "/gc/stack/starting-size:bytes", + Description: "The stack size of new goroutines.", + Kind: KindUint64, + Cumulative: false, + }, + { + Name: "/memory/classes/heap/free:bytes", + Description: "Memory that is completely free and eligible to be returned to the underlying system, " + + "but has not been. This metric is the runtime's estimate of free address space that is backed by " + + "physical memory.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/heap/objects:bytes", + Description: "Memory occupied by live objects and dead objects that have not yet been marked free by the garbage collector.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/heap/released:bytes", + Description: "Memory that is completely free and has been returned to the underlying system. This " + + "metric is the runtime's estimate of free address space that is still mapped into the process, " + + "but is not backed by physical memory.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/heap/stacks:bytes", + Description: "Memory allocated from the heap that is reserved for stack space, whether or not it is currently in-use.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/heap/unused:bytes", + Description: "Memory that is reserved for heap objects but is not currently used to hold heap objects.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/mcache/free:bytes", + Description: "Memory that is reserved for runtime mcache structures, but not in-use.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/mcache/inuse:bytes", + Description: "Memory that is occupied by runtime mcache structures that are currently being used.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/mspan/free:bytes", + Description: "Memory that is reserved for runtime mspan structures, but not in-use.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/mspan/inuse:bytes", + Description: "Memory that is occupied by runtime mspan structures that are currently being used.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/other:bytes", + Description: "Memory that is reserved for or used to hold runtime metadata.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/os-stacks:bytes", + Description: "Stack memory allocated by the underlying operating system.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/other:bytes", + Description: "Memory used by execution trace buffers, structures for debugging the runtime, finalizer and profiler specials, and more.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/profiling/buckets:bytes", + Description: "Memory that is used by the stack trace hash map used for profiling.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/total:bytes", + Description: "All memory mapped by the Go runtime into the current process as read-write. Note that this does not include memory mapped by code called via cgo or via the syscall package. Sum of all metrics in /memory/classes.", + Kind: KindUint64, + }, + { + Name: "/sched/gomaxprocs:threads", + Description: "The current runtime.GOMAXPROCS setting, or the number of operating system threads that can execute user-level Go code simultaneously.", + Kind: KindUint64, + }, + { + Name: "/sched/goroutines:goroutines", + Description: "Count of live goroutines.", + Kind: KindUint64, + }, + { + Name: "/sched/latencies:seconds", + Description: "Distribution of the time goroutines have spent in the scheduler in a runnable state before actually running.", + Kind: KindFloat64Histogram, + }, + { + Name: "/sync/mutex/wait/total:seconds", + Description: "Approximate cumulative time goroutines have spent blocked on a sync.Mutex or sync.RWMutex. This metric is useful for identifying global changes in lock contention. Collect a mutex or block profile using the runtime/pprof package for more detailed contention data.", + Kind: KindFloat64, + Cumulative: true, + }, +} + +// All returns a slice of containing metric descriptions for all supported metrics. +func All() []Description { + return allDesc +} diff --git a/src/runtime/metrics/description_test.go b/src/runtime/metrics/description_test.go new file mode 100644 index 0000000..192c1f2 --- /dev/null +++ b/src/runtime/metrics/description_test.go @@ -0,0 +1,115 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package metrics_test + +import ( + "bufio" + "os" + "regexp" + "runtime/metrics" + "strings" + "testing" +) + +func TestDescriptionNameFormat(t *testing.T) { + r := regexp.MustCompile("^(?P<name>/[^:]+):(?P<unit>[^:*/]+(?:[*/][^:*/]+)*)$") + descriptions := metrics.All() + for _, desc := range descriptions { + if !r.MatchString(desc.Name) { + t.Errorf("metrics %q does not match regexp %s", desc.Name, r) + } + } +} + +func extractMetricDocs(t *testing.T) map[string]string { + f, err := os.Open("doc.go") + if err != nil { + t.Fatalf("failed to open doc.go in runtime/metrics package: %v", err) + } + const ( + stateSearch = iota // look for list of metrics + stateNextMetric // look for next metric + stateNextDescription // build description + ) + state := stateSearch + s := bufio.NewScanner(f) + result := make(map[string]string) + var metric string + var prevMetric string + var desc strings.Builder + for s.Scan() { + line := strings.TrimSpace(s.Text()) + switch state { + case stateSearch: + if line == "Below is the full list of supported metrics, ordered lexicographically." { + state = stateNextMetric + } + case stateNextMetric: + // Ignore empty lines until we find a non-empty + // one. This will be our metric name. + if len(line) != 0 { + prevMetric = metric + metric = line + if prevMetric > metric { + t.Errorf("metrics %s and %s are out of lexicographical order", prevMetric, metric) + } + state = stateNextDescription + } + case stateNextDescription: + if len(line) == 0 || line == `*/` { + // An empty line means we're done. + // Write down the description and look + // for a new metric. + result[metric] = desc.String() + desc.Reset() + state = stateNextMetric + } else { + // As long as we're seeing data, assume that's + // part of the description and append it. + if desc.Len() != 0 { + // Turn previous newlines into spaces. + desc.WriteString(" ") + } + desc.WriteString(line) + } + } + if line == `*/` { + break + } + } + if state == stateSearch { + t.Fatalf("failed to find supported metrics docs in %s", f.Name()) + } + return result +} + +func TestDescriptionDocs(t *testing.T) { + docs := extractMetricDocs(t) + descriptions := metrics.All() + for _, d := range descriptions { + want := d.Description + got, ok := docs[d.Name] + if !ok { + t.Errorf("no docs found for metric %s", d.Name) + continue + } + if got != want { + t.Errorf("mismatched description and docs for metric %s", d.Name) + t.Errorf("want: %q, got %q", want, got) + continue + } + } + if len(docs) > len(descriptions) { + docsLoop: + for name := range docs { + for _, d := range descriptions { + if name == d.Name { + continue docsLoop + } + } + t.Errorf("stale documentation for non-existent metric: %s", name) + } + } +} diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go new file mode 100644 index 0000000..b593d8d --- /dev/null +++ b/src/runtime/metrics/doc.go @@ -0,0 +1,283 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package metrics provides a stable interface to access implementation-defined +metrics exported by the Go runtime. This package is similar to existing functions +like runtime.ReadMemStats and debug.ReadGCStats, but significantly more general. + +The set of metrics defined by this package may evolve as the runtime itself +evolves, and also enables variation across Go implementations, whose relevant +metric sets may not intersect. + +# Interface + +Metrics are designated by a string key, rather than, for example, a field name in +a struct. The full list of supported metrics is always available in the slice of +Descriptions returned by All. Each Description also includes useful information +about the metric. + +Thus, users of this API are encouraged to sample supported metrics defined by the +slice returned by All to remain compatible across Go versions. Of course, situations +arise where reading specific metrics is critical. For these cases, users are +encouraged to use build tags, and although metrics may be deprecated and removed, +users should consider this to be an exceptional and rare event, coinciding with a +very large change in a particular Go implementation. + +Each metric key also has a "kind" that describes the format of the metric's value. +In the interest of not breaking users of this package, the "kind" for a given metric +is guaranteed not to change. If it must change, then a new metric will be introduced +with a new key and a new "kind." + +# Metric key format + +As mentioned earlier, metric keys are strings. Their format is simple and well-defined, +designed to be both human and machine readable. It is split into two components, +separated by a colon: a rooted path and a unit. The choice to include the unit in +the key is motivated by compatibility: if a metric's unit changes, its semantics likely +did also, and a new key should be introduced. + +For more details on the precise definition of the metric key's path and unit formats, see +the documentation of the Name field of the Description struct. + +# A note about floats + +This package supports metrics whose values have a floating-point representation. In +order to improve ease-of-use, this package promises to never produce the following +classes of floating-point values: NaN, infinity. + +# Supported metrics + +Below is the full list of supported metrics, ordered lexicographically. + + /cgo/go-to-c-calls:calls + Count of calls made from Go to C by the current process. + + /cpu/classes/gc/mark/assist:cpu-seconds + Estimated total CPU time goroutines spent performing GC tasks + to assist the GC and prevent it from falling behind the application. + This metric is an overestimate, and not directly comparable to + system CPU time measurements. Compare only with other /cpu/classes + metrics. + + /cpu/classes/gc/mark/dedicated:cpu-seconds + Estimated total CPU time spent performing GC tasks on + processors (as defined by GOMAXPROCS) dedicated to those tasks. + This includes time spent with the world stopped due to the GC. + This metric is an overestimate, and not directly comparable to + system CPU time measurements. Compare only with other /cpu/classes + metrics. + + /cpu/classes/gc/mark/idle:cpu-seconds + Estimated total CPU time spent performing GC tasks on + spare CPU resources that the Go scheduler could not otherwise find + a use for. This should be subtracted from the total GC CPU time to + obtain a measure of compulsory GC CPU time. + This metric is an overestimate, and not directly comparable to + system CPU time measurements. Compare only with other /cpu/classes + metrics. + + /cpu/classes/gc/pause:cpu-seconds + Estimated total CPU time spent with the application paused by + the GC. Even if only one thread is running during the pause, this is + computed as GOMAXPROCS times the pause latency because nothing else + can be executing. This is the exact sum of samples in /gc/pause:seconds + if each sample is multiplied by GOMAXPROCS at the time it is taken. + This metric is an overestimate, and not directly comparable to + system CPU time measurements. Compare only with other /cpu/classes + metrics. + + /cpu/classes/gc/total:cpu-seconds + Estimated total CPU time spent performing GC tasks. + This metric is an overestimate, and not directly comparable to + system CPU time measurements. Compare only with other /cpu/classes + metrics. Sum of all metrics in /cpu/classes/gc. + + /cpu/classes/idle:cpu-seconds + Estimated total available CPU time not spent executing any Go or Go + runtime code. In other words, the part of /cpu/classes/total:cpu-seconds + that was unused. + This metric is an overestimate, and not directly comparable to + system CPU time measurements. Compare only with other /cpu/classes + metrics. + + /cpu/classes/scavenge/assist:cpu-seconds + Estimated total CPU time spent returning unused memory to the + underlying platform in response eagerly in response to memory pressure. + This metric is an overestimate, and not directly comparable to + system CPU time measurements. Compare only with other /cpu/classes + metrics. + + /cpu/classes/scavenge/background:cpu-seconds + Estimated total CPU time spent performing background tasks + to return unused memory to the underlying platform. + This metric is an overestimate, and not directly comparable to + system CPU time measurements. Compare only with other /cpu/classes + metrics. + + /cpu/classes/scavenge/total:cpu-seconds + Estimated total CPU time spent performing tasks that return + unused memory to the underlying platform. + This metric is an overestimate, and not directly comparable to + system CPU time measurements. Compare only with other /cpu/classes + metrics. Sum of all metrics in /cpu/classes/scavenge. + + /cpu/classes/total:cpu-seconds + Estimated total available CPU time for user Go code or the Go runtime, as + defined by GOMAXPROCS. In other words, GOMAXPROCS integrated over the + wall-clock duration this process has been executing for. + This metric is an overestimate, and not directly comparable to + system CPU time measurements. Compare only with other /cpu/classes + metrics. Sum of all metrics in /cpu/classes. + + /cpu/classes/user:cpu-seconds + Estimated total CPU time spent running user Go code. This may + also include some small amount of time spent in the Go runtime. + This metric is an overestimate, and not directly comparable to + system CPU time measurements. Compare only with other /cpu/classes + metrics. + + /gc/cycles/automatic:gc-cycles + Count of completed GC cycles generated by the Go runtime. + + /gc/cycles/forced:gc-cycles + Count of completed GC cycles forced by the application. + + /gc/cycles/total:gc-cycles + Count of all completed GC cycles. + + /gc/heap/allocs-by-size:bytes + Distribution of heap allocations by approximate size. + Note that this does not include tiny objects as defined by /gc/heap/tiny/allocs:objects, + only tiny blocks. + + /gc/heap/allocs:bytes + Cumulative sum of memory allocated to the heap by the application. + + /gc/heap/allocs:objects + Cumulative count of heap allocations triggered by the application. + Note that this does not include tiny objects as defined by /gc/heap/tiny/allocs:objects, + only tiny blocks. + + /gc/heap/frees-by-size:bytes + Distribution of freed heap allocations by approximate size. + Note that this does not include tiny objects as defined by /gc/heap/tiny/allocs:objects, + only tiny blocks. + + /gc/heap/frees:bytes + Cumulative sum of heap memory freed by the garbage collector. + + /gc/heap/frees:objects + Cumulative count of heap allocations whose storage was freed by the garbage collector. + Note that this does not include tiny objects as defined by /gc/heap/tiny/allocs:objects, + only tiny blocks. + + /gc/heap/goal:bytes + Heap size target for the end of the GC cycle. + + /gc/heap/objects:objects + Number of objects, live or unswept, occupying heap memory. + + /gc/heap/tiny/allocs:objects + Count of small allocations that are packed together into blocks. + These allocations are counted separately from other allocations + because each individual allocation is not tracked by the runtime, + only their block. Each block is already accounted for in + allocs-by-size and frees-by-size. + + /gc/limiter/last-enabled:gc-cycle + GC cycle the last time the GC CPU limiter was enabled. + This metric is useful for diagnosing the root cause of an out-of-memory + error, because the limiter trades memory for CPU time when the GC's CPU + time gets too high. This is most likely to occur with use of SetMemoryLimit. + The first GC cycle is cycle 1, so a value of 0 indicates that it was never enabled. + + /gc/pauses:seconds + Distribution individual GC-related stop-the-world pause latencies. + + /gc/stack/starting-size:bytes + The stack size of new goroutines. + + /memory/classes/heap/free:bytes + Memory that is completely free and eligible to be returned to + the underlying system, but has not been. This metric is the + runtime's estimate of free address space that is backed by + physical memory. + + /memory/classes/heap/objects:bytes + Memory occupied by live objects and dead objects that have + not yet been marked free by the garbage collector. + + /memory/classes/heap/released:bytes + Memory that is completely free and has been returned to + the underlying system. This metric is the runtime's estimate of + free address space that is still mapped into the process, but + is not backed by physical memory. + + /memory/classes/heap/stacks:bytes + Memory allocated from the heap that is reserved for stack + space, whether or not it is currently in-use. + + /memory/classes/heap/unused:bytes + Memory that is reserved for heap objects but is not currently + used to hold heap objects. + + /memory/classes/metadata/mcache/free:bytes + Memory that is reserved for runtime mcache structures, but + not in-use. + + /memory/classes/metadata/mcache/inuse:bytes + Memory that is occupied by runtime mcache structures that + are currently being used. + + /memory/classes/metadata/mspan/free:bytes + Memory that is reserved for runtime mspan structures, but + not in-use. + + /memory/classes/metadata/mspan/inuse:bytes + Memory that is occupied by runtime mspan structures that are + currently being used. + + /memory/classes/metadata/other:bytes + Memory that is reserved for or used to hold runtime + metadata. + + /memory/classes/os-stacks:bytes + Stack memory allocated by the underlying operating system. + + /memory/classes/other:bytes + Memory used by execution trace buffers, structures for + debugging the runtime, finalizer and profiler specials, and + more. + + /memory/classes/profiling/buckets:bytes + Memory that is used by the stack trace hash map used for + profiling. + + /memory/classes/total:bytes + All memory mapped by the Go runtime into the current process + as read-write. Note that this does not include memory mapped + by code called via cgo or via the syscall package. + Sum of all metrics in /memory/classes. + + /sched/gomaxprocs:threads + The current runtime.GOMAXPROCS setting, or the number of + operating system threads that can execute user-level Go code + simultaneously. + + /sched/goroutines:goroutines + Count of live goroutines. + + /sched/latencies:seconds + Distribution of the time goroutines have spent in the scheduler + in a runnable state before actually running. + + /sync/mutex/wait/total:seconds + Approximate cumulative time goroutines have spent blocked on a + sync.Mutex or sync.RWMutex. This metric is useful for identifying + global changes in lock contention. Collect a mutex or block + profile using the runtime/pprof package for more detailed + contention data. +*/ +package metrics diff --git a/src/runtime/metrics/example_test.go b/src/runtime/metrics/example_test.go new file mode 100644 index 0000000..624d9d8 --- /dev/null +++ b/src/runtime/metrics/example_test.go @@ -0,0 +1,96 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package metrics_test + +import ( + "fmt" + "runtime/metrics" +) + +func ExampleRead_readingOneMetric() { + // Name of the metric we want to read. + const myMetric = "/memory/classes/heap/free:bytes" + + // Create a sample for the metric. + sample := make([]metrics.Sample, 1) + sample[0].Name = myMetric + + // Sample the metric. + metrics.Read(sample) + + // Check if the metric is actually supported. + // If it's not, the resulting value will always have + // kind KindBad. + if sample[0].Value.Kind() == metrics.KindBad { + panic(fmt.Sprintf("metric %q no longer supported", myMetric)) + } + + // Handle the result. + // + // It's OK to assume a particular Kind for a metric; + // they're guaranteed not to change. + freeBytes := sample[0].Value.Uint64() + + fmt.Printf("free but not released memory: %d\n", freeBytes) +} + +func ExampleRead_readingAllMetrics() { + // Get descriptions for all supported metrics. + descs := metrics.All() + + // Create a sample for each metric. + samples := make([]metrics.Sample, len(descs)) + for i := range samples { + samples[i].Name = descs[i].Name + } + + // Sample the metrics. Re-use the samples slice if you can! + metrics.Read(samples) + + // Iterate over all results. + for _, sample := range samples { + // Pull out the name and value. + name, value := sample.Name, sample.Value + + // Handle each sample. + switch value.Kind() { + case metrics.KindUint64: + fmt.Printf("%s: %d\n", name, value.Uint64()) + case metrics.KindFloat64: + fmt.Printf("%s: %f\n", name, value.Float64()) + case metrics.KindFloat64Histogram: + // The histogram may be quite large, so let's just pull out + // a crude estimate for the median for the sake of this example. + fmt.Printf("%s: %f\n", name, medianBucket(value.Float64Histogram())) + case metrics.KindBad: + // This should never happen because all metrics are supported + // by construction. + panic("bug in runtime/metrics package!") + default: + // This may happen as new metrics get added. + // + // The safest thing to do here is to simply log it somewhere + // as something to look into, but ignore it for now. + // In the worst case, you might temporarily miss out on a new metric. + fmt.Printf("%s: unexpected metric Kind: %v\n", name, value.Kind()) + } + } +} + +func medianBucket(h *metrics.Float64Histogram) float64 { + total := uint64(0) + for _, count := range h.Counts { + total += count + } + thresh := total / 2 + total = 0 + for i, count := range h.Counts { + total += count + if total >= thresh { + return h.Buckets[i] + } + } + panic("should not happen") +} diff --git a/src/runtime/metrics/histogram.go b/src/runtime/metrics/histogram.go new file mode 100644 index 0000000..956422b --- /dev/null +++ b/src/runtime/metrics/histogram.go @@ -0,0 +1,33 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package metrics + +// Float64Histogram represents a distribution of float64 values. +type Float64Histogram struct { + // Counts contains the weights for each histogram bucket. + // + // Given N buckets, Count[n] is the weight of the range + // [bucket[n], bucket[n+1]), for 0 <= n < N. + Counts []uint64 + + // Buckets contains the boundaries of the histogram buckets, in increasing order. + // + // Buckets[0] is the inclusive lower bound of the minimum bucket while + // Buckets[len(Buckets)-1] is the exclusive upper bound of the maximum bucket. + // Hence, there are len(Buckets)-1 counts. Furthermore, len(Buckets) != 1, always, + // since at least two boundaries are required to describe one bucket (and 0 + // boundaries are used to describe 0 buckets). + // + // Buckets[0] is permitted to have value -Inf and Buckets[len(Buckets)-1] is + // permitted to have value Inf. + // + // For a given metric name, the value of Buckets is guaranteed not to change + // between calls until program exit. + // + // This slice value is permitted to alias with other Float64Histograms' Buckets + // fields, so the values within should only ever be read. If they need to be + // modified, the user must make a copy. + Buckets []float64 +} diff --git a/src/runtime/metrics/sample.go b/src/runtime/metrics/sample.go new file mode 100644 index 0000000..4cf8cdf --- /dev/null +++ b/src/runtime/metrics/sample.go @@ -0,0 +1,47 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package metrics + +import ( + _ "runtime" // depends on the runtime via a linkname'd function + "unsafe" +) + +// Sample captures a single metric sample. +type Sample struct { + // Name is the name of the metric sampled. + // + // It must correspond to a name in one of the metric descriptions + // returned by All. + Name string + + // Value is the value of the metric sample. + Value Value +} + +// Implemented in the runtime. +func runtime_readMetrics(unsafe.Pointer, int, int) + +// Read populates each Value field in the given slice of metric samples. +// +// Desired metrics should be present in the slice with the appropriate name. +// The user of this API is encouraged to re-use the same slice between calls for +// efficiency, but is not required to do so. +// +// Note that re-use has some caveats. Notably, Values should not be read or +// manipulated while a Read with that value is outstanding; that is a data race. +// This property includes pointer-typed Values (for example, Float64Histogram) +// whose underlying storage will be reused by Read when possible. To safely use +// such values in a concurrent setting, all data must be deep-copied. +// +// It is safe to execute multiple Read calls concurrently, but their arguments +// must share no underlying memory. When in doubt, create a new []Sample from +// scratch, which is always safe, though may be inefficient. +// +// Sample values with names not appearing in All will have their Value populated +// as KindBad to indicate that the name is unknown. +func Read(m []Sample) { + runtime_readMetrics(unsafe.Pointer(&m[0]), len(m), cap(m)) +} diff --git a/src/runtime/metrics/value.go b/src/runtime/metrics/value.go new file mode 100644 index 0000000..ed9a33d --- /dev/null +++ b/src/runtime/metrics/value.go @@ -0,0 +1,69 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package metrics + +import ( + "math" + "unsafe" +) + +// ValueKind is a tag for a metric Value which indicates its type. +type ValueKind int + +const ( + // KindBad indicates that the Value has no type and should not be used. + KindBad ValueKind = iota + + // KindUint64 indicates that the type of the Value is a uint64. + KindUint64 + + // KindFloat64 indicates that the type of the Value is a float64. + KindFloat64 + + // KindFloat64Histogram indicates that the type of the Value is a *Float64Histogram. + KindFloat64Histogram +) + +// Value represents a metric value returned by the runtime. +type Value struct { + kind ValueKind + scalar uint64 // contains scalar values for scalar Kinds. + pointer unsafe.Pointer // contains non-scalar values. +} + +// Kind returns the tag representing the kind of value this is. +func (v Value) Kind() ValueKind { + return v.kind +} + +// Uint64 returns the internal uint64 value for the metric. +// +// If v.Kind() != KindUint64, this method panics. +func (v Value) Uint64() uint64 { + if v.kind != KindUint64 { + panic("called Uint64 on non-uint64 metric value") + } + return v.scalar +} + +// Float64 returns the internal float64 value for the metric. +// +// If v.Kind() != KindFloat64, this method panics. +func (v Value) Float64() float64 { + if v.kind != KindFloat64 { + panic("called Float64 on non-float64 metric value") + } + return math.Float64frombits(v.scalar) +} + +// Float64Histogram returns the internal *Float64Histogram value for the metric. +// +// If v.Kind() != KindFloat64Histogram, this method panics. +func (v Value) Float64Histogram() *Float64Histogram { + if v.kind != KindFloat64Histogram { + panic("called Float64Histogram on non-Float64Histogram metric value") + } + return (*Float64Histogram)(v.pointer) +} diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go new file mode 100644 index 0000000..d981c8e --- /dev/null +++ b/src/runtime/metrics_test.go @@ -0,0 +1,613 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + "reflect" + "runtime" + "runtime/metrics" + "sort" + "strings" + "sync" + "testing" + "time" + "unsafe" +) + +func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) { + all := metrics.All() + samples := make([]metrics.Sample, len(all)) + descs := make(map[string]metrics.Description) + for i := range all { + samples[i].Name = all[i].Name + descs[all[i].Name] = all[i] + } + return descs, samples +} + +func TestReadMetrics(t *testing.T) { + // Tests whether readMetrics produces values aligning + // with ReadMemStats while the world is stopped. + var mstats runtime.MemStats + _, samples := prepareAllMetricsSamples() + runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples)) + + checkUint64 := func(t *testing.T, m string, got, want uint64) { + t.Helper() + if got != want { + t.Errorf("metric %q: got %d, want %d", m, got, want) + } + } + + // Check to make sure the values we read line up with other values we read. + var allocsBySize *metrics.Float64Histogram + var tinyAllocs uint64 + var mallocs, frees uint64 + for i := range samples { + switch name := samples[i].Name; name { + case "/cgo/go-to-c-calls:calls": + checkUint64(t, name, samples[i].Value.Uint64(), uint64(runtime.NumCgoCall())) + case "/memory/classes/heap/free:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased) + case "/memory/classes/heap/released:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased) + case "/memory/classes/heap/objects:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc) + case "/memory/classes/heap/unused:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc) + case "/memory/classes/heap/stacks:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse) + case "/memory/classes/metadata/mcache/free:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse) + case "/memory/classes/metadata/mcache/inuse:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse) + case "/memory/classes/metadata/mspan/free:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse) + case "/memory/classes/metadata/mspan/inuse:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse) + case "/memory/classes/metadata/other:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys) + case "/memory/classes/os-stacks:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse) + case "/memory/classes/other:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys) + case "/memory/classes/profiling/buckets:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys) + case "/memory/classes/total:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys) + case "/gc/heap/allocs-by-size:bytes": + hist := samples[i].Value.Float64Histogram() + // Skip size class 0 in BySize, because it's always empty and not represented + // in the histogram. + for i, sc := range mstats.BySize[1:] { + if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s { + t.Errorf("bucket does not match size class: got %f, want %f", b, s) + // The rest of the checks aren't expected to work anyway. + continue + } + if c, m := hist.Counts[i], sc.Mallocs; c != m { + t.Errorf("histogram counts do not much BySize for class %d: got %d, want %d", i, c, m) + } + } + allocsBySize = hist + case "/gc/heap/allocs:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc) + case "/gc/heap/frees-by-size:bytes": + hist := samples[i].Value.Float64Histogram() + // Skip size class 0 in BySize, because it's always empty and not represented + // in the histogram. + for i, sc := range mstats.BySize[1:] { + if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s { + t.Errorf("bucket does not match size class: got %f, want %f", b, s) + // The rest of the checks aren't expected to work anyway. + continue + } + if c, f := hist.Counts[i], sc.Frees; c != f { + t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, f) + } + } + case "/gc/heap/frees:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc-mstats.HeapAlloc) + case "/gc/heap/tiny/allocs:objects": + // Currently, MemStats adds tiny alloc count to both Mallocs AND Frees. + // The reason for this is because MemStats couldn't be extended at the time + // but there was a desire to have Mallocs at least be a little more representative, + // while having Mallocs - Frees still represent a live object count. + // Unfortunately, MemStats doesn't actually export a large allocation count, + // so it's impossible to pull this number out directly. + // + // Check tiny allocation count outside of this loop, by using the allocs-by-size + // histogram in order to figure out how many large objects there are. + tinyAllocs = samples[i].Value.Uint64() + // Because the next two metrics tests are checking against Mallocs and Frees, + // we can't check them directly for the same reason: we need to account for tiny + // allocations included in Mallocs and Frees. + case "/gc/heap/allocs:objects": + mallocs = samples[i].Value.Uint64() + case "/gc/heap/frees:objects": + frees = samples[i].Value.Uint64() + case "/gc/heap/objects:objects": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects) + case "/gc/heap/goal:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.NextGC) + case "/gc/cycles/automatic:gc-cycles": + checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC-mstats.NumForcedGC)) + case "/gc/cycles/forced:gc-cycles": + checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumForcedGC)) + case "/gc/cycles/total:gc-cycles": + checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC)) + } + } + + // Check tinyAllocs. + nonTinyAllocs := uint64(0) + for _, c := range allocsBySize.Counts { + nonTinyAllocs += c + } + checkUint64(t, "/gc/heap/tiny/allocs:objects", tinyAllocs, mstats.Mallocs-nonTinyAllocs) + + // Check allocation and free counts. + checkUint64(t, "/gc/heap/allocs:objects", mallocs, mstats.Mallocs-tinyAllocs) + checkUint64(t, "/gc/heap/frees:objects", frees, mstats.Frees-tinyAllocs) +} + +func TestReadMetricsConsistency(t *testing.T) { + // Tests whether readMetrics produces consistent, sensible values. + // The values are read concurrently with the runtime doing other + // things (e.g. allocating) so what we read can't reasonably compared + // to other runtime values (e.g. MemStats). + + // Run a few GC cycles to get some of the stats to be non-zero. + runtime.GC() + runtime.GC() + runtime.GC() + + // Set GOMAXPROCS high then sleep briefly to ensure we generate + // some idle time. + oldmaxprocs := runtime.GOMAXPROCS(10) + time.Sleep(time.Millisecond) + runtime.GOMAXPROCS(oldmaxprocs) + + // Read all the supported metrics through the metrics package. + descs, samples := prepareAllMetricsSamples() + metrics.Read(samples) + + // Check to make sure the values we read make sense. + var totalVirtual struct { + got, want uint64 + } + var objects struct { + alloc, free *metrics.Float64Histogram + allocs, frees uint64 + allocdBytes, freedBytes uint64 + total, totalBytes uint64 + } + var gc struct { + numGC uint64 + pauses uint64 + } + var cpu struct { + gcAssist float64 + gcDedicated float64 + gcIdle float64 + gcPause float64 + gcTotal float64 + + idle float64 + user float64 + + scavengeAssist float64 + scavengeBg float64 + scavengeTotal float64 + + total float64 + } + for i := range samples { + kind := samples[i].Value.Kind() + if want := descs[samples[i].Name].Kind; kind != want { + t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want) + continue + } + if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") { + v := samples[i].Value.Uint64() + totalVirtual.want += v + + // None of these stats should ever get this big. + // If they do, there's probably overflow involved, + // usually due to bad accounting. + if int64(v) < 0 { + t.Errorf("%q has high/negative value: %d", samples[i].Name, v) + } + } + switch samples[i].Name { + case "/cpu/classes/gc/mark/assist:cpu-seconds": + cpu.gcAssist = samples[i].Value.Float64() + case "/cpu/classes/gc/mark/dedicated:cpu-seconds": + cpu.gcDedicated = samples[i].Value.Float64() + case "/cpu/classes/gc/mark/idle:cpu-seconds": + cpu.gcIdle = samples[i].Value.Float64() + case "/cpu/classes/gc/pause:cpu-seconds": + cpu.gcPause = samples[i].Value.Float64() + case "/cpu/classes/gc/total:cpu-seconds": + cpu.gcTotal = samples[i].Value.Float64() + case "/cpu/classes/idle:cpu-seconds": + cpu.idle = samples[i].Value.Float64() + case "/cpu/classes/scavenge/assist:cpu-seconds": + cpu.scavengeAssist = samples[i].Value.Float64() + case "/cpu/classes/scavenge/background:cpu-seconds": + cpu.scavengeBg = samples[i].Value.Float64() + case "/cpu/classes/scavenge/total:cpu-seconds": + cpu.scavengeTotal = samples[i].Value.Float64() + case "/cpu/classes/total:cpu-seconds": + cpu.total = samples[i].Value.Float64() + case "/cpu/classes/user:cpu-seconds": + cpu.user = samples[i].Value.Float64() + case "/memory/classes/total:bytes": + totalVirtual.got = samples[i].Value.Uint64() + case "/memory/classes/heap/objects:bytes": + objects.totalBytes = samples[i].Value.Uint64() + case "/gc/heap/objects:objects": + objects.total = samples[i].Value.Uint64() + case "/gc/heap/allocs:bytes": + objects.allocdBytes = samples[i].Value.Uint64() + case "/gc/heap/allocs:objects": + objects.allocs = samples[i].Value.Uint64() + case "/gc/heap/allocs-by-size:bytes": + objects.alloc = samples[i].Value.Float64Histogram() + case "/gc/heap/frees:bytes": + objects.freedBytes = samples[i].Value.Uint64() + case "/gc/heap/frees:objects": + objects.frees = samples[i].Value.Uint64() + case "/gc/heap/frees-by-size:bytes": + objects.free = samples[i].Value.Float64Histogram() + case "/gc/cycles:gc-cycles": + gc.numGC = samples[i].Value.Uint64() + case "/gc/pauses:seconds": + h := samples[i].Value.Float64Histogram() + gc.pauses = 0 + for i := range h.Counts { + gc.pauses += h.Counts[i] + } + case "/sched/gomaxprocs:threads": + if got, want := samples[i].Value.Uint64(), uint64(runtime.GOMAXPROCS(-1)); got != want { + t.Errorf("gomaxprocs doesn't match runtime.GOMAXPROCS: got %d, want %d", got, want) + } + case "/sched/goroutines:goroutines": + if samples[i].Value.Uint64() < 1 { + t.Error("number of goroutines is less than one") + } + } + } + // Only check this on Linux where we can be reasonably sure we have a high-resolution timer. + if runtime.GOOS == "linux" { + if cpu.gcDedicated <= 0 && cpu.gcAssist <= 0 && cpu.gcIdle <= 0 { + t.Errorf("found no time spent on GC work: %#v", cpu) + } + if cpu.gcPause <= 0 { + t.Errorf("found no GC pauses: %f", cpu.gcPause) + } + if cpu.idle <= 0 { + t.Errorf("found no idle time: %f", cpu.idle) + } + if total := cpu.gcDedicated + cpu.gcAssist + cpu.gcIdle + cpu.gcPause; !withinEpsilon(cpu.gcTotal, total, 0.01) { + t.Errorf("calculated total GC CPU not within 1%% of sampled total: %f vs. %f", total, cpu.gcTotal) + } + if total := cpu.scavengeAssist + cpu.scavengeBg; !withinEpsilon(cpu.scavengeTotal, total, 0.01) { + t.Errorf("calculated total scavenge CPU not within 1%% of sampled total: %f vs. %f", total, cpu.scavengeTotal) + } + if cpu.total <= 0 { + t.Errorf("found no total CPU time passed") + } + if cpu.user <= 0 { + t.Errorf("found no user time passed") + } + if total := cpu.gcTotal + cpu.scavengeTotal + cpu.user + cpu.idle; !withinEpsilon(cpu.total, total, 0.02) { + t.Errorf("calculated total CPU not within 2%% of sampled total: %f vs. %f", total, cpu.total) + } + } + if totalVirtual.got != totalVirtual.want { + t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want) + } + if got, want := objects.allocs-objects.frees, objects.total; got != want { + t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want) + } + if got, want := objects.allocdBytes-objects.freedBytes, objects.totalBytes; got != want { + t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want) + } + if b, c := len(objects.alloc.Buckets), len(objects.alloc.Counts); b != c+1 { + t.Errorf("allocs-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c) + } + if b, c := len(objects.free.Buckets), len(objects.free.Counts); b != c+1 { + t.Errorf("frees-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c) + } + if len(objects.alloc.Buckets) != len(objects.free.Buckets) { + t.Error("allocs-by-size and frees-by-size buckets don't match in length") + } else if len(objects.alloc.Counts) != len(objects.free.Counts) { + t.Error("allocs-by-size and frees-by-size counts don't match in length") + } else { + for i := range objects.alloc.Buckets { + ba := objects.alloc.Buckets[i] + bf := objects.free.Buckets[i] + if ba != bf { + t.Errorf("bucket %d is different for alloc and free hists: %f != %f", i, ba, bf) + } + } + if !t.Failed() { + var gotAlloc, gotFree uint64 + want := objects.total + for i := range objects.alloc.Counts { + if objects.alloc.Counts[i] < objects.free.Counts[i] { + t.Errorf("found more allocs than frees in object dist bucket %d", i) + continue + } + gotAlloc += objects.alloc.Counts[i] + gotFree += objects.free.Counts[i] + } + if got := gotAlloc - gotFree; got != want { + t.Errorf("object distribution counts don't match count of live objects: got %d, want %d", got, want) + } + if gotAlloc != objects.allocs { + t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotAlloc, objects.allocs) + } + if gotFree != objects.frees { + t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotFree, objects.frees) + } + } + } + // The current GC has at least 2 pauses per GC. + // Check to see if that value makes sense. + if gc.pauses < gc.numGC*2 { + t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2) + } +} + +func BenchmarkReadMetricsLatency(b *testing.B) { + stop := applyGCLoad(b) + + // Spend this much time measuring latencies. + latencies := make([]time.Duration, 0, 1024) + _, samples := prepareAllMetricsSamples() + + // Hit metrics.Read continuously and measure. + b.ResetTimer() + for i := 0; i < b.N; i++ { + start := time.Now() + metrics.Read(samples) + latencies = append(latencies, time.Since(start)) + } + // Make sure to stop the timer before we wait! The load created above + // is very heavy-weight and not easy to stop, so we could end up + // confusing the benchmarking framework for small b.N. + b.StopTimer() + stop() + + // Disable the default */op metrics. + // ns/op doesn't mean anything because it's an average, but we + // have a sleep in our b.N loop above which skews this significantly. + b.ReportMetric(0, "ns/op") + b.ReportMetric(0, "B/op") + b.ReportMetric(0, "allocs/op") + + // Sort latencies then report percentiles. + sort.Slice(latencies, func(i, j int) bool { + return latencies[i] < latencies[j] + }) + b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns") + b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns") + b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns") +} + +var readMetricsSink [1024]interface{} + +func TestReadMetricsCumulative(t *testing.T) { + // Set up the set of metrics marked cumulative. + descs := metrics.All() + var samples [2][]metrics.Sample + samples[0] = make([]metrics.Sample, len(descs)) + samples[1] = make([]metrics.Sample, len(descs)) + total := 0 + for i := range samples[0] { + if !descs[i].Cumulative { + continue + } + samples[0][total].Name = descs[i].Name + total++ + } + samples[0] = samples[0][:total] + samples[1] = samples[1][:total] + copy(samples[1], samples[0]) + + // Start some noise in the background. + var wg sync.WaitGroup + wg.Add(1) + done := make(chan struct{}) + go func() { + defer wg.Done() + for { + // Add more things here that could influence metrics. + for i := 0; i < len(readMetricsSink); i++ { + readMetricsSink[i] = make([]byte, 1024) + select { + case <-done: + return + default: + } + } + runtime.GC() + } + }() + + sum := func(us []uint64) uint64 { + total := uint64(0) + for _, u := range us { + total += u + } + return total + } + + // Populate the first generation. + metrics.Read(samples[0]) + + // Check to make sure that these metrics only grow monotonically. + for gen := 1; gen < 10; gen++ { + metrics.Read(samples[gen%2]) + for i := range samples[gen%2] { + name := samples[gen%2][i].Name + vNew, vOld := samples[gen%2][i].Value, samples[1-(gen%2)][i].Value + + switch vNew.Kind() { + case metrics.KindUint64: + new := vNew.Uint64() + old := vOld.Uint64() + if new < old { + t.Errorf("%s decreased: %d < %d", name, new, old) + } + case metrics.KindFloat64: + new := vNew.Float64() + old := vOld.Float64() + if new < old { + t.Errorf("%s decreased: %f < %f", name, new, old) + } + case metrics.KindFloat64Histogram: + new := sum(vNew.Float64Histogram().Counts) + old := sum(vOld.Float64Histogram().Counts) + if new < old { + t.Errorf("%s counts decreased: %d < %d", name, new, old) + } + } + } + } + close(done) + + wg.Wait() +} + +func withinEpsilon(v1, v2, e float64) bool { + return v2-v2*e <= v1 && v1 <= v2+v2*e +} + +func TestMutexWaitTimeMetric(t *testing.T) { + var sample [1]metrics.Sample + sample[0].Name = "/sync/mutex/wait/total:seconds" + + locks := []locker2{ + new(mutex), + new(rwmutexWrite), + new(rwmutexReadWrite), + new(rwmutexWriteRead), + } + for _, lock := range locks { + t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) { + metrics.Read(sample[:]) + before := time.Duration(sample[0].Value.Float64() * 1e9) + + minMutexWaitTime := generateMutexWaitTime(lock) + + metrics.Read(sample[:]) + after := time.Duration(sample[0].Value.Float64() * 1e9) + + if wt := after - before; wt < minMutexWaitTime { + t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime) + } + }) + } +} + +// locker2 represents an API surface of two concurrent goroutines +// locking the same resource, but through different APIs. It's intended +// to abstract over the relationship of two Lock calls or an RLock +// and a Lock call. +type locker2 interface { + Lock1() + Unlock1() + Lock2() + Unlock2() +} + +type mutex struct { + mu sync.Mutex +} + +func (m *mutex) Lock1() { m.mu.Lock() } +func (m *mutex) Unlock1() { m.mu.Unlock() } +func (m *mutex) Lock2() { m.mu.Lock() } +func (m *mutex) Unlock2() { m.mu.Unlock() } + +type rwmutexWrite struct { + mu sync.RWMutex +} + +func (m *rwmutexWrite) Lock1() { m.mu.Lock() } +func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() } +func (m *rwmutexWrite) Lock2() { m.mu.Lock() } +func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() } + +type rwmutexReadWrite struct { + mu sync.RWMutex +} + +func (m *rwmutexReadWrite) Lock1() { m.mu.RLock() } +func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() } +func (m *rwmutexReadWrite) Lock2() { m.mu.Lock() } +func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() } + +type rwmutexWriteRead struct { + mu sync.RWMutex +} + +func (m *rwmutexWriteRead) Lock1() { m.mu.Lock() } +func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() } +func (m *rwmutexWriteRead) Lock2() { m.mu.RLock() } +func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() } + +// generateMutexWaitTime causes a couple of goroutines +// to block a whole bunch of times on a sync.Mutex, returning +// the minimum amount of time that should be visible in the +// /sync/mutex-wait:seconds metric. +func generateMutexWaitTime(mu locker2) time.Duration { + // Set up the runtime to always track casgstatus transitions for metrics. + *runtime.CasGStatusAlwaysTrack = true + + mu.Lock1() + + // Start up a goroutine to wait on the lock. + gc := make(chan *runtime.G) + done := make(chan bool) + go func() { + gc <- runtime.Getg() + + for { + mu.Lock2() + mu.Unlock2() + if <-done { + return + } + } + }() + gp := <-gc + + // Set the block time high enough so that it will always show up, even + // on systems with coarse timer granularity. + const blockTime = 100 * time.Millisecond + + // Make sure the goroutine spawned above actually blocks on the lock. + for { + if runtime.GIsWaitingOnMutex(gp) { + break + } + runtime.Gosched() + } + + // Let some amount of time pass. + time.Sleep(blockTime) + + // Let the other goroutine acquire the lock. + mu.Unlock1() + done <- true + + // Reset flag. + *runtime.CasGStatusAlwaysTrack = false + return blockTime +} |