summaryrefslogtreecommitdiffstats
path: root/src/runtime/mgclimit.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/mgclimit.go')
-rw-r--r--src/runtime/mgclimit.go483
1 files changed, 483 insertions, 0 deletions
diff --git a/src/runtime/mgclimit.go b/src/runtime/mgclimit.go
new file mode 100644
index 0000000..bcbe7f8
--- /dev/null
+++ b/src/runtime/mgclimit.go
@@ -0,0 +1,483 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "runtime/internal/atomic"
+
+// gcCPULimiter is a mechanism to limit GC CPU utilization in situations
+// where it might become excessive and inhibit application progress (e.g.
+// a death spiral).
+//
+// The core of the limiter is a leaky bucket mechanism that fills with GC
+// CPU time and drains with mutator time. Because the bucket fills and
+// drains with time directly (i.e. without any weighting), this effectively
+// sets a very conservative limit of 50%. This limit could be enforced directly,
+// however, but the purpose of the bucket is to accommodate spikes in GC CPU
+// utilization without hurting throughput.
+//
+// Note that the bucket in the leaky bucket mechanism can never go negative,
+// so the GC never gets credit for a lot of CPU time spent without the GC
+// running. This is intentional, as an application that stays idle for, say,
+// an entire day, could build up enough credit to fail to prevent a death
+// spiral the following day. The bucket's capacity is the GC's only leeway.
+//
+// The capacity thus also sets the window the limiter considers. For example,
+// if the capacity of the bucket is 1 cpu-second, then the limiter will not
+// kick in until at least 1 full cpu-second in the last 2 cpu-second window
+// is spent on GC CPU time.
+var gcCPULimiter gcCPULimiterState
+
+type gcCPULimiterState struct {
+ lock atomic.Uint32
+
+ enabled atomic.Bool
+ bucket struct {
+ // Invariants:
+ // - fill >= 0
+ // - capacity >= 0
+ // - fill <= capacity
+ fill, capacity uint64
+ }
+ // overflow is the cumulative amount of GC CPU time that we tried to fill the
+ // bucket with but exceeded its capacity.
+ overflow uint64
+
+ // gcEnabled is an internal copy of gcBlackenEnabled that determines
+ // whether the limiter tracks total assist time.
+ //
+ // gcBlackenEnabled isn't used directly so as to keep this structure
+ // unit-testable.
+ gcEnabled bool
+
+ // transitioning is true when the GC is in a STW and transitioning between
+ // the mark and sweep phases.
+ transitioning bool
+
+ // assistTimePool is the accumulated assist time since the last update.
+ assistTimePool atomic.Int64
+
+ // idleMarkTimePool is the accumulated idle mark time since the last update.
+ idleMarkTimePool atomic.Int64
+
+ // idleTimePool is the accumulated time Ps spent on the idle list since the last update.
+ idleTimePool atomic.Int64
+
+ // lastUpdate is the nanotime timestamp of the last time update was called.
+ //
+ // Updated under lock, but may be read concurrently.
+ lastUpdate atomic.Int64
+
+ // lastEnabledCycle is the GC cycle that last had the limiter enabled.
+ lastEnabledCycle atomic.Uint32
+
+ // nprocs is an internal copy of gomaxprocs, used to determine total available
+ // CPU time.
+ //
+ // gomaxprocs isn't used directly so as to keep this structure unit-testable.
+ nprocs int32
+
+ // test indicates whether this instance of the struct was made for testing purposes.
+ test bool
+}
+
+// limiting returns true if the CPU limiter is currently enabled, meaning the Go GC
+// should take action to limit CPU utilization.
+//
+// It is safe to call concurrently with other operations.
+func (l *gcCPULimiterState) limiting() bool {
+ return l.enabled.Load()
+}
+
+// startGCTransition notifies the limiter of a GC transition.
+//
+// This call takes ownership of the limiter and disables all other means of
+// updating the limiter. Release ownership by calling finishGCTransition.
+//
+// It is safe to call concurrently with other operations.
+func (l *gcCPULimiterState) startGCTransition(enableGC bool, now int64) {
+ if !l.tryLock() {
+ // This must happen during a STW, so we can't fail to acquire the lock.
+ // If we did, something went wrong. Throw.
+ throw("failed to acquire lock to start a GC transition")
+ }
+ if l.gcEnabled == enableGC {
+ throw("transitioning GC to the same state as before?")
+ }
+ // Flush whatever was left between the last update and now.
+ l.updateLocked(now)
+ l.gcEnabled = enableGC
+ l.transitioning = true
+ // N.B. finishGCTransition releases the lock.
+ //
+ // We don't release here to increase the chance that if there's a failure
+ // to finish the transition, that we throw on failing to acquire the lock.
+}
+
+// finishGCTransition notifies the limiter that the GC transition is complete
+// and releases ownership of it. It also accumulates STW time in the bucket.
+// now must be the timestamp from the end of the STW pause.
+func (l *gcCPULimiterState) finishGCTransition(now int64) {
+ if !l.transitioning {
+ throw("finishGCTransition called without starting one?")
+ }
+ // Count the full nprocs set of CPU time because the world is stopped
+ // between startGCTransition and finishGCTransition. Even though the GC
+ // isn't running on all CPUs, it is preventing user code from doing so,
+ // so it might as well be.
+ if lastUpdate := l.lastUpdate.Load(); now >= lastUpdate {
+ l.accumulate(0, (now-lastUpdate)*int64(l.nprocs))
+ }
+ l.lastUpdate.Store(now)
+ l.transitioning = false
+ l.unlock()
+}
+
+// gcCPULimiterUpdatePeriod dictates the maximum amount of wall-clock time
+// we can go before updating the limiter.
+const gcCPULimiterUpdatePeriod = 10e6 // 10ms
+
+// needUpdate returns true if the limiter's maximum update period has been
+// exceeded, and so would benefit from an update.
+func (l *gcCPULimiterState) needUpdate(now int64) bool {
+ return now-l.lastUpdate.Load() > gcCPULimiterUpdatePeriod
+}
+
+// addAssistTime notifies the limiter of additional assist time. It will be
+// included in the next update.
+func (l *gcCPULimiterState) addAssistTime(t int64) {
+ l.assistTimePool.Add(t)
+}
+
+// addIdleTime notifies the limiter of additional time a P spent on the idle list. It will be
+// subtracted from the total CPU time in the next update.
+func (l *gcCPULimiterState) addIdleTime(t int64) {
+ l.idleTimePool.Add(t)
+}
+
+// update updates the bucket given runtime-specific information. now is the
+// current monotonic time in nanoseconds.
+//
+// This is safe to call concurrently with other operations, except *GCTransition.
+func (l *gcCPULimiterState) update(now int64) {
+ if !l.tryLock() {
+ // We failed to acquire the lock, which means something else is currently
+ // updating. Just drop our update, the next one to update will include
+ // our total assist time.
+ return
+ }
+ if l.transitioning {
+ throw("update during transition")
+ }
+ l.updateLocked(now)
+ l.unlock()
+}
+
+// updatedLocked is the implementation of update. l.lock must be held.
+func (l *gcCPULimiterState) updateLocked(now int64) {
+ lastUpdate := l.lastUpdate.Load()
+ if now < lastUpdate {
+ // Defensively avoid overflow. This isn't even the latest update anyway.
+ return
+ }
+ windowTotalTime := (now - lastUpdate) * int64(l.nprocs)
+ l.lastUpdate.Store(now)
+
+ // Drain the pool of assist time.
+ assistTime := l.assistTimePool.Load()
+ if assistTime != 0 {
+ l.assistTimePool.Add(-assistTime)
+ }
+
+ // Drain the pool of idle time.
+ idleTime := l.idleTimePool.Load()
+ if idleTime != 0 {
+ l.idleTimePool.Add(-idleTime)
+ }
+
+ if !l.test {
+ // Consume time from in-flight events. Make sure we're not preemptible so allp can't change.
+ //
+ // The reason we do this instead of just waiting for those events to finish and push updates
+ // is to ensure that all the time we're accounting for happened sometime between lastUpdate
+ // and now. This dramatically simplifies reasoning about the limiter because we're not at
+ // risk of extra time being accounted for in this window than actually happened in this window,
+ // leading to all sorts of weird transient behavior.
+ mp := acquirem()
+ for _, pp := range allp {
+ typ, duration := pp.limiterEvent.consume(now)
+ switch typ {
+ case limiterEventIdleMarkWork:
+ fallthrough
+ case limiterEventIdle:
+ idleTime += duration
+ case limiterEventMarkAssist:
+ fallthrough
+ case limiterEventScavengeAssist:
+ assistTime += duration
+ case limiterEventNone:
+ break
+ default:
+ throw("invalid limiter event type found")
+ }
+ }
+ releasem(mp)
+ }
+
+ // Compute total GC time.
+ windowGCTime := assistTime
+ if l.gcEnabled {
+ windowGCTime += int64(float64(windowTotalTime) * gcBackgroundUtilization)
+ }
+
+ // Subtract out all idle time from the total time. Do this after computing
+ // GC time, because the background utilization is dependent on the *real*
+ // total time, not the total time after idle time is subtracted.
+ //
+ // Idle time is counted as any time that a P is on the P idle list plus idle mark
+ // time. Idle mark workers soak up time that the application spends idle.
+ //
+ // On a heavily undersubscribed system, any additional idle time can skew GC CPU
+ // utilization, because the GC might be executing continuously and thrashing,
+ // yet the CPU utilization with respect to GOMAXPROCS will be quite low, so
+ // the limiter fails to turn on. By subtracting idle time, we're removing time that
+ // we know the application was idle giving a more accurate picture of whether
+ // the GC is thrashing.
+ //
+ // Note that this can cause the limiter to turn on even if it's not needed. For
+ // instance, on a system with 32 Ps but only 1 running goroutine, each GC will have
+ // 8 dedicated GC workers. Assuming the GC cycle is half mark phase and half sweep
+ // phase, then the GC CPU utilization over that cycle, with idle time removed, will
+ // be 8/(8+2) = 80%. Even though the limiter turns on, though, assist should be
+ // unnecessary, as the GC has way more CPU time to outpace the 1 goroutine that's
+ // running.
+ windowTotalTime -= idleTime
+
+ l.accumulate(windowTotalTime-windowGCTime, windowGCTime)
+}
+
+// accumulate adds time to the bucket and signals whether the limiter is enabled.
+//
+// This is an internal function that deals just with the bucket. Prefer update.
+// l.lock must be held.
+func (l *gcCPULimiterState) accumulate(mutatorTime, gcTime int64) {
+ headroom := l.bucket.capacity - l.bucket.fill
+ enabled := headroom == 0
+
+ // Let's be careful about three things here:
+ // 1. The addition and subtraction, for the invariants.
+ // 2. Overflow.
+ // 3. Excessive mutation of l.enabled, which is accessed
+ // by all assists, potentially more than once.
+ change := gcTime - mutatorTime
+
+ // Handle limiting case.
+ if change > 0 && headroom <= uint64(change) {
+ l.overflow += uint64(change) - headroom
+ l.bucket.fill = l.bucket.capacity
+ if !enabled {
+ l.enabled.Store(true)
+ l.lastEnabledCycle.Store(memstats.numgc + 1)
+ }
+ return
+ }
+
+ // Handle non-limiting cases.
+ if change < 0 && l.bucket.fill <= uint64(-change) {
+ // Bucket emptied.
+ l.bucket.fill = 0
+ } else {
+ // All other cases.
+ l.bucket.fill -= uint64(-change)
+ }
+ if change != 0 && enabled {
+ l.enabled.Store(false)
+ }
+}
+
+// tryLock attempts to lock l. Returns true on success.
+func (l *gcCPULimiterState) tryLock() bool {
+ return l.lock.CompareAndSwap(0, 1)
+}
+
+// unlock releases the lock on l. Must be called if tryLock returns true.
+func (l *gcCPULimiterState) unlock() {
+ old := l.lock.Swap(0)
+ if old != 1 {
+ throw("double unlock")
+ }
+}
+
+// capacityPerProc is the limiter's bucket capacity for each P in GOMAXPROCS.
+const capacityPerProc = 1e9 // 1 second in nanoseconds
+
+// resetCapacity updates the capacity based on GOMAXPROCS. Must not be called
+// while the GC is enabled.
+//
+// It is safe to call concurrently with other operations.
+func (l *gcCPULimiterState) resetCapacity(now int64, nprocs int32) {
+ if !l.tryLock() {
+ // This must happen during a STW, so we can't fail to acquire the lock.
+ // If we did, something went wrong. Throw.
+ throw("failed to acquire lock to reset capacity")
+ }
+ // Flush the rest of the time for this period.
+ l.updateLocked(now)
+ l.nprocs = nprocs
+
+ l.bucket.capacity = uint64(nprocs) * capacityPerProc
+ if l.bucket.fill > l.bucket.capacity {
+ l.bucket.fill = l.bucket.capacity
+ l.enabled.Store(true)
+ l.lastEnabledCycle.Store(memstats.numgc + 1)
+ } else if l.bucket.fill < l.bucket.capacity {
+ l.enabled.Store(false)
+ }
+ l.unlock()
+}
+
+// limiterEventType indicates the type of an event occurring on some P.
+//
+// These events represent the full set of events that the GC CPU limiter tracks
+// to execute its function.
+//
+// This type may use no more than limiterEventBits bits of information.
+type limiterEventType uint8
+
+const (
+ limiterEventNone limiterEventType = iota // None of the following events.
+ limiterEventIdleMarkWork // Refers to an idle mark worker (see gcMarkWorkerMode).
+ limiterEventMarkAssist // Refers to mark assist (see gcAssistAlloc).
+ limiterEventScavengeAssist // Refers to a scavenge assist (see allocSpan).
+ limiterEventIdle // Refers to time a P spent on the idle list.
+
+ limiterEventBits = 3
+)
+
+// limiterEventTypeMask is a mask for the bits in p.limiterEventStart that represent
+// the event type. The rest of the bits of that field represent a timestamp.
+const (
+ limiterEventTypeMask = uint64((1<<limiterEventBits)-1) << (64 - limiterEventBits)
+ limiterEventStampNone = limiterEventStamp(0)
+)
+
+// limiterEventStamp is a nanotime timestamp packed with a limiterEventType.
+type limiterEventStamp uint64
+
+// makeLimiterEventStamp creates a new stamp from the event type and the current timestamp.
+func makeLimiterEventStamp(typ limiterEventType, now int64) limiterEventStamp {
+ return limiterEventStamp(uint64(typ)<<(64-limiterEventBits) | (uint64(now) &^ limiterEventTypeMask))
+}
+
+// duration computes the difference between now and the start time stored in the stamp.
+//
+// Returns 0 if the difference is negative, which may happen if now is stale or if the
+// before and after timestamps cross a 2^(64-limiterEventBits) boundary.
+func (s limiterEventStamp) duration(now int64) int64 {
+ // The top limiterEventBits bits of the timestamp are derived from the current time
+ // when computing a duration.
+ start := int64((uint64(now) & limiterEventTypeMask) | (uint64(s) &^ limiterEventTypeMask))
+ if now < start {
+ return 0
+ }
+ return now - start
+}
+
+// type extracts the event type from the stamp.
+func (s limiterEventStamp) typ() limiterEventType {
+ return limiterEventType(s >> (64 - limiterEventBits))
+}
+
+// limiterEvent represents tracking state for an event tracked by the GC CPU limiter.
+type limiterEvent struct {
+ stamp atomic.Uint64 // Stores a limiterEventStamp.
+}
+
+// start begins tracking a new limiter event of the current type. If an event
+// is already in flight, then a new event cannot begin because the current time is
+// already being attributed to that event. In this case, this function returns false.
+// Otherwise, it returns true.
+//
+// The caller must be non-preemptible until at least stop is called or this function
+// returns false. Because this is trying to measure "on-CPU" time of some event, getting
+// scheduled away during it can mean that whatever we're measuring isn't a reflection
+// of "on-CPU" time. The OS could deschedule us at any time, but we want to maintain as
+// close of an approximation as we can.
+func (e *limiterEvent) start(typ limiterEventType, now int64) bool {
+ if limiterEventStamp(e.stamp.Load()).typ() != limiterEventNone {
+ return false
+ }
+ e.stamp.Store(uint64(makeLimiterEventStamp(typ, now)))
+ return true
+}
+
+// consume acquires the partial event CPU time from any in-flight event.
+// It achieves this by storing the current time as the new event time.
+//
+// Returns the type of the in-flight event, as well as how long it's currently been
+// executing for. Returns limiterEventNone if no event is active.
+func (e *limiterEvent) consume(now int64) (typ limiterEventType, duration int64) {
+ // Read the limiter event timestamp and update it to now.
+ for {
+ old := limiterEventStamp(e.stamp.Load())
+ typ = old.typ()
+ if typ == limiterEventNone {
+ // There's no in-flight event, so just push that up.
+ return
+ }
+ duration = old.duration(now)
+ if duration == 0 {
+ // We might have a stale now value, or this crossed the
+ // 2^(64-limiterEventBits) boundary in the clock readings.
+ // Just ignore it.
+ return limiterEventNone, 0
+ }
+ new := makeLimiterEventStamp(typ, now)
+ if e.stamp.CompareAndSwap(uint64(old), uint64(new)) {
+ break
+ }
+ }
+ return
+}
+
+// stop stops the active limiter event. Throws if the
+//
+// The caller must be non-preemptible across the event. See start as to why.
+func (e *limiterEvent) stop(typ limiterEventType, now int64) {
+ var stamp limiterEventStamp
+ for {
+ stamp = limiterEventStamp(e.stamp.Load())
+ if stamp.typ() != typ {
+ print("runtime: want=", typ, " got=", stamp.typ(), "\n")
+ throw("limiterEvent.stop: found wrong event in p's limiter event slot")
+ }
+ if e.stamp.CompareAndSwap(uint64(stamp), uint64(limiterEventStampNone)) {
+ break
+ }
+ }
+ duration := stamp.duration(now)
+ if duration == 0 {
+ // It's possible that we're missing time because we crossed a
+ // 2^(64-limiterEventBits) boundary between the start and end.
+ // In this case, we're dropping that information. This is OK because
+ // at worst it'll cause a transient hiccup that will quickly resolve
+ // itself as all new timestamps begin on the other side of the boundary.
+ // Such a hiccup should be incredibly rare.
+ return
+ }
+ // Account for the event.
+ switch typ {
+ case limiterEventIdleMarkWork:
+ gcCPULimiter.addIdleTime(duration)
+ case limiterEventIdle:
+ gcCPULimiter.addIdleTime(duration)
+ sched.idleTime.Add(duration)
+ case limiterEventMarkAssist:
+ fallthrough
+ case limiterEventScavengeAssist:
+ gcCPULimiter.addAssistTime(duration)
+ default:
+ throw("limiterEvent.stop: invalid limiter event type found")
+ }
+}