Diffstat
-rw-r--r-- | src/runtime/runtime2.go | 1242
1 file changed, 1242 insertions, 0 deletions
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
new file mode 100644
index 0000000..63320d4
--- /dev/null
+++ b/src/runtime/runtime2.go
@@ -0,0 +1,1242 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"internal/abi"
+	"internal/chacha8rand"
+	"internal/goarch"
+	"runtime/internal/atomic"
+	"runtime/internal/sys"
+	"unsafe"
+)
+
+// defined constants
+const (
+	// G status
+	//
+	// Beyond indicating the general state of a G, the G status
+	// acts like a lock on the goroutine's stack (and hence its
+	// ability to execute user code).
+	//
+	// If you add to this list, add to the list
+	// of "okay during garbage collection" status
+	// in mgcmark.go too.
+	//
+	// TODO(austin): The _Gscan bit could be much lighter-weight.
+	// For example, we could choose not to run _Gscanrunnable
+	// goroutines found in the run queue, rather than CAS-looping
+	// until they become _Grunnable. And transitions like
+	// _Gscanwaiting -> _Gscanrunnable are actually okay because
+	// they don't affect stack ownership.
+
+	// _Gidle means this goroutine was just allocated and has not
+	// yet been initialized.
+	_Gidle = iota // 0
+
+	// _Grunnable means this goroutine is on a run queue. It is
+	// not currently executing user code. The stack is not owned.
+	_Grunnable // 1
+
+	// _Grunning means this goroutine may execute user code. The
+	// stack is owned by this goroutine. It is not on a run queue.
+	// It is assigned an M and a P (g.m and g.m.p are valid).
+	_Grunning // 2
+
+	// _Gsyscall means this goroutine is executing a system call.
+	// It is not executing user code. The stack is owned by this
+	// goroutine. It is not on a run queue. It is assigned an M.
+	_Gsyscall // 3
+
+	// _Gwaiting means this goroutine is blocked in the runtime.
+	// It is not executing user code. It is not on a run queue,
+	// but should be recorded somewhere (e.g., a channel wait
+	// queue) so it can be ready()d when necessary. The stack is
+	// not owned *except* that a channel operation may read or
+	// write parts of the stack under the appropriate channel
+	// lock. Otherwise, it is not safe to access the stack after a
+	// goroutine enters _Gwaiting (e.g., it may get moved).
+	_Gwaiting // 4
+
+	// _Gmoribund_unused is currently unused, but hardcoded in gdb
+	// scripts.
+	_Gmoribund_unused // 5
+
+	// _Gdead means this goroutine is currently unused. It may be
+	// just exited, on a free list, or just being initialized. It
+	// is not executing user code. It may or may not have a stack
+	// allocated. The G and its stack (if any) are owned by the M
+	// that is exiting the G or that obtained the G from the free
+	// list.
+	_Gdead // 6
+
+	// _Genqueue_unused is currently unused.
+	_Genqueue_unused // 7
+
+	// _Gcopystack means this goroutine's stack is being moved. It
+	// is not executing user code and is not on a run queue. The
+	// stack is owned by the goroutine that put it in _Gcopystack.
+	_Gcopystack // 8
+
+	// _Gpreempted means this goroutine stopped itself for a
+	// suspendG preemption. It is like _Gwaiting, but nothing is
+	// yet responsible for ready()ing it. Some suspendG must CAS
+	// the status to _Gwaiting to take responsibility for
+	// ready()ing this G.
+	_Gpreempted // 9
+
+	// _Gscan combined with one of the above states other than
+	// _Grunning indicates that GC is scanning the stack. The
+	// goroutine is not executing user code and the stack is owned
+	// by the goroutine that set the _Gscan bit.
+	//
+	// _Gscanrunning is different: it is used to briefly block
+	// state transitions while GC signals the G to scan its own
+	// stack. This is otherwise like _Grunning.
+	//
+	// atomicstatus&~Gscan gives the state the goroutine will
+	// return to when the scan completes.
+	_Gscan          = 0x1000
+	_Gscanrunnable  = _Gscan + _Grunnable  // 0x1001
+	_Gscanrunning   = _Gscan + _Grunning   // 0x1002
+	_Gscansyscall   = _Gscan + _Gsyscall   // 0x1003
+	_Gscanwaiting   = _Gscan + _Gwaiting   // 0x1004
+	_Gscanpreempted = _Gscan + _Gpreempted // 0x1009
+)
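[Editor's note: the _Gscan bit acting as a stack lock is the key idea above. A scanner claims a stopped goroutine's stack by CASing the bit into a stable status and releases it by CASing back. Below is a minimal standalone sketch of that protocol; the names gStatus, tryAcquireScan, and releaseScan are invented for illustration (the runtime's real helpers are castogscanstatus and casfrom_Gscanstatus in proc.go).]

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	gRunnable uint32 = 1
	gWaiting  uint32 = 4
	gScan     uint32 = 0x1000
)

// tryAcquireScan claims the stack of a goroutine in stable state old
// by CASing in the scan bit. It fails if the status changed first,
// e.g. the goroutine was rescheduled. (Hypothetical helper.)
func tryAcquireScan(status *atomic.Uint32, old uint32) bool {
	return status.CompareAndSwap(old, old|gScan)
}

// releaseScan hands stack ownership back by restoring the stable state.
func releaseScan(status *atomic.Uint32, old uint32) {
	if !status.CompareAndSwap(old|gScan, old) {
		panic("status changed while scan bit was held")
	}
}

func main() {
	var status atomic.Uint32
	status.Store(gWaiting)
	if tryAcquireScan(&status, gWaiting) {
		// ... scan the stack; status&^gScan is the state to restore ...
		releaseScan(&status, gWaiting)
	}
	fmt.Printf("status back to %#x\n", status.Load())
}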
+
+const (
+	// P status
+
+	// _Pidle means a P is not being used to run user code or the
+	// scheduler. Typically, it's on the idle P list and available
+	// to the scheduler, but it may just be transitioning between
+	// other states.
+	//
+	// The P is owned by the idle list or by whatever is
+	// transitioning its state. Its run queue is empty.
+	_Pidle = iota
+
+	// _Prunning means a P is owned by an M and is being used to
+	// run user code or the scheduler. Only the M that owns this P
+	// is allowed to change the P's status from _Prunning. The M
+	// may transition the P to _Pidle (if it has no more work to
+	// do), _Psyscall (when entering a syscall), or _Pgcstop (to
+	// halt for the GC). The M may also hand ownership of the P
+	// off directly to another M (e.g., to schedule a locked G).
+	_Prunning
+
+	// _Psyscall means a P is not running user code. It has
+	// affinity to an M in a syscall but is not owned by it and
+	// may be stolen by another M. This is similar to _Pidle but
+	// uses lightweight transitions and maintains M affinity.
+	//
+	// Leaving _Psyscall must be done with a CAS, either to steal
+	// or retake the P. Note that there's an ABA hazard: even if
+	// an M successfully CASes its original P back to _Prunning
+	// after a syscall, it must understand the P may have been
+	// used by another M in the interim.
+	_Psyscall
+
+	// _Pgcstop means a P is halted for STW and owned by the M
+	// that stopped the world. The M that stopped the world
+	// continues to use its P, even in _Pgcstop. Transitioning
+	// from _Prunning to _Pgcstop causes an M to release its P and
+	// park.
+	//
+	// The P retains its run queue and startTheWorld will restart
+	// the scheduler on Ps with non-empty run queues.
+	_Pgcstop
+
+	// _Pdead means a P is no longer used (GOMAXPROCS shrank). We
+	// reuse Ps if GOMAXPROCS increases. A dead P is mostly
+	// stripped of its resources, though a few things remain
+	// (e.g., trace buffers).
+	_Pdead
+)
+
+// Mutual exclusion locks. In the uncontended case,
+// as fast as spin locks (just a few user-level instructions),
+// but on the contention path they sleep in the kernel.
+// A zeroed Mutex is unlocked (no need to initialize each lock).
+// Initialization is helpful for static lock ranking, but not required.
+type mutex struct {
+	// Empty struct if lock ranking is disabled, otherwise includes the lock rank
+	lockRankStruct
+	// Futex-based impl treats it as uint32 key,
+	// while sema-based impl as M* waitm.
+	// Used to be a union, but unions break precise GC.
+	key uintptr
+}
+
+// sleep and wakeup on one-time events.
+// before any calls to notesleep or notewakeup,
+// must call noteclear to initialize the Note.
+// then, exactly one thread can call notesleep
+// and exactly one thread can call notewakeup (once).
+// once notewakeup has been called, the notesleep
+// will return. future notesleep will return immediately.
+// subsequent noteclear must be called only after
+// previous notesleep has returned, e.g. it's disallowed
+// to call noteclear straight after notewakeup.
+//
+// notetsleep is like notesleep but wakes up after
+// a given number of nanoseconds even if the event
+// has not yet happened. if a goroutine uses notetsleep to
+// wake up early, it must wait to call noteclear until it
+// can be sure that no other goroutine is calling
+// notewakeup.
+//
+// notesleep/notetsleep are generally called on g0,
+// notetsleepg is similar to notetsleep but is called on user g.
+type note struct {
+	// Futex-based impl treats it as uint32 key,
+	// while sema-based impl as M* waitm.
+	// Used to be a union, but unions break precise GC.
+	key uintptr
+}
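[Editor's note: the CAS rule for leaving _Psyscall is what makes syscall handoff race-free: exactly one of the stealing M and the returning M wins. A simplified sketch of the two sides of that race, with invented names (the real versions are in retake and exitsyscallfast in proc.go):]

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	pIdle uint32 = iota
	pRunning
	pSyscall
)

// trySteal is the sysmon/retake side: take the P away from a
// long-running syscall by CASing it out of pSyscall.
func trySteal(status *atomic.Uint32) bool {
	return status.CompareAndSwap(pSyscall, pIdle)
}

// tryReacquire is the returning M's side. Even on success it cannot
// assume the P is untouched: another M may have stolen and returned
// it in the interim (the ABA hazard the comment above describes).
func tryReacquire(status *atomic.Uint32) bool {
	return status.CompareAndSwap(pSyscall, pRunning)
}

func main() {
	var status atomic.Uint32
	status.Store(pSyscall)
	fmt.Println("stolen:", trySteal(&status), "reacquired:", tryReacquire(&status))
}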
+
+type funcval struct {
+	fn uintptr
+	// variable-size, fn-specific data here
+}
+
+type iface struct {
+	tab  *itab
+	data unsafe.Pointer
+}
+
+type eface struct {
+	_type *_type
+	data  unsafe.Pointer
+}
+
+func efaceOf(ep *any) *eface {
+	return (*eface)(unsafe.Pointer(ep))
+}
+
+// The guintptr, muintptr, and puintptr are all used to bypass write barriers.
+// It is particularly important to avoid write barriers when the current P has
+// been released, because the GC thinks the world is stopped, and an
+// unexpected write barrier would not be synchronized with the GC,
+// which can lead to a half-executed write barrier that has marked the object
+// but not queued it. If the GC skips the object and completes before the
+// queuing can occur, it will incorrectly free the object.
+//
+// We tried using special assignment functions invoked only when not
+// holding a running P, but then some updates to a particular memory
+// word went through write barriers and some did not. This breaks the
+// write barrier shadow checking mode, and it is also scary: better to have
+// a word that is completely ignored by the GC than to have one for which
+// only a few updates are ignored.
+//
+// Gs and Ps are always reachable via true pointers in the
+// allgs and allp lists or (during allocation before they reach those lists)
+// from stack variables.
+//
+// Ms are always reachable via true pointers either from allm or
+// freem. Unlike Gs and Ps we do free Ms, so it's important that
+// nothing ever hold an muintptr across a safe point.
+
+// A guintptr holds a goroutine pointer, but typed as a uintptr
+// to bypass write barriers. It is used in the Gobuf goroutine state
+// and in scheduling lists that are manipulated without a P.
+//
+// The Gobuf.g goroutine pointer is almost always updated by assembly code.
+// In one of the few places it is updated by Go code - func save - it must be
+// treated as a uintptr to avoid a write barrier being emitted at a bad time.
+// Instead of figuring out how to emit the write barriers missing in the
+// assembly manipulation, we change the type of the field to uintptr,
+// so that it does not require write barriers at all.
+//
+// Goroutine structs are published in the allg list and never freed.
+// That will keep the goroutine structs from being collected.
+// There is never a time that Gobuf.g's contain the only references
+// to a goroutine: the publishing of the goroutine in allg comes first.
+// Goroutine pointers are also kept in non-GC-visible places like TLS,
+// so I can't see them ever moving. If we did want to start moving data
+// in the GC, we'd need to allocate the goroutine structs from an
+// alternate arena. Using guintptr doesn't make that problem any worse.
+// Note that pollDesc.rg, pollDesc.wg also store g in uintptr form,
+// so they would need to be updated too if g's start moving.
+type guintptr uintptr
+
+//go:nosplit
+func (gp guintptr) ptr() *g { return (*g)(unsafe.Pointer(gp)) }
+
+//go:nosplit
+func (gp *guintptr) set(g *g) { *gp = guintptr(unsafe.Pointer(g)) }
+
+//go:nosplit
+func (gp *guintptr) cas(old, new guintptr) bool {
+	return atomic.Casuintptr((*uintptr)(unsafe.Pointer(gp)), uintptr(old), uintptr(new))
+}
+
+//go:nosplit
+func (gp *g) guintptr() guintptr {
+	return guintptr(unsafe.Pointer(gp))
+}
+
+// setGNoWB performs *gp = new without a write barrier.
+// For times when it's impractical to use a guintptr.
+//
+//go:nosplit
+//go:nowritebarrier
+func setGNoWB(gp **g, new *g) {
+	(*guintptr)(unsafe.Pointer(gp)).set(new)
+}
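[Editor's note: the guintptr trick generalizes: store a pointer as a plain integer so the compiler emits no write barrier, and convert back with unsafe.Pointer only at use. A toy, non-runtime version with an invented nodeUintptr type — ordinary code should never do this, because outside the runtime nothing stops the GC from collecting (or a future GC from moving) the node:]

package main

import (
	"fmt"
	"unsafe"
)

type node struct{ v int }

// nodeUintptr hides a *node from the garbage collector the same way
// guintptr hides a *g: assignments to it are integer stores, so no
// write barrier is emitted. Safe only while something else keeps the
// node reachable, as the allgs list does for goroutines.
type nodeUintptr uintptr

func (p nodeUintptr) ptr() *node   { return (*node)(unsafe.Pointer(p)) }
func (p *nodeUintptr) set(n *node) { *p = nodeUintptr(unsafe.Pointer(n)) }

func main() {
	n := &node{v: 42} // n is the "true pointer" keeping the node alive
	var u nodeUintptr
	u.set(n)
	fmt.Println(u.ptr().v) // 42
}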
+
+type puintptr uintptr
+
+//go:nosplit
+func (pp puintptr) ptr() *p { return (*p)(unsafe.Pointer(pp)) }
+
+//go:nosplit
+func (pp *puintptr) set(p *p) { *pp = puintptr(unsafe.Pointer(p)) }
+
+// muintptr is a *m that is not tracked by the garbage collector.
+//
+// Because we do free Ms, there are some additional constraints on
+// muintptrs:
+//
+// 1. Never hold an muintptr locally across a safe point.
+//
+// 2. Any muintptr in the heap must be owned by the M itself so it can
+//    ensure it is not in use when the last true *m is released.
+type muintptr uintptr
+
+//go:nosplit
+func (mp muintptr) ptr() *m { return (*m)(unsafe.Pointer(mp)) }
+
+//go:nosplit
+func (mp *muintptr) set(m *m) { *mp = muintptr(unsafe.Pointer(m)) }
+
+// setMNoWB performs *mp = new without a write barrier.
+// For times when it's impractical to use an muintptr.
+//
+//go:nosplit
+//go:nowritebarrier
+func setMNoWB(mp **m, new *m) {
+	(*muintptr)(unsafe.Pointer(mp)).set(new)
+}
+
+type gobuf struct {
+	// The offsets of sp, pc, and g are known to (hard-coded in) libmach.
+	//
+	// ctxt is unusual with respect to GC: it may be a
+	// heap-allocated funcval, so GC needs to track it, but it
+	// needs to be set and cleared from assembly, where it's
+	// difficult to have write barriers. However, ctxt is really a
+	// saved, live register, and we only ever exchange it between
+	// the real register and the gobuf. Hence, we treat it as a
+	// root during stack scanning, which means assembly that saves
+	// and restores it doesn't need write barriers. It's still
+	// typed as a pointer so that any other writes from Go get
+	// write barriers.
+	sp   uintptr
+	pc   uintptr
+	g    guintptr
+	ctxt unsafe.Pointer
+	ret  uintptr
+	lr   uintptr
+	bp   uintptr // for framepointer-enabled architectures
+}
+
+// sudog (pseudo-g) represents a g in a wait list, such as for sending/receiving
+// on a channel.
+//
+// sudog is necessary because the g ↔ synchronization object relation
+// is many-to-many. A g can be on many wait lists, so there may be
+// many sudogs for one g; and many gs may be waiting on the same
+// synchronization object, so there may be many sudogs for one object.
+//
+// sudogs are allocated from a special pool. Use acquireSudog and
+// releaseSudog to allocate and free them.
+type sudog struct {
+	// The following fields are protected by the hchan.lock of the
+	// channel this sudog is blocking on. shrinkstack depends on
+	// this for sudogs involved in channel ops.
+
+	g *g
+
+	next *sudog
+	prev *sudog
+	elem unsafe.Pointer // data element (may point to stack)
+
+	// The following fields are never accessed concurrently.
+	// For channels, waitlink is only accessed by g.
+	// For semaphores, all fields (including the ones above)
+	// are only accessed when holding a semaRoot lock.
+
+	acquiretime int64
+	releasetime int64
+	ticket      uint32
+
+	// isSelect indicates g is participating in a select, so
+	// g.selectDone must be CAS'd to win the wake-up race.
+	isSelect bool
+
+	// success indicates whether communication over channel c
+	// succeeded. It is true if the goroutine was awoken because a
+	// value was delivered over channel c, and false if awoken
+	// because c was closed.
+	success bool
+
+	// waiters is a count of semaRoot waiting list other than head of list,
+	// clamped to a uint16 to fit in unused space.
+	// Only meaningful at the head of the list.
+	// (If we wanted to be overly clever, we could store a high 16 bits
+	// in the second entry in the list.)
+	waiters uint16
+
+	parent   *sudog // semaRoot binary tree
+	waitlink *sudog // g.waiting list or semaRoot
+	waittail *sudog // semaRoot
+	c        *hchan // channel
+}
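[Editor's note: a sketch of the wake-up race that sudog.isSelect refers to. When a goroutine blocks in select on several channels, each channel that becomes ready races to deliver, and the first CAS on selectDone wins; losers must leave the goroutine alone. The code below is a standalone illustration with invented names, not the runtime's implementation (the real CAS lives in chan.go and select.go):]

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	var selectDone atomic.Uint32 // stands in for g.selectDone
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(ch int) {
			defer wg.Done()
			// Only one "channel" may claim the parked goroutine.
			if selectDone.CompareAndSwap(0, 1) {
				fmt.Println("channel", ch, "won the race and wakes the g")
			}
		}(i)
	}
	wg.Wait()
}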
+
+type libcall struct {
+	fn   uintptr
+	n    uintptr // number of parameters
+	args uintptr // parameters
+	r1   uintptr // return values
+	r2   uintptr
+	err  uintptr // error number
+}
+
+// Stack describes a Go execution stack.
+// The bounds of the stack are exactly [lo, hi),
+// with no implicit data structures on either side.
+type stack struct {
+	lo uintptr
+	hi uintptr
+}
+
+// heldLockInfo gives info on a held lock and the rank of that lock
+type heldLockInfo struct {
+	lockAddr uintptr
+	rank     lockRank
+}
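[Editor's note: the [lo, hi) bounds above pair with the stackguard0 field documented in the g struct that follows: the function prologue compares SP against a guard a fixed distance above lo. A minimal sketch of that check; stackGuard's value here is made up (the real constant is derived in the runtime's stack code):]

package main

import "fmt"

// stack mirrors the runtime struct: bounds are exactly [lo, hi),
// and the stack grows down from hi toward lo.
type stack struct{ lo, hi uintptr }

const stackGuard = 928 // illustrative only

// needsGrowth models the stack-growth prologue: if SP has dipped
// below lo+stackGuard, the function must call into morestack.
func needsGrowth(s stack, sp uintptr) bool {
	return sp < s.lo+stackGuard
}

func main() {
	s := stack{lo: 0x1000, hi: 0x3000}
	fmt.Println(needsGrowth(s, 0x2800), needsGrowth(s, 0x1100)) // false true
}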
+
+type g struct {
+	// Stack parameters.
+	// stack describes the actual stack memory: [stack.lo, stack.hi).
+	// stackguard0 is the stack pointer compared in the Go stack growth prologue.
+	// It is stack.lo+StackGuard normally, but can be StackPreempt to trigger a preemption.
+	// stackguard1 is the stack pointer compared in the //go:systemstack stack growth prologue.
+	// It is stack.lo+StackGuard on g0 and gsignal stacks.
+	// It is ~0 on other goroutine stacks, to trigger a call to morestackc (and crash).
+	stack       stack   // offset known to runtime/cgo
+	stackguard0 uintptr // offset known to liblink
+	stackguard1 uintptr // offset known to liblink
+
+	_panic    *_panic // innermost panic - offset known to liblink
+	_defer    *_defer // innermost defer
+	m         *m      // current m; offset known to arm liblink
+	sched     gobuf
+	syscallsp uintptr // if status==Gsyscall, syscallsp = sched.sp to use during gc
+	syscallpc uintptr // if status==Gsyscall, syscallpc = sched.pc to use during gc
+	stktopsp  uintptr // expected sp at top of stack, to check in traceback
+	// param is a generic pointer parameter field used to pass
+	// values in particular contexts where other storage for the
+	// parameter would be difficult to find. It is currently used
+	// in four ways:
+	// 1. When a channel operation wakes up a blocked goroutine, it sets param to
+	//    point to the sudog of the completed blocking operation.
+	// 2. By gcAssistAlloc1 to signal back to its caller that the goroutine completed
+	//    the GC cycle. It is unsafe to do so in any other way, because the goroutine's
+	//    stack may have moved in the meantime.
+	// 3. By debugCallWrap to pass parameters to a new goroutine because allocating a
+	//    closure in the runtime is forbidden.
+	// 4. When a panic is recovered and control returns to the respective frame,
+	//    param may point to a savedOpenDeferState.
+	param        unsafe.Pointer
+	atomicstatus atomic.Uint32
+	stackLock    uint32 // sigprof/scang lock; TODO: fold in to atomicstatus
+	goid         uint64
+	schedlink    guintptr
+	waitsince    int64      // approx time when the g became blocked
+	waitreason   waitReason // if status==Gwaiting
+
+	preempt       bool // preemption signal, duplicates stackguard0 = stackpreempt
+	preemptStop   bool // transition to _Gpreempted on preemption; otherwise, just deschedule
+	preemptShrink bool // shrink stack at synchronous safe point
+
+	// asyncSafePoint is set if g is stopped at an asynchronous
+	// safe point. This means there are frames on the stack
+	// without precise pointer information.
+	asyncSafePoint bool
+
+	paniconfault bool // panic (instead of crash) on unexpected fault address
+	gcscandone   bool // g has scanned stack; protected by _Gscan bit in status
+	throwsplit   bool // must not split stack
+	// activeStackChans indicates that there are unlocked channels
+	// pointing into this goroutine's stack. If true, stack
+	// copying needs to acquire channel locks to protect these
+	// areas of the stack.
+	activeStackChans bool
+	// parkingOnChan indicates that the goroutine is about to
+	// park on a chansend or chanrecv. Used to signal an unsafe point
+	// for stack shrinking.
+	parkingOnChan atomic.Bool
+	// inMarkAssist indicates whether the goroutine is in mark assist.
+	// Used by the execution tracer.
+	inMarkAssist bool
+	coroexit     bool // argument to coroswitch_m
+
+	raceignore    int8     // ignore race detection events
+	nocgocallback bool     // whether to disable callbacks from C
+	tracking      bool     // whether we're tracking this G for sched latency statistics
+	trackingSeq   uint8    // used to decide whether to track this G
+	trackingStamp int64    // timestamp of when the G last started being tracked
+	runnableTime  int64    // the amount of time spent runnable, cleared when running, only used when tracking
+	lockedm       muintptr
+	sig           uint32
+	writebuf      []byte
+	sigcode0      uintptr
+	sigcode1      uintptr
+	sigpc         uintptr
+	parentGoid    uint64          // goid of goroutine that created this goroutine
+	gopc          uintptr         // pc of go statement that created this goroutine
+	ancestors     *[]ancestorInfo // ancestor information of the goroutine(s) that created this goroutine (only used if debug.tracebackancestors)
+	startpc       uintptr         // pc of goroutine function
+	racectx       uintptr
+	waiting       *sudog         // sudog structures this g is waiting on (that have a valid elem ptr); in lock order
+	cgoCtxt       []uintptr      // cgo traceback context
+	labels        unsafe.Pointer // profiler labels
+	timer         *timer         // cached timer for time.Sleep
+	selectDone    atomic.Uint32  // are we participating in a select and did someone win the race?
+
+	coroarg *coro // argument during coroutine transfers
+
+	// goroutineProfiled indicates the status of this goroutine's stack for the
+	// current in-progress goroutine profile
+	goroutineProfiled goroutineProfileStateHolder
+
+	// Per-G tracer state.
+	trace gTraceState
+
+	// Per-G GC state
+
+	// gcAssistBytes is this G's GC assist credit in terms of
+	// bytes allocated. If this is positive, then the G has credit
+	// to allocate gcAssistBytes bytes without assisting. If this
+	// is negative, then the G must correct this by performing
+	// scan work. We track this in bytes to make it fast to update
+	// and check for debt in the malloc hot path. The assist ratio
+	// determines how this corresponds to scan work debt.
+	gcAssistBytes int64
+}
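[Editor's note: a rough sketch of the assist-credit arithmetic described for gcAssistBytes: allocation debits the credit, and once it goes negative the goroutine must perform scan work to pay the debt before continuing. This is a simplified illustration; assistWorkPerByte is an invented stand-in for the ratio the GC pacer (gcController) computes:]

package main

import "fmt"

const assistWorkPerByte = 0.5 // illustrative ratio only

// allocate debits bytes from the assist credit and returns the scan
// work (if any) the goroutine must do to clear its debt.
func allocate(credit *int64, bytes int64) (scanWork int64) {
	*credit -= bytes
	if *credit < 0 {
		debt := -*credit
		scanWork = int64(float64(debt) * assistWorkPerByte)
		*credit = 0 // debt paid by scanning
	}
	return scanWork
}

func main() {
	credit := int64(1024)
	fmt.Println(allocate(&credit, 512), credit)  // 0 512
	fmt.Println(allocate(&credit, 1024), credit) // 256 0
}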
+
+// gTrackingPeriod is the number of transitions out of _Grunning between
+// latency tracking runs.
+const gTrackingPeriod = 8
+
+const (
+	// tlsSlots is the number of pointer-sized slots reserved for TLS on some platforms,
+	// like Windows.
+	tlsSlots = 6
+	tlsSize  = tlsSlots * goarch.PtrSize
+)
+
+// Values for m.freeWait.
+const (
+	freeMStack = 0 // M done, free stack and reference.
+	freeMRef   = 1 // M done, free reference.
+	freeMWait  = 2 // M still in use.
+)
+
+type m struct {
+	g0      *g     // goroutine with scheduling stack
+	morebuf gobuf  // gobuf arg to morestack
+	divmod  uint32 // div/mod denominator for arm - known to liblink
+	_       uint32 // align next field to 8 bytes
+
+	// Fields not known to debuggers.
+	procid        uint64            // for debuggers, but offset not hard-coded
+	gsignal       *g                // signal-handling g
+	goSigStack    gsignalStack      // Go-allocated signal handling stack
+	sigmask       sigset            // storage for saved signal mask
+	tls           [tlsSlots]uintptr // thread-local storage (for x86 extern register)
+	mstartfn      func()
+	curg          *g       // current running goroutine
+	caughtsig     guintptr // goroutine running during fatal signal
+	p             puintptr // attached p for executing go code (nil if not executing go code)
+	nextp         puintptr
+	oldp          puintptr // the p that was attached before executing a syscall
+	id            int64
+	mallocing     int32
+	throwing      throwType
+	preemptoff    string // if != "", keep curg running on this m
+	locks         int32
+	dying         int32
+	profilehz     int32
+	spinning      bool // m is out of work and is actively looking for work
+	blocked       bool // m is blocked on a note
+	newSigstack   bool // minit on C thread called sigaltstack
+	printlock     int8
+	incgo         bool          // m is executing a cgo call
+	isextra       bool          // m is an extra m
+	isExtraInC    bool          // m is an extra m that is not executing Go code
+	isExtraInSig  bool          // m is an extra m in a signal handler
+	freeWait      atomic.Uint32 // Whether it is safe to free g0 and delete m (one of freeMRef, freeMStack, freeMWait)
+	needextram    bool
+	traceback     uint8
+	ncgocall      uint64        // number of cgo calls in total
+	ncgo          int32         // number of cgo calls currently in progress
+	cgoCallersUse atomic.Uint32 // if non-zero, cgoCallers in use temporarily
+	cgoCallers    *cgoCallers   // cgo traceback if crashing in cgo call
+	park          note
+	alllink       *m // on allm
+	schedlink     muintptr
+	lockedg       guintptr
+	createstack   [32]uintptr // stack that created this thread; it's used for StackRecord.Stack0, so it must align with it.
+	lockedExt     uint32      // tracking for external LockOSThread
+	lockedInt     uint32      // tracking for internal lockOSThread
+	nextwaitm     muintptr    // next m waiting for lock
+
+	mLockProfile mLockProfile // fields relating to runtime.lock contention
+
+	// wait* are used to carry arguments from gopark into park_m, because
+	// there's no stack to put them on. That is their sole purpose.
+	waitunlockf          func(*g, unsafe.Pointer) bool
+	waitlock             unsafe.Pointer
+	waitTraceBlockReason traceBlockReason
+	waitTraceSkip        int
+
+	syscalltick uint32
+	freelink    *m // on sched.freem
+	trace       mTraceState
+
+	// these are here because they are too large to be on the stack
+	// of low-level NOSPLIT functions.
+	libcall   libcall
+	libcallpc uintptr // for cpu profiler
+	libcallsp uintptr
+	libcallg  guintptr
+	syscall   libcall // stores syscall parameters on windows
+
+	vdsoSP uintptr // SP for traceback while in VDSO call (0 if not in call)
+	vdsoPC uintptr // PC for traceback while in VDSO call
+
+	// preemptGen counts the number of completed preemption
+	// signals. This is used to detect when a preemption is
+	// requested, but fails.
+	preemptGen atomic.Uint32
+
+	// Whether this is a pending preemption signal on this M.
+	signalPending atomic.Uint32
+
+	// pcvalue lookup cache
+	pcvalueCache pcvalueCache
+
+	dlogPerM
+
+	mOS
+
+	chacha8   chacha8rand.State
+	cheaprand uint64
+
+	// Up to 10 locks held by this m, maintained by the lock ranking code.
+	locksHeldLen int
+	locksHeld    [10]heldLockInfo
+}
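[Editor's note: a sketch of the m.freeWait protocol implied by the constants above. The M starts at freeMWait ("still in use"); when it exits it publishes, exactly once, whether its g0 stack must be freed along with the reference, and whoever later frees the M reads that value. Simplified and standalone (the real transitions happen around mexit in proc.go):]

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	freeMStack = 0 // M done, free stack and reference
	freeMRef   = 1 // M done, free reference only
	freeMWait  = 2 // M still in use
)

func main() {
	var freeWait atomic.Uint32
	freeWait.Store(freeMWait)
	// On exit, the M picks its final state exactly once.
	freeWait.CompareAndSwap(freeMWait, freeMStack)
	if v := freeWait.Load(); v != freeMWait {
		fmt.Println("safe to free; free the g0 stack too:", v == freeMStack)
	}
}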
+
+type p struct {
+	id          int32
+	status      uint32 // one of pidle/prunning/...
+	link        puintptr
+	schedtick   uint32     // incremented on every scheduler call
+	syscalltick uint32     // incremented on every system call
+	sysmontick  sysmontick // last tick observed by sysmon
+	m           muintptr   // back-link to associated m (nil if idle)
+	mcache      *mcache
+	pcache      pageCache
+	raceprocctx uintptr
+
+	deferpool    []*_defer // pool of available defer structs (see panic.go)
+	deferpoolbuf [32]*_defer
+
+	// Cache of goroutine ids, amortizes accesses to runtime·sched.goidgen.
+	goidcache    uint64
+	goidcacheend uint64
+
+	// Queue of runnable goroutines. Accessed without lock.
+	runqhead uint32
+	runqtail uint32
+	runq     [256]guintptr
+	// runnext, if non-nil, is a runnable G that was ready'd by
+	// the current G and should be run next instead of what's in
+	// runq if there's time remaining in the running G's time
+	// slice. It will inherit the time left in the current time
+	// slice. If a set of goroutines is locked in a
+	// communicate-and-wait pattern, this schedules that set as a
+	// unit and eliminates the (potentially large) scheduling
+	// latency that otherwise arises from adding the ready'd
+	// goroutines to the end of the run queue.
+	//
+	// Note that while other P's may atomically CAS this to zero,
+	// only the owner P can CAS it to a valid G.
+	runnext guintptr
+
+	// Available G's (status == Gdead)
+	gFree struct {
+		gList
+		n int32
+	}
+
+	sudogcache []*sudog
+	sudogbuf   [128]*sudog
+
+	// Cache of mspan objects from the heap.
+	mspancache struct {
+		// We need an explicit length here because this field is used
+		// in allocation codepaths where write barriers are not allowed,
+		// and eliminating the write barrier/keeping it eliminated from
+		// slice updates is tricky, more so than just managing the length
+		// ourselves.
+		len int
+		buf [128]*mspan
+	}
+
+	// Cache of a single pinner object to reduce allocations from repeated
+	// pinner creation.
+	pinnerCache *pinner
+
+	trace pTraceState
+
+	palloc persistentAlloc // per-P to avoid mutex
+
+	// The when field of the first entry on the timer heap.
+	// This is 0 if the timer heap is empty.
+	timer0When atomic.Int64
+
+	// The earliest known nextwhen field of a timer with
+	// timerModifiedEarlier status. Because the timer may have been
+	// modified again, there need not be any timer with this value.
+	// This is 0 if there are no timerModifiedEarlier timers.
+	timerModifiedEarliest atomic.Int64
+
+	// Per-P GC state
+	gcAssistTime         int64 // Nanoseconds in assistAlloc
+	gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic)
+
+	// limiterEvent tracks events for the GC CPU limiter.
+	limiterEvent limiterEvent
+
+	// gcMarkWorkerMode is the mode for the next mark worker to run in.
+	// That is, this is used to communicate with the worker goroutine
+	// selected for immediate execution by
+	// gcController.findRunnableGCWorker. When scheduling other goroutines,
+	// this field must be set to gcMarkWorkerNotWorker.
+	gcMarkWorkerMode gcMarkWorkerMode
+	// gcMarkWorkerStartTime is the nanotime() at which the most recent
+	// mark worker started.
+	gcMarkWorkerStartTime int64
+
+	// gcw is this P's GC work buffer cache. The work buffer is
+	// filled by write barriers, drained by mutator assists, and
+	// disposed on certain GC state transitions.
+	gcw gcWork
+
+	// wbBuf is this P's GC write barrier buffer.
+	//
+	// TODO: Consider caching this in the running G.
+	wbBuf wbBuf
+
+	runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point
+
+	// statsSeq is a counter indicating whether this P is currently
+	// writing any stats. Its value is even when not, odd when it is.
+	statsSeq atomic.Uint32
+
+	// Lock for timers. We normally access the timers while running
+	// on this P, but the scheduler can also do it from a different P.
+	timersLock mutex
+
+	// Actions to take at some time. This is used to implement the
+	// standard library's time package.
+	// Must hold timersLock to access.
+	timers []*timer
+
+	// Number of timers in P's heap.
+	numTimers atomic.Uint32
+
+	// Number of timerDeleted timers in P's heap.
+	deletedTimers atomic.Uint32
+
+	// Race context used while executing timer functions.
+	timerRaceCtx uintptr
+
+	// maxStackScanDelta accumulates the amount of stack space held by
+	// live goroutines (i.e. those eligible for stack scanning).
+	// Flushed to gcController.maxStackScan once maxStackScanSlack
+	// or -maxStackScanSlack is reached.
+	maxStackScanDelta int64
+
+	// gc-time statistics about current goroutines
+	// Note that this differs from maxStackScan in that this
+	// accumulates the actual stack observed to be used at GC time (hi - sp),
+	// not an instantaneous measure of the total stack size that might need
+	// to be scanned (hi - lo).
+	scannedStackSize uint64 // stack size of goroutines scanned by this P
+	scannedStacks    uint64 // number of goroutines scanned by this P
+
+	// preempt is set to indicate that this P should enter the
+	// scheduler ASAP (regardless of what G is running on it).
+	preempt bool
+
+	// pageTraceBuf is a buffer for writing out page allocation/free/scavenge traces.
+	//
+	// Used only if GOEXPERIMENT=pagetrace.
+	pageTraceBuf pageTraceBuf
+
+	// Padding is no longer needed. False sharing is now not a worry because p is large enough
+	// that its size class is an integer multiple of the cache line size (for any of our architectures).
+}
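[Editor's note: a sketch of the scheduling preference that p.runnext encodes: a G readied by the running G jumps ahead of everything already queued and inherits the remaining time slice. Plain slices stand in for the lock-free ring and the names are invented; the real operations are runqput and runqget in proc.go:]

package main

import "fmt"

type localSched struct {
	runq    []string
	runnext string
}

// ready queues g. If the running G readied it, g displaces any
// previous runnext, which is kicked back onto the tail of runq.
func (s *localSched) ready(g string, fromRunningG bool) {
	if fromRunningG {
		if s.runnext != "" {
			s.runq = append(s.runq, s.runnext)
		}
		s.runnext = g
		return
	}
	s.runq = append(s.runq, g)
}

// next prefers runnext over the queue head.
func (s *localSched) next() string {
	if g := s.runnext; g != "" {
		s.runnext = ""
		return g
	}
	if len(s.runq) == 0 {
		return ""
	}
	g := s.runq[0]
	s.runq = s.runq[1:]
	return g
}

func main() {
	var s localSched
	s.ready("A", false)
	s.ready("B", true)              // readied by the running G: jumps the queue
	fmt.Println(s.next(), s.next()) // B A
}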
+
+type schedt struct {
+	goidgen   atomic.Uint64
+	lastpoll  atomic.Int64 // time of last network poll, 0 if currently polling
+	pollUntil atomic.Int64 // time to which current poll is sleeping
+
+	lock mutex
+
+	// When increasing nmidle, nmidlelocked, nmsys, or nmfreed, be
+	// sure to call checkdead().
+
+	midle        muintptr // idle m's waiting for work
+	nmidle       int32    // number of idle m's waiting for work
+	nmidlelocked int32    // number of locked m's waiting for work
+	mnext        int64    // number of m's that have been created and next M ID
+	maxmcount    int32    // maximum number of m's allowed (or die)
+	nmsys        int32    // number of system m's not counted for deadlock
+	nmfreed      int64    // cumulative number of freed m's
+
+	ngsys atomic.Int32 // number of system goroutines
+
+	pidle        puintptr // idle p's
+	npidle       atomic.Int32
+	nmspinning   atomic.Int32  // See "Worker thread parking/unparking" comment in proc.go.
+	needspinning atomic.Uint32 // See "Delicate dance" comment in proc.go. Boolean. Must hold sched.lock to set to 1.
+
+	// Global runnable queue.
+	runq     gQueue
+	runqsize int32
+
+	// disable controls selective disabling of the scheduler.
+	//
+	// Use schedEnableUser to control this.
+	//
+	// disable is protected by sched.lock.
+	disable struct {
+		// user disables scheduling of user goroutines.
+		user     bool
+		runnable gQueue // pending runnable Gs
+		n        int32  // length of runnable
+	}
+
+	// Global cache of dead G's.
+	gFree struct {
+		lock    mutex
+		stack   gList // Gs with stacks
+		noStack gList // Gs without stacks
+		n       int32
+	}
+
+	// Central cache of sudog structs.
+	sudoglock  mutex
+	sudogcache *sudog
+
+	// Central pool of available defer structs.
+	deferlock mutex
+	deferpool *_defer
+
+	// freem is the list of m's waiting to be freed when their
+	// m.exited is set. Linked through m.freelink.
+	freem *m
+
+	gcwaiting  atomic.Bool // gc is waiting to run
+	stopwait   int32
+	stopnote   note
+	sysmonwait atomic.Bool
+	sysmonnote note
+
+	// safePointFn should be called on each P at the next GC
+	// safepoint if p.runSafePointFn is set.
+	safePointFn   func(*p)
+	safePointWait int32
+	safePointNote note
+
+	profilehz int32 // cpu profiling rate
+
+	procresizetime int64 // nanotime() of last change to gomaxprocs
+	totaltime      int64 // ∫gomaxprocs dt up to procresizetime
+
+	// sysmonlock protects sysmon's actions on the runtime.
+	//
+	// Acquire and hold this mutex to block sysmon from interacting
+	// with the rest of the runtime.
+	sysmonlock mutex
+
+	// timeToRun is a distribution of scheduling latencies, defined
+	// as the sum of time a G spends in the _Grunnable state before
+	// it transitions to _Grunning.
+	timeToRun timeHistogram
+
+	// idleTime is the total CPU time Ps have "spent" idle.
+	//
+	// Reset on each GC cycle.
+	idleTime atomic.Int64
+
+	// totalMutexWaitTime is the sum of time goroutines have spent in _Gwaiting
+	// with a waitreason of the form waitReasonSync{RW,}Mutex{R,}Lock.
+	totalMutexWaitTime atomic.Int64
+
+	// stwStoppingTimeGC/Other are distributions of stop-the-world stopping
+	// latencies, defined as the time taken by stopTheWorldWithSema to get
+	// all Ps to stop. stwStoppingTimeGC covers all GC-related STWs,
+	// stwStoppingTimeOther covers the others.
+	stwStoppingTimeGC    timeHistogram
+	stwStoppingTimeOther timeHistogram
+
+	// stwTotalTimeGC/Other are distributions of stop-the-world total
+	// latencies, defined as the total time from stopTheWorldWithSema to
+	// startTheWorldWithSema. This is a superset of
+	// stwStoppingTimeGC/Other. stwTotalTimeGC covers all GC-related STWs,
+	// stwTotalTimeOther covers the others.
+	stwTotalTimeGC    timeHistogram
+	stwTotalTimeOther timeHistogram
+
+	// totalRuntimeLockWaitTime (plus the value of lockWaitTime on each M in
+	// allm) is the sum of time goroutines have spent in _Grunnable and with an
+	// M, but waiting for locks within the runtime. This field stores the value
+	// for Ms that have exited.
+	totalRuntimeLockWaitTime atomic.Int64
+}
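[Editor's note: unlike a P's local runq, sched.runq above is shared by every M, so pushes and pops happen under sched.lock. A toy mutex-protected queue illustrating that contrast (names invented; the real operations are globrunqput and globrunqget in proc.go):]

package main

import (
	"fmt"
	"sync"
)

type globalQueue struct {
	mu   sync.Mutex // stands in for sched.lock
	gs   []string
	size int // stands in for sched.runqsize
}

func (q *globalQueue) put(g string) {
	q.mu.Lock()
	defer q.mu.Unlock()
	q.gs = append(q.gs, g)
	q.size++
}

func (q *globalQueue) get() (string, bool) {
	q.mu.Lock()
	defer q.mu.Unlock()
	if q.size == 0 {
		return "", false
	}
	g := q.gs[0]
	q.gs = q.gs[1:]
	q.size--
	return g, true
}

func main() {
	var q globalQueue
	q.put("G1")
	g, ok := q.get()
	fmt.Println(g, ok)
}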
+
+// Values for the flags field of a sigTabT.
+const (
+	_SigNotify   = 1 << iota // let signal.Notify have signal, even if from kernel
+	_SigKill                 // if signal.Notify doesn't take it, exit quietly
+	_SigThrow                // if signal.Notify doesn't take it, exit loudly
+	_SigPanic                // if the signal is from the kernel, panic
+	_SigDefault              // if the signal isn't explicitly requested, don't monitor it
+	_SigGoExit               // cause all runtime procs to exit (only used on Plan 9).
+	_SigSetStack             // Don't explicitly install handler, but add SA_ONSTACK to existing libc handler
+	_SigUnblock              // always unblock; see blockableSig
+	_SigIgn                  // _SIG_DFL action is to ignore the signal
+)
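[Editor's note: these flags compose as independent bits via the 1 << iota pattern, so signal-handling code tests them with a mask rather than equality. A minimal illustration of the same pattern, with illustrative names and values only:]

package main

import "fmt"

const (
	sigNotify = 1 << iota
	sigKill
	sigThrow
	sigPanic
)

// describe tests individual bits with AND, the way a handler would.
func describe(flags int) {
	if flags&sigNotify != 0 {
		fmt.Println("deliver to signal.Notify first")
	}
	if flags&sigPanic != 0 {
		fmt.Println("turn a kernel-sent signal into a panic")
	}
}

func main() {
	describe(sigNotify | sigPanic)
}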
+
+// Layout of in-memory per-function information prepared by linker
+// See https://golang.org/s/go12symtab.
+// Keep in sync with linker (../cmd/link/internal/ld/pcln.go:/pclntab)
+// and with package debug/gosym and with symtab.go in package runtime.
+type _func struct {
+	sys.NotInHeap // Only in static data
+
+	entryOff uint32 // start pc, as offset from moduledata.text/pcHeader.textStart
+	nameOff  int32  // function name, as index into moduledata.funcnametab.
+
+	args        int32  // in/out args size
+	deferreturn uint32 // offset of start of a deferreturn call instruction from entry, if any.
+
+	pcsp      uint32
+	pcfile    uint32
+	pcln      uint32
+	npcdata   uint32
+	cuOffset  uint32     // runtime.cutab offset of this function's CU
+	startLine int32      // line number of start of function (func keyword/TEXT directive)
+	funcID    abi.FuncID // set for certain special runtime functions
+	flag      abi.FuncFlag
+	_         [1]byte // pad
+	nfuncdata uint8   // must be last, must end on a uint32-aligned boundary
+
+	// The end of the struct is followed immediately by two variable-length
+	// arrays that reference the pcdata and funcdata locations for this
+	// function.
+
+	// pcdata contains the offset into moduledata.pctab for the start of
+	// that index's table. e.g.,
+	// &moduledata.pctab[_func.pcdata[_PCDATA_UnsafePoint]] is the start of
+	// the unsafe point table.
+	//
+	// An offset of 0 indicates that there is no table.
+	//
+	// pcdata [npcdata]uint32
+
+	// funcdata contains the offset past moduledata.gofunc which contains a
+	// pointer to that index's funcdata. e.g.,
+	// *(moduledata.gofunc + _func.funcdata[_FUNCDATA_ArgsPointerMaps]) is
+	// the argument pointer map.
+	//
+	// An offset of ^uint32(0) indicates that there is no entry.
+	//
+	// funcdata [nfuncdata]uint32
+}
+
+// Pseudo-Func that is returned for PCs that occur in inlined code.
+// A *Func can be either a *_func or a *funcinl, and they are distinguished
+// by the first uintptr.
+//
+// TODO(austin): Can we merge this with inlinedCall?
+type funcinl struct {
+	ones      uint32  // set to ^0 to distinguish from _func
+	entry     uintptr // entry of the real (the "outermost") frame
+	name      string
+	file      string
+	line      int32
+	startLine int32
+}
+
+// layout of Itab known to compilers
+// allocated in non-garbage-collected memory
+// Needs to be in sync with
+// ../cmd/compile/internal/reflectdata/reflect.go:/^func.WritePluginTable.
+type itab struct {
+	inter *interfacetype
+	_type *_type
+	hash  uint32 // copy of _type.hash. Used for type switches.
+	_     [4]byte
+	fun   [1]uintptr // variable sized. fun[0]==0 means _type does not implement inter.
+}
+
+// Lock-free stack node.
+// Also known to export_test.go.
+type lfnode struct {
+	next    uint64
+	pushcnt uintptr
+}
+
+type forcegcstate struct {
+	lock mutex
+	g    *g
+	idle atomic.Bool
+}
+
+// A _defer holds an entry on the list of deferred calls.
+// If you add a field here, add code to clear it in deferProcStack.
+// This struct must match the code in cmd/compile/internal/ssagen/ssa.go:deferstruct
+// and cmd/compile/internal/ssagen/ssa.go:(*state).call.
+// Some defers will be allocated on the stack and some on the heap.
+// All defers are logically part of the stack, so write barriers to
+// initialize them are not required. All defers must be manually scanned,
+// and for heap defers, marked.
+type _defer struct {
+	heap      bool
+	rangefunc bool    // true for rangefunc list
+	sp        uintptr // sp at time of defer
+	pc        uintptr // pc at time of defer
+	fn        func()  // can be nil for open-coded defers
+	link      *_defer // next defer on G; can point to either heap or stack!
+
+	// If rangefunc is true, *head is the head of the atomic linked list
+	// during a range-over-func execution.
+	head *atomic.Pointer[_defer]
+}
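[Editor's note: a sketch of the singly linked defer list that _defer.link forms, headed at g._defer: each new defer is pushed at the head, and records are popped and run in LIFO order when the frame returns. Frame matching on sp is elided and the names are invented; the real push/pop are deferprocStack and deferreturn in panic.go:]

package main

import "fmt"

type deferRec struct {
	fn   func()
	link *deferRec // next defer on the G
}

type gDefers struct{ head *deferRec }

// push adds a record at the head, like deferproc does.
func (g *gDefers) push(fn func()) { g.head = &deferRec{fn: fn, link: g.head} }

// runAll pops and runs records in LIFO order.
func (g *gDefers) runAll() {
	for g.head != nil {
		d := g.head
		g.head = d.link
		d.fn()
	}
}

func main() {
	var g gDefers
	g.push(func() { fmt.Println("first pushed, runs last") })
	g.push(func() { fmt.Println("last pushed, runs first") })
	g.runAll()
}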
+
+// A _panic holds information about an active panic.
+//
+// A _panic value must only ever live on the stack.
+//
+// The argp and link fields are stack pointers, but don't need special
+// handling during stack growth: because they are pointer-typed and
+// _panic values only live on the stack, regular stack pointer
+// adjustment takes care of them.
+type _panic struct {
+	argp unsafe.Pointer // pointer to arguments of deferred call run during panic; cannot move - known to liblink
+	arg  any            // argument to panic
+	link *_panic        // link to earlier panic
+
+	// startPC and startSP track where _panic.start was called.
+	startPC uintptr
+	startSP unsafe.Pointer
+
+	// The current stack frame that we're running deferred calls for.
+	sp unsafe.Pointer
+	lr uintptr
+	fp unsafe.Pointer
+
+	// retpc stores the PC where the panic should jump back to, if the
+	// function last returned by _panic.next() recovers the panic.
+	retpc uintptr
+
+	// Extra state for handling open-coded defers.
+	deferBitsPtr *uint8
+	slotsPtr     unsafe.Pointer
+
+	recovered   bool // whether this panic has been recovered
+	goexit      bool
+	deferreturn bool
+}
+
+// savedOpenDeferState tracks the extra state from _panic that's
+// necessary for deferreturn to pick up where gopanic left off,
+// without needing to unwind the stack.
+type savedOpenDeferState struct {
+	retpc           uintptr
+	deferBitsOffset uintptr
+	slotsOffset     uintptr
+}
+
+// ancestorInfo records details of where a goroutine was started.
+type ancestorInfo struct {
+	pcs  []uintptr // pcs from the stack of this goroutine
+	goid uint64    // goroutine id of this goroutine; original goroutine possibly dead
+	gopc uintptr   // pc of go statement that created this goroutine
+}
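[Editor's note: the deferBitsPtr field above exists because open-coded defers record which defers are pending in a per-frame bitmask rather than in _defer records. A rough sketch of how such a bitmask runs the armed defers in reverse order; the frame layout here is invented, and the real scheme is split between cmd/compile's ssagen and the runtime's deferreturn/gopanic:]

package main

import "fmt"

// runOpenDefers runs the defers whose bit is set, last-armed first,
// clearing each bit as it runs so a re-entry cannot run it twice.
func runOpenDefers(deferBits uint8, defers []func()) {
	for i := len(defers) - 1; i >= 0; i-- {
		if deferBits&(1<<uint(i)) != 0 {
			deferBits &^= 1 << uint(i)
			defers[i]()
		}
	}
}

func main() {
	runOpenDefers(0b101, []func(){
		func() { fmt.Println("defer 0") },
		func() { fmt.Println("defer 1 (never armed)") },
		func() { fmt.Println("defer 2") },
	}) // prints "defer 2" then "defer 0"
}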
+
+// A waitReason explains why a goroutine has been stopped.
+// See gopark. Do not re-use waitReasons, add new ones.
+type waitReason uint8
+
+const (
+	waitReasonZero                  waitReason = iota // ""
+	waitReasonGCAssistMarking                         // "GC assist marking"
+	waitReasonIOWait                                  // "IO wait"
+	waitReasonChanReceiveNilChan                      // "chan receive (nil chan)"
+	waitReasonChanSendNilChan                         // "chan send (nil chan)"
+	waitReasonDumpingHeap                             // "dumping heap"
+	waitReasonGarbageCollection                       // "garbage collection"
+	waitReasonGarbageCollectionScan                   // "garbage collection scan"
+	waitReasonPanicWait                               // "panicwait"
+	waitReasonSelect                                  // "select"
+	waitReasonSelectNoCases                           // "select (no cases)"
+	waitReasonGCAssistWait                            // "GC assist wait"
+	waitReasonGCSweepWait                             // "GC sweep wait"
+	waitReasonGCScavengeWait                          // "GC scavenge wait"
+	waitReasonChanReceive                             // "chan receive"
+	waitReasonChanSend                                // "chan send"
+	waitReasonFinalizerWait                           // "finalizer wait"
+	waitReasonForceGCIdle                             // "force gc (idle)"
+	waitReasonSemacquire                              // "semacquire"
+	waitReasonSleep                                   // "sleep"
+	waitReasonSyncCondWait                            // "sync.Cond.Wait"
+	waitReasonSyncMutexLock                           // "sync.Mutex.Lock"
+	waitReasonSyncRWMutexRLock                        // "sync.RWMutex.RLock"
+	waitReasonSyncRWMutexLock                         // "sync.RWMutex.Lock"
+	waitReasonTraceReaderBlocked                      // "trace reader (blocked)"
+	waitReasonWaitForGCCycle                          // "wait for GC cycle"
+	waitReasonGCWorkerIdle                            // "GC worker (idle)"
+	waitReasonGCWorkerActive                          // "GC worker (active)"
+	waitReasonPreempted                               // "preempted"
+	waitReasonDebugCall                               // "debug call"
+	waitReasonGCMarkTermination                       // "GC mark termination"
+	waitReasonStoppingTheWorld                        // "stopping the world"
+	waitReasonFlushProcCaches                         // "flushing proc caches"
+	waitReasonTraceGoroutineStatus                    // "trace goroutine status"
+	waitReasonTraceProcStatus                         // "trace proc status"
+	waitReasonPageTraceFlush                          // "page trace flush"
+	waitReasonCoroutine                               // "coroutine"
+)
+
+var waitReasonStrings = [...]string{
+	waitReasonZero:                  "",
+	waitReasonGCAssistMarking:       "GC assist marking",
+	waitReasonIOWait:                "IO wait",
+	waitReasonChanReceiveNilChan:    "chan receive (nil chan)",
+	waitReasonChanSendNilChan:       "chan send (nil chan)",
+	waitReasonDumpingHeap:           "dumping heap",
+	waitReasonGarbageCollection:     "garbage collection",
+	waitReasonGarbageCollectionScan: "garbage collection scan",
+	waitReasonPanicWait:             "panicwait",
+	waitReasonSelect:                "select",
+	waitReasonSelectNoCases:         "select (no cases)",
+	waitReasonGCAssistWait:          "GC assist wait",
+	waitReasonGCSweepWait:           "GC sweep wait",
+	waitReasonGCScavengeWait:        "GC scavenge wait",
+	waitReasonChanReceive:           "chan receive",
+	waitReasonChanSend:              "chan send",
+	waitReasonFinalizerWait:         "finalizer wait",
+	waitReasonForceGCIdle:           "force gc (idle)",
+	waitReasonSemacquire:            "semacquire",
+	waitReasonSleep:                 "sleep",
+	waitReasonSyncCondWait:          "sync.Cond.Wait",
+	waitReasonSyncMutexLock:         "sync.Mutex.Lock",
+	waitReasonSyncRWMutexRLock:      "sync.RWMutex.RLock",
+	waitReasonSyncRWMutexLock:       "sync.RWMutex.Lock",
+	waitReasonTraceReaderBlocked:    "trace reader (blocked)",
+	waitReasonWaitForGCCycle:        "wait for GC cycle",
+	waitReasonGCWorkerIdle:          "GC worker (idle)",
+	waitReasonGCWorkerActive:        "GC worker (active)",
+	waitReasonPreempted:             "preempted",
+	waitReasonDebugCall:             "debug call",
+	waitReasonGCMarkTermination:     "GC mark termination",
+	waitReasonStoppingTheWorld:      "stopping the world",
+	waitReasonFlushProcCaches:       "flushing proc caches",
+	waitReasonTraceGoroutineStatus:  "trace goroutine status",
+	waitReasonTraceProcStatus:       "trace proc status",
+	waitReasonPageTraceFlush:        "page trace flush",
+	waitReasonCoroutine:             "coroutine",
+}
+
+func (w waitReason) String() string {
+	if w < 0 || w >= waitReason(len(waitReasonStrings)) {
+		return "unknown wait reason"
+	}
+	return waitReasonStrings[w]
+}
+
+func (w waitReason) isMutexWait() bool {
+	return w == waitReasonSyncMutexLock ||
+		w == waitReasonSyncRWMutexRLock ||
+		w == waitReasonSyncRWMutexLock
+}
+
+var (
+	allm       *m
+	gomaxprocs int32
+	ncpu       int32
+	forcegc    forcegcstate
+	sched      schedt
+	newprocs   int32
+
+	// allpLock protects P-less reads and size changes of allp, idlepMask,
+	// and timerpMask, and all writes to allp.
+	allpLock mutex
+	// len(allp) == gomaxprocs; may change at safe points, otherwise
+	// immutable.
+	allp []*p
+	// Bitmask of Ps in _Pidle list, one bit per P. Reads and writes must
+	// be atomic. Length may change at safe points.
+	//
+	// Each P must update only its own bit. In order to maintain
+	// consistency, a P going idle must update the idle mask simultaneously with
+	// updates to the idle P list under the sched.lock, otherwise a racing
+	// pidleget may clear the mask before pidleput sets the mask,
+	// corrupting the bitmap.
+	//
+	// N.B., procresize takes ownership of all Ps in stopTheWorldWithSema.
+	idlepMask pMask
+	// Bitmask of Ps that may have a timer, one bit per P. Reads and writes
+	// must be atomic. Length may change at safe points.
+	timerpMask pMask
+
+	// Pool of GC parked background workers. Entries are type
+	// *gcBgMarkWorkerNode.
+	gcBgMarkWorkerPool lfstack
+
+	// Total number of gcBgMarkWorker goroutines. Protected by worldsema.
+	gcBgMarkWorkerCount int32
+
+	// Information about what cpu features are available.
+	// Packages outside the runtime should not use these
+	// as they are not an external api.
+	// Set on startup in asm_{386,amd64}.s
+	processorVersionInfo uint32
+	isIntel              bool
+
+	// set by cmd/link on arm systems
+	goarm       uint8
+	goarmsoftfp uint8
+)
+
+// Set by the linker so the runtime can determine the buildmode.
+var (
+	islibrary bool // -buildmode=c-shared
+	isarchive bool // -buildmode=c-archive
+)
+
+// Must agree with internal/buildcfg.FramePointerEnabled.
+const framepointer_enabled = GOARCH == "amd64" || GOARCH == "arm64"
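[Editor's note: idlepMask and timerpMask above are bitmasks with one bit per P. A sketch of the same representation — []uint32 words, bit id%32 of word id/32 — as a plain, non-atomic illustration; the runtime's pMask in proc.go uses atomic operations because readers run without a lock:]

package main

import "fmt"

type pMask []uint32

func (p pMask) read(id uint32) bool { return p[id/32]&(1<<(id%32)) != 0 }
func (p pMask) set(id uint32)       { p[id/32] |= 1 << (id % 32) }
func (p pMask) clear(id uint32)     { p[id/32] &^= 1 << (id % 32) }

func main() {
	mask := make(pMask, 4) // room for 128 Ps
	mask.set(42)
	fmt.Println(mask.read(42), mask.read(7)) // true false
}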