summaryrefslogtreecommitdiffstats
path: root/src/runtime/netpoll.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/netpoll.go')
-rw-r--r--src/runtime/netpoll.go652
1 files changed, 652 insertions, 0 deletions
diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go
new file mode 100644
index 0000000..bb3dd35
--- /dev/null
+++ b/src/runtime/netpoll.go
@@ -0,0 +1,652 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris || windows
+
+package runtime
+
+import (
+ "runtime/internal/atomic"
+ "unsafe"
+)
+
+// Integrated network poller (platform-independent part).
+// A particular implementation (epoll/kqueue/port/AIX/Windows)
+// must define the following functions:
+//
+// func netpollinit()
+// Initialize the poller. Only called once.
+//
+// func netpollopen(fd uintptr, pd *pollDesc) int32
+// Arm edge-triggered notifications for fd. The pd argument is to pass
+// back to netpollready when fd is ready. Return an errno value.
+//
+// func netpollclose(fd uintptr) int32
+// Disable notifications for fd. Return an errno value.
+//
+// func netpoll(delta int64) gList
+// Poll the network. If delta < 0, block indefinitely. If delta == 0,
+// poll without blocking. If delta > 0, block for up to delta nanoseconds.
+// Return a list of goroutines built by calling netpollready.
+//
+// func netpollBreak()
+// Wake up the network poller, assumed to be blocked in netpoll.
+//
+// func netpollIsPollDescriptor(fd uintptr) bool
+// Reports whether fd is a file descriptor used by the poller.
+
+// Error codes returned by runtime_pollReset and runtime_pollWait.
+// These must match the values in internal/poll/fd_poll_runtime.go.
+const (
+ pollNoError = 0 // no error
+ pollErrClosing = 1 // descriptor is closed
+ pollErrTimeout = 2 // I/O timeout
+ pollErrNotPollable = 3 // general error polling descriptor
+)
+
+// pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
+// goroutines respectively. The semaphore can be in the following states:
+// pdReady - io readiness notification is pending;
+// a goroutine consumes the notification by changing the state to nil.
+// pdWait - a goroutine prepares to park on the semaphore, but not yet parked;
+// the goroutine commits to park by changing the state to G pointer,
+// or, alternatively, concurrent io notification changes the state to pdReady,
+// or, alternatively, concurrent timeout/close changes the state to nil.
+// G pointer - the goroutine is blocked on the semaphore;
+// io notification or timeout/close changes the state to pdReady or nil respectively
+// and unparks the goroutine.
+// nil - none of the above.
+const (
+ pdReady uintptr = 1
+ pdWait uintptr = 2
+)
+
+const pollBlockSize = 4 * 1024
+
+// Network poller descriptor.
+//
+// No heap pointers.
+//
+//go:notinheap
+type pollDesc struct {
+ link *pollDesc // in pollcache, protected by pollcache.lock
+ fd uintptr // constant for pollDesc usage lifetime
+
+ // atomicInfo holds bits from closing, rd, and wd,
+ // which are only ever written while holding the lock,
+ // summarized for use by netpollcheckerr,
+ // which cannot acquire the lock.
+ // After writing these fields under lock in a way that
+ // might change the summary, code must call publishInfo
+ // before releasing the lock.
+ // Code that changes fields and then calls netpollunblock
+ // (while still holding the lock) must call publishInfo
+ // before calling netpollunblock, because publishInfo is what
+ // stops netpollblock from blocking anew
+ // (by changing the result of netpollcheckerr).
+ // atomicInfo also holds the eventErr bit,
+ // recording whether a poll event on the fd got an error;
+ // atomicInfo is the only source of truth for that bit.
+ atomicInfo atomic.Uint32 // atomic pollInfo
+
+ // rg, wg are accessed atomically and hold g pointers.
+ // (Using atomic.Uintptr here is similar to using guintptr elsewhere.)
+ rg atomic.Uintptr // pdReady, pdWait, G waiting for read or nil
+ wg atomic.Uintptr // pdReady, pdWait, G waiting for write or nil
+
+ lock mutex // protects the following fields
+ closing bool
+ user uint32 // user settable cookie
+ rseq uintptr // protects from stale read timers
+ rt timer // read deadline timer (set if rt.f != nil)
+ rd int64 // read deadline (a nanotime in the future, -1 when expired)
+ wseq uintptr // protects from stale write timers
+ wt timer // write deadline timer
+ wd int64 // write deadline (a nanotime in the future, -1 when expired)
+ self *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
+}
+
+// pollInfo is the bits needed by netpollcheckerr, stored atomically,
+// mostly duplicating state that is manipulated under lock in pollDesc.
+// The one exception is the pollEventErr bit, which is maintained only
+// in the pollInfo.
+type pollInfo uint32
+
+const (
+ pollClosing = 1 << iota
+ pollEventErr
+ pollExpiredReadDeadline
+ pollExpiredWriteDeadline
+)
+
+func (i pollInfo) closing() bool { return i&pollClosing != 0 }
+func (i pollInfo) eventErr() bool { return i&pollEventErr != 0 }
+func (i pollInfo) expiredReadDeadline() bool { return i&pollExpiredReadDeadline != 0 }
+func (i pollInfo) expiredWriteDeadline() bool { return i&pollExpiredWriteDeadline != 0 }
+
+// info returns the pollInfo corresponding to pd.
+func (pd *pollDesc) info() pollInfo {
+ return pollInfo(pd.atomicInfo.Load())
+}
+
+// publishInfo updates pd.atomicInfo (returned by pd.info)
+// using the other values in pd.
+// It must be called while holding pd.lock,
+// and it must be called after changing anything
+// that might affect the info bits.
+// In practice this means after changing closing
+// or changing rd or wd from < 0 to >= 0.
+func (pd *pollDesc) publishInfo() {
+ var info uint32
+ if pd.closing {
+ info |= pollClosing
+ }
+ if pd.rd < 0 {
+ info |= pollExpiredReadDeadline
+ }
+ if pd.wd < 0 {
+ info |= pollExpiredWriteDeadline
+ }
+
+ // Set all of x except the pollEventErr bit.
+ x := pd.atomicInfo.Load()
+ for !pd.atomicInfo.CompareAndSwap(x, (x&pollEventErr)|info) {
+ x = pd.atomicInfo.Load()
+ }
+}
+
+// setEventErr sets the result of pd.info().eventErr() to b.
+func (pd *pollDesc) setEventErr(b bool) {
+ x := pd.atomicInfo.Load()
+ for (x&pollEventErr != 0) != b && !pd.atomicInfo.CompareAndSwap(x, x^pollEventErr) {
+ x = pd.atomicInfo.Load()
+ }
+}
+
+type pollCache struct {
+ lock mutex
+ first *pollDesc
+ // PollDesc objects must be type-stable,
+ // because we can get ready notification from epoll/kqueue
+ // after the descriptor is closed/reused.
+ // Stale notifications are detected using seq variable,
+ // seq is incremented when deadlines are changed or descriptor is reused.
+}
+
+var (
+ netpollInitLock mutex
+ netpollInited uint32
+
+ pollcache pollCache
+ netpollWaiters uint32
+)
+
+//go:linkname poll_runtime_pollServerInit internal/poll.runtime_pollServerInit
+func poll_runtime_pollServerInit() {
+ netpollGenericInit()
+}
+
+func netpollGenericInit() {
+ if atomic.Load(&netpollInited) == 0 {
+ lockInit(&netpollInitLock, lockRankNetpollInit)
+ lock(&netpollInitLock)
+ if netpollInited == 0 {
+ netpollinit()
+ atomic.Store(&netpollInited, 1)
+ }
+ unlock(&netpollInitLock)
+ }
+}
+
+func netpollinited() bool {
+ return atomic.Load(&netpollInited) != 0
+}
+
+//go:linkname poll_runtime_isPollServerDescriptor internal/poll.runtime_isPollServerDescriptor
+
+// poll_runtime_isPollServerDescriptor reports whether fd is a
+// descriptor being used by netpoll.
+func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
+ return netpollIsPollDescriptor(fd)
+}
+
+//go:linkname poll_runtime_pollOpen internal/poll.runtime_pollOpen
+func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
+ pd := pollcache.alloc()
+ lock(&pd.lock)
+ wg := pd.wg.Load()
+ if wg != 0 && wg != pdReady {
+ throw("runtime: blocked write on free polldesc")
+ }
+ rg := pd.rg.Load()
+ if rg != 0 && rg != pdReady {
+ throw("runtime: blocked read on free polldesc")
+ }
+ pd.fd = fd
+ pd.closing = false
+ pd.setEventErr(false)
+ pd.rseq++
+ pd.rg.Store(0)
+ pd.rd = 0
+ pd.wseq++
+ pd.wg.Store(0)
+ pd.wd = 0
+ pd.self = pd
+ pd.publishInfo()
+ unlock(&pd.lock)
+
+ errno := netpollopen(fd, pd)
+ if errno != 0 {
+ pollcache.free(pd)
+ return nil, int(errno)
+ }
+ return pd, 0
+}
+
+//go:linkname poll_runtime_pollClose internal/poll.runtime_pollClose
+func poll_runtime_pollClose(pd *pollDesc) {
+ if !pd.closing {
+ throw("runtime: close polldesc w/o unblock")
+ }
+ wg := pd.wg.Load()
+ if wg != 0 && wg != pdReady {
+ throw("runtime: blocked write on closing polldesc")
+ }
+ rg := pd.rg.Load()
+ if rg != 0 && rg != pdReady {
+ throw("runtime: blocked read on closing polldesc")
+ }
+ netpollclose(pd.fd)
+ pollcache.free(pd)
+}
+
+func (c *pollCache) free(pd *pollDesc) {
+ lock(&c.lock)
+ pd.link = c.first
+ c.first = pd
+ unlock(&c.lock)
+}
+
+// poll_runtime_pollReset, which is internal/poll.runtime_pollReset,
+// prepares a descriptor for polling in mode, which is 'r' or 'w'.
+// This returns an error code; the codes are defined above.
+//go:linkname poll_runtime_pollReset internal/poll.runtime_pollReset
+func poll_runtime_pollReset(pd *pollDesc, mode int) int {
+ errcode := netpollcheckerr(pd, int32(mode))
+ if errcode != pollNoError {
+ return errcode
+ }
+ if mode == 'r' {
+ pd.rg.Store(0)
+ } else if mode == 'w' {
+ pd.wg.Store(0)
+ }
+ return pollNoError
+}
+
+// poll_runtime_pollWait, which is internal/poll.runtime_pollWait,
+// waits for a descriptor to be ready for reading or writing,
+// according to mode, which is 'r' or 'w'.
+// This returns an error code; the codes are defined above.
+//go:linkname poll_runtime_pollWait internal/poll.runtime_pollWait
+func poll_runtime_pollWait(pd *pollDesc, mode int) int {
+ errcode := netpollcheckerr(pd, int32(mode))
+ if errcode != pollNoError {
+ return errcode
+ }
+ // As for now only Solaris, illumos, and AIX use level-triggered IO.
+ if GOOS == "solaris" || GOOS == "illumos" || GOOS == "aix" {
+ netpollarm(pd, mode)
+ }
+ for !netpollblock(pd, int32(mode), false) {
+ errcode = netpollcheckerr(pd, int32(mode))
+ if errcode != pollNoError {
+ return errcode
+ }
+ // Can happen if timeout has fired and unblocked us,
+ // but before we had a chance to run, timeout has been reset.
+ // Pretend it has not happened and retry.
+ }
+ return pollNoError
+}
+
+//go:linkname poll_runtime_pollWaitCanceled internal/poll.runtime_pollWaitCanceled
+func poll_runtime_pollWaitCanceled(pd *pollDesc, mode int) {
+ // This function is used only on windows after a failed attempt to cancel
+ // a pending async IO operation. Wait for ioready, ignore closing or timeouts.
+ for !netpollblock(pd, int32(mode), true) {
+ }
+}
+
+//go:linkname poll_runtime_pollSetDeadline internal/poll.runtime_pollSetDeadline
+func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) {
+ lock(&pd.lock)
+ if pd.closing {
+ unlock(&pd.lock)
+ return
+ }
+ rd0, wd0 := pd.rd, pd.wd
+ combo0 := rd0 > 0 && rd0 == wd0
+ if d > 0 {
+ d += nanotime()
+ if d <= 0 {
+ // If the user has a deadline in the future, but the delay calculation
+ // overflows, then set the deadline to the maximum possible value.
+ d = 1<<63 - 1
+ }
+ }
+ if mode == 'r' || mode == 'r'+'w' {
+ pd.rd = d
+ }
+ if mode == 'w' || mode == 'r'+'w' {
+ pd.wd = d
+ }
+ pd.publishInfo()
+ combo := pd.rd > 0 && pd.rd == pd.wd
+ rtf := netpollReadDeadline
+ if combo {
+ rtf = netpollDeadline
+ }
+ if pd.rt.f == nil {
+ if pd.rd > 0 {
+ pd.rt.f = rtf
+ // Copy current seq into the timer arg.
+ // Timer func will check the seq against current descriptor seq,
+ // if they differ the descriptor was reused or timers were reset.
+ pd.rt.arg = pd.makeArg()
+ pd.rt.seq = pd.rseq
+ resettimer(&pd.rt, pd.rd)
+ }
+ } else if pd.rd != rd0 || combo != combo0 {
+ pd.rseq++ // invalidate current timers
+ if pd.rd > 0 {
+ modtimer(&pd.rt, pd.rd, 0, rtf, pd.makeArg(), pd.rseq)
+ } else {
+ deltimer(&pd.rt)
+ pd.rt.f = nil
+ }
+ }
+ if pd.wt.f == nil {
+ if pd.wd > 0 && !combo {
+ pd.wt.f = netpollWriteDeadline
+ pd.wt.arg = pd.makeArg()
+ pd.wt.seq = pd.wseq
+ resettimer(&pd.wt, pd.wd)
+ }
+ } else if pd.wd != wd0 || combo != combo0 {
+ pd.wseq++ // invalidate current timers
+ if pd.wd > 0 && !combo {
+ modtimer(&pd.wt, pd.wd, 0, netpollWriteDeadline, pd.makeArg(), pd.wseq)
+ } else {
+ deltimer(&pd.wt)
+ pd.wt.f = nil
+ }
+ }
+ // If we set the new deadline in the past, unblock currently pending IO if any.
+ // Note that pd.publishInfo has already been called, above, immediately after modifying rd and wd.
+ var rg, wg *g
+ if pd.rd < 0 {
+ rg = netpollunblock(pd, 'r', false)
+ }
+ if pd.wd < 0 {
+ wg = netpollunblock(pd, 'w', false)
+ }
+ unlock(&pd.lock)
+ if rg != nil {
+ netpollgoready(rg, 3)
+ }
+ if wg != nil {
+ netpollgoready(wg, 3)
+ }
+}
+
+//go:linkname poll_runtime_pollUnblock internal/poll.runtime_pollUnblock
+func poll_runtime_pollUnblock(pd *pollDesc) {
+ lock(&pd.lock)
+ if pd.closing {
+ throw("runtime: unblock on closing polldesc")
+ }
+ pd.closing = true
+ pd.rseq++
+ pd.wseq++
+ var rg, wg *g
+ pd.publishInfo()
+ rg = netpollunblock(pd, 'r', false)
+ wg = netpollunblock(pd, 'w', false)
+ if pd.rt.f != nil {
+ deltimer(&pd.rt)
+ pd.rt.f = nil
+ }
+ if pd.wt.f != nil {
+ deltimer(&pd.wt)
+ pd.wt.f = nil
+ }
+ unlock(&pd.lock)
+ if rg != nil {
+ netpollgoready(rg, 3)
+ }
+ if wg != nil {
+ netpollgoready(wg, 3)
+ }
+}
+
+// netpollready is called by the platform-specific netpoll function.
+// It declares that the fd associated with pd is ready for I/O.
+// The toRun argument is used to build a list of goroutines to return
+// from netpoll. The mode argument is 'r', 'w', or 'r'+'w' to indicate
+// whether the fd is ready for reading or writing or both.
+//
+// This may run while the world is stopped, so write barriers are not allowed.
+//go:nowritebarrier
+func netpollready(toRun *gList, pd *pollDesc, mode int32) {
+ var rg, wg *g
+ if mode == 'r' || mode == 'r'+'w' {
+ rg = netpollunblock(pd, 'r', true)
+ }
+ if mode == 'w' || mode == 'r'+'w' {
+ wg = netpollunblock(pd, 'w', true)
+ }
+ if rg != nil {
+ toRun.push(rg)
+ }
+ if wg != nil {
+ toRun.push(wg)
+ }
+}
+
+func netpollcheckerr(pd *pollDesc, mode int32) int {
+ info := pd.info()
+ if info.closing() {
+ return pollErrClosing
+ }
+ if (mode == 'r' && info.expiredReadDeadline()) || (mode == 'w' && info.expiredWriteDeadline()) {
+ return pollErrTimeout
+ }
+ // Report an event scanning error only on a read event.
+ // An error on a write event will be captured in a subsequent
+ // write call that is able to report a more specific error.
+ if mode == 'r' && info.eventErr() {
+ return pollErrNotPollable
+ }
+ return pollNoError
+}
+
+func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool {
+ r := atomic.Casuintptr((*uintptr)(gpp), pdWait, uintptr(unsafe.Pointer(gp)))
+ if r {
+ // Bump the count of goroutines waiting for the poller.
+ // The scheduler uses this to decide whether to block
+ // waiting for the poller if there is nothing else to do.
+ atomic.Xadd(&netpollWaiters, 1)
+ }
+ return r
+}
+
+func netpollgoready(gp *g, traceskip int) {
+ atomic.Xadd(&netpollWaiters, -1)
+ goready(gp, traceskip+1)
+}
+
+// returns true if IO is ready, or false if timedout or closed
+// waitio - wait only for completed IO, ignore errors
+// Concurrent calls to netpollblock in the same mode are forbidden, as pollDesc
+// can hold only a single waiting goroutine for each mode.
+func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
+ gpp := &pd.rg
+ if mode == 'w' {
+ gpp = &pd.wg
+ }
+
+ // set the gpp semaphore to pdWait
+ for {
+ // Consume notification if already ready.
+ if gpp.CompareAndSwap(pdReady, 0) {
+ return true
+ }
+ if gpp.CompareAndSwap(0, pdWait) {
+ break
+ }
+
+ // Double check that this isn't corrupt; otherwise we'd loop
+ // forever.
+ if v := gpp.Load(); v != pdReady && v != 0 {
+ throw("runtime: double wait")
+ }
+ }
+
+ // need to recheck error states after setting gpp to pdWait
+ // this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
+ // do the opposite: store to closing/rd/wd, publishInfo, load of rg/wg
+ if waitio || netpollcheckerr(pd, mode) == pollNoError {
+ gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
+ }
+ // be careful to not lose concurrent pdReady notification
+ old := gpp.Swap(0)
+ if old > pdWait {
+ throw("runtime: corrupted polldesc")
+ }
+ return old == pdReady
+}
+
+func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
+ gpp := &pd.rg
+ if mode == 'w' {
+ gpp = &pd.wg
+ }
+
+ for {
+ old := gpp.Load()
+ if old == pdReady {
+ return nil
+ }
+ if old == 0 && !ioready {
+ // Only set pdReady for ioready. runtime_pollWait
+ // will check for timeout/cancel before waiting.
+ return nil
+ }
+ var new uintptr
+ if ioready {
+ new = pdReady
+ }
+ if gpp.CompareAndSwap(old, new) {
+ if old == pdWait {
+ old = 0
+ }
+ return (*g)(unsafe.Pointer(old))
+ }
+ }
+}
+
+func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
+ lock(&pd.lock)
+ // Seq arg is seq when the timer was set.
+ // If it's stale, ignore the timer event.
+ currentSeq := pd.rseq
+ if !read {
+ currentSeq = pd.wseq
+ }
+ if seq != currentSeq {
+ // The descriptor was reused or timers were reset.
+ unlock(&pd.lock)
+ return
+ }
+ var rg *g
+ if read {
+ if pd.rd <= 0 || pd.rt.f == nil {
+ throw("runtime: inconsistent read deadline")
+ }
+ pd.rd = -1
+ pd.publishInfo()
+ rg = netpollunblock(pd, 'r', false)
+ }
+ var wg *g
+ if write {
+ if pd.wd <= 0 || pd.wt.f == nil && !read {
+ throw("runtime: inconsistent write deadline")
+ }
+ pd.wd = -1
+ pd.publishInfo()
+ wg = netpollunblock(pd, 'w', false)
+ }
+ unlock(&pd.lock)
+ if rg != nil {
+ netpollgoready(rg, 0)
+ }
+ if wg != nil {
+ netpollgoready(wg, 0)
+ }
+}
+
+func netpollDeadline(arg any, seq uintptr) {
+ netpolldeadlineimpl(arg.(*pollDesc), seq, true, true)
+}
+
+func netpollReadDeadline(arg any, seq uintptr) {
+ netpolldeadlineimpl(arg.(*pollDesc), seq, true, false)
+}
+
+func netpollWriteDeadline(arg any, seq uintptr) {
+ netpolldeadlineimpl(arg.(*pollDesc), seq, false, true)
+}
+
+func (c *pollCache) alloc() *pollDesc {
+ lock(&c.lock)
+ if c.first == nil {
+ const pdSize = unsafe.Sizeof(pollDesc{})
+ n := pollBlockSize / pdSize
+ if n == 0 {
+ n = 1
+ }
+ // Must be in non-GC memory because can be referenced
+ // only from epoll/kqueue internals.
+ mem := persistentalloc(n*pdSize, 0, &memstats.other_sys)
+ for i := uintptr(0); i < n; i++ {
+ pd := (*pollDesc)(add(mem, i*pdSize))
+ pd.link = c.first
+ c.first = pd
+ }
+ }
+ pd := c.first
+ c.first = pd.link
+ lockInit(&pd.lock, lockRankPollDesc)
+ unlock(&c.lock)
+ return pd
+}
+
+// makeArg converts pd to an interface{}.
+// makeArg does not do any allocation. Normally, such
+// a conversion requires an allocation because pointers to
+// go:notinheap types (which pollDesc is) must be stored
+// in interfaces indirectly. See issue 42076.
+func (pd *pollDesc) makeArg() (i any) {
+ x := (*eface)(unsafe.Pointer(&i))
+ x._type = pdType
+ x.data = unsafe.Pointer(&pd.self)
+ return
+}
+
+var (
+ pdEface any = (*pollDesc)(nil)
+ pdType *_type = efaceOf(&pdEface)._type
+)