summaryrefslogtreecommitdiffstats
path: root/src/internal/poll/fd_unix.go
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/internal/poll/fd_unix.go586
1 files changed, 586 insertions, 0 deletions
diff --git a/src/internal/poll/fd_unix.go b/src/internal/poll/fd_unix.go
new file mode 100644
index 0000000..2e77e76
--- /dev/null
+++ b/src/internal/poll/fd_unix.go
@@ -0,0 +1,586 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build aix darwin dragonfly freebsd js,wasm linux netbsd openbsd solaris
+
+package poll
+
+import (
+ "io"
+ "sync/atomic"
+ "syscall"
+)
+
+// FD is a file descriptor. The net and os packages use this type as a
+// field of a larger type representing a network connection or OS file.
+type FD struct {
+ // Lock sysfd and serialize access to Read and Write methods.
+ fdmu fdMutex
+
+ // System file descriptor. Immutable until Close.
+ Sysfd int
+
+ // I/O poller.
+ pd pollDesc
+
+ // Writev cache.
+ iovecs *[]syscall.Iovec
+
+ // Semaphore signaled when file is closed.
+ csema uint32
+
+ // Non-zero if this file has been set to blocking mode.
+ isBlocking uint32
+
+ // Whether this is a streaming descriptor, as opposed to a
+ // packet-based descriptor like a UDP socket. Immutable.
+ IsStream bool
+
+ // Whether a zero byte read indicates EOF. This is false for a
+ // message based socket connection.
+ ZeroReadIsEOF bool
+
+ // Whether this is a file rather than a network socket.
+ isFile bool
+}
+
+// Init initializes the FD. The Sysfd field should already be set.
+// This can be called multiple times on a single FD.
+// The net argument is a network name from the net package (e.g., "tcp"),
+// or "file".
+// Set pollable to true if fd should be managed by runtime netpoll.
+func (fd *FD) Init(net string, pollable bool) error {
+ // We don't actually care about the various network types.
+ if net == "file" {
+ fd.isFile = true
+ }
+ if !pollable {
+ fd.isBlocking = 1
+ return nil
+ }
+ err := fd.pd.init(fd)
+ if err != nil {
+ // If we could not initialize the runtime poller,
+ // assume we are using blocking mode.
+ fd.isBlocking = 1
+ }
+ return err
+}
+
+// Destroy closes the file descriptor. This is called when there are
+// no remaining references.
+func (fd *FD) destroy() error {
+ // Poller may want to unregister fd in readiness notification mechanism,
+ // so this must be executed before CloseFunc.
+ fd.pd.close()
+
+ // We don't use ignoringEINTR here because POSIX does not define
+ // whether the descriptor is closed if close returns EINTR.
+ // If the descriptor is indeed closed, using a loop would race
+ // with some other goroutine opening a new descriptor.
+ // (The Linux kernel guarantees that it is closed on an EINTR error.)
+ err := CloseFunc(fd.Sysfd)
+
+ fd.Sysfd = -1
+ runtime_Semrelease(&fd.csema)
+ return err
+}
+
+// Close closes the FD. The underlying file descriptor is closed by the
+// destroy method when there are no remaining references.
+func (fd *FD) Close() error {
+ if !fd.fdmu.increfAndClose() {
+ return errClosing(fd.isFile)
+ }
+
+ // Unblock any I/O. Once it all unblocks and returns,
+ // so that it cannot be referring to fd.sysfd anymore,
+ // the final decref will close fd.sysfd. This should happen
+ // fairly quickly, since all the I/O is non-blocking, and any
+ // attempts to block in the pollDesc will return errClosing(fd.isFile).
+ fd.pd.evict()
+
+ // The call to decref will call destroy if there are no other
+ // references.
+ err := fd.decref()
+
+ // Wait until the descriptor is closed. If this was the only
+ // reference, it is already closed. Only wait if the file has
+ // not been set to blocking mode, as otherwise any current I/O
+ // may be blocking, and that would block the Close.
+ // No need for an atomic read of isBlocking, increfAndClose means
+ // we have exclusive access to fd.
+ if fd.isBlocking == 0 {
+ runtime_Semacquire(&fd.csema)
+ }
+
+ return err
+}
+
+// SetBlocking puts the file into blocking mode.
+func (fd *FD) SetBlocking() error {
+ if err := fd.incref(); err != nil {
+ return err
+ }
+ defer fd.decref()
+ // Atomic store so that concurrent calls to SetBlocking
+ // do not cause a race condition. isBlocking only ever goes
+ // from 0 to 1 so there is no real race here.
+ atomic.StoreUint32(&fd.isBlocking, 1)
+ return syscall.SetNonblock(fd.Sysfd, false)
+}
+
+// Darwin and FreeBSD can't read or write 2GB+ files at a time,
+// even on 64-bit systems.
+// The same is true of socket implementations on many systems.
+// See golang.org/issue/7812 and golang.org/issue/16266.
+// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned.
+const maxRW = 1 << 30
+
+// Read implements io.Reader.
+func (fd *FD) Read(p []byte) (int, error) {
+ if err := fd.readLock(); err != nil {
+ return 0, err
+ }
+ defer fd.readUnlock()
+ if len(p) == 0 {
+ // If the caller wanted a zero byte read, return immediately
+ // without trying (but after acquiring the readLock).
+ // Otherwise syscall.Read returns 0, nil which looks like
+ // io.EOF.
+ // TODO(bradfitz): make it wait for readability? (Issue 15735)
+ return 0, nil
+ }
+ if err := fd.pd.prepareRead(fd.isFile); err != nil {
+ return 0, err
+ }
+ if fd.IsStream && len(p) > maxRW {
+ p = p[:maxRW]
+ }
+ for {
+ n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p)
+ if err != nil {
+ n = 0
+ if err == syscall.EAGAIN && fd.pd.pollable() {
+ if err = fd.pd.waitRead(fd.isFile); err == nil {
+ continue
+ }
+ }
+ }
+ err = fd.eofError(n, err)
+ return n, err
+ }
+}
+
+// Pread wraps the pread system call.
+func (fd *FD) Pread(p []byte, off int64) (int, error) {
+ // Call incref, not readLock, because since pread specifies the
+ // offset it is independent from other reads.
+ // Similarly, using the poller doesn't make sense for pread.
+ if err := fd.incref(); err != nil {
+ return 0, err
+ }
+ if fd.IsStream && len(p) > maxRW {
+ p = p[:maxRW]
+ }
+ var (
+ n int
+ err error
+ )
+ for {
+ n, err = syscall.Pread(fd.Sysfd, p, off)
+ if err != syscall.EINTR {
+ break
+ }
+ }
+ if err != nil {
+ n = 0
+ }
+ fd.decref()
+ err = fd.eofError(n, err)
+ return n, err
+}
+
+// ReadFrom wraps the recvfrom network call.
+func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) {
+ if err := fd.readLock(); err != nil {
+ return 0, nil, err
+ }
+ defer fd.readUnlock()
+ if err := fd.pd.prepareRead(fd.isFile); err != nil {
+ return 0, nil, err
+ }
+ for {
+ n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0)
+ if err != nil {
+ if err == syscall.EINTR {
+ continue
+ }
+ n = 0
+ if err == syscall.EAGAIN && fd.pd.pollable() {
+ if err = fd.pd.waitRead(fd.isFile); err == nil {
+ continue
+ }
+ }
+ }
+ err = fd.eofError(n, err)
+ return n, sa, err
+ }
+}
+
+// ReadMsg wraps the recvmsg network call.
+func (fd *FD) ReadMsg(p []byte, oob []byte) (int, int, int, syscall.Sockaddr, error) {
+ if err := fd.readLock(); err != nil {
+ return 0, 0, 0, nil, err
+ }
+ defer fd.readUnlock()
+ if err := fd.pd.prepareRead(fd.isFile); err != nil {
+ return 0, 0, 0, nil, err
+ }
+ for {
+ n, oobn, flags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, 0)
+ if err != nil {
+ if err == syscall.EINTR {
+ continue
+ }
+ // TODO(dfc) should n and oobn be set to 0
+ if err == syscall.EAGAIN && fd.pd.pollable() {
+ if err = fd.pd.waitRead(fd.isFile); err == nil {
+ continue
+ }
+ }
+ }
+ err = fd.eofError(n, err)
+ return n, oobn, flags, sa, err
+ }
+}
+
+// Write implements io.Writer.
+func (fd *FD) Write(p []byte) (int, error) {
+ if err := fd.writeLock(); err != nil {
+ return 0, err
+ }
+ defer fd.writeUnlock()
+ if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+ return 0, err
+ }
+ var nn int
+ for {
+ max := len(p)
+ if fd.IsStream && max-nn > maxRW {
+ max = nn + maxRW
+ }
+ n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max])
+ if n > 0 {
+ nn += n
+ }
+ if nn == len(p) {
+ return nn, err
+ }
+ if err == syscall.EAGAIN && fd.pd.pollable() {
+ if err = fd.pd.waitWrite(fd.isFile); err == nil {
+ continue
+ }
+ }
+ if err != nil {
+ return nn, err
+ }
+ if n == 0 {
+ return nn, io.ErrUnexpectedEOF
+ }
+ }
+}
+
+// Pwrite wraps the pwrite system call.
+func (fd *FD) Pwrite(p []byte, off int64) (int, error) {
+ // Call incref, not writeLock, because since pwrite specifies the
+ // offset it is independent from other writes.
+ // Similarly, using the poller doesn't make sense for pwrite.
+ if err := fd.incref(); err != nil {
+ return 0, err
+ }
+ defer fd.decref()
+ var nn int
+ for {
+ max := len(p)
+ if fd.IsStream && max-nn > maxRW {
+ max = nn + maxRW
+ }
+ n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn))
+ if err == syscall.EINTR {
+ continue
+ }
+ if n > 0 {
+ nn += n
+ }
+ if nn == len(p) {
+ return nn, err
+ }
+ if err != nil {
+ return nn, err
+ }
+ if n == 0 {
+ return nn, io.ErrUnexpectedEOF
+ }
+ }
+}
+
+// WriteTo wraps the sendto network call.
+func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) {
+ if err := fd.writeLock(); err != nil {
+ return 0, err
+ }
+ defer fd.writeUnlock()
+ if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+ return 0, err
+ }
+ for {
+ err := syscall.Sendto(fd.Sysfd, p, 0, sa)
+ if err == syscall.EINTR {
+ continue
+ }
+ if err == syscall.EAGAIN && fd.pd.pollable() {
+ if err = fd.pd.waitWrite(fd.isFile); err == nil {
+ continue
+ }
+ }
+ if err != nil {
+ return 0, err
+ }
+ return len(p), nil
+ }
+}
+
+// WriteMsg wraps the sendmsg network call.
+func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
+ if err := fd.writeLock(); err != nil {
+ return 0, 0, err
+ }
+ defer fd.writeUnlock()
+ if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+ return 0, 0, err
+ }
+ for {
+ n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0)
+ if err == syscall.EINTR {
+ continue
+ }
+ if err == syscall.EAGAIN && fd.pd.pollable() {
+ if err = fd.pd.waitWrite(fd.isFile); err == nil {
+ continue
+ }
+ }
+ if err != nil {
+ return n, 0, err
+ }
+ return n, len(oob), err
+ }
+}
+
+// Accept wraps the accept network call.
+func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) {
+ if err := fd.readLock(); err != nil {
+ return -1, nil, "", err
+ }
+ defer fd.readUnlock()
+
+ if err := fd.pd.prepareRead(fd.isFile); err != nil {
+ return -1, nil, "", err
+ }
+ for {
+ s, rsa, errcall, err := accept(fd.Sysfd)
+ if err == nil {
+ return s, rsa, "", err
+ }
+ switch err {
+ case syscall.EINTR:
+ continue
+ case syscall.EAGAIN:
+ if fd.pd.pollable() {
+ if err = fd.pd.waitRead(fd.isFile); err == nil {
+ continue
+ }
+ }
+ case syscall.ECONNABORTED:
+ // This means that a socket on the listen
+ // queue was closed before we Accept()ed it;
+ // it's a silly error, so try again.
+ continue
+ }
+ return -1, nil, errcall, err
+ }
+}
+
+// Seek wraps syscall.Seek.
+func (fd *FD) Seek(offset int64, whence int) (int64, error) {
+ if err := fd.incref(); err != nil {
+ return 0, err
+ }
+ defer fd.decref()
+ return syscall.Seek(fd.Sysfd, offset, whence)
+}
+
+// ReadDirent wraps syscall.ReadDirent.
+// We treat this like an ordinary system call rather than a call
+// that tries to fill the buffer.
+func (fd *FD) ReadDirent(buf []byte) (int, error) {
+ if err := fd.incref(); err != nil {
+ return 0, err
+ }
+ defer fd.decref()
+ for {
+ n, err := ignoringEINTRIO(syscall.ReadDirent, fd.Sysfd, buf)
+ if err != nil {
+ n = 0
+ if err == syscall.EAGAIN && fd.pd.pollable() {
+ if err = fd.pd.waitRead(fd.isFile); err == nil {
+ continue
+ }
+ }
+ }
+ // Do not call eofError; caller does not expect to see io.EOF.
+ return n, err
+ }
+}
+
+// Fchmod wraps syscall.Fchmod.
+func (fd *FD) Fchmod(mode uint32) error {
+ if err := fd.incref(); err != nil {
+ return err
+ }
+ defer fd.decref()
+ return ignoringEINTR(func() error {
+ return syscall.Fchmod(fd.Sysfd, mode)
+ })
+}
+
+// Fchdir wraps syscall.Fchdir.
+func (fd *FD) Fchdir() error {
+ if err := fd.incref(); err != nil {
+ return err
+ }
+ defer fd.decref()
+ return syscall.Fchdir(fd.Sysfd)
+}
+
+// Fstat wraps syscall.Fstat
+func (fd *FD) Fstat(s *syscall.Stat_t) error {
+ if err := fd.incref(); err != nil {
+ return err
+ }
+ defer fd.decref()
+ return ignoringEINTR(func() error {
+ return syscall.Fstat(fd.Sysfd, s)
+ })
+}
+
+// tryDupCloexec indicates whether F_DUPFD_CLOEXEC should be used.
+// If the kernel doesn't support it, this is set to 0.
+var tryDupCloexec = int32(1)
+
+// DupCloseOnExec dups fd and marks it close-on-exec.
+func DupCloseOnExec(fd int) (int, string, error) {
+ if syscall.F_DUPFD_CLOEXEC != 0 && atomic.LoadInt32(&tryDupCloexec) == 1 {
+ r0, e1 := fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0)
+ if e1 == nil {
+ return r0, "", nil
+ }
+ switch e1.(syscall.Errno) {
+ case syscall.EINVAL, syscall.ENOSYS:
+ // Old kernel, or js/wasm (which returns
+ // ENOSYS). Fall back to the portable way from
+ // now on.
+ atomic.StoreInt32(&tryDupCloexec, 0)
+ default:
+ return -1, "fcntl", e1
+ }
+ }
+ return dupCloseOnExecOld(fd)
+}
+
+// dupCloseOnExecOld is the traditional way to dup an fd and
+// set its O_CLOEXEC bit, using two system calls.
+func dupCloseOnExecOld(fd int) (int, string, error) {
+ syscall.ForkLock.RLock()
+ defer syscall.ForkLock.RUnlock()
+ newfd, err := syscall.Dup(fd)
+ if err != nil {
+ return -1, "dup", err
+ }
+ syscall.CloseOnExec(newfd)
+ return newfd, "", nil
+}
+
+// Dup duplicates the file descriptor.
+func (fd *FD) Dup() (int, string, error) {
+ if err := fd.incref(); err != nil {
+ return -1, "", err
+ }
+ defer fd.decref()
+ return DupCloseOnExec(fd.Sysfd)
+}
+
+// On Unix variants only, expose the IO event for the net code.
+
+// WaitWrite waits until data can be read from fd.
+func (fd *FD) WaitWrite() error {
+ return fd.pd.waitWrite(fd.isFile)
+}
+
+// WriteOnce is for testing only. It makes a single write call.
+func (fd *FD) WriteOnce(p []byte) (int, error) {
+ if err := fd.writeLock(); err != nil {
+ return 0, err
+ }
+ defer fd.writeUnlock()
+ return ignoringEINTRIO(syscall.Write, fd.Sysfd, p)
+}
+
+// RawRead invokes the user-defined function f for a read operation.
+func (fd *FD) RawRead(f func(uintptr) bool) error {
+ if err := fd.readLock(); err != nil {
+ return err
+ }
+ defer fd.readUnlock()
+ if err := fd.pd.prepareRead(fd.isFile); err != nil {
+ return err
+ }
+ for {
+ if f(uintptr(fd.Sysfd)) {
+ return nil
+ }
+ if err := fd.pd.waitRead(fd.isFile); err != nil {
+ return err
+ }
+ }
+}
+
+// RawWrite invokes the user-defined function f for a write operation.
+func (fd *FD) RawWrite(f func(uintptr) bool) error {
+ if err := fd.writeLock(); err != nil {
+ return err
+ }
+ defer fd.writeUnlock()
+ if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+ return err
+ }
+ for {
+ if f(uintptr(fd.Sysfd)) {
+ return nil
+ }
+ if err := fd.pd.waitWrite(fd.isFile); err != nil {
+ return err
+ }
+ }
+}
+
+// ignoringEINTRIO is like ignoringEINTR, but just for IO calls.
+func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) {
+ for {
+ n, err := fn(fd, p)
+ if err != syscall.EINTR {
+ return n, err
+ }
+ }
+}