diff options
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r-- | net/unix/af_unix.c | 220 |
1 files changed, 150 insertions, 70 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index eb90a25550..68a58bc07c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -118,8 +118,6 @@ #include <linux/btf_ids.h> #include <linux/bpf-cgroup.h> -#include "scm.h" - static atomic_long_t unix_nr_socks; static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2]; static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2]; @@ -223,15 +221,9 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk) return unix_peer(osk) == NULL || unix_our_peer(sk, osk); } -static inline int unix_recvq_full(const struct sock *sk) -{ - return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; -} - static inline int unix_recvq_full_lockless(const struct sock *sk) { - return skb_queue_len_lockless(&sk->sk_receive_queue) > - READ_ONCE(sk->sk_max_ack_backlog); + return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; } struct sock *unix_peer_get(struct sock *s) @@ -532,10 +524,10 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) return 0; } -static int unix_writable(const struct sock *sk) +static int unix_writable(const struct sock *sk, unsigned char state) { - return sk->sk_state != TCP_LISTEN && - (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; + return state != TCP_LISTEN && + (refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf); } static void unix_write_space(struct sock *sk) @@ -543,7 +535,7 @@ static void unix_write_space(struct sock *sk) struct socket_wq *wq; rcu_read_lock(); - if (unix_writable(sk)) { + if (unix_writable(sk, READ_ONCE(sk->sk_state))) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, @@ -572,7 +564,6 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) sk_error_report(other); } } - other->sk_state = TCP_CLOSE; } static void unix_sock_destructor(struct sock *sk) @@ -619,7 +610,7 @@ static void unix_release_sock(struct sock *sk, int embrion) u->path.dentry = NULL; u->path.mnt = NULL; state = sk->sk_state; - sk->sk_state = TCP_CLOSE; + WRITE_ONCE(sk->sk_state, TCP_CLOSE); skpair = unix_peer(sk); unix_peer(sk) = NULL; @@ -640,7 +631,7 @@ static void unix_release_sock(struct sock *sk, int embrion) unix_state_lock(skpair); /* No more writes */ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); - if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion) WRITE_ONCE(skpair->sk_err, ECONNRESET); unix_state_unlock(skpair); skpair->sk_state_change(skpair); @@ -733,7 +724,7 @@ static int unix_listen(struct socket *sock, int backlog) if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto out; /* Only stream/seqpacket sockets accept */ err = -EINVAL; - if (!u->addr) + if (!READ_ONCE(u->addr)) goto out; /* No listens on an unbound socket */ unix_state_lock(sk); if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) @@ -741,7 +732,8 @@ static int unix_listen(struct socket *sock, int backlog) if (backlog > sk->sk_max_ack_backlog) wake_up_interruptible_all(&u->peer_wait); sk->sk_max_ack_backlog = backlog; - sk->sk_state = TCP_LISTEN; + WRITE_ONCE(sk->sk_state, TCP_LISTEN); + /* set credentials so connect can copy them */ init_peercred(sk); err = 0; @@ -978,7 +970,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_hash = unix_unbound_hash(sk); sk->sk_allocation = GFP_KERNEL_ACCOUNT; sk->sk_write_space = unix_write_space; - sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; + sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen); sk->sk_destruct = unix_sock_destructor; u = unix_sk(sk); u->inflight = 0; @@ -1133,8 +1125,8 @@ static struct sock *unix_find_other(struct net *net, static int unix_autobind(struct sock *sk) { - unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); + unsigned int new_hash, old_hash; struct net *net = sock_net(sk); struct unix_address *addr; u32 lastnum, ordernum; @@ -1157,6 +1149,7 @@ static int unix_autobind(struct sock *sk) addr->name->sun_family = AF_UNIX; refcount_set(&addr->refcnt, 1); + old_hash = sk->sk_hash; ordernum = get_random_u32(); lastnum = ordernum & 0xFFFFF; retry: @@ -1197,8 +1190,8 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, { umode_t mode = S_IFSOCK | (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); - unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); + unsigned int new_hash, old_hash; struct net *net = sock_net(sk); struct mnt_idmap *idmap; struct unix_address *addr; @@ -1236,6 +1229,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, if (u->addr) goto out_unlock; + old_hash = sk->sk_hash; new_hash = unix_bsd_hash(d_backing_inode(dentry)); unix_table_double_lock(net, old_hash, new_hash); u->path.mnt = mntget(parent.mnt); @@ -1263,8 +1257,8 @@ out: static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, int addr_len) { - unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); + unsigned int new_hash, old_hash; struct net *net = sock_net(sk); struct unix_address *addr; int err; @@ -1282,6 +1276,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, goto out_mutex; } + old_hash = sk->sk_hash; new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); unix_table_double_lock(net, old_hash, new_hash); @@ -1371,7 +1366,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && - !unix_sk(sk)->addr) { + !READ_ONCE(unix_sk(sk)->addr)) { err = unix_autobind(sk); if (err) goto out; @@ -1401,7 +1396,8 @@ restart: if (err) goto out_unlock; - sk->sk_state = other->sk_state = TCP_ESTABLISHED; + WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED); + WRITE_ONCE(other->sk_state, TCP_ESTABLISHED); } else { /* * 1003.1g breaking connected state with AF_UNSPEC @@ -1418,13 +1414,20 @@ restart: unix_peer(sk) = other; if (!other) - sk->sk_state = TCP_CLOSE; + WRITE_ONCE(sk->sk_state, TCP_CLOSE); unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); unix_state_double_unlock(sk, other); - if (other != old_peer) + if (other != old_peer) { unix_dgram_disconnected(sk, old_peer); + + unix_state_lock(old_peer); + if (!unix_peer(old_peer)) + WRITE_ONCE(old_peer->sk_state, TCP_CLOSE); + unix_state_unlock(old_peer); + } + sock_put(old_peer); } else { unix_peer(sk) = other; @@ -1472,7 +1475,6 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, struct sk_buff *skb = NULL; long timeo; int err; - int st; err = unix_validate_addr(sunaddr, addr_len); if (err) @@ -1483,7 +1485,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out; if ((test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { + test_bit(SOCK_PASSPIDFD, &sock->flags)) && + !READ_ONCE(u->addr)) { err = unix_autobind(sk); if (err) goto out; @@ -1536,7 +1539,7 @@ restart: if (other->sk_shutdown & RCV_SHUTDOWN) goto out_unlock; - if (unix_recvq_full(other)) { + if (unix_recvq_full_lockless(other)) { err = -EAGAIN; if (!timeo) goto out_unlock; @@ -1561,9 +1564,7 @@ restart: Well, and we have to recheck the state after socket locked. */ - st = sk->sk_state; - - switch (st) { + switch (READ_ONCE(sk->sk_state)) { case TCP_CLOSE: /* This is ok... continue with connect */ break; @@ -1578,7 +1579,7 @@ restart: unix_state_lock_nested(sk, U_LOCK_SECOND); - if (sk->sk_state != st) { + if (sk->sk_state != TCP_CLOSE) { unix_state_unlock(sk); unix_state_unlock(other); sock_put(other); @@ -1630,7 +1631,7 @@ restart: copy_peercred(sk, other); sock->state = SS_CONNECTED; - sk->sk_state = TCP_ESTABLISHED; + WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED); sock_hold(newsk); smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ @@ -1703,7 +1704,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, goto out; err = -EINVAL; - if (sk->sk_state != TCP_LISTEN) + if (READ_ONCE(sk->sk_state) != TCP_LISTEN) goto out; /* If socket state is TCP_LISTEN it cannot change (for now...), @@ -1775,6 +1776,52 @@ out: return err; } +/* The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. + */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + +static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ + int i; + + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + + /* Need to duplicate file references for the sake of garbage + * collection. Otherwise a socket in the fps might become a + * candidate for GC while the skb is not yet queued. + */ + UNIXCB(skb).fp = scm_fp_dup(scm->fp); + if (!UNIXCB(skb).fp) + return -ENOMEM; + + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->user, scm->fp->fp[i]); + + return 0; +} + +static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ + int i; + + scm->fp = UNIXCB(skb).fp; + UNIXCB(skb).fp = NULL; + + for (i = scm->fp->count - 1; i >= 0; i--) + unix_notinflight(scm->fp->user, scm->fp->fp[i]); +} + static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) { scm->fp = scm_fp_dup(UNIXCB(skb).fp); @@ -1822,6 +1869,21 @@ static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) spin_unlock(&unix_gc_lock); } +static void unix_destruct_scm(struct sk_buff *skb) +{ + struct scm_cookie scm; + + memset(&scm, 0, sizeof(scm)); + scm.pid = UNIXCB(skb).pid; + if (UNIXCB(skb).fp) + unix_detach_fds(&scm, skb); + + /* Alas, it calls VFS */ + /* So fscking what? fput() had been SMP-safe since the last Summer */ + scm_destroy(&scm); + sock_wfree(skb); +} + static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; @@ -1908,11 +1970,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, long timeo; int err; - wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); if (err < 0) return err; + wait_for_unix_gc(scm.fp); + err = -EOPNOTSUPP; if (msg->msg_flags&MSG_OOB) goto out; @@ -1937,14 +2000,15 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, } if ((test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { + test_bit(SOCK_PASSPIDFD, &sock->flags)) && + !READ_ONCE(u->addr)) { err = unix_autobind(sk); if (err) goto out; } err = -EMSGSIZE; - if (len > sk->sk_sndbuf - 32) + if (len > READ_ONCE(sk->sk_sndbuf) - 32) goto out; if (len > SKB_MAX_ALLOC) { @@ -2026,7 +2090,7 @@ restart_locked: unix_peer(sk) = NULL; unix_dgram_peer_wake_disconnect_wakeup(sk, other); - sk->sk_state = TCP_CLOSE; + WRITE_ONCE(sk->sk_state, TCP_CLOSE); unix_state_unlock(sk); unix_dgram_disconnected(sk, other); @@ -2157,13 +2221,15 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other maybe_add_creds(skb, sock, other); skb_get(skb); + scm_stat_add(other, skb); + + spin_lock(&other->sk_receive_queue.lock); if (ousk->oob_skb) consume_skb(ousk->oob_skb); - WRITE_ONCE(ousk->oob_skb, skb); + __skb_queue_tail(&other->sk_receive_queue, skb); + spin_unlock(&other->sk_receive_queue.lock); - scm_stat_add(other, skb); - skb_queue_tail(&other->sk_receive_queue, skb); sk_send_sigurg(other); unix_state_unlock(other); other->sk_data_ready(other); @@ -2184,11 +2250,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, bool fds_sent = false; int data_len; - wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); if (err < 0) return err; + wait_for_unix_gc(scm.fp); + err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) { #if IS_ENABLED(CONFIG_AF_UNIX_OOB) @@ -2200,7 +2267,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, } if (msg->msg_namelen) { - err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; + err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out_err; } else { err = -ENOTCONN; @@ -2221,7 +2288,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, &err, 0); } else { /* Keep two messages in the pipe so it schedules better */ - size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); + size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64); /* allow fallback to order-0 allocations */ size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); @@ -2314,7 +2381,7 @@ static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, if (err) return err; - if (sk->sk_state != TCP_ESTABLISHED) + if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED) return -ENOTCONN; if (msg->msg_namelen) @@ -2328,7 +2395,7 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; - if (sk->sk_state != TCP_ESTABLISHED) + if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED) return -ENOTCONN; return unix_dgram_recvmsg(sock, msg, size, flags); @@ -2553,8 +2620,10 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) mutex_lock(&u->iolock); unix_state_lock(sk); + spin_lock(&sk->sk_receive_queue.lock); if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); mutex_unlock(&u->iolock); return -EINVAL; @@ -2566,6 +2635,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) WRITE_ONCE(u->oob_skb, NULL); else skb_get(oob_skb); + + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); chunk = state->recv_actor(oob_skb, 0, chunk, state); @@ -2594,24 +2665,34 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, consume_skb(skb); skb = NULL; } else { + struct sk_buff *unlinked_skb = NULL; + + spin_lock(&sk->sk_receive_queue.lock); + if (skb == u->oob_skb) { if (copied) { skb = NULL; - } else if (sock_flag(sk, SOCK_URGINLINE)) { - if (!(flags & MSG_PEEK)) { + } else if (!(flags & MSG_PEEK)) { + if (sock_flag(sk, SOCK_URGINLINE)) { WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); + } else { + __skb_unlink(skb, &sk->sk_receive_queue); + WRITE_ONCE(u->oob_skb, NULL); + unlinked_skb = skb; + skb = skb_peek(&sk->sk_receive_queue); } - } else if (flags & MSG_PEEK) { - skb = NULL; - } else { - skb_unlink(skb, &sk->sk_receive_queue); - WRITE_ONCE(u->oob_skb, NULL); - if (!WARN_ON_ONCE(skb_unref(skb))) - kfree_skb(skb); - skb = skb_peek(&sk->sk_receive_queue); + } else if (!sock_flag(sk, SOCK_URGINLINE)) { + skb = skb_peek_next(skb, &sk->sk_receive_queue); } } + + spin_unlock(&sk->sk_receive_queue.lock); + + if (unlinked_skb) { + WARN_ON_ONCE(skb_unref(unlinked_skb)); + kfree_skb(unlinked_skb); + } } return skb; } @@ -2619,7 +2700,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { - if (unlikely(sk->sk_state != TCP_ESTABLISHED)) + if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) return -ENOTCONN; return unix_read_skb(sk, recv_actor); @@ -2643,7 +2724,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, size_t size = state->size; unsigned int last_len; - if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { + if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) { err = -EINVAL; goto out; } @@ -2974,7 +3055,7 @@ long unix_inq_len(struct sock *sk) struct sk_buff *skb; long amount = 0; - if (sk->sk_state == TCP_LISTEN) + if (READ_ONCE(sk->sk_state) == TCP_LISTEN) return -EINVAL; spin_lock(&sk->sk_receive_queue.lock); @@ -3086,12 +3167,14 @@ static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; + unsigned char state; __poll_t mask; u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; shutdown = READ_ONCE(sk->sk_shutdown); + state = READ_ONCE(sk->sk_state); /* exceptional events? */ if (READ_ONCE(sk->sk_err)) @@ -3113,14 +3196,14 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && - sk->sk_state == TCP_CLOSE) + state == TCP_CLOSE) mask |= EPOLLHUP; /* * we set writable also when the other side has shut down the * connection. This prevents stuck sockets. */ - if (unix_writable(sk)) + if (unix_writable(sk, state)) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; return mask; @@ -3131,12 +3214,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk, *other; unsigned int writable; + unsigned char state; __poll_t mask; u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; shutdown = READ_ONCE(sk->sk_shutdown); + state = READ_ONCE(sk->sk_state); /* exceptional events? */ if (READ_ONCE(sk->sk_err) || @@ -3156,19 +3241,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ - if (sk->sk_type == SOCK_SEQPACKET) { - if (sk->sk_state == TCP_CLOSE) - mask |= EPOLLHUP; - /* connection hasn't started yet? */ - if (sk->sk_state == TCP_SYN_SENT) - return mask; - } + if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE) + mask |= EPOLLHUP; /* No write status requested, avoid expensive OUT tests. */ if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) return mask; - writable = unix_writable(sk); + writable = unix_writable(sk, state); if (writable) { unix_state_lock(sk); |