From 50ba0232fd5312410f1b65247e774244f89a628e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 20:50:36 +0200 Subject: Merging upstream version 6.8.9. Signed-off-by: Daniel Baumann --- net/mptcp/mptcp_pm_gen.c | 2 +- net/mptcp/mptcp_pm_gen.h | 2 +- net/mptcp/pm_netlink.c | 2 +- net/mptcp/pm_userspace.c | 8 ++-- net/mptcp/protocol.c | 106 +++++++++++++++++++---------------------------- net/mptcp/protocol.h | 9 ++++ net/mptcp/sockopt.c | 33 +++++++++++++-- net/mptcp/subflow.c | 2 + 8 files changed, 89 insertions(+), 75 deletions(-) (limited to 'net/mptcp') diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c index a2325e70d..670da7822 100644 --- a/net/mptcp/mptcp_pm_gen.c +++ b/net/mptcp/mptcp_pm_gen.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/mptcp.yaml */ +/* Documentation/netlink/specs/mptcp_pm.yaml */ /* YNL-GEN kernel source */ #include diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h index 10579d184..ac9fc7225 100644 --- a/net/mptcp/mptcp_pm_gen.h +++ b/net/mptcp/mptcp_pm_gen.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/mptcp.yaml */ +/* Documentation/netlink/specs/mptcp_pm.yaml */ /* YNL-GEN kernel header */ #ifndef _LINUX_MPTCP_PM_GEN_H diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index cccb720c1..58d17d960 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1109,7 +1109,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc static const struct genl_multicast_group mptcp_pm_mcgrps[] = { [MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, }, [MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME, - .flags = GENL_UNS_ADMIN_PERM, + .flags = GENL_MCAST_CAP_NET_ADMIN, }, }; diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 6eabd1d79..bc97cc30f 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -288,12 +288,12 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) if (!mptcp_pm_is_userspace(msk)) { GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); - goto remove_err; + goto out; } if (id_val == 0) { err = mptcp_userspace_pm_remove_id_zero_address(msk, info); - goto remove_err; + goto out; } lock_sock(sk); @@ -308,7 +308,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) if (!match) { GENL_SET_ERR_MSG(info, "address with specified id not found"); release_sock(sk); - goto remove_err; + goto out; } list_move(&match->list, &free_list); @@ -322,7 +322,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) } err = 0; - remove_err: +out: sock_put(sk); return err; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 046ab95bc..2b921af27 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) return READ_ONCE(msk->wnd_end); } -static bool mptcp_is_tcpsk(struct sock *sk) +static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk) { - struct socket *sock = sk->sk_socket; - - if (unlikely(sk->sk_prot == &tcp_prot)) { - /* we are being invoked after mptcp_accept() has - * accepted a non-mp-capable flow: sk is a tcp_sk, - * not an mptcp one. - * - * Hand the socket over to tcp so all further socket ops - * bypass mptcp. - */ - WRITE_ONCE(sock->ops, &inet_stream_ops); - return true; #if IS_ENABLED(CONFIG_MPTCP_IPV6) - } else if (unlikely(sk->sk_prot == &tcpv6_prot)) { - WRITE_ONCE(sock->ops, &inet6_stream_ops); - return true; + if (sk->sk_prot == &tcpv6_prot) + return &inet6_stream_ops; #endif - } - - return false; + WARN_ON_ONCE(sk->sk_prot != &tcp_prot); + return &inet_stream_ops; } static int __mptcp_socket_create(struct mptcp_sock *msk) @@ -3333,44 +3319,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT; } -static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err, - bool kern) -{ - struct sock *newsk; - - pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); - newsk = inet_csk_accept(ssk, flags, err, kern); - if (!newsk) - return NULL; - - pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); - if (sk_is_mptcp(newsk)) { - struct mptcp_subflow_context *subflow; - struct sock *new_mptcp_sock; - - subflow = mptcp_subflow_ctx(newsk); - new_mptcp_sock = subflow->conn; - - /* is_mptcp should be false if subflow->conn is missing, see - * subflow_syn_recv_sock() - */ - if (WARN_ON_ONCE(!new_mptcp_sock)) { - tcp_sk(newsk)->is_mptcp = 0; - goto out; - } - - newsk = new_mptcp_sock; - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); - } else { - MPTCP_INC_STATS(sock_net(ssk), - MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); - } - -out: - newsk->sk_kern_sock = kern; - return newsk; -} - void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) { struct mptcp_subflow_context *subflow, *tmp; @@ -3807,7 +3755,6 @@ static struct proto mptcp_prot = { .connect = mptcp_connect, .disconnect = mptcp_disconnect, .close = mptcp_close, - .accept = mptcp_accept, .setsockopt = mptcp_setsockopt, .getsockopt = mptcp_getsockopt, .shutdown = mptcp_shutdown, @@ -3917,18 +3864,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk) return -EINVAL; - newsk = mptcp_accept(ssk, flags, &err, kern); + pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); + newsk = inet_csk_accept(ssk, flags, &err, kern); if (!newsk) return err; - lock_sock(newsk); - - __inet_accept(sock, newsock, newsk); - if (!mptcp_is_tcpsk(newsock->sk)) { - struct mptcp_sock *msk = mptcp_sk(newsk); + pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); + if (sk_is_mptcp(newsk)) { struct mptcp_subflow_context *subflow; + struct sock *new_mptcp_sock; + + subflow = mptcp_subflow_ctx(newsk); + new_mptcp_sock = subflow->conn; + + /* is_mptcp should be false if subflow->conn is missing, see + * subflow_syn_recv_sock() + */ + if (WARN_ON_ONCE(!new_mptcp_sock)) { + tcp_sk(newsk)->is_mptcp = 0; + goto tcpfallback; + } + + newsk = new_mptcp_sock; + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); + + newsk->sk_kern_sock = kern; + lock_sock(newsk); + __inet_accept(sock, newsock, newsk); set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); + msk = mptcp_sk(newsk); msk->in_accept_queue = 0; /* set ssk->sk_socket of accept()ed flows to mptcp socket. @@ -3950,6 +3915,19 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (unlikely(list_is_singular(&msk->conn_list))) mptcp_set_state(newsk, TCP_CLOSE); } + } else { +tcpfallback: + newsk->sk_kern_sock = kern; + lock_sock(newsk); + __inet_accept(sock, newsock, newsk); + /* we are being invoked after accepting a non-mp-capable + * flow: sk is a tcp_sk, not an mptcp one. + * + * Hand the socket over to tcp so all further socket ops + * bypass mptcp. + */ + WRITE_ONCE(newsock->sk->sk_socket->ops, + mptcp_fallback_tcp_ops(newsock->sk)); } release_sock(newsk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7384613ea..07f6242af 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1087,6 +1087,15 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk) set_bit(MPTCP_FALLBACK_DONE, &msk->flags); } +static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk) +{ + struct sock *ssk = READ_ONCE(msk->first); + + return ssk && ((1 << inet_sk_state_load(ssk)) & + (TCPF_ESTABLISHED | TCPF_SYN_SENT | + TCPF_SYN_RECV | TCPF_LISTEN)); +} + static inline void mptcp_do_fallback(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 353680733..ef3edba75 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -440,6 +440,8 @@ static bool mptcp_supported_sockopt(int level, int optname) /* should work fine */ case IP_FREEBIND: case IP_TRANSPARENT: + case IP_BIND_ADDRESS_NO_PORT: + case IP_LOCAL_PORT_RANGE: /* the following are control cmsg related */ case IP_PKTINFO: @@ -455,7 +457,6 @@ static bool mptcp_supported_sockopt(int level, int optname) /* common stuff that need some love */ case IP_TOS: case IP_TTL: - case IP_BIND_ADDRESS_NO_PORT: case IP_MTU_DISCOVER: case IP_RECVERR: @@ -683,8 +684,8 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op return 0; } -static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) +static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) { struct sock *sk = (struct sock *)msk; struct sock *ssk; @@ -710,6 +711,14 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); break; + case IP_BIND_ADDRESS_NO_PORT: + inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, + inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); + break; + case IP_LOCAL_PORT_RANGE: + WRITE_ONCE(inet_sk(ssk)->local_port_range, + READ_ONCE(inet_sk(sk)->local_port_range)); + break; default: release_sock(sk); WARN_ON_ONCE(1); @@ -755,7 +764,9 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, switch (optname) { case IP_FREEBIND: case IP_TRANSPARENT: - return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); + case IP_BIND_ADDRESS_NO_PORT: + case IP_LOCAL_PORT_RANGE: + return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen); case IP_TOS: return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); } @@ -938,6 +949,8 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) info->mptcpi_bytes_sent = msk->bytes_sent; info->mptcpi_bytes_received = msk->bytes_received; info->mptcpi_bytes_retrans = msk->bytes_retrans; + info->mptcpi_subflows_total = info->mptcpi_subflows + + __mptcp_has_initial_subflow(msk); unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); @@ -1348,6 +1361,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, switch (optname) { case IP_TOS: return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); + case IP_BIND_ADDRESS_NO_PORT: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); + case IP_LOCAL_PORT_RANGE: + return mptcp_put_int_option(msk, optval, optlen, + READ_ONCE(inet_sk(sk)->local_port_range)); } return -EOPNOTSUPP; @@ -1448,6 +1467,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); + inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); + WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); } void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) @@ -1479,6 +1500,10 @@ int mptcp_set_rcvlowat(struct sock *sk, int val) struct mptcp_subflow_context *subflow; int space, cap; + /* bpf can land here with a wrong sk type */ + if (sk->sk_protocol == IPPROTO_TCP) + return -EINVAL; + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) cap = sk->sk_rcvbuf >> 1; else diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 71ba86246..13f66d11b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -905,6 +905,8 @@ dispose_child: return child; fallback: + if (fallback) + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); mptcp_subflow_drop_ctx(child); return child; } -- cgit v1.2.3