diff options
Diffstat (limited to 'net/mptcp/subflow.c')
-rw-r--r-- | net/mptcp/subflow.c | 133 |
1 files changed, 79 insertions, 54 deletions
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index d3c5ecf8dd..71ba86246f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -9,8 +9,8 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> -#include <crypto/algapi.h> #include <crypto/sha2.h> +#include <crypto/utils.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -421,29 +421,26 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc void __mptcp_sync_state(struct sock *sk, int state) { + struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); + struct sock *ssk = msk->first; + + subflow = mptcp_subflow_ctx(ssk); + __mptcp_propagate_sndbuf(sk, ssk); + if (!msk->rcvspace_init) + mptcp_rcv_space_init(msk, ssk); - __mptcp_propagate_sndbuf(sk, msk->first); if (sk->sk_state == TCP_SYN_SENT) { - inet_sk_state_store(sk, state); + /* subflow->idsn is always available is TCP_SYN_SENT state, + * even for the FASTOPEN scenarios + */ + WRITE_ONCE(msk->write_seq, subflow->idsn + 1); + WRITE_ONCE(msk->snd_nxt, msk->write_seq); + mptcp_set_state(sk, state); sk->sk_state_change(sk); } } -static void mptcp_propagate_state(struct sock *sk, struct sock *ssk) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - - mptcp_data_lock(sk); - if (!sock_owned_by_user(sk)) { - __mptcp_sync_state(sk, ssk->sk_state); - } else { - msk->pending_state = ssk->sk_state; - __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); - } - mptcp_data_unlock(sk); -} - static void subflow_set_remote_key(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt) @@ -465,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk, atomic64_set(&msk->rcv_wnd_sent, subflow->iasn); } +static void mptcp_propagate_state(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_data_lock(sk); + if (mp_opt) { + /* Options are available only in the non fallback cases + * avoid updating rx path fields otherwise + */ + WRITE_ONCE(msk->snd_una, subflow->idsn + 1); + WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd); + subflow_set_remote_key(msk, subflow, mp_opt); + } + + if (!sock_owned_by_user(sk)) { + __mptcp_sync_state(sk, ssk->sk_state); + } else { + msk->pending_state = ssk->sk_state; + __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); + } + mptcp_data_unlock(sk); +} + static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -499,10 +521,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (mp_opt.deny_join_id0) WRITE_ONCE(msk->pm.remote_deny_join_id0, true); subflow->mp_capable = 1; - subflow_set_remote_key(msk, subflow, &mp_opt); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); - mptcp_propagate_state(parent, sk); + mptcp_propagate_state(parent, sk, subflow, &mp_opt); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -514,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; - subflow->remote_id = mp_opt.join_id; + WRITE_ONCE(subflow->remote_id, mp_opt.join_id); pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -545,8 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } } else if (mptcp_check_fallback(sk)) { fallback: - mptcp_rcv_space_init(msk, sk); - mptcp_propagate_state(parent, sk); + mptcp_propagate_state(parent, sk, subflow, NULL); } return; @@ -557,8 +577,8 @@ do_reset: static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id) { - subflow->local_id = local_id; - subflow->local_id_valid = 1; + WARN_ON_ONCE(local_id < 0 || local_id > 255); + WRITE_ONCE(subflow->local_id, local_id); } static int subflow_chk_local_id(struct sock *sk) @@ -567,7 +587,7 @@ static int subflow_chk_local_id(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(subflow->conn); int err; - if (likely(subflow->local_id_valid)) + if (likely(subflow->local_id >= 0)) return 0; err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk); @@ -731,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk) kfree_rcu(ctx, rcu); } -void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt) +void __mptcp_subflow_fully_established(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) { - struct mptcp_sock *msk = mptcp_sk(subflow->conn); - subflow_set_remote_key(msk, subflow, mp_opt); subflow->fully_established = 1; WRITE_ONCE(msk->fully_established, true); if (subflow->is_mptfo) - mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt); + __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt); } static struct sock *subflow_syn_recv_sock(const struct sock *sk, @@ -834,7 +853,6 @@ create_child: * mpc option */ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { - mptcp_subflow_fully_established(ctx, &mp_opt); mptcp_pm_fully_established(owner, child); ctx->pm_notified = 1; } @@ -1244,7 +1262,7 @@ static bool subflow_check_data_avail(struct sock *ssk) struct sk_buff *skb; if (!skb_peek(&ssk->sk_receive_queue)) - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + WRITE_ONCE(subflow->data_avail, false); if (subflow->data_avail) return true; @@ -1278,7 +1296,7 @@ static bool subflow_check_data_avail(struct sock *ssk) continue; } - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + WRITE_ONCE(subflow->data_avail, true); break; } return true; @@ -1300,7 +1318,7 @@ fallback: goto reset; } mptcp_subflow_fail(msk, ssk); - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + WRITE_ONCE(subflow->data_avail, true); return true; } @@ -1317,7 +1335,7 @@ reset: while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); tcp_send_active_reset(ssk, GFP_ATOMIC); - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + WRITE_ONCE(subflow->data_avail, false); return false; } @@ -1329,7 +1347,7 @@ reset: subflow->map_seq = READ_ONCE(msk->ack_seq); subflow->map_data_len = skb->len; subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + WRITE_ONCE(subflow->data_avail, true); return true; } @@ -1341,7 +1359,7 @@ bool mptcp_subflow_data_available(struct sock *sk) if (subflow->map_valid && mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) { subflow->map_valid = 0; - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + WRITE_ONCE(subflow->data_avail, false); pr_debug("Done with mapping: seq=%u data_len=%u", subflow->map_subflow_seq, @@ -1412,10 +1430,18 @@ static void subflow_data_ready(struct sock *sk) WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && !subflow->mp_join && !(state & TCPF_CLOSE)); - if (mptcp_subflow_data_available(sk)) + if (mptcp_subflow_data_available(sk)) { mptcp_data_ready(parent, sk); - else if (unlikely(sk->sk_err)) + + /* subflow-level lowat test are not relevant. + * respect the msk-level threshold eventually mandating an immediate ack + */ + if (mptcp_data_avail(msk) < parent->sk_rcvlowat && + (tcp_sk(sk)->rcv_nxt - tcp_sk(sk)->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss) + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + } else if (unlikely(sk->sk_err)) { subflow_error_report(sk); + } } static void subflow_write_space(struct sock *ssk) @@ -1532,8 +1558,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif - mptcp_sockopt_sync(msk, ssk); - ssk->sk_bound_dev_if = ifindex; err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); if (err) @@ -1543,7 +1567,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, remote_token, local_id, remote_id); subflow->remote_token = remote_token; - subflow->remote_id = remote_id; + WRITE_ONCE(subflow->remote_id, remote_id); subflow->request_join = 1; subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); subflow->subflow_id = msk->subflow_id++; @@ -1644,7 +1668,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, err = security_mptcp_add_subflow(sk, sf->sk); if (err) - goto release_ssk; + goto err_free; /* the newly created socket has to be in the same cgroup as its parent */ mptcp_attach_cgroup(sk, sf->sk); @@ -1658,15 +1682,12 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1); err = tcp_set_ulp(sf->sk, "mptcp"); + if (err) + goto err_free; -release_ssk: + mptcp_sockopt_sync_locked(mptcp_sk(sk), sf->sk); release_sock(sf->sk); - if (err) { - sock_release(sf); - return err; - } - /* the newly created socket really belongs to the owning MPTCP master * socket, even if for additional subflows the allocation is performed * by a kernel workqueue. Adjust inode references, so that the @@ -1686,6 +1707,11 @@ release_ssk: mptcp_subflow_ops_override(sf->sk); return 0; + +err_free: + release_sock(sf->sk); + sock_release(sf); + return err; } static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, @@ -1705,6 +1731,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, pr_debug("subflow=%p", ctx); ctx->tcp_sock = sk; + WRITE_ONCE(ctx->local_id, -1); return ctx; } @@ -1736,10 +1763,9 @@ static void subflow_state_change(struct sock *sk) msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { mptcp_do_fallback(sk); - mptcp_rcv_space_init(msk, sk); pr_fallback(msk); subflow->conn_finished = 1; - mptcp_propagate_state(parent, sk); + mptcp_propagate_state(parent, sk, subflow, NULL); } /* as recvmsg() does not acquire the subflow socket for ssk selection @@ -1941,14 +1967,14 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->idsn = subflow_req->idsn; /* this is the first subflow, id is always 0 */ - new_ctx->local_id_valid = 1; + subflow_set_local_id(new_ctx, 0); } else if (subflow_req->mp_join) { new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; new_ctx->fully_established = 1; new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; - new_ctx->remote_id = subflow_req->remote_id; + WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; @@ -2062,7 +2088,6 @@ void __init mptcp_subflow_init(void) subflow_v6m_specific.send_check = ipv4_specific.send_check; subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len; subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced; - subflow_v6m_specific.net_frag_header_len = 0; subflow_v6m_specific.rebuild_header = subflow_rebuild_header; tcpv6_prot_override = tcpv6_prot; |