summaryrefslogtreecommitdiffstats
path: root/net/mptcp/subflow.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp/subflow.c')
-rw-r--r--net/mptcp/subflow.c133
1 files changed, 79 insertions, 54 deletions
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d3c5ecf8dd..71ba86246f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -9,8 +9,8 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
-#include <crypto/algapi.h>
#include <crypto/sha2.h>
+#include <crypto/utils.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -421,29 +421,26 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc
void __mptcp_sync_state(struct sock *sk, int state)
{
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct sock *ssk = msk->first;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ __mptcp_propagate_sndbuf(sk, ssk);
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, ssk);
- __mptcp_propagate_sndbuf(sk, msk->first);
if (sk->sk_state == TCP_SYN_SENT) {
- inet_sk_state_store(sk, state);
+ /* subflow->idsn is always available is TCP_SYN_SENT state,
+ * even for the FASTOPEN scenarios
+ */
+ WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+ mptcp_set_state(sk, state);
sk->sk_state_change(sk);
}
}
-static void mptcp_propagate_state(struct sock *sk, struct sock *ssk)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_data_lock(sk);
- if (!sock_owned_by_user(sk)) {
- __mptcp_sync_state(sk, ssk->sk_state);
- } else {
- msk->pending_state = ssk->sk_state;
- __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
- }
- mptcp_data_unlock(sk);
-}
-
static void subflow_set_remote_key(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
@@ -465,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk,
atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
}
+static void mptcp_propagate_state(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_data_lock(sk);
+ if (mp_opt) {
+ /* Options are available only in the non fallback cases
+ * avoid updating rx path fields otherwise
+ */
+ WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
+ WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+ subflow_set_remote_key(msk, subflow, mp_opt);
+ }
+
+ if (!sock_owned_by_user(sk)) {
+ __mptcp_sync_state(sk, ssk->sk_state);
+ } else {
+ msk->pending_state = ssk->sk_state;
+ __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
+ }
+ mptcp_data_unlock(sk);
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -499,10 +521,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (mp_opt.deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
- subflow_set_remote_key(msk, subflow, &mp_opt);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -514,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
- subflow->remote_id = mp_opt.join_id;
+ WRITE_ONCE(subflow->remote_id, mp_opt.join_id);
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -545,8 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
}
} else if (mptcp_check_fallback(sk)) {
fallback:
- mptcp_rcv_space_init(msk, sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
return;
@@ -557,8 +577,8 @@ do_reset:
static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
{
- subflow->local_id = local_id;
- subflow->local_id_valid = 1;
+ WARN_ON_ONCE(local_id < 0 || local_id > 255);
+ WRITE_ONCE(subflow->local_id, local_id);
}
static int subflow_chk_local_id(struct sock *sk)
@@ -567,7 +587,7 @@ static int subflow_chk_local_id(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
int err;
- if (likely(subflow->local_id_valid))
+ if (likely(subflow->local_id >= 0))
return 0;
err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
@@ -731,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
kfree_rcu(ctx, rcu);
}
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
{
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
subflow_set_remote_key(msk, subflow, mp_opt);
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
- mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
+ __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
@@ -834,7 +853,6 @@ create_child:
* mpc option
*/
if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
- mptcp_subflow_fully_established(ctx, &mp_opt);
mptcp_pm_fully_established(owner, child);
ctx->pm_notified = 1;
}
@@ -1244,7 +1262,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
struct sk_buff *skb;
if (!skb_peek(&ssk->sk_receive_queue))
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ WRITE_ONCE(subflow->data_avail, false);
if (subflow->data_avail)
return true;
@@ -1278,7 +1296,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
continue;
}
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ WRITE_ONCE(subflow->data_avail, true);
break;
}
return true;
@@ -1300,7 +1318,7 @@ fallback:
goto reset;
}
mptcp_subflow_fail(msk, ssk);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ WRITE_ONCE(subflow->data_avail, true);
return true;
}
@@ -1317,7 +1335,7 @@ reset:
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ WRITE_ONCE(subflow->data_avail, false);
return false;
}
@@ -1329,7 +1347,7 @@ reset:
subflow->map_seq = READ_ONCE(msk->ack_seq);
subflow->map_data_len = skb->len;
subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ WRITE_ONCE(subflow->data_avail, true);
return true;
}
@@ -1341,7 +1359,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
if (subflow->map_valid &&
mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
subflow->map_valid = 0;
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ WRITE_ONCE(subflow->data_avail, false);
pr_debug("Done with mapping: seq=%u data_len=%u",
subflow->map_subflow_seq,
@@ -1412,10 +1430,18 @@ static void subflow_data_ready(struct sock *sk)
WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
!subflow->mp_join && !(state & TCPF_CLOSE));
- if (mptcp_subflow_data_available(sk))
+ if (mptcp_subflow_data_available(sk)) {
mptcp_data_ready(parent, sk);
- else if (unlikely(sk->sk_err))
+
+ /* subflow-level lowat test are not relevant.
+ * respect the msk-level threshold eventually mandating an immediate ack
+ */
+ if (mptcp_data_avail(msk) < parent->sk_rcvlowat &&
+ (tcp_sk(sk)->rcv_nxt - tcp_sk(sk)->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss)
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+ } else if (unlikely(sk->sk_err)) {
subflow_error_report(sk);
+ }
}
static void subflow_write_space(struct sock *ssk)
@@ -1532,8 +1558,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- mptcp_sockopt_sync(msk, ssk);
-
ssk->sk_bound_dev_if = ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
if (err)
@@ -1543,7 +1567,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
remote_token, local_id, remote_id);
subflow->remote_token = remote_token;
- subflow->remote_id = remote_id;
+ WRITE_ONCE(subflow->remote_id, remote_id);
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
subflow->subflow_id = msk->subflow_id++;
@@ -1644,7 +1668,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
err = security_mptcp_add_subflow(sk, sf->sk);
if (err)
- goto release_ssk;
+ goto err_free;
/* the newly created socket has to be in the same cgroup as its parent */
mptcp_attach_cgroup(sk, sf->sk);
@@ -1658,15 +1682,12 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(net, 1);
err = tcp_set_ulp(sf->sk, "mptcp");
+ if (err)
+ goto err_free;
-release_ssk:
+ mptcp_sockopt_sync_locked(mptcp_sk(sk), sf->sk);
release_sock(sf->sk);
- if (err) {
- sock_release(sf);
- return err;
- }
-
/* the newly created socket really belongs to the owning MPTCP master
* socket, even if for additional subflows the allocation is performed
* by a kernel workqueue. Adjust inode references, so that the
@@ -1686,6 +1707,11 @@ release_ssk:
mptcp_subflow_ops_override(sf->sk);
return 0;
+
+err_free:
+ release_sock(sf->sk);
+ sock_release(sf);
+ return err;
}
static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
@@ -1705,6 +1731,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
pr_debug("subflow=%p", ctx);
ctx->tcp_sock = sk;
+ WRITE_ONCE(ctx->local_id, -1);
return ctx;
}
@@ -1736,10 +1763,9 @@ static void subflow_state_change(struct sock *sk)
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
mptcp_do_fallback(sk);
- mptcp_rcv_space_init(msk, sk);
pr_fallback(msk);
subflow->conn_finished = 1;
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
/* as recvmsg() does not acquire the subflow socket for ssk selection
@@ -1941,14 +1967,14 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->idsn = subflow_req->idsn;
/* this is the first subflow, id is always 0 */
- new_ctx->local_id_valid = 1;
+ subflow_set_local_id(new_ctx, 0);
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
- new_ctx->remote_id = subflow_req->remote_id;
+ WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;
@@ -2062,7 +2088,6 @@ void __init mptcp_subflow_init(void)
subflow_v6m_specific.send_check = ipv4_specific.send_check;
subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
- subflow_v6m_specific.net_frag_header_len = 0;
subflow_v6m_specific.rebuild_header = subflow_rebuild_header;
tcpv6_prot_override = tcpv6_prot;