summary | refs | log | tree | commit | diff | stats
path: root/net/mptcp
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 17:39:57 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 17:39:57 +0000
commit: dc50eab76b709d68175a358d6e23a5a3890764d3 (patch)
tree: c754d0390db060af0213ff994f0ac310e4cfd6e9 /net/mptcp
parent: Adding debian version 6.6.15-2. (diff)
download: linux-dc50eab76b709d68175a358d6e23a5a3890764d3.tar.xz
download: linux-dc50eab76b709d68175a358d6e23a5a3890764d3.zip
Merging upstream version 6.7.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'net/mptcp')
-rw-r--r-- net/mptcp/Makefile 3
-rw-r--r-- net/mptcp/crypto_test.c 1
-rw-r--r-- net/mptcp/ctrl.c 16
-rw-r--r-- net/mptcp/diag.c 8
-rw-r--r-- net/mptcp/fastopen.c 6
-rw-r--r-- net/mptcp/mib.c 1
-rw-r--r-- net/mptcp/mib.h 8
-rw-r--r-- net/mptcp/mptcp_diag.c 1
-rw-r--r-- net/mptcp/mptcp_pm_gen.c 179
-rw-r--r-- net/mptcp/mptcp_pm_gen.h 58
-rw-r--r-- net/mptcp/options.c 9
-rw-r--r-- net/mptcp/pm.c 2
-rw-r--r-- net/mptcp/pm_netlink.c 188
-rw-r--r-- net/mptcp/pm_userspace.c 115
-rw-r--r-- net/mptcp/protocol.c 149
-rw-r--r-- net/mptcp/protocol.h 70
-rw-r--r-- net/mptcp/sockopt.c 68
-rw-r--r-- net/mptcp/subflow.c 133
-rw-r--r-- net/mptcp/token_test.c 1
19 files changed, 693 insertions, 323 deletions
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index 84e531f86b..bcf1dbf3a4 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -2,7 +2,8 @@
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
- mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o
+ mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o \
+ mptcp_pm_gen.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
diff --git a/net/mptcp/crypto_test.c b/net/mptcp/crypto_test.c
index 017248dea0..220414e5c8 100644
--- a/net/mptcp/crypto_test.c
+++ b/net/mptcp/crypto_test.c
@@ -70,3 +70,4 @@ static struct kunit_suite mptcp_crypto_suite = {
kunit_test_suite(mptcp_crypto_suite);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("KUnit tests for MPTCP Crypto");
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index e72b518c5d..13fe0748dd 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -27,6 +27,7 @@ struct mptcp_pernet {
#endif
unsigned int add_addr_timeout;
+ unsigned int close_timeout;
unsigned int stale_loss_cnt;
u8 mptcp_enabled;
u8 checksum_enabled;
@@ -65,6 +66,13 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net)
return mptcp_get_pernet(net)->stale_loss_cnt;
}
+unsigned int mptcp_close_timeout(const struct sock *sk)
+{
+ if (sock_flag(sk, SOCK_DEAD))
+ return TCP_TIMEWAIT_LEN;
+ return mptcp_get_pernet(sock_net(sk))->close_timeout;
+}
+
int mptcp_get_pm_type(const struct net *net)
{
return mptcp_get_pernet(net)->pm_type;
@@ -79,6 +87,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
+ pernet->close_timeout = TCP_TIMEWAIT_LEN;
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
@@ -141,6 +150,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dostring,
},
+ {
+ .procname = "close_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
{}
};
@@ -163,6 +178,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[4].data = &pernet->stale_loss_cnt;
table[5].data = &pernet->pm_type;
table[6].data = &pernet->scheduler;
+ table[7].data = &pernet->close_timeout;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index a536586742..6ff6f14674 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -13,17 +13,19 @@
#include <uapi/linux/mptcp.h>
#include "protocol.h"
-static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
+static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *sf;
struct nlattr *start;
u32 flags = 0;
+ bool slow;
int err;
start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP);
if (!start)
return -EMSGSIZE;
+ slow = lock_sock_fast(sk);
rcu_read_lock();
sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
if (!sf) {
@@ -63,17 +65,19 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
sf->map_data_len) ||
nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) ||
nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) ||
- nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) {
+ nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) {
err = -EMSGSIZE;
goto nla_failure;
}
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_end(skb, start);
return 0;
nla_failure:
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_cancel(skb, start);
return err;
}
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index 74698582a2..ad28da655f 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -59,13 +59,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
mptcp_data_unlock(sk);
}
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
{
struct sock *sk = (struct sock *)msk;
struct sk_buff *skb;
- mptcp_data_lock(sk);
skb = skb_peek_tail(&sk->sk_receive_queue);
if (skb) {
WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
@@ -77,5 +76,4 @@ void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_
}
pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
- mptcp_data_unlock(sk);
}
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index a0990c365a..c30405e768 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -66,6 +66,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED),
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
+ SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index cae71d9472..dd7fd1f246 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -65,6 +65,7 @@ enum linux_mptcp_mib_field {
* conflict with another subflow while updating msk rcv wnd
*/
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
+ MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
__MPTCP_MIB_MAX
};
@@ -95,4 +96,11 @@ static inline void __MPTCP_INC_STATS(struct net *net,
__SNMP_INC_STATS(net->mib.mptcp_statistics, field);
}
+static inline void MPTCP_DEC_STATS(struct net *net,
+ enum linux_mptcp_mib_field field)
+{
+ if (likely(net->mib.mptcp_statistics))
+ SNMP_DEC_STATS(net->mib.mptcp_statistics, field);
+}
+
bool mptcp_mib_alloc(struct net *net);
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index 8df1bdb647..5409c2ea3f 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -245,4 +245,5 @@ static void __exit mptcp_diag_exit(void)
module_init(mptcp_diag_init);
module_exit(mptcp_diag_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MPTCP socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-262 /* AF_INET - IPPROTO_MPTCP */);
diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c
new file mode 100644
index 0000000000..a2325e70dd
--- /dev/null
+++ b/net/mptcp/mptcp_pm_gen.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/mptcp.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "mptcp_pm_gen.h"
+
+#include <uapi/linux/mptcp_pm.h>
+
+/* Common nested types */
+const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1] = {
+ [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, },
+ [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, },
+ [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, },
+ [MPTCP_PM_ADDR_ATTR_ADDR6] = NLA_POLICY_EXACT_LEN(16),
+ [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16, },
+ [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32, },
+ [MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32, },
+};
+
+/* MPTCP_PM_CMD_ADD_ADDR - do */
+const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = {
+ [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* MPTCP_PM_CMD_DEL_ADDR - do */
+const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = {
+ [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* MPTCP_PM_CMD_GET_ADDR - do */
+const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = {
+ [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* MPTCP_PM_CMD_FLUSH_ADDRS - do */
+const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = {
+ [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* MPTCP_PM_CMD_SET_LIMITS - do */
+const struct nla_policy mptcp_pm_set_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1] = {
+ [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
+};
+
+/* MPTCP_PM_CMD_GET_LIMITS - do */
+const struct nla_policy mptcp_pm_get_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1] = {
+ [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
+};
+
+/* MPTCP_PM_CMD_SET_FLAGS - do */
+const struct nla_policy mptcp_pm_set_flags_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = {
+ [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* MPTCP_PM_CMD_ANNOUNCE - do */
+const struct nla_policy mptcp_pm_announce_nl_policy[MPTCP_PM_ATTR_TOKEN + 1] = {
+ [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+};
+
+/* MPTCP_PM_CMD_REMOVE - do */
+const struct nla_policy mptcp_pm_remove_nl_policy[MPTCP_PM_ATTR_LOC_ID + 1] = {
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, },
+};
+
+/* MPTCP_PM_CMD_SUBFLOW_CREATE - do */
+const struct nla_policy mptcp_pm_subflow_create_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = {
+ [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* MPTCP_PM_CMD_SUBFLOW_DESTROY - do */
+const struct nla_policy mptcp_pm_subflow_destroy_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = {
+ [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy),
+};
+
+/* Ops table for mptcp_pm */
+const struct genl_ops mptcp_pm_nl_ops[11] = {
+ {
+ .cmd = MPTCP_PM_CMD_ADD_ADDR,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_add_addr_doit,
+ .policy = mptcp_pm_add_addr_nl_policy,
+ .maxattr = MPTCP_PM_ENDPOINT_ADDR,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_DEL_ADDR,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_del_addr_doit,
+ .policy = mptcp_pm_del_addr_nl_policy,
+ .maxattr = MPTCP_PM_ENDPOINT_ADDR,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_GET_ADDR,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_get_addr_doit,
+ .dumpit = mptcp_pm_nl_get_addr_dumpit,
+ .policy = mptcp_pm_get_addr_nl_policy,
+ .maxattr = MPTCP_PM_ENDPOINT_ADDR,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_FLUSH_ADDRS,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_flush_addrs_doit,
+ .policy = mptcp_pm_flush_addrs_nl_policy,
+ .maxattr = MPTCP_PM_ENDPOINT_ADDR,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SET_LIMITS,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_set_limits_doit,
+ .policy = mptcp_pm_set_limits_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_SUBFLOWS,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_GET_LIMITS,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_get_limits_doit,
+ .policy = mptcp_pm_get_limits_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_SUBFLOWS,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SET_FLAGS,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_set_flags_doit,
+ .policy = mptcp_pm_set_flags_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_ANNOUNCE,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_announce_doit,
+ .policy = mptcp_pm_announce_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_TOKEN,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_REMOVE,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_remove_doit,
+ .policy = mptcp_pm_remove_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_LOC_ID,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_subflow_create_doit,
+ .policy = mptcp_pm_subflow_create_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .doit = mptcp_pm_nl_subflow_destroy_doit,
+ .policy = mptcp_pm_subflow_destroy_nl_policy,
+ .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+};
diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h
new file mode 100644
index 0000000000..10579d1845
--- /dev/null
+++ b/net/mptcp/mptcp_pm_gen.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/mptcp.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_MPTCP_PM_GEN_H
+#define _LINUX_MPTCP_PM_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/mptcp_pm.h>
+
+/* Common nested types */
+extern const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1];
+
+extern const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
+
+extern const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
+
+extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
+
+extern const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1];
+
+extern const struct nla_policy mptcp_pm_set_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1];
+
+extern const struct nla_policy mptcp_pm_get_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1];
+
+extern const struct nla_policy mptcp_pm_set_flags_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1];
+
+extern const struct nla_policy mptcp_pm_announce_nl_policy[MPTCP_PM_ATTR_TOKEN + 1];
+
+extern const struct nla_policy mptcp_pm_remove_nl_policy[MPTCP_PM_ATTR_LOC_ID + 1];
+
+extern const struct nla_policy mptcp_pm_subflow_create_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1];
+
+extern const struct nla_policy mptcp_pm_subflow_destroy_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1];
+
+/* Ops table for mptcp_pm */
+extern const struct genl_ops mptcp_pm_nl_ops[11];
+
+int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info);
+int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb,
+ struct genl_info *info);
+
+#endif /* _LINUX_MPTCP_PM_GEN_H */
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index d2527d189a..e3e96a49f9 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -962,9 +962,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
- subflow->fully_established = 1;
- WRITE_ONCE(msk->fully_established, true);
- goto check_notify;
+ goto set_fully_established;
}
/* If the first established packet does not contain MP_CAPABLE + data
@@ -986,7 +984,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
- mptcp_subflow_fully_established(subflow, mp_opt);
+
+ mptcp_data_lock((struct sock *)msk);
+ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
+ mptcp_data_unlock((struct sock *)msk);
check_notify:
/* if the subflow is not already linked into the conn_list, we can't
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index d8da5374d9..4ae19113b8 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -184,7 +184,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk)
spin_unlock_bh(&pm->lock);
}
-void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
+void mptcp_pm_subflow_check_next(struct mptcp_sock *msk,
const struct mptcp_subflow_context *subflow)
{
struct mptcp_pm_data *pm = &msk->pm;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 3011bc3784..cccb720c1c 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -396,19 +396,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
-static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
- const struct mptcp_addr_info *addr)
-{
- int i;
-
- for (i = 0; i < nr; i++) {
- if (addrs[i].id == addr->id)
- return true;
- }
-
- return false;
-}
-
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
@@ -440,18 +427,34 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
msk->pm.subflows++;
addrs[i++] = remote;
} else {
+ DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ /* Forbid creation of new subflows matching existing
+ * ones, possibly already created by incoming ADD_ADDR
+ */
+ bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+ mptcp_for_each_subflow(msk, subflow)
+ if (READ_ONCE(subflow->local_id) == local->id)
+ __set_bit(subflow->remote_id, unavail_id);
+
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &addrs[i]);
- addrs[i].id = subflow->remote_id;
+ addrs[i].id = READ_ONCE(subflow->remote_id);
if (deny_id0 && !addrs[i].id)
continue;
+ if (test_bit(addrs[i].id, unavail_id))
+ continue;
+
if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
continue;
- if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
- msk->pm.subflows < subflows_max) {
+ if (msk->pm.subflows < subflows_max) {
+ /* forbid creating multiple address towards
+ * this id
+ */
+ __set_bit(addrs[i].id, unavail_id);
msk->pm.subflows++;
i++;
}
@@ -799,18 +802,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
mptcp_for_each_subflow_safe(msk, subflow, tmp) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ u8 remote_id = READ_ONCE(subflow->remote_id);
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
- u8 id = subflow->local_id;
+ u8 id = subflow_get_local_id(subflow);
- if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id)
+ if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
continue;
if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
continue;
pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
- i, rm_id, subflow->local_id, subflow->remote_id,
- msk->mpc_endpoint_id);
+ i, rm_id, id, remote_id, msk->mpc_endpoint_id);
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
@@ -901,7 +904,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
}
static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
- struct mptcp_pm_addr_entry *entry)
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id)
{
struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
unsigned int addr_max;
@@ -949,7 +953,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
}
}
- if (!entry->addr.id) {
+ if (!entry->addr.id && needs_id) {
find_next:
entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1,
@@ -960,7 +964,7 @@ find_next:
}
}
- if (!entry->addr.id)
+ if (!entry->addr.id && needs_id)
goto out;
__set_bit(entry->addr.id, pernet->id_bitmap);
@@ -1048,6 +1052,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
+ /* We don't use mptcp_set_state() here because it needs to be called
+ * under the msk socket lock. For the moment, that will not bring
+ * anything more than only calling inet_sk_state_store(), because the
+ * old status is known (TCP_CLOSE).
+ */
inet_sk_state_store(newsk, TCP_LISTEN);
lock_sock(ssk);
err = __inet_listen_sk(ssk, backlog);
@@ -1087,7 +1096,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
entry->ifindex = 0;
entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
entry->lsk = NULL;
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true);
if (ret < 0)
kfree(entry);
@@ -1104,29 +1113,6 @@ static const struct genl_multicast_group mptcp_pm_mcgrps[] = {
},
};
-static const struct nla_policy
-mptcp_pm_addr_policy[MPTCP_PM_ADDR_ATTR_MAX + 1] = {
- [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, },
- [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, },
- [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, },
- [MPTCP_PM_ADDR_ATTR_ADDR6] =
- NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
- [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16 },
- [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32 },
- [MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32 },
-};
-
-static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
- [MPTCP_PM_ATTR_ADDR] =
- NLA_POLICY_NESTED(mptcp_pm_addr_policy),
- [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, },
- [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
- [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
- [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, },
- [MPTCP_PM_ATTR_ADDR_REMOTE] =
- NLA_POLICY_NESTED(mptcp_pm_addr_policy),
-};
-
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
{
struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk);
@@ -1188,7 +1174,7 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[],
/* no validation needed - was already done via nested policy */
err = nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr,
- mptcp_pm_addr_policy, info->extack);
+ mptcp_pm_address_nl_policy, info->extack);
if (err)
return err;
@@ -1303,9 +1289,21 @@ next:
return 0;
}
-static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
+static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr,
+ struct genl_info *info)
{
- struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+
+ if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr,
+ mptcp_pm_address_nl_policy, info->extack) &&
+ tb[MPTCP_PM_ADDR_ATTR_ID])
+ return true;
+ return false;
+}
+
+int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
struct mptcp_pm_addr_entry addr, *entry;
int ret;
@@ -1344,7 +1342,8 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
goto out_free;
}
}
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry,
+ !mptcp_pm_has_addr_attr_id(attr, info));
if (ret < 0) {
GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret);
goto out_free;
@@ -1484,9 +1483,9 @@ next:
return 0;
}
-static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
struct mptcp_pm_addr_entry addr, *entry;
unsigned int addr_max;
@@ -1620,7 +1619,7 @@ static void __reset_counters(struct pm_nl_pernet *pernet)
pernet->addrs = 0;
}
-static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info)
{
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
LIST_HEAD(free_list);
@@ -1676,9 +1675,9 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
struct mptcp_pm_addr_entry addr, *entry;
struct sk_buff *msg;
@@ -1726,8 +1725,8 @@ fail:
return ret;
}
-static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
- struct netlink_callback *cb)
+int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
{
struct net *net = sock_net(msg->sk);
struct mptcp_pm_addr_entry *entry;
@@ -1784,8 +1783,7 @@ static int parse_limit(struct genl_info *info, int id, unsigned int *limit)
return 0;
}
-static int
-mptcp_nl_cmd_set_limits(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info)
{
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
unsigned int rcv_addrs, subflows;
@@ -1810,8 +1808,7 @@ unlock:
return ret;
}
-static int
-mptcp_nl_cmd_get_limits(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info)
{
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
struct sk_buff *msg;
@@ -1920,7 +1917,7 @@ int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8
return 0;
}
-static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info)
{
struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, };
struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, };
@@ -2000,7 +1997,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
if (WARN_ON_ONCE(!sf))
return -EINVAL;
- if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
+ if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf)))
return -EMSGSIZE;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
@@ -2284,72 +2281,13 @@ nla_put_failure:
nlmsg_free(skb);
}
-static const struct genl_small_ops mptcp_pm_ops[] = {
- {
- .cmd = MPTCP_PM_CMD_ADD_ADDR,
- .doit = mptcp_nl_cmd_add_addr,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_DEL_ADDR,
- .doit = mptcp_nl_cmd_del_addr,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_FLUSH_ADDRS,
- .doit = mptcp_nl_cmd_flush_addrs,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_GET_ADDR,
- .doit = mptcp_nl_cmd_get_addr,
- .dumpit = mptcp_nl_cmd_dump_addrs,
- },
- {
- .cmd = MPTCP_PM_CMD_SET_LIMITS,
- .doit = mptcp_nl_cmd_set_limits,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_GET_LIMITS,
- .doit = mptcp_nl_cmd_get_limits,
- },
- {
- .cmd = MPTCP_PM_CMD_SET_FLAGS,
- .doit = mptcp_nl_cmd_set_flags,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_ANNOUNCE,
- .doit = mptcp_nl_cmd_announce,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_REMOVE,
- .doit = mptcp_nl_cmd_remove,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE,
- .doit = mptcp_nl_cmd_sf_create,
- .flags = GENL_UNS_ADMIN_PERM,
- },
- {
- .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY,
- .doit = mptcp_nl_cmd_sf_destroy,
- .flags = GENL_UNS_ADMIN_PERM,
- },
-};
-
static struct genl_family mptcp_genl_family __ro_after_init = {
.name = MPTCP_PM_NAME,
.version = MPTCP_PM_VER,
- .maxattr = MPTCP_PM_ATTR_MAX,
- .policy = mptcp_pm_policy,
.netnsok = true,
.module = THIS_MODULE,
- .small_ops = mptcp_pm_ops,
- .n_small_ops = ARRAY_SIZE(mptcp_pm_ops),
+ .ops = mptcp_pm_nl_ops,
+ .n_ops = ARRAY_SIZE(mptcp_pm_nl_ops),
.resv_start_op = MPTCP_PM_CMD_SUBFLOW_DESTROY + 1,
.mcgrps = mptcp_pm_mcgrps,
.n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps),
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index d042d32beb..01b3a8f2f0 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk)
}
static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry)
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id)
{
DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_pm_addr_entry *match = NULL;
@@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true);
- if (addr_match && entry->addr.id == 0)
+ if (addr_match && entry->addr.id == 0 && needs_id)
entry->addr.id = e->addr.id;
id_match = (e->addr.id == entry->addr.id);
if (addr_match && id_match) {
@@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
}
*e = *entry;
- if (!e->addr.id)
+ if (!e->addr.id && needs_id)
e->addr.id = find_next_zero_bit(id_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1,
1);
@@ -130,10 +131,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
- struct mptcp_pm_addr_entry new_entry;
+ struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry;
__be16 msk_sport = ((struct inet_sock *)
inet_sk((struct sock *)msk))->inet_sport;
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&e->addr, skc, false)) {
+ entry = e;
+ break;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+ if (entry)
+ return entry->addr.id;
+
memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry));
new_entry.addr = *skc;
new_entry.addr.id = 0;
@@ -142,16 +154,17 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
if (new_entry.addr.port == msk_sport)
new_entry.addr.port = 0;
- return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry);
+ return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true);
}
-int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct mptcp_pm_addr_entry addr_val;
struct mptcp_sock *msk;
int err = -EINVAL;
+ struct sock *sk;
u32 token_val;
if (!addr || !token) {
@@ -167,6 +180,8 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
return err;
}
+ sk = (struct sock *)msk;
+
if (!mptcp_pm_is_userspace(msk)) {
GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
goto announce_err;
@@ -184,13 +199,13 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
goto announce_err;
}
- err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto announce_err;
}
- lock_sock((struct sock *)msk);
+ lock_sock(sk);
spin_lock_bh(&msk->pm.lock);
if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) {
@@ -200,15 +215,49 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
}
spin_unlock_bh(&msk->pm.lock);
- release_sock((struct sock *)msk);
+ release_sock(sk);
err = 0;
announce_err:
- sock_put((struct sock *)msk);
+ sock_put(sk);
return err;
}
-int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
+static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk,
+ struct genl_info *info)
+{
+ struct mptcp_rm_list list = { .nr = 0 };
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ bool has_id_0 = false;
+ int err = -EINVAL;
+
+ lock_sock(sk);
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->local_id) == 0) {
+ has_id_0 = true;
+ break;
+ }
+ }
+ if (!has_id_0) {
+ GENL_SET_ERR_MSG(info, "address with id 0 not found");
+ goto remove_err;
+ }
+
+ list.ids[list.nr++] = 0;
+
+ spin_lock_bh(&msk->pm.lock);
+ mptcp_pm_remove_addr(msk, &list);
+ spin_unlock_bh(&msk->pm.lock);
+
+ err = 0;
+
+remove_err:
+ release_sock(sk);
+ return err;
+}
+
+int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID];
@@ -217,6 +266,7 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
struct mptcp_sock *msk;
LIST_HEAD(free_list);
int err = -EINVAL;
+ struct sock *sk;
u32 token_val;
u8 id_val;
@@ -234,12 +284,19 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
return err;
}
+ sk = (struct sock *)msk;
+
if (!mptcp_pm_is_userspace(msk)) {
GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
goto remove_err;
}
- lock_sock((struct sock *)msk);
+ if (id_val == 0) {
+ err = mptcp_userspace_pm_remove_id_zero_address(msk, info);
+ goto remove_err;
+ }
+
+ lock_sock(sk);
list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
if (entry->addr.id == id_val) {
@@ -250,7 +307,7 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
if (!match) {
GENL_SET_ERR_MSG(info, "address with specified id not found");
- release_sock((struct sock *)msk);
+ release_sock(sk);
goto remove_err;
}
@@ -258,19 +315,19 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
mptcp_pm_remove_addrs(msk, &free_list);
- release_sock((struct sock *)msk);
+ release_sock(sk);
list_for_each_entry_safe(match, entry, &free_list, list) {
- sock_kfree_s((struct sock *)msk, match, sizeof(*match));
+ sock_kfree_s(sk, match, sizeof(*match));
}
err = 0;
remove_err:
- sock_put((struct sock *)msk);
+ sock_put(sk);
return err;
}
-int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
@@ -296,6 +353,8 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
return err;
}
+ sk = (struct sock *)msk;
+
if (!mptcp_pm_is_userspace(msk)) {
GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
goto create_err;
@@ -313,8 +372,6 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- sk = (struct sock *)msk;
-
if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) {
GENL_SET_ERR_MSG(info, "families mismatch");
err = -EINVAL;
@@ -322,7 +379,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
}
local.addr = addr_l;
- err = mptcp_userspace_pm_append_new_local_addr(msk, &local);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto create_err;
@@ -342,7 +399,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
spin_unlock_bh(&msk->pm.lock);
create_err:
- sock_put((struct sock *)msk);
+ sock_put(sk);
return err;
}
@@ -394,7 +451,7 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk,
return NULL;
}
-int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
+int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
@@ -419,6 +476,8 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
return err;
}
+ sk = (struct sock *)msk;
+
if (!mptcp_pm_is_userspace(msk)) {
GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
goto destroy_err;
@@ -448,7 +507,6 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
goto destroy_err;
}
- sk = (struct sock *)msk;
lock_sock(sk);
ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r);
if (ssk) {
@@ -468,7 +526,7 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
release_sock(sk);
destroy_err:
- sock_put((struct sock *)msk);
+ sock_put(sk);
return err;
}
@@ -478,6 +536,7 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
{
struct mptcp_sock *msk;
int ret = -EINVAL;
+ struct sock *sk;
u32 token_val;
token_val = nla_get_u32(token);
@@ -486,6 +545,8 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
if (!msk)
return ret;
+ sk = (struct sock *)msk;
+
if (!mptcp_pm_is_userspace(msk))
goto set_flags_err;
@@ -493,11 +554,11 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
rem->addr.family == AF_UNSPEC)
goto set_flags_err;
- lock_sock((struct sock *)msk);
+ lock_sock(sk);
ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup);
- release_sock((struct sock *)msk);
+ release_sock(sk);
set_flags_err:
- sock_put((struct sock *)msk);
+ sock_put(sk);
return ret;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5c003a0f0f..5305f2ff0f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -99,7 +99,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
subflow->subflow_id = msk->subflow_id++;
/* This is the first subflow, always with id 0 */
- subflow->local_id_valid = 1;
+ WRITE_ONCE(subflow->local_id, 0);
mptcp_sock_graft(msk->first, sk->sk_socket);
iput(SOCK_INODE(ssock));
@@ -121,8 +121,6 @@ struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk)
ret = __mptcp_socket_create(msk);
if (ret)
return ERR_PTR(ret);
-
- mptcp_sockopt_sync(msk, msk->first);
}
return msk->first;
@@ -445,11 +443,11 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
switch (sk->sk_state) {
case TCP_FIN_WAIT1:
- inet_sk_state_store(sk, TCP_FIN_WAIT2);
+ mptcp_set_state(sk, TCP_FIN_WAIT2);
break;
case TCP_CLOSING:
case TCP_LAST_ACK:
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
break;
}
@@ -610,13 +608,13 @@ static bool mptcp_check_data_fin(struct sock *sk)
switch (sk->sk_state) {
case TCP_ESTABLISHED:
- inet_sk_state_store(sk, TCP_CLOSE_WAIT);
+ mptcp_set_state(sk, TCP_CLOSE_WAIT);
break;
case TCP_FIN_WAIT1:
- inet_sk_state_store(sk, TCP_CLOSING);
+ mptcp_set_state(sk, TCP_CLOSING);
break;
case TCP_FIN_WAIT2:
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
break;
default:
/* Other states not expected */
@@ -791,7 +789,7 @@ static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
*/
ssk_state = inet_sk_state_load(ssk);
if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
- inet_sk_state_store(sk, ssk_state);
+ mptcp_set_state(sk, ssk_state);
WRITE_ONCE(sk->sk_err, -err);
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
@@ -863,9 +861,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
/* Wake-up the reader only for in-sequence data */
mptcp_data_lock(sk);
- if (move_skbs_to_msk(msk, ssk))
+ if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
sk->sk_data_ready(sk);
-
mptcp_data_unlock(sk);
}
@@ -1274,7 +1271,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
* queue management operation, to avoid breaking the ext <->
* SSN association set here
*/
- mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+ mpext = mptcp_get_ext(skb);
if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1;
goto alloc_skb;
@@ -1296,7 +1293,7 @@ alloc_skb:
i = skb_shinfo(skb)->nr_frags;
reuse_skb = false;
- mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+ mpext = mptcp_get_ext(skb);
}
/* Zero window and all data acked? Probe. */
@@ -1522,8 +1519,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
void mptcp_check_and_set_pending(struct sock *sk)
{
- if (mptcp_send_head(sk))
- mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
+ if (mptcp_send_head(sk)) {
+ mptcp_data_lock(sk);
+ mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING);
+ mptcp_data_unlock(sk);
+ }
}
static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
@@ -1767,6 +1767,18 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
return ret;
}
+static int do_copy_data_nocache(struct sock *sk, int copy,
+ struct iov_iter *from, char *to)
+{
+ if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
+ if (!copy_from_iter_full_nocache(to, copy, from))
+ return -EFAULT;
+ } else if (!copy_from_iter_full(to, copy, from)) {
+ return -EFAULT;
+ }
+ return 0;
+}
+
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1840,11 +1852,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!sk_wmem_schedule(sk, total_ts))
goto wait_for_memory;
- if (copy_page_from_iter(dfrag->page, offset, psize,
- &msg->msg_iter) != psize) {
- ret = -EFAULT;
+ ret = do_copy_data_nocache(sk, psize, &msg->msg_iter,
+ page_address(dfrag->page) + offset);
+ if (ret)
goto do_error;
- }
/* data successfully copied into the write queue */
sk_forward_alloc_add(sk, -total_ts);
@@ -1928,6 +1939,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
if (!(flags & MSG_PEEK)) {
MPTCP_SKB_CB(skb)->offset += count;
MPTCP_SKB_CB(skb)->map_seq += count;
+ msk->bytes_consumed += count;
}
break;
}
@@ -1938,6 +1950,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize);
__skb_unlink(skb, &msk->receive_queue);
__kfree_skb(skb);
+ msk->bytes_consumed += count;
}
if (copied >= len)
@@ -1964,6 +1977,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (copied <= 0)
return;
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, msk->first);
+
msk->rcvq_space.copied += copied;
mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
@@ -2318,9 +2334,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
if (__mptcp_check_fallback(msk))
return false;
- if (tcp_rtx_and_write_queues_empty(sk))
- return false;
-
/* the closing socket has some data untransmitted and/or unacked:
* some data in the mptcp rtx queue has not really been xmitted yet.
* keep it simple and re-inject the whole mptcp level rtx queue
@@ -2397,8 +2410,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (msk->in_accept_queue && msk->first == ssk &&
(sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
/* ensure later check in mptcp_worker() will dispose the msk */
- mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1));
sock_set_flag(sk, SOCK_DEAD);
+ mptcp_set_close_tout(sk, tcp_jiffies32 - (mptcp_close_timeout(sk) + 1));
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
mptcp_subflow_drop_ctx(ssk);
goto out_release;
@@ -2467,7 +2480,7 @@ out:
inet_sk_state_load(msk->first) == TCP_CLOSE) {
if (sk->sk_state != TCP_ESTABLISHED ||
msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
mptcp_close_wake_up(sk);
} else {
mptcp_start_tout_timer(sk);
@@ -2484,7 +2497,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
/* subflow aborted before reaching the fully_established status
* attempt the creation of the next subflow
*/
- mptcp_pm_subflow_check_next(mptcp_sk(sk), ssk, subflow);
+ mptcp_pm_subflow_check_next(mptcp_sk(sk), subflow);
__mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_PUSH);
}
@@ -2523,7 +2536,7 @@ static bool mptcp_close_tout_expired(const struct sock *sk)
return false;
return time_after32(tcp_jiffies32,
- inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN);
+ inet_csk(sk)->icsk_mtup.probe_timestamp + mptcp_close_timeout(sk));
}
static void mptcp_check_fastclose(struct mptcp_sock *msk)
@@ -2562,7 +2575,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
WRITE_ONCE(sk->sk_err, ECONNRESET);
}
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
@@ -2666,7 +2679,7 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
return;
close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
- TCP_TIMEWAIT_LEN;
+ mptcp_close_timeout(sk);
/* the close timeout takes precedence on the fail one, and here at least one of
* them is active
@@ -2697,7 +2710,7 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
mptcp_for_each_subflow_safe(msk, subflow, tmp)
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
subflow, MPTCP_CF_FASTCLOSE);
@@ -2762,6 +2775,7 @@ static void __mptcp_init_sock(struct sock *sk)
msk->rmem_fwd_alloc = 0;
WRITE_ONCE(msk->rmem_released, 0);
msk->timer_ival = TCP_RTO_MIN;
+ msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
WRITE_ONCE(msk->first, NULL);
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
@@ -2874,6 +2888,24 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
release_sock(ssk);
}
+void mptcp_set_state(struct sock *sk, int state)
+{
+ int oldstate = sk->sk_state;
+
+ switch (state) {
+ case TCP_ESTABLISHED:
+ if (oldstate != TCP_ESTABLISHED)
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
+ break;
+
+ default:
+ if (oldstate == TCP_ESTABLISHED)
+ MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
+ }
+
+ inet_sk_state_store(sk, state);
+}
+
static const unsigned char new_state[16] = {
/* current state: new state: action: */
[0 /* (Invalid) */] = TCP_CLOSE,
@@ -2896,7 +2928,7 @@ static int mptcp_close_state(struct sock *sk)
int next = (int)new_state[sk->sk_state];
int ns = next & TCP_STATE_MASK;
- inet_sk_state_store(sk, ns);
+ mptcp_set_state(sk, ns);
return next & TCP_ACTION_FIN;
}
@@ -2971,16 +3003,9 @@ void __mptcp_unaccepted_force_close(struct sock *sk)
__mptcp_destroy_sock(sk);
}
-static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
+static __poll_t mptcp_check_readable(struct sock *sk)
{
- /* Concurrent splices from sk_receive_queue into receive_queue will
- * always show at least one non-empty queue when checked in this order.
- */
- if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
- skb_queue_empty_lockless(&msk->receive_queue))
- return 0;
-
- return EPOLLIN | EPOLLRDNORM;
+ return mptcp_epollin_ready(sk) ? EPOLLIN | EPOLLRDNORM : 0;
}
static void mptcp_check_listen_stop(struct sock *sk)
@@ -3014,11 +3039,11 @@ bool __mptcp_close(struct sock *sk, long timeout)
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
mptcp_check_listen_stop(sk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
goto cleanup;
}
- if (mptcp_check_readable(msk) || timeout < 0) {
+ if (mptcp_data_avail(msk) || timeout < 0) {
/* If the msk has unread data, or the caller explicitly asks for it,
* do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
*/
@@ -3057,7 +3082,7 @@ cleanup:
* state, let's not keep resources busy for no reasons
*/
if (subflows_alive == 0)
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
@@ -3123,7 +3148,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
return -EBUSY;
mptcp_check_listen_stop(sk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
mptcp_stop_rtx_timer(sk);
mptcp_stop_tout_timer(sk);
@@ -3137,7 +3162,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
- msk->push_pending = 0;
msk->recovery = false;
msk->can_ack = false;
msk->fully_established = false;
@@ -3145,6 +3169,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
msk->snd_data_fin_enable = false;
msk->rcv_fastclose = false;
msk->use_64bit_ack = false;
+ msk->bytes_consumed = 0;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_reset(msk);
mptcp_ca_reset(sk);
@@ -3152,6 +3177,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
msk->bytes_received = 0;
msk->bytes_sent = 0;
msk->bytes_retrans = 0;
+ msk->rcvspace_init = 0;
WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
@@ -3174,6 +3200,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;
if (!nsk)
@@ -3210,11 +3237,12 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
/* this can't race with mptcp_close(), as the msk is
* not yet exposed to user-space
*/
- inet_sk_state_store(nsk, TCP_ESTABLISHED);
+ mptcp_set_state(nsk, TCP_ESTABLISHED);
/* The msk maintains a ref to each subflow in the connections list */
WRITE_ONCE(msk->first, ssk);
- list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
+ subflow = mptcp_subflow_ctx(ssk);
+ list_add(&subflow->node, &msk->conn_list);
sock_hold(ssk);
/* new mpc subflow takes ownership of the newly
@@ -3229,6 +3257,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_propagate_sndbuf(nsk, ssk);
mptcp_rcv_space_init(msk, ssk);
+
+ if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
+ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
bh_unlock_sock(nsk);
/* note: the newly allocated socket refcount is 2 now */
@@ -3239,6 +3270,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
{
const struct tcp_sock *tp = tcp_sk(ssk);
+ msk->rcvspace_init = 1;
msk->rcvq_space.copied = 0;
msk->rcvq_space.rtt_us = 0;
@@ -3249,8 +3281,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
TCP_INIT_CWND * tp->advmss);
if (msk->rcvq_space.space == 0)
msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
-
- WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}
static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
@@ -3362,8 +3392,7 @@ static void mptcp_release_cb(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
for (;;) {
- unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
- msk->push_pending;
+ unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED);
struct list_head join_list;
if (!flags)
@@ -3379,7 +3408,6 @@ static void mptcp_release_cb(struct sock *sk)
* datapath acquires the msk socket spinlock while holding
* the subflow socket lock
*/
- msk->push_pending = 0;
msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
@@ -3507,13 +3535,8 @@ void mptcp_finish_connect(struct sock *ssk)
* accessing the field below
*/
WRITE_ONCE(msk->local_key, subflow->local_key);
- WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
- WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- WRITE_ONCE(msk->snd_una, msk->write_seq);
mptcp_pm_new_connection(msk, ssk, 0);
-
- mptcp_rcv_space_init(msk, ssk);
}
void mptcp_sock_graft(struct sock *sk, struct socket *parent)
@@ -3669,7 +3692,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (IS_ERR(ssk))
return PTR_ERR(ssk);
- inet_sk_state_store(sk, TCP_SYN_SENT);
+ mptcp_set_state(sk, TCP_SYN_SENT);
subflow = mptcp_subflow_ctx(ssk);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -3719,7 +3742,7 @@ out:
if (unlikely(err)) {
/* avoid leaving a dangling token in an unconnected socket */
mptcp_token_destroy(msk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
return err;
}
@@ -3809,13 +3832,13 @@ static int mptcp_listen(struct socket *sock, int backlog)
goto unlock;
}
- inet_sk_state_store(sk, TCP_LISTEN);
+ mptcp_set_state(sk, TCP_LISTEN);
sock_set_flag(sk, SOCK_RCU_FREE);
lock_sock(ssk);
err = __inet_listen_sk(ssk, backlog);
release_sock(ssk);
- inet_sk_state_store(sk, inet_sk_state_load(ssk));
+ mptcp_set_state(sk, inet_sk_state_load(ssk));
if (!err) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -3875,7 +3898,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
__mptcp_close_ssk(newsk, msk->first,
mptcp_subflow_ctx(msk->first), 0);
if (unlikely(list_is_singular(&msk->conn_list)))
- inet_sk_state_store(newsk, TCP_CLOSE);
+ mptcp_set_state(newsk, TCP_CLOSE);
}
}
release_sock(newsk);
@@ -3928,7 +3951,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
- mask |= mptcp_check_readable(msk);
+ mask |= mptcp_check_readable(sk);
if (shutdown & SEND_SHUTDOWN)
mask |= EPOLLOUT | EPOLLWRNORM;
else
@@ -3966,6 +3989,7 @@ static const struct proto_ops mptcp_stream_ops = {
.sendmsg = inet_sendmsg,
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
+ .set_rcvlowat = mptcp_set_rcvlowat,
};
static struct inet_protosw mptcp_protosw = {
@@ -4067,6 +4091,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
#endif
+ .set_rcvlowat = mptcp_set_rcvlowat,
};
static struct proto mptcp_v6_prot;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 07c5ac37d0..3e50baba1b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -13,6 +13,8 @@
#include <uapi/linux/mptcp.h>
#include <net/genetlink.h>
+#include "mptcp_pm_gen.h"
+
#define MPTCP_SUPPORTED_VERSION 1
/* MPTCP option bits */
@@ -268,6 +270,7 @@ struct mptcp_sock {
atomic64_t rcv_wnd_sent;
u64 rcv_data_fin_seq;
u64 bytes_retrans;
+ u64 bytes_consumed;
int rmem_fwd_alloc;
int snd_burst;
int old_wspace;
@@ -283,7 +286,6 @@ struct mptcp_sock {
int rmem_released;
unsigned long flags;
unsigned long cb_flags;
- unsigned long push_pending;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
@@ -302,7 +304,8 @@ struct mptcp_sock {
nodelay:1,
fastopening:1,
in_accept_queue:1,
- free_first:1;
+ free_first:1,
+ rcvspace_init:1;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -436,11 +439,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
return (struct mptcp_subflow_request_sock *)rsk;
}
-enum mptcp_data_avail {
- MPTCP_SUBFLOW_NODATA,
- MPTCP_SUBFLOW_DATA_AVAIL,
-};
-
struct mptcp_delegated_action {
struct napi_struct napi;
struct list_head head;
@@ -493,11 +491,10 @@ struct mptcp_subflow_context {
remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
- local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
- __unused : 9;
- enum mptcp_data_avail data_avail;
+ __unused : 10;
+ bool data_avail;
bool scheduled;
u32 remote_nonce;
u64 thmac;
@@ -507,7 +504,7 @@ struct mptcp_subflow_context {
u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
u64 iasn; /* initial ack sequence number, MPC subflows only */
};
- u8 local_id;
+ s16 local_id; /* if negative not initialized yet */
u8 remote_id;
u8 reset_seen:1;
u8 reset_transient:1;
@@ -558,6 +555,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
memset(&subflow->reset, 0, sizeof(subflow->reset));
subflow->request_mptcp = 1;
+ WRITE_ONCE(subflow->local_id, -1);
}
static inline u64
@@ -621,10 +619,12 @@ unsigned int mptcp_get_add_addr_timeout(const struct net *net);
int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
+unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
@@ -642,6 +642,7 @@ bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void __mptcp_unaccepted_force_close(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
+void mptcp_set_state(struct sock *sk, int state);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port);
@@ -669,6 +670,24 @@ struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
int mptcp_sched_get_send(struct mptcp_sock *msk);
int mptcp_sched_get_retrans(struct mptcp_sock *msk);
+static inline u64 mptcp_data_avail(const struct mptcp_sock *msk)
+{
+ return READ_ONCE(msk->bytes_received) - READ_ONCE(msk->bytes_consumed);
+}
+
+static inline bool mptcp_epollin_ready(const struct sock *sk)
+{
+ /* mptcp doesn't have to deal with small skbs in the receive queue,
+ * as it can always coalesce them
+ */
+ return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) ||
+ (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+ mem_cgroup_under_socket_pressure(sk->sk_memcg)) ||
+ READ_ONCE(tcp_memory_pressure);
+}
+
+int mptcp_set_rcvlowat(struct sock *sk, int val);
+
static inline bool __tcp_can_send(const struct sock *ssk)
{
/* only send if our side has not closed yet */
@@ -743,6 +762,7 @@ static inline bool mptcp_is_fully_established(struct sock *sk)
return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
READ_ONCE(mptcp_sk(sk)->fully_established);
}
+
void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
@@ -873,7 +893,7 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk);
-void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
+void mptcp_pm_subflow_check_next(struct mptcp_sock *msk,
const struct mptcp_subflow_context *subflow);
void mptcp_pm_add_addr_received(const struct sock *ssk,
const struct mptcp_addr_info *addr);
@@ -924,10 +944,6 @@ void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
struct list_head *rm_list);
void mptcp_free_local_addr_list(struct mptcp_sock *msk);
-int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info);
-int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info);
-int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info);
-int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info);
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
const struct sock *ssk, gfp_t gfp);
@@ -937,8 +953,8 @@ void mptcp_event_pm_listener(const struct sock *ssk,
enum mptcp_event_type event);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req);
@@ -1006,6 +1022,15 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
+{
+ int local_id = READ_ONCE(subflow->local_id);
+
+ if (local_id < 0)
+ return 0;
+ return local_id;
+}
+
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
@@ -1054,7 +1079,7 @@ static inline bool mptcp_check_fallback(const struct sock *sk)
static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
{
- if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) {
+ if (__mptcp_check_fallback(msk)) {
pr_debug("TCP fallback already done (msk=%p)", msk);
return;
}
@@ -1104,7 +1129,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) &&
+ return (1 << sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) &&
is_active_ssk(subflow) &&
!subflow->conn_finished;
}
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 116e300823..3536807337 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -89,7 +89,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
break;
case SO_PRIORITY:
- ssk->sk_priority = val;
+ WRITE_ONCE(ssk->sk_priority, val);
break;
case SO_SNDBUF:
case SO_SNDBUFFORCE:
@@ -735,7 +735,7 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
lock_sock(sk);
sockopt_seq_inc(msk);
- val = inet_sk(sk)->tos;
+ val = READ_ONCE(inet_sk(sk)->tos);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow;
@@ -919,7 +919,7 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
mptcp_pm_get_local_addr_max(msk);
}
- if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
+ if (__mptcp_check_fallback(msk))
flags |= MPTCP_INFO_FLAG_FALLBACK;
if (READ_ONCE(msk->can_ack))
flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
@@ -1347,7 +1347,7 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) {
case IP_TOS:
- return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos);
+ return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
}
return -EOPNOTSUPP;
@@ -1450,37 +1450,63 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
}
-static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
-{
- bool slow = lock_sock_fast(ssk);
-
- sync_socket_options(msk, ssk);
-
- unlock_sock_fast(ssk, slow);
-}
-
-void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
+void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
msk_owned_by_me(msk);
+ ssk->sk_rcvlowat = 0;
+
+ /* subflows must ignore any latency-related settings: will not affect
+ * the user-space - only the msk is relevant - but will foul the
+ * mptcp scheduler
+ */
+ tcp_sk(ssk)->notsent_lowat = UINT_MAX;
+
if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
- __mptcp_sockopt_sync(msk, ssk);
+ sync_socket_options(msk, ssk);
subflow->setsockopt_seq = msk->setsockopt_seq;
}
}
-void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
+/* unfortunately this is different enough from the tcp version so
+ * that we can't factor it out
+ */
+int mptcp_set_rcvlowat(struct sock *sk, int val)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct mptcp_subflow_context *subflow;
+ int space, cap;
- msk_owned_by_me(msk);
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+ cap = sk->sk_rcvbuf >> 1;
+ else
+ cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+ val = min(val, cap);
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
- if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
- sync_socket_options(msk, ssk);
+ /* Check if we need to signal EPOLLIN right now */
+ if (mptcp_epollin_ready(sk))
+ sk->sk_data_ready(sk);
- subflow->setsockopt_seq = msk->setsockopt_seq;
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+ return 0;
+
+ space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, val);
+ if (space <= sk->sk_rcvbuf)
+ return 0;
+
+ /* propagate the rcvbuf changes to all the subflows */
+ WRITE_ONCE(sk->sk_rcvbuf, space);
+ mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
+
+ slow = lock_sock_fast(ssk);
+ WRITE_ONCE(ssk->sk_rcvbuf, space);
+ tcp_sk(ssk)->window_clamp = val;
+ unlock_sock_fast(ssk, slow);
}
+ return 0;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d3c5ecf8dd..71ba86246f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -9,8 +9,8 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
-#include <crypto/algapi.h>
#include <crypto/sha2.h>
+#include <crypto/utils.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -421,29 +421,26 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc
void __mptcp_sync_state(struct sock *sk, int state)
{
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct sock *ssk = msk->first;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ __mptcp_propagate_sndbuf(sk, ssk);
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, ssk);
- __mptcp_propagate_sndbuf(sk, msk->first);
if (sk->sk_state == TCP_SYN_SENT) {
- inet_sk_state_store(sk, state);
+ /* subflow->idsn is always available in TCP_SYN_SENT state,
+ * even for the FASTOPEN scenarios
+ */
+ WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+ mptcp_set_state(sk, state);
sk->sk_state_change(sk);
}
}
-static void mptcp_propagate_state(struct sock *sk, struct sock *ssk)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_data_lock(sk);
- if (!sock_owned_by_user(sk)) {
- __mptcp_sync_state(sk, ssk->sk_state);
- } else {
- msk->pending_state = ssk->sk_state;
- __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
- }
- mptcp_data_unlock(sk);
-}
-
static void subflow_set_remote_key(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
@@ -465,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk,
atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
}
+static void mptcp_propagate_state(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_data_lock(sk);
+ if (mp_opt) {
+ /* Options are available only in the non fallback cases
+ * avoid updating rx path fields otherwise
+ */
+ WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
+ WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+ subflow_set_remote_key(msk, subflow, mp_opt);
+ }
+
+ if (!sock_owned_by_user(sk)) {
+ __mptcp_sync_state(sk, ssk->sk_state);
+ } else {
+ msk->pending_state = ssk->sk_state;
+ __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
+ }
+ mptcp_data_unlock(sk);
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -499,10 +521,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (mp_opt.deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
- subflow_set_remote_key(msk, subflow, &mp_opt);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -514,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
- subflow->remote_id = mp_opt.join_id;
+ WRITE_ONCE(subflow->remote_id, mp_opt.join_id);
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -545,8 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
}
} else if (mptcp_check_fallback(sk)) {
fallback:
- mptcp_rcv_space_init(msk, sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
return;
@@ -557,8 +577,8 @@ do_reset:
static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
{
- subflow->local_id = local_id;
- subflow->local_id_valid = 1;
+ WARN_ON_ONCE(local_id < 0 || local_id > 255);
+ WRITE_ONCE(subflow->local_id, local_id);
}
static int subflow_chk_local_id(struct sock *sk)
@@ -567,7 +587,7 @@ static int subflow_chk_local_id(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
int err;
- if (likely(subflow->local_id_valid))
+ if (likely(subflow->local_id >= 0))
return 0;
err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
@@ -731,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
kfree_rcu(ctx, rcu);
}
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
{
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
subflow_set_remote_key(msk, subflow, mp_opt);
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
- mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
+ __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
@@ -834,7 +853,6 @@ create_child:
* mpc option
*/
if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
- mptcp_subflow_fully_established(ctx, &mp_opt);
mptcp_pm_fully_established(owner, child);
ctx->pm_notified = 1;
}
@@ -1244,7 +1262,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
struct sk_buff *skb;
if (!skb_peek(&ssk->sk_receive_queue))
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ WRITE_ONCE(subflow->data_avail, false);
if (subflow->data_avail)
return true;
@@ -1278,7 +1296,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
continue;
}
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ WRITE_ONCE(subflow->data_avail, true);
break;
}
return true;
@@ -1300,7 +1318,7 @@ fallback:
goto reset;
}
mptcp_subflow_fail(msk, ssk);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ WRITE_ONCE(subflow->data_avail, true);
return true;
}
@@ -1317,7 +1335,7 @@ reset:
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ WRITE_ONCE(subflow->data_avail, false);
return false;
}
@@ -1329,7 +1347,7 @@ reset:
subflow->map_seq = READ_ONCE(msk->ack_seq);
subflow->map_data_len = skb->len;
subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ WRITE_ONCE(subflow->data_avail, true);
return true;
}
@@ -1341,7 +1359,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
if (subflow->map_valid &&
mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
subflow->map_valid = 0;
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ WRITE_ONCE(subflow->data_avail, false);
pr_debug("Done with mapping: seq=%u data_len=%u",
subflow->map_subflow_seq,
@@ -1412,10 +1430,18 @@ static void subflow_data_ready(struct sock *sk)
WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
!subflow->mp_join && !(state & TCPF_CLOSE));
- if (mptcp_subflow_data_available(sk))
+ if (mptcp_subflow_data_available(sk)) {
mptcp_data_ready(parent, sk);
- else if (unlikely(sk->sk_err))
+
+ /* subflow-level lowat tests are not relevant.
+ * respect the msk-level threshold, possibly mandating an immediate ack
+ */
+ if (mptcp_data_avail(msk) < parent->sk_rcvlowat &&
+ (tcp_sk(sk)->rcv_nxt - tcp_sk(sk)->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss)
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+ } else if (unlikely(sk->sk_err)) {
subflow_error_report(sk);
+ }
}
static void subflow_write_space(struct sock *ssk)
@@ -1532,8 +1558,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- mptcp_sockopt_sync(msk, ssk);
-
ssk->sk_bound_dev_if = ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
if (err)
@@ -1543,7 +1567,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
remote_token, local_id, remote_id);
subflow->remote_token = remote_token;
- subflow->remote_id = remote_id;
+ WRITE_ONCE(subflow->remote_id, remote_id);
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
subflow->subflow_id = msk->subflow_id++;
@@ -1644,7 +1668,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
err = security_mptcp_add_subflow(sk, sf->sk);
if (err)
- goto release_ssk;
+ goto err_free;
/* the newly created socket has to be in the same cgroup as its parent */
mptcp_attach_cgroup(sk, sf->sk);
@@ -1658,15 +1682,12 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(net, 1);
err = tcp_set_ulp(sf->sk, "mptcp");
+ if (err)
+ goto err_free;
-release_ssk:
+ mptcp_sockopt_sync_locked(mptcp_sk(sk), sf->sk);
release_sock(sf->sk);
- if (err) {
- sock_release(sf);
- return err;
- }
-
/* the newly created socket really belongs to the owning MPTCP master
* socket, even if for additional subflows the allocation is performed
* by a kernel workqueue. Adjust inode references, so that the
@@ -1686,6 +1707,11 @@ release_ssk:
mptcp_subflow_ops_override(sf->sk);
return 0;
+
+err_free:
+ release_sock(sf->sk);
+ sock_release(sf);
+ return err;
}
static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
@@ -1705,6 +1731,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
pr_debug("subflow=%p", ctx);
ctx->tcp_sock = sk;
+ WRITE_ONCE(ctx->local_id, -1);
return ctx;
}
@@ -1736,10 +1763,9 @@ static void subflow_state_change(struct sock *sk)
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
mptcp_do_fallback(sk);
- mptcp_rcv_space_init(msk, sk);
pr_fallback(msk);
subflow->conn_finished = 1;
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
/* as recvmsg() does not acquire the subflow socket for ssk selection
@@ -1941,14 +1967,14 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->idsn = subflow_req->idsn;
/* this is the first subflow, id is always 0 */
- new_ctx->local_id_valid = 1;
+ subflow_set_local_id(new_ctx, 0);
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
- new_ctx->remote_id = subflow_req->remote_id;
+ WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;
@@ -2062,7 +2088,6 @@ void __init mptcp_subflow_init(void)
subflow_v6m_specific.send_check = ipv4_specific.send_check;
subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
- subflow_v6m_specific.net_frag_header_len = 0;
subflow_v6m_specific.rebuild_header = subflow_rebuild_header;
tcpv6_prot_override = tcpv6_prot;
diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c
index 0758865ab6..bfff53e668 100644
--- a/net/mptcp/token_test.c
+++ b/net/mptcp/token_test.c
@@ -143,3 +143,4 @@ static struct kunit_suite mptcp_token_suite = {
kunit_test_suite(mptcp_token_suite);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("KUnit tests for MPTCP Token");