From 511e4ecd3211371086a9698ce4042700957cee33 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 1 Jul 2024 19:13:56 +0200 Subject: Merging upstream version 6.9.7. Signed-off-by: Daniel Baumann --- net/9p/client.c | 2 + net/atm/clip.c | 2 +- net/ax25/af_ax25.c | 6 + net/ax25/ax25_dev.c | 50 +++----- net/batman-adv/originator.c | 2 + net/bluetooth/6lowpan.c | 2 +- net/bluetooth/hci_conn.c | 3 +- net/bluetooth/hci_core.c | 141 +++------------------ net/bluetooth/hci_event.c | 208 ++++++------------------------- net/bluetooth/hci_sock.c | 5 +- net/bluetooth/hci_sync.c | 209 +++++++------------------------- net/bluetooth/iso.c | 149 +++++++++++------------ net/bluetooth/l2cap_core.c | 85 ++++++++----- net/bluetooth/l2cap_sock.c | 91 +++++++++++--- net/bluetooth/mgmt.c | 84 +++++-------- net/bpf/test_run.c | 6 + net/bridge/br_device.c | 6 + net/bridge/br_mst.c | 29 +++-- net/core/dev.c | 3 +- net/core/drop_monitor.c | 20 +-- net/core/dst_cache.c | 4 +- net/core/filter.c | 10 +- net/core/net_namespace.c | 9 +- net/core/netdev-genl.c | 16 +-- net/core/netpoll.c | 2 +- net/core/rtnetlink.c | 44 ++++++- net/core/sock.c | 3 + net/core/sock_map.c | 16 ++- net/devlink/core.c | 6 +- net/ethernet/eth.c | 4 +- net/ethtool/ioctl.c | 2 +- net/ipv4/af_inet.c | 6 +- net/ipv4/cipso_ipv4.c | 12 +- net/ipv4/devinet.c | 22 ++-- net/ipv4/fib_frontend.c | 7 +- net/ipv4/icmp.c | 26 ++-- net/ipv4/ip_input.c | 2 +- net/ipv4/ip_output.c | 8 +- net/ipv4/ip_tunnel.c | 2 +- net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 + net/ipv4/route.c | 46 +++---- net/ipv4/tcp.c | 9 +- net/ipv4/tcp_ao.c | 19 ++- net/ipv4/tcp_dctcp.c | 13 +- net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_ipv4.c | 13 +- net/ipv4/tcp_timer.c | 6 +- net/ipv4/udp.c | 23 +++- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/icmp.c | 8 +- net/ipv6/ila/ila_lwt.c | 4 +- net/ipv6/ioam6_iptunnel.c | 8 +- net/ipv6/ip6_fib.c | 6 +- net/ipv6/ip6_output.c | 18 +-- net/ipv6/ip6mr.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 4 +- net/ipv6/reassembly.c | 2 +- net/ipv6/route.c | 71 ++++++----- net/ipv6/seg6.c | 5 +- net/ipv6/seg6_hmac.c | 42 ++++--- net/ipv6/seg6_iptunnel.c | 25 ++-- net/ipv6/seg6_local.c | 8 +- net/ipv6/tcp_ipv6.c | 7 +- net/ipv6/udp.c | 31 +++-- net/ipv6/xfrm6_policy.c | 10 +- net/l2tp/l2tp_core.c | 44 +++++-- net/l2tp/l2tp_ip.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- net/mac80211/cfg.c | 16 +-- net/mac80211/driver-ops.c | 17 +++ net/mac80211/he.c | 10 +- net/mac80211/ieee80211_i.h | 3 +- net/mac80211/iface.c | 26 ++-- net/mac80211/mesh_pathtbl.c | 13 ++ net/mac80211/mlme.c | 53 +++++--- net/mac80211/parse.c | 2 +- net/mac80211/scan.c | 16 +-- net/mac80211/sta_info.c | 4 +- net/mac80211/util.c | 2 +- net/mpls/mpls_iptunnel.c | 4 +- net/mptcp/pm_netlink.c | 21 ++-- net/mptcp/protocol.c | 10 +- net/mptcp/protocol.h | 3 + net/mptcp/sockopt.c | 60 ++++++++- net/ncsi/internal.h | 2 + net/ncsi/ncsi-manage.c | 73 ++++++----- net/ncsi/ncsi-rsp.c | 4 +- net/netfilter/core.c | 13 +- net/netfilter/ipset/ip_set_core.c | 92 ++++++++------ net/netfilter/ipset/ip_set_list_set.c | 33 ++--- net/netfilter/ipvs/ip_vs_xmit.c | 16 +-- net/netfilter/nf_conntrack_standalone.c | 15 --- net/netfilter/nf_flow_table_core.c | 8 +- net/netfilter/nf_flow_table_ip.c | 8 +- net/netfilter/nf_hooks_lwtunnel.c | 67 ++++++++++ net/netfilter/nf_internals.h | 6 + net/netfilter/nfnetlink_queue.c | 2 + net/netfilter/nft_fib.c | 8 +- net/netfilter/nft_meta.c | 3 + net/netfilter/nft_payload.c | 99 +++++++++++---- net/netfilter/nft_rt.c | 4 +- net/netrom/nr_route.c | 19 ++- 
net/netrom/nr_timer.c | 3 +- net/nfc/nci/core.c | 18 ++- net/openvswitch/actions.c | 6 + net/openvswitch/flow.c | 3 +- net/packet/af_packet.c | 29 ++--- net/qrtr/ns.c | 27 +++++ net/sched/act_api.c | 3 +- net/sched/act_ct.c | 16 ++- net/sched/sch_api.c | 1 + net/sched/sch_generic.c | 5 + net/sched/sch_htb.c | 22 +--- net/sched/sch_multiq.c | 2 +- net/sched/sch_taprio.c | 29 +++-- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 4 +- net/smc/af_smc.c | 22 +--- net/sunrpc/auth_gss/auth_gss.c | 4 +- net/sunrpc/auth_gss/svcauth_gss.c | 12 +- net/sunrpc/clnt.c | 1 + net/sunrpc/svc.c | 2 - net/sunrpc/xprtrdma/verbs.c | 6 +- net/tipc/node.c | 1 + net/tipc/udp_media.c | 2 +- net/tls/tls_main.c | 10 +- net/unix/af_unix.c | 153 +++++++++++++---------- net/unix/diag.c | 12 +- net/wireless/core.c | 2 +- net/wireless/nl80211.c | 14 +-- net/wireless/pmsr.c | 8 +- net/wireless/scan.c | 50 ++++++-- net/wireless/sysfs.c | 4 +- net/wireless/util.c | 7 +- net/xdp/xsk.c | 5 +- net/xfrm/xfrm_policy.c | 14 +-- 138 files changed, 1578 insertions(+), 1394 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index f7e90b4769..b05f73c291 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -235,6 +235,8 @@ static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc, if (!fc->sdata) return -ENOMEM; fc->capacity = alloc_msize; + fc->id = 0; + fc->tag = P9_NOTAG; return 0; } diff --git a/net/atm/clip.c b/net/atm/clip.c index 294cb9efe3..015fb679be 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -345,7 +345,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, dev->stats.tx_dropped++; return NETDEV_TX_OK; } - rt = (struct rtable *) dst; + rt = dst_rtable(dst); if (rt->rt_gw_family == AF_INET) daddr = &rt->rt_gw4; else diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 9169efb2f4..5fff5930e4 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1378,8 +1378,10 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, { struct sk_buff *skb; struct sock *newsk; + ax25_dev *ax25_dev; DEFINE_WAIT(wait); struct sock *sk; + ax25_cb *ax25; int err = 0; if (sock->state != SS_UNCONNECTED) @@ -1434,6 +1436,10 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, kfree_skb(skb); sk_acceptq_removed(sk); newsock->state = SS_CONNECTED; + ax25 = sk_to_ax25(newsk); + ax25_dev = ax25->ax25_dev; + netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC); + ax25_dev_hold(ax25_dev); out: release_sock(sk); diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 282ec581c0..67ae6b8c52 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -22,11 +22,12 @@ #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> +#include <linux/list.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/init.h> -ax25_dev *ax25_dev_list; +static LIST_HEAD(ax25_dev_list); DEFINE_SPINLOCK(ax25_dev_lock); ax25_dev *ax25_addr_ax25dev(ax25_address *addr) @@ -34,10 +35,11 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) ax25_dev *ax25_dev, *res = NULL; spin_lock_bh(&ax25_dev_lock); - for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) + list_for_each_entry(ax25_dev, &ax25_dev_list, list) if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; ax25_dev_hold(ax25_dev); + break; } spin_unlock_bh(&ax25_dev_lock); @@ -59,7 +61,6 @@ void ax25_dev_device_up(struct net_device *dev) } refcount_set(&ax25_dev->refcount, 1); - dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL); ax25_dev->forward = NULL; @@ -85,10 +86,9 @@ void
ax25_dev_device_up(struct net_device *dev) #endif spin_lock_bh(&ax25_dev_lock); - ax25_dev->next = ax25_dev_list; - ax25_dev_list = ax25_dev; + list_add(&ax25_dev->list, &ax25_dev_list); + dev->ax25_ptr = ax25_dev; spin_unlock_bh(&ax25_dev_lock); - ax25_dev_hold(ax25_dev); ax25_register_dev_sysctl(ax25_dev); } @@ -111,32 +111,19 @@ void ax25_dev_device_down(struct net_device *dev) /* * Remove any packet forwarding that points to this device. */ - for (s = ax25_dev_list; s != NULL; s = s->next) + list_for_each_entry(s, &ax25_dev_list, list) if (s->forward == dev) s->forward = NULL; - if ((s = ax25_dev_list) == ax25_dev) { - ax25_dev_list = s->next; - goto unlock_put; - } - - while (s != NULL && s->next != NULL) { - if (s->next == ax25_dev) { - s->next = ax25_dev->next; - goto unlock_put; + list_for_each_entry(s, &ax25_dev_list, list) { + if (s == ax25_dev) { + list_del(&s->list); + break; } - - s = s->next; } - spin_unlock_bh(&ax25_dev_lock); - dev->ax25_ptr = NULL; - ax25_dev_put(ax25_dev); - return; -unlock_put: - spin_unlock_bh(&ax25_dev_lock); - ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; + spin_unlock_bh(&ax25_dev_lock); netdev_put(dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } @@ -200,16 +187,13 @@ struct net_device *ax25_fwd_dev(struct net_device *dev) */ void __exit ax25_dev_free(void) { - ax25_dev *s, *ax25_dev; + ax25_dev *s, *n; spin_lock_bh(&ax25_dev_lock); - ax25_dev = ax25_dev_list; - while (ax25_dev != NULL) { - s = ax25_dev; - netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker); - ax25_dev = ax25_dev->next; - kfree(s); + list_for_each_entry_safe(s, n, &ax25_dev_list, list) { + netdev_put(s->dev, &s->dev_tracker); + list_del(&s->list); + ax25_dev_put(s); } - ax25_dev_list = NULL; spin_unlock_bh(&ax25_dev_lock); } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 71c143d4b6..ac74f6ead6 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1266,6 +1266,8 @@ void batadv_purge_orig_ref(struct batadv_priv *bat_priv) /* for all origins... 
*/ for (i = 0; i < hash->size; i++) { head = &hash->table[i]; + if (hlist_empty(head)) + continue; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 27520a8a48..50cfec8cca 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -133,7 +133,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev, struct in6_addr *daddr, struct sk_buff *skb) { - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); int count = atomic_read(&dev->peer_count); const struct in6_addr *nexthop; struct lowpan_peer *peer; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6ab404dda7..08ae30fd31 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1173,8 +1173,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type) list_for_each_entry(d, &hci_dev_list, list) { if (!test_bit(HCI_UP, &d->flags) || - hci_dev_test_flag(d, HCI_USER_CHANNEL) || - d->dev_type != HCI_PRIMARY) + hci_dev_test_flag(d, HCI_USER_CHANNEL)) continue; /* Simple routing: diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bc5086423a..24f6b6a5c7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -395,11 +395,6 @@ int hci_inquiry(void __user *arg) goto done; } - if (hdev->dev_type != HCI_PRIMARY) { - err = -EOPNOTSUPP; - goto done; - } - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = -EOPNOTSUPP; goto done; @@ -752,11 +747,6 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) goto done; } - if (hdev->dev_type != HCI_PRIMARY) { - err = -EOPNOTSUPP; - goto done; - } - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = -EOPNOTSUPP; goto done; @@ -910,7 +900,7 @@ int hci_get_dev_info(void __user *arg) strscpy(di.name, hdev->name, sizeof(di.name)); di.bdaddr = hdev->bdaddr; - di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4); + di.type = (hdev->bus & 0x0f); di.flags = flags; di.pkt_type = hdev->pkt_type; if (lmp_bredr_capable(hdev)) { @@ -1026,8 +1016,7 @@ static void hci_power_on(struct work_struct *work) */ if (hci_dev_test_flag(hdev, HCI_RFKILLED) || hci_dev_test_flag(hdev, HCI_UNCONFIGURED) || - (hdev->dev_type == HCI_PRIMARY && - !bacmp(&hdev->bdaddr, BDADDR_ANY) && + (!bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY))) { hci_dev_clear_flag(hdev, HCI_AUTO_OFF); hci_dev_do_close(hdev); @@ -1769,6 +1758,15 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, adv->pending = true; adv->instance = instance; + + /* If controller support only one set and the instance is set to + * 1 then there is no option other than using handle 0x00. + */ + if (hdev->le_num_of_adv_sets == 1 && instance == 1) + adv->handle = 0x00; + else + adv->handle = instance; + list_add(&adv->list, &hdev->adv_instances); hdev->adv_instance_cnt++; } @@ -2635,21 +2633,7 @@ int hci_register_dev(struct hci_dev *hdev) if (!hdev->open || !hdev->close || !hdev->send) return -EINVAL; - /* Do not allow HCI_AMP devices to register at index 0, - * so the index can be used as the AMP controller ID. 
- */ - switch (hdev->dev_type) { - case HCI_PRIMARY: - id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL); - break; - case HCI_AMP: - id = ida_alloc_range(&hci_index_ida, 1, HCI_MAX_ID - 1, - GFP_KERNEL); - break; - default: - return -EINVAL; - } - + id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL); if (id < 0) return id; @@ -2701,12 +2685,10 @@ int hci_register_dev(struct hci_dev *hdev) hci_dev_set_flag(hdev, HCI_SETUP); hci_dev_set_flag(hdev, HCI_AUTO_OFF); - if (hdev->dev_type == HCI_PRIMARY) { - /* Assume BR/EDR support until proven otherwise (such as - * through reading supported features during init. - */ - hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); - } + /* Assume BR/EDR support until proven otherwise (such as + * through reading supported features during init. + */ + hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); write_lock(&hci_dev_list_lock); list_add(&hdev->list, &hci_dev_list); @@ -3242,17 +3224,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue, hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT; - switch (hdev->dev_type) { - case HCI_PRIMARY: - hci_add_acl_hdr(skb, conn->handle, flags); - break; - case HCI_AMP: - hci_add_acl_hdr(skb, chan->handle, flags); - break; - default: - bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type); - return; - } + hci_add_acl_hdr(skb, conn->handle, flags); list = skb_shinfo(skb)->frag_list; if (!list) { @@ -3412,9 +3384,6 @@ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote) case ACL_LINK: cnt = hdev->acl_cnt; break; - case AMP_LINK: - cnt = hdev->block_cnt; - break; case SCO_LINK: case ESCO_LINK: cnt = hdev->sco_cnt; @@ -3612,12 +3581,6 @@ static void hci_prio_recalculate(struct hci_dev *hdev, __u8 type) } -static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb) -{ - /* Calculate count of blocks used by this packet */ - return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len); -} - static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type) { unsigned long last_tx; @@ -3731,81 +3694,15 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) hci_prio_recalculate(hdev, ACL_LINK); } -static void hci_sched_acl_blk(struct hci_dev *hdev) -{ - unsigned int cnt = hdev->block_cnt; - struct hci_chan *chan; - struct sk_buff *skb; - int quote; - u8 type; - - BT_DBG("%s", hdev->name); - - if (hdev->dev_type == HCI_AMP) - type = AMP_LINK; - else - type = ACL_LINK; - - __check_timeout(hdev, cnt, type); - - while (hdev->block_cnt > 0 && - (chan = hci_chan_sent(hdev, type, &quote))) { - u32 priority = (skb_peek(&chan->data_q))->priority; while (quote > 0 && (skb = skb_peek(&chan->data_q))) { - int blocks; - - BT_DBG("chan %p skb %p len %d priority %u", chan, skb, - skb->len, skb->priority); - - /* Stop if priority has changed */ - if (skb->priority < priority) - break; - - skb = skb_dequeue(&chan->data_q); - - blocks = __get_blocks(hdev, skb); - if (blocks > hdev->block_cnt) - return; - - hci_conn_enter_active_mode(chan->conn, - bt_cb(skb)->force_active); - - hci_send_frame(hdev, skb); - hdev->acl_last_tx = jiffies; - - hdev->block_cnt -= blocks; - quote -= blocks; - - chan->sent += blocks; - chan->conn->sent += blocks; - } - } - - if (cnt != hdev->block_cnt) - hci_prio_recalculate(hdev, type); -} - static void hci_sched_acl(struct hci_dev *hdev) { BT_DBG("%s", hdev->name); /* No ACL link over BR/EDR controller */ - if (!hci_conn_num(hdev, ACL_LINK) && hdev->dev_type == HCI_PRIMARY) - return; - - /* No AMP link over AMP controller */ - if
(!hci_conn_num(hdev, AMP_LINK) && hdev->dev_type == HCI_AMP) + if (!hci_conn_num(hdev, ACL_LINK)) return; - switch (hdev->flow_ctl_mode) { - case HCI_FLOW_CTL_MODE_PACKET_BASED: - hci_sched_acl_pkt(hdev); - break; - - case HCI_FLOW_CTL_MODE_BLOCK_BASED: - hci_sched_acl_blk(hdev); - break; - } + hci_sched_acl_pkt(hdev); } static void hci_sched_le(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4de8f0dc1a..1ed734a7fb 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1,7 +1,7 @@ /* BlueZ - Bluetooth protocol stack for Linux Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved. - Copyright 2023 NXP + Copyright 2023-2024 NXP Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> @@ -913,21 +913,6 @@ static u8 hci_cc_read_local_ext_features(struct hci_dev *hdev, void *data, return rp->status; } -static u8 hci_cc_read_flow_control_mode(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_read_flow_control_mode *rp = data; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - hdev->flow_ctl_mode = rp->mode; - - return rp->status; -} - static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -1071,28 +1056,6 @@ static u8 hci_cc_write_page_scan_type(struct hci_dev *hdev, void *data, return rp->status; } -static u8 hci_cc_read_data_block_size(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_read_data_block_size *rp = data; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - hdev->block_mtu = __le16_to_cpu(rp->max_acl_len); - hdev->block_len = __le16_to_cpu(rp->block_len); - hdev->num_blocks = __le16_to_cpu(rp->num_blocks); - - hdev->block_cnt = hdev->num_blocks; - - BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu, - hdev->block_cnt, hdev->block_len); - - return rp->status; -} - static u8 hci_cc_read_clock(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -1127,30 +1090,6 @@ unlock: return rp->status; } -static u8 hci_cc_read_local_amp_info(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_read_local_amp_info *rp = data; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - hdev->amp_status = rp->amp_status; - hdev->amp_total_bw = __le32_to_cpu(rp->total_bw); - hdev->amp_max_bw = __le32_to_cpu(rp->max_bw); - hdev->amp_min_latency = __le32_to_cpu(rp->min_latency); - hdev->amp_max_pdu = __le32_to_cpu(rp->max_pdu); - hdev->amp_type = rp->amp_type; - hdev->amp_pal_cap = __le16_to_cpu(rp->pal_cap); - hdev->amp_assoc_size = __le16_to_cpu(rp->max_assoc_size); - hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to); - hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to); - - return rp->status; -} - static u8 hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -4121,12 +4060,6 @@ static const struct hci_cc { HCI_CC(HCI_OP_READ_PAGE_SCAN_TYPE, hci_cc_read_page_scan_type, sizeof(struct hci_rp_read_page_scan_type)), HCI_CC_STATUS(HCI_OP_WRITE_PAGE_SCAN_TYPE, hci_cc_write_page_scan_type), - HCI_CC(HCI_OP_READ_DATA_BLOCK_SIZE, hci_cc_read_data_block_size, - sizeof(struct hci_rp_read_data_block_size)), - HCI_CC(HCI_OP_READ_FLOW_CONTROL_MODE, hci_cc_read_flow_control_mode, - sizeof(struct hci_rp_read_flow_control_mode)), - HCI_CC(HCI_OP_READ_LOCAL_AMP_INFO, hci_cc_read_local_amp_info, - sizeof(struct hci_rp_read_local_amp_info)),
HCI_CC(HCI_OP_READ_CLOCK, hci_cc_read_clock, sizeof(struct hci_rp_read_clock)), HCI_CC(HCI_OP_READ_ENC_KEY_SIZE, hci_cc_read_enc_key_size, @@ -4317,7 +4250,7 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status) hci_dev_lock(hdev); /* Remove connection if command failed */ - for (i = 0; cp->num_cis; cp->num_cis--, i++) { + for (i = 0; i < cp->num_cis; i++) { struct hci_conn *conn; u16 handle; @@ -4333,6 +4266,7 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status) hci_conn_del(conn); } } + cp->num_cis = 0; if (pending) hci_le_create_cis_pending(hdev); @@ -4461,11 +4395,6 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, flex_array_size(ev, handles, ev->num))) return; - if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) { - bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode); - return; - } - bt_dev_dbg(hdev, "num %d", ev->num); for (i = 0; i < ev->num; i++) { @@ -4533,78 +4462,6 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, queue_work(hdev->workqueue, &hdev->tx_work); } -static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev, - __u16 handle) -{ - struct hci_chan *chan; - - switch (hdev->dev_type) { - case HCI_PRIMARY: - return hci_conn_hash_lookup_handle(hdev, handle); - case HCI_AMP: - chan = hci_chan_lookup_handle(hdev, handle); - if (chan) - return chan->conn; - break; - default: - bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type); - break; - } - - return NULL; -} - -static void hci_num_comp_blocks_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_ev_num_comp_blocks *ev = data; - int i; - - if (!hci_ev_skb_pull(hdev, skb, HCI_EV_NUM_COMP_BLOCKS, - flex_array_size(ev, handles, ev->num_hndl))) - return; - - if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) { - bt_dev_err(hdev, "wrong event for mode %d", - hdev->flow_ctl_mode); - return; - } - - bt_dev_dbg(hdev, "num_blocks %d num_hndl %d", ev->num_blocks, - ev->num_hndl); - - for (i = 0; i < ev->num_hndl; i++) { - struct hci_comp_blocks_info *info = &ev->handles[i]; - struct hci_conn *conn = NULL; - __u16 handle, block_count; - - handle = __le16_to_cpu(info->handle); - block_count = __le16_to_cpu(info->blocks); - - conn = __hci_conn_lookup_handle(hdev, handle); - if (!conn) - continue; - - conn->sent -= block_count; - - switch (conn->type) { - case ACL_LINK: - case AMP_LINK: - hdev->block_cnt += block_count; - if (hdev->block_cnt > hdev->num_blocks) - hdev->block_cnt = hdev->num_blocks; - break; - - default: - bt_dev_err(hdev, "unknown type %d conn %p", - conn->type, conn); - break; - } - } - - queue_work(hdev->workqueue, &hdev->tx_work); -} - static void hci_mode_change_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -6502,14 +6359,16 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, if (!(flags & HCI_PROTO_DEFER)) goto unlock; - if (ev->status) { - /* Add connection to indicate the failed PA sync event */ - pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY, - HCI_ROLE_SLAVE); + /* Add connection to indicate PA sync event */ + pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY, + HCI_ROLE_SLAVE); - if (!pa_sync) - goto unlock; + if (IS_ERR(pa_sync)) + goto unlock; + + pa_sync->sync_handle = le16_to_cpu(ev->handle); + if (ev->status) { set_bit(HCI_CONN_PA_SYNC_FAILED, &pa_sync->flags); /* Notify iso layer */ @@ -6526,6 +6385,7 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data, struct hci_ev_le_per_adv_report 
*ev = data; int mask = hdev->link_mode; __u8 flags = 0; + struct hci_conn *pa_sync; bt_dev_dbg(hdev, "sync_handle 0x%4.4x", le16_to_cpu(ev->sync_handle)); @@ -6533,8 +6393,28 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data, mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags); if (!(mask & HCI_LM_ACCEPT)) - hci_le_pa_term_sync(hdev, ev->sync_handle); + goto unlock; + + if (!(flags & HCI_PROTO_DEFER)) + goto unlock; + pa_sync = hci_conn_hash_lookup_pa_sync_handle + (hdev, + le16_to_cpu(ev->sync_handle)); + + if (!pa_sync) + goto unlock; + + if (ev->data_status == LE_PA_DATA_COMPLETE && + !test_and_set_bit(HCI_CONN_PA_SYNC, &pa_sync->flags)) { + /* Notify iso layer */ + hci_connect_cfm(pa_sync, 0); + + /* Notify MGMT layer */ + mgmt_device_connected(hdev, pa_sync, NULL, 0); + } + +unlock: hci_dev_unlock(hdev); } @@ -7069,10 +6949,8 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags); - if (!(mask & HCI_LM_ACCEPT)) { - hci_le_pa_term_sync(hdev, ev->sync_handle); + if (!(mask & HCI_LM_ACCEPT)) goto unlock; - } if (!(flags & HCI_PROTO_DEFER)) goto unlock; @@ -7081,24 +6959,11 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data, (hdev, le16_to_cpu(ev->sync_handle)); - if (pa_sync) - goto unlock; - - /* Add connection to indicate the PA sync event */ - pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY, - HCI_ROLE_SLAVE); - if (IS_ERR(pa_sync)) goto unlock; - pa_sync->sync_handle = le16_to_cpu(ev->sync_handle); - set_bit(HCI_CONN_PA_SYNC, &pa_sync->flags); - /* Notify iso layer */ - hci_connect_cfm(pa_sync, 0x00); - - /* Notify MGMT layer */ - mgmt_device_connected(hdev, pa_sync, NULL, 0); + hci_connect_cfm(pa_sync, 0); unlock: hci_dev_unlock(hdev); @@ -7512,9 +7377,6 @@ static const struct hci_ev { /* [0x3e = HCI_EV_LE_META] */ HCI_EV_REQ_VL(HCI_EV_LE_META, hci_le_meta_evt, sizeof(struct hci_ev_le_meta), HCI_MAX_EVENT_SIZE), - /* [0x48 = HCI_EV_NUM_COMP_BLOCKS] */ - HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt, - sizeof(struct hci_ev_num_comp_blocks)), /* [0xff = HCI_EV_VENDOR] */ HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE), }; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 703b84bd48..69c2ba1e84 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -485,7 +485,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return NULL; ni = skb_put(skb, HCI_MON_NEW_INDEX_SIZE); - ni->type = hdev->dev_type; + ni->type = 0x00; /* Old hdev->dev_type */ ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name, @@ -1007,9 +1007,6 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) return -EOPNOTSUPP; - if (hdev->dev_type != HCI_PRIMARY) - return -EOPNOTSUPP; - switch (cmd) { case HCISETRAW: if (!capable(CAP_NET_ADMIN)) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 4c707eb64e..7bfa6b59ba 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1043,11 +1043,10 @@ static int hci_disable_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) struct hci_cp_ext_adv_set *set; u8 data[sizeof(*cp) + sizeof(*set) * 1]; u8 size; + struct adv_info *adv = NULL; /* If request specifies an instance that doesn't exist, fail */ if (instance > 0) { - struct adv_info *adv; - adv = 
hci_find_adv_instance(hdev, instance); if (!adv) return -EINVAL; @@ -1066,7 +1065,7 @@ static int hci_disable_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) cp->num_of_sets = !!instance; cp->enable = 0x00; - set->handle = instance; + set->handle = adv ? adv->handle : instance; size = sizeof(*cp) + sizeof(*set) * cp->num_of_sets; @@ -1195,7 +1194,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) cp.own_addr_type = own_addr_type; cp.channel_map = hdev->le_adv_channel_map; - cp.handle = instance; + cp.handle = adv ? adv->handle : instance; if (flags & MGMT_ADV_FLAG_SEC_2M) { cp.primary_phy = HCI_ADV_PHY_1M; @@ -1235,31 +1234,27 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance) { - struct { - struct hci_cp_le_set_ext_scan_rsp_data cp; - u8 data[HCI_MAX_EXT_AD_LENGTH]; - } pdu; + DEFINE_FLEX(struct hci_cp_le_set_ext_scan_rsp_data, pdu, data, length, + HCI_MAX_EXT_AD_LENGTH); u8 len; struct adv_info *adv = NULL; int err; - memset(&pdu, 0, sizeof(pdu)); - if (instance) { adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->scan_rsp_changed) return 0; } - len = eir_create_scan_rsp(hdev, instance, pdu.data); + len = eir_create_scan_rsp(hdev, instance, pdu->data); - pdu.cp.handle = instance; - pdu.cp.length = len; - pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; - pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; + pdu->handle = adv ? adv->handle : instance; + pdu->length = len; + pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; + pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA, - sizeof(pdu.cp) + len, &pdu.cp, + struct_size(pdu, data, len), pdu, HCI_CMD_TIMEOUT); if (err) return err; @@ -1267,7 +1262,7 @@ static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance) if (adv) { adv->scan_rsp_changed = false; } else { - memcpy(hdev->scan_rsp_data, pdu.data, len); + memcpy(hdev->scan_rsp_data, pdu->data, len); hdev->scan_rsp_data_len = len; } @@ -1335,7 +1330,7 @@ int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance) memset(set, 0, sizeof(*set)); - set->handle = instance; + set->handle = adv ? adv->handle : instance; /* Set duration per instance since controller is responsible for * scheduling it. @@ -1411,29 +1406,25 @@ static int hci_set_per_adv_params_sync(struct hci_dev *hdev, u8 instance, static int hci_set_per_adv_data_sync(struct hci_dev *hdev, u8 instance) { - struct { - struct hci_cp_le_set_per_adv_data cp; - u8 data[HCI_MAX_PER_AD_LENGTH]; - } pdu; + DEFINE_FLEX(struct hci_cp_le_set_per_adv_data, pdu, data, length, + HCI_MAX_PER_AD_LENGTH); u8 len; - - memset(&pdu, 0, sizeof(pdu)); + struct adv_info *adv = NULL; if (instance) { - struct adv_info *adv = hci_find_adv_instance(hdev, instance); - + adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->periodic) return 0; } - len = eir_create_per_adv_data(hdev, instance, pdu.data); + len = eir_create_per_adv_data(hdev, instance, pdu->data); - pdu.cp.length = len; - pdu.cp.handle = instance; - pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; + pdu->length = len; + pdu->handle = adv ? 
adv->handle : instance; + pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_DATA, - sizeof(pdu.cp) + len, &pdu, + struct_size(pdu, data, len), pdu, HCI_CMD_TIMEOUT); } @@ -1727,31 +1718,27 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason) static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) { - struct { - struct hci_cp_le_set_ext_adv_data cp; - u8 data[HCI_MAX_EXT_AD_LENGTH]; - } pdu; + DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, + HCI_MAX_EXT_AD_LENGTH); u8 len; struct adv_info *adv = NULL; int err; - memset(&pdu, 0, sizeof(pdu)); - if (instance) { adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->adv_data_changed) return 0; } - len = eir_create_adv_data(hdev, instance, pdu.data); + len = eir_create_adv_data(hdev, instance, pdu->data); - pdu.cp.length = len; - pdu.cp.handle = instance; - pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; - pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; + pdu->length = len; + pdu->handle = adv ? adv->handle : instance; + pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; + pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, - sizeof(pdu.cp) + len, &pdu.cp, + struct_size(pdu, data, len), pdu, HCI_CMD_TIMEOUT); if (err) return err; @@ -1760,7 +1747,7 @@ static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) if (adv) { adv->adv_data_changed = false; } else { - memcpy(hdev->adv_data, pdu.data, len); + memcpy(hdev->adv_data, pdu->data, len); hdev->adv_data_len = len; } @@ -3523,10 +3510,6 @@ static int hci_unconf_init_sync(struct hci_dev *hdev) /* Read Local Supported Features. */ static int hci_read_local_features_sync(struct hci_dev *hdev) { - /* Not all AMP controllers support this command */ - if (hdev->dev_type == HCI_AMP && !(hdev->commands[14] & 0x20)) - return 0; - return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL, HCI_CMD_TIMEOUT); } @@ -3561,51 +3544,6 @@ static int hci_read_local_cmds_sync(struct hci_dev *hdev) return 0; } -/* Read Local AMP Info */ -static int hci_read_local_amp_info_sync(struct hci_dev *hdev) -{ - return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_AMP_INFO, - 0, NULL, HCI_CMD_TIMEOUT); -} - -/* Read Data Blk size */ -static int hci_read_data_block_size_sync(struct hci_dev *hdev) -{ - return __hci_cmd_sync_status(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, - 0, NULL, HCI_CMD_TIMEOUT); -} - -/* Read Flow Control Mode */ -static int hci_read_flow_control_mode_sync(struct hci_dev *hdev) -{ - return __hci_cmd_sync_status(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, - 0, NULL, HCI_CMD_TIMEOUT); -} - -/* Read Location Data */ -static int hci_read_location_data_sync(struct hci_dev *hdev) -{ - return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCATION_DATA, - 0, NULL, HCI_CMD_TIMEOUT); -} - -/* AMP Controller init stage 1 command sequence */ -static const struct hci_init_stage amp_init1[] = { - /* HCI_OP_READ_LOCAL_VERSION */ - HCI_INIT(hci_read_local_version_sync), - /* HCI_OP_READ_LOCAL_COMMANDS */ - HCI_INIT(hci_read_local_cmds_sync), - /* HCI_OP_READ_LOCAL_AMP_INFO */ - HCI_INIT(hci_read_local_amp_info_sync), - /* HCI_OP_READ_DATA_BLOCK_SIZE */ - HCI_INIT(hci_read_data_block_size_sync), - /* HCI_OP_READ_FLOW_CONTROL_MODE */ - HCI_INIT(hci_read_flow_control_mode_sync), - /* HCI_OP_READ_LOCATION_DATA */ - HCI_INIT(hci_read_location_data_sync), - {} -}; - static int hci_init1_sync(struct hci_dev *hdev) { int err; @@ -3619,28 +3557,9 @@ static int 
hci_init1_sync(struct hci_dev *hdev) return err; } - switch (hdev->dev_type) { - case HCI_PRIMARY: - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; - return hci_init_stage_sync(hdev, br_init1); - case HCI_AMP: - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; - return hci_init_stage_sync(hdev, amp_init1); - default: - bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type); - break; - } - - return 0; + return hci_init_stage_sync(hdev, br_init1); } -/* AMP Controller init stage 2 command sequence */ -static const struct hci_init_stage amp_init2[] = { - /* HCI_OP_READ_LOCAL_FEATURES */ - HCI_INIT(hci_read_local_features_sync), - {} -}; - /* Read Buffer Size (ACL mtu, max pkt, etc.) */ static int hci_read_buffer_size_sync(struct hci_dev *hdev) { @@ -3898,9 +3817,6 @@ static int hci_init2_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, ""); - if (hdev->dev_type == HCI_AMP) - return hci_init_stage_sync(hdev, amp_init2); - err = hci_init_stage_sync(hdev, hci_init2); if (err) return err; @@ -4728,13 +4644,6 @@ static int hci_init_sync(struct hci_dev *hdev) if (err < 0) return err; - /* HCI_PRIMARY covers both single-mode LE, BR/EDR and dual-mode - * BR/EDR/LE type controllers. AMP controllers only need the - * first two stages of init. - */ - if (hdev->dev_type != HCI_PRIMARY) - return 0; - err = hci_init3_sync(hdev); if (err < 0) return err; @@ -4963,12 +4872,8 @@ int hci_dev_open_sync(struct hci_dev *hdev) * In case of user channel usage, it is not important * if a public address or static random address is * available. - * - * This check is only valid for BR/EDR controllers - * since AMP controllers do not have an address. */ if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && - hdev->dev_type == HCI_PRIMARY && !bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY)) { ret = -EADDRNOTAVAIL; @@ -5003,8 +4908,7 @@ int hci_dev_open_sync(struct hci_dev *hdev) !hci_dev_test_flag(hdev, HCI_CONFIG) && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && - hci_dev_test_flag(hdev, HCI_MGMT) && - hdev->dev_type == HCI_PRIMARY) { + hci_dev_test_flag(hdev, HCI_MGMT)) { ret = hci_powered_update_sync(hdev); mgmt_power_on(hdev, ret); } @@ -5149,8 +5053,7 @@ int hci_dev_close_sync(struct hci_dev *hdev) auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); - if (!auto_off && hdev->dev_type == HCI_PRIMARY && - !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + if (!auto_off && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_MGMT)) __mgmt_power_off(hdev); @@ -5212,9 +5115,6 @@ int hci_dev_close_sync(struct hci_dev *hdev) hdev->flags &= BIT(HCI_RAW); hci_dev_clear_volatile_flags(hdev); - /* Controller radio is available but is currently powered down */ - hdev->amp_status = AMP_STATUS_POWERED_DOWN; - memset(hdev->eir, 0, sizeof(hdev->eir)); memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); bacpy(&hdev->random_addr, BDADDR_ANY); @@ -5251,8 +5151,7 @@ static int hci_power_on_sync(struct hci_dev *hdev) */ if (hci_dev_test_flag(hdev, HCI_RFKILLED) || hci_dev_test_flag(hdev, HCI_UNCONFIGURED) || - (hdev->dev_type == HCI_PRIMARY && - !bacmp(&hdev->bdaddr, BDADDR_ANY) && + (!bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY))) { hci_dev_clear_flag(hdev, HCI_AUTO_OFF); hci_dev_close_sync(hdev); @@ -5354,27 +5253,11 @@ int hci_stop_discovery_sync(struct hci_dev *hdev) return 0; } -static int hci_disconnect_phy_link_sync(struct hci_dev *hdev, u16 handle, - u8 reason) -{ - struct hci_cp_disconn_phy_link 
cp; - - memset(&cp, 0, sizeof(cp)); - cp.phy_handle = HCI_PHY_HANDLE(handle); - cp.reason = reason; - - return __hci_cmd_sync_status(hdev, HCI_OP_DISCONN_PHY_LINK, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); -} - static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { struct hci_cp_disconnect cp; - if (conn->type == AMP_LINK) - return hci_disconnect_phy_link_sync(hdev, conn->handle, reason); - if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) { /* This is a BIS connection, hci_conn_del will * do the necessary cleanup. @@ -6493,10 +6376,8 @@ done: int hci_le_create_cis_sync(struct hci_dev *hdev) { - struct { - struct hci_cp_le_create_cis cp; - struct hci_cis cis[0x1f]; - } cmd; + DEFINE_FLEX(struct hci_cp_le_create_cis, cmd, cis, num_cis, 0x1f); + size_t aux_num_cis = 0; struct hci_conn *conn; u8 cig = BT_ISO_QOS_CIG_UNSET; @@ -6523,8 +6404,6 @@ int hci_le_create_cis_sync(struct hci_dev *hdev) * remains pending. */ - memset(&cmd, 0, sizeof(cmd)); - hci_dev_lock(hdev); rcu_read_lock(); @@ -6561,7 +6440,7 @@ int hci_le_create_cis_sync(struct hci_dev *hdev) goto done; list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis]; + struct hci_cis *cis = &cmd->cis[aux_num_cis]; if (hci_conn_check_create_cis(conn) || conn->iso_qos.ucast.cig != cig) @@ -6570,25 +6449,25 @@ int hci_le_create_cis_sync(struct hci_dev *hdev) set_bit(HCI_CONN_CREATE_CIS, &conn->flags); cis->acl_handle = cpu_to_le16(conn->parent->handle); cis->cis_handle = cpu_to_le16(conn->handle); - cmd.cp.num_cis++; + aux_num_cis++; - if (cmd.cp.num_cis >= ARRAY_SIZE(cmd.cis)) + if (aux_num_cis >= 0x1f) break; } + cmd->num_cis = aux_num_cis; done: rcu_read_unlock(); hci_dev_unlock(hdev); - if (!cmd.cp.num_cis) + if (!aux_num_cis) return 0; /* Wait for HCI_LE_CIS_Established */ return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CIS, - sizeof(cmd.cp) + sizeof(cmd.cis[0]) * - cmd.cp.num_cis, &cmd, - HCI_EVT_LE_CIS_ESTABLISHED, + struct_size(cmd, cis, cmd->num_cis), + cmd, HCI_EVT_LE_CIS_ESTABLISHED, conn->conn_timeout, NULL); } diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 6bed4aa829..00c0d8413c 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -54,7 +54,6 @@ static void iso_sock_kill(struct sock *sk); enum { BT_SK_BIG_SYNC, BT_SK_PA_SYNC, - BT_SK_PA_SYNC_TERM, }; struct iso_pinfo { @@ -81,12 +80,14 @@ static bool check_ucast_qos(struct bt_iso_qos *qos); static bool check_bcast_qos(struct bt_iso_qos *qos); static bool iso_match_sid(struct sock *sk, void *data); static bool iso_match_sync_handle(struct sock *sk, void *data); +static bool iso_match_sync_handle_pa_report(struct sock *sk, void *data); static void iso_sock_disconn(struct sock *sk); typedef bool (*iso_sock_match_t)(struct sock *sk, void *data); -static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst, - iso_sock_match_t match, void *data); +static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst, + enum bt_sock_state state, + iso_sock_match_t match, void *data); /* ---- ISO timers ---- */ #define ISO_CONN_TIMEOUT (HZ * 40) @@ -196,21 +197,10 @@ static void iso_chan_del(struct sock *sk, int err) sock_set_flag(sk, SOCK_ZAPPED); } -static bool iso_match_conn_sync_handle(struct sock *sk, void *data) -{ - struct hci_conn *hcon = data; - - if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) - return false; - - return hcon->sync_handle == iso_pi(sk)->sync_handle; -} - static void iso_conn_del(struct hci_conn *hcon, int err) { struct iso_conn *conn = 
hcon->iso_data; struct sock *sk; - struct sock *parent; if (!conn) return; @@ -226,25 +216,6 @@ static void iso_conn_del(struct hci_conn *hcon, int err) if (sk) { lock_sock(sk); - - /* While a PA sync hcon is in the process of closing, - * mark parent socket with a flag, so that any residual - * BIGInfo adv reports that arrive before PA sync is - * terminated are not processed anymore. - */ - if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { - parent = iso_get_sock_listen(&hcon->src, - &hcon->dst, - iso_match_conn_sync_handle, - hcon); - - if (parent) { - set_bit(BT_SK_PA_SYNC_TERM, - &iso_pi(parent)->flags); - sock_put(parent); - } - } - iso_sock_clear_timer(sk); iso_chan_del(sk, err); release_sock(sk); @@ -581,22 +552,23 @@ static struct sock *__iso_get_sock_listen_by_sid(bdaddr_t *ba, bdaddr_t *bc, return NULL; } -/* Find socket listening: +/* Find socket in given state: * source bdaddr (Unicast) * destination bdaddr (Broadcast only) * match func - pass NULL to ignore * match func data - pass -1 to ignore * Returns closest match. */ -static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst, - iso_sock_match_t match, void *data) +static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst, + enum bt_sock_state state, + iso_sock_match_t match, void *data) { struct sock *sk = NULL, *sk1 = NULL; read_lock(&iso_sk_list.lock); sk_for_each(sk, &iso_sk_list.head) { - if (sk->sk_state != BT_LISTEN) + if (sk->sk_state != state) continue; /* Match Broadcast destination */ @@ -857,6 +829,7 @@ static struct sock *iso_sock_alloc(struct net *net, struct socket *sock, iso_pi(sk)->src_type = BDADDR_LE_PUBLIC; iso_pi(sk)->qos = default_qos; + iso_pi(sk)->sync_handle = -1; bt_sock_link(&iso_sk_list, sk); return sk; @@ -904,7 +877,6 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr, return -EINVAL; iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type; - iso_pi(sk)->sync_handle = -1; if (sa->iso_bc->bc_sid > 0x0f) return -EINVAL; @@ -981,7 +953,8 @@ static int iso_sock_bind(struct socket *sock, struct sockaddr *addr, /* Allow the user to bind a PA sync socket to a number * of BISes to sync to. 
*/ - if (sk->sk_state == BT_CONNECT2 && + if ((sk->sk_state == BT_CONNECT2 || + sk->sk_state == BT_CONNECTED) && test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { err = iso_sock_bind_pa_sk(sk, sa, addr_len); goto done; @@ -1393,6 +1366,16 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg, } release_sock(sk); return 0; + case BT_CONNECTED: + if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { + iso_conn_big_sync(sk); + sk->sk_state = BT_LISTEN; + release_sock(sk); + return 0; + } + + release_sock(sk); + break; case BT_CONNECT: release_sock(sk); return iso_connect_cis(sk); @@ -1538,7 +1521,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname, case BT_ISO_QOS: if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND && - sk->sk_state != BT_CONNECT2) { + sk->sk_state != BT_CONNECT2 && + (!test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags) || + sk->sk_state != BT_CONNECTED)) { err = -EINVAL; break; } @@ -1759,7 +1744,7 @@ static void iso_conn_ready(struct iso_conn *conn) struct sock *sk = conn->sk; struct hci_ev_le_big_sync_estabilished *ev = NULL; struct hci_ev_le_pa_sync_established *ev2 = NULL; - struct hci_evt_le_big_info_adv_report *ev3 = NULL; + struct hci_ev_le_per_adv_report *ev3 = NULL; struct hci_conn *hcon; BT_DBG("conn %p", conn); @@ -1777,32 +1762,37 @@ static void iso_conn_ready(struct iso_conn *conn) HCI_EVT_LE_BIG_SYNC_ESTABILISHED); /* Get reference to PA sync parent socket, if it exists */ - parent = iso_get_sock_listen(&hcon->src, - &hcon->dst, - iso_match_pa_sync_flag, NULL); + parent = iso_get_sock(&hcon->src, &hcon->dst, + BT_LISTEN, + iso_match_pa_sync_flag, + NULL); if (!parent && ev) - parent = iso_get_sock_listen(&hcon->src, - &hcon->dst, - iso_match_big, ev); + parent = iso_get_sock(&hcon->src, + &hcon->dst, + BT_LISTEN, + iso_match_big, ev); } else if (test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) { ev2 = hci_recv_event_data(hcon->hdev, HCI_EV_LE_PA_SYNC_ESTABLISHED); if (ev2) - parent = iso_get_sock_listen(&hcon->src, - &hcon->dst, - iso_match_sid, ev2); + parent = iso_get_sock(&hcon->src, + &hcon->dst, + BT_LISTEN, + iso_match_sid, ev2); } else if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags)) { ev3 = hci_recv_event_data(hcon->hdev, - HCI_EVT_LE_BIG_INFO_ADV_REPORT); + HCI_EV_LE_PER_ADV_REPORT); if (ev3) - parent = iso_get_sock_listen(&hcon->src, - &hcon->dst, - iso_match_sync_handle, ev3); + parent = iso_get_sock(&hcon->src, + &hcon->dst, + BT_LISTEN, + iso_match_sync_handle_pa_report, + ev3); } if (!parent) - parent = iso_get_sock_listen(&hcon->src, - BDADDR_ANY, NULL, NULL); + parent = iso_get_sock(&hcon->src, BDADDR_ANY, + BT_LISTEN, NULL, NULL); if (!parent) return; @@ -1839,7 +1829,6 @@ static void iso_conn_ready(struct iso_conn *conn) if (ev3) { iso_pi(sk)->qos = iso_pi(parent)->qos; - iso_pi(sk)->qos.bcast.encryption = ev3->encryption; hcon->iso_qos = iso_pi(sk)->qos; iso_pi(sk)->bc_num_bis = iso_pi(parent)->bc_num_bis; memcpy(iso_pi(sk)->bc_bis, iso_pi(parent)->bc_bis, ISO_MAX_NUM_BIS); @@ -1923,8 +1912,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) */ ev1 = hci_recv_event_data(hdev, HCI_EV_LE_PA_SYNC_ESTABLISHED); if (ev1) { - sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sid, - ev1); + sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN, + iso_match_sid, ev1); if (sk && !ev1->status) iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle); @@ -1933,26 +1922,29 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) ev2 = hci_recv_event_data(hdev, 
HCI_EVT_LE_BIG_INFO_ADV_REPORT); if (ev2) { - /* Try to get PA sync listening socket, if it exists */ - sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, - iso_match_pa_sync_flag, NULL); - - if (!sk) { - sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, - iso_match_sync_handle, ev2); - - /* If PA Sync is in process of terminating, - * do not handle any more BIGInfo adv reports. - */ - - if (sk && test_bit(BT_SK_PA_SYNC_TERM, - &iso_pi(sk)->flags)) - return 0; + /* Check if BIGInfo report has already been handled */ + sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_CONNECTED, + iso_match_sync_handle, ev2); + if (sk) { + sock_put(sk); + sk = NULL; + goto done; } + /* Try to get PA sync socket, if it exists */ + sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_CONNECT2, + iso_match_sync_handle, ev2); + if (!sk) + sk = iso_get_sock(&hdev->bdaddr, bdaddr, + BT_LISTEN, + iso_match_sync_handle, + ev2); + if (sk) { int err; + iso_pi(sk)->qos.bcast.encryption = ev2->encryption; + if (ev2->num_bis < iso_pi(sk)->bc_num_bis) iso_pi(sk)->bc_num_bis = ev2->num_bis; @@ -1971,6 +1963,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) } } } + + goto done; } ev3 = hci_recv_event_data(hdev, HCI_EV_LE_PER_ADV_REPORT); @@ -1979,8 +1973,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) u8 *base; struct hci_conn *hcon; - sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, - iso_match_sync_handle_pa_report, ev3); + sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN, + iso_match_sync_handle_pa_report, ev3); if (!sk) goto done; @@ -2029,7 +2023,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) hcon->le_per_adv_data_len = 0; } } else { - sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL); + sk = iso_get_sock(&hdev->bdaddr, BDADDR_ANY, + BT_LISTEN, NULL, NULL); } done: diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 3f7a82f10f..9394a158d1 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -457,6 +457,9 @@ struct l2cap_chan *l2cap_chan_create(void) /* Set default lock nesting level */ atomic_set(&chan->nesting, L2CAP_NESTING_NORMAL); + /* Available receive buffer space is initially unknown */ + chan->rx_avail = -1; + write_lock(&chan_list_lock); list_add(&chan->global_l, &chan_list); write_unlock(&chan_list_lock); @@ -538,6 +541,28 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan) } EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults); +static __u16 l2cap_le_rx_credits(struct l2cap_chan *chan) +{ + size_t sdu_len = chan->sdu ? chan->sdu->len : 0; + + if (chan->mps == 0) + return 0; + + /* If we don't know the available space in the receiver buffer, give + * enough credits for a full packet. + */ + if (chan->rx_avail == -1) + return (chan->imtu / chan->mps) + 1; + + /* If we know how much space is available in the receive buffer, give + * out as many credits as would fill the buffer. 
+ */ + if (chan->rx_avail <= sdu_len) + return 0; + + return DIV_ROUND_UP(chan->rx_avail - sdu_len, chan->mps); +} + static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits) { chan->sdu = NULL; @@ -546,8 +571,7 @@ static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits) chan->tx_credits = tx_credits; /* Derive MPS from connection MTU to stop HCI fragmentation */ chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE); - /* Give enough credits for a full packet */ - chan->rx_credits = (chan->imtu / chan->mps) + 1; + chan->rx_credits = l2cap_le_rx_credits(chan); skb_queue_head_init(&chan->tx_q); } @@ -559,7 +583,7 @@ static void l2cap_ecred_init(struct l2cap_chan *chan, u16 tx_credits) /* L2CAP implementations shall support a minimum MPS of 64 octets */ if (chan->mps < L2CAP_ECRED_MIN_MPS) { chan->mps = L2CAP_ECRED_MIN_MPS; - chan->rx_credits = (chan->imtu / chan->mps) + 1; + chan->rx_credits = l2cap_le_rx_credits(chan); } } @@ -3906,7 +3930,7 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, } static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, - u8 *data, u8 rsp_code, u8 amp_id) + u8 *data, u8 rsp_code) { struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; struct l2cap_conn_rsp rsp; @@ -3985,17 +4009,8 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, status = L2CAP_CS_AUTHOR_PEND; chan->ops->defer(chan); } else { - /* Force pending result for AMP controllers. - * The connection will succeed after the - * physical link is up. - */ - if (amp_id == AMP_ID_BREDR) { - l2cap_state_change(chan, BT_CONFIG); - result = L2CAP_CR_SUCCESS; - } else { - l2cap_state_change(chan, BT_CONNECT2); - result = L2CAP_CR_PEND; - } + l2cap_state_change(chan, BT_CONFIG); + result = L2CAP_CR_SUCCESS; status = L2CAP_CS_NO_INFO; } } else { @@ -4060,7 +4075,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn, mgmt_device_connected(hdev, hcon, NULL, 0); hci_dev_unlock(hdev); - l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP, 0); + l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP); return 0; } @@ -4630,13 +4645,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - if (max > hcon->le_conn_max_interval) { - BT_DBG("requested connection interval exceeds current bounds."); - err = -EINVAL; - } else { - err = hci_check_conn_params(min, max, latency, to_multiplier); - } - + err = hci_check_conn_params(min, max, latency, to_multiplier); if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else @@ -6513,9 +6522,7 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_le_credits pkt; - u16 return_credits; - - return_credits = (chan->imtu / chan->mps) + 1; + u16 return_credits = l2cap_le_rx_credits(chan); if (chan->rx_credits >= return_credits) return; @@ -6534,6 +6541,19 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt); } +void l2cap_chan_rx_avail(struct l2cap_chan *chan, ssize_t rx_avail) +{ + if (chan->rx_avail == rx_avail) + return; + + BT_DBG("chan %p has %zd bytes avail for rx", chan, rx_avail); + + chan->rx_avail = rx_avail; + + if (chan->state == BT_CONNECTED) + l2cap_chan_le_send_credits(chan); +} + static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb) { int err; @@ -6543,6 +6563,12 @@ static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb) 
/* Wait recv to confirm reception before updating the credits */ err = chan->ops->recv(chan, skb); + if (err < 0 && chan->rx_avail != -1) { + BT_ERR("Queueing received LE L2CAP data failed"); + l2cap_send_disconn_req(chan, ECONNRESET); + return err; + } + /* Update credits whenever an SDU is received */ l2cap_chan_le_send_credits(chan); @@ -6565,7 +6591,8 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) } chan->rx_credits--; - BT_DBG("rx_credits %u -> %u", chan->rx_credits + 1, chan->rx_credits); + BT_DBG("chan %p: rx_credits %u -> %u", + chan, chan->rx_credits + 1, chan->rx_credits); /* Update if remote had run out of credits, this should only happens * if the remote is not using the entire MPS. @@ -7453,10 +7480,6 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) struct l2cap_conn *conn = hcon->l2cap_data; int len; - /* For AMP controller do not create l2cap conn */ - if (!conn && hcon->hdev->dev_type != HCI_PRIMARY) - goto drop; - if (!conn) conn = l2cap_conn_add(hcon); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 5cc83f906c..8645461d45 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1131,6 +1131,34 @@ static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg, return err; } +static void l2cap_publish_rx_avail(struct l2cap_chan *chan) +{ + struct sock *sk = chan->data; + ssize_t avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc); + int expected_skbs, skb_overhead; + + if (avail <= 0) { + l2cap_chan_rx_avail(chan, 0); + return; + } + + if (!chan->mps) { + l2cap_chan_rx_avail(chan, -1); + return; + } + + /* Correct available memory by estimated sk_buff overhead. + * This is significant due to small transfer sizes. However, accept + * at least one full packet if receive space is non-zero. + */ + expected_skbs = DIV_ROUND_UP(avail, chan->mps); + skb_overhead = expected_skbs * sizeof(struct sk_buff); + if (skb_overhead < avail) + l2cap_chan_rx_avail(chan, avail - skb_overhead); + else + l2cap_chan_rx_avail(chan, -1); +} + static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { @@ -1167,28 +1195,33 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, else err = bt_sock_recvmsg(sock, msg, len, flags); - if (pi->chan->mode != L2CAP_MODE_ERTM) + if (pi->chan->mode != L2CAP_MODE_ERTM && + pi->chan->mode != L2CAP_MODE_LE_FLOWCTL && + pi->chan->mode != L2CAP_MODE_EXT_FLOWCTL) return err; - /* Attempt to put pending rx data in the socket buffer */ - lock_sock(sk); - if (!test_bit(CONN_LOCAL_BUSY, &pi->chan->conn_state)) - goto done; + l2cap_publish_rx_avail(pi->chan); - if (pi->rx_busy_skb) { - if (!__sock_queue_rcv_skb(sk, pi->rx_busy_skb)) - pi->rx_busy_skb = NULL; - else + /* Attempt to put pending rx data in the socket buffer */ + while (!list_empty(&pi->rx_busy)) { + struct l2cap_rx_busy *rx_busy = + list_first_entry(&pi->rx_busy, + struct l2cap_rx_busy, + list); + if (__sock_queue_rcv_skb(sk, rx_busy->skb) < 0) goto done; + list_del(&rx_busy->list); + kfree(rx_busy); } /* Restore data flow when half of the receive buffer is * available. This avoids resending large numbers of * frames. 
*/ - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf >> 1) + if (test_bit(CONN_LOCAL_BUSY, &pi->chan->conn_state) && + atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf >> 1) l2cap_chan_busy(pi->chan, 0); done: @@ -1449,17 +1482,20 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { struct sock *sk = chan->data; + struct l2cap_pinfo *pi = l2cap_pi(sk); int err; lock_sock(sk); - if (l2cap_pi(sk)->rx_busy_skb) { + if (chan->mode == L2CAP_MODE_ERTM && !list_empty(&pi->rx_busy)) { err = -ENOMEM; goto done; } if (chan->mode != L2CAP_MODE_ERTM && - chan->mode != L2CAP_MODE_STREAMING) { + chan->mode != L2CAP_MODE_STREAMING && + chan->mode != L2CAP_MODE_LE_FLOWCTL && + chan->mode != L2CAP_MODE_EXT_FLOWCTL) { /* Even if no filter is attached, we could potentially * get errors from security modules, etc. */ @@ -1470,7 +1506,9 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) err = __sock_queue_rcv_skb(sk, skb); - /* For ERTM, handle one skb that doesn't fit into the recv + l2cap_publish_rx_avail(chan); + + /* For ERTM and LE, handle a skb that doesn't fit into the recv * buffer. This is important to do because the data frames * have already been acked, so the skb cannot be discarded. * @@ -1479,8 +1517,18 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) * acked and reassembled until there is buffer space * available. */ - if (err < 0 && chan->mode == L2CAP_MODE_ERTM) { - l2cap_pi(sk)->rx_busy_skb = skb; + if (err < 0 && + (chan->mode == L2CAP_MODE_ERTM || + chan->mode == L2CAP_MODE_LE_FLOWCTL || + chan->mode == L2CAP_MODE_EXT_FLOWCTL)) { + struct l2cap_rx_busy *rx_busy = + kmalloc(sizeof(*rx_busy), GFP_KERNEL); + if (!rx_busy) { + err = -ENOMEM; + goto done; + } + rx_busy->skb = skb; + list_add_tail(&rx_busy->list, &pi->rx_busy); l2cap_chan_busy(chan, 1); err = 0; } @@ -1706,6 +1754,8 @@ static const struct l2cap_ops l2cap_chan_ops = { static void l2cap_sock_destruct(struct sock *sk) { + struct l2cap_rx_busy *rx_busy, *next; + BT_DBG("sk %p", sk); if (l2cap_pi(sk)->chan) { @@ -1713,9 +1763,10 @@ static void l2cap_sock_destruct(struct sock *sk) l2cap_chan_put(l2cap_pi(sk)->chan); } - if (l2cap_pi(sk)->rx_busy_skb) { - kfree_skb(l2cap_pi(sk)->rx_busy_skb); - l2cap_pi(sk)->rx_busy_skb = NULL; + list_for_each_entry_safe(rx_busy, next, &l2cap_pi(sk)->rx_busy, list) { + kfree_skb(rx_busy->skb); + list_del(&rx_busy->list); + kfree(rx_busy); } skb_queue_purge(&sk->sk_receive_queue); @@ -1799,6 +1850,8 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) chan->data = sk; chan->ops = &l2cap_chan_ops; + + l2cap_publish_rx_avail(chan); } static struct proto l2cap_proto = { @@ -1820,6 +1873,8 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, sk->sk_destruct = l2cap_sock_destruct; sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT; + INIT_LIST_HEAD(&l2cap_pi(sk)->rx_busy); + chan = l2cap_chan_create(); if (!chan) { sk_free(sk); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 965f621ef8..80f220b7e1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -443,8 +443,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, count = 0; list_for_each_entry(d, &hci_dev_list, list) { - if (d->dev_type == HCI_PRIMARY && - !hci_dev_test_flag(d, HCI_UNCONFIGURED)) + if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) count++; } @@ -468,8 +467,7 @@ static int read_index_list(struct 
sock *sk, struct hci_dev *hdev, void *data, if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; - if (d->dev_type == HCI_PRIMARY && - !hci_dev_test_flag(d, HCI_UNCONFIGURED)) { + if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); bt_dev_dbg(hdev, "Added hci%u", d->id); } @@ -503,8 +501,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, count = 0; list_for_each_entry(d, &hci_dev_list, list) { - if (d->dev_type == HCI_PRIMARY && - hci_dev_test_flag(d, HCI_UNCONFIGURED)) + if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) count++; } @@ -528,8 +525,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; - if (d->dev_type == HCI_PRIMARY && - hci_dev_test_flag(d, HCI_UNCONFIGURED)) { + if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); bt_dev_dbg(hdev, "Added hci%u", d->id); } @@ -561,10 +557,8 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, read_lock(&hci_dev_list_lock); count = 0; - list_for_each_entry(d, &hci_dev_list, list) { - if (d->dev_type == HCI_PRIMARY || d->dev_type == HCI_AMP) - count++; - } + list_for_each_entry(d, &hci_dev_list, list) + count++; rp = kmalloc(struct_size(rp, entry, count), GFP_ATOMIC); if (!rp) { @@ -585,16 +579,10 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; - if (d->dev_type == HCI_PRIMARY) { - if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) - rp->entry[count].type = 0x01; - else - rp->entry[count].type = 0x00; - } else if (d->dev_type == HCI_AMP) { - rp->entry[count].type = 0x02; - } else { - continue; - } + if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) + rp->entry[count].type = 0x01; + else + rp->entry[count].type = 0x00; rp->entry[count].bus = d->bus; rp->entry[count++].index = cpu_to_le16(d->id); @@ -9331,23 +9319,14 @@ void mgmt_index_added(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - switch (hdev->dev_type) { - case HCI_PRIMARY: - if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { - mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, - NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS); - ev.type = 0x01; - } else { - mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, - HCI_MGMT_INDEX_EVENTS); - ev.type = 0x00; - } - break; - case HCI_AMP: - ev.type = 0x02; - break; - default: - return; + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { + mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0, + HCI_MGMT_UNCONF_INDEX_EVENTS); + ev.type = 0x01; + } else { + mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, + HCI_MGMT_INDEX_EVENTS); + ev.type = 0x00; } ev.bus = hdev->bus; @@ -9364,25 +9343,16 @@ void mgmt_index_removed(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - switch (hdev->dev_type) { - case HCI_PRIMARY: - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); + mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); - if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { - mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, - NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS); - ev.type = 0x01; - } else { - mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, - HCI_MGMT_INDEX_EVENTS); - ev.type = 0x00; - } - break; - case HCI_AMP: - ev.type = 0x02; - break; - default: - return; + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { + mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, + 
HCI_MGMT_UNCONF_INDEX_EVENTS); + ev.type = 0x01; + } else { + mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, + HCI_MGMT_INDEX_EVENTS); + ev.type = 0x00; } ev.bus = hdev->bus; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 61efeadaff..4cd29fb490 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -719,10 +719,16 @@ static void __bpf_prog_test_run_raw_tp(void *data) { struct bpf_raw_tp_test_run_info *info = data; + struct bpf_trace_run_ctx run_ctx = {}; + struct bpf_run_ctx *old_run_ctx; + + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); rcu_read_lock(); info->retval = bpf_prog_run(info->prog, info->ctx); rcu_read_unlock(); + + bpf_reset_run_ctx(old_run_ctx); } int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index c366ccc8b3..ecac788698 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -27,6 +27,7 @@ EXPORT_SYMBOL_GPL(nf_br_ops); /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { + enum skb_drop_reason reason = pskb_may_pull_reason(skb, ETH_HLEN); struct net_bridge_mcast_port *pmctx_null = NULL; struct net_bridge *br = netdev_priv(dev); struct net_bridge_mcast *brmctx = &br->multicast_ctx; @@ -38,6 +39,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest; u16 vid = 0; + if (unlikely(reason != SKB_NOT_DROPPED_YET)) { + kfree_skb_reason(skb, reason); + return NETDEV_TX_OK; + } + memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); br_tc_skb_miss_set(skb, false); diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index ee680adcee..1820f09ff5 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -73,12 +73,11 @@ int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state) } EXPORT_SYMBOL_GPL(br_mst_get_state); -static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_vlan *v, +static void br_mst_vlan_set_state(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *v, u8 state) { - struct net_bridge_vlan_group *vg = nbp_vlan_group(p); - - if (v->state == state) + if (br_vlan_get_state(v) == state) return; br_vlan_set_state(v, state); @@ -100,11 +99,12 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, }; struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; - int err; + int err = 0; - vg = nbp_vlan_group(p); + rcu_read_lock(); + vg = nbp_vlan_group_rcu(p); if (!vg) - return 0; + goto out; /* MSTI 0 (CST) state changes are notified via the regular * SWITCHDEV_ATTR_ID_PORT_STP_STATE. @@ -112,17 +112,20 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, if (msti) { err = switchdev_port_attr_set(p->dev, &attr, extack); if (err && err != -EOPNOTSUPP) - return err; + goto out; } - list_for_each_entry(v, &vg->vlan_list, vlist) { + err = 0; + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { if (v->brvlan->msti != msti) continue; - br_mst_vlan_set_state(p, v, state); + br_mst_vlan_set_state(vg, v, state); } - return 0; +out: + rcu_read_unlock(); + return err; } static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti) @@ -136,13 +139,13 @@ static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti) * it. */ if (v != pv && v->brvlan->msti == msti) { - br_mst_vlan_set_state(pv->port, pv, v->state); + br_mst_vlan_set_state(vg, pv, v->state); return; } } /* Otherwise, start out in a new MSTI with all ports disabled. 
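 * For example, if VLAN 10 already sits in MSTI 5 in state
 * BR_STATE_FORWARDING, a VLAN newly mapped to MSTI 5 inherits
 * BR_STATE_FORWARDING from it; only the first VLAN in a fresh
 * MSTI falls through to BR_STATE_DISABLED below.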
*/ - return br_mst_vlan_set_state(pv->port, pv, BR_STATE_DISABLED); + return br_mst_vlan_set_state(vg, pv, BR_STATE_DISABLED); } int br_mst_vlan_set_msti(struct net_bridge_vlan *mv, u16 msti) diff --git a/net/core/dev.c b/net/core/dev.c index 331848eca7..e8fb4ef8a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10488,8 +10488,9 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list) rebroadcast_time = jiffies; } + rcu_barrier(); + if (!wait) { - rcu_barrier(); wait = WAIT_REFS_MIN_MSECS; } else { msleep(wait); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index b0f221d658..430ed18f85 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -74,7 +74,7 @@ struct net_dm_hw_entries { }; struct per_cpu_dm_data { - spinlock_t lock; /* Protects 'skb', 'hw_entries' and + raw_spinlock_t lock; /* Protects 'skb', 'hw_entries' and * 'send_timer' */ union { @@ -168,9 +168,9 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) err: mod_timer(&data->send_timer, jiffies + HZ / 10); out: - spin_lock_irqsave(&data->lock, flags); + raw_spin_lock_irqsave(&data->lock, flags); swap(data->skb, skb); - spin_unlock_irqrestore(&data->lock, flags); + raw_spin_unlock_irqrestore(&data->lock, flags); if (skb) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; @@ -225,7 +225,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) local_irq_save(flags); data = this_cpu_ptr(&dm_cpu_data); - spin_lock(&data->lock); + raw_spin_lock(&data->lock); dskb = data->skb; if (!dskb) @@ -259,7 +259,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) } out: - spin_unlock_irqrestore(&data->lock, flags); + raw_spin_unlock_irqrestore(&data->lock, flags); } static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, @@ -314,9 +314,9 @@ net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data) mod_timer(&hw_data->send_timer, jiffies + HZ / 10); } - spin_lock_irqsave(&hw_data->lock, flags); + raw_spin_lock_irqsave(&hw_data->lock, flags); swap(hw_data->hw_entries, hw_entries); - spin_unlock_irqrestore(&hw_data->lock, flags); + raw_spin_unlock_irqrestore(&hw_data->lock, flags); return hw_entries; } @@ -448,7 +448,7 @@ net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink, return; hw_data = this_cpu_ptr(&dm_hw_cpu_data); - spin_lock_irqsave(&hw_data->lock, flags); + raw_spin_lock_irqsave(&hw_data->lock, flags); hw_entries = hw_data->hw_entries; if (!hw_entries) @@ -477,7 +477,7 @@ net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink, } out: - spin_unlock_irqrestore(&hw_data->lock, flags); + raw_spin_unlock_irqrestore(&hw_data->lock, flags); } static const struct net_dm_alert_ops net_dm_alert_summary_ops = { @@ -1673,7 +1673,7 @@ static struct notifier_block dropmon_net_notifier = { static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data) { - spin_lock_init(&data->lock); + raw_spin_lock_init(&data->lock); skb_queue_head_init(&data->drop_queue); u64_stats_init(&data->stats.syncp); } diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c index 0ccfd5fa5c..0c0bdb058c 100644 --- a/net/core/dst_cache.c +++ b/net/core/dst_cache.c @@ -83,7 +83,7 @@ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr) return NULL; *saddr = idst->in_saddr.s_addr; - return container_of(dst, struct rtable, dst); + return dst_rtable(dst); } EXPORT_SYMBOL_GPL(dst_cache_get_ip4); @@ -112,7 +112,7 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, 
idst = this_cpu_ptr(dst_cache->cache); dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst, - rt6_get_cookie((struct rt6_info *)dst)); + rt6_get_cookie(dst_rt6_info(dst))); idst->in6_saddr = *saddr; } EXPORT_SYMBOL_GPL(dst_cache_set_ip6); diff --git a/net/core/filter.c b/net/core/filter.c index ae5254f712..ce255e0a2f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1662,6 +1662,11 @@ static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); static inline int __bpf_try_make_writable(struct sk_buff *skb, unsigned int write_len) { +#ifdef CONFIG_DEBUG_NET + /* Avoid a splat in pskb_may_pull_reason() */ + if (write_len > INT_MAX) + return -EINVAL; +#endif return skb_ensure_writable(skb, write_len); } @@ -2215,7 +2220,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, rcu_read_lock(); if (!nh) { dst = skb_dst(skb); - nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), + nexthop = rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr); } else { nexthop = &nh->ipv6_nh; @@ -2314,8 +2319,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, rcu_read_lock(); if (!nh) { - struct dst_entry *dst = skb_dst(skb); - struct rtable *rt = container_of(dst, struct rtable, dst); + struct rtable *rt = skb_rtable(skb); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); } else if (nh->nh_family == AF_INET6) { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 9d690d32da..b1dc84c4fd 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -693,11 +693,16 @@ EXPORT_SYMBOL_GPL(__put_net); * get_net_ns - increment the refcount of the network namespace * @ns: common namespace (net) * - * Returns the net's common namespace. + * Returns the net's common namespace or ERR_PTR() if ref is zero. 
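+ *
+ * Callers must check the result before use; a minimal sketch:
+ *
+ *	ns = get_net_ns(ns);
+ *	if (IS_ERR(ns))
+ *		return PTR_ERR(ns);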
*/ struct ns_common *get_net_ns(struct ns_common *ns) { - return &get_net(container_of(ns, struct net, ns))->ns; + struct net *net; + + net = maybe_get_net(container_of(ns, struct net, ns)); + if (net) + return &net->ns; + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(get_net_ns); diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 7004b3399c..8c2d5a0bc2 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -59,22 +59,22 @@ XDP_METADATA_KFUNC_xxx nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, xdp_rx_meta, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, - xsk_features, NETDEV_A_DEV_PAD)) { - genlmsg_cancel(rsp, hdr); - return -EINVAL; - } + xsk_features, NETDEV_A_DEV_PAD)) + goto err_cancel_msg; if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) { if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, - netdev->xdp_zc_max_segs)) { - genlmsg_cancel(rsp, hdr); - return -EINVAL; - } + netdev->xdp_zc_max_segs)) + goto err_cancel_msg; } genlmsg_end(rsp, hdr); return 0; + +err_cancel_msg: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; } static void diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 543007f159..55bcacf67d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -316,7 +316,7 @@ static int netpoll_owner_active(struct net_device *dev) struct napi_struct *napi; list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) { - if (napi->poll_owner == smp_processor_id()) + if (READ_ONCE(napi->poll_owner) == smp_processor_id()) return 1; } return 0; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8ba6a4e4be..74e6f9746f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6484,6 +6484,46 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, /* Process one rtnetlink message. */ +static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + rtnl_dumpit_func dumpit = cb->data; + int err; + + /* Previous iteration have already finished, avoid calling->dumpit() + * again, it may not expect to be called after it reached the end. + */ + if (!dumpit) + return 0; + + err = dumpit(skb, cb); + + /* Old dump handlers used to send NLM_DONE as in a separate recvmsg(). + * Some applications which parse netlink manually depend on this. + */ + if (cb->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) { + if (err < 0 && err != -EMSGSIZE) + return err; + if (!err) + cb->data = NULL; + + return skb->len; + } + return err; +} + +static int rtnetlink_dump_start(struct sock *ssk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct netlink_dump_control *control) +{ + if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) { + WARN_ON(control->data); + control->data = control->dump; + control->dump = rtnl_dumpit; + } + + return netlink_dump_start(ssk, skb, nlh, control); +} + static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -6548,7 +6588,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, .module = owner, .flags = flags, }; - err = netlink_dump_start(rtnl, skb, nlh, &c); + err = rtnetlink_dump_start(rtnl, skb, nlh, &c); /* netlink_dump_start() will keep a reference on * module if dump is still in progress. 
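 * That reference is only dropped by netlink_dump() once the dump
 * terminates, so the handler's module cannot be unloaded while a
 * multi-part dump is still being read from user space.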
*/ @@ -6694,7 +6734,7 @@ void __init rtnetlink_init(void) register_netdevice_notifier(&rtnetlink_dev_notifier); rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, - rtnl_dump_ifinfo, 0); + rtnl_dump_ifinfo, RTNL_FLAG_DUMP_SPLIT_NLM_DONE); rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0); rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0); diff --git a/net/core/sock.c b/net/core/sock.c index 0963689a59..09eccc9c50 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3743,6 +3743,9 @@ void sk_common_release(struct sock *sk) sk->sk_prot->unhash(sk); + if (sk->sk_socket) + sk->sk_socket->sk = NULL; + /* * In this point socket cannot receive new packets, but it is possible * that some packets are in flight because some CPU runs receiver and diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 8598466a38..01be07b485 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1639,19 +1639,23 @@ void sock_map_close(struct sock *sk, long timeout) lock_sock(sk); rcu_read_lock(); - psock = sk_psock_get(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - release_sock(sk); - saved_close = READ_ONCE(sk->sk_prot)->close; - } else { + psock = sk_psock(sk); + if (likely(psock)) { saved_close = psock->saved_close; sock_map_remove_links(sk, psock); + psock = sk_psock_get(sk); + if (unlikely(!psock)) + goto no_psock; rcu_read_unlock(); sk_psock_stop(psock); release_sock(sk); cancel_delayed_work_sync(&psock->work); sk_psock_put(sk, psock); + } else { + saved_close = READ_ONCE(sk->sk_prot)->close; +no_psock: + rcu_read_unlock(); + release_sock(sk); } /* Make sure we do not recurse. This is a bug. diff --git a/net/devlink/core.c b/net/devlink/core.c index 7f0b093208..f49cd83f19 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -314,7 +314,7 @@ static void devlink_release(struct work_struct *work) mutex_destroy(&devlink->lock); lockdep_unregister_key(&devlink->lock_key); put_device(devlink->dev); - kfree(devlink); + kvfree(devlink); } void devlink_put(struct devlink *devlink) @@ -420,7 +420,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, if (!devlink_reload_actions_valid(ops)) return NULL; - devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL); + devlink = kvzalloc(struct_size(devlink, priv, priv_size), GFP_KERNEL); if (!devlink) return NULL; @@ -455,7 +455,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, return devlink; err_xa_alloc: - kfree(devlink); + kvfree(devlink); return NULL; } EXPORT_SYMBOL_GPL(devlink_alloc_ns); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 049c3adeb8..4e3651101b 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -161,9 +161,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb_reset_mac_header(skb); - eth = (struct ethhdr *)skb->data; - skb_pull_inline(skb, ETH_HLEN); - + eth = eth_skb_pull_mac(skb); eth_skb_pkt_type(skb, dev); /* diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 5a55270aa8..e645d751a5 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2220,7 +2220,7 @@ static int ethtool_get_phy_stats_ethtool(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; int n_stats, ret; - if (!ops || !ops->get_sset_count || ops->get_ethtool_phy_stats) + if (!ops || !ops->get_sset_count || !ops->get_ethtool_phy_stats) return -EOPNOTSUPP; n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS); diff --git a/net/ipv4/af_inet.c 
b/net/ipv4/af_inet.c index fafb123f79..5622ddd3bf 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -758,7 +758,9 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new sock_rps_record_flow(newsk); WARN_ON(!((1 << newsk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_RECV | - TCPF_CLOSE_WAIT | TCPF_CLOSE))); + TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | + TCPF_CLOSING | TCPF_CLOSE_WAIT | + TCPF_CLOSE))); if (test_bit(SOCK_SUPPORT_ZC, &sock->flags)) set_bit(SOCK_SUPPORT_ZC, &newsock->flags); @@ -1306,8 +1308,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) int inet_sk_rebuild_header(struct sock *sk) { + struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0)); struct inet_sock *inet = inet_sk(sk); - struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); __be32 daddr; struct ip_options_rcu *inet_opt; struct flowi4 *fl4; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 8b17d83e5f..1eb98440c0 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -2012,12 +2012,16 @@ static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) * from there we can determine the new total option length */ iter = 0; optlen_new = 0; - while (iter < opt->opt.optlen) - if (opt->opt.__data[iter] != IPOPT_NOP) { + while (iter < opt->opt.optlen) { + if (opt->opt.__data[iter] == IPOPT_END) { + break; + } else if (opt->opt.__data[iter] == IPOPT_NOP) { + iter++; + } else { iter += opt->opt.__data[iter + 1]; optlen_new = iter; - } else - iter++; + } + } hdr_delta = opt->opt.optlen; opt->opt.optlen = (optlen_new + 3) & ~3; hdr_delta -= opt->opt.optlen; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7a437f0d41..84b5d1ccf7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1683,6 +1683,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, struct nlmsghdr *nlh; unsigned long tstamp; u32 preferred, valid; + u32 flags; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm), args->flags); @@ -1692,7 +1693,13 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; - ifm->ifa_flags = READ_ONCE(ifa->ifa_flags); + + flags = READ_ONCE(ifa->ifa_flags); + /* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits. + * The 32bit value is given in IFA_FLAGS attribute. 
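+ * For example, IFA_F_MANAGETEMPADDR (0x100) and all higher flags
+ * do not fit in the u8 field and are only visible via IFA_FLAGS.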
+ */ + ifm->ifa_flags = (__u8)flags; + ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; @@ -1701,7 +1708,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, goto nla_put_failure; tstamp = READ_ONCE(ifa->ifa_tstamp); - if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { + if (!(flags & IFA_F_PERMANENT)) { preferred = READ_ONCE(ifa->ifa_preferred_lft); valid = READ_ONCE(ifa->ifa_valid_lft); if (preferred != INFINITY_LIFE_TIME) { @@ -1732,7 +1739,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || (ifa->ifa_proto && nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) || - nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) || + nla_put_u32(skb, IFA_FLAGS, flags) || (ifa->ifa_rt_priority && nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) || put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp, @@ -1875,10 +1882,11 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) goto done; if (fillargs.ifindex) { - err = -ENODEV; dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex); - if (!dev) + if (!dev) { + err = -ENODEV; goto done; + } in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto done; @@ -1890,7 +1898,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = inet_base_seq(tgt_net); - for_each_netdev_dump(net, dev, ctx->ifindex) { + for_each_netdev_dump(tgt_net, dev, ctx->ifindex) { in_dev = __in_dev_get_rcu(dev); if (!in_dev) continue; @@ -2793,7 +2801,7 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, - RTNL_FLAG_DUMP_UNLOCKED); + RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE); rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, inet_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c484b1c0fc..7ad2cafb92 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1050,11 +1050,6 @@ next: e++; } } - - /* Don't let NLM_DONE coalesce into a message, even if it could. - * Some user space expects NLM_DONE in a separate recv(). - */ - err = skb->len; out: cb->args[1] = e; @@ -1665,5 +1660,5 @@ void __init ip_fib_init(void) rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0); rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0); rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, - RTNL_FLAG_DUMP_UNLOCKED); + RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 437e782b96..207482d30d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -483,6 +483,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct icmp_bxm *param) { struct net_device *route_lookup_dev; + struct dst_entry *dst, *dst2; struct rtable *rt, *rt2; struct flowi4 fl4_dec; int err; @@ -508,16 +509,17 @@ static struct rtable *icmp_route_lookup(struct net *net, /* No need to clone since we're just using its address. 
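 * rt2 merely aliases rt here; it is replaced by a route of its
 * own further down only if the reverse-decoded flow requires a
 * fresh lookup.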
*/ rt2 = rt; - rt = (struct rtable *) xfrm_lookup(net, &rt->dst, - flowi4_to_flowi(fl4), NULL, 0); - if (!IS_ERR(rt)) { + dst = xfrm_lookup(net, &rt->dst, + flowi4_to_flowi(fl4), NULL, 0); + rt = dst_rtable(dst); + if (!IS_ERR(dst)) { if (rt != rt2) return rt; - } else if (PTR_ERR(rt) == -EPERM) { + } else if (PTR_ERR(dst) == -EPERM) { rt = NULL; - } else + } else { return rt; - + } err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET); if (err) goto relookup_failed; @@ -551,19 +553,19 @@ static struct rtable *icmp_route_lookup(struct net *net, if (err) goto relookup_failed; - rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, - flowi4_to_flowi(&fl4_dec), NULL, - XFRM_LOOKUP_ICMP); - if (!IS_ERR(rt2)) { + dst2 = xfrm_lookup(net, &rt2->dst, flowi4_to_flowi(&fl4_dec), NULL, + XFRM_LOOKUP_ICMP); + rt2 = dst_rtable(dst2); + if (!IS_ERR(dst2)) { dst_release(&rt->dst); memcpy(fl4, &fl4_dec, sizeof(*fl4)); rt = rt2; - } else if (PTR_ERR(rt2) == -EPERM) { + } else if (PTR_ERR(dst2) == -EPERM) { if (rt) dst_release(&rt->dst); return rt2; } else { - err = PTR_ERR(rt2); + err = PTR_ERR(dst2); goto relookup_failed; } return rt; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 5e9c815665..d6fbcbd235 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -616,7 +616,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk, dst = skb_dst(skb); if (curr_dst != dst) { hint = ip_extract_route_hint(net, skb, - ((struct rtable *)dst)->rt_type); + dst_rtable(dst)->rt_type); /* dispatch old sublist */ if (!list_empty(&sublist)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 39229fd060..9500031a1f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -198,7 +198,7 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct rtable *rt = (struct rtable *)dst; + struct rtable *rt = dst_rtable(dst); struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; @@ -475,7 +475,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, goto packet_routed; /* Make sure we can route this packet. */ - rt = (struct rtable *)__sk_dst_check(sk, 0); + rt = dst_rtable(__sk_dst_check(sk, 0)); if (!rt) { __be32 daddr; @@ -971,7 +971,7 @@ static int __ip_append_data(struct sock *sk, bool zc = false; unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; - struct rtable *rt = (struct rtable *)cork->dst; + struct rtable *rt = dst_rtable(cork->dst); bool paged, hold_tskey, extra_uref = false; unsigned int wmem_alloc_delta = 0; u32 tskey = 0; @@ -1390,7 +1390,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); struct ip_options *opt = NULL; - struct rtable *rt = (struct rtable *)cork->dst; + struct rtable *rt = dst_rtable(cork->dst); struct iphdr *iph; u8 pmtudisc, ttl; __be16 df = 0; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 1b8d8ff9a2..0e4bd52842 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -543,7 +543,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, struct rt6_info *rt6; __be32 daddr; - rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) : + rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) : NULL; daddr = md ? 
dst : tunnel->parms.iph.daddr; diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c index 69e3317996..73e66a088e 100644 --- a/net/ipv4/netfilter/nf_tproxy_ipv4.c +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -58,6 +58,8 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) laddr = 0; indev = __in_dev_get_rcu(skb->dev); + if (!indev) + return daddr; in_dev_for_each_ifa_rcu(ifa, indev) { if (ifa->ifa_flags & IFA_F_SECONDARY) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b814fdab19..3fcf084fbd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -132,7 +132,8 @@ struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst); -static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); +static void ipv4_negative_advice(struct sock *sk, + struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, @@ -831,28 +832,21 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf u32 mark = skb->mark; __u8 tos = iph->tos; - rt = (struct rtable *) dst; + rt = dst_rtable(dst); __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } -static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) +static void ipv4_negative_advice(struct sock *sk, + struct dst_entry *dst) { - struct rtable *rt = (struct rtable *)dst; - struct dst_entry *ret = dst; + struct rtable *rt = dst_rtable(dst); - if (rt) { - if (dst->obsolete > 0) { - ip_rt_put(rt); - ret = NULL; - } else if ((rt->rt_flags & RTCF_REDIRECTED) || - rt->dst.expires) { - ip_rt_put(rt); - ret = NULL; - } - } - return ret; + if ((dst->obsolete > 0) || + (rt->rt_flags & RTCF_REDIRECTED) || + rt->dst.expires) + sk_dst_reset(sk); } /* @@ -1056,7 +1050,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh) { - struct rtable *rt = (struct rtable *) dst; + struct rtable *rt = dst_rtable(dst); struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); @@ -1127,7 +1121,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); - rt = (struct rtable *)odst; + rt = dst_rtable(odst); if (odst->obsolete && !odst->ops->check(odst, 0)) { rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) @@ -1136,7 +1130,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) new = true; } - __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); + __ip_rt_update_pmtu(dst_rtable(xfrm_dst_path(&rt->dst)), &fl4, mtu); if (!dst_check(&rt->dst, 0)) { if (new) @@ -1193,7 +1187,7 @@ EXPORT_SYMBOL_GPL(ipv4_sk_redirect); INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { - struct rtable *rt = (struct rtable *) dst; + struct rtable *rt = dst_rtable(dst); /* All IPV4 dsts are created with ->obsolete set to the value * DST_OBSOLETE_FORCE_CHK which forces validation calls down @@ -1528,10 +1522,8 @@ void rt_del_uncached_list(struct rtable *rt) static void ipv4_dst_destroy(struct dst_entry *dst) { - struct rtable *rt = (struct rtable *)dst; - ip_dst_metrics_put(dst); - rt_del_uncached_list(rt); + rt_del_uncached_list(dst_rtable(dst)); } void 
rt_flush_dev(struct net_device *dev) @@ -2832,7 +2824,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rtable *ort = (struct rtable *) dst_orig; + struct rtable *ort = dst_rtable(dst_orig); struct rtable *rt; rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0); @@ -2877,9 +2869,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, if (flp4->flowi4_proto) { flp4->flowi4_oif = rt->dst.dev->ifindex; - rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst, - flowi4_to_flowi(flp4), - sk, 0); + rt = dst_rtable(xfrm_lookup_route(net, &rt->dst, + flowi4_to_flowi(flp4), + sk, 0)); } return rt; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 66d77faca6..77ee1eda3f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1159,6 +1159,9 @@ new_segment: process_backlog++; +#ifdef CONFIG_SKB_DECRYPTED + skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); +#endif tcp_skb_entail(sk, skb); copy = size_goal; @@ -2637,6 +2640,10 @@ void tcp_set_state(struct sock *sk, int state) if (oldstate != TCP_ESTABLISHED) TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); break; + case TCP_CLOSE_WAIT: + if (oldstate == TCP_SYN_RECV) + TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); + break; case TCP_CLOSE: if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED) @@ -2648,7 +2655,7 @@ void tcp_set_state(struct sock *sk, int state) inet_put_port(sk); fallthrough; default: - if (oldstate == TCP_ESTABLISHED) + if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT) TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); } diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 781b67a525..09c0fa6756 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -933,6 +933,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, struct tcp_ao_key *key; __be32 sisn, disn; u8 *traffic_key; + int state; u32 sne = 0; info = rcu_dereference(tcp_sk(sk)->ao_info); @@ -948,8 +949,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, disn = 0; } + state = READ_ONCE(sk->sk_state); /* Fast-path */ - if (likely((1 << sk->sk_state) & TCP_AO_ESTABLISHED)) { + if (likely((1 << state) & TCP_AO_ESTABLISHED)) { enum skb_drop_reason err; struct tcp_ao_key *current_key; @@ -988,6 +990,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, return SKB_NOT_DROPPED_YET; } + if (unlikely(state == TCP_CLOSE)) + return SKB_DROP_REASON_TCP_CLOSE; + /* Lookup key based on peer address and keyid. 
* current_key and rnext_key must not be used on tcp listen * sockets as otherwise: @@ -1001,7 +1006,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, if (th->syn && !th->ack) goto verify_hash; - if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { + if ((1 << state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { /* Make the initial syn the likely case here */ if (unlikely(req)) { sne = tcp_ao_compute_sne(0, tcp_rsk(req)->rcv_isn, @@ -1018,14 +1023,14 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, /* no way to figure out initial sisn/disn - drop */ return SKB_DROP_REASON_TCP_FLAGS; } - } else if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { + } else if ((1 << state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { disn = info->lisn; if (th->syn || th->rst) sisn = th->seq; else sisn = info->risn; } else { - WARN_ONCE(1, "TCP-AO: Unexpected sk_state %d", sk->sk_state); + WARN_ONCE(1, "TCP-AO: Unexpected sk_state %d", state); return SKB_DROP_REASON_TCP_AOFAILURE; } verify_hash: @@ -1963,8 +1968,10 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, first = true; } - if (cmd.ao_required && tcp_ao_required_verify(sk)) - return -EKEYREJECTED; + if (cmd.ao_required && tcp_ao_required_verify(sk)) { + err = -EKEYREJECTED; + goto out; + } /* For sockets in TCP_CLOSED it's possible set keys that aren't * matching the future peer (address/port/VRF/etc), diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index e33fbe4933..b004280855 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -58,7 +58,18 @@ struct dctcp { }; static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ -module_param(dctcp_shift_g, uint, 0644); + +static int dctcp_shift_g_set(const char *val, const struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, 0, 10); +} + +static const struct kernel_param_ops dctcp_shift_g_ops = { + .set = dctcp_shift_g_set, + .get = param_get_uint, +}; + +module_param_cb(dctcp_shift_g, &dctcp_shift_g_ops, &dctcp_shift_g, 0644); MODULE_PARM_DESC(dctcp_shift_g, "parameter g for updating dctcp_alpha"); static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a140d9f7a0..1054a44033 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6289,6 +6289,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, skb_rbtree_walk_from(data) tcp_mark_skb_lost(sk, data); tcp_xmit_retransmit_queue(sk); + tp->retrans_stamp = 0; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); return true; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e0cef75f85..92511b7fd5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2001,7 +2001,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason) { - u32 limit, tail_gso_size, tail_gso_segs; + u32 tail_gso_size, tail_gso_segs; struct skb_shared_info *shinfo; const struct tcphdr *th; struct tcphdr *thtail; @@ -2010,6 +2010,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, bool fragstolen; u32 gso_segs; u32 gso_size; + u64 limit; int delta; /* In case all data was pulled from skb frags (in __pskb_pull_tail()), @@ -2107,7 +2108,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, __skb_push(skb, hdrlen); no_coalesce: - limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1); + /* sk->sk_backlog.len is reset only at the 
end of __release_sock(). + * Both sk->sk_backlog.len and sk->sk_rmem_alloc could reach + * sk_rcvbuf in normal conditions. + */ + limit = ((u64)READ_ONCE(sk->sk_rcvbuf)) << 1; + + limit += ((u32)READ_ONCE(sk->sk_sndbuf)) >> 1; /* Only socket owner can try to collapse/prune rx queues * to reduce memory overhead, so add a little headroom here. @@ -2115,6 +2122,8 @@ no_coalesce: */ limit += 64 * 1024; + limit = min_t(u64, limit, UINT_MAX); + if (unlikely(sk_add_backlog(sk, skb, limit))) { bh_unlock_sock(sk); *reason = SKB_DROP_REASON_SOCKET_BACKLOG; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d1ad20ce1c..f96f68cf79 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -483,8 +483,12 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, { const struct tcp_sock *tp = tcp_sk(sk); const int timeout = TCP_RTO_MAX * 2; - u32 rcv_delta; + s32 rcv_delta; + /* Note: timer interrupt might have been delayed by at least one jiffy, + * and tp->rcv_tstamp might very well have been written recently. + * rcv_delta can thus be negative. + */ rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp; if (rcv_delta <= timeout) return false; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b32cf2eeeb..72d3bf1368 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -427,15 +427,21 @@ static struct sock *udp4_lib_lookup2(struct net *net, { struct sock *sk, *result; int score, badness; + bool need_rescore; result = NULL; badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, sdif); + need_rescore = false; +rescore: + score = compute_score(need_rescore ? result : sk, net, saddr, + sport, daddr, hnum, dif, sdif); if (score > badness) { badness = score; + if (need_rescore) + continue; + if (sk->sk_state == TCP_ESTABLISHED) { result = sk; continue; @@ -456,9 +462,14 @@ static struct sock *udp4_lib_lookup2(struct net *net, if (IS_ERR(result)) continue; - badness = compute_score(result, net, saddr, sport, - daddr, hnum, dif, sdif); - + /* compute_score is too long of a function to be + * inlined, and calling it again here yields + * measureable overhead for some + * workloads. Work around it by jumping + * backwards to rescore 'result'. 
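+ * Sketch of the resulting flow: reuseport picks 'result', we set
+ * need_rescore and jump back so the single compute_score() call
+ * site scores it, and the 'if (need_rescore) continue' above then
+ * resumes the bucket walk without selecting twice.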
+ */ + need_rescore = true; + goto rescore; } } return result; @@ -1207,7 +1218,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } if (connected) - rt = (struct rtable *)sk_dst_check(sk, 0); + rt = dst_rtable(sk_dst_check(sk, 0)); if (!rt) { struct net *net = sock_net(sk); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c33bca2c38..1853a8415d 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -69,7 +69,7 @@ static int xfrm4_get_saddr(struct net *net, int oif, static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { - struct rtable *rt = (struct rtable *)xdst->route; + struct rtable *rt = dst_rtable(xdst->route); const struct flowi4 *fl4 = &fl->u.ip4; xdst->u.rt.rt_iif = fl4->flowi4_iif; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1635da0728..d285c1f6f1 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -212,7 +212,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { res = true; } else { - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt = dst_rt6_info(dst); int tmo = net->ipv6.sysctl.icmpv6_time; struct inet_peer *peer; @@ -241,7 +241,7 @@ static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type, dst = ip6_route_output(net, sk, fl6); if (!dst->error) { - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt = dst_rt6_info(dst); struct in6_addr prefsrc; rt6_get_prefsrc(rt, &prefsrc); @@ -616,7 +616,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), - &ipc6, &fl6, (struct rt6_info *)dst, + &ipc6, &fl6, dst_rt6_info(dst), MSG_DONTWAIT)) { ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); @@ -803,7 +803,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) if (ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), &ipc6, &fl6, - (struct rt6_info *)dst, MSG_DONTWAIT)) { + dst_rt6_info(dst), MSG_DONTWAIT)) { __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 8c1ce78956..0601bad798 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -38,7 +38,7 @@ static inline struct ila_params *ila_params_lwtunnel( static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); - struct rt6_info *rt = (struct rt6_info *)orig_dst; + struct rt6_info *rt = dst_rt6_info(orig_dst); struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate); struct dst_entry *dst; int err = -EINVAL; @@ -70,7 +70,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = orig_dst->dev->ifindex; fl6.flowi6_iif = LOOPBACK_IFINDEX; - fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst, + fl6.daddr = *rt6_nexthop(dst_rt6_info(orig_dst), &ip6h->daddr); dst = ip6_route_output(net, NULL, &fl6); diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 7563f8c6aa..bf7120ecea 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -351,9 +351,9 @@ do_encap: goto drop; if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) { - preempt_disable(); + local_bh_disable(); dst = dst_cache_get(&ilwt->cache); - preempt_enable(); + 
local_bh_enable(); if (unlikely(!dst)) { struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -373,9 +373,9 @@ do_encap: goto drop; } - preempt_disable(); + local_bh_disable(); dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); - preempt_enable(); + local_bh_enable(); } skb_dst_drop(skb); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index c1f62352a4..1ace4ac3ee 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -965,6 +965,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, if (!fib6_nh->rt6i_pcpu) return; + rcu_read_lock(); /* release the reference to this fib entry from * all of its cached pcpu routes */ @@ -973,7 +974,9 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, struct rt6_info *pcpu_rt; ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); - pcpu_rt = *ppcpu_rt; + + /* Paired with xchg() in rt6_get_pcpu_route() */ + pcpu_rt = READ_ONCE(*ppcpu_rt); /* only dropping the 'from' reference if the cached route * is using 'match'. The cached pcpu_rt->from only changes @@ -987,6 +990,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, fib6_info_release(from); } } + rcu_read_unlock(); } struct fib6_nh_pcpu_arg { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 97b0788b31..27d8725445 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -120,7 +120,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); rcu_read_lock(); - nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); + nexthop = rt6_nexthop(dst_rt6_info(dst), daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); if (unlikely(IS_ERR_OR_NULL(neigh))) { @@ -599,7 +599,7 @@ int ip6_forward(struct sk_buff *skb) * send a redirect. */ - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) target = &rt->rt6i_gateway; else @@ -856,7 +856,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct sk_buff *frag; - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; bool mono_delivery_time = skb->mono_delivery_time; @@ -1063,7 +1063,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, return NULL; } - rt = (struct rt6_info *)dst; + rt = dst_rt6_info(dst); /* Yes, checking route validity in not connected * case is not very simple. Take into account, * that we do not support routing by source, TOS, @@ -1118,7 +1118,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, struct rt6_info *rt; *dst = ip6_route_output(net, sk, fl6); - rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; + rt = (*dst)->error ? NULL : dst_rt6_info(*dst); rcu_read_lock(); from = rt ? 
rcu_dereference(rt->from) : NULL; @@ -1159,7 +1159,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ - rt = (struct rt6_info *) *dst; + rt = dst_rt6_info(*dst); rcu_read_lock(); n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); @@ -1423,7 +1423,7 @@ static int __ip6_append_data(struct sock *sk, int offset = 0; bool zc = false; u32 tskey = 0; - struct rt6_info *rt = (struct rt6_info *)cork->dst; + struct rt6_info *rt = dst_rt6_info(cork->dst); bool paged, hold_tskey, extra_uref = false; struct ipv6_txoptions *opt = v6_cork->opt; int csummode = CHECKSUM_NONE; @@ -1877,7 +1877,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = v6_cork->opt; - struct rt6_info *rt = (struct rt6_info *)cork->base.dst; + struct rt6_info *rt = dst_rt6_info(cork->base.dst); struct flowi6 *fl6 = &cork->fl.u.ip6; unsigned char proto = fl6->flowi6_proto; @@ -1949,7 +1949,7 @@ out: int ip6_send_skb(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); int err; err = ip6_local_out(net, skb->sk, skb); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index cb0ee81a06..dd342e6ecf 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2273,7 +2273,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, int err; struct mr_table *mrt; struct mfc6_cache *cache; - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); if (!mrt) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ae134634c3..d914b23256 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1722,7 +1722,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) if (IS_ERR(dst)) return; - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) { ND_PRINTK(2, warn, diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index ef2059c889..88b3fcacd4 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -154,7 +154,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false); if (IS_ERR(dst)) return PTR_ERR(dst); - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0d896ca7b5..2eedf25560 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -598,7 +598,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct ipv6hdr *iph; struct sk_buff *skb; int err; - struct rt6_info *rt = (struct rt6_info *)*dstp; + struct rt6_info *rt = dst_rt6_info(*dstp); int hlen = LL_RESERVED_SPACE(rt->dst.dev); int tlen = rt->dst.dev->needed_tailroom; @@ -917,7 +917,7 @@ back_from_confirm: ipc6.opt = opt; lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, - len, 0, &ipc6, &fl6, (struct rt6_info *)dst, + len, 0, &ipc6, &fl6, dst_rt6_info(dst), msg->msg_flags); if (err) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index acb4f119e1..148bf9e313 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -369,7 +369,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) * the source of the fragment, with the Pointer 
field set to zero. */ nexthdr = hdr->nexthdr; - if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) { + if (ipv6frag_thdr_truncated(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr), &nexthdr)) { __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1f4b935a0e..d7a5ca012a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -87,7 +87,8 @@ struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst); -static struct dst_entry *ip6_negative_advice(struct dst_entry *); +static void ip6_negative_advice(struct sock *sk, + struct dst_entry *dst); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, struct net_device *dev); @@ -226,7 +227,7 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr) { - const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); + const struct rt6_info *rt = dst_rt6_info(dst); return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any), dst->dev, skb, daddr); @@ -234,8 +235,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) { + const struct rt6_info *rt = dst_rt6_info(dst); struct net_device *dev = dst->dev; - struct rt6_info *rt = (struct rt6_info *)dst; daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr); if (!daddr) @@ -354,7 +355,7 @@ EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt = dst_rt6_info(dst); struct fib6_info *from; struct inet6_dev *idev; @@ -373,7 +374,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt = dst_rt6_info(dst); struct inet6_dev *idev = rt->rt6i_idev; if (idev && idev->dev != blackhole_netdev) { @@ -637,6 +638,8 @@ static void rt6_probe(struct fib6_nh *fib6_nh) rcu_read_lock(); last_probe = READ_ONCE(fib6_nh->last_probe); idev = __in6_dev_get(dev); + if (!idev) + goto out; neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { if (READ_ONCE(neigh->nud_state) & NUD_VALID) @@ -1288,7 +1291,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup); if (dst->error == 0) - return (struct rt6_info *) dst; + return dst_rt6_info(dst); dst_release(dst); @@ -1408,6 +1411,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) struct rt6_info *prev, **p; p = this_cpu_ptr(res->nh->rt6i_pcpu); + /* Paired with READ_ONCE() in __fib6_drop_pcpu_from() */ prev = xchg(p, NULL); if (prev) { dst_dev_put(&prev->dst); @@ -2647,7 +2651,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, rcu_read_lock(); dst = ip6_route_output_flags_noref(net, sk, fl6, flags); - rt6 = (struct rt6_info *)dst; + rt6 = dst_rt6_info(dst); /* For dst cached in uncached_list, refcnt is already taken. 
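 * For any other dst, dst_hold_safe() can observe a refcnt that
 * has already dropped to zero and fail; in that case the
 * ip6_null_entry blackhole (dst.error == -ENETUNREACH) is
 * returned instead of a dying route.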
*/ if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) { dst = &net->ipv6.ip6_null_entry->dst; @@ -2661,7 +2665,7 @@ EXPORT_SYMBOL_GPL(ip6_route_output_flags); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; + struct rt6_info *rt, *ort = dst_rt6_info(dst_orig); struct net_device *loopback_dev = net->loopback_dev; struct dst_entry *new = NULL; @@ -2744,7 +2748,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, struct fib6_info *from; struct rt6_info *rt; - rt = container_of(dst, struct rt6_info, dst); + rt = dst_rt6_info(dst); if (rt->sernum) return rt6_is_valid(rt) ? dst : NULL; @@ -2770,24 +2774,24 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, } EXPORT_INDIRECT_CALLABLE(ip6_dst_check); -static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) +static void ip6_negative_advice(struct sock *sk, + struct dst_entry *dst) { - struct rt6_info *rt = (struct rt6_info *) dst; + struct rt6_info *rt = dst_rt6_info(dst); - if (rt) { - if (rt->rt6i_flags & RTF_CACHE) { - rcu_read_lock(); - if (rt6_check_expired(rt)) { - rt6_remove_exception_rt(rt); - dst = NULL; - } - rcu_read_unlock(); - } else { - dst_release(dst); - dst = NULL; + if (rt->rt6i_flags & RTF_CACHE) { + rcu_read_lock(); + if (rt6_check_expired(rt)) { + /* counteract the dst_release() in sk_dst_reset() */ + dst_hold(dst); + sk_dst_reset(sk); + + rt6_remove_exception_rt(rt); } + rcu_read_unlock(); + return; } - return dst; + sk_dst_reset(sk); } static void ip6_link_failure(struct sk_buff *skb) @@ -2796,7 +2800,7 @@ static void ip6_link_failure(struct sk_buff *skb) icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); - rt = (struct rt6_info *) skb_dst(skb); + rt = dst_rt6_info(skb_dst(skb)); if (rt) { rcu_read_lock(); if (rt->rt6i_flags & RTF_CACHE) { @@ -2852,7 +2856,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, bool confirm_neigh) { const struct in6_addr *daddr, *saddr; - struct rt6_info *rt6 = (struct rt6_info *)dst; + struct rt6_info *rt6 = dst_rt6_info(dst); /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU) * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it. @@ -3601,7 +3605,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, if (!dev) goto out; - if (idev->cnf.disable_ipv6) { + if (!idev || idev->cnf.disable_ipv6) { NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device"); err = -EACCES; goto out; @@ -4174,7 +4178,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu } } - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_REJECT) { net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); return; @@ -4445,7 +4449,7 @@ static void rtmsg_to_fib6_config(struct net *net, .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? : RT6_TABLE_MAIN, .fc_ifindex = rtmsg->rtmsg_ifindex, - .fc_metric = rtmsg->rtmsg_metric ? 
: IP6_RT_PRIO_USER, + .fc_metric = rtmsg->rtmsg_metric, .fc_expires = rtmsg->rtmsg_info, .fc_dst_len = rtmsg->rtmsg_dst_len, .fc_src_len = rtmsg->rtmsg_src_len, @@ -4475,6 +4479,9 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) rtnl_lock(); switch (cmd) { case SIOCADDRT: + /* Only do the default setting of fc_metric in route adding */ + if (cfg.fc_metric == 0) + cfg.fc_metric = IP6_RT_PRIO_USER; err = ip6_route_add(&cfg, GFP_KERNEL, NULL); break; case SIOCDELRT: @@ -5608,7 +5615,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, int iif, int type, u32 portid, u32 seq, unsigned int flags) { - struct rt6_info *rt6 = (struct rt6_info *)dst; + struct rt6_info *rt6 = dst_rt6_info(dst); struct rt6key *rt6_dst, *rt6_src; u32 *pmetrics, table, rt6_flags; unsigned char nh_flags = 0; @@ -6111,7 +6118,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, } - rt = container_of(dst, struct rt6_info, dst); + rt = dst_rt6_info(dst); if (rt->dst.error) { err = rt->dst.error; ip6_rt_put(rt); @@ -6338,12 +6345,12 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, if (!write) return -EINVAL; - net = (struct net *)ctl->extra1; - delay = net->ipv6.sysctl.flush_delay; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (ret) return ret; + net = (struct net *)ctl->extra1; + delay = net->ipv6.sysctl.flush_delay; fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); return 0; } diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 35508abd76..a31521e270 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -551,6 +551,8 @@ out_unregister_iptun: #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL out_unregister_genl: +#endif +#if IS_ENABLED(CONFIG_IPV6_SEG6_LWTUNNEL) || IS_ENABLED(CONFIG_IPV6_SEG6_HMAC) genl_unregister_family(&seg6_genl_family); #endif out_unregister_pernet: @@ -564,8 +566,9 @@ void seg6_exit(void) seg6_hmac_exit(); #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL + seg6_local_exit(); seg6_iptunnel_exit(); #endif - unregister_pernet_subsys(&ip6_segments_ops); genl_unregister_family(&seg6_genl_family); + unregister_pernet_subsys(&ip6_segments_ops); } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 861e0366f5..bbf5b84a70 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -356,6 +356,7 @@ static int seg6_hmac_init_algo(void) struct crypto_shash *tfm; struct shash_desc *shash; int i, alg_count, cpu; + int ret = -ENOMEM; alg_count = ARRAY_SIZE(hmac_algos); @@ -366,12 +367,14 @@ static int seg6_hmac_init_algo(void) algo = &hmac_algos[i]; algo->tfms = alloc_percpu(struct crypto_shash *); if (!algo->tfms) - return -ENOMEM; + goto error_out; for_each_possible_cpu(cpu) { tfm = crypto_alloc_shash(algo->name, 0, 0); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); + if (IS_ERR(tfm)) { + ret = PTR_ERR(tfm); + goto error_out; + } p_tfm = per_cpu_ptr(algo->tfms, cpu); *p_tfm = tfm; } @@ -383,18 +386,22 @@ static int seg6_hmac_init_algo(void) algo->shashs = alloc_percpu(struct shash_desc *); if (!algo->shashs) - return -ENOMEM; + goto error_out; for_each_possible_cpu(cpu) { shash = kzalloc_node(shsize, GFP_KERNEL, cpu_to_node(cpu)); if (!shash) - return -ENOMEM; + goto error_out; *per_cpu_ptr(algo->shashs, cpu) = shash; } } return 0; + +error_out: + seg6_hmac_exit(); + return ret; } int __init seg6_hmac_init(void) @@ -412,22 +419,29 @@ int __net_init seg6_hmac_net_init(struct net *net) void seg6_hmac_exit(void) { struct seg6_hmac_algo *algo = NULL; + struct crypto_shash *tfm; + struct shash_desc 
*shash; int i, alg_count, cpu; alg_count = ARRAY_SIZE(hmac_algos); for (i = 0; i < alg_count; i++) { algo = &hmac_algos[i]; - for_each_possible_cpu(cpu) { - struct crypto_shash *tfm; - struct shash_desc *shash; - shash = *per_cpu_ptr(algo->shashs, cpu); - kfree(shash); - tfm = *per_cpu_ptr(algo->tfms, cpu); - crypto_free_shash(tfm); + if (algo->shashs) { + for_each_possible_cpu(cpu) { + shash = *per_cpu_ptr(algo->shashs, cpu); + kfree(shash); + } + free_percpu(algo->shashs); + } + + if (algo->tfms) { + for_each_possible_cpu(cpu) { + tfm = *per_cpu_ptr(algo->tfms, cpu); + crypto_free_shash(tfm); + } + free_percpu(algo->tfms); } - free_percpu(algo->tfms); - free_percpu(algo->shashs); } } EXPORT_SYMBOL(seg6_hmac_exit); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 03b877ff45..098632adc9 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -459,34 +459,30 @@ static int seg6_input_core(struct net *net, struct sock *sk, int err; err = seg6_do_srh(skb); - if (unlikely(err)) { - kfree_skb(skb); - return err; - } + if (unlikely(err)) + goto drop; slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); - preempt_disable(); + local_bh_disable(); dst = dst_cache_get(&slwt->cache); - preempt_enable(); if (!dst) { ip6_route_input(skb); dst = skb_dst(skb); if (!dst->error) { - preempt_disable(); dst_cache_set_ip6(&slwt->cache, dst, &ipv6_hdr(skb)->saddr); - preempt_enable(); } } else { skb_dst_drop(skb); skb_dst_set(skb, dst); } + local_bh_enable(); err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) - return err; + goto drop; if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, @@ -494,6 +490,9 @@ static int seg6_input_core(struct net *net, struct sock *sk, skb_dst(skb)->dev, seg6_input_finish); return seg6_input_finish(dev_net(skb->dev), NULL, skb); +drop: + kfree_skb(skb); + return err; } static int seg6_input_nf(struct sk_buff *skb) @@ -535,9 +534,9 @@ static int seg6_output_core(struct net *net, struct sock *sk, slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); - preempt_disable(); + local_bh_disable(); dst = dst_cache_get(&slwt->cache); - preempt_enable(); + local_bh_enable(); if (unlikely(!dst)) { struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -557,9 +556,9 @@ static int seg6_output_core(struct net *net, struct sock *sk, goto drop; } - preempt_disable(); + local_bh_disable(); dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); - preempt_enable(); + local_bh_enable(); } skb_dst_drop(skb); diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 24e2b4b494..c434940131 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -941,8 +941,8 @@ static int input_action_end_dx6(struct sk_buff *skb, if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, - dev_net(skb->dev), NULL, skb, NULL, - skb_dst(skb)->dev, input_action_end_dx6_finish); + dev_net(skb->dev), NULL, skb, skb->dev, + NULL, input_action_end_dx6_finish); return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb); drop: @@ -991,8 +991,8 @@ static int input_action_end_dx4(struct sk_buff *skb, if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, - dev_net(skb->dev), NULL, skb, NULL, - skb_dst(skb)->dev, input_action_end_dx4_finish); + dev_net(skb->dev), NULL, skb, skb->dev, + NULL, input_action_end_dx4_finish); return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb); drop: diff --git a/net/ipv6/tcp_ipv6.c 
b/net/ipv6/tcp_ipv6.c index 3f4cba49e9..2b2eda5a28 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -95,11 +95,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); if (dst && dst_hold_safe(dst)) { - const struct rt6_info *rt = (const struct rt6_info *)dst; - rcu_assign_pointer(sk->sk_rx_dst, dst); sk->sk_rx_dst_ifindex = skb->skb_iif; - sk->sk_rx_dst_cookie = rt6_get_cookie(rt); + sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); } } @@ -1440,7 +1438,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * */ newsk->sk_gso_type = SKB_GSO_TCPV6; - ip6_dst_store(newsk, dst, NULL, NULL); inet6_sk_rx_dst_set(newsk, skb); inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); @@ -1451,6 +1448,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + ip6_dst_store(newsk, dst, NULL, NULL); + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8f7aa8bac1..acafa0cdf7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -168,15 +168,21 @@ static struct sock *udp6_lib_lookup2(struct net *net, { struct sock *sk, *result; int score, badness; + bool need_rescore; result = NULL; badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, sdif); + need_rescore = false; +rescore: + score = compute_score(need_rescore ? result : sk, net, saddr, + sport, daddr, hnum, dif, sdif); if (score > badness) { badness = score; + if (need_rescore) + continue; + if (sk->sk_state == TCP_ESTABLISHED) { result = sk; continue; @@ -197,8 +203,14 @@ static struct sock *udp6_lib_lookup2(struct net *net, if (IS_ERR(result)) continue; - badness = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, sdif); + /* compute_score is too long a function to be + * inlined, and calling it again here yields + * measurable overhead for some + * workloads. Work around it by jumping + * backwards to rescore 'result'. + */ + need_rescore = true; + goto rescore; } } return result; @@ -898,11 +910,8 @@ start_lookup: static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { - if (udp_sk_rx_dst_set(sk, dst)) { - const struct rt6_info *rt = (const struct rt6_info *)dst; - - sk->sk_rx_dst_cookie = rt6_get_cookie(rt); - } + if (udp_sk_rx_dst_set(sk, dst)) + sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); } /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and @@ -1573,7 +1582,7 @@ back_from_confirm: skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, - (struct rt6_info *)dst, + dst_rt6_info(dst), msg->msg_flags, &cork); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) @@ -1600,7 +1609,7 @@ do_append_data: ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), - &ipc6, fl6, (struct rt6_info *)dst, + &ipc6, fl6, dst_rt6_info(dst), corkreq ?
msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 42fb6996b0..4332d4b82b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -56,12 +56,18 @@ static int xfrm6_get_saddr(struct net *net, int oif, { struct dst_entry *dst; struct net_device *dev; + struct inet6_dev *idev; dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark); if (IS_ERR(dst)) return -EHOSTUNREACH; - dev = ip6_dst_idev(dst)->dev; + idev = ip6_dst_idev(dst); + if (!idev) { + dst_release(dst); + return -EHOSTUNREACH; + } + dev = idev->dev; ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6); dst_release(dst); return 0; @@ -70,7 +76,7 @@ static int xfrm6_get_saddr(struct net *net, int oif, static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { - struct rt6_info *rt = (struct rt6_info *)xdst->route; + struct rt6_info *rt = dst_rt6_info(xdst->route); xdst->u.dst.dev = dev; netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8d21ff25f1..4a0fb8731e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -887,22 +887,20 @@ pass: return 1; } -/* UDP encapsulation receive handler. See net/ipv4/udp.c. - * Return codes: - * 0 : success. - * <0: error - * >0: skb should be passed up to userspace as UDP. +/* UDP encapsulation receive and error receive handlers. + * See net/ipv4/udp.c for details. + * + * Note that these functions are called from inside an + * RCU-protected region, but without the socket being locked. + * + * Hence we use rcu_dereference_sk_user_data to access the + * tunnel data structure rather than the usual l2tp_sk_to_tunnel + * accessor function. */ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_tunnel *tunnel; - /* Note that this is called from the encap_rcv hook inside an - * RCU-protected region, but without the socket being locked. - * Hence we use rcu_dereference_sk_user_data to access the - * tunnel data structure rather the usual l2tp_sk_to_tunnel - * accessor function.
- */ tunnel = rcu_dereference_sk_user_data(sk); if (!tunnel) goto pass_up; @@ -919,6 +917,29 @@ pass_up: } EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv); +static void l2tp_udp_encap_err_recv(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) +{ + struct l2tp_tunnel *tunnel; + + tunnel = rcu_dereference_sk_user_data(sk); + if (!tunnel || tunnel->fd < 0) + return; + + sk->sk_err = err; + sk_error_report(sk); + + if (ip_hdr(skb)->version == IPVERSION) { + if (inet_test_bit(RECVERR, sk)) + return ip_icmp_error(sk, skb, err, port, info, payload); +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (inet6_test_bit(RECVERR6, sk)) + return ipv6_icmp_error(sk, skb, err, port, info, payload); +#endif + } +} + /************************************************************************ * Transmit handling ***********************************************************************/ @@ -1493,6 +1514,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, .sk_user_data = tunnel, .encap_type = UDP_ENCAP_L2TPINUDP, .encap_rcv = l2tp_udp_encap_recv, + .encap_err_rcv = l2tp_udp_encap_err_recv, .encap_destroy = l2tp_udp_encap_destroy, }; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 970af3983d..19c8cc5289 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -459,7 +459,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl4 = &inet->cork.fl.u.ip4; if (connected) - rt = (struct rtable *)__sk_dst_check(sk, 0); + rt = dst_rtable(__sk_dst_check(sk, 0)); rcu_read_lock(); if (!rt) { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 7bf14cf9ff..8780ec64f3 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -630,7 +630,7 @@ back_from_confirm: ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? 
transhdrlen : 0); err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, - &fl6, (struct rt6_info *)dst, + &fl6, dst_rt6_info(dst), msg->msg_flags); if (err) ip6_flush_pending_frames(sk); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f67c1d0218..51dc2d9dd6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1607,10 +1607,10 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, /* abort any running channel switch or color change */ link_conf->csa_active = false; link_conf->color_change_active = false; - if (sdata->csa_blocked_tx) { + if (sdata->csa_blocked_queues) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_tx = false; + sdata->csa_blocked_queues = false; } ieee80211_free_next_beacon(link); @@ -3648,7 +3648,7 @@ void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_t struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - sdata->csa_blocked_tx = block_tx; + sdata->csa_blocked_queues = block_tx; sdata_info(sdata, "channel switch failed, disconnecting\n"); wiphy_work_queue(local->hw.wiphy, &ifmgd->csa_connection_drop_work); } @@ -3734,10 +3734,10 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) ieee80211_link_info_change_notify(sdata, link_data, changed); - if (sdata->csa_blocked_tx) { + if (sdata->csa_blocked_queues) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_tx = false; + sdata->csa_blocked_queues = false; } err = drv_post_channel_switch(link_data); @@ -4012,18 +4012,18 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, goto out; } - link_data->csa_chanreq = chanreq; + link_data->csa_chanreq = chanreq; link_conf->csa_active = true; if (params->block_tx && !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) { ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_tx = true; + sdata->csa_blocked_queues = true; } cfg80211_ch_switch_started_notify(sdata->dev, - &link_data->csa_chanreq.oper, 0, + &link_data->csa_chanreq.oper, link_id, params->count, params->block_tx); if (changed) { diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index dce37ba8eb..254d745832 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -311,6 +311,18 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); + /* + * We should perhaps push emulate chanctx down and only + * make it call ->config() when the chanctx is actually + * assigned here (and unassigned below), but that's yet + * another change to all drivers to add assign/unassign + * emulation callbacks. Maybe later. 
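+ * Until then, the compromise below is simpler: under emulated
+ * chanctx, a monitor vif on hardware without WANT_MONITOR_VIF is
+ * never bound to a chanctx at all; this check and its counterpart
+ * in drv_unassign_vif_chanctx() return early, and such drivers
+ * keep seeing channel changes only via ->config().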
+ */ + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + local->emulate_chanctx && + !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + return 0; + if (!check_sdata_in_driver(sdata)) return -EIO; @@ -338,6 +350,11 @@ void drv_unassign_vif_chanctx(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + local->emulate_chanctx && + !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + return; + if (!check_sdata_in_driver(sdata)) return; diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 9f5ffdc9db..ecbb042dd0 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -230,15 +230,21 @@ ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif, if (!he_spr_ie_elem) return; + + he_obss_pd->sr_ctrl = he_spr_ie_elem->he_sr_control; data = he_spr_ie_elem->optional; if (he_spr_ie_elem->he_sr_control & IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT) - data++; + he_obss_pd->non_srg_max_offset = *data++; + if (he_spr_ie_elem->he_sr_control & IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) { - he_obss_pd->max_offset = *data++; he_obss_pd->min_offset = *data++; + he_obss_pd->max_offset = *data++; + memcpy(he_obss_pd->bss_color_bitmap, data, 8); + data += 8; + memcpy(he_obss_pd->partial_bssid_bitmap, data, 8); he_obss_pd->enable = true; } } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index bd507d6b65..70c67c860e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -974,6 +974,7 @@ struct ieee80211_link_data_managed { bool csa_waiting_bcn; bool csa_ignored_same_chan; + bool csa_blocked_tx; struct wiphy_delayed_work chswitch_work; struct wiphy_work request_smps_work; @@ -1092,7 +1093,7 @@ struct ieee80211_sub_if_data { unsigned long state; - bool csa_blocked_tx; + bool csa_blocked_queues; char name[IFNAMSIZ]; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 395de62d9c..7c8a421f09 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -544,10 +544,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do sdata->vif.bss_conf.csa_active = false; if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->deflink.u.mgd.csa_waiting_bcn = false; - if (sdata->csa_blocked_tx) { + if (sdata->csa_blocked_queues) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_tx = false; + sdata->csa_blocked_queues = false; } wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work); @@ -686,6 +686,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do ieee80211_del_virtual_monitor(local); ieee80211_recalc_idle(local); + ieee80211_recalc_offload(local); if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) break; @@ -1121,9 +1122,6 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; int ret; - if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) - return 0; - ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); @@ -1145,11 +1143,13 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) ieee80211_set_default_queues(sdata); - ret = drv_add_interface(local, sdata); - if (WARN_ON(ret)) { - /* ok .. stupid driver, it asked for this! */ - kfree(sdata); - return ret; + if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { + ret = drv_add_interface(local, sdata); + if (WARN_ON(ret)) { + /* ok .. stupid driver, it asked for this! 
*/ + kfree(sdata); + return ret; + } } set_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -1187,9 +1187,6 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) - return; - ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); @@ -1209,7 +1206,8 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) ieee80211_link_release_channel(&sdata->deflink); - drv_remove_interface(local, sdata); + if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + drv_remove_interface(local, sdata); kfree(sdata); } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index a6b62169f0..c0a5c75cdd 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -1017,10 +1017,23 @@ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, */ void mesh_path_flush_pending(struct mesh_path *mpath) { + struct ieee80211_sub_if_data *sdata = mpath->sdata; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct mesh_preq_queue *preq, *tmp; struct sk_buff *skb; while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL) mesh_path_discard_frame(mpath->sdata, skb); + + spin_lock_bh(&ifmsh->mesh_preq_queue_lock); + list_for_each_entry_safe(preq, tmp, &ifmsh->preq_queue.list, list) { + if (ether_addr_equal(mpath->dst, preq->dst)) { + list_del(&preq->list); + kfree(preq); + --ifmsh->preq_queue_len; + } + } + spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); } /** diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3bbb216a0f..497677e3d8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1933,13 +1933,14 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) WARN_ON(!link->conf->csa_active); - if (sdata->csa_blocked_tx) { + if (sdata->csa_blocked_queues) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_tx = false; + sdata->csa_blocked_queues = false; } link->conf->csa_active = false; + link->u.mgd.csa_blocked_tx = false; link->u.mgd.csa_waiting_bcn = false; ret = drv_post_channel_switch(link); @@ -1999,13 +2000,14 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) ieee80211_link_unreserve_chanctx(link); - if (sdata->csa_blocked_tx) { + if (sdata->csa_blocked_queues) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_tx = false; + sdata->csa_blocked_queues = false; } link->conf->csa_active = false; + link->u.mgd.csa_blocked_tx = false; drv_abort_channel_switch(link); } @@ -2165,12 +2167,13 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, link->csa_chanreq = csa_ie.chanreq; link->u.mgd.csa_ignored_same_chan = false; link->u.mgd.beacon_crc_valid = false; + link->u.mgd.csa_blocked_tx = csa_ie.mode; if (csa_ie.mode && !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) { ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_tx = true; + sdata->csa_blocked_queues = true; } cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chanreq.oper, @@ -2199,7 +2202,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, * reset when the disconnection worker runs. 
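 * Note the split state kept below: link->u.mgd.csa_blocked_tx
 * records, per link, that the AP's quiet CSA forbids transmission,
 * while sdata->csa_blocked_queues records that this vif's queues
 * were actually stopped, which only happens when the hardware
 * lacks HANDLES_QUIET_CSA.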
*/ link->conf->csa_active = true; - sdata->csa_blocked_tx = + link->u.mgd.csa_blocked_tx = csa_ie.mode; + sdata->csa_blocked_queues = csa_ie.mode && !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA); wiphy_work_queue(sdata->local->hw.wiphy, @@ -3252,12 +3256,13 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, } sdata->vif.bss_conf.csa_active = false; + sdata->deflink.u.mgd.csa_blocked_tx = false; sdata->deflink.u.mgd.csa_waiting_bcn = false; sdata->deflink.u.mgd.csa_ignored_same_chan = false; - if (sdata->csa_blocked_tx) { + if (sdata->csa_blocked_queues) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_tx = false; + sdata->csa_blocked_queues = false; } /* existing TX TSPEC sessions no longer exist */ @@ -3563,19 +3568,32 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - bool tx; + bool tx = false; lockdep_assert_wiphy(local->hw.wiphy); if (!ifmgd->associated) return; - /* - * MLO drivers should have HANDLES_QUIET_CSA, so that csa_blocked_tx - * is always false; if they don't then this may try to transmit the - * frame but queues will be stopped. - */ - tx = !sdata->csa_blocked_tx; + /* only transmit if we have a link that makes that worthwhile */ + for (unsigned int link_id = 0; + link_id < ARRAY_SIZE(sdata->link); + link_id++) { + struct ieee80211_link_data *link; + + if (!ieee80211_vif_link_active(&sdata->vif, link_id)) + continue; + + link = sdata_dereference(sdata->link[link_id], sdata); + if (WARN_ON_ONCE(!link)) + continue; + + if (link->u.mgd.csa_blocked_tx) + continue; + + tx = true; + break; + } if (!ifmgd->driver_disconnect) { unsigned int link_id; @@ -3608,10 +3626,11 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) /* the other links will be destroyed */ sdata->vif.bss_conf.csa_active = false; sdata->deflink.u.mgd.csa_waiting_bcn = false; - if (sdata->csa_blocked_tx) { + sdata->deflink.u.mgd.csa_blocked_tx = false; + if (sdata->csa_blocked_queues) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_tx = false; + sdata->csa_blocked_queues = false; } ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx, diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 55e5497f89..055a60e909 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -111,7 +111,7 @@ ieee80211_parse_extension_element(u32 *crc, if (params->mode < IEEE80211_CONN_MODE_HE) break; if (len >= sizeof(*elems->he_spr) && - len >= ieee80211_he_spr_size(data)) + len >= ieee80211_he_spr_size(data) - 1) elems->he_spr = data; break; case WLAN_EID_EXT_HE_6GHZ_CAPA: diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 7385031258..3da1c5c450 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -708,19 +708,11 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, return -EBUSY; /* For an MLO connection, if a link ID was specified, validate that it - * is indeed active. If no link ID was specified, select one of the - * active links. + * is indeed active. 
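+ * If no link ID was specified (tsf_report_link_id < 0), it is
+ * left untouched rather than being defaulted to one of the
+ * active links.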
*/ - if (ieee80211_vif_is_mld(&sdata->vif)) { - if (req->tsf_report_link_id >= 0) { - if (!(sdata->vif.active_links & - BIT(req->tsf_report_link_id))) - return -EINVAL; - } else { - req->tsf_report_link_id = - __ffs(sdata->vif.active_links); - } - } + if (ieee80211_vif_is_mld(&sdata->vif) && req->tsf_report_link_id >= 0 && + !(sdata->vif.active_links & BIT(req->tsf_report_link_id))) + return -EINVAL; if (!__ieee80211_can_leave_ch(sdata)) return -EBUSY; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index da5fdd6f5c..aa22f09e6d 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1724,7 +1724,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) skb_queue_head_init(&pending); /* sync with ieee80211_tx_h_unicast_ps_buf */ - spin_lock(&sta->ps_lock); + spin_lock_bh(&sta->ps_lock); /* Send all buffered frames to the station */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { int count = skb_queue_len(&pending), tmp; @@ -1753,7 +1753,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) */ clear_sta_flag(sta, WLAN_STA_PSPOLL); clear_sta_flag(sta, WLAN_STA_UAPSD); - spin_unlock(&sta->ps_lock); + spin_unlock_bh(&sta->ps_lock); atomic_dec(&ps->num_sta_ps); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a237cbcf7b..0da5f6082d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1841,7 +1841,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* add interfaces */ sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); - if (sdata) { + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { /* in HW restart it exists already */ WARN_ON(local->resuming); res = drv_add_interface(local, sdata); diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 8fc790f2a0..4385fd3b13 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -81,7 +81,7 @@ static int mpls_xmit(struct sk_buff *skb) ttl = net->mpls.default_ttl; else ttl = ip_hdr(skb)->ttl; - rt = (struct rtable *)dst; + rt = dst_rtable(dst); } else if (dst->ops->family == AF_INET6) { if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) ttl = tun_encap_info->default_ttl; @@ -90,7 +90,7 @@ static int mpls_xmit(struct sk_buff *skb) ttl = net->mpls.default_ttl; else ttl = ipv6_hdr(skb)->hop_limit; - rt6 = (struct rt6_info *)dst; + rt6 = dst_rt6_info(dst); } else { goto drop; } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 5c17d39146..8bf7ed6d63 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -676,6 +676,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) unsigned int add_addr_accept_max; struct mptcp_addr_info remote; unsigned int subflows_max; + bool sf_created = false; int i, nr; add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); @@ -703,15 +704,18 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) if (nr == 0) return; - msk->pm.add_addr_accepted++; - if (msk->pm.add_addr_accepted >= add_addr_accept_max || - msk->pm.subflows >= subflows_max) - WRITE_ONCE(msk->pm.accept_addr, false); - spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) - __mptcp_subflow_connect(sk, &addrs[i], &remote); + if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0) + sf_created = true; spin_lock_bh(&msk->pm.lock); + + if (sf_created) { + msk->pm.add_addr_accepted++; + if (msk->pm.add_addr_accepted >= add_addr_accept_max || + msk->pm.subflows >= subflows_max) + WRITE_ONCE(msk->pm.accept_addr, false); + } } void mptcp_pm_nl_addr_send_ack(struct 
mptcp_sock *msk) @@ -813,10 +817,13 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); removed = true; - __MPTCP_INC_STATS(sock_net(sk), rm_type); + if (rm_type == MPTCP_MIB_RMSUBFLOW) + __MPTCP_INC_STATS(sock_net(sk), rm_type); } if (rm_type == MPTCP_MIB_RMSUBFLOW) __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap); + else if (rm_type == MPTCP_MIB_RMADDR) + __MPTCP_INC_STATS(sock_net(sk), rm_type); if (!removed) continue; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 965eb69dc5..68e4c08648 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2907,9 +2907,14 @@ void mptcp_set_state(struct sock *sk, int state) if (oldstate != TCP_ESTABLISHED) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); break; - + case TCP_CLOSE_WAIT: + /* Unlike TCP, MPTCP sk would not have the TCP_SYN_RECV state: + * MPTCP "accepted" sockets will be created later on. So no + * transition from TCP_SYN_RECV to TCP_CLOSE_WAIT. + */ + break; default: - if (oldstate == TCP_ESTABLISHED) + if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT) MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); } @@ -3726,6 +3731,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) WRITE_ONCE(msk->write_seq, subflow->idsn); WRITE_ONCE(msk->snd_nxt, subflow->idsn); + WRITE_ONCE(msk->snd_una, subflow->idsn); if (likely(!__mptcp_check_fallback(msk))) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a10ebf3ee1..9d1ee19949 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -308,6 +308,9 @@ struct mptcp_sock { free_first:1, rcvspace_init:1; u32 notsent_lowat; + int keepalive_cnt; + int keepalive_idle; + int keepalive_intvl; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 73fdf423de..19ee684f9e 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -181,8 +181,6 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, switch (optname) { case SO_KEEPALIVE: - mptcp_sol_socket_sync_intval(msk, optname, val); - return 0; case SO_DEBUG: case SO_MARK: case SO_PRIORITY: @@ -624,6 +622,31 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t return ret; } +static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max, + int (*set_val)(struct sock *, int), + int *msk_val, int val) +{ + struct mptcp_subflow_context *subflow; + int err = 0; + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int ret; + + lock_sock(ssk); + ret = set_val(ssk, val); + err = err ? 
: ret; + release_sock(ssk); + } + + if (!err) { + *msk_val = val; + sockopt_seq_inc(msk); + } + + return err; +} + static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) { struct mptcp_subflow_context *subflow; @@ -820,6 +843,22 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_NODELAY: ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); break; + case TCP_KEEPIDLE: + ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE, + &tcp_sock_set_keepidle_locked, + &msk->keepalive_idle, val); + break; + case TCP_KEEPINTVL: + ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL, + &tcp_sock_set_keepintvl, + &msk->keepalive_intvl, val); + break; + case TCP_KEEPCNT: + ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT, + &tcp_sock_set_keepcnt, + &msk->keepalive_cnt, + val); + break; default: ret = -ENOPROTOOPT; } @@ -1322,6 +1361,8 @@ static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, char __user *optval, int __user *optlen) { + struct sock *sk = (void *)msk; + switch (optname) { case TCP_ULP: case TCP_CONGESTION: @@ -1340,6 +1381,18 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_put_int_option(msk, optval, optlen, msk->cork); case TCP_NODELAY: return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); + case TCP_KEEPIDLE: + return mptcp_put_int_option(msk, optval, optlen, + msk->keepalive_idle ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ); + case TCP_KEEPINTVL: + return mptcp_put_int_option(msk, optval, optlen, + msk->keepalive_intvl ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ); + case TCP_KEEPCNT: + return mptcp_put_int_option(msk, optval, optlen, + msk->keepalive_cnt ? 
: + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes)); case TCP_NOTSENT_LOWAT: return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); } @@ -1457,6 +1510,9 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) tcp_set_congestion_control(ssk, msk->ca_name, false, true); __tcp_sock_set_cork(ssk, !!msk->cork); __tcp_sock_set_nodelay(ssk, !!msk->nodelay); + tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle); + tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl); + tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt); inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 374412ed78..ef0f8f7382 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -325,6 +325,7 @@ struct ncsi_dev_priv { spinlock_t lock; /* Protect the NCSI device */ unsigned int package_probe_id;/* Current ID during probe */ unsigned int package_num; /* Number of packages */ + unsigned int channel_probe_id;/* Current channel ID during probe */ struct list_head packages; /* List of packages */ struct ncsi_channel *hot_channel; /* Channel was ever active */ struct ncsi_request requests[256]; /* Request table */ @@ -343,6 +344,7 @@ struct ncsi_dev_priv { bool multi_package; /* Enable multiple packages */ bool mlx_multi_host; /* Enable multi host Mellanox */ u32 package_whitelist; /* Packages to configure */ + unsigned char channel_count; /* Num of channels to probe */ }; struct ncsi_cmd_arg { diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 745c788f1d..5ecf611c88 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -510,17 +510,19 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) break; case ncsi_dev_state_suspend_gls: - ndp->pending_req_num = np->channel_num; + ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_GLS; nca.package = np->id; + nca.channel = ndp->channel_probe_id; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + ndp->channel_probe_id++; - nd->state = ncsi_dev_state_suspend_dcnt; - NCSI_FOR_EACH_CHANNEL(np, nc) { - nca.channel = nc->id; - ret = ncsi_xmit_cmd(&nca); - if (ret) - goto error; + if (ndp->channel_probe_id == ndp->channel_count) { + ndp->channel_probe_id = 0; + nd->state = ncsi_dev_state_suspend_dcnt; } break; @@ -1345,7 +1347,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) { struct ncsi_dev *nd = &ndp->ndev; struct ncsi_package *np; - struct ncsi_channel *nc; struct ncsi_cmd_arg nca; unsigned char index; int ret; @@ -1423,23 +1424,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_cis; break; - case ncsi_dev_state_probe_cis: - ndp->pending_req_num = NCSI_RESERVED_CHANNEL; - - /* Clear initial state */ - nca.type = NCSI_PKT_CMD_CIS; - nca.package = ndp->active_package->id; - for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) { - nca.channel = index; - ret = ncsi_xmit_cmd(&nca); - if (ret) - goto error; - } - - nd->state = ncsi_dev_state_probe_gvi; - if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)) - nd->state = ncsi_dev_state_probe_keep_phy; - break; case ncsi_dev_state_probe_keep_phy: ndp->pending_req_num = 1; @@ -1452,14 +1436,17 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_gvi; break; + case ncsi_dev_state_probe_cis: case ncsi_dev_state_probe_gvi: case ncsi_dev_state_probe_gc: case ncsi_dev_state_probe_gls: np = ndp->active_package; - ndp->pending_req_num = 
np->channel_num; + ndp->pending_req_num = 1; - /* Retrieve version, capability or link status */ - if (nd->state == ncsi_dev_state_probe_gvi) + /* Clear initial state, retrieve version, capability or link status */ + if (nd->state == ncsi_dev_state_probe_cis) + nca.type = NCSI_PKT_CMD_CIS; + else if (nd->state == ncsi_dev_state_probe_gvi) nca.type = NCSI_PKT_CMD_GVI; else if (nd->state == ncsi_dev_state_probe_gc) nca.type = NCSI_PKT_CMD_GC; else nca.type = NCSI_PKT_CMD_GLS; nca.package = np->id; - NCSI_FOR_EACH_CHANNEL(np, nc) { - nca.channel = nc->id; - ret = ncsi_xmit_cmd(&nca); - if (ret) - goto error; - } + nca.channel = ndp->channel_probe_id; - if (nd->state == ncsi_dev_state_probe_gvi) + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + if (nd->state == ncsi_dev_state_probe_cis) { + nd->state = ncsi_dev_state_probe_gvi; + if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) && ndp->channel_probe_id == 0) + nd->state = ncsi_dev_state_probe_keep_phy; + } else if (nd->state == ncsi_dev_state_probe_gvi) { nd->state = ncsi_dev_state_probe_gc; - else if (nd->state == ncsi_dev_state_probe_gc) + } else if (nd->state == ncsi_dev_state_probe_gc) { nd->state = ncsi_dev_state_probe_gls; - else + } else { + nd->state = ncsi_dev_state_probe_cis; + ndp->channel_probe_id++; + } + + if (ndp->channel_probe_id == ndp->channel_count) { + ndp->channel_probe_id = 0; nd->state = ncsi_dev_state_probe_dp; + } break; case ncsi_dev_state_probe_dp: ndp->pending_req_num = 1; @@ -1780,6 +1777,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, ndp->requests[i].ndp = ndp; timer_setup(&ndp->requests[i].timer, ncsi_request_timeout, 0); } + ndp->channel_count = NCSI_RESERVED_CHANNEL; spin_lock_irqsave(&ncsi_dev_lock, flags); list_add_tail_rcu(&ndp->node, &ncsi_dev_list); @@ -1813,6 +1811,7 @@ int ncsi_start_dev(struct ncsi_dev *nd) if (!(ndp->flags & NCSI_DEV_PROBED)) { ndp->package_probe_id = 0; + ndp->channel_probe_id = 0; nd->state = ncsi_dev_state_probe; schedule_work(&ndp->work); return 0; diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index bee290d0f4..e28be33bdf 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -795,12 +795,13 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr) struct ncsi_rsp_gc_pkt *rsp; struct ncsi_dev_priv *ndp = nr->ndp; struct ncsi_channel *nc; + struct ncsi_package *np; size_t size; /* Find the channel */ rsp = (struct ncsi_rsp_gc_pkt *)skb_network_header(nr->rsp); ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, - NULL, &nc); + &np, &nc); if (!nc) return -ENODEV; @@ -835,6 +836,7 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr) */ nc->vlan_filter.bitmap = U64_MAX; nc->vlan_filter.n_vids = rsp->vlan_cnt; + np->ndp->channel_count = rsp->channel_cnt; return 0; } diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 3126911f50..b00fc285b3 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -815,12 +815,21 @@ int __init netfilter_init(void) if (ret < 0) goto err; +#ifdef CONFIG_LWTUNNEL + ret = netfilter_lwtunnel_init(); + if (ret < 0) + goto err_lwtunnel_pernet; +#endif ret = netfilter_log_init(); if (ret < 0) - goto err_pernet; + goto err_log_pernet; return 0; -err_pernet: +err_log_pernet: +#ifdef CONFIG_LWTUNNEL + netfilter_lwtunnel_fini(); +err_lwtunnel_pernet: +#endif unregister_pernet_subsys(&netfilter_net_ops); err: return ret; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 
3184cc6be4..61431690cb 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -53,12 +53,13 @@ MODULE_DESCRIPTION("core IP set support"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* When the nfnl mutex or ip_set_ref_lock is held: */ -#define ip_set_dereference(p) \ - rcu_dereference_protected(p, \ +#define ip_set_dereference(inst) \ + rcu_dereference_protected((inst)->ip_set_list, \ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \ - lockdep_is_held(&ip_set_ref_lock)) + lockdep_is_held(&ip_set_ref_lock) || \ + (inst)->is_deleted) #define ip_set(inst, id) \ - ip_set_dereference((inst)->ip_set_list)[id] + ip_set_dereference(inst)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] #define ip_set_dereference_nfnl(p) \ @@ -1133,7 +1134,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info, if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ - tmp = ip_set_dereference(inst->ip_set_list); + tmp = ip_set_dereference(inst); memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); rcu_assign_pointer(inst->ip_set_list, list); /* Make sure all current packets have passed through */ @@ -1172,23 +1173,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { .len = IPSET_MAXNAMELEN - 1 }, }; +/* In order to return quickly when destroying a single set, it is split + * into two stages: + * - Cancel garbage collector + * - Destroy the set itself via call_rcu() + */ + static void -ip_set_destroy_set(struct ip_set *set) +ip_set_destroy_set_rcu(struct rcu_head *head) { - pr_debug("set: %s\n", set->name); + struct ip_set *set = container_of(head, struct ip_set, rcu); - /* Must call it without holding any lock */ set->variant->destroy(set); module_put(set->type->me); kfree(set); } static void -ip_set_destroy_set_rcu(struct rcu_head *head) +_destroy_all_sets(struct ip_set_net *inst) { - struct ip_set *set = container_of(head, struct ip_set, rcu); + struct ip_set *set; + ip_set_id_t i; + bool need_wait = false; - ip_set_destroy_set(set); + /* First cancel gc's: set:list sets are flushed as well */ + for (i = 0; i < inst->ip_set_max; i++) { + set = ip_set(inst, i); + if (set) { + set->variant->cancel_gc(set); + if (set->type->features & IPSET_TYPE_NAME) + need_wait = true; + } + } + /* Must wait for flush to be really finished */ + if (need_wait) + rcu_barrier(); + for (i = 0; i < inst->ip_set_max; i++) { + set = ip_set(inst, i); + if (set) { + ip_set(inst, i) = NULL; + set->variant->destroy(set); + module_put(set->type->me); + kfree(set); + } + } } static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, @@ -1202,11 +1230,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Commands are serialized and references are * protected by the ip_set_ref_lock. * External systems (i.e. xt_set) must call - * ip_set_put|get_nfnl_* functions, that way we + * ip_set_nfnl_get_* functions, that way we * can safely check references here. * * list:set timer can only decrement the reference @@ -1214,8 +1241,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * without holding the lock. 
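 * Hence checking the reference counters while holding
 * ip_set_ref_lock for reading is sufficient to decide whether a
 * set may be destroyed.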
*/ if (!attr[IPSET_ATTR_SETNAME]) { - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); @@ -1226,15 +1251,7 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, } inst->is_destroyed = true; read_unlock_bh(&ip_set_ref_lock); - for (i = 0; i < inst->ip_set_max; i++) { - s = ip_set(inst, i); - if (s) { - ip_set(inst, i) = NULL; - /* Must cancel garbage collectors */ - s->variant->cancel_gc(s); - ip_set_destroy_set(s); - } - } + _destroy_all_sets(inst); /* Modified by ip_set_destroy() only, which is serialized */ inst->is_destroyed = false; } else { @@ -1255,12 +1272,12 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); if (features & IPSET_TYPE_NAME) { /* Must wait for flush to be really finished */ rcu_barrier(); } - /* Must cancel garbage collectors */ - s->variant->cancel_gc(s); call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; @@ -2365,30 +2382,25 @@ ip_set_net_init(struct net *net) } static void __net_exit -ip_set_net_exit(struct net *net) +ip_set_net_pre_exit(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); - struct ip_set *set = NULL; - ip_set_id_t i; - inst->is_deleted = true; /* flag for ip_set_nfnl_put */ +} - nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < inst->ip_set_max; i++) { - set = ip_set(inst, i); - if (set) { - ip_set(inst, i) = NULL; - set->variant->cancel_gc(set); - ip_set_destroy_set(set); - } - } - nfnl_unlock(NFNL_SUBSYS_IPSET); +static void __net_exit +ip_set_net_exit(struct net *net) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + _destroy_all_sets(inst); kvfree(rcu_dereference_protected(inst->ip_set_list, 1)); } static struct pernet_operations ip_set_net_ops = { .init = ip_set_net_init, + .pre_exit = ip_set_net_pre_exit, .exit = ip_set_net_exit, .id = &ip_set_net_id, .size = sizeof(struct ip_set_net), diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 6c3f28bc59..bfae706693 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -79,7 +79,7 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, struct set_elem *e; int ret; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -99,7 +99,7 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, struct set_elem *e; int ret; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -188,9 +188,10 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct list_set *map = set->data; struct set_adt_elem *d = value; struct set_elem *e, *next, *prev = NULL; - int ret; + int ret = 0; - list_for_each_entry(e, &map->members, list) { + rcu_read_lock(); + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -201,6 +202,7 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (d->before == 0) { ret = 1; + goto out; } else if (d->before > 0) { next = list_next_entry(e, list); 
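/* Illustrative read-side sketch, assuming the usual list:set types
 * (struct list_set *map, struct set_elem *e): after this conversion,
 * traversal of map->members runs under RCU and skips entries whose
 * timeout has expired.
 *
 *	rcu_read_lock();
 *	list_for_each_entry_rcu(e, &map->members, list) {
 *		if (SET_WITH_TIMEOUT(set) &&
 *		    ip_set_timeout_expired(ext_timeout(e, set)))
 *			continue;
 *		...
 *	}
 *	rcu_read_unlock();
 */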
ret = !list_is_last(&e->list, &map->members) && @@ -208,9 +210,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, } else { ret = prev && prev->id == d->refid; } - return ret; + goto out; } - return 0; +out: + rcu_read_unlock(); + return ret; } static void @@ -239,7 +243,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Find where to add the new entry */ n = prev = next = NULL; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -316,9 +320,9 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct list_set *map = set->data; struct set_adt_elem *d = value; - struct set_elem *e, *next, *prev = NULL; + struct set_elem *e, *n, *next, *prev = NULL; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_safe(e, n, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -424,14 +428,8 @@ static void list_set_destroy(struct ip_set *set) { struct list_set *map = set->data; - struct set_elem *e, *n; - list_for_each_entry_safe(e, n, &map->members, list) { - list_del(&e->list); - ip_set_put_byindex(map->net, e->id); - ip_set_ext_destroy(set, e); - kfree(e); - } + WARN_ON_ONCE(!list_empty(&map->members)); kfree(map); set->data = NULL; @@ -549,6 +547,9 @@ list_set_cancel_gc(struct ip_set *set) if (SET_WITH_TIMEOUT(set)) timer_shutdown_sync(&map->gc); + + /* Flush list to drop references to other ipsets */ + list_set_flush(set); } static const struct ip_set_type_variant set_variant = { diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 65e0259178..e1f17392f5 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -180,7 +180,7 @@ static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb, (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && (addr_type & IPV6_ADDR_LOOPBACK); old_rt_is_local = __ip_vs_is_local_route6( - (struct rt6_info *)skb_dst(skb)); + dst_rt6_info(skb_dst(skb))); } else #endif { @@ -318,7 +318,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (dest) { dest_dst = __ip_vs_dst_check(dest); if (likely(dest_dst)) - rt = (struct rtable *) dest_dst->dst_cache; + rt = dst_rtable(dest_dst->dst_cache); else { dest_dst = ip_vs_dest_dst_alloc(); spin_lock_bh(&dest->dst_lock); @@ -481,7 +481,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (dest) { dest_dst = __ip_vs_dst_check(dest); if (likely(dest_dst)) - rt = (struct rt6_info *) dest_dst->dst_cache; + rt = dst_rt6_info(dest_dst->dst_cache); else { u32 cookie; @@ -501,7 +501,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, ip_vs_dest_dst_free(dest_dst); goto err_unreach; } - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); cookie = rt6_get_cookie(rt); __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); spin_unlock_bh(&dest->dst_lock); @@ -517,7 +517,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, rt_mode); if (!dst) goto err_unreach; - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); } local = __ip_vs_is_local_route6(rt); @@ -862,7 +862,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_RDR); if (local < 0) goto tx_error; - rt = (struct rt6_info *) skb_dst(skb); + rt = 
dst_rt6_info(skb_dst(skb)); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed @@ -1288,7 +1288,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (local) return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); - rt = (struct rt6_info *) skb_dst(skb); + rt = dst_rt6_info(skb_dst(skb)); tdev = rt->dst.dev; /* @@ -1590,7 +1590,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); if (local < 0) goto tx_error; - rt = (struct rt6_info *) skb_dst(skb); + rt = dst_rt6_info(skb_dst(skb)); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0ee98ce5b8..559665467b 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -22,9 +22,6 @@ #include #include #include -#ifdef CONFIG_LWTUNNEL -#include -#endif #include static bool enable_hooks __read_mostly; @@ -612,9 +609,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, #endif -#ifdef CONFIG_LWTUNNEL - NF_SYSCTL_CT_LWTUNNEL, -#endif __NF_SYSCTL_CT_LAST_SYSCTL, }; @@ -947,15 +941,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, -#endif -#ifdef CONFIG_LWTUNNEL - [NF_SYSCTL_CT_LWTUNNEL] = { - .procname = "nf_hooks_lwtunnel", - .data = NULL, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = nf_hooks_lwtunnel_sysctl_handler, - }, #endif {} }; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index a057133923..5c1ff07eae 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -77,12 +77,8 @@ EXPORT_SYMBOL_GPL(flow_offload_alloc); static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) { - const struct rt6_info *rt; - - if (flow_tuple->l3proto == NFPROTO_IPV6) { - rt = (const struct rt6_info *)flow_tuple->dst_cache; - return rt6_get_cookie(rt); - } + if (flow_tuple->l3proto == NFPROTO_IPV6) + return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache)); return 0; } diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 5383bed3d3..c2c005234d 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -434,7 +434,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, return NF_ACCEPT; if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { - rt = (struct rtable *)tuplehash->tuple.dst_cache; + rt = dst_rtable(tuplehash->tuple.dst_cache); memset(skb->cb, 0, sizeof(struct inet_skb_parm)); IPCB(skb)->iif = skb->dev->ifindex; IPCB(skb)->flags = IPSKB_FORWARDED; @@ -446,7 +446,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: - rt = (struct rtable *)tuplehash->tuple.dst_cache; + rt = dst_rtable(tuplehash->tuple.dst_cache); outdev = rt->dst.dev; skb->dev = outdev; nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); @@ -729,7 +729,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, return NF_ACCEPT; if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { - rt = (struct rt6_info *)tuplehash->tuple.dst_cache; + rt = dst_rt6_info(tuplehash->tuple.dst_cache); memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); IP6CB(skb)->iif = 
skb->dev->ifindex; IP6CB(skb)->flags = IP6SKB_FORWARDED; @@ -741,7 +741,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: - rt = (struct rt6_info *)tuplehash->tuple.dst_cache; + rt = dst_rt6_info(tuplehash->tuple.dst_cache); outdev = rt->dst.dev; skb->dev = outdev; nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c index 00e89ffd78..7cdb59bb44 100644 --- a/net/netfilter/nf_hooks_lwtunnel.c +++ b/net/netfilter/nf_hooks_lwtunnel.c @@ -3,6 +3,9 @@ #include #include #include +#include + +#include "nf_internals.h" static inline int nf_hooks_lwtunnel_get(void) { @@ -50,4 +53,68 @@ int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, return ret; } EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler); + +static struct ctl_table nf_lwtunnel_sysctl_table[] = { + { + .procname = "nf_hooks_lwtunnel", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = nf_hooks_lwtunnel_sysctl_handler, + }, +}; + +static int __net_init nf_lwtunnel_net_init(struct net *net) +{ + struct ctl_table_header *hdr; + struct ctl_table *table; + + table = nf_lwtunnel_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(nf_lwtunnel_sysctl_table, + sizeof(nf_lwtunnel_sysctl_table), + GFP_KERNEL); + if (!table) + goto err_alloc; + } + + hdr = register_net_sysctl_sz(net, "net/netfilter", table, + ARRAY_SIZE(nf_lwtunnel_sysctl_table)); + if (!hdr) + goto err_reg; + + net->nf.nf_lwtnl_dir_header = hdr; + + return 0; +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit nf_lwtunnel_net_exit(struct net *net) +{ + const struct ctl_table *table; + + table = net->nf.nf_lwtnl_dir_header->ctl_table_arg; + unregister_net_sysctl_table(net->nf.nf_lwtnl_dir_header); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations nf_lwtunnel_net_ops = { + .init = nf_lwtunnel_net_init, + .exit = nf_lwtunnel_net_exit, +}; + +int __init netfilter_lwtunnel_init(void) +{ + return register_pernet_subsys(&nf_lwtunnel_net_ops); +} + +void netfilter_lwtunnel_fini(void) +{ + unregister_pernet_subsys(&nf_lwtunnel_net_ops); +} #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 832ae64179..2540302306 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -29,6 +29,12 @@ void nf_queue_nf_hook_drop(struct net *net); /* nf_log.c */ int __init netfilter_log_init(void); +#ifdef CONFIG_LWTUNNEL +/* nf_hooks_lwtunnel.c */ +int __init netfilter_lwtunnel_init(void); +void netfilter_lwtunnel_fini(void); +#endif + /* core.c */ void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp, const struct nf_hook_ops *reg); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 00f4bd21c5..f1c31757e4 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -169,7 +169,9 @@ instance_destroy_rcu(struct rcu_head *head) struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, rcu); + rcu_read_lock(); nfqnl_flush(inst, NULL, 0); + rcu_read_unlock(); kfree(inst); module_put(THIS_MODULE); } diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 37cfe6dd71..b58f62195f 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -35,11 +35,9 @@ int nft_fib_validate(const struct nft_ctx *ctx, 
const struct nft_expr *expr, switch (priv->result) { case NFT_FIB_RESULT_OIF: case NFT_FIB_RESULT_OIFNAME: - hooks = (1 << NF_INET_PRE_ROUTING); - if (priv->flags & NFTA_FIB_F_IIF) { - hooks |= (1 << NF_INET_LOCAL_IN) | - (1 << NF_INET_FORWARD); - } + hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD); break; case NFT_FIB_RESULT_ADDRTYPE: if (priv->flags & NFTA_FIB_F_IIF) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index ba0d3683a4..9139ce38ea 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -839,6 +839,9 @@ static int nft_meta_inner_init(const struct nft_ctx *ctx, struct nft_meta *priv = nft_expr_priv(expr); unsigned int len; + if (!tb[NFTA_META_KEY] || !tb[NFTA_META_DREG]) + return -EINVAL; + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { case NFT_META_PROTOCOL: diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 0a689c8e02..50429cbd42 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -45,36 +45,27 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) int mac_off = skb_mac_header(skb) - skb->data; u8 *vlanh, *dst_u8 = (u8 *) d; struct vlan_ethhdr veth; - u8 vlan_hlen = 0; - - if ((skb->protocol == htons(ETH_P_8021AD) || - skb->protocol == htons(ETH_P_8021Q)) && - offset >= VLAN_ETH_HLEN && offset < VLAN_ETH_HLEN + VLAN_HLEN) - vlan_hlen += VLAN_HLEN; vlanh = (u8 *) &veth; - if (offset < VLAN_ETH_HLEN + vlan_hlen) { + if (offset < VLAN_ETH_HLEN) { u8 ethlen = len; - if (vlan_hlen && - skb_copy_bits(skb, mac_off, &veth, VLAN_ETH_HLEN) < 0) - return false; - else if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth)) + if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth)) return false; - if (offset + len > VLAN_ETH_HLEN + vlan_hlen) - ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen; + if (offset + len > VLAN_ETH_HLEN) + ethlen -= offset + len - VLAN_ETH_HLEN; - memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen); + memcpy(dst_u8, vlanh + offset, ethlen); len -= ethlen; if (len == 0) return true; dst_u8 += ethlen; - offset = ETH_HLEN + vlan_hlen; + offset = ETH_HLEN; } else { - offset -= VLAN_HLEN + vlan_hlen; + offset -= VLAN_HLEN; } return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; @@ -154,12 +145,12 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt) return pkt->inneroff; } -static bool nft_payload_need_vlan_copy(const struct nft_payload *priv) +static bool nft_payload_need_vlan_adjust(u32 offset, u32 len) { - unsigned int len = priv->offset + priv->len; + unsigned int boundary = offset + len; /* data past ether src/dst requested, copy needed */ - if (len > offsetof(struct ethhdr, h_proto)) + if (boundary > offsetof(struct ethhdr, h_proto)) return true; return false; @@ -183,7 +174,7 @@ void nft_payload_eval(const struct nft_expr *expr, goto err; if (skb_vlan_tag_present(skb) && - nft_payload_need_vlan_copy(priv)) { + nft_payload_need_vlan_adjust(priv->offset, priv->len)) { if (!nft_payload_copy_vlan(dest, skb, priv->offset, priv->len)) goto err; @@ -659,6 +650,10 @@ static int nft_payload_inner_init(const struct nft_ctx *ctx, struct nft_payload *priv = nft_expr_priv(expr); u32 base; + if (!tb[NFTA_PAYLOAD_BASE] || !tb[NFTA_PAYLOAD_OFFSET] || + !tb[NFTA_PAYLOAD_LEN] || !tb[NFTA_PAYLOAD_DREG]) + return -EINVAL; + base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); switch (base) { case NFT_PAYLOAD_TUN_HEADER: @@ -810,21 +805,79 @@ struct nft_payload_set { u8 csum_flags; }; +/* 
This is not struct vlan_hdr. */ +struct nft_payload_vlan_hdr { + __be16 h_vlan_proto; + __be16 h_vlan_TCI; +}; + +static bool +nft_payload_set_vlan(const u32 *src, struct sk_buff *skb, u8 offset, u8 len, + int *vlan_hlen) +{ + struct nft_payload_vlan_hdr *vlanh; + __be16 vlan_proto; + u16 vlan_tci; + + if (offset >= offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto)) { + *vlan_hlen = VLAN_HLEN; + return true; + } + + switch (offset) { + case offsetof(struct vlan_ethhdr, h_vlan_proto): + if (len == 2) { + vlan_proto = nft_reg_load_be16(src); + skb->vlan_proto = vlan_proto; + } else if (len == 4) { + vlanh = (struct nft_payload_vlan_hdr *)src; + __vlan_hwaccel_put_tag(skb, vlanh->h_vlan_proto, + ntohs(vlanh->h_vlan_TCI)); + } else { + return false; + } + break; + case offsetof(struct vlan_ethhdr, h_vlan_TCI): + if (len != 2) + return false; + + vlan_tci = ntohs(nft_reg_load_be16(src)); + skb->vlan_tci = vlan_tci; + break; + default: + return false; + } + + return true; +} + static void nft_payload_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_payload_set *priv = nft_expr_priv(expr); - struct sk_buff *skb = pkt->skb; const u32 *src = &regs->data[priv->sreg]; - int offset, csum_offset; + int offset, csum_offset, vlan_hlen = 0; + struct sk_buff *skb = pkt->skb; __wsum fsum, tsum; switch (priv->base) { case NFT_PAYLOAD_LL_HEADER: if (!skb_mac_header_was_set(skb)) goto err; + + if (skb_vlan_tag_present(skb) && + nft_payload_need_vlan_adjust(priv->offset, priv->len)) { + if (!nft_payload_set_vlan(src, skb, + priv->offset, priv->len, + &vlan_hlen)) + goto err; + + if (!vlan_hlen) + return; + } + + offset = skb_mac_header(skb) - skb->data - vlan_hlen; break; case NFT_PAYLOAD_NETWORK_HEADER: offset = skb_network_offset(skb); diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 24d9771385..14d88394bc 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -73,14 +73,14 @@ void nft_rt_get_eval(const struct nft_expr *expr, if (nft_pf(pkt) != NFPROTO_IPV4) goto err; - *dest = (__force u32)rt_nexthop((const struct rtable *)dst, + *dest = (__force u32)rt_nexthop(dst_rtable(dst), ip_hdr(skb)->daddr); break; case NFT_RT_NEXTHOP6: if (nft_pf(pkt) != NFPROTO_IPV6) goto err; - memcpy(dest, rt6_nexthop((struct rt6_info *)dst, + memcpy(dest, rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr), sizeof(struct in6_addr)); break; diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 70480869ad..bd2b17b219 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -285,22 +285,14 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, return 0; } -static inline void __nr_remove_node(struct nr_node *nr_node) +static void nr_remove_node_locked(struct nr_node *nr_node) { + lockdep_assert_held(&nr_node_list_lock); + hlist_del_init(&nr_node->node_node); nr_node_put(nr_node); } -#define nr_remove_node_locked(__node) \ - __nr_remove_node(__node) - -static void nr_remove_node(struct nr_node *nr_node) -{ - spin_lock_bh(&nr_node_list_lock); - __nr_remove_node(nr_node); - spin_unlock_bh(&nr_node_list_lock); -} - static inline void __nr_remove_neigh(struct nr_neigh *nr_neigh) { hlist_del_init(&nr_neigh->neigh_node); @@ -339,6 +331,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n return -EINVAL; } + spin_lock_bh(&nr_node_list_lock); nr_node_lock(nr_node); for (i = 0; i < nr_node->count; i++) { if
(nr_node->routes[i].neighbour == nr_neigh) { @@ -352,7 +345,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n nr_node->count--; if (nr_node->count == 0) { - nr_remove_node(nr_node); + nr_remove_node_locked(nr_node); } else { switch (i) { case 0: @@ -367,12 +360,14 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n nr_node_put(nr_node); } nr_node_unlock(nr_node); + spin_unlock_bh(&nr_node_list_lock); return 0; } } nr_neigh_put(nr_neigh); nr_node_unlock(nr_node); + spin_unlock_bh(&nr_node_list_lock); nr_node_put(nr_node); return -EINVAL; diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 4e7c968cde..5e3ca068f0 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -121,7 +121,8 @@ static void nr_heartbeat_expiry(struct timer_list *t) is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { - sock_hold(sk); + if (sk->sk_state == TCP_LISTEN) + sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); goto out; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index b133dc5530..f456a5911e 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1463,6 +1463,19 @@ int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, ndev->ops->n_core_ops); } +static bool nci_valid_size(struct sk_buff *skb) +{ + BUILD_BUG_ON(NCI_CTRL_HDR_SIZE != NCI_DATA_HDR_SIZE); + unsigned int hdr_size = NCI_CTRL_HDR_SIZE; + + if (skb->len < hdr_size || + !nci_plen(skb->data) || + skb->len < hdr_size + nci_plen(skb->data)) { + return false; + } + return true; +} + /* ---- NCI TX Data worker thread ---- */ static void nci_tx_work(struct work_struct *work) @@ -1516,10 +1529,9 @@ static void nci_rx_work(struct work_struct *work) nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); - if (!nci_plen(skb->data)) { + if (!nci_valid_size(skb)) { kfree_skb(skb); - kcov_remote_stop(); - break; + continue; } /* Process frame */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 6fcd7e2ca8..9642255808 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -936,6 +936,12 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, pskb_trim(skb, ovs_mac_header_len(key)); } + /* Need to set the pkt_type to involve the routing layer. The + * packet movement through the OVS datapath doesn't generally + * use routing, but this is needed for tunnel cases. + */ + skb->pkt_type = PACKET_OUTGOING; + if (likely(!mru || (skb->len <= mru + vport->dev->hard_header_len))) { ovs_vport_send(vport, skb, ovs_key_mac_proto(key)); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 33b21a0c05..8a848ce72e 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -561,7 +561,6 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, */ key->tp.src = htons(icmp->icmp6_type); key->tp.dst = htons(icmp->icmp6_code); - memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || @@ -570,6 +569,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, struct nd_msg *nd; int offset; + memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); + /* In order to process neighbor discovery options, we need the * entire packet. 
*/ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 18f616f487..ea3ebc160e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2522,8 +2522,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb) ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); - if (!packet_read_pending(&po->tx_ring)) - complete(&po->skb_completion); + complete(&po->skb_completion); } sock_wfree(skb); @@ -3800,28 +3799,30 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, case PACKET_TX_RING: { union tpacket_req_u req_u; - int len; + ret = -EINVAL; lock_sock(sk); switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: - len = sizeof(req_u.req); + if (optlen < sizeof(req_u.req)) + break; + ret = copy_from_sockptr(&req_u.req, optval, + sizeof(req_u.req)) ? + -EINVAL : 0; break; case TPACKET_V3: default: - len = sizeof(req_u.req3); + if (optlen < sizeof(req_u.req3)) + break; + ret = copy_from_sockptr(&req_u.req3, optval, + sizeof(req_u.req3)) ? + -EINVAL : 0; break; } - if (optlen < len) { - ret = -EINVAL; - } else { - if (copy_from_sockptr(&req_u.req, optval, len)) - ret = -EFAULT; - else - ret = packet_set_ring(sk, &req_u, 0, - optname == PACKET_TX_RING); - } + if (!ret) + ret = packet_set_ring(sk, &req_u, 0, + optname == PACKET_TX_RING); release_sock(sk); return ret; } diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index abb0c70ffc..654a3cc0d3 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -725,6 +725,24 @@ int qrtr_ns_init(void) if (ret < 0) goto err_wq; + /* As the qrtr ns socket owner and creator is the same module, we have + * to decrease the qrtr module reference count to guarantee that it + * remains zero after the ns socket is created, otherwise, executing + * "rmmod" command is unable to make the qrtr module deleted after the + * qrtr module is inserted successfully. + * + * However, the reference count is increased twice in + * sock_create_kern(): one is to increase the reference count of owner + * of qrtr socket's proto_ops struct; another is to increment the + * reference count of owner of qrtr proto struct. Therefore, we must + * decrement the module reference count twice to ensure that it keeps + * zero after server's listening socket is created. Of course, we + * must bump the module reference count twice as well before the socket + * is closed. + */ + module_put(qrtr_ns.sock->ops->owner); + module_put(qrtr_ns.sock->sk->sk_prot_creator->owner); + return 0; err_wq: @@ -739,6 +757,15 @@ void qrtr_ns_remove(void) { cancel_work_sync(&qrtr_ns.work); destroy_workqueue(qrtr_ns.workqueue); + + /* sock_release() expects the two references that were put during + * qrtr_ns_init(). This function is only called during module remove, + * so try_stop_module() has already set the refcnt to 0. Use + * __module_get() instead of try_module_get() to successfully take two + * references. + */ + __module_get(qrtr_ns.sock->ops->owner); + __module_get(qrtr_ns.sock->sk->sk_prot_creator->owner); sock_release(qrtr_ns.sock); } EXPORT_SYMBOL_GPL(qrtr_ns_remove); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9ee622fb11..2520708b06 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -830,7 +830,6 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, u32 max; if (*index) { -again: rcu_read_lock(); p = idr_find(&idrinfo->action_idr, *index); @@ -839,7 +838,7 @@ again: * index but did not assign the pointer yet. 
*/ rcu_read_unlock(); - goto again; + return -EAGAIN; } if (!p) { diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index baac083fd8..2a96d9c1db 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -41,21 +41,26 @@ static struct workqueue_struct *act_ct_wq; static struct rhashtable zones_ht; static DEFINE_MUTEX(zones_mutex); +struct zones_ht_key { + struct net *net; + u16 zone; +}; + struct tcf_ct_flow_table { struct rhash_head node; /* In zones tables */ struct rcu_work rwork; struct nf_flowtable nf_ft; refcount_t ref; - u16 zone; + struct zones_ht_key key; bool dying; }; static const struct rhashtable_params zones_params = { .head_offset = offsetof(struct tcf_ct_flow_table, node), - .key_offset = offsetof(struct tcf_ct_flow_table, zone), - .key_len = sizeof_field(struct tcf_ct_flow_table, zone), + .key_offset = offsetof(struct tcf_ct_flow_table, key), + .key_len = sizeof_field(struct tcf_ct_flow_table, key), .automatic_shrinking = true, }; @@ -316,11 +321,12 @@ static struct nf_flowtable_type flowtable_ct = { static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) { + struct zones_ht_key key = { .net = net, .zone = params->zone }; struct tcf_ct_flow_table *ct_ft; int err = -ENOMEM; mutex_lock(&zones_mutex); - ct_ft = rhashtable_lookup_fast(&zones_ht, &params->zone, zones_params); + ct_ft = rhashtable_lookup_fast(&zones_ht, &key, zones_params); if (ct_ft && refcount_inc_not_zero(&ct_ft->ref)) goto out_unlock; @@ -329,7 +335,7 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) goto err_alloc; refcount_set(&ct_ft->ref, 1); - ct_ft->zone = params->zone; + ct_ft->key = key; err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params); if (err) goto err_insert; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 60239378d4..6292d6d73b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1389,6 +1389,7 @@ err_out4: ops->destroy(sch); qdisc_put_stab(rtnl_dereference(sch->stab)); err_out3: + lockdep_unregister_key(&sch->root_lock_key); netdev_put(dev, &sch->dev_tracker); qdisc_free(sch); err_out2: diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4a2c763e2d..fb32984d7a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -673,6 +673,7 @@ struct Qdisc noop_qdisc = { .qlen = 0, .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock), }, + .owner = -1, }; EXPORT_SYMBOL(noop_qdisc); @@ -945,7 +946,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, __skb_queue_head_init(&sch->gso_skb); __skb_queue_head_init(&sch->skb_bad_txq); gnet_stats_basic_sync_init(&sch->bstats); + lockdep_register_key(&sch->root_lock_key); spin_lock_init(&sch->q.lock); + lockdep_set_class(&sch->q.lock, &sch->root_lock_key); if (ops->static_flags & TCQ_F_CPUSTATS) { sch->cpu_bstats = @@ -980,6 +983,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, return sch; errout1: + lockdep_unregister_key(&sch->root_lock_key); kfree(sch); errout: return ERR_PTR(err); @@ -1068,6 +1072,7 @@ static void __qdisc_destroy(struct Qdisc *qdisc) if (ops->destroy) ops->destroy(qdisc); + lockdep_unregister_key(&qdisc->root_lock_key); module_put(ops->owner); netdev_put(dev, &qdisc->dev_tracker); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 93e6fb56f3..ff3de37874 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1039,13 +1039,6 @@ static void htb_work_func(struct work_struct *work) rcu_read_unlock(); } -static void htb_set_lockdep_class_child(struct Qdisc *q) -{ - static struct
lock_class_key child_key; - - lockdep_set_class(qdisc_lock(q), &child_key); -} - static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt) { return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt); @@ -1132,7 +1125,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, return -ENOMEM; } - htb_set_lockdep_class_child(qdisc); q->direct_qdiscs[ntx] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } @@ -1468,7 +1460,6 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, } if (q->offload) { - htb_set_lockdep_class_child(new); /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ qdisc_refcount_inc(new); old_q = htb_graft_helper(dev_queue, new); @@ -1733,11 +1724,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, cl->parent->common.classid, NULL); - if (q->offload) { - if (new_q) - htb_set_lockdep_class_child(new_q); + if (q->offload) htb_parent_to_leaf_offload(sch, dev_queue, new_q); - } } sch_tree_lock(sch); @@ -1947,13 +1935,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, classid, NULL); if (q->offload) { - if (new_q) { - htb_set_lockdep_class_child(new_q); - /* One ref for cl->leaf.q, the other for - * dev_queue->qdisc. - */ + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + if (new_q) qdisc_refcount_inc(new_q); - } old_q = htb_graft_helper(dev_queue, new_q); /* No qdisc_put needed. */ WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 79e93a19d5..06e03f5cd7 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -185,7 +185,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, qopt->bands = qdisc_dev(sch)->real_num_tx_queues; - removed = kmalloc(sizeof(*removed) * (q->max_bands - q->bands), + removed = kmalloc(sizeof(*removed) * (q->max_bands - qopt->bands), GFP_KERNEL); if (!removed) return -ENOMEM; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index a0d54b4221..0b150b13be 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1151,11 +1151,6 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, list_for_each_entry(entry, &new->entries, list) cycle = ktime_add_ns(cycle, entry->interval); - if (!cycle) { - NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0"); - return -EINVAL; - } - if (cycle < 0 || cycle > INT_MAX) { NL_SET_ERR_MSG(extack, "'cycle_time' is too big"); return -EINVAL; @@ -1164,6 +1159,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, new->cycle_time = cycle; } + if (new->cycle_time < new->num_entries * length_to_duration(q, ETH_ZLEN)) { + NL_SET_ERR_MSG(extack, "'cycle_time' is too small"); + return -EINVAL; + } + taprio_calculate_gate_durations(q, new); return 0; @@ -1176,16 +1176,13 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, { bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags); - if (!qopt && !dev->num_tc) { - NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); - return -EINVAL; - } - - /* If num_tc is already set, it means that the user already - * configured the mqprio part - */ - if (dev->num_tc) + if (!qopt) { + if (!dev->num_tc) { + NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); + return -EINVAL; + } return 0; + } /* taprio imposes that traffic classes map 1:n to tx queues */ if (qopt->num_tc > 
dev->num_tx_queues) { @@ -1851,6 +1848,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, } q->flags = taprio_flags; + /* Needed for length_to_duration() during netlink attribute parsing */ + taprio_set_picos_per_byte(dev, q); + err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags); if (err < 0) return err; @@ -1910,7 +1910,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (err < 0) goto free_sched; - taprio_set_picos_per_byte(dev, q); taprio_update_queue_max_sdu(q, new_admin, stab); if (FULL_OFFLOAD_IS_ENABLED(q->flags)) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 24368f755a..f7b809c0d1 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -415,7 +415,7 @@ out: if (!IS_ERR_OR_NULL(dst)) { struct rt6_info *rt; - rt = (struct rt6_info *)dst; + rt = dst_rt6_info(dst); t->dst_cookie = rt6_get_cookie(rt); pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n", &rt->rt6i_dst.addr, rt->rt6i_dst.plen, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e849f368ed..5a7436a13b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -552,7 +552,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk, struct flowi *fl) { union sctp_addr *saddr = &t->saddr; - struct rtable *rt = (struct rtable *)t->dst; + struct rtable *rt = dst_rtable(t->dst); if (rt) { saddr->v4.sin_family = AF_INET; @@ -1085,7 +1085,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t) skb_reset_inner_mac_header(skb); skb_reset_inner_transport_header(skb); skb_set_inner_ipproto(skb, IPPROTO_SCTP); - udp_tunnel_xmit_skb((struct rtable *)dst, sk, skb, fl4->saddr, + udp_tunnel_xmit_skb(dst_rtable(dst), sk, skb, fl4->saddr, fl4->daddr, dscp, ip4_dst_hoplimit(dst), df, sctp_sk(sk)->udp_port, t->encap_port, false, false); return 0; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 4b52b3b159..5f9f3d4c1d 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -460,29 +460,11 @@ out: static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk, unsigned long mask) { - struct net *nnet = sock_net(nsk); - nsk->sk_userlocks = osk->sk_userlocks; - if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) { + if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) nsk->sk_sndbuf = osk->sk_sndbuf; - } else { - if (mask == SK_FLAGS_SMC_TO_CLC) - WRITE_ONCE(nsk->sk_sndbuf, - READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1])); - else - WRITE_ONCE(nsk->sk_sndbuf, - 2 * READ_ONCE(nnet->smc.sysctl_wmem)); - } - if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) { + if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) nsk->sk_rcvbuf = osk->sk_rcvbuf; - } else { - if (mask == SK_FLAGS_SMC_TO_CLC) - WRITE_ONCE(nsk->sk_rcvbuf, - READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1])); - else - WRITE_ONCE(nsk->sk_rcvbuf, - 2 * READ_ONCE(nnet->smc.sysctl_rmem)); - } } static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index c7af0220f8..369310909f 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1875,8 +1875,10 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); /* slack space should prevent this ever happening: */ - if (unlikely(snd_buf->len > snd_buf->buflen)) + if (unlikely(snd_buf->len > snd_buf->buflen)) { + status = -EIO; goto wrap_failed; + } /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was * done anyway, so it's safe to put the request on 
the wire: */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 24de941847..73a90ad873 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1033,17 +1033,11 @@ null_verifier: static void gss_free_in_token_pages(struct gssp_in_token *in_token) { - u32 inlen; int i; i = 0; - inlen = in_token->page_len; - while (inlen) { - if (in_token->pages[i]) - put_page(in_token->pages[i]); - inlen -= inlen > PAGE_SIZE ? PAGE_SIZE : inlen; - } - + while (in_token->pages[i]) + put_page(in_token->pages[i++]); kfree(in_token->pages); in_token->pages = NULL; } @@ -1075,7 +1069,7 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp, goto out_denied_free; pages = DIV_ROUND_UP(inlen, PAGE_SIZE); - in_token->pages = kcalloc(pages, sizeof(struct page *), GFP_KERNEL); + in_token->pages = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL); if (!in_token->pages) goto out_denied_free; in_token->page_base = 0; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 28f3749f6d..59b2fbd88e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1071,6 +1071,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, .authflavor = old->cl_auth->au_flavor, .cred = old->cl_cred, .stats = old->cl_stats, + .timeout = old->cl_timeout, }; struct rpc_clnt *clnt; int err; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b33e429336..2b4b1276d4 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1265,8 +1265,6 @@ svc_generic_init_request(struct svc_rqst *rqstp, if (rqstp->rq_proc >= versp->vs_nproc) goto err_bad_proc; rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc]; - if (!procp) - goto err_bad_proc; /* Initialize storage for argp and resp */ memset(rqstp->rq_argp, 0, procp->pc_argzero); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4f8d7efa46..432557a553 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -244,7 +244,11 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DEVICE_REMOVAL: pr_info("rpcrdma: removing device %s for %pISpc\n", ep->re_id->device->name, sap); - fallthrough; + switch (xchg(&ep->re_connect_status, -ENODEV)) { + case 0: goto wake_connect_worker; + case 1: goto disconnected; + } + return 0; case RDMA_CM_EVENT_ADDR_CHANGE: ep->re_connect_status = -ENODEV; goto disconnected; diff --git a/net/tipc/node.c b/net/tipc/node.c index c1e890a824..500320e5ca 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2105,6 +2105,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) } else { n = tipc_node_find_by_id(net, ehdr->id); } + skb_dst_force(skb); tipc_crypto_rcv(net, (n) ? 
n->crypto_rx : NULL, &skb, b); if (!skb) return; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index f892b0903d..b849a3d133 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -174,7 +174,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, local_bh_disable(); ndst = dst_cache_get(cache); if (dst->proto == htons(ETH_P_IP)) { - struct rtable *rt = (struct rtable *)ndst; + struct rtable *rt = dst_rtable(ndst); if (!rt) { struct flowi4 fl = { diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index b4674f03d7..90b7f253d3 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -816,9 +816,17 @@ struct tls_context *tls_ctx_create(struct sock *sk) return NULL; mutex_init(&ctx->tx_lock); - rcu_assign_pointer(icsk->icsk_ulp_data, ctx); ctx->sk_proto = READ_ONCE(sk->sk_prot); ctx->sk = sk; + /* Release semantic of rcu_assign_pointer() ensures that + * ctx->sk_proto is visible before changing sk->sk_prot in + * update_sk_prot(), and prevents reading uninitialized value in + * tls_{getsockopt, setsockopt}. Note that we do not need a + * read barrier in tls_{getsockopt,setsockopt} as there is an + * address dependency between sk->sk_proto->{getsockopt,setsockopt} + * and ctx->sk_proto. + */ + rcu_assign_pointer(icsk->icsk_ulp_data, ctx); return ctx; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9a6ad5974d..68a58bc07c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -221,15 +221,9 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk) return unix_peer(osk) == NULL || unix_our_peer(sk, osk); } -static inline int unix_recvq_full(const struct sock *sk) -{ - return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; -} - static inline int unix_recvq_full_lockless(const struct sock *sk) { - return skb_queue_len_lockless(&sk->sk_receive_queue) > - READ_ONCE(sk->sk_max_ack_backlog); + return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; } struct sock *unix_peer_get(struct sock *s) @@ -530,10 +524,10 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) return 0; } -static int unix_writable(const struct sock *sk) +static int unix_writable(const struct sock *sk, unsigned char state) { - return sk->sk_state != TCP_LISTEN && - (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; + return state != TCP_LISTEN && + (refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf); } static void unix_write_space(struct sock *sk) @@ -541,7 +535,7 @@ static void unix_write_space(struct sock *sk) struct socket_wq *wq; rcu_read_lock(); - if (unix_writable(sk)) { + if (unix_writable(sk, READ_ONCE(sk->sk_state))) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, @@ -570,7 +564,6 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) sk_error_report(other); } } - other->sk_state = TCP_CLOSE; } static void unix_sock_destructor(struct sock *sk) @@ -617,7 +610,7 @@ static void unix_release_sock(struct sock *sk, int embrion) u->path.dentry = NULL; u->path.mnt = NULL; state = sk->sk_state; - sk->sk_state = TCP_CLOSE; + WRITE_ONCE(sk->sk_state, TCP_CLOSE); skpair = unix_peer(sk); unix_peer(sk) = NULL; @@ -638,7 +631,7 @@ static void unix_release_sock(struct sock *sk, int embrion) unix_state_lock(skpair); /* No more writes */ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); - if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion) 
WRITE_ONCE(skpair->sk_err, ECONNRESET); unix_state_unlock(skpair); skpair->sk_state_change(skpair); @@ -731,7 +724,7 @@ static int unix_listen(struct socket *sock, int backlog) if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto out; /* Only stream/seqpacket sockets accept */ err = -EINVAL; - if (!u->addr) + if (!READ_ONCE(u->addr)) goto out; /* No listens on an unbound socket */ unix_state_lock(sk); if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) @@ -739,7 +732,8 @@ static int unix_listen(struct socket *sock, int backlog) if (backlog > sk->sk_max_ack_backlog) wake_up_interruptible_all(&u->peer_wait); sk->sk_max_ack_backlog = backlog; - sk->sk_state = TCP_LISTEN; + WRITE_ONCE(sk->sk_state, TCP_LISTEN); + /* set credentials so connect can copy them */ init_peercred(sk); err = 0; @@ -976,7 +970,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_hash = unix_unbound_hash(sk); sk->sk_allocation = GFP_KERNEL_ACCOUNT; sk->sk_write_space = unix_write_space; - sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; + sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen); sk->sk_destruct = unix_sock_destructor; u = unix_sk(sk); u->inflight = 0; @@ -1131,8 +1125,8 @@ static struct sock *unix_find_other(struct net *net, static int unix_autobind(struct sock *sk) { - unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); + unsigned int new_hash, old_hash; struct net *net = sock_net(sk); struct unix_address *addr; u32 lastnum, ordernum; @@ -1155,6 +1149,7 @@ static int unix_autobind(struct sock *sk) addr->name->sun_family = AF_UNIX; refcount_set(&addr->refcnt, 1); + old_hash = sk->sk_hash; ordernum = get_random_u32(); lastnum = ordernum & 0xFFFFF; retry: @@ -1195,8 +1190,8 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, { umode_t mode = S_IFSOCK | (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); - unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); + unsigned int new_hash, old_hash; struct net *net = sock_net(sk); struct mnt_idmap *idmap; struct unix_address *addr; @@ -1234,6 +1229,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, if (u->addr) goto out_unlock; + old_hash = sk->sk_hash; new_hash = unix_bsd_hash(d_backing_inode(dentry)); unix_table_double_lock(net, old_hash, new_hash); u->path.mnt = mntget(parent.mnt); @@ -1261,8 +1257,8 @@ out: static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, int addr_len) { - unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); + unsigned int new_hash, old_hash; struct net *net = sock_net(sk); struct unix_address *addr; int err; @@ -1280,6 +1276,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, goto out_mutex; } + old_hash = sk->sk_hash; new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); unix_table_double_lock(net, old_hash, new_hash); @@ -1369,7 +1366,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && - !unix_sk(sk)->addr) { + !READ_ONCE(unix_sk(sk)->addr)) { err = unix_autobind(sk); if (err) goto out; @@ -1399,7 +1396,8 @@ restart: if (err) goto out_unlock; - sk->sk_state = other->sk_state = TCP_ESTABLISHED; + WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED); + WRITE_ONCE(other->sk_state, TCP_ESTABLISHED); } else { /* * 1003.1g breaking connected state with 
AF_UNSPEC @@ -1416,13 +1414,20 @@ restart: unix_peer(sk) = other; if (!other) - sk->sk_state = TCP_CLOSE; + WRITE_ONCE(sk->sk_state, TCP_CLOSE); unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); unix_state_double_unlock(sk, other); - if (other != old_peer) + if (other != old_peer) { unix_dgram_disconnected(sk, old_peer); + + unix_state_lock(old_peer); + if (!unix_peer(old_peer)) + WRITE_ONCE(old_peer->sk_state, TCP_CLOSE); + unix_state_unlock(old_peer); + } + sock_put(old_peer); } else { unix_peer(sk) = other; @@ -1470,7 +1475,6 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, struct sk_buff *skb = NULL; long timeo; int err; - int st; err = unix_validate_addr(sunaddr, addr_len); if (err) @@ -1481,7 +1485,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out; if ((test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { + test_bit(SOCK_PASSPIDFD, &sock->flags)) && + !READ_ONCE(u->addr)) { err = unix_autobind(sk); if (err) goto out; @@ -1534,7 +1539,7 @@ restart: if (other->sk_shutdown & RCV_SHUTDOWN) goto out_unlock; - if (unix_recvq_full(other)) { + if (unix_recvq_full_lockless(other)) { err = -EAGAIN; if (!timeo) goto out_unlock; @@ -1559,9 +1564,7 @@ restart: Well, and we have to recheck the state after socket locked. */ - st = sk->sk_state; - - switch (st) { + switch (READ_ONCE(sk->sk_state)) { case TCP_CLOSE: /* This is ok... continue with connect */ break; @@ -1576,7 +1579,7 @@ restart: unix_state_lock_nested(sk, U_LOCK_SECOND); - if (sk->sk_state != st) { + if (sk->sk_state != TCP_CLOSE) { unix_state_unlock(sk); unix_state_unlock(other); sock_put(other); @@ -1628,7 +1631,7 @@ restart: copy_peercred(sk, other); sock->state = SS_CONNECTED; - sk->sk_state = TCP_ESTABLISHED; + WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED); sock_hold(newsk); smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ @@ -1701,7 +1704,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, goto out; err = -EINVAL; - if (sk->sk_state != TCP_LISTEN) + if (READ_ONCE(sk->sk_state) != TCP_LISTEN) goto out; /* If socket state is TCP_LISTEN it cannot change (for now...), @@ -1997,14 +2000,15 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, } if ((test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { + test_bit(SOCK_PASSPIDFD, &sock->flags)) && + !READ_ONCE(u->addr)) { err = unix_autobind(sk); if (err) goto out; } err = -EMSGSIZE; - if (len > sk->sk_sndbuf - 32) + if (len > READ_ONCE(sk->sk_sndbuf) - 32) goto out; if (len > SKB_MAX_ALLOC) { @@ -2086,7 +2090,7 @@ restart_locked: unix_peer(sk) = NULL; unix_dgram_peer_wake_disconnect_wakeup(sk, other); - sk->sk_state = TCP_CLOSE; + WRITE_ONCE(sk->sk_state, TCP_CLOSE); unix_state_unlock(sk); unix_dgram_disconnected(sk, other); @@ -2217,13 +2221,15 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other maybe_add_creds(skb, sock, other); skb_get(skb); + scm_stat_add(other, skb); + + spin_lock(&other->sk_receive_queue.lock); if (ousk->oob_skb) consume_skb(ousk->oob_skb); - WRITE_ONCE(ousk->oob_skb, skb); + __skb_queue_tail(&other->sk_receive_queue, skb); + spin_unlock(&other->sk_receive_queue.lock); - scm_stat_add(other, skb); - skb_queue_tail(&other->sk_receive_queue, skb); sk_send_sigurg(other); unix_state_unlock(other); other->sk_data_ready(other); @@ -2261,7 +2267,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr 
*msg, } if (msg->msg_namelen) { - err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; + err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out_err; } else { err = -ENOTCONN; @@ -2270,7 +2276,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto out_err; } - if (sk->sk_shutdown & SEND_SHUTDOWN) + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) goto pipe_err; while (sent < len) { @@ -2282,7 +2288,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, &err, 0); } else { /* Keep two messages in the pipe so it schedules better */ - size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); + size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64); /* allow fallback to order-0 allocations */ size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); @@ -2375,7 +2381,7 @@ static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, if (err) return err; - if (sk->sk_state != TCP_ESTABLISHED) + if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED) return -ENOTCONN; if (msg->msg_namelen) @@ -2389,7 +2395,7 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; - if (sk->sk_state != TCP_ESTABLISHED) + if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED) return -ENOTCONN; return unix_dgram_recvmsg(sock, msg, size, flags); @@ -2614,8 +2620,10 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) mutex_lock(&u->iolock); unix_state_lock(sk); + spin_lock(&sk->sk_receive_queue.lock); if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); mutex_unlock(&u->iolock); return -EINVAL; @@ -2627,6 +2635,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) WRITE_ONCE(u->oob_skb, NULL); else skb_get(oob_skb); + + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); chunk = state->recv_actor(oob_skb, 0, chunk, state); @@ -2655,24 +2665,34 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, consume_skb(skb); skb = NULL; } else { + struct sk_buff *unlinked_skb = NULL; + + spin_lock(&sk->sk_receive_queue.lock); + if (skb == u->oob_skb) { if (copied) { skb = NULL; - } else if (sock_flag(sk, SOCK_URGINLINE)) { - if (!(flags & MSG_PEEK)) { + } else if (!(flags & MSG_PEEK)) { + if (sock_flag(sk, SOCK_URGINLINE)) { WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); + } else { + __skb_unlink(skb, &sk->sk_receive_queue); + WRITE_ONCE(u->oob_skb, NULL); + unlinked_skb = skb; + skb = skb_peek(&sk->sk_receive_queue); } - } else if (flags & MSG_PEEK) { - skb = NULL; - } else { - skb_unlink(skb, &sk->sk_receive_queue); - WRITE_ONCE(u->oob_skb, NULL); - if (!WARN_ON_ONCE(skb_unref(skb))) - kfree_skb(skb); - skb = skb_peek(&sk->sk_receive_queue); + } else if (!sock_flag(sk, SOCK_URGINLINE)) { + skb = skb_peek_next(skb, &sk->sk_receive_queue); } } + + spin_unlock(&sk->sk_receive_queue.lock); + + if (unlinked_skb) { + WARN_ON_ONCE(skb_unref(unlinked_skb)); + kfree_skb(unlinked_skb); + } } return skb; } @@ -2680,7 +2700,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { - if (unlikely(sk->sk_state != TCP_ESTABLISHED)) + if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) return -ENOTCONN; return unix_read_skb(sk, recv_actor); @@ -2704,7 +2724,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, size_t size = state->size; 
unsigned int last_len; - if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { + if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) { err = -EINVAL; goto out; } @@ -3035,7 +3055,7 @@ long unix_inq_len(struct sock *sk) struct sk_buff *skb; long amount = 0; - if (sk->sk_state == TCP_LISTEN) + if (READ_ONCE(sk->sk_state) == TCP_LISTEN) return -EINVAL; spin_lock(&sk->sk_receive_queue.lock); @@ -3147,12 +3167,14 @@ static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; + unsigned char state; __poll_t mask; u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; shutdown = READ_ONCE(sk->sk_shutdown); + state = READ_ONCE(sk->sk_state); /* exceptional events? */ if (READ_ONCE(sk->sk_err)) @@ -3174,14 +3196,14 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && - sk->sk_state == TCP_CLOSE) + state == TCP_CLOSE) mask |= EPOLLHUP; /* * we set writable also when the other side has shut down the * connection. This prevents stuck sockets. */ - if (unix_writable(sk)) + if (unix_writable(sk, state)) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; return mask; @@ -3192,12 +3214,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk, *other; unsigned int writable; + unsigned char state; __poll_t mask; u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; shutdown = READ_ONCE(sk->sk_shutdown); + state = READ_ONCE(sk->sk_state); /* exceptional events? */ if (READ_ONCE(sk->sk_err) || @@ -3217,19 +3241,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ - if (sk->sk_type == SOCK_SEQPACKET) { - if (sk->sk_state == TCP_CLOSE) - mask |= EPOLLHUP; - /* connection hasn't started yet? */ - if (sk->sk_state == TCP_SYN_SENT) - return mask; - } + if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE) + mask |= EPOLLHUP; /* No write status requested, avoid expensive OUT tests. 
*/ if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) return mask; - writable = unix_writable(sk); + writable = unix_writable(sk, state); if (writable) { unix_state_lock(sk); diff --git a/net/unix/diag.c b/net/unix/diag.c index ae39538c50..937edf4afe 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -65,7 +65,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) u32 *buf; int i; - if (sk->sk_state == TCP_LISTEN) { + if (READ_ONCE(sk->sk_state) == TCP_LISTEN) { spin_lock(&sk->sk_receive_queue.lock); attr = nla_reserve(nlskb, UNIX_DIAG_ICONS, @@ -103,8 +103,8 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) { struct unix_diag_rqlen rql; - if (sk->sk_state == TCP_LISTEN) { - rql.udiag_rqueue = sk->sk_receive_queue.qlen; + if (READ_ONCE(sk->sk_state) == TCP_LISTEN) { + rql.udiag_rqueue = skb_queue_len_lockless(&sk->sk_receive_queue); rql.udiag_wqueue = sk->sk_max_ack_backlog; } else { rql.udiag_rqueue = (u32) unix_inq_len(sk); @@ -136,7 +136,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r rep = nlmsg_data(nlh); rep->udiag_family = AF_UNIX; rep->udiag_type = sk->sk_type; - rep->udiag_state = sk->sk_state; + rep->udiag_state = READ_ONCE(sk->sk_state); rep->pad = 0; rep->udiag_ino = sk_ino; sock_diag_save_cookie(sk, rep->udiag_cookie); @@ -165,7 +165,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) goto out_nlmsg_trim; - if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown)) + if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, READ_ONCE(sk->sk_shutdown))) goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_UID) && @@ -215,7 +215,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) sk_for_each(sk, &net->unx.table.buckets[slot]) { if (num < s_num) goto next; - if (!(req->udiag_states & (1 << sk->sk_state))) + if (!(req->udiag_states & (1 << READ_ONCE(sk->sk_state)))) goto next; if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk), NETLINK_CB(cb->skb).portid, diff --git a/net/wireless/core.c b/net/wireless/core.c index 3fb1b63735..4b1f45e307 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -431,7 +431,7 @@ static void cfg80211_wiphy_work(struct work_struct *work) if (wk) { list_del_init(&wk->entry); if (!list_empty(&rdev->wiphy_work_list)) - schedule_work(work); + queue_work(system_unbound_wq, work); spin_unlock_irq(&rdev->wiphy_work_lock); wk->func(&rdev->wiphy, wk); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 30ff9a4708..65c416e8d2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9162,6 +9162,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) struct wiphy *wiphy; int err, tmp, n_ssids = 0, n_channels, i; size_t ie_len, size; + size_t ssids_offset, ie_offset; wiphy = &rdev->wiphy; @@ -9207,21 +9208,20 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) return -EINVAL; size = struct_size(request, channels, n_channels); + ssids_offset = size; size = size_add(size, array_size(sizeof(*request->ssids), n_ssids)); + ie_offset = size; size = size_add(size, ie_len); request = kzalloc(size, GFP_KERNEL); if (!request) return -ENOMEM; + request->n_channels = n_channels; if (n_ssids) - request->ssids = (void *)&request->channels[n_channels]; + request->ssids = (void *)request + ssids_offset; request->n_ssids = n_ssids; - if (ie_len) { - if (n_ssids) - request->ie = (void 
*)(request->ssids + n_ssids); - else - request->ie = (void *)(request->channels + n_channels); - } + if (ie_len) + request->ie = (void *)request + ie_offset; i = 0; if (scan_freqs) { diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index e106dcea39..c569c37da3 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -56,7 +56,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.burst_period = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]) out->ftm.burst_period = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]); + nla_get_u16(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]); out->ftm.asap = !!tb[NL80211_PMSR_FTM_REQ_ATTR_ASAP]; if (out->ftm.asap && !capa->ftm.asap) { @@ -75,7 +75,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.num_bursts_exp = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]) out->ftm.num_bursts_exp = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]); if (capa->ftm.max_bursts_exponent >= 0 && out->ftm.num_bursts_exp > capa->ftm.max_bursts_exponent) { @@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.burst_duration = 15; if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]) out->ftm.burst_duration = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]); out->ftm.ftms_per_burst = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]) @@ -107,7 +107,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.ftmr_retries = 3; if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]) out->ftm.ftmr_retries = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]); out->ftm.request_lci = !!tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI]; if (out->ftm.request_lci && !capa->ftm.request_lci) { diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 5a5dd3ce49..ecea8c08e2 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2128,7 +2128,8 @@ static bool cfg80211_6ghz_power_type_valid(const u8 *ie, size_t ielen, struct ieee80211_he_operation *he_oper; tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen); - if (tmp && tmp->datalen >= sizeof(*he_oper) + 1) { + if (tmp && tmp->datalen >= sizeof(*he_oper) + 1 && + tmp->datalen >= ieee80211_he_oper_size(tmp->data + 1)) { const struct ieee80211_he_6ghz_oper *he_6ghz_oper; he_oper = (void *)&tmp->data[1]; @@ -2207,12 +2208,16 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, tmp.pub.use_for = data->use_for; tmp.pub.cannot_use_reasons = data->cannot_use_reasons; - if (data->bss_source != BSS_SOURCE_DIRECT) { + switch (data->bss_source) { + case BSS_SOURCE_MBSSID: tmp.pub.transmitted_bss = data->source_bss; + fallthrough; + case BSS_SOURCE_STA_PROFILE: ts = bss_from_pub(data->source_bss)->ts; tmp.pub.bssid_index = data->bssid_index; tmp.pub.max_bssid_indicator = data->max_bssid_indicator; - } else { + break; + case BSS_SOURCE_DIRECT: ts = jiffies; if (channel->band == NL80211_BAND_60GHZ) { @@ -2227,6 +2232,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, regulatory_hint_found_beacon(wiphy, channel, gfp); } + break; } /* @@ -2655,6 +2661,7 @@ struct tbtt_info_iter_data { u8 param_ch_count; u32 use_for; u8 mld_id, link_id; + bool non_tx; }; static enum cfg80211_rnr_iter_ret @@ -2665,14 +2672,20 @@ cfg802121_mld_ap_rnr_iter(void *_data, u8 type, const struct 
ieee80211_rnr_mld_params *mld_params; struct tbtt_info_iter_data *data = _data; u8 link_id; + bool non_tx = false; if (type == IEEE80211_TBTT_INFO_TYPE_TBTT && tbtt_info_len >= offsetofend(struct ieee80211_tbtt_info_ge_11, - mld_params)) - mld_params = (void *)(tbtt_info + - offsetof(struct ieee80211_tbtt_info_ge_11, - mld_params)); - else if (type == IEEE80211_TBTT_INFO_TYPE_MLD && + mld_params)) { + const struct ieee80211_tbtt_info_ge_11 *tbtt_info_ge_11 = + (void *)tbtt_info; + + non_tx = (tbtt_info_ge_11->bss_params & + (IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID | + IEEE80211_RNR_TBTT_PARAMS_TRANSMITTED_BSSID)) == + IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID; + mld_params = &tbtt_info_ge_11->mld_params; + } else if (type == IEEE80211_TBTT_INFO_TYPE_MLD && tbtt_info_len >= sizeof(struct ieee80211_rnr_mld_params)) mld_params = (void *)tbtt_info; else @@ -2691,6 +2704,7 @@ cfg802121_mld_ap_rnr_iter(void *_data, u8 type, data->param_ch_count = le16_get_bits(mld_params->params, IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT); + data->non_tx = non_tx; if (type == IEEE80211_TBTT_INFO_TYPE_TBTT) data->use_for = NL80211_BSS_USE_FOR_ALL; @@ -2702,7 +2716,7 @@ cfg802121_mld_ap_rnr_iter(void *_data, u8 type, static u8 cfg80211_rnr_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, const struct ieee80211_neighbor_ap_info **ap_info, - u8 *param_ch_count) + u8 *param_ch_count, bool *non_tx) { struct tbtt_info_iter_data data = { .mld_id = mld_id, @@ -2713,6 +2727,7 @@ cfg80211_rnr_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, *ap_info = data.ap_info; *param_ch_count = data.param_ch_count; + *non_tx = data.non_tx; return data.use_for; } @@ -2892,6 +2907,7 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, ssize_t profile_len; u8 param_ch_count; u8 link_id, use_for; + bool non_tx; if (!ieee80211_mle_basic_sta_prof_size_ok((u8 *)mle->sta_prof[i], mle->sta_prof_len[i])) continue; @@ -2937,10 +2953,24 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, tx_data->ielen, mld_id, link_id, &ap_info, - &param_ch_count); + &param_ch_count, + &non_tx); if (!use_for) continue; + /* + * As of 802.11be_D5.0, the specification does not give us any + * way of discovering both the MaxBSSID and the Multiple-BSSID + * Index. It does seem like the Multiple-BSSID Index element + * may be provided, but section 9.4.2.45 explicitly forbids + * including a Multiple-BSSID Element (in this case without any + * subelements). + * Without both pieces of information we cannot calculate the + * reference BSSID, so simply ignore the BSS.
+ */ + if (non_tx) + continue; + /* We could sanity check the BSSID is included */ if (!ieee80211_operating_class_to_band(ap_info->op_class, diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 565511a3f4..62f26618f6 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -5,7 +5,7 @@ * * Copyright 2005-2006 Jiri Benc <jbenc@suse.cz> * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2020-2021, 2023 Intel Corporation + * Copyright (C) 2020-2021, 2023-2024 Intel Corporation */ #include <linux/device.h> @@ -137,7 +137,7 @@ static int wiphy_resume(struct device *dev) if (rdev->wiphy.registered && rdev->ops->resume) ret = rdev_resume(rdev); rdev->suspended = false; - schedule_work(&rdev->wiphy_work); + queue_work(system_unbound_wq, &rdev->wiphy_work); wiphy_unlock(&rdev->wiphy); if (ret) diff --git a/net/wireless/util.c b/net/wireless/util.c index 2bde8a3546..082c6f9c54 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2549,6 +2549,7 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, { struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; + int ret; wdev = dev->ieee80211_ptr; if (!wdev) @@ -2560,7 +2561,11 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, memset(sinfo, 0, sizeof(*sinfo)); - return rdev_get_station(rdev, dev, mac_addr, sinfo); + wiphy_lock(&rdev->wiphy); + ret = rdev_get_station(rdev, dev, mac_addr, sinfo); + wiphy_unlock(&rdev->wiphy); + + return ret; } EXPORT_SYMBOL(cfg80211_get_station); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 727aa20be4..7d1c0986f9 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -313,13 +313,10 @@ static bool xsk_is_bound(struct xdp_sock *xs) static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { - struct net_device *dev = xdp->rxq->dev; - u32 qid = xdp->rxq->queue_index; - if (!xsk_is_bound(xs)) return -ENXIO; - if (!dev->_rx[qid].pool || xs->umem != dev->_rx[qid].pool->umem) + if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) return -EINVAL; if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 53d8fabfa6..d154597728 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2598,8 +2598,7 @@ static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { if (dst->ops->family == AF_INET6) { - struct rt6_info *rt = (struct rt6_info *)dst; - path->path_cookie = rt6_get_cookie(rt); + path->path_cookie = rt6_get_cookie(dst_rt6_info(dst)); path->u.rt6.rt6i_nfheader_len = nfheader_len; } } @@ -3905,15 +3904,10 @@ static void xfrm_link_failure(struct sk_buff *skb) /* Impossible. Such dst must be popped before reaches point of failure. */ } -static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) +static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst) { - if (dst) { - if (dst->obsolete) { - dst_release(dst); - dst = NULL; - } - } - return dst; + if (dst->obsolete) + sk_dst_reset(sk); } static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr) -- cgit v1.2.3
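The tls_main.c hunk above moves rcu_assign_pointer() after the stores that initialize the context, so the release semantics of the publish order ctx->sk_proto before the pointer ever becomes visible to a reader. A minimal userspace sketch of that publish-after-init discipline, using C11 release/acquire atomics as stand-ins for the kernel's RCU primitives (the ctx layout and function names are illustrative, not the kernel API):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx {
	int proto;			/* stands in for ctx->sk_proto */
};

static _Atomic(struct ctx *) g_ctx;	/* stands in for icsk_ulp_data */

static void publish(void)
{
	struct ctx *c = malloc(sizeof(*c));

	if (!c)
		return;
	c->proto = 42;			/* initialize first... */
	/* ...then publish; the release store orders the init before
	 * the pointer becomes visible, like rcu_assign_pointer(). */
	atomic_store_explicit(&g_ctx, c, memory_order_release);
}

static void reader(void)
{
	/* Pairs with the release store: a reader that sees a non-NULL
	 * pointer is guaranteed to see an initialized ->proto too. */
	struct ctx *c = atomic_load_explicit(&g_ctx, memory_order_acquire);

	if (c)
		printf("proto=%d\n", c->proto);
}

int main(void)
{
	publish();
	reader();
	return 0;
}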
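The nl80211.c scan hunk stops deriving request->ssids and request->ie from pointer arithmetic over the channels array and instead records ssids_offset and ie_offset while the total allocation size is summed, then carves both regions from the same base pointer. A compact sketch of that single-allocation layout, assuming plain calloc() in place of the kernel's size_add()/kzalloc() overflow-safe helpers (all names are illustrative):

#include <stdlib.h>

struct scan_req {
	size_t n_chan;
	int *chan;		/* points into the same allocation */
	unsigned char *ie;	/* likewise */
};

static struct scan_req *scan_req_alloc(size_t n_chan, size_t ie_len)
{
	size_t size = sizeof(struct scan_req);
	size_t chan_off, ie_off;
	struct scan_req *req;

	chan_off = size;	/* record each offset before adding */
	size += n_chan * sizeof(int);
	ie_off = size;
	size += ie_len;

	req = calloc(1, size);
	if (!req)
		return NULL;

	req->n_chan = n_chan;
	if (n_chan)
		req->chan = (int *)((char *)req + chan_off);
	if (ie_len)
		req->ie = (unsigned char *)req + ie_off;
	return req;
}

int main(void)
{
	/* one free() releases the header and both trailing regions */
	free(scan_req_alloc(4, 16));
	return 0;
}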
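The svcauth_gss.c hunk fixes a teardown loop that tested in_token->pages[i] but never advanced i; the new code sizes the array with one extra slot (kcalloc(pages + 1, ...)) so the free path can simply walk until it hits the NULL sentinel. A userspace sketch of the sentinel-terminated pointer array (plain malloc/free, hypothetical names):

#include <stdlib.h>

static void free_pages_array(void **pages)
{
	size_t i = 0;

	if (!pages)
		return;
	while (pages[i])		/* relies on the extra NULL slot */
		free(pages[i++]);
	free(pages);
}

static void **alloc_pages_array(size_t n)
{
	/* n + 1 zeroed entries: the last one stays NULL as sentinel */
	void **pages = calloc(n + 1, sizeof(*pages));
	size_t i;

	if (!pages)
		return NULL;
	for (i = 0; i < n; i++) {
		pages[i] = malloc(64);
		if (!pages[i]) {
			free_pages_array(pages);
			return NULL;
		}
	}
	return pages;
}

int main(void)
{
	free_pages_array(alloc_pages_array(4));
	return 0;
}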
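The new nft_payload_set_vlan() above dispatches on the byte offset of the requested write, using offsetof(struct vlan_ethhdr, ...) case labels so that only whole-field writes to h_vlan_proto or h_vlan_TCI are accepted and anything else falls through to the error path. A self-contained sketch of that offsetof-keyed dispatch, with demo structures rather than the kernel's skb and register types:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* demo wire-format header; its field offsets key the dispatch */
struct vlan_hdr_demo {
	uint16_t proto;
	uint16_t tci;
};

/* demo per-packet metadata the fields are redirected into */
struct meta_demo {
	uint16_t proto;
	uint16_t tci;
};

static bool set_vlan_field(struct meta_demo *m, size_t offset,
			   const void *src, size_t len)
{
	switch (offset) {
	case offsetof(struct vlan_hdr_demo, proto):
		if (len != sizeof(m->proto))
			return false;	/* whole-field writes only */
		memcpy(&m->proto, src, len);
		return true;
	case offsetof(struct vlan_hdr_demo, tci):
		if (len != sizeof(m->tci))
			return false;
		memcpy(&m->tci, src, len);
		return true;
	default:
		return false;		/* unknown offsets rejected */
	}
}

int main(void)
{
	struct meta_demo m = { 0 };
	uint16_t tci = 5;

	return set_vlan_field(&m, offsetof(struct vlan_hdr_demo, tci),
			      &tci, sizeof(tci)) ? 0 : 1;
}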